busqueda de noticias

2025-03-28 15:42:16 +01:00
parent 6e82756c91
commit 10292bd400
4 changed files with 129 additions and 2 deletions
--- a/bot/build.gradle
+++ b/bot/build.gradle
@ -24,6 +24,8 @@ dependencies {
 	implementation 'org.springframework.boot:spring-boot-starter-logging'
 	implementation 'com.squareup.okhttp3:okhttp:4.11.0'
 	implementation 'org.json:json:20230227'
+	implementation 'org.jsoup:jsoup:1.15.4'
+	implementation 'com.rometools:rome:1.18.0'
 	testImplementation 'org.springframework.boot:spring-boot-starter-test'
 	testRuntimeOnly 'org.junit.platform:junit-platform-launcher'
 }
--- a/bot/src/main/java/es/imunnic/inversionitasBot/NewsArticle.java
+++ b/bot/src/main/java/es/imunnic/inversionitasBot/NewsArticle.java
@ -0,0 +1,31 @@
+package es.imunnic.inversionitasBot;
+
+/**
+ * Immutable holder for one news item: its headline, a short description,
+ * the link to the article and the text content associated with it.
+ *
+ * <p>Values are stored exactly as passed to the constructor; no validation or
+ * normalisation is applied, so any of them may be {@code null}.
+ */
+public class NewsArticle {
+  private final String title;
+  private final String description;
+  private final String url;
+  private final String content;
+
+  /**
+   * Creates a news item.
+   *
+   * @param title       headline of the article
+   * @param description short description or summary
+   * @param url         link to the article
+   * @param content     text content of the article
+   */
+  public NewsArticle(String title, String description, String url, String content) {
+    this.title = title;
+    this.description = description;
+    this.url = url;
+    this.content = content;
+  }
+
+  /** @return the headline passed at construction time */
+  public String getTitle() {
+    return title;
+  }
+
+  /** @return the description passed at construction time */
+  public String getDescription() {
+    return description;
+  }
+
+  /** @return the article link passed at construction time */
+  public String getUrl() {
+    return url;
+  }
+
+  /** @return the article text passed at construction time */
+  public String getContent() {
+    return content;
+  }
+}
--- a/bot/src/main/java/es/imunnic/inversionitasBot/TelegramBot.java
+++ b/bot/src/main/java/es/imunnic/inversionitasBot/TelegramBot.java
@ -1,6 +1,7 @@
 package es.imunnic.inversionitasBot;

 import es.imunnic.inversionitasBot.services.ApiService;
+import es.imunnic.inversionitasBot.services.GoogleNewsScraperService;
 import org.json.JSONObject;
 import org.springframework.beans.factory.annotation.Value;
 import org.springframework.scheduling.annotation.Scheduled;
@ -20,6 +21,7 @@ import java.util.Map;
@Component
 public class TelegramBot extends TelegramLongPollingBot {

+  private GoogleNewsScraperService scrapper = new GoogleNewsScraperService();
  //@Value("${telegram.bot.username}")
  private String BOT_USERNAME = "Inversionitas_Bot";
  //@Value("${telegram.bot.token}")
@ -48,6 +50,9 @@ public class TelegramBot extends TelegramLongPollingBot {

      if (messageText.equalsIgnoreCase("/resumen")) {
        construirResumenNoticias(chatId);
+      } else if (messageText.toLowerCase().startsWith("/busca ")) {
+        String query = messageText.substring(7).trim();
+        buscarNoticiasYEnviar(chatId, query);
      } else {
        sendMessage(chatId, "Comando no reconocido. Usa /resumen para obtener el resumen diario.");
      }
@ -147,6 +152,20 @@ public class TelegramBot extends TelegramLongPollingBot {

    return partes;
  }
+
+  private void buscarNoticiasYEnviar(String chatId, String query) {
+    List<NewsArticle> noticias = scrapper.searchNews(query);
+
+    if (noticias.isEmpty()) {
+      sendMessage(chatId, "No encontré noticias sobre: *" + query + "*");
+      return;
+    }
+
+    StringBuilder mensaje = new StringBuilder("📰 *Noticias sobre " + query + "*:\n\n");
+    for (NewsArticle noticia : noticias) {
+      mensaje.append("🔹 [").append(noticia.getTitle()).append("](").append(noticia.getUrl()).append(")\n");
+    }
+
+    sendMessage(chatId, mensaje.toString());
+  }
 }
-//TODO ajustar los mensajes para que no se pasen
-//TODO ajustar la presentación con la negrita
--- a/bot/src/main/java/es/imunnic/inversionitasBot/services/GoogleNewsScraperService.java
+++ b/bot/src/main/java/es/imunnic/inversionitasBot/services/GoogleNewsScraperService.java
@ -0,0 +1,75 @@
+package es.imunnic.inversionitasBot.services;
+
+import es.imunnic.inversionitasBot.NewsArticle;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+import com.rometools.rome.feed.synd.SyndEntry;
+import com.rometools.rome.feed.synd.SyndFeed;
+import com.rometools.rome.io.SyndFeedInput;
+import com.rometools.rome.io.XmlReader;
+import org.springframework.stereotype.Service;
+import java.net.HttpURLConnection;
+import java.net.URL;
+import java.net.URLEncoder;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+@Service
+public class GoogleNewsScraperService {
+
+  private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36";
+
+  public List<NewsArticle> searchNews(String query) {
+    List<NewsArticle> newsList = new ArrayList<>();
+    String rssUrl = "https://news.google.com/rss/search?q=" + query.replace(" ", "+") + "&hl=es&gl=ES&ceid=ES%3Aes";
+
+    try {
+      URL url = new URL(rssUrl);
+      HttpURLConnection conn = (HttpURLConnection) url.openConnection();
+      conn.setRequestProperty("User-Agent", USER_AGENT);
+      SyndFeedInput input = new SyndFeedInput();
+      SyndFeed feed = input.build(new XmlReader(conn.getInputStream()));
+
+      for (SyndEntry entry : feed.getEntries()) {
+        String title = entry.getTitle();
+        String link = entry.getLink();
+        String description = (entry.getDescription() != null) ? entry.getDescription().getValue() : "Sin descripción";
+        String finalUrl = resolveFinalUrl(link);
+        String content = extractArticleContent(finalUrl);
+
+        newsList.add(new NewsArticle(title, description, finalUrl, content));
+      }
+    } catch (Exception e) {
+      e.printStackTrace();
+    }
+    return newsList;
+  }
+
+  private String resolveFinalUrl(String url) {
+    try {
+      HttpURLConnection conn = (HttpURLConnection) new URL(url).openConnection();
+      conn.setInstanceFollowRedirects(true);
+      conn.setRequestProperty("User-Agent", USER_AGENT);
+      conn.connect();
+      return (conn.getHeaderField("Location") != null) ? conn.getHeaderField("Location") : url;
+    } catch (Exception e) {
+      return url;
+    }
+  }
+
+  private String extractArticleContent(String url) {
+    try {
+      Document doc = Jsoup.connect(url).userAgent(USER_AGENT).get();
+      Elements paragraphs = doc.select("article p, .post-content p, .entry-content p, .content p, #article-body p");
+      StringBuilder content = new StringBuilder();
+      for (Element p : paragraphs) {
+        content.append(p.text()).append("\n");
+      }
+      return content.length() > 0 ? content.toString() : "No se encontró contenido relevante";
+    } catch (Exception e) {
+      return "Error al extraer contenido";
+    }
+  }
+}
+