From 10292bd400b7bd7f8750651a764334116ff4dcca Mon Sep 17 00:00:00 2001
From: imunnic
Date: Fri, 28 Mar 2025 15:42:16 +0100
Subject: [PATCH] busqueda de noticias

---
 bot/build.gradle                              |   2 +
 .../imunnic/inversionitasBot/NewsArticle.java |  42 ++++++
 .../imunnic/inversionitasBot/TelegramBot.java |  29 +++-
 .../services/GoogleNewsScraperService.java    | 125 ++++++++++++++++++
 4 files changed, 196 insertions(+), 2 deletions(-)
 create mode 100644 bot/src/main/java/es/imunnic/inversionitasBot/NewsArticle.java
 create mode 100644 bot/src/main/java/es/imunnic/inversionitasBot/services/GoogleNewsScraperService.java

diff --git a/bot/build.gradle b/bot/build.gradle
index 958bf25..c6350f1 100644
--- a/bot/build.gradle
+++ b/bot/build.gradle
@@ -24,6 +24,8 @@ dependencies {
     implementation 'org.springframework.boot:spring-boot-starter-logging'
     implementation 'com.squareup.okhttp3:okhttp:4.11.0'
     implementation 'org.json:json:20230227'
+    implementation 'org.jsoup:jsoup:1.15.4'
+    implementation 'com.rometools:rome:1.18.0'
     testImplementation 'org.springframework.boot:spring-boot-starter-test'
     testRuntimeOnly 'org.junit.platform:junit-platform-launcher'
 }
diff --git a/bot/src/main/java/es/imunnic/inversionitasBot/NewsArticle.java b/bot/src/main/java/es/imunnic/inversionitasBot/NewsArticle.java
new file mode 100644
index 0000000..170342a
--- /dev/null
+++ b/bot/src/main/java/es/imunnic/inversionitasBot/NewsArticle.java
@@ -0,0 +1,42 @@
+package es.imunnic.inversionitasBot;
+
+/**
+ * Immutable value object describing one news item returned by a search.
+ */
+public class NewsArticle {
+    private final String title;
+    private final String description;
+    private final String url;
+    private final String content;
+
+    /**
+     * Creates an article from its already-extracted parts.
+     *
+     * @param title       headline of the article
+     * @param description short summary as provided by the feed
+     * @param url         link to the article
+     * @param content     text extracted from the article page
+     */
+    public NewsArticle(String title, String description, String url, String content) {
+        this.title = title;
+        this.description = description;
+        this.url = url;
+        this.content = content;
+    }
+
+    public String getTitle() {
+        return title;
+    }
+
+    public String getDescription() {
+        return description;
+    }
+
+    public String getUrl() {
+        return url;
+    }
+
+    public String getContent() {
+        return content;
+    }
+}
diff --git a/bot/src/main/java/es/imunnic/inversionitasBot/TelegramBot.java b/bot/src/main/java/es/imunnic/inversionitasBot/TelegramBot.java
index 03fda8b..550cc98 100644
--- a/bot/src/main/java/es/imunnic/inversionitasBot/TelegramBot.java
+++ b/bot/src/main/java/es/imunnic/inversionitasBot/TelegramBot.java
@@ -1,6 +1,7 @@
 package es.imunnic.inversionitasBot;
 
 import es.imunnic.inversionitasBot.services.ApiService;
+import es.imunnic.inversionitasBot.services.GoogleNewsScraperService;
 import org.json.JSONObject;
 import org.springframework.beans.factory.annotation.Value;
 import org.springframework.scheduling.annotation.Scheduled;
@@ -20,6 +21,7 @@ import java.util.Map;
 
 @Component
 public class TelegramBot extends TelegramLongPollingBot {
+    private GoogleNewsScraperService scrapper = new GoogleNewsScraperService();
     //@Value("${telegram.bot.username}")
     private String BOT_USERNAME = "Inversionitas_Bot";
     //@Value("${telegram.bot.token}")
@@ -48,6 +50,9 @@ public class TelegramBot extends TelegramLongPollingBot {
 
             if (messageText.equalsIgnoreCase("/resumen")) {
                 construirResumenNoticias(chatId);
+            } else if (messageText.toLowerCase().startsWith("/busca ")) {
+                String query = messageText.substring(7).trim();
+                buscarNoticiasYEnviar(chatId, query);
             } else {
                 sendMessage(chatId, "Comando no reconocido. Usa /resumen para obtener el resumen diario.");
             }
@@ -147,6 +152,26 @@ public class TelegramBot extends TelegramLongPollingBot {
 
         return partes;
     }
+
+    /**
+     * Searches news for {@code query} and sends the results to the chat as a list of links.
+     *
+     * @param chatId chat that receives the answer
+     * @param query  search terms typed after the /busca command
+     */
+    private void buscarNoticiasYEnviar(String chatId, String query) {
+        List<NewsArticle> noticias = scrapper.searchNews(query);
+
+        if (noticias.isEmpty()) {
+            sendMessage(chatId, "No encontré noticias sobre: *" + query + "*");
+            return;
+        }
+
+        StringBuilder mensaje = new StringBuilder("📰 *Noticias sobre " + query + "*:\n\n");
+        for (NewsArticle noticia : noticias) {
+            mensaje.append("🔹 [").append(noticia.getTitle()).append("](").append(noticia.getUrl()).append(")\n");
+        }
+
+        sendMessage(chatId, mensaje.toString());
+    }
 }
-//TODO ajustar los mensajes para que no se pasen
-//TODO ajustar la presentación con la negrita
diff --git a/bot/src/main/java/es/imunnic/inversionitasBot/services/GoogleNewsScraperService.java b/bot/src/main/java/es/imunnic/inversionitasBot/services/GoogleNewsScraperService.java
new file mode 100644
index 0000000..f52dd9a
--- /dev/null
+++ b/bot/src/main/java/es/imunnic/inversionitasBot/services/GoogleNewsScraperService.java
@@ -0,0 +1,125 @@
+package es.imunnic.inversionitasBot.services;
+
+import es.imunnic.inversionitasBot.NewsArticle;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+import com.rometools.rome.feed.synd.SyndEntry;
+import com.rometools.rome.feed.synd.SyndFeed;
+import com.rometools.rome.io.SyndFeedInput;
+import com.rometools.rome.io.XmlReader;
+import org.springframework.stereotype.Service;
+import java.net.HttpURLConnection;
+import java.net.URL;
+import java.net.URLEncoder;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Searches Google News through its RSS endpoint and scrapes the text of each result.
+ */
+@Service
+public class GoogleNewsScraperService {
+
+    private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36";
+
+    /** Connect/read timeout, in milliseconds, applied to every outgoing request. */
+    private static final int TIMEOUT_MS = 10_000;
+
+    /**
+     * Queries the Google News RSS search feed (Spanish edition) and builds one article per entry.
+     *
+     * @param query free-text search terms; URL-encoded before being sent
+     * @return the articles found, or an empty list when the feed cannot be read
+     */
+    public List<NewsArticle> searchNews(String query) {
+        List<NewsArticle> newsList = new ArrayList<>();
+        HttpURLConnection conn = null;
+
+        try {
+            // Encode the whole query: characters such as '&', '#' or accents would otherwise
+            // corrupt the request or inject extra parameters.
+            String rssUrl = "https://news.google.com/rss/search?q=" + URLEncoder.encode(query, "UTF-8")
+                    + "&hl=es&gl=ES&ceid=ES%3Aes";
+            conn = (HttpURLConnection) new URL(rssUrl).openConnection();
+            conn.setConnectTimeout(TIMEOUT_MS);
+            conn.setReadTimeout(TIMEOUT_MS);
+            conn.setRequestProperty("User-Agent", USER_AGENT);
+
+            SyndFeed feed;
+            try (XmlReader reader = new XmlReader(conn.getInputStream())) {
+                feed = new SyndFeedInput().build(reader);
+            }
+
+            // NOTE(review): every entry costs two extra HTTP requests, done sequentially.
+            for (SyndEntry entry : feed.getEntries()) {
+                String title = entry.getTitle();
+                String link = entry.getLink();
+                String description = (entry.getDescription() != null) ? entry.getDescription().getValue() : "Sin descripción";
+                String finalUrl = resolveFinalUrl(link);
+                String content = extractArticleContent(finalUrl);
+
+                newsList.add(new NewsArticle(title, description, finalUrl, content));
+            }
+        } catch (Exception e) {
+            e.printStackTrace();
+        } finally {
+            if (conn != null) {
+                conn.disconnect();
+            }
+        }
+        return newsList;
+    }
+
+    /**
+     * Follows HTTP redirects starting at {@code url} and returns the address finally reached.
+     *
+     * @param url address to resolve
+     * @return the final address, or {@code url} itself when the request fails
+     */
+    private String resolveFinalUrl(String url) {
+        HttpURLConnection conn = null;
+        try {
+            conn = (HttpURLConnection) new URL(url).openConnection();
+            conn.setInstanceFollowRedirects(true);
+            conn.setConnectTimeout(TIMEOUT_MS);
+            conn.setReadTimeout(TIMEOUT_MS);
+            conn.setRequestProperty("User-Agent", USER_AGENT);
+            // Reading the status performs the request; redirects that were followed are then
+            // reflected in getURL(), not in a Location header.
+            conn.getResponseCode();
+            String location = conn.getHeaderField("Location");
+            // A Location header only remains when a redirect was not followed automatically.
+            URL resolved = (location != null) ? new URL(conn.getURL(), location) : conn.getURL();
+            return resolved.toString();
+        } catch (Exception e) {
+            return url;
+        } finally {
+            if (conn != null) {
+                conn.disconnect();
+            }
+        }
+    }
+
+    /**
+     * Downloads the page at {@code url} and joins the paragraphs of common article containers.
+     *
+     * @param url address of the article page
+     * @return the extracted text (one paragraph per line), or a placeholder message when nothing
+     *         matches or the page cannot be fetched
+     */
+    private String extractArticleContent(String url) {
+        try {
+            Document doc = Jsoup.connect(url).userAgent(USER_AGENT).timeout(TIMEOUT_MS).get();
+            Elements paragraphs = doc.select("article p, .post-content p, .entry-content p, .content p, #article-body p");
+            StringBuilder content = new StringBuilder();
+            for (Element p : paragraphs) {
+                content.append(p.text()).append("\n");
+            }
+            return content.length() > 0 ? content.toString() : "No se encontró contenido relevante";
+        } catch (Exception e) {
+            return "Error al extraer contenido";
+        }
+    }
+}