From 2e78736b6301cf19c47c27211450fabf68304f1a Mon Sep 17 00:00:00 2001 From: imunnic Date: Fri, 28 Mar 2025 19:28:59 +0100 Subject: [PATCH] Busqueda sin enlaces --- .../InversionitasBotApplication.java | 39 ++++++++++++++ .../imunnic/inversionitasBot/TelegramBot.java | 53 +++++++++++++------ .../services/GoogleNewsScraperService.java | 41 +++++++------- 3 files changed, 99 insertions(+), 34 deletions(-) diff --git a/bot/src/main/java/es/imunnic/inversionitasBot/InversionitasBotApplication.java b/bot/src/main/java/es/imunnic/inversionitasBot/InversionitasBotApplication.java index 761d9a1..a149655 100644 --- a/bot/src/main/java/es/imunnic/inversionitasBot/InversionitasBotApplication.java +++ b/bot/src/main/java/es/imunnic/inversionitasBot/InversionitasBotApplication.java @@ -1,5 +1,8 @@ package es.imunnic.inversionitasBot; +import es.imunnic.inversionitasBot.services.GoogleNewsScraperService; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.springframework.boot.SpringApplication; import org.springframework.boot.autoconfigure.SpringBootApplication; import org.springframework.context.ApplicationContext; @@ -8,13 +11,19 @@ import org.telegram.telegrambots.meta.TelegramBotsApi; import org.telegram.telegrambots.meta.exceptions.TelegramApiException; import org.telegram.telegrambots.updatesreceivers.DefaultBotSession; +import java.util.List; + @SpringBootApplication @EnableScheduling public class InversionitasBotApplication { + private static final Logger logger = LoggerFactory.getLogger(GoogleNewsScraperService.class); public static void main(String[] args) { ApplicationContext context = SpringApplication.run(InversionitasBotApplication.class, args); InversionitasBotApplication app = context.getBean(InversionitasBotApplication.class); + /*GoogleNewsScraperService scraper = new GoogleNewsScraperService(); + System.out.println(scraper.searchNews("Farma")); + buscarNoticiasYEnviar("Farma", scraper);*/ app.initBot(); } @@ -28,4 +37,34 @@ public class InversionitasBotApplication { } } + private static void buscarNoticiasYEnviar(String query, GoogleNewsScraperService scraper) { + String mensajeInicio = "Analizando noticias...esto puede tardar un poco, ten paciencia..."; + String mensajeError = "Lo siento... se me ha licuado el cerebro... Mira los logs para saber más."; + System.out.println(mensajeInicio); + + try { + List noticias = scraper.searchNews(query); + + if (noticias.isEmpty()) { + System.out.println("No encontré noticias sobre: " + query + ""); + return; + } + + StringBuilder mensaje = new StringBuilder("📰 Noticias sobre " + query + ":\n\n"); + for (NewsArticle noticia : noticias) { + mensaje.append("") + .append(noticia.getTitle()) + .append("\n"); + } + + System.out.println(mensaje.toString()); + } catch (Exception e) { + System.out.println(mensajeError); + System.out.println("
" + e.toString() + "
"); + } + } + + } diff --git a/bot/src/main/java/es/imunnic/inversionitasBot/TelegramBot.java b/bot/src/main/java/es/imunnic/inversionitasBot/TelegramBot.java index 8666443..29f10e4 100644 --- a/bot/src/main/java/es/imunnic/inversionitasBot/TelegramBot.java +++ b/bot/src/main/java/es/imunnic/inversionitasBot/TelegramBot.java @@ -3,6 +3,8 @@ package es.imunnic.inversionitasBot; import es.imunnic.inversionitasBot.services.ApiService; import es.imunnic.inversionitasBot.services.GoogleNewsScraperService; import org.json.JSONObject; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Value; import org.springframework.scheduling.annotation.Scheduled; import org.springframework.stereotype.Component; @@ -21,6 +23,7 @@ import java.util.Map; @Component public class TelegramBot extends TelegramLongPollingBot { + private static final Logger logger = LoggerFactory.getLogger(GoogleNewsScraperService.class); private GoogleNewsScraperService scrapper = new GoogleNewsScraperService(); //@Value("${telegram.bot.username}") private String BOT_USERNAME = "Inversionitas_Bot"; @@ -85,14 +88,25 @@ public class TelegramBot extends TelegramLongPollingBot { } } - protected void sendMessageHtml(String chatId, String text) { + protected void sendMessageHtml(String chatId, String text){ + this.sendMessageHtml(chatId, text, null); + } + + protected void sendMessageHtml(String chatId, String text, Integer threadId) { List partes = dividirMensaje(text); for (String parte : partes) { + logger.info(parte); + parte = this.escapeHtml(parte); SendMessage sendMessage = new SendMessage(); sendMessage.setChatId(chatId); sendMessage.setText(parte); - sendMessage.setParseMode("HTML"); + sendMessage.setParseMode("HTML"); // Se debe usar HTML si el contenido es HTML + + // Agregar el ID del tema si es distinto de null + if (threadId != null) { + sendMessage.setMessageThreadId(threadId); + } try { execute(sendMessage); @@ -172,32 +186,34 @@ public class TelegramBot extends TelegramLongPollingBot { } private void buscarNoticiasYEnviar(String chatId, String query) { - String mensajeInicio = "Analizando noticias...esto puede tardar un poco, ten paciencia..."; - String mensajeError = "Lo siento... se me ha licuado el cerebro... Mira los logs para saber más."; - sendMessageHtml(chatId, mensajeInicio); + String mensajeInicio = "Analizando noticias...esto puede tardar un poco, ten paciencia..."; + String mensajeError = "Lo siento... se me ha licuado el cerebro... Mira los logs para saber más."; + sendMessage(chatId, escapeMarkdown(mensajeInicio)); try { List noticias = scrapper.searchNews(query); if (noticias.isEmpty()) { - sendMessageHtml(chatId, "No encontré noticias sobre: " + query + ""); + sendMessage(chatId, "No encontré noticias sobre: " + query); return; } - StringBuilder mensaje = new StringBuilder("📰 Noticias sobre " + query + ":\n\n"); - System.out.println(mensaje); + StringBuilder mensaje = new StringBuilder("📰 Noticias sobre " + query + ":\n\n"); + for (NewsArticle noticia : noticias) { - mensaje.append("🔹 ") - .append(noticia.getTitle()) - .append("\n"); + mensaje//.append("[") + .append("🔹 ") + .append(escapeMarkdown(noticia.getTitle())) + .append("\n")// Escapando el título de la noticia + //.append("](") + //.append(escapeMarkdown(noticia.getUrl())) // Escapando la URL de la noticia + //.append(")\n") + ; } - sendMessageHtml(chatId, mensaje.toString()); + sendMessage(chatId, mensaje.toString()); } catch (Exception e) { - sendMessageHtml(chatId, mensajeError); - sendMessageHtml(chatId, "
" + e.toString() + "
"); + sendMessage(chatId, escapeMarkdown(mensajeError)); } } @@ -208,4 +224,9 @@ public class TelegramBot extends TelegramLongPollingBot { } return text.replaceAll("([_*\\[\\]()~`>#+\\-=|{}.!])", "\\\\$1"); } + + private String escapeHtml(String text) { + text = text.replaceAll("]+)(?", ""); + return text; + } } diff --git a/bot/src/main/java/es/imunnic/inversionitasBot/services/GoogleNewsScraperService.java b/bot/src/main/java/es/imunnic/inversionitasBot/services/GoogleNewsScraperService.java index 169867d..d3112bd 100644 --- a/bot/src/main/java/es/imunnic/inversionitasBot/services/GoogleNewsScraperService.java +++ b/bot/src/main/java/es/imunnic/inversionitasBot/services/GoogleNewsScraperService.java @@ -9,6 +9,8 @@ import com.rometools.rome.feed.synd.SyndEntry; import com.rometools.rome.feed.synd.SyndFeed; import com.rometools.rome.io.SyndFeedInput; import com.rometools.rome.io.XmlReader; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.springframework.stereotype.Service; import java.net.HttpURLConnection; import java.net.URL; @@ -18,30 +20,43 @@ import java.util.List; @Service public class GoogleNewsScraperService { + private static final Logger logger = LoggerFactory.getLogger(GoogleNewsScraperService.class); private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"; public List searchNews(String query) { List newsList = new ArrayList<>(); String rssUrl = "https://news.google.com/rss/search?q=" + query.replace(" ", "+") + "&hl=es&gl=ES&ceid=ES%3Aes"; + logger.info("Buscando noticias para: {}", query); + logger.info("URL del RSS: {}", rssUrl); + try { URL url = new URL(rssUrl); HttpURLConnection conn = (HttpURLConnection) url.openConnection(); conn.setRequestProperty("User-Agent", USER_AGENT); + SyndFeedInput input = new SyndFeedInput(); SyndFeed feed = input.build(new XmlReader(conn.getInputStream())); - for (SyndEntry entry : feed.getEntries()) { + List entries = feed.getEntries(); + int limit = Math.min(entries.size(), 10); // Máximo 10 noticias + + for (int i = 0; i < limit; i++) { + SyndEntry entry = entries.get(i); String title = entry.getTitle(); String link = entry.getLink(); String description = (entry.getDescription() != null) ? entry.getDescription().getValue() : "Sin descripción"; String finalUrl = resolveFinalUrl(link); - //String content = extractArticleContent(finalUrl); + + logger.info("Noticia {}: {}", (i + 1), title); + logger.info("Enlace: {}", finalUrl); newsList.add(new NewsArticle(title, description, finalUrl)); } + + logger.info("Total de noticias procesadas: {}", newsList.size()); } catch (Exception e) { - e.printStackTrace(); + logger.error("Error al obtener las noticias", e); } return newsList; } @@ -52,24 +67,14 @@ public class GoogleNewsScraperService { conn.setInstanceFollowRedirects(true); conn.setRequestProperty("User-Agent", USER_AGENT); conn.connect(); - return (conn.getHeaderField("Location") != null) ? conn.getHeaderField("Location") : url; + String resolvedUrl = (conn.getHeaderField("Location") != null) ? conn.getHeaderField("Location") : url; + + logger.debug("Redirección resuelta: {} → {}", url, resolvedUrl); + return resolvedUrl; } catch (Exception e) { + logger.warn("No se pudo resolver la URL: {}", url, e); return url; } } - - /*private String extractArticleContent(String url) { - try { - Document doc = Jsoup.connect(url).userAgent(USER_AGENT).get(); - Elements paragraphs = doc.select("article p, .post-content p, .entry-content p, .content p, #article-body p"); - StringBuilder content = new StringBuilder(); - for (Element p : paragraphs) { - content.append(p.text()).append("\n"); - } - return content.length() > 0 ? content.toString() : "No se encontró contenido relevante"; - } catch (Exception e) { - return "Error al extraer contenido"; - } - }*/ }