Búsqueda de noticias

This commit is contained in:
2025-03-28 15:42:16 +01:00
parent 6e82756c91
commit 10292bd400
4 changed files with 129 additions and 2 deletions

View File

@ -24,6 +24,8 @@ dependencies {
implementation 'org.springframework.boot:spring-boot-starter-logging'
implementation 'com.squareup.okhttp3:okhttp:4.11.0'
implementation 'org.json:json:20230227'
implementation 'org.jsoup:jsoup:1.15.4'
implementation 'com.rometools:rome:1.18.0'
testImplementation 'org.springframework.boot:spring-boot-starter-test'
testRuntimeOnly 'org.junit.platform:junit-platform-launcher'
}

View File

@ -0,0 +1,31 @@
package es.imunnic.inversionitasBot;
public class NewsArticle {
private String title;
private String description;
private String url;
private String content;
public NewsArticle(String title, String description, String url, String content) {
this.title = title;
this.description = description;
this.url = url;
this.content = content;
}
public String getTitle() {
return title;
}
public String getDescription() {
return description;
}
public String getUrl() {
return url;
}
public String getContent() {
return content;
}
}

View File

@ -1,6 +1,7 @@
package es.imunnic.inversionitasBot;
import es.imunnic.inversionitasBot.services.ApiService;
import es.imunnic.inversionitasBot.services.GoogleNewsScraperService;
import org.json.JSONObject;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.scheduling.annotation.Scheduled;
@ -20,6 +21,7 @@ import java.util.Map;
@Component
public class TelegramBot extends TelegramLongPollingBot {
private GoogleNewsScraperService scrapper = new GoogleNewsScraperService();
//@Value("${telegram.bot.username}")
private String BOT_USERNAME = "Inversionitas_Bot";
//@Value("${telegram.bot.token}")
@ -48,6 +50,9 @@ public class TelegramBot extends TelegramLongPollingBot {
if (messageText.equalsIgnoreCase("/resumen")) {
construirResumenNoticias(chatId);
} else if (messageText.toLowerCase().startsWith("/busca ")) {
String query = messageText.substring(7).trim();
buscarNoticiasYEnviar(chatId, query);
} else {
sendMessage(chatId, "Comando no reconocido. Usa /resumen para obtener el resumen diario.");
}
@ -147,6 +152,20 @@ public class TelegramBot extends TelegramLongPollingBot {
return partes;
}
/**
 * Searches news for {@code query} through the scraper and sends the results to
 * the chat as a list of {@code [title](url)} links.
 *
 * <p>The result list is sent in as many messages as needed: a new message is
 * started whenever appending the next entry would push the text past Telegram's
 * 4096-character limit, so every link stays whole inside a single message.
 *
 * @param chatId chat to send the results to
 * @param query  search terms typed by the user after the command
 */
private void buscarNoticiasYEnviar(String chatId, String query) {
    // Nothing to search for (e.g. the user sent only the command and spaces).
    if (query == null || query.trim().isEmpty()) {
        sendMessage(chatId, "Indica qué quieres buscar. Ejemplo: /busca inflación");
        return;
    }

    List<NewsArticle> noticias = scrapper.searchNews(query);
    if (noticias.isEmpty()) {
        sendMessage(chatId, "No encontré noticias sobre: *" + query + "*");
        return;
    }

    // Telegram rejects message texts longer than 4096 characters. String.length()
    // counts UTF-16 units, which can only over-estimate, so this check is conservative.
    final int maxLongitud = 4096;

    StringBuilder mensaje = new StringBuilder("📰 *Noticias sobre " + query + "*:\n\n");
    for (NewsArticle noticia : noticias) {
        String linea = "🔹 [" + limpiarTituloParaEnlace(noticia.getTitle()) + "](" + noticia.getUrl() + ")\n";
        // Flush the current chunk before it would overflow; split only between entries.
        if (mensaje.length() > 0 && mensaje.length() + linea.length() > maxLongitud) {
            sendMessage(chatId, mensaje.toString());
            mensaje.setLength(0);
        }
        mensaje.append(linea);
    }
    if (mensaje.length() > 0) {
        sendMessage(chatId, mensaje.toString());
    }
}

/**
 * Makes a title safe to use as the label of a {@code [label](url)} link.
 *
 * <p>Square brackets would terminate or restart the label, so they are replaced
 * by parentheses; a missing title gets a placeholder instead of the text "null".
 * NOTE(review): assumes sendMessage sends with Markdown parsing enabled, as the
 * {@code *…*} and {@code […](…)} markup used here suggests — confirm.
 */
private static String limpiarTituloParaEnlace(String titulo) {
    if (titulo == null || titulo.trim().isEmpty()) {
        return "(sin título)";
    }
    return titulo.replace('[', '(').replace(']', ')');
}
}
//TODO ajustar los mensajes para que no se pasen
//TODO ajustar la presentación con la negrita

View File

@ -0,0 +1,75 @@
package es.imunnic.inversionitasBot.services;
import es.imunnic.inversionitasBot.NewsArticle;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.rometools.rome.feed.synd.SyndEntry;
import com.rometools.rome.feed.synd.SyndFeed;
import com.rometools.rome.io.SyndFeedInput;
import com.rometools.rome.io.XmlReader;
import org.springframework.stereotype.Service;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
/**
 * Looks up news through the Google News RSS search endpoint and, for every feed
 * entry, tries to resolve the real article URL and scrape its paragraph text.
 *
 * <p>All network failures are handled on a best-effort basis: a failed feed
 * request yields an empty (or partial) list, a failed redirect resolution keeps
 * the original link, and a failed scrape yields a fixed error text as content.
 *
 * <p>NOTE(review): each feed entry triggers two additional sequential HTTP
 * requests, so a large feed makes {@link #searchNews(String)} slow.
 */
@Service
public class GoogleNewsScraperService {
    private static final Logger LOGGER = Logger.getLogger(GoogleNewsScraperService.class.getName());
    private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36";
    /** Connect/read timeout applied to every outgoing request, in milliseconds. */
    private static final int TIMEOUT_MS = 10_000;

    /**
     * Searches Google News for {@code query}.
     *
     * @param query free-text search terms; {@code null} or blank yields an empty list
     * @return a mutable list with one {@link NewsArticle} per feed entry, empty when
     *         nothing was found or the feed could not be read; never {@code null}
     */
    public List<NewsArticle> searchNews(String query) {
        List<NewsArticle> newsList = new ArrayList<>();
        if (query == null || query.trim().isEmpty()) {
            return newsList;
        }

        // URL-encode the whole query so characters such as '&', '#' or accented
        // letters cannot corrupt the URL or inject extra parameters. Spaces still
        // become '+', as before.
        String rssUrl = "https://news.google.com/rss/search?q="
                + URLEncoder.encode(query.trim(), StandardCharsets.UTF_8)
                + "&hl=es&gl=ES&ceid=ES%3Aes";

        HttpURLConnection conn = null;
        try {
            conn = (HttpURLConnection) new URL(rssUrl).openConnection();
            conn.setConnectTimeout(TIMEOUT_MS);
            conn.setReadTimeout(TIMEOUT_MS);
            conn.setRequestProperty("User-Agent", USER_AGENT);

            // try-with-resources closes the reader and the underlying response stream.
            try (XmlReader reader = new XmlReader(conn.getInputStream())) {
                SyndFeed feed = new SyndFeedInput().build(reader);
                for (SyndEntry entry : feed.getEntries()) {
                    String title = entry.getTitle();
                    String link = entry.getLink();
                    String description = (entry.getDescription() != null) ? entry.getDescription().getValue() : "Sin descripción";
                    String finalUrl = resolveFinalUrl(link);
                    String content = extractArticleContent(finalUrl);
                    newsList.add(new NewsArticle(title, description, finalUrl, content));
                }
            }
        } catch (Exception e) {
            // Best effort: report the failure and return whatever was collected so far.
            LOGGER.log(Level.WARNING, "Failed to fetch news feed for query: " + query, e);
        } finally {
            if (conn != null) {
                conn.disconnect();
            }
        }
        return newsList;
    }

    /**
     * Follows HTTP redirects starting at {@code url} and returns where they end.
     *
     * @param url link to resolve
     * @return the URL reached after redirects, or {@code url} itself when the
     *         request fails for any reason
     */
    private String resolveFinalUrl(String url) {
        HttpURLConnection conn = null;
        try {
            conn = (HttpURLConnection) new URL(url).openConnection();
            conn.setInstanceFollowRedirects(true);
            conn.setConnectTimeout(TIMEOUT_MS);
            conn.setReadTimeout(TIMEOUT_MS);
            conn.setRequestProperty("User-Agent", USER_AGENT);

            // Reading the status forces the request (and any followed redirects) to run.
            int status = conn.getResponseCode();
            String location = conn.getHeaderField("Location");
            if (status >= 300 && status < 400 && location != null) {
                // A redirect the connection did not follow itself (e.g. a protocol
                // switch); resolve it against the current URL in case it is relative.
                return new URL(conn.getURL(), location).toString();
            }
            // Once redirects were followed the final response carries no Location
            // header; the connection's own URL reflects the final destination.
            return conn.getURL().toString();
        } catch (Exception e) {
            // Deliberate best effort: keep the original link when it cannot be resolved.
            return url;
        } finally {
            if (conn != null) {
                conn.disconnect();
            }
        }
    }

    /**
     * Downloads the page at {@code url} and concatenates the text of the paragraphs
     * found inside common article containers, one paragraph per line.
     *
     * @param url page to scrape
     * @return the extracted text, a fixed placeholder when no paragraph matched, or
     *         a fixed error text when the page could not be fetched or parsed
     */
    private String extractArticleContent(String url) {
        try {
            Document doc = Jsoup.connect(url).userAgent(USER_AGENT).timeout(TIMEOUT_MS).get();
            Elements paragraphs = doc.select("article p, .post-content p, .entry-content p, .content p, #article-body p");
            StringBuilder content = new StringBuilder();
            for (Element p : paragraphs) {
                content.append(p.text()).append("\n");
            }
            return content.length() > 0 ? content.toString() : "No se encontró contenido relevante";
        } catch (Exception e) {
            // Deliberate best effort: a single unreadable page must not abort the search.
            return "Error al extraer contenido";
        }
    }
}