Busqueda sin enlaces
This commit is contained in:
@ -1,5 +1,8 @@
|
|||||||
package es.imunnic.inversionitasBot;
|
package es.imunnic.inversionitasBot;
|
||||||
|
|
||||||
|
import es.imunnic.inversionitasBot.services.GoogleNewsScraperService;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
import org.springframework.boot.SpringApplication;
|
import org.springframework.boot.SpringApplication;
|
||||||
import org.springframework.boot.autoconfigure.SpringBootApplication;
|
import org.springframework.boot.autoconfigure.SpringBootApplication;
|
||||||
import org.springframework.context.ApplicationContext;
|
import org.springframework.context.ApplicationContext;
|
||||||
@ -8,13 +11,19 @@ import org.telegram.telegrambots.meta.TelegramBotsApi;
|
|||||||
import org.telegram.telegrambots.meta.exceptions.TelegramApiException;
|
import org.telegram.telegrambots.meta.exceptions.TelegramApiException;
|
||||||
import org.telegram.telegrambots.updatesreceivers.DefaultBotSession;
|
import org.telegram.telegrambots.updatesreceivers.DefaultBotSession;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
@SpringBootApplication
|
@SpringBootApplication
|
||||||
@EnableScheduling
|
@EnableScheduling
|
||||||
public class InversionitasBotApplication {
|
public class InversionitasBotApplication {
|
||||||
|
private static final Logger logger = LoggerFactory.getLogger(GoogleNewsScraperService.class);
|
||||||
|
|
||||||
public static void main(String[] args) {
|
public static void main(String[] args) {
|
||||||
ApplicationContext context = SpringApplication.run(InversionitasBotApplication.class, args);
|
ApplicationContext context = SpringApplication.run(InversionitasBotApplication.class, args);
|
||||||
InversionitasBotApplication app = context.getBean(InversionitasBotApplication.class);
|
InversionitasBotApplication app = context.getBean(InversionitasBotApplication.class);
|
||||||
|
/*GoogleNewsScraperService scraper = new GoogleNewsScraperService();
|
||||||
|
System.out.println(scraper.searchNews("Farma"));
|
||||||
|
buscarNoticiasYEnviar("Farma", scraper);*/
|
||||||
app.initBot();
|
app.initBot();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -28,4 +37,34 @@ public class InversionitasBotApplication {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static void buscarNoticiasYEnviar(String query, GoogleNewsScraperService scraper) {
|
||||||
|
String mensajeInicio = "<i>Analizando noticias...esto puede tardar un poco, ten paciencia...</i>";
|
||||||
|
String mensajeError = "<b>Lo siento...</b> se me ha licuado el cerebro... Mira los logs para saber más.";
|
||||||
|
System.out.println(mensajeInicio);
|
||||||
|
|
||||||
|
try {
|
||||||
|
List<NewsArticle> noticias = scraper.searchNews(query);
|
||||||
|
|
||||||
|
if (noticias.isEmpty()) {
|
||||||
|
System.out.println("No encontré noticias sobre: <b>" + query + "</b>");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
StringBuilder mensaje = new StringBuilder("📰 <b>Noticias sobre " + query + ":</b>\n\n");
|
||||||
|
for (NewsArticle noticia : noticias) {
|
||||||
|
mensaje.append("<a href=\"")
|
||||||
|
.append(noticia.getUrl())
|
||||||
|
.append("\">")
|
||||||
|
.append(noticia.getTitle())
|
||||||
|
.append("</a>\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
System.out.println(mensaje.toString());
|
||||||
|
} catch (Exception e) {
|
||||||
|
System.out.println(mensajeError);
|
||||||
|
System.out.println("<pre>" + e.toString() + "</pre>");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -3,6 +3,8 @@ package es.imunnic.inversionitasBot;
|
|||||||
import es.imunnic.inversionitasBot.services.ApiService;
|
import es.imunnic.inversionitasBot.services.ApiService;
|
||||||
import es.imunnic.inversionitasBot.services.GoogleNewsScraperService;
|
import es.imunnic.inversionitasBot.services.GoogleNewsScraperService;
|
||||||
import org.json.JSONObject;
|
import org.json.JSONObject;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
import org.springframework.beans.factory.annotation.Value;
|
import org.springframework.beans.factory.annotation.Value;
|
||||||
import org.springframework.scheduling.annotation.Scheduled;
|
import org.springframework.scheduling.annotation.Scheduled;
|
||||||
import org.springframework.stereotype.Component;
|
import org.springframework.stereotype.Component;
|
||||||
@ -21,6 +23,7 @@ import java.util.Map;
|
|||||||
@Component
|
@Component
|
||||||
public class TelegramBot extends TelegramLongPollingBot {
|
public class TelegramBot extends TelegramLongPollingBot {
|
||||||
|
|
||||||
|
private static final Logger logger = LoggerFactory.getLogger(GoogleNewsScraperService.class);
|
||||||
private GoogleNewsScraperService scrapper = new GoogleNewsScraperService();
|
private GoogleNewsScraperService scrapper = new GoogleNewsScraperService();
|
||||||
//@Value("${telegram.bot.username}")
|
//@Value("${telegram.bot.username}")
|
||||||
private String BOT_USERNAME = "Inversionitas_Bot";
|
private String BOT_USERNAME = "Inversionitas_Bot";
|
||||||
@ -85,14 +88,25 @@ public class TelegramBot extends TelegramLongPollingBot {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void sendMessageHtml(String chatId, String text) {
|
protected void sendMessageHtml(String chatId, String text){
|
||||||
|
this.sendMessageHtml(chatId, text, null);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void sendMessageHtml(String chatId, String text, Integer threadId) {
|
||||||
List<String> partes = dividirMensaje(text);
|
List<String> partes = dividirMensaje(text);
|
||||||
|
|
||||||
for (String parte : partes) {
|
for (String parte : partes) {
|
||||||
|
logger.info(parte);
|
||||||
|
parte = this.escapeHtml(parte);
|
||||||
SendMessage sendMessage = new SendMessage();
|
SendMessage sendMessage = new SendMessage();
|
||||||
sendMessage.setChatId(chatId);
|
sendMessage.setChatId(chatId);
|
||||||
sendMessage.setText(parte);
|
sendMessage.setText(parte);
|
||||||
sendMessage.setParseMode("HTML");
|
sendMessage.setParseMode("HTML"); // Se debe usar HTML si el contenido es HTML
|
||||||
|
|
||||||
|
// Agregar el ID del tema si es distinto de null
|
||||||
|
if (threadId != null) {
|
||||||
|
sendMessage.setMessageThreadId(threadId);
|
||||||
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
execute(sendMessage);
|
execute(sendMessage);
|
||||||
@ -172,32 +186,34 @@ public class TelegramBot extends TelegramLongPollingBot {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private void buscarNoticiasYEnviar(String chatId, String query) {
|
private void buscarNoticiasYEnviar(String chatId, String query) {
|
||||||
String mensajeInicio = "<i>Analizando noticias...esto puede tardar un poco, ten paciencia...</i>";
|
String mensajeInicio = "Analizando noticias...esto puede tardar un poco, ten paciencia...";
|
||||||
String mensajeError = "<b>Lo siento...</b> se me ha licuado el cerebro... Mira los logs para saber más.";
|
String mensajeError = "Lo siento... se me ha licuado el cerebro... Mira los logs para saber más.";
|
||||||
sendMessageHtml(chatId, mensajeInicio);
|
sendMessage(chatId, escapeMarkdown(mensajeInicio));
|
||||||
|
|
||||||
try {
|
try {
|
||||||
List<NewsArticle> noticias = scrapper.searchNews(query);
|
List<NewsArticle> noticias = scrapper.searchNews(query);
|
||||||
|
|
||||||
if (noticias.isEmpty()) {
|
if (noticias.isEmpty()) {
|
||||||
sendMessageHtml(chatId, "No encontré noticias sobre: <b>" + query + "</b>");
|
sendMessage(chatId, "No encontré noticias sobre: " + query);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
StringBuilder mensaje = new StringBuilder("📰 <b>Noticias sobre " + query + ":</b>\n\n");
|
StringBuilder mensaje = new StringBuilder("📰 Noticias sobre " + query + ":\n\n");
|
||||||
System.out.println(mensaje);
|
|
||||||
for (NewsArticle noticia : noticias) {
|
for (NewsArticle noticia : noticias) {
|
||||||
mensaje.append("🔹 <a href=\"")
|
mensaje//.append("[")
|
||||||
.append(noticia.getUrl())
|
.append("🔹 ")
|
||||||
.append("\">")
|
.append(escapeMarkdown(noticia.getTitle()))
|
||||||
.append(noticia.getTitle())
|
.append("\n")// Escapando el título de la noticia
|
||||||
.append("</a>\n");
|
//.append("](")
|
||||||
|
//.append(escapeMarkdown(noticia.getUrl())) // Escapando la URL de la noticia
|
||||||
|
//.append(")\n")
|
||||||
|
;
|
||||||
}
|
}
|
||||||
|
|
||||||
sendMessageHtml(chatId, mensaje.toString());
|
sendMessage(chatId, mensaje.toString());
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
sendMessageHtml(chatId, mensajeError);
|
sendMessage(chatId, escapeMarkdown(mensajeError));
|
||||||
sendMessageHtml(chatId, "<pre>" + e.toString() + "</pre>");
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -208,4 +224,9 @@ public class TelegramBot extends TelegramLongPollingBot {
|
|||||||
}
|
}
|
||||||
return text.replaceAll("([_*\\[\\]()~`>#+\\-=|{}.!])", "\\\\$1");
|
return text.replaceAll("([_*\\[\\]()~`>#+\\-=|{}.!])", "\\\\$1");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private String escapeHtml(String text) {
|
||||||
|
text = text.replaceAll("<a([^>]+)(?<!/)>", "<a$1></a>");
|
||||||
|
return text;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -9,6 +9,8 @@ import com.rometools.rome.feed.synd.SyndEntry;
|
|||||||
import com.rometools.rome.feed.synd.SyndFeed;
|
import com.rometools.rome.feed.synd.SyndFeed;
|
||||||
import com.rometools.rome.io.SyndFeedInput;
|
import com.rometools.rome.io.SyndFeedInput;
|
||||||
import com.rometools.rome.io.XmlReader;
|
import com.rometools.rome.io.XmlReader;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
import org.springframework.stereotype.Service;
|
import org.springframework.stereotype.Service;
|
||||||
import java.net.HttpURLConnection;
|
import java.net.HttpURLConnection;
|
||||||
import java.net.URL;
|
import java.net.URL;
|
||||||
@ -18,30 +20,43 @@ import java.util.List;
|
|||||||
@Service
|
@Service
|
||||||
public class GoogleNewsScraperService {
|
public class GoogleNewsScraperService {
|
||||||
|
|
||||||
|
private static final Logger logger = LoggerFactory.getLogger(GoogleNewsScraperService.class);
|
||||||
private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36";
|
private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36";
|
||||||
|
|
||||||
public List<NewsArticle> searchNews(String query) {
|
public List<NewsArticle> searchNews(String query) {
|
||||||
List<NewsArticle> newsList = new ArrayList<>();
|
List<NewsArticle> newsList = new ArrayList<>();
|
||||||
String rssUrl = "https://news.google.com/rss/search?q=" + query.replace(" ", "+") + "&hl=es&gl=ES&ceid=ES%3Aes";
|
String rssUrl = "https://news.google.com/rss/search?q=" + query.replace(" ", "+") + "&hl=es&gl=ES&ceid=ES%3Aes";
|
||||||
|
|
||||||
|
logger.info("Buscando noticias para: {}", query);
|
||||||
|
logger.info("URL del RSS: {}", rssUrl);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
URL url = new URL(rssUrl);
|
URL url = new URL(rssUrl);
|
||||||
HttpURLConnection conn = (HttpURLConnection) url.openConnection();
|
HttpURLConnection conn = (HttpURLConnection) url.openConnection();
|
||||||
conn.setRequestProperty("User-Agent", USER_AGENT);
|
conn.setRequestProperty("User-Agent", USER_AGENT);
|
||||||
|
|
||||||
SyndFeedInput input = new SyndFeedInput();
|
SyndFeedInput input = new SyndFeedInput();
|
||||||
SyndFeed feed = input.build(new XmlReader(conn.getInputStream()));
|
SyndFeed feed = input.build(new XmlReader(conn.getInputStream()));
|
||||||
|
|
||||||
for (SyndEntry entry : feed.getEntries()) {
|
List<SyndEntry> entries = feed.getEntries();
|
||||||
|
int limit = Math.min(entries.size(), 10); // Máximo 10 noticias
|
||||||
|
|
||||||
|
for (int i = 0; i < limit; i++) {
|
||||||
|
SyndEntry entry = entries.get(i);
|
||||||
String title = entry.getTitle();
|
String title = entry.getTitle();
|
||||||
String link = entry.getLink();
|
String link = entry.getLink();
|
||||||
String description = (entry.getDescription() != null) ? entry.getDescription().getValue() : "Sin descripción";
|
String description = (entry.getDescription() != null) ? entry.getDescription().getValue() : "Sin descripción";
|
||||||
String finalUrl = resolveFinalUrl(link);
|
String finalUrl = resolveFinalUrl(link);
|
||||||
//String content = extractArticleContent(finalUrl);
|
|
||||||
|
logger.info("Noticia {}: {}", (i + 1), title);
|
||||||
|
logger.info("Enlace: {}", finalUrl);
|
||||||
|
|
||||||
newsList.add(new NewsArticle(title, description, finalUrl));
|
newsList.add(new NewsArticle(title, description, finalUrl));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
logger.info("Total de noticias procesadas: {}", newsList.size());
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
e.printStackTrace();
|
logger.error("Error al obtener las noticias", e);
|
||||||
}
|
}
|
||||||
return newsList;
|
return newsList;
|
||||||
}
|
}
|
||||||
@ -52,24 +67,14 @@ public class GoogleNewsScraperService {
|
|||||||
conn.setInstanceFollowRedirects(true);
|
conn.setInstanceFollowRedirects(true);
|
||||||
conn.setRequestProperty("User-Agent", USER_AGENT);
|
conn.setRequestProperty("User-Agent", USER_AGENT);
|
||||||
conn.connect();
|
conn.connect();
|
||||||
return (conn.getHeaderField("Location") != null) ? conn.getHeaderField("Location") : url;
|
String resolvedUrl = (conn.getHeaderField("Location") != null) ? conn.getHeaderField("Location") : url;
|
||||||
|
|
||||||
|
logger.debug("Redirección resuelta: {} → {}", url, resolvedUrl);
|
||||||
|
return resolvedUrl;
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
|
logger.warn("No se pudo resolver la URL: {}", url, e);
|
||||||
return url;
|
return url;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*private String extractArticleContent(String url) {
|
|
||||||
try {
|
|
||||||
Document doc = Jsoup.connect(url).userAgent(USER_AGENT).get();
|
|
||||||
Elements paragraphs = doc.select("article p, .post-content p, .entry-content p, .content p, #article-body p");
|
|
||||||
StringBuilder content = new StringBuilder();
|
|
||||||
for (Element p : paragraphs) {
|
|
||||||
content.append(p.text()).append("\n");
|
|
||||||
}
|
|
||||||
return content.length() > 0 ? content.toString() : "No se encontró contenido relevante";
|
|
||||||
} catch (Exception e) {
|
|
||||||
return "Error al extraer contenido";
|
|
||||||
}
|
|
||||||
}*/
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user