docker ready, without tests
@@ -2,6 +2,15 @@ import json
import requests
import sys
from bs4 import BeautifulSoup
import logging

# Logging configuration
LOG_FILE = "app.log"
logging.basicConfig(
    filename=LOG_FILE,  # log file
    level=logging.INFO,  # log level (DEBUG, INFO, WARNING, ERROR, CRITICAL)
    format="%(asctime)s - %(levelname)s - %(message)s",  # log line format
)

def download_html_as_human(url):
    """
@@ -70,7 +79,7 @@ def get_author_from_url(url):
    """
    html_content = download_html_as_human(url)
    if not html_content:
        print("error")
        logging.info("error, no se pudo descargar la pagina")
        return "No se pudo descargar la página."

    soup = BeautifulSoup(html_content, 'html.parser')
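The same logging block is repeated at the top of each touched module, and all of them append to the same app.log. For reference, a minimal sketch (not part of the commit) of the line format this configuration produces; the timestamp and message below are illustrative:

```python
import logging

logging.basicConfig(
    filename="app.log",
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
)

logging.info("downloaded page")
# app.log then gains a line such as:
# 2025-03-30 12:34:56,789 - INFO - downloaded page
```

Since `logging.basicConfig` does nothing once the root logger is already configured, the duplicated blocks do not conflict when the modules run in one process; whichever module executes first wins, and here all three use identical settings.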
@@ -1,66 +1,91 @@
import requests
import json
import os
import logging

# Logging configuration
LOG_FILE = "app.log"
logging.basicConfig(
    filename=LOG_FILE,  # log file
    level=logging.INFO,  # log level (DEBUG, INFO, WARNING, ERROR, CRITICAL)
    format="%(asctime)s - %(levelname)s - %(message)s",  # log line format
)

# Read configuration from environment variables
OLLAMA_URL = os.environ.get("OLLAMA_URL", "http://localhost:11434/api/generate")
OLLAMA_MODEL = os.environ.get("OLLAMA_MODEL", "llama3")

def is_security_related(prompt):
    url = "http://localhost:11434/api/generate"
    logging.info(f"Checking if topic is security-related: {prompt}")
    data = {
        "model": "llama3",
        "model": OLLAMA_MODEL,
        "prompt": f"Does the following topic relate to national defense, armed forces, police, espionage, or intelligence? Answer only with 'true' or 'false'. Topic: {prompt}",
    }

    response = requests.post(url, json=data)

    try:
        # Split the response into lines and parse each one
        response = requests.post(OLLAMA_URL, json=data)
        response.raise_for_status()  # raise an exception if the request failed

        for line in response.text.strip().split("\n"):
            json_data = json.loads(line)
            if "response" in json_data and json_data["response"].strip():
                return json_data["response"].strip().lower() == "true"
                result = json_data["response"].strip().lower() == "true"
                logging.info(f"Result for '{prompt}': {result}")
                return result

    except requests.RequestException as e:
        logging.error(f"Request error: {e}")
    except json.JSONDecodeError as e:
        print("JSON Decode Error:", e)
        logging.error(f"JSON Decode Error: {e}")

    return False

def is_critico(prompt):
    url = "http://localhost:11434/api/generate"
    logging.info(f"Checking if topic is critical of security forces: {prompt}")
    data = {
        "model": "llama3",
        "prompt": f"Does the following text critics the armed forces, security forces as Guardia Civil or Police, intelligence agencies such as CNI? Answer only with 'true' or 'false'. Topic: {prompt}",
        "model": OLLAMA_MODEL,
        "prompt": f"Does the following text criticizes the armed forces, security forces as Guardia Civil or Police, intelligence agencies such as CNI? Answer only with 'true' or 'false'. Topic: {prompt}",
    }

    response = requests.post(url, json=data)

    try:
        # Split the response into lines and parse each one
        response = requests.post(OLLAMA_URL, json=data)
        response.raise_for_status()

        for line in response.text.strip().split("\n"):
            json_data = json.loads(line)
            if "response" in json_data and json_data["response"].strip():
                return json_data["response"].strip().lower() == "true"
                result = json_data["response"].strip().lower() == "true"
                logging.info(f"Result for '{prompt}': {result}")
                return result

    except requests.RequestException as e:
        logging.error(f"Request error: {e}")
    except json.JSONDecodeError as e:
        print("JSON Decode Error:", e)
        logging.error(f"JSON Decode Error: {e}")

    return False

def is_favorable(prompt):
    url = "http://localhost:11434/api/generate"
    logging.info(f"Checking if topic is favorable to security forces: {prompt}")
    data = {
        "model": "llama3",
        "prompt": f"Does the following text favours the armed forces, security forces as Guardia Civil or Police, intelligence agencies such as CNI? Answer only with 'true' or 'false'. Topic: {prompt}",
        "model": OLLAMA_MODEL,
        "prompt": f"Does the following text favor the armed forces, security forces as Guardia Civil or Police, intelligence agencies such as CNI? Answer only with 'true' or 'false'. Topic: {prompt}",
    }

    response = requests.post(url, json=data)

    try:
        # Split the response into lines and parse each one
        response = requests.post(OLLAMA_URL, json=data)
        response.raise_for_status()

        for line in response.text.strip().split("\n"):
            json_data = json.loads(line)
            if "response" in json_data and json_data["response"].strip():
                return json_data["response"].strip().lower() == "true"
                result = json_data["response"].strip().lower() == "true"
                logging.info(f"Result for '{prompt}': {result}")
                return result

    except requests.RequestException as e:
        logging.error(f"Request error: {e}")
    except json.JSONDecodeError as e:
        print("JSON Decode Error:", e)
        logging.error(f"JSON Decode Error: {e}")

    return False

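OLLAMA_URL and OLLAMA_MODEL are read once at import time, which is what makes the module configurable from the container environment. A minimal usage sketch follows; it is not part of the commit, the service name `ollama` and the example topic are made up, and it assumes an Ollama instance is reachable at that URL:

```python
import os

# Point iacorrector at a (hypothetical) Ollama container before importing it,
# since OLLAMA_URL and OLLAMA_MODEL are captured at import time.
os.environ.setdefault("OLLAMA_URL", "http://ollama:11434/api/generate")
os.environ.setdefault("OLLAMA_MODEL", "llama3")

from iacorrector import is_security_related, is_critico, is_favorable

topic = "Nueva ley de seguridad nacional"  # made-up example topic
print(is_security_related(topic), is_critico(topic), is_favorable(topic))
```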
@@ -1,14 +1,20 @@
import requests
from bs4 import BeautifulSoup
import json
import os
import time
import subprocess
from googlenewsdecoder import gnewsdecoder
from iacorrector import is_security_related, is_critico, is_favorable  # import the classifier functions from iacorrector.py
from datetime import datetime
import pytz
import logging

# Logging configuration
LOG_FILE = "app.log"
logging.basicConfig(
    filename=LOG_FILE,  # log file
    level=logging.INFO,  # log level (DEBUG, INFO, WARNING, ERROR, CRITICAL)
    format="%(asctime)s - %(levelname)s - %(message)s",  # log line format
)

HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
@@ -23,7 +29,7 @@ def get_author_from_script(url):
        author = result.stdout.strip()
        return author if author else "Desconocido"
    except Exception as e:
        print(f"Error al obtener el autor para {url}: {e}")
        logging.info(f"Error al obtener el autor para {url}: {e}")
        return "Desconocido"

def get_url_from_google_news(url):
@@ -36,7 +42,7 @@ def get_url_from_google_news(url):
        else:
            return "N/C"
    except Exception as e:
        print(f"Error occurred: {e}")
        logging.info(f"Error occurred: {e}")

def get_article_content(url):
    """
@@ -45,7 +51,7 @@ def get_article_content(url):
    try:
        response = requests.get(url, headers=HEADERS)
        if response.status_code != 200:
            print(f"Error al acceder a {url}: Código {response.status_code}")
            logging.info(f"Error al acceder a {url}: Código {response.status_code}")
            return "No se pudo obtener el contenido"

        soup = BeautifulSoup(response.text, "html.parser")
@@ -68,7 +74,7 @@ def get_article_content(url):
        return "No se encontró contenido relevante"

    except Exception as e:
        print(f"Error al extraer contenido de {url}: {e}")
        logging.info(f"Error al extraer contenido de {url}: {e}")
        return "Error al extraer contenido"

def search_news(query):
@@ -79,7 +85,7 @@ def search_news(query):
    response = requests.get(base_url, headers=HEADERS)

    if response.status_code != 200:
        print(f"Error al acceder a la página para la consulta '{query}': {response.status_code}")
        logging.info(f"Error al acceder a la página para la consulta '{query}': {response.status_code}")
        return []

    soup = BeautifulSoup(response.content, 'xml')
@@ -123,19 +129,19 @@ def search_news(query):
            insertar_datos(news_item)

        except Exception as e:
            print(f"Error al procesar un artículo para '{query}': {e}")
            logging.info(f"Error al procesar un artículo para '{query}': {e}")

    return news_list

def insertar_datos(news_item):
    API_URL = "http://127.0.0.1:8001/news/"
    API_URL = "http://localhost:8000/news/"

    response = requests.post(API_URL, json=news_item)

    if response.status_code == 200:
        print(f"Noticia '{news_item['titulo']}' creada con éxito.")
        logging.info(f"Noticia '{news_item['titulo']}' creada con éxito.")
    else:
        print(f"Error al insertar '{news_item['titulo']}':", response.status_code, response.json())
        logging.info(f"Error al insertar '{news_item['titulo']}': {response.status_code} {response.json()}")

def search_from_keywords_file():
    """
@@ -151,14 +157,14 @@ def search_from_keywords_file():
        keywords = [keyword.strip() for keyword in keywords]

        for keyword in keywords:
            print(f"\nBuscando noticias sobre: {keyword}")
            logging.info(f"\nBuscando noticias sobre: {keyword}")
            search_news(keyword)
            time.sleep(2)  # pause to avoid being blocked for sending too many requests in a short time

    except FileNotFoundError:
        print("No se encontró el archivo 'keywords.txt'.")
        logging.info("No se encontró el archivo 'keywords.txt'.")
    except Exception as e:
        print(f"Error al leer el archivo 'keywords.txt': {e}")
        logging.info(f"Error al leer el archivo 'keywords.txt': {e}")

# Run the search from the keywords file
search_from_keywords_file()
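For context on the line-by-line parsing used in iacorrector: Ollama's /api/generate endpoint streams its reply as one JSON object per line, each chunk carrying a partial "response" field. A minimal sketch of that parsing, not part of the commit and using a made-up payload:

```python
import json

# Made-up example of a streamed /api/generate body (one JSON object per line).
sample_body = '{"response": "true", "done": false}\n{"response": "", "done": true}'

for line in sample_body.strip().split("\n"):
    chunk = json.loads(line)
    if "response" in chunk and chunk["response"].strip():
        print(chunk["response"].strip().lower() == "true")  # prints True
        break
```

Because the prompts ask the model to answer only with 'true' or 'false', the first non-empty chunk is expected to carry the whole answer, which is why the functions return as soon as they see one.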