import os
import time

import mysql.connector
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

# Database connection settings.
# Each value can be overridden through an environment variable; the literals
# below are the original hard-coded values, kept as fallbacks so the script
# still runs unchanged when no variables are set.
# NOTE(review): a password committed to source control should be rotated, and
# the fallback removed once RASTER_DB_PASSWORD is configured where this runs.
db = mysql.connector.connect(
    host=os.environ.get("RASTER_DB_HOST", "localhost"),
    user=os.environ.get("RASTER_DB_USER", "radioysk_raster2025"),
    password=os.environ.get("RASTER_DB_PASSWORD", "[NwrDDg-kDts4]Rc"),
    database=os.environ.get("RASTER_DB_NAME", "radioysk_raster2025"),
)
# dictionary=True: rows come back as dicts, so columns are read by name.
cursor = db.cursor(dictionary=True)

# Selenium setup: Chrome running headless.
chrome_options = Options()
for _chrome_flag in (
    "--headless=new",  # use Chrome's newer headless mode
    "--no-sandbox",
    "--disable-dev-shm-usage",
    "--disable-gpu",
    "--window-size=1920,1080",
    "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115 Safari/537.36",
):
    chrome_options.add_argument(_chrome_flag)

driver = webdriver.Chrome(options=chrome_options)

# Everything from the query onward runs under try/finally so that the Chrome
# session and the DB handles are released even if the query fails or the run
# is interrupted part-way through the loop.
try:
    # Active web sources, each with the CSS selector used to locate headlines.
    cursor.execute("SELECT id, nombre, url, selector_noticias FROM fuentes WHERE plataforma='web' AND activa=1")
    fuentes = cursor.fetchall()

    print("=== TEST SELECTORES WEB (DINÁMICO CON SELENIUM) ===")

    for f in fuentes:
        print(f"\n🌐 {f['nombre']} → {f['url']}")
        if not f['selector_noticias']:
            # Nothing to test for a source without a selector.
            print("⚠️ No tiene selector definido.")
            continue

        # Per-source boundary: a failure on one site (navigation error,
        # invalid selector, ...) is reported and the remaining sources are
        # still tested.
        try:
            driver.get(f['url'])
            time.sleep(5)  # fixed pause to let the page's JavaScript render
            html = driver.page_source
            soup = BeautifulSoup(html, "html.parser")
            noticias = soup.select(f['selector_noticias'])

            if noticias:
                print(f"   📊 Encontradas {len(noticias)} noticias:")
                for n in noticias[:5]:  # only the first 5 matches
                    titulo = n.get_text(strip=True)
                    if titulo:  # skip matches with no visible text
                        print(f" 📰 {titulo}")
            else:
                print("❌ No devolvió resultados, revisar selector.")
        except Exception as e:
            print(f"❌ Error cargando {f['url']}: {e}")
finally:
    # Same release order as before (browser, cursor, connection); the nested
    # finally ensures the DB handles are closed even if quitting Chrome raises.
    try:
        driver.quit()
    finally:
        try:
            cursor.close()
        finally:
            db.close()
