from __future__ import annotations

import json
import re
import sys
import time
from dataclasses import dataclass
from datetime import datetime, timedelta, timezone
from email.utils import parsedate_to_datetime
from pathlib import Path
from html import unescape
from html.parser import HTMLParser
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
from typing import Iterable
from urllib.error import HTTPError, URLError
from urllib.parse import urlencode, urljoin, urlparse
from urllib.request import Request, urlopen


# TCP port the local HTTP API listens on (bound to 127.0.0.1 in ``main``).
PORT = 8765
# Directory containing this file; the JSON store is kept next to the script.
BASE_DIR = Path(__file__).resolve().parent
# On-disk store read by ``load_news_json`` and rewritten by ``save_news_json``.
NEWS_JSON = BASE_DIR / "noticias.json"
# User-Agent header sent by ``fetch`` with every outgoing request.
USER_AGENT = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126 Safari/537.36"
)
# Lowercase substrings that mark text as AI-related. They are matched with a
# plain ``in`` test against a lowercased, space-padded haystack, which is why
# the short term " ai " carries its own surrounding spaces.
AI_TERMS = (
    " ai ",
    "artificial intelligence",
    "inteligencia artificial",
    "machine learning",
    "openai",
    "anthropic",
    "deepmind",
    "gemini",
    "chatgpt",
    "claude",
    "llm",
)
# URL path suffixes that ``normalize_link`` rejects (compared against the
# lowercased path, so entries must be lowercase).
BLOCKED_EXTENSIONS = (
    ".zip",
    ".pdf",
    ".png",
    ".jpg",
    ".jpeg",
    ".gif",
    ".svg",
    ".webp",
    ".mp4",
    ".mov",
    ".css",
    ".js",
)
# Regexes tried in order by ``extract_date``: ISO ``YYYY-MM-DD``, then
# "Mon D, YYYY", then "D Mon YYYY" (English month names or abbreviations).
DATE_PATTERNS = (
    r"20\d{2}-\d{2}-\d{2}",
    r"\b(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]*\.?\s+\d{1,2},\s+20\d{2}\b",
    r"\b\d{1,2}\s+(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]*\.?\s+20\d{2}\b",
)
# (regex, Spanish replacement) pairs used by ``translate_to_spanish``, the
# offline fallback translator. Pairs are applied sequentially and
# case-insensitively, so ordering matters: multi-word phrases are listed
# before the shorter phrases they contain (e.g. "large language model"
# before "language model" before "model").
TRANSLATIONS = (
    (r"\bartificial intelligence\b", "inteligencia artificial"),
    (r"\bAI\b", "IA"),
    (r"\bmachine learning\b", "aprendizaje automático"),
    (r"\bdeep learning\b", "aprendizaje profundo"),
    (r"\blarge language model[s]?\b", "modelos de lenguaje grandes"),
    (r"\blanguage model[s]?\b", "modelos de lenguaje"),
    (r"\bmodel[s]?\b", "modelos"),
    (r"\bchatbot[s]?\b", "chatbots"),
    (r"\bagent[s]?\b", "agentes"),
    (r"\bstartup[s]?\b", "empresas emergentes"),
    (r"\bcompany\b", "compañía"),
    (r"\bcompanies\b", "compañías"),
    (r"\bresearch\b", "investigación"),
    (r"\bresearcher[s]?\b", "investigadores"),
    (r"\bscientist[s]?\b", "científicos"),
    (r"\bdeveloper[s]?\b", "desarrolladores"),
    (r"\buser[s]?\b", "usuarios"),
    (r"\bcustomer[s]?\b", "clientes"),
    (r"\btool[s]?\b", "herramientas"),
    (r"\bproduct[s]?\b", "productos"),
    (r"\bfeature[s]?\b", "funciones"),
    (r"\blaunch(?:es|ed)?\b", "lanza"),
    (r"\brelease(?:s|d)?\b", "publica"),
    (r"\bannounce(?:s|d)?\b", "anuncia"),
    (r"\bintroduce(?:s|d)?\b", "presenta"),
    (r"\bbuild(?:s|ing)?\b", "desarrolla"),
    (r"\bcreate(?:s|d)?\b", "crea"),
    (r"\bgenerate(?:s|d)?\b", "genera"),
    (r"\btrain(?:s|ed|ing)?\b", "entrena"),
    (r"\bimprove(?:s|d)?\b", "mejora"),
    (r"\bupdate(?:s|d)?\b", "actualiza"),
    (r"\bsafety\b", "seguridad"),
    (r"\bsecurity\b", "seguridad"),
    (r"\bprivacy\b", "privacidad"),
    (r"\bgovernance\b", "gobernanza"),
    (r"\brisk[s]?\b", "riesgos"),
    (r"\bbenchmark[s]?\b", "pruebas de referencia"),
    (r"\bopen source\b", "código abierto"),
    (r"\bcloud\b", "nube"),
    (r"\bdata center[s]?\b", "centros de datos"),
    (r"\bchip[s]?\b", "chips"),
    (r"\bcompute\b", "cómputo"),
    (r"\benterprise\b", "empresa"),
    (r"\bbusiness\b", "negocio"),
    (r"\bmarket\b", "mercado"),
    (r"\bwork\b", "trabajo"),
    (r"\bjob[s]?\b", "empleos"),
    (r"\bhealth\b", "salud"),
    (r"\bscience\b", "ciencia"),
    (r"\bsoftware\b", "software"),
    (r"\bcode\b", "código"),
    (r"\bnew\b", "nuevo"),
    (r"\bmore\b", "más"),
    (r"\bfirst\b", "primer"),
    (r"\bmajor\b", "importante"),
)


@dataclass
class Link:
    """An anchor (``<a>`` element) collected by ``LinkParser``."""

    # Value of the anchor's ``href`` attribute; not yet resolved against a base URL.
    href: str
    # Anchor text after being passed through ``clean``.
    text: str


class LinkParser(HTMLParser):
    """Collect anchors and the document title while parsing an HTML page.

    After ``feed()``:

    * ``links`` holds one ``Link`` per closed ``<a>`` element whose ``href``
      attribute was present and non-empty, in document order.
    * ``title`` holds the raw text of the *first* ``<title>`` element.

    Only the first ``<title>`` is captured because inline SVG icons commonly
    carry their own ``<title>`` children; appending those would pollute the
    page title with strings such as "Menu" or "Close".
    """

    def __init__(self) -> None:
        super().__init__()
        self.links: list[Link] = []
        self._href: str | None = None  # href of the anchor currently open, if any
        self._text: list[str] = []  # text fragments seen inside the open anchor
        self.title = ""
        self._in_title = False  # True while inside the first <title>
        self._title_done = False  # True once the first <title> has been closed

    def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:
        """Start tracking an anchor, or start capturing the first title."""
        if tag == "a":
            self._href = dict(attrs).get("href")
            self._text = []
        elif tag == "title" and not self._title_done:
            self._in_title = True

    def handle_endtag(self, tag: str) -> None:
        """Emit a ``Link`` when an anchor closes; stop capturing on ``</title>``."""
        if tag == "a" and self._href:
            text = clean(" ".join(self._text))
            self.links.append(Link(self._href, text))
            self._href = None
            self._text = []
        elif tag == "title" and self._in_title:
            self._in_title = False
            # Later <title> elements (e.g. inside <svg>) are ignored.
            self._title_done = True

    def handle_data(self, data: str) -> None:
        """Route text to the open anchor and/or the title being captured."""
        if self._href:
            self._text.append(data)
        if self._in_title:
            self.title += data


def clean(value: str) -> str:
    """Reduce an HTML fragment to single-spaced plain text.

    Script and style elements are dropped together with their content, any
    remaining tag becomes a space, character references are decoded, and runs
    of whitespace collapse to one space with the ends trimmed.
    """
    markup_patterns = (
        (r"<script[\s\S]*?</script>", re.I),
        (r"<style[\s\S]*?</style>", re.I),
        (r"<[^>]+>", 0),
    )
    text = value
    for pattern, flags in markup_patterns:
        text = re.sub(pattern, " ", text, flags=flags)
    # Entities are decoded only after tag removal, so "&lt;b&gt;" survives as "<b>".
    collapsed = re.sub(r"\s+", " ", unescape(text))
    return collapsed.strip()


def looks_english(text: str) -> bool:
    """Guess whether ``text`` is English rather than Spanish.

    Counts how many distinct marker words of each language occur in the first
    2000 characters (lowercased and space-padded so markers match whole
    words). English wins only when it leads by at least two markers.
    """
    english_markers = (
        " the ", " and ", " for ", " with ", " from ", " this ",
        " that ", " will ", " has ", " have ", " users ", " company ",
    )
    spanish_markers = (
        " el ", " la ", " los ", " las ", " de ", " que ",
        " para ", " con ", " una ", " usuarios ", " compañía ",
    )
    sample = " " + text[:2000].lower() + " "
    english_hits = len([marker for marker in english_markers if marker in sample])
    spanish_hits = len([marker for marker in spanish_markers if marker in sample])
    return english_hits - spanish_hits > 1


def translate_to_spanish(text: str) -> str:
    """Apply the offline word/phrase substitutions from ``TRANSLATIONS``.

    Each pattern is applied in table order, case-insensitively, to the output
    of the previous one; the result is then normalised with ``clean``.
    """
    result = text
    for regex, spanish in TRANSLATIONS:
        result = re.compile(regex, re.I).sub(spanish, result)
    return clean(result)


def translate_with_google(text: str) -> str:
    """Translate English ``text`` to Spanish through translate.googleapis.com.

    The text is split at sentence boundaries and regrouped into chunks of
    roughly 4200 characters; each chunk is sent as one request with a short
    pause afterwards. Network and parsing errors propagate to the caller.
    """
    if not text:
        return text

    # Greedily pack whole sentences into chunks. A single sentence longer
    # than the limit is still sent as one chunk.
    pieces: list[str] = []
    pending = ""
    for sentence in re.split(r"(?<=[.!?])\s+", text):
        if pending and len(pending) + len(sentence) > 4200:
            pieces.append(pending)
            pending = sentence
            continue
        pending = f"{pending} {sentence}".strip()
    if pending:
        pieces.append(pending)

    outputs: list[str] = []
    for piece in pieces:
        params = urlencode({"client": "gtx", "sl": "en", "tl": "es", "dt": "t", "q": piece})
        raw, _ = fetch(f"https://translate.googleapis.com/translate_a/single?{params}", timeout=8)
        # The first element of the response is a list of segments whose first
        # item is the translated text.
        segments = json.loads(raw)[0]
        outputs.append("".join(segment[0] for segment in segments if segment and segment[0]))
        time.sleep(0.1)
    return clean(" ".join(outputs))


def translate_if_english(text: str) -> tuple[str, bool]:
    """Translate ``text`` to Spanish when it looks English.

    Returns ``(text, changed)``. The online translator is tried first; on any
    failure the offline substitution table is used instead. ``changed`` is
    True only when the resulting string differs from the input.
    """
    if not looks_english(text):
        return text, False
    try:
        result = translate_with_google(text)
    except Exception:
        # Best effort: fall back to the local table if the service fails.
        result = translate_to_spanish(text)
    changed = result != text
    return result, changed


def fetch(url: str, timeout: int = 4) -> tuple[str, str]:
    """Download ``url`` and return ``(decoded_text, final_url)``.

    At most 900,000 bytes of the body are read. ``final_url`` is the URL
    after any redirects.

    Raises:
        ValueError: if ``url`` is not an ``http`` or ``https`` URL. URLs reach
            this function from HTTP request bodies, and ``urlopen`` would
            otherwise also serve ``file://`` and ``ftp://``, exposing local
            files to callers.
        urllib.error.URLError / HTTPError / TimeoutError: on network failure.
    """
    scheme = urlparse(url).scheme.lower()
    if scheme not in ("http", "https"):
        raise ValueError(f"unsupported URL scheme: {scheme or '(none)'}")
    req = Request(url, headers={"User-Agent": USER_AGENT, "Accept": "text/html,application/xhtml+xml"})
    with urlopen(req, timeout=timeout) as response:
        final_url = response.geturl()
        charset = response.headers.get_content_charset() or "utf-8"
        data = response.read(900_000)
    try:
        text = data.decode(charset, errors="replace")
    except LookupError:
        # The server advertised a charset Python does not know; UTF-8 with
        # replacement is a better outcome than failing the whole fetch.
        text = data.decode("utf-8", errors="replace")
    return text, final_url


def same_site(base_url: str, href: str) -> bool:
    """Return True when both URLs share a host, ignoring a leading ``www.``.

    Only a *leading* ``www.`` label is ignored. Stripping the substring
    anywhere would make unrelated hosts such as ``awww.example.com`` and
    ``aexample.com`` compare equal. URLs without a host compare as "".
    """

    def bare_host(url: str) -> str:
        host = urlparse(url).hostname or ""
        return host[4:] if host.startswith("www.") else host

    return bare_host(href) == bare_host(base_url)


def normalize_link(base_url: str, href: str) -> str | None:
    """Resolve ``href`` against ``base_url`` and return a crawlable URL.

    Returns ``None`` for empty hrefs, ``mailto:``/``javascript:``/fragment-only
    links, non-HTTP(S) schemes, paths ending in a blocked file extension, and
    hrefs that cannot be parsed as a URL. The fragment is removed from the
    returned URL.
    """
    # Attribute values often carry stray whitespace around the URL.
    href = (href or "").strip()
    if not href or href.startswith(("mailto:", "javascript:", "#")):
        return None
    try:
        absolute = urljoin(base_url, href)
        parsed = urlparse(absolute)
    except ValueError:
        # Malformed hrefs (e.g. an unterminated IPv6 literal) must not abort
        # the caller's whole link loop.
        return None
    if parsed.scheme not in ("http", "https"):
        return None
    if parsed.path.lower().endswith(BLOCKED_EXTENSIONS):
        return None
    return absolute.split("#", 1)[0]


def looks_like_article(url: str, text: str) -> bool:
    """Heuristically decide whether a link points at an article page.

    A link qualifies when its path is not a bare index page and either

    * the path contains a typical article segment (``/news/``, ``/blog/``, ...)
      or a four-digit ``/20YY/`` year segment, or
    * the URL or anchor text mentions an AI term and the cleaned anchor text
      is longer than 24 characters.

    The year segment is matched generically rather than as one literal year,
    so the heuristic keeps working when the calendar year changes.
    """
    path = urlparse(url).path.lower()
    if path in ("", "/", "/news", "/blog", "/discover", "/blog/community"):
        return False
    if any(part in path for part in ("/news/", "/blog/", "/story/", "/article/", "/discover/", "/posts/")):
        return True
    if re.search(r"/20\d{2}/", path):
        return True
    haystack = f" {url.lower()} {text.lower()} "
    has_ai = any(term in haystack for term in AI_TERMS)
    return has_ai and len(clean(text)) > 24


def extract_date(text: str, fallback: datetime) -> str:
    """Extract a publication date from ``text`` as an ISO ``YYYY-MM-DD`` string.

    Checks, in order:

    1. a relative phrase ("3 days ago", "about 5 hours ago"), subtracted from
       ``fallback``;
    2. the standalone word "yesterday" / "ayer";
    3. the first match of each ``DATE_PATTERNS`` regex that parses as a date;
    4. the whole text as an RFC 2822 date.

    If nothing matches, the date of ``fallback`` is returned.
    """
    lower = text.lower()
    relative = re.search(r"\b(?:about\s+)?(\d+)\s+(hour|hours|day|days)\s+ago\b", lower)
    if relative:
        amount = int(relative.group(1))
        unit = relative.group(2)
        try:
            delta = timedelta(hours=amount) if unit.startswith("hour") else timedelta(days=amount)
            return (fallback - delta).date().isoformat()
        except OverflowError:
            # Absurd amounts ("99999999999 days ago") are not real dates;
            # fall through to the other strategies.
            pass
    # Whole-word match only: a plain substring test for "ayer" also fires on
    # "dataLayer", "player" or "layer", which appear in most HTML pages.
    if re.search(r"\b(?:yesterday|ayer)\b", lower):
        return (fallback - timedelta(days=1)).date().isoformat()
    for pattern in DATE_PATTERNS:
        match = re.search(pattern, text, flags=re.I)
        if not match:
            continue
        raw = match.group(0)
        for fmt in ("%Y-%m-%d", "%B %d, %Y", "%b %d, %Y", "%d %B %Y", "%d %b %Y"):
            try:
                return datetime.strptime(raw.replace(".", ""), fmt).date().isoformat()
            except ValueError:
                pass
    try:
        return parsedate_to_datetime(text).date().isoformat()
    except Exception:
        return fallback.date().isoformat()


def title_terms(title: str) -> set[str]:
    """Return the distinctive lowercase words of a title.

    A word is a run of at least four letters/digits (including the accented
    characters listed in the pattern); common English/Spanish filler words
    are removed.
    """
    stopwords = frozenset(
        (
            "with", "from", "that", "this",
            "para", "como", "sobre", "desde", "esta", "este",
            "news", "article", "story",
        )
    )
    tokens = re.findall(r"[A-Za-zÁÉÍÓÚáéíóúÑñ0-9]{4,}", title.lower())
    return set(tokens) - stopwords


def remove_unrelated_sections(html: str) -> str:
    """Blank out page regions that are not part of the article body.

    Each of the following is replaced by a single space, in this order:
    script, style, nav, footer and aside elements, then ``<section>`` and
    ``<div>`` elements whose opening tag mentions a "related content" style
    keyword. Matching is regex-based and non-greedy, so a nested ``<div>``
    ends the match at the first closing ``</div>``.
    """
    removal_patterns = (
        r"<script[\s\S]*?</script>",
        r"<style[\s\S]*?</style>",
        r"<nav[\s\S]*?</nav>",
        r"<footer[\s\S]*?</footer>",
        r"<aside[\s\S]*?</aside>",
        r"<section[^>]*(related|recommend|latest|popular|newsletter|signup|more)[^>]*>[\s\S]*?</section>",
        r"<div[^>]*(related|recommend|latest|popular|newsletter|signup|more)[^>]*>[\s\S]*?</div>",
    )
    remaining = html
    for pattern in removal_patterns:
        remaining = re.sub(pattern, " ", remaining, flags=re.I)
    return remaining


def extract_content_blocks(html: str) -> list[str]:
    """Return the text blocks of a page that look like article content.

    All ``<p>`` elements are collected first, then all ``<li>`` elements. A
    block is skipped when it is shorter than 45 characters, contains a
    boilerplate phrase, or contains more than one URL-like token. If no block
    survives, the whole page is cleaned and split into sentences, keeping
    those longer than 60 characters.
    """
    boilerplate = (
        "sign up",
        "subscribe",
        "related stories",
        "related articles",
        "more from",
        "read more",
        "all rights reserved",
        "cookie",
        "privacy policy",
        "terms of service",
        "advertisement",
        "sponsored",
        "newsletter",
    )
    body = remove_unrelated_sections(html)
    kept: list[str] = []
    for tag_pattern in (r"<p\b[^>]*>([\s\S]*?)</p>", r"<li\b[^>]*>([\s\S]*?)</li>"):
        for found in re.finditer(tag_pattern, body, flags=re.I):
            paragraph = clean(found.group(1))
            if len(paragraph) < 45:
                continue
            lowered = paragraph.lower()
            if any(phrase in lowered for phrase in boilerplate):
                continue
            if len(re.findall(r"https?://|www\.", paragraph)) > 1:
                continue
            kept.append(paragraph)
    if not kept:
        # Fallback for pages without usable <p>/<li> markup.
        sentences = re.split(r"(?<=[.!?])\s+", clean(body))
        kept = [sentence for sentence in sentences if len(sentence) > 60]
    return kept


def score_block(block: str, terms: set[str], position: int) -> int:
    """Score how relevant a text block is to the article.

    Scoring: 4 points per title term found in the block, 2 points per AI
    term found, 2 points if the block is among the first eight on the page,
    and 1 point if it is longer than 260 characters.
    """
    padded = f" {block.lower()} "
    title_hits = sum(1 for term in terms if term in padded)
    ai_hits = sum(1 for term in AI_TERMS if term.strip() and term in padded)
    total = 4 * title_hits + 2 * ai_hits
    total += 2 if position < 8 else 0
    total += 1 if len(block) > 260 else 0
    return total


def build_article_summary(html: str, title: str, target: int = 1500) -> str:
    """Assemble an extractive summary of about ``target`` characters.

    Content blocks are scored against the title; blocks with a positive score
    are used, or the first eight blocks when fewer than three score. Blocks
    are concatenated in page order: one that would push the summary past
    ``target + 180`` is skipped, and collection stops once the summary
    reaches ``target - 120``. A result longer than ``target + 120`` is cut at
    a word boundary and suffixed with "...".
    """
    blocks = extract_content_blocks(html)
    keywords = title_terms(title)
    scored = [
        (pos, blk, score_block(blk, keywords, pos))
        for pos, blk in enumerate(blocks)
    ]
    picks = [(pos, blk) for pos, blk, points in scored if points > 0]
    if len(picks) < 3:
        picks = [(pos, blk) for pos, blk, _ in scored[:8]]
    picks.sort(key=lambda pair: pair[0])

    chosen: list[str] = []
    for _, blk in picks:
        if len(clean(" ".join([*chosen, blk]))) > target + 180:
            # Too long with this block; a later, shorter one may still fit.
            continue
        chosen.append(blk)
        if len(clean(" ".join(chosen))) >= target - 120:
            break

    text = clean(" ".join(chosen))
    if len(text) > target + 120:
        text = text[: target + 120].rsplit(" ", 1)[0] + "..."
    return text


def summarize(html: str, title: str, source: str, date: str) -> str:
    """Build the stored ``detail`` text for an article.

    Uses the extractive summary, or a one-line placeholder when none could be
    built, followed by a date/source suffix. The result is capped at about
    1650 characters, cut at a word boundary and suffixed with "...".
    """
    base = build_article_summary(html, title, 1500)
    if not base:
        base = f"Noticia detectada desde {source}: {title}."
    suffix = f" Fecha estimada de publicación: {date}. Fuente: {source}."
    text = clean(base + suffix)
    if len(text) <= 1650:
        return text
    return text[:1647].rsplit(" ", 1)[0] + "..."


def source_from_url(url: str) -> str:
    """Derive a display name for a source from the URL's host.

    Takes the first dot-separated label of the host (after removing "www."),
    turns hyphens into spaces and title-cases it. URLs without a host yield
    "Fuente".
    """
    hostname = urlparse(url).hostname
    if not hostname:
        hostname = "Fuente"
    first_label = hostname.replace("www.", "").split(".")[0]
    return first_label.replace("-", " ").title()


def scrape_single_article(url: str) -> dict[str, str]:
    """Fetch one article URL and build a news item flagged as manual.

    The title comes from the page ``<title>`` (falling back to the source
    name), the date from the first 80,000 characters of the page, and the
    detail from ``summarize``. Title and detail are translated when they look
    English, in which case a notice is appended to the detail. Fetch errors
    propagate to the caller.
    """
    fetched_at = datetime.now(timezone.utc)
    page, resolved_url = fetch(url)
    page_parser = LinkParser()
    page_parser.feed(page)

    origin = source_from_url(resolved_url)
    headline = clean(page_parser.title) or origin
    published = extract_date(page[:80_000], fetched_at)
    # The summary is built from the original-language title so that title
    # terms can match the article text.
    body = summarize(page, headline, origin, published)

    headline, headline_changed = translate_if_english(headline)
    body, body_changed = translate_if_english(body)
    if headline_changed or body_changed:
        body = f"{body} Traducción automática al español aplicada desde la fuente original en inglés."

    return {
        "title": headline[:180],
        "source": origin,
        "date": published,
        "topic": "Noticia manual",
        "url": resolved_url,
        "detail": body,
        "manual": True,
        "refreshed": True,
    }


def assign_ids(items: Iterable[dict[str, str]]) -> list[dict[str, str]]:
    """Return shallow copies of ``items`` with sequential ``id`` values from 1.

    The input dicts are not modified; an existing ``id`` is overwritten in
    the copy.
    """
    return [dict(item, id=index) for index, item in enumerate(items, start=1)]


def save_news_json(items: Iterable[dict[str, str]]) -> list[dict[str, str]]:
    """Renumber ``items`` and overwrite the JSON store with them.

    The file receives ``updatedAt`` (current UTC time, ISO format), ``count``
    and ``items``. Returns the renumbered items.
    """
    numbered = assign_ids(items)
    stamp = datetime.now(timezone.utc).isoformat()
    document = {"updatedAt": stamp, "count": len(numbered), "items": numbered}
    serialized = json.dumps(document, ensure_ascii=False, indent=2)
    NEWS_JSON.write_text(serialized, encoding="utf-8")
    return numbered


def load_news_json() -> dict:
    """Return the parsed JSON store, or an empty payload if the file is absent.

    A file that exists but does not contain valid JSON raises from
    ``json.loads``.
    """
    if NEWS_JSON.exists():
        raw = NEWS_JSON.read_text(encoding="utf-8")
        return json.loads(raw)
    return {"updatedAt": None, "count": 0, "items": []}


def merge_refresh_with_existing(scraped_items: Iterable[dict[str, str]]) -> list[dict[str, str]]:
    """Reconcile freshly scraped items with the stored ones.

    For every scraped item, the stored item with the same URL (or, failing
    that, the same cleaned lowercase title) is kept in preference to the new
    scrape, so edits made to a stored item survive a refresh. Duplicates are
    dropped, keyed by URL or, when the URL is missing, by title.

    NOTE(review): stored items that were not scraped again are absent from
    the result, including ones flagged ``manual`` or ``locked``. Confirm
    with the caller whether that is intended.
    """
    stored_items = load_news_json().get("items", [])
    by_url: dict = {}
    by_title: dict = {}
    for entry in stored_items:
        if entry.get("url"):
            by_url[entry.get("url")] = entry
        if entry.get("title"):
            by_title[clean(entry.get("title", "")).lower()] = entry

    result: list[dict[str, str]] = []
    emitted: set[str] = set()
    for fresh in scraped_items:
        wanted_title = clean(fresh.get("title", "")).lower()
        match = by_url.get(fresh.get("url")) or by_title.get(wanted_title)
        chosen = dict(match or fresh)
        dedupe_key = chosen.get("url") or clean(chosen.get("title", "")).lower()
        if dedupe_key not in emitted:
            emitted.add(dedupe_key)
            result.append(chosen)
    return result


def delete_news_item(news_id: int | None = None, url: str | None = None) -> list[dict[str, str]]:
    """Remove every stored item matching ``news_id`` or ``url`` and save.

    Either criterion is enough to remove an item. Returns the saved,
    renumbered remainder.
    """

    def is_target(entry: dict) -> bool:
        if news_id is not None and int(entry.get("id", -1)) == news_id:
            return True
        return bool(url) and entry.get("url") == url

    stored = load_news_json().get("items", [])
    kept = [entry for entry in stored if not is_target(entry)]
    return save_news_json(kept)


def upsert_manual_news(item: dict) -> list[dict[str, str]]:
    """Store ``item`` as the first entry, replacing any item with the same URL.

    ``item`` is modified in place: it is flagged ``manual`` and ``refreshed``
    and stamped with ``manualPinnedAt`` (current time in milliseconds).
    Returns the saved, renumbered items.
    """
    target_url = item.get("url")
    others = [
        entry
        for entry in load_news_json().get("items", [])
        if entry.get("url") != target_url
    ]
    item.update(manual=True, refreshed=True, manualPinnedAt=int(time.time() * 1000))
    return save_news_json([item, *others])


def update_news_item(item: dict) -> list[dict[str, str]]:
    """Merge ``item`` into the first stored item with the same id or URL.

    Fields in ``item`` override the stored ones. When nothing matches,
    ``item`` is inserted at the front. Returns the saved, renumbered items.
    """
    stored = load_news_json().get("items", [])
    wanted_id = item.get("id")
    wanted_url = item.get("url")
    for position, current in enumerate(stored):
        id_matches = wanted_id is not None and int(current.get("id", -1)) == int(wanted_id)
        if id_matches or (wanted_url and current.get("url") == wanted_url):
            stored[position] = {**current, **item}
            break
    else:
        # No stored item matched: treat the update as an insert.
        stored.insert(0, item)
    return save_news_json(stored)


def set_news_locked(news_id: int | None = None, url: str | None = None, locked: bool = False) -> list[dict[str, str]]:
    """Set the ``locked`` flag on the first stored item matching id or URL.

    The store is rewritten even when nothing matches. Returns the saved,
    renumbered items.
    """

    def matches(entry: dict) -> bool:
        if news_id is not None and int(entry.get("id", -1)) == news_id:
            return True
        return bool(url) and entry.get("url") == url

    stored = load_news_json().get("items", [])
    target = next((entry for entry in stored if matches(entry)), None)
    if target is not None:
        target["locked"] = bool(locked)
    return save_news_json(stored)


def scrape_source(name: str, url: str, days_back: int, max_articles: int = 6) -> list[dict[str, str]]:
    """Scrape a listing page and return recent articles as news items.

    Steps:

    1. Fetch ``url`` and collect same-site links that look like articles
       (up to 8, or 24 for the Hugging Face blog).
    2. For the Hugging Face blog, move links whose text shows a recent date
       to the front.
    3. Fetch each candidate until ``max_articles`` items are collected or the
       time budget (16 s, or 28 s for Hugging Face) is spent, keeping only
       articles dated between ``now - days_back`` and ``now + 1 day``.

    Errors fetching the listing page propagate to the caller; errors on an
    individual article skip that article.
    """
    is_huggingface = "huggingface.co/blog" in url
    deadline = time.monotonic() + (28 if is_huggingface else 16)
    now = datetime.now(timezone.utc)
    cutoff = now - timedelta(days=days_back)
    html, final_url = fetch(url)
    parser = LinkParser()
    parser.feed(html)
    candidates: list[tuple[str, str]] = []
    seen: set[str] = set()
    for link in parser.links:
        href = normalize_link(final_url, link.href)
        if not href or href in seen or not same_site(final_url, href):
            continue
        if looks_like_article(href, link.text):
            candidates.append((href, link.text))
            seen.add(href)
        if len(candidates) >= (24 if is_huggingface else 8):
            break
    if is_huggingface:
        # Prioritise links whose text carries a relative date or an absolute
        # date in one of the months covered by the cutoff..now window. The
        # month names are derived from the window rather than hard-coded, so
        # the prioritisation keeps working in every month of the year.
        month_names = (
            "January", "February", "March", "April", "May", "June",
            "July", "August", "September", "October", "November", "December",
        )
        window_months: list[str] = []
        year, month = cutoff.year, cutoff.month
        while (year, month) <= (now.year, now.month):
            window_months.append(month_names[month - 1])
            year, month = (year + 1, 1) if month == 12 else (year, month + 1)
        recent_pattern = re.compile(
            r"\b(?:about\s+)?\d+\s+(hour|hours|day|days)\s+ago\b|(?:"
            + "|".join(window_months)
            + r")\s+\d{1,2},\s+20\d{2}",
            re.I,
        )
        # Stable sort: page order is kept within each priority group.
        candidates.sort(key=lambda item: 0 if recent_pattern.search(item[1]) else 1)

    results: list[dict[str, str]] = []
    for article_url, link_text in candidates:
        if time.monotonic() > deadline:
            break
        try:
            article_html, article_final = fetch(article_url)
            article_parser = LinkParser()
            article_parser.feed(article_html)
            title = clean(article_parser.title) or clean(link_text)
            date = extract_date(f"{link_text} {article_html[:80_000]}", now)
            published = datetime.fromisoformat(date).replace(tzinfo=timezone.utc)
            if published < cutoff or published > now + timedelta(days=1):
                continue
            detail = summarize(article_html, title, name, date)
            title, title_translated = translate_if_english(title)
            detail, detail_translated = translate_if_english(detail)
            if title_translated or detail_translated:
                detail = f"{detail} Traducción automática al español aplicada desde la fuente original en inglés."
            results.append(
                {
                    "title": title[:180],
                    "source": re.sub(r" News$| AI$", "", name),
                    "date": date,
                    "topic": "Búsqueda refrescada",
                    "url": article_final,
                    "detail": detail,
                    "refreshed": True,
                }
            )
        except Exception:
            # Best effort: one broken article must not fail the whole source.
            continue
        time.sleep(0.1)
        if len(results) >= max_articles:
            break
    return results


class Handler(BaseHTTPRequestHandler):
    """JSON API for the news store and scraper.

    GET routes: ``/health`` and ``/news``.
    POST routes: ``/scrape_one``, ``/delete_news``, ``/save_manual_news``,
    ``/update_news``, ``/lock_news`` and ``/refresh``.

    Every response is a JSON object with an ``ok`` flag and permissive CORS
    headers. Any exception raised while serving a route is reported as a 500
    response carrying the exception text, so the client always gets a reply.
    """

    # POST path -> name of the method that serves it.
    _POST_ROUTES = {
        "/scrape_one": "_post_scrape_one",
        "/delete_news": "_post_delete_news",
        "/save_manual_news": "_post_save_manual_news",
        "/update_news": "_post_update_news",
        "/lock_news": "_post_lock_news",
        "/refresh": "_post_refresh",
    }

    def _send(self, status: int, payload: dict) -> None:
        """Write ``payload`` as a JSON response with CORS headers."""
        body = json.dumps(payload, ensure_ascii=False).encode("utf-8")
        self.send_response(status)
        self.send_header("Content-Type", "application/json; charset=utf-8")
        self.send_header("Access-Control-Allow-Origin", "*")
        self.send_header("Access-Control-Allow-Methods", "GET, POST, OPTIONS")
        self.send_header("Access-Control-Allow-Headers", "Content-Type")
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    def _send_error(self, exc: Exception) -> None:
        """Report an unexpected failure as a 500 JSON response."""
        self._send(500, {"ok": False, "error": str(exc)})

    def _read_json(self):
        """Read and parse the request body according to ``Content-Length``."""
        length = int(self.headers.get("Content-Length", "0"))
        return json.loads(self.rfile.read(length).decode("utf-8"))

    def do_OPTIONS(self) -> None:
        """Answer CORS preflight requests."""
        self._send(200, {"ok": True})

    def do_GET(self) -> None:
        """Serve ``/health`` and ``/news``; anything else is a 404."""
        if self.path == "/health":
            self._send(200, {"ok": True, "service": "news-scraper"})
        elif self.path == "/news":
            # Guarded like the POST routes: an unreadable or corrupt store
            # must produce a 500 reply rather than an unanswered request.
            try:
                body = {"ok": True, **load_news_json()}
            except Exception as exc:
                self._send_error(exc)
            else:
                self._send(200, body)
        else:
            self._send(404, {"ok": False, "error": "not found"})

    def do_POST(self) -> None:
        """Dispatch to the route method; unknown paths are a 404."""
        route = self._POST_ROUTES.get(self.path)
        if route is None:
            self._send(404, {"ok": False, "error": "not found"})
            return
        try:
            getattr(self, route)()
        except Exception as exc:
            self._send_error(exc)

    def _post_scrape_one(self) -> None:
        """Scrape the single article URL given in the body."""
        payload = self._read_json()
        url = payload.get("url", "")
        if not url:
            self._send(400, {"ok": False, "error": "missing url"})
            return
        self._send(200, {"ok": True, "item": scrape_single_article(url)})

    def _read_id_and_url(self, payload: dict) -> tuple[int | None, str | None]:
        """Extract the optional ``id`` (as int) and ``url`` selectors."""
        news_id = payload.get("id")
        return (int(news_id) if news_id is not None else None), payload.get("url")

    def _post_delete_news(self) -> None:
        """Delete the item selected by ``id`` or ``url``."""
        news_id, url = self._read_id_and_url(self._read_json())
        saved = delete_news_item(news_id, url)
        self._send(200, {"ok": True, "items": saved, "count": len(saved)})

    def _store_item(self, writer) -> None:
        """Validate the posted item and persist it with ``writer``."""
        item = self._read_json()
        if not item.get("title") or not item.get("url") or not item.get("detail"):
            self._send(400, {"ok": False, "error": "title, url and detail are required"})
            return
        saved = writer(item)
        self._send(200, {"ok": True, "items": saved, "count": len(saved), "json": str(NEWS_JSON)})

    def _post_save_manual_news(self) -> None:
        """Insert or replace a manually entered item."""
        self._store_item(upsert_manual_news)

    def _post_update_news(self) -> None:
        """Update an existing item (or insert it when absent)."""
        self._store_item(update_news_item)

    def _post_lock_news(self) -> None:
        """Set or clear the ``locked`` flag on an item."""
        payload = self._read_json()
        news_id, url = self._read_id_and_url(payload)
        locked = bool(payload.get("locked"))
        saved = set_news_locked(news_id, url, locked)
        self._send(200, {"ok": True, "items": saved, "count": len(saved)})

    def _post_refresh(self) -> None:
        """Scrape every requested source and rewrite the store."""
        payload = self._read_json()
        sources = payload.get("sources", [])
        # Clamp the look-back window to 1..60 days.
        days_back = max(1, min(int(payload.get("daysBack", 7)), 60))
        all_results: list[dict[str, str]] = []
        logs: list[str] = []
        for source in sources:
            name = source.get("name") or "Fuente"
            url = source.get("url") or ""
            # A failing source is logged and skipped so the others still run.
            try:
                results = scrape_source(name, url, days_back)
                all_results.extend(results)
                logs.append(f"{name}: {len(results)} noticias encontradas por scraping.")
            except (HTTPError, URLError, TimeoutError) as exc:
                logs.append(f"{name}: no se pudo leer ({exc}).")
            except Exception as exc:
                logs.append(f"{name}: error de scraping ({exc}).")
        saved = save_news_json(merge_refresh_with_existing(all_results))
        self._send(200, {"ok": True, "items": saved, "logs": logs, "json": str(NEWS_JSON)})


def main() -> int:
    """Run the HTTP API on 127.0.0.1:``PORT`` until interrupted.

    The server is used as a context manager so the listening socket is
    closed on every exit path. Ctrl-C is treated as a normal shutdown.
    Returns 0 as the process exit status.
    """
    with ThreadingHTTPServer(("127.0.0.1", PORT), Handler) as server:
        print(f"Servidor de noticias activo en http://127.0.0.1:{PORT}")
        try:
            server.serve_forever()
        except KeyboardInterrupt:
            # Interactive stop: leave the ``with`` block to close the socket.
            pass
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
