"""
Competitor Pricing Tracker — main orchestrator
Scrapes Notion, monday.com, and ClickUp pricing pages daily.
Uses Gemini Flash to extract structured data and summarize changes vs the
previous snapshot, then writes results to Google Sheets.

Usage:
    python -m scraper.main
"""

import json
import os
import sys
import yaml
from datetime import datetime, timezone
from pathlib import Path

from dotenv import load_dotenv

# Load environment variables (e.g. SPREADSHEET_ID, API keys) from a local .env file.
load_dotenv()

# Repository root: this file lives one directory below it (run as `python -m scraper.main`).
ROOT = Path(__file__).parent.parent
CONFIG_PATH = ROOT / "config.yaml"  # competitor list + storage settings
SNAPSHOT_PATH = ROOT / "data" / "previous_prices.json"  # previous run's extracted plans


# ---------------------------------------------------------------------------
# Snapshot helpers
# ---------------------------------------------------------------------------

def load_snapshot() -> dict:
    """Return the previous run's snapshot, or an empty shell on first run.

    The shell carries the same top-level keys ("snapshot_date",
    "competitors") so callers never special-case a missing file.
    """
    if not SNAPSHOT_PATH.exists():
        return {"snapshot_date": None, "competitors": {}}
    raw = SNAPSHOT_PATH.read_text()
    return json.loads(raw)


def save_snapshot(data: dict) -> None:
    """Write *data* to SNAPSHOT_PATH, stamped with today's UTC date.

    Creates the data/ directory on first run. The date stamp is applied to a
    shallow copy so the caller's dict is not mutated as a side effect (the
    previous in-place write could surprise callers reusing the dict).

    Args:
        data: Snapshot mapping with a "competitors" key; any existing
            "snapshot_date" value is overwritten in the serialized output.
    """
    SNAPSHOT_PATH.parent.mkdir(parents=True, exist_ok=True)
    payload = {
        **data,
        "snapshot_date": datetime.now(timezone.utc).date().isoformat(),
    }
    SNAPSHOT_PATH.write_text(json.dumps(payload, indent=2))


# ---------------------------------------------------------------------------
# Page scraping
# ---------------------------------------------------------------------------

def get_page_text(url: str, max_lines: int = 300) -> str:
    """
    Load a JS-rendered pricing page with Playwright and return its visible text.

    Strips nav/footer noise and blank lines; output is capped at *max_lines*
    non-empty lines (default 300) to stay within the LLM token budget.

    Args:
        url: Pricing page URL to load.
        max_lines: Maximum number of non-empty lines to return.

    Returns:
        Newline-joined, stripped visible text of the page body.

    Raises:
        Playwright errors other than the initial navigation timeout, which is
        tolerated so partially loaded pages can still be extracted.
    """
    # Imported lazily so importing this module does not require Playwright
    # (e.g. when only the snapshot helpers are used).
    from playwright.sync_api import sync_playwright, TimeoutError as PWTimeout

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        try:
            page = browser.new_page()
            # Desktop-Chrome UA avoids simplistic bot blocks on marketing pages.
            page.set_extra_http_headers({
                "User-Agent": (
                    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
                    "AppleWebKit/537.36 (KHTML, like Gecko) "
                    "Chrome/120.0.0.0 Safari/537.36"
                )
            })
            try:
                page.goto(url, wait_until="networkidle", timeout=35000)
            except PWTimeout:
                # Page partially loaded — give it a moment, then extract anyway.
                page.wait_for_timeout(3000)

            text = page.evaluate("""
                () => {
                    // Remove non-content elements for cleaner extraction
                    ['script', 'style', 'nav', 'footer', 'header', 'iframe'].forEach(tag => {
                        document.querySelectorAll(tag).forEach(el => el.remove());
                    });
                    return document.body.innerText;
                }
            """)
        finally:
            # Previously, a failure in new_page()/evaluate() skipped close()
            # and leaked the headless browser process for the rest of the run.
            browser.close()

    lines = [line.strip() for line in text.split("\n") if line.strip()]
    return "\n".join(lines[:max_lines])


# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------

def main() -> None:
    """Run one scrape → analyze → sync cycle for every configured competitor.

    Reads config.yaml for the competitor list and storage settings, scrapes
    each pricing page, asks Gemini to extract plans and diff them against the
    previous snapshot, pushes results to Google Sheets, and finally persists
    the new snapshot for the next run.
    """
    config = yaml.safe_load(CONFIG_PATH.read_text())
    # Business context passed to the LLM so summaries are framed for this product.
    context = (ROOT / "profile" / "context.md").read_text()
    snapshot = load_snapshot()

    # Environment variable wins over config so deployments can override the sheet.
    spreadsheet_id = (
        os.environ.get("SPREADSHEET_ID")
        or config["storage"]["spreadsheet_id"]
    )

    # Imported here rather than at module top — presumably to avoid requiring
    # Google/Gemini credentials at import time; confirm before hoisting.
    from ai.client import analyze_competitor
    from storage.sheets_sync import SheetSync

    sync = SheetSync(spreadsheet_id)
    today = datetime.now(timezone.utc).date().isoformat()
    new_snapshot = {"snapshot_date": today, "competitors": {}}

    for comp in config["competitors"]:
        name = comp["name"]
        slug = comp["slug"]
        url = comp["url"]
        print(f"\n[{name}] Scraping {url} ...")

        try:
            page_text = get_page_text(url)
            print(f"[{name}] {len(page_text)} chars extracted")
        except Exception as exc:
            print(f"[{name}] SCRAPE FAILED: {exc}")
            # Preserve existing snapshot for this competitor so one failed day
            # doesn't erase its history (and spoof a "change" on the next run).
            if slug in snapshot.get("competitors", {}):
                new_snapshot["competitors"][slug] = snapshot["competitors"][slug]
            continue

        # Previous plans for this competitor; empty dict on first run.
        previous = snapshot.get("competitors", {}).get(slug, {})

        print(f"[{name}] Calling Gemini (extract + diff + summary) ...")
        result = analyze_competitor(
            name=name,
            page_text=page_text,
            previous=previous,
            context=context,
        )

        if not result:
            print(f"[{name}] Gemini returned no result — skipping")
            # Same preservation rule as the scrape-failure path above.
            if slug in snapshot.get("competitors", {}):
                new_snapshot["competitors"][slug] = snapshot["competitors"][slug]
            continue

        # Expected result keys per the usage below: plans/changes lists,
        # summary string, has_changes bool — TODO confirm against ai.client.
        plans = result.get("plans", [])
        changes = result.get("changes", [])
        summary = result.get("summary", "")
        has_changes = result.get("has_changes", False)

        print(
            f"[{name}] {len(plans)} plans | "
            f"{len(changes)} changes | "
            f"has_changes={has_changes}"
        )

        # Only extracted plans are snapshotted; diffs and summaries live in Sheets.
        new_snapshot["competitors"][slug] = {
            "name": name,
            "url": url,
            "plans": plans,
        }

        if plans:
            sync.write_raw_pricing(name, plans, today)
        if changes:
            sync.append_changes_log(name, changes, today)
        # Summary row is written even when nothing changed — a daily heartbeat.
        sync.append_ai_summary(name, summary, has_changes, today)

    save_snapshot(new_snapshot)
    print(f"\nDone — snapshot saved to {SNAPSHOT_PATH}")


if __name__ == "__main__":
    # Entry point: `python -m scraper.main` (see module docstring).
    main()
