#!/usr/bin/env python3
"""
Serve the Rightmove dashboard over a local HTTP server.

Usage:
    python serve.py          # serves on http://localhost:8080
    python serve.py 9000     # custom port
"""

import fcntl
import hmac
import http.server
import io
import json
import os
import queue
import re
import subprocess
import sys
import tempfile
import threading
import webbrowser
from pathlib import Path

PORT = int(sys.argv[1]) if len(sys.argv) > 1 else 8080  # optional CLI override (see usage above)
ROOT = Path(__file__).parent  # directory this script lives in
CACHE_FILE = ROOT / "rightmove_cache.json"  # data store shared with the scraper subprocess
SCRAPER_FILE = ROOT / "rightmove_scraper.py"  # script launched by POST /run_scraper

# ── Authentication ────────────────────────────────────────────────────────────
# Set the RIGHTMOVE_TOKEN environment variable to require a token on all
# mutating requests (POST endpoints + /run_scraper).
# In your systemd service file add:
#   Environment="RIGHTMOVE_TOKEN=your-random-secret-here"
# Generate a good token with: python3 -c "import secrets; print(secrets.token_hex(32))"
# Leave unset (empty string) to disable auth — fine for local-only use.
API_TOKEN = os.environ.get("RIGHTMOVE_TOKEN", "").strip()

# Serve static files (dashboard HTML, cache JSON) relative to the script's dir.
os.chdir(ROOT)

# ── Cache file access ─────────────────────────────────────────────────────────
# Both the scraper subprocess AND serve.py's POST handlers write to the same
# JSON file.  We use a threading lock for in-process serialisation, and an
# fcntl advisory lock for cross-process serialisation with the scraper.
#
# Known gap: the scraper does not itself acquire an fcntl lock before writing
# (it uses atomic tempfile+rename).  The window where a race could corrupt data
# is extremely narrow (only during os.replace()), but for a single-user Pi
# running one daily scrape this is acceptable.  A future improvement would be
# to add fcntl locking to the scraper's save_cache() as well.
_cache_rw_lock = threading.Lock()  # serialises cache access within this process


def _validate_listing_url(url: str) -> bool:
    """Reject anything that isn't a well-formed Rightmove property URL.

    Prevents probing for arbitrary cache keys and limits the attack surface
    on the POST endpoints when the server is internet-facing.
    """
    if not isinstance(url, str):
        return False
    if len(url) > 200:
        return False
    if not url.startswith("https://www.rightmove.co.uk/properties/"):
        return False
    # Must end with a numeric property ID (optionally followed by a slash)
    if not re.search(r"/properties/\d+/?#?$", url):
        return False
    return True


def _load_cache_locked() -> dict:
    """Return the parsed cache JSON, holding a shared advisory lock while reading.

    The shared (LOCK_SH) flock lets concurrent readers proceed while blocking
    any cooperating writer that takes an exclusive lock on the same file.
    """
    with open(CACHE_FILE, "r", encoding="utf-8") as fh:
        fcntl.flock(fh, fcntl.LOCK_SH)
        try:
            data = json.load(fh)
        finally:
            fcntl.flock(fh, fcntl.LOCK_UN)
    return data


def _save_cache_locked(cache: dict) -> None:
    """Atomically replace cache.json: temp file → fsync → rename.

    os.replace() is atomic on POSIX, so a concurrent reader always sees
    either the complete old file or the complete new one, never a partial
    write.  No flock is taken on the temp file: it is private to this
    process until the rename makes it visible, so locking it would
    serialise nothing.

    Raises whatever json.dump / the filesystem raises; the temp file is
    cleaned up best-effort on failure.
    """
    tmp_fd, tmp_path = tempfile.mkstemp(
        dir=CACHE_FILE.parent, prefix=".rightmove_tmp_", suffix=".json"
    )
    try:
        with os.fdopen(tmp_fd, "w", encoding="utf-8") as f:
            json.dump(cache, f, indent=2, ensure_ascii=False)
            f.flush()
            os.fsync(f.fileno())  # ensure bytes are on disk before the rename
        os.replace(tmp_path, CACHE_FILE)   # atomic on POSIX
    except Exception:
        # Best-effort removal of the orphaned temp file, then re-raise.
        try:
            os.unlink(tmp_path)
        except OSError:
            pass
        raise
# ── Scraper run state (shared across request-handler threads) ─────────────────
_scraper_lock        = threading.Lock()  # held for the full lifetime of a scraper run
_scraper_running     = False          # True while scraper subprocess is alive
_scraper_log_lines   = []            # full history of log lines (for late joiners)
_scraper_subscribers = []            # list of queue.Queue, one per SSE client
_scraper_state_lock  = threading.Lock()  # guards the three mutable values above

def _broadcast(line: str) -> None:
    """Record *line* in the history and fan it out to every live SSE queue.

    Subscribers whose queue is already full (a client that stopped reading)
    are dropped from the subscriber list rather than blocking the scraper.
    """
    def _deliver(sub) -> bool:
        # Returns False when the subscriber's queue is saturated.
        try:
            sub.put_nowait(line)
            return True
        except queue.Full:
            return False

    with _scraper_state_lock:
        _scraper_log_lines.append(line)
        _scraper_subscribers[:] = [s for s in _scraper_subscribers if _deliver(s)]


class Handler(http.server.SimpleHTTPRequestHandler):
    """Dashboard request handler: static files, JSON POST API, SSE log stream."""

    # Make sure .json files are served with the correct Content-Type.
    extensions_map = {
        **http.server.SimpleHTTPRequestHandler.extensions_map,
        ".json": "application/json",
    }

    # ── Authentication ────────────────────────────────────────────────────
    def _check_auth(self) -> bool:
        """Return True if the request is authorised; send 401/403 and return False if not.

        Auth is only enforced when RIGHTMOVE_TOKEN is set in the environment.
        Clients must send the token in the X-Api-Token request header.
        """
        if not API_TOKEN:
            return True  # auth disabled — local-only mode
        provided = self.headers.get("X-Api-Token", "").strip()
        if not provided:
            self._respond(401, {"error": "X-Api-Token header required"})
            return False
        # Constant-time comparison to prevent timing attacks.
        if not hmac.compare_digest(provided, API_TOKEN):
            self._respond(403, {"error": "invalid token"})
            return False
        return True

    # ── Routing ───────────────────────────────────────────────────────────
    def do_GET(self):
        if self.path == "/scraper_status":
            self._handle_scraper_status()
        elif self.path.startswith("/scraper_log"):
            self._handle_scraper_log_sse()
        else:
            super().do_GET()  # static file serving

    def do_POST(self):
        if not self._check_auth():
            return
        if self.path == "/update_sqft":
            self._handle_update_sqft()
        elif self.path == "/update_location":
            self._handle_update_location()
        elif self.path == "/update_notes":
            self._handle_update_notes()
        elif self.path == "/run_scraper":
            self._handle_run_scraper()
        else:
            self._respond(404, {"error": "not found"})

    # ── Shared helpers for the POST endpoints ─────────────────────────────
    def _read_json_body(self):
        """Parse the request body as a JSON object.

        Returns the dict, or None after having already sent a 400 response
        (bad Content-Length, undecodable JSON, or a non-object payload —
        a JSON array body would otherwise blow up on body.get()).
        """
        try:
            length = int(self.headers.get("Content-Length", 0))
        except ValueError:
            self._respond(400, {"error": "invalid Content-Length"})
            return None
        try:
            body = json.loads(self.rfile.read(length))
        except json.JSONDecodeError:
            self._respond(400, {"error": "invalid JSON"})
            return None
        if not isinstance(body, dict):
            self._respond(400, {"error": "invalid JSON"})
            return None
        return body

    def _mutate_listing(self, url: str, mutate) -> None:
        """Apply *mutate* to the cached entry for *url* and persist the cache.

        The entire read-modify-write cycle runs under _cache_rw_lock so two
        concurrent POSTs cannot interleave their load/save phases and lose
        each other's updates.  Sends the HTTP response itself:
        200 on success, 404 for an unknown listing, 500 on cache read errors.
        """
        try:
            with _cache_rw_lock:
                cache = _load_cache_locked()
                listings = cache.get("listings", {})
                if url not in listings:
                    self._respond(404, {"error": "listing not found"})
                    return
                mutate(listings[url])
                _save_cache_locked(cache)
        except (FileNotFoundError, json.JSONDecodeError) as exc:
            self._respond(500, {"error": str(exc)})
            return
        self._respond(200, {"ok": True})

    # ── GET endpoints ─────────────────────────────────────────────────────
    def _handle_scraper_status(self):
        """Report whether a scraper subprocess is currently running."""
        with _scraper_state_lock:
            running = _scraper_running
        self._respond(200, {"running": running})

    def _handle_scraper_log_sse(self):
        """Server-Sent Events stream — delivers live scraper log lines."""
        # Register a per-client queue.
        q = queue.Queue(maxsize=500)
        with _scraper_state_lock:
            # Replay only the newest lines that fit the queue: replaying the
            # full history could exceed maxsize and raise queue.Full here.
            for line in _scraper_log_lines[-q.maxsize:]:
                q.put_nowait(line)
            _scraper_subscribers.append(q)

        self.send_response(200)
        self.send_header("Content-Type", "text/event-stream")
        self.send_header("Cache-Control", "no-cache")
        self.send_header("Access-Control-Allow-Origin", "*")
        self.send_header("X-Accel-Buffering", "no")  # stop nginx buffering SSE
        self.end_headers()

        try:
            while True:
                try:
                    line = q.get(timeout=20)
                    # SSE format: "data: <payload>\n\n"
                    payload = "data: " + line.replace("\n", " ") + "\n\n"
                    self.wfile.write(payload.encode("utf-8"))
                    self.wfile.flush()
                    if line == "__DONE__":
                        break
                except queue.Empty:
                    # Comment line keeps the connection alive through proxies.
                    self.wfile.write(b": keep-alive\n\n")
                    self.wfile.flush()
        except (BrokenPipeError, ConnectionResetError):
            pass  # client disconnected — normal for SSE
        finally:
            with _scraper_state_lock:
                try:
                    _scraper_subscribers.remove(q)
                except ValueError:
                    pass  # already dropped by _broadcast (queue was full)

    # ── POST endpoints ────────────────────────────────────────────────────
    def _handle_update_sqft(self):
        """Set a manual floor area for a listing and recompute price-per-sqft."""
        body = self._read_json_body()
        if body is None:
            return

        url = body.get("url")
        sqft = body.get("sqft")

        if not url or not _validate_listing_url(url):
            self._respond(400, {"error": "invalid or missing url"})
            return
        if sqft is None:
            self._respond(400, {"error": "url and sqft required"})
            return
        try:
            sqft = float(sqft)
            # `not sqft > 0` also rejects NaN (every NaN comparison is false).
            if not sqft > 0:
                raise ValueError
        except (ValueError, TypeError):
            self._respond(400, {"error": "sqft must be a positive number"})
            return

        def apply(entry):
            entry["manual_sqft"] = sqft
            # Propagate into price history so price_per_sqft stays consistent.
            for record in entry.get("history") or []:
                record["sqft"] = sqft
                if record.get("price"):
                    record["price_per_sqft"] = round(record["price"] / sqft, 2)

        self._mutate_listing(url, apply)

    def _handle_update_location(self):
        """Manually pin a listing's map position (lat/lng)."""
        body = self._read_json_body()
        if body is None:
            return

        url = body.get("url")
        lat = body.get("lat")
        lng = body.get("lng")

        if not url or not _validate_listing_url(url):
            self._respond(400, {"error": "invalid or missing url"})
            return
        if lat is None or lng is None:
            self._respond(400, {"error": "lat and lng required"})
            return
        try:
            lat = float(lat)
            lng = float(lng)
            if not (-90 <= lat <= 90) or not (-180 <= lng <= 180):
                raise ValueError
        except (ValueError, TypeError):
            self._respond(400, {"error": "lat must be -90..90, lng must be -180..180"})
            return

        def apply(entry):
            entry["manual_lat"] = lat
            entry["manual_lng"] = lng

        self._mutate_listing(url, apply)

    def _handle_update_notes(self):
        """Attach free-text notes to a listing (empty string clears them)."""
        body = self._read_json_body()
        if body is None:
            return

        url = body.get("url")
        notes = body.get("notes", "")

        if not url or not _validate_listing_url(url):
            self._respond(400, {"error": "invalid or missing url"})
            return
        if not isinstance(notes, str):
            self._respond(400, {"error": "notes must be a string"})
            return
        notes = notes.strip()   # normalise whitespace
        if len(notes) > 2000:
            self._respond(400, {"error": "notes must be 2000 characters or fewer"})
            return

        def apply(entry):
            if notes:
                entry["notes"] = notes
            else:
                entry.pop("notes", None)   # remove key if note cleared

        self._mutate_listing(url, apply)

    def _handle_run_scraper(self):
        """Launch the scraper subprocess; its output streams via /scraper_log."""
        global _scraper_running
        # Non-blocking acquire enforces "at most one scraper run at a time".
        if not _scraper_lock.acquire(blocking=False):
            self._respond(409, {"error": "Scraper is already running"})
            return

        # Clear previous log history for a fresh run.
        with _scraper_state_lock:
            _scraper_log_lines.clear()
            _scraper_running = True

        def _stream():
            global _scraper_running
            try:
                _broadcast("═" * 60)
                _broadcast("  Scraper started")
                _broadcast("═" * 60)
                proc = subprocess.Popen(
                    [sys.executable, "-u", str(SCRAPER_FILE)],
                    stdout=subprocess.PIPE,
                    stderr=subprocess.STDOUT,   # merge stderr into stdout
                    text=True,
                    cwd=str(ROOT),
                    bufsize=1,   # line-buffered, for live streaming
                )
                for raw in proc.stdout:
                    stripped = raw.rstrip("\n")
                    if stripped:
                        _broadcast(stripped)
                proc.wait()
                _broadcast("─" * 60)
                if proc.returncode == 0:
                    _broadcast("  ✓ Scraper finished successfully")
                else:
                    _broadcast(f"  ✗ Scraper exited with code {proc.returncode}")
                _broadcast("─" * 60)
            except Exception as exc:
                _broadcast(f"  ✗ Server error launching scraper: {exc}")
            finally:
                # Always reset state so a failed run doesn't wedge future runs.
                _broadcast("__DONE__")
                with _scraper_state_lock:
                    _scraper_running = False
                _scraper_lock.release()

        # Start the worker BEFORE responding: if the client has already gone
        # away and _respond raises, the thread still releases _scraper_lock
        # and resets _scraper_running (otherwise the scraper could never be
        # started again without restarting the server).
        threading.Thread(target=_stream, daemon=True).start()
        self._respond(200, {"ok": True, "message": "Scraper started — connect to /scraper_log for live output"})

    # ── Plumbing ──────────────────────────────────────────────────────────
    def _respond(self, status, data):
        """Send *data* as a JSON response with permissive CORS headers."""
        body = json.dumps(data).encode()
        self.send_response(status)
        self.send_header("Content-Type", "application/json")
        self.send_header("Access-Control-Allow-Origin", "*")
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    def log_message(self, format, *args):
        """Suppress access-log noise from the dashboard's polling endpoints."""
        if args and ("rightmove_cache.json" in str(args[0]) or "scraper_log" in str(args[0])):
            return
        super().log_message(format, *args)


# ThreadingHTTPServer handles concurrent requests so auto-refresh fetches
# don't hang while the scraper is running.
server = http.server.ThreadingHTTPServer(("", PORT), Handler)
url = f"http://localhost:{PORT}/rightmove_dashboard.html"
print(f"Serving at {url}")
print("Press Ctrl+C to stop.")
webbrowser.open(url)  # best-effort; harmless no-op on headless machines
try:
    server.serve_forever()
except KeyboardInterrupt:
    print("\nServer stopped.")
finally:
    server.server_close()  # release the listening socket on any exit path