diff --git a/.gitea/workflows/deploy.yml b/.gitea/workflows/deploy.yml new file mode 100644 index 0000000..024e000 --- /dev/null +++ b/.gitea/workflows/deploy.yml @@ -0,0 +1,62 @@ +name: Build and Deploy BlaaAi + +on: + push: + branches: + - main + workflow_dispatch: + +env: + SERVICE_NAME: blaaai + +jobs: + build-and-deploy: + runs-on: debian-host + + env: + PATH: /usr/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/sbin:/bin:/snap/bin + DOCKER_HOST: unix:///var/run/docker.sock + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Log in to Docker Registry + run: | + echo "${{ secrets.HARBOR_ROBOT_TOKEN }}" | docker login registry.i80.dk -u "robot\$gitserver" --password-stdin + + - name: Build Docker image + run: | + SHA=$(git rev-parse --short HEAD) + docker build \ + --build-arg BUILD_VERSION=${{ github.run_number }} \ + --build-arg GIT_COMMIT=$SHA \ + --build-arg BUILD_TIME=$(date -u +%Y-%m-%dT%H:%M:%SZ) \ + -t registry.i80.dk/gitea/${SERVICE_NAME}:latest \ + -t registry.i80.dk/gitea/${SERVICE_NAME}:$SHA . + + - name: Push Docker image + run: | + SHA=$(git rev-parse --short HEAD) + docker push registry.i80.dk/gitea/${SERVICE_NAME}:latest + docker push registry.i80.dk/gitea/${SERVICE_NAME}:$SHA + + - name: Deploy to Nomad + run: | + nomad job validate ${SERVICE_NAME}.nomad + nomad job run ${SERVICE_NAME}.nomad + env: + NOMAD_ADDR: "https://nomad.i80.dk:4646" + + - name: Verify deployment + run: | + echo "Deployment triggered — checking status..." 
+ sleep 20 + nomad job status ${SERVICE_NAME} + env: + NOMAD_ADDR: "https://nomad.i80.dk:4646" + + - name: Health check + run: | + sleep 30 + curl -sf https://${SERVICE_NAME}.i80.dk/health && echo "Health check passed" diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..da8e280 --- /dev/null +++ b/.gitignore @@ -0,0 +1,7 @@ +__pycache__/ +*.pyc +.env +.env.* +data/ +*.log +.DS_Store diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..3c3800a --- /dev/null +++ b/Dockerfile @@ -0,0 +1,34 @@ +FROM python:3.12-slim + +ARG BUILD_VERSION=unknown +ARG GIT_COMMIT=unknown +ARG BUILD_TIME=unknown + +ENV PYTHONUNBUFFERED=1 \ + PYTHONDONTWRITEBYTECODE=1 \ + BUILD_VERSION=${BUILD_VERSION} \ + GIT_COMMIT=${GIT_COMMIT} \ + BUILD_TIME=${BUILD_TIME} + +WORKDIR /app + +RUN apt-get update && apt-get install -y --no-install-recommends \ + curl \ + && rm -rf /var/lib/apt/lists/* + +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +# Install Playwright browsers +RUN playwright install chromium && playwright install-deps chromium + +COPY . . + +RUN mkdir -p data static templates + +EXPOSE 8000 + +HEALTHCHECK --interval=10s --timeout=5s --start-period=30s --retries=3 \ + CMD curl -f http://localhost:${PORT:-8000}/health || exit 1 + +CMD ["sh", "-c", "uvicorn app:app --host 0.0.0.0 --port ${PORT:-8000}"] diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..b9ce8ef --- /dev/null +++ b/Makefile @@ -0,0 +1,45 @@ +.PHONY: dev fetch score run list install ai help + +URL ?= https://www.dba.dk/mobility/search/car?mileage_to=175000\&price_from=15000\&price_to=110000\®istration_class=1\&year_from=2014 +UUID ?= +PREFS ?= + +## Start udviklingsserver +dev: + python3 -m uvicorn app:app --host 0.0.0.0 --port 8000 --reload + +## Hent annoncer — ny søgning: make fetch URL="https://dba.dk/..." 
+## gen-fetch: make fetch UUID= +fetch: +ifdef UUID + python3 fetch_dba.py $(UUID) +else + python3 fetch_dba.py "$(URL)" +endif + +## Score annoncer — make score UUID= [PREFS="ingen franske biler"] +score: +ifdef UUID + python3 score.py $(UUID) $(if $(PREFS),--prefs "$(PREFS)",) +else + python3 score.py $(if $(PREFS),--prefs "$(PREFS)",) +endif + +## Hent + score i ét hak +run: fetch score + +## List alle søgninger +list: + python3 fetch_dba.py --list + +## Installer Python-afhængigheder +install: + pip install fastapi uvicorn python-multipart anthropic requests jinja2 + +## Genoptag Copilot session +ai: + copilot --resume=2093191e-06df-4810-b13f-076be1f8995b + +## Vis denne hjælp +help: + @grep -E '^##' Makefile | sed 's/## //' diff --git a/app.py b/app.py new file mode 100644 index 0000000..197f111 --- /dev/null +++ b/app.py @@ -0,0 +1,278 @@ +#!/usr/bin/env python3 +""" +BlaaAi — AI-powered DBA listing analyzer +FastAPI backend + +Usage: + uvicorn app:app --reload --port 8000 +""" + +import json, os, smtplib +from concurrent.futures import ThreadPoolExecutor +from datetime import datetime, timezone +from email.mime.multipart import MIMEMultipart +from email.mime.text import MIMEText +from pathlib import Path +from typing import Optional + +from fastapi import BackgroundTasks, FastAPI, HTTPException +from fastapi.responses import HTMLResponse +from fastapi.staticfiles import StaticFiles +from fastapi.templating import Jinja2Templates +from pydantic import BaseModel +from starlette.requests import Request + +from fetch_dba import ( + DATA_DIR, ITEM_CACHE, create_search, detect_domain, enrich_listings, + fetch_page, find_new, listings_file, load_meta, load_seen, + save_seen, seen_file, list_searches, +) +from score import CRITERIA, METRICS_FILE, score_listings + +app = FastAPI(title="BlaaAi") +app.mount("/static", StaticFiles(directory="static"), name="static") +templates = Jinja2Templates(directory="templates") +executor = ThreadPoolExecutor(max_workers=4) + + +# 
# ── Meta helpers ──────────────────────────────────────────────────────────────

def update_meta(search_id: str, **kwargs) -> dict:
    """Merge ``kwargs`` into a search's ``meta.json`` and return the merged dict.

    The file is read from ``DATA_DIR / search_id / "meta.json"``, updated in
    memory and written back in full.

    Raises:
        FileNotFoundError: if the search has no ``meta.json`` yet.
        json.JSONDecodeError: if the existing file is not valid JSON.
    """
    meta_path = DATA_DIR / search_id / "meta.json"
    # The JSON is written with ensure_ascii=False, so pin the encoding instead
    # of relying on the platform default.
    meta = json.loads(meta_path.read_text(encoding="utf-8"))
    meta.update(kwargs)
    meta_path.write_text(
        json.dumps(meta, ensure_ascii=False, indent=2), encoding="utf-8"
    )
    return meta


# ── Background pipeline ───────────────────────────────────────────────────────

def run_fetch_and_score(search_id: str, prefs: str = "") -> None:
    """Synchronous fetch + score pipeline for one search.

    Progress is published through the ``status`` field of the search's
    ``meta.json``: ``fetching`` → ``scoring`` → ``ready``, or ``error`` (with
    the exception text in ``error``) when any step raises. The exception is
    re-raised after the status has been recorded.

    Args:
        search_id: Identifier of an existing search directory.
        prefs: Free-text buyer preferences forwarded to ``score_listings``.
    """
    try:
        meta = load_meta(search_id)
        search_url = meta["url"]

        # ── Fetch ──────────────────────────────────────────────────────────────
        update_meta(search_id, status="fetching")
        sf = seen_file(search_id)
        lf = listings_file(search_id)
        seen = load_seen(sf)

        # Only the first result page is fetched here.
        listings, _ = fetch_page(search_url, 1)
        new_items = find_new(listings, seen)

        if new_items:
            new_items = enrich_listings(new_items)
            existing = json.loads(lf.read_text(encoding="utf-8")) if lf.exists() else []
            existing.extend(new_items)
            lf.write_text(
                json.dumps(existing, ensure_ascii=False, indent=2), encoding="utf-8"
            )
            # Mark every listing on the page as seen, not just the new ones.
            save_seen(sf, seen | {item["id"] for item in listings})

        # ── Score ──────────────────────────────────────────────────────────────
        update_meta(search_id, status="scoring")
        items = json.loads(lf.read_text(encoding="utf-8")) if lf.exists() else []
        domain = detect_domain(search_url)
        criteria = CRITERIA[domain]

        score_listings(items, criteria, prefs, force=False, source_file=lf)

        update_meta(
            search_id,
            status="ready",
            # Clear the message left behind by an earlier failed run so a
            # "ready" search never carries a stale error.
            error=None,
            listing_count=len(items),
            last_scored_at=datetime.now().isoformat(timespec="seconds"),
            prefs=prefs,
        )

    except Exception as e:
        update_meta(search_id, status="error", error=str(e))
        raise


# ── Pydantic models ───────────────────────────────────────────────────────────

class NewSearchRequest(BaseModel):
    """Request body for creating or re-scoring a search."""

    url: str
    prefs: Optional[str] = ""


class EmailRequest(BaseModel):
    """Request body for mailing the results of a search."""

    email: str


# ── Routes
def _meta_or_404(search_id: str) -> dict:
    """Return the search's metadata, translating a missing search into HTTP 404."""
    try:
        return load_meta(search_id)
    except FileNotFoundError:
        raise HTTPException(404, "Søgning ikke fundet")


def _ranked_by_score(items: list[dict]) -> list[dict]:
    """Return the items that carry an ``ai_score``, best score first."""
    scored = [entry for entry in items if entry.get("ai_score") is not None]
    scored.sort(key=lambda entry: entry["ai_score"], reverse=True)
    return scored


@app.get("/health")
async def health():
    """Report liveness together with the build version and short commit hash."""
    commit = os.getenv("GIT_COMMIT", "unknown")
    return {
        "status": "healthy",
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "version": os.getenv("BUILD_VERSION", "unknown"),
        "commit": commit[:7],
    }


@app.get("/", response_class=HTMLResponse)
async def index(request: Request):
    """Render the landing page."""
    return templates.TemplateResponse(request, "index.html")


@app.get("/search/{search_id}", response_class=HTMLResponse)
async def search_view(request: Request, search_id: str):
    """Render the same page with ``search_id`` passed into the template context."""
    context = {"search_id": search_id}
    return templates.TemplateResponse(request, "index.html", context)


@app.post("/api/searches")
async def create(body: NewSearchRequest, background_tasks: BackgroundTasks):
    """Create a search, mark it queued and schedule the fetch + score pipeline."""
    prefs = body.prefs or ""
    search_id = create_search(body.url)
    update_meta(search_id, status="queued", prefs=prefs)
    background_tasks.add_task(run_fetch_and_score, search_id, prefs)
    return {"id": search_id, "status": "queued"}


@app.get("/api/searches")
async def get_all():
    """Return whatever ``list_searches`` reports."""
    return list_searches()


@app.get("/api/searches/{search_id}")
async def get_search(search_id: str):
    """Return a search's metadata plus its scored listings, best first."""
    meta = _meta_or_404(search_id)

    listings_path = listings_file(search_id)
    if listings_path.exists():
        all_items = json.loads(listings_path.read_text())
        ranked = _ranked_by_score(all_items)
        meta.update(
            listings=ranked,
            listing_count=len(all_items),
            scored_count=len(ranked),
        )
        if ranked:
            # Metadata without a status: scored results imply it is ready.
            meta.setdefault("status", "ready")

    if "status" not in meta:
        has_listings = meta.get("listing_count", 0) > 0
        meta["status"] = "ready" if has_listings else "unknown"

    return meta


@app.post("/api/searches/{search_id}/rescore")
async def rescore(search_id: str, body: NewSearchRequest, background_tasks: BackgroundTasks):
    """Queue a new pipeline run for an existing search with new preferences."""
    _meta_or_404(search_id)

    prefs = body.prefs or ""
    update_meta(search_id, status="queued", prefs=prefs)
    background_tasks.add_task(run_fetch_and_score, search_id, prefs)
    return {"id": search_id, "status": "queued"}


@app.get("/metrics")
async def get_metrics():
    """Return global metrics, per-search metrics and the item-cache size."""
    totals = json.loads(METRICS_FILE.read_text()) if METRICS_FILE.exists() else {}

    # Per-search breakdown, most recently modified directory first
    per_search = []
    if DATA_DIR.exists():
        newest_first = sorted(
            DATA_DIR.iterdir(), key=lambda p: p.stat().st_mtime, reverse=True
        )
        for entry in newest_first:
            metrics_path = entry / "metrics.json"
            if metrics_path.exists():
                per_search.append(json.loads(metrics_path.read_text()))

    # Item cache stats
    if ITEM_CACHE.exists():
        cache_size = len(list(ITEM_CACHE.glob("*.json")))
    else:
        cache_size = 0

    return {**totals, "item_cache_size": cache_size, "searches": per_search}


@app.post("/api/searches/{search_id}/email")
async def send_email(search_id: str, body: EmailRequest):
    """Mail the ten best-scored listings of a finished search."""
    meta = _meta_or_404(search_id)

    if meta.get("status") != "ready":
        raise HTTPException(400, "Analysen er ikke færdig endnu")

    listings_path = listings_file(search_id)
    if not listings_path.exists():
        raise HTTPException(400, "Ingen resultater at sende")

    top_ten = _ranked_by_score(json.loads(listings_path.read_text()))[:10]

    html = _build_email_html(top_ten, meta)
    _send_email(body.email, "🔍 Dine DBA-resultater fra BlaaAi", html)
    return {"status": "sent", "to": body.email}

⚠️ {item["ai_warnings"]}

' if item.get("ai_warnings") else "" + rows += f""" + + + #{i} [{score}] {item['name']}
+ {item.get('price_dkk','?')} DKK + {bar}
+

{reason}

+ {warn} + Se annonce → + + """ + + return f""" + +

🔍 Dine DBA-resultater

+

Søgning: {meta['url'][:60]}…

+ {rows}
+

Leveret af BlaaAi

def _send_email(to: str, subject: str, html: str, timeout: float = 30.0) -> None:
    """Send ``html`` as an e-mail through the SMTP server named in the environment.

    Configuration is read from ``SMTP_HOST``, ``SMTP_PORT`` (default 587),
    ``SMTP_USER`` and ``SMTP_PASS``. ``SMTP_USER`` doubles as the sender
    address.

    Args:
        to: Recipient address.
        subject: Subject line.
        html: Message body, sent as ``text/html``.
        timeout: Seconds to wait on any blocking SMTP socket operation.

    Raises:
        HTTPException: 503 when ``SMTP_HOST`` is not set.
    """
    host = os.environ.get("SMTP_HOST", "")
    user = os.environ.get("SMTP_USER", "")
    pwd = os.environ.get("SMTP_PASS", "")
    port = int(os.environ.get("SMTP_PORT", "587"))

    if not host:
        raise HTTPException(503, "Email ikke konfigureret (SMTP_HOST mangler)")

    msg = MIMEMultipart("alternative")
    msg["Subject"] = subject
    msg["From"] = user
    msg["To"] = to
    msg.attach(MIMEText(html, "html"))

    # Without a timeout smtplib blocks indefinitely on an unresponsive server,
    # which would stall the request that called this helper.
    with smtplib.SMTP(host, port, timeout=timeout) as s:
        # NOTE(review): starttls() without an explicit SSL context does not
        # verify the server certificate; consider passing
        # ssl.create_default_context() once the mail server's cert is trusted.
        s.starttls()
        s.login(user, pwd)
        s.sendmail(user, to, msg.as_string())


if __name__ == "__main__":
    import uvicorn
    port = int(os.getenv("PORT", "8000"))
    uvicorn.run("app:app", host="0.0.0.0", port=port, reload=True)
def detect_domain(url: str) -> str:
    """Return 'mobility' or 'recommerce' based on URL path."""
    return "mobility" if "/mobility/" in url else "recommerce"


def url_slug(url: str) -> str:
    """Create a short filename-safe slug from a search URL.

    The label comes from the ``q`` query parameter, falling back to the last
    path segment; a 6-character MD5 prefix of the full URL keeps slugs for
    different URLs apart.
    """
    parsed = urlparse(url)
    q = parse_qs(parsed.query).get("q", [""])[0]
    path_tail = parsed.path.rstrip("/").split("/")[-1]
    label = re.sub(r"[^\w]", "_", q or path_tail).strip("_").lower()[:30]
    short = hashlib.md5(url.encode()).hexdigest()[:6]
    return f"{label}_{short}" if label else short


def page_url(search_url: str, page: int) -> str:
    """Return ``search_url`` with a ``page`` parameter appended for pages > 1."""
    sep = "&" if "?" in search_url else "?"
    return search_url + (f"{sep}page={page}" if page > 1 else "")


# ── Search page parsing ───────────────────────────────────────────────────────

# Result counter; must start with a digit so a lone "." before the word
# "annonce" can never reach int().
_TOTAL_COUNT_RE = re.compile(r"(\d[\d.]*)\s+annonce")
# JSON-LD payloads embedded in the page.
_JSON_LD_RE = re.compile(
    r'<script[^>]*type="application/ld\+json"[^>]*>(.*?)</script>',
    re.DOTALL,
)
# The listing ID is the last numeric path segment of the item URL.
_TRAILING_ID_RE = re.compile(r"/(\d+)/?$")


def fetch_page(search_url: str, page: int = 1) -> tuple[list[dict], int]:
    """Fetch one search result page. Returns (listings, total_count)."""
    resp = requests.get(page_url(search_url, page), headers=HEADERS, timeout=15)
    resp.raise_for_status()
    return parse_search_page(resp.text)


def _as_dict(value) -> dict:
    """Return ``value`` if it is a dict, otherwise an empty dict."""
    return value if isinstance(value, dict) else {}


def _listing_from_product(product: dict) -> dict:
    """Flatten one JSON-LD product entry into the listing dict used downstream."""
    item_url = product.get("url")
    if not isinstance(item_url, str):
        item_url = ""

    brand = product.get("brand")
    if isinstance(brand, dict):
        brand_name = brand.get("name")
    else:
        # A plain string is accepted as the brand name; anything else is dropped.
        brand_name = brand if isinstance(brand, str) else None

    model = product.get("model")
    condition = product.get("itemCondition")
    id_match = _TRAILING_ID_RE.search(item_url)

    return {
        "id": id_match.group(1) if id_match else item_url.split("/")[-1],
        "name": product.get("name") or f"{brand_name or ''} {model or ''}".strip(),
        "brand": brand_name,
        "model": model,
        "description": product.get("description"),
        "price_dkk": _as_dict(product.get("offers")).get("price"),
        "url": item_url,
        "image": product.get("image"),
        "condition": (
            condition.replace("https://schema.org/", "")
            if isinstance(condition, str)
            else ""
        ),
    }


def parse_search_page(html: str) -> tuple[list[dict], int]:
    """Extract listings and the advertised result count from a search page.

    Listings are read from JSON-LD blocks whose ``@type`` is
    ``CollectionPage``. Blocks that are not valid JSON, or not shaped like a
    collection page, are ignored.

    Returns:
        ``(listings, total)`` where ``total`` is 0 when no counter was found.
    """
    listings: list[dict] = []
    total = 0

    count_match = _TOTAL_COUNT_RE.search(html)
    if count_match:
        # Dots are thousands separators ("1.234 annoncer").
        total = int(count_match.group(1).replace(".", ""))

    for raw_block in _JSON_LD_RE.findall(html):
        try:
            data = json.loads(raw_block)
        except json.JSONDecodeError:
            continue
        if not isinstance(data, dict) or data.get("@type") != "CollectionPage":
            continue
        elements = _as_dict(data.get("mainEntity")).get("itemListElement")
        if not isinstance(elements, list):
            continue
        for element in elements:
            product = _as_dict(_as_dict(element).get("item"))
            listings.append(_listing_from_product(product))

    return listings, total


def fetch_all_pages(search_url: str) -> list[dict]:
    """Fetch every result page of a search and return the combined listings.

    The page count is derived from the advertised total and the size of the
    first page; fetching stops early when a page comes back empty.
    """
    first_page, total = fetch_page(search_url, 1)
    if total == 0:
        # Try counting items directly if total not found in HTML
        total = len(first_page)
    items_per_page = len(first_page) or 49
    pages = math.ceil(total / items_per_page) if total else 1
    print(f"Total: {total} listings across {pages} pages", file=sys.stderr)

    all_listings = first_page
    for p in range(2, pages + 1):
        print(f" Fetching page {p}/{pages}…", file=sys.stderr)
        listings, _ = fetch_page(search_url, p)
        all_listings.extend(listings)
        if not listings:
            break
        time.sleep(0.5)  # pause between page requests

    return all_listings


# ── Item detail
# Matches a whole <script>…</script> or <style>…</style> element; the
# backreference makes the closing tag pair with its opener.
_SCRIPT_STYLE_RE = re.compile(
    r"<(script|style)[^>]*>.*?</\1>", re.DOTALL | re.IGNORECASE
)


def page_to_text(html: str) -> str:
    """Strip HTML tags and return clean visible text, trimmed of navigation/footer noise."""
    # Remove script and style blocks entirely
    text = _SCRIPT_STYLE_RE.sub("", html)
    # Strip all remaining tags
    text = re.sub(r"<[^>]+>", " ", text)
    text = re.sub(r"\s+", " ", text).strip()
    # Cut off at footer noise (everything after "For virksomheder" is boilerplate)
    for cutoff in ["For virksomheder", "Annoncens metadata", "DBA Boost"]:
        idx = text.find(cutoff)
        # Markers within the first 200 characters are not treated as footer.
        if idx > 200:
            text = text[:idx].strip()
            break
    return text


def fetch_item_details(item: dict) -> dict:
    """Fetch raw visible text from an item page, using file cache.

    Returns ``{"raw_text": ..., "from_cache": True}`` on a cache hit,
    ``{"raw_text": ...}`` after a fetch, and ``{"raw_text": ""}`` when the
    page could not be fetched (best effort — failures are not raised).
    """
    item_id = item.get("id", "")
    cache_key = ITEM_CACHE / f"{item_id}.json"
    ITEM_CACHE.mkdir(parents=True, exist_ok=True)

    # Serve from cache if fresh enough
    if cache_key.exists():
        try:
            cached = json.loads(cache_key.read_text(encoding="utf-8"))
            cached_at = datetime.fromisoformat(cached["cached_at"]).replace(tzinfo=timezone.utc)
            age_h = (datetime.now(timezone.utc) - cached_at).total_seconds() / 3600
            if age_h < CACHE_TTL_H:
                return {"raw_text": cached["raw_text"], "from_cache": True}
        except (OSError, ValueError, KeyError, TypeError):
            pass  # corrupt cache entry → re-fetch

    try:
        resp = requests.get(item["url"], headers=HEADERS, timeout=10)
        resp.raise_for_status()
        raw_text = page_to_text(resp.text)
    except Exception:
        # Deliberate best effort: one unreachable item must not abort the batch.
        return {"raw_text": ""}

    try:
        cache_key.write_text(
            json.dumps(
                {
                    "id": item_id,
                    "raw_text": raw_text,
                    "cached_at": datetime.now(timezone.utc).isoformat(timespec="seconds"),
                },
                ensure_ascii=False,
            ),
            encoding="utf-8",
        )
    except OSError:
        # A failed cache write must not throw away text that was fetched.
        pass
    return {"raw_text": raw_text}


def enrich_listings(listings: list[dict], workers: int = 8) -> list[dict]:
    """Attach a ``details`` dict to every listing, fetching pages in parallel.

    The input list is updated in place and also returned.
    """
    print(f"Fetching details for {len(listings)} items…", file=sys.stderr)
    with ThreadPoolExecutor(max_workers=workers) as ex:
        futures = {ex.submit(fetch_item_details, entry): i for i, entry in enumerate(listings)}
        for future in as_completed(futures):
            listings[futures[future]]["details"] = future.result()
    cached = sum(1 for entry in listings if entry.get("details", {}).get("from_cache"))
    fetched = len(listings) - cached
    print(f" ✓ {fetched} hentet fra DBA, {cached} fra cache", file=sys.stderr)
    return listings


# ── Data directory helpers ────────────────────────────────────────────────────

def search_dir(search_id: str) -> Path:
    """Return the data directory of a search."""
    return DATA_DIR / search_id


def create_search(url: str) -> str:
    """Create a new search directory and return its UUID."""
    search_id = str(_uuid.uuid4())
    d = search_dir(search_id)
    d.mkdir(parents=True, exist_ok=True)
    meta = {
        "id": search_id,
        "url": url,
        "domain": detect_domain(url),
        "created_at": datetime.now().isoformat(timespec="seconds"),
    }
    (d / "meta.json").write_text(
        json.dumps(meta, ensure_ascii=False, indent=2), encoding="utf-8"
    )
    return search_id


def load_meta(search_id: str) -> dict:
    """Return the parsed ``meta.json`` of a search.

    Raises:
        FileNotFoundError: if the search has no ``meta.json``.
    """
    p = search_dir(search_id) / "meta.json"
    if not p.exists():
        raise FileNotFoundError(f"Ingen søgning med UUID {search_id}")
    return json.loads(p.read_text(encoding="utf-8"))


def listings_file(search_id: str) -> Path:
    """Return the path of a search's ``listings.json``."""
    return search_dir(search_id) / "listings.json"


def seen_file(search_id: str) -> Path:
    """Return the path of a search's ``seen.json``."""
    return search_dir(search_id) / "seen.json"


def list_searches() -> list[dict]:
    """Return the metadata of every search, most recently modified first.

    Each entry gets a ``listing_count`` computed from its ``listings.json``
    (0 when that file does not exist). Directories without ``meta.json`` are
    skipped.
    """
    if not DATA_DIR.exists():
        return []
    results = []
    for d in sorted(DATA_DIR.iterdir(), key=lambda p: p.stat().st_mtime, reverse=True):
        meta_path = d / "meta.json"
        if meta_path.exists():
            meta = json.loads(meta_path.read_text(encoding="utf-8"))
            lf = d / "listings.json"
            meta["listing_count"] = (
                len(json.loads(lf.read_text(encoding="utf-8"))) if lf.exists() else 0
            )
            results.append(meta)
    return results


def load_seen(state_file: Path) -> set[str]:
    """Return the set of listing IDs stored in ``state_file`` (empty if absent)."""
    return set(json.loads(state_file.read_text())) if state_file.exists() else set()


def save_seen(state_file: Path, ids: set[str]) -> None:
    """Write ``ids`` to ``state_file`` as a sorted JSON array."""
    state_file.write_text(json.dumps(sorted(ids)))


def find_new(listings: list[dict], seen: set[str]) -> list[dict]:
    """Return the listings whose ``id`` is not in ``seen``, in input order."""
    return [entry for entry in listings if entry["id"] not in seen]


# ── Output ────────────────────────────────────────────────────────────────────

def print_listing(item: dict) -> None:
    """Print a short summary of one listing with a 160-character text preview."""
    text = item.get("details", {}).get("raw_text", "")
    preview = (text[:160] + "…") if len(text) > 160 else text
    print(
        f"[{item['id']}] {item['name']}\n"
        f" Pris: {item['price_dkk']} DKK | {item.get('condition','')}\n"
        f" {item['url']}\n"
        f" {preview}\n"
    )


# ── Main ──────────────────────────────────────────────────────────────────────

def main() -> None:
    """Command-line entry point.

    Positional argument: a search UUID (re-fetch that search) or a URL (create
    a new search); anything else falls back to ``DEFAULT_URL``. Flags:
    ``--list`` lists existing searches, ``--all`` fetches every result page
    instead of only the first.
    """
    args = [a for a in sys.argv[1:] if not a.startswith("-")]
    flags = [a for a in sys.argv[1:] if a.startswith("-")]

    fetch_all = "--all" in flags

    first = args[0] if args else None

    # ── list existing searches ─────────────────────────────────────────────────
    if "--list" in flags or first == "list":
        searches = list_searches()
        if not searches:
            print("Ingen søgninger endnu. Kør: python fetch_dba.py <url>")
            return
        print(f"{'UUID':36} {'Oprettet':19} {'#':4} URL")
        print("─" * 100)
        for s in searches:
            print(f"{s['id']:36} {s['created_at']:19} {s['listing_count']:4} {s['url'][:60]}")
        return

    # ── resolve search_id or create new ───────────────────────────────────────
    if first and UUID_RE.match(first):
        search_id = first
        meta = load_meta(search_id)
        search_url = meta["url"]
        print(f"🔄 Bruger eksisterende søgning: {search_id}", file=sys.stderr)
    else:
        search_url = first if first and first.startswith("http") else DEFAULT_URL
        search_id = create_search(search_url)
        print(f"✨ Ny søgning oprettet: {search_id}", file=sys.stderr)

    domain = detect_domain(search_url)
    sf = seen_file(search_id)
    lf = listings_file(search_id)

    emoji = "🚗" if domain == "mobility" else "🛒"
    print(f"{emoji} Domain: {domain} | {'All pages' if fetch_all else 'Page 1'}", file=sys.stderr)
    print(f" URL: {search_url}", file=sys.stderr)
    print(f" Dir: data/{search_id}/", file=sys.stderr)

    seen = load_seen(sf)

    listings = fetch_all_pages(search_url) if fetch_all else fetch_page(search_url, 1)[0]

    new_listings = find_new(listings, seen)
    if not new_listings:
        print("Ingen nye annoncer siden sidst.")
        return

    new_listings = enrich_listings(new_listings)

    existing = json.loads(lf.read_text(encoding="utf-8")) if lf.exists() else []
    existing.extend(new_listings)
    lf.write_text(json.dumps(existing, ensure_ascii=False, indent=2), encoding="utf-8")
    print(f"💾 Gemt {len(new_listings)} nye → data/{search_id}/listings.json ({len(existing)} total)\n", file=sys.stderr)
    print(f"\n📋 UUID: {search_id}")

    print(f"{emoji} {len(new_listings)} ny(e) annonce(r):\n")
    for item in new_listings:
        print_listing(item)

    # Recorded last, so an interrupted run re-offers the same listings next time.
    save_seen(sf, seen | {entry["id"] for entry in listings})


if __name__ == "__main__":
    main()
MODEL = "claude-haiku-4-5-20251001"  # fast + cheap; swap to sonnet for better ranking
# SECURITY: the key is read from the environment, as the module docstring
# requires. A literal key was previously committed here; it must be treated as
# leaked and revoked.
API_KEY = os.environ.get("ANTHROPIC_API_KEY", "")
MAX_TOKENS = 2048
BASE_DIR = Path(__file__).parent
DATA_DIR = BASE_DIR / "data"
SCORE_CACHE = BASE_DIR / "data" / "score_cache"  # persistent cross-search score cache
UUID_RE = re.compile(r"^[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$")
METRICS_FILE = DATA_DIR / "metrics.json"

# Pricing: Claude Haiku 4.5 — https://www.anthropic.com/pricing
_PRICE_INPUT_PER_TOKEN = 1.00 / 1_000_000  # $1.00 per MTok
_PRICE_OUTPUT_PER_TOKEN = 5.00 / 1_000_000  # $5.00 per MTok


def calc_cost(input_tokens: int, output_tokens: int) -> float:
    """Return the USD cost of a request, rounded to 6 decimals."""
    return round(
        input_tokens * _PRICE_INPUT_PER_TOKEN + output_tokens * _PRICE_OUTPUT_PER_TOKEN,
        6,
    )


def update_metrics(search_id: str, input_tokens: int, output_tokens: int, listings_scored: int) -> None:
    """Save per-search metrics and update global metrics.json.

    The per-search file is only written when the search directory exists; the
    global totals are always accumulated.
    """
    cost = calc_cost(input_tokens, output_tokens)
    now = datetime.now().isoformat(timespec="seconds")

    # Per-search metrics
    search_path = DATA_DIR / search_id
    if search_path.exists():
        search_metrics = {
            "search_id": search_id,
            "scored_at": now,
            "model": MODEL,
            "listings_scored": listings_scored,
            "input_tokens": input_tokens,
            "output_tokens": output_tokens,
            "cost_usd": cost,
        }
        (search_path / "metrics.json").write_text(
            json.dumps(search_metrics, indent=2), encoding="utf-8"
        )

    # Global metrics
    global_metrics = {}
    if METRICS_FILE.exists():
        try:
            global_metrics = json.loads(METRICS_FILE.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            # Unreadable totals are restarted from zero rather than failing the run.
            global_metrics = {}
        if not isinstance(global_metrics, dict):
            global_metrics = {}

    global_metrics["total_searches"] = global_metrics.get("total_searches", 0) + 1
    global_metrics["total_listings_scored"] = global_metrics.get("total_listings_scored", 0) + listings_scored
    global_metrics["total_input_tokens"] = global_metrics.get("total_input_tokens", 0) + input_tokens
    global_metrics["total_output_tokens"] = global_metrics.get("total_output_tokens", 0) + output_tokens
    global_metrics["total_cost_usd"] = round(global_metrics.get("total_cost_usd", 0.0) + cost, 6)
    global_metrics["last_updated"] = now

    # The data directory may not exist yet when scoring runs stand-alone.
    METRICS_FILE.parent.mkdir(parents=True, exist_ok=True)
    METRICS_FILE.write_text(json.dumps(global_metrics, indent=2), encoding="utf-8")


def prefs_hash(prefs: str) -> str:
    """Short stable hash of the user's preference string (empty → 'none')."""
    cleaned = prefs.strip()
    if not cleaned:
        return "none"
    return hashlib.md5(cleaned.encode()).hexdigest()[:8]


def _score_cache_key(item_id: str, prefs: str, category: str) -> Path:
    """Return path to the persistent score cache file for this item+context."""
    ph = prefs_hash(prefs)
    ch = hashlib.md5(category.encode()).hexdigest()[:6]
    return SCORE_CACHE / f"{item_id}_{ph}_{ch}.json"
+def load_score_cache(item_id: str, prefs: str, category: str) -> dict | None: + """Return cached score dict or None if not cached.""" + p = _score_cache_key(item_id, prefs, category) + if p.exists(): + try: + return json.loads(p.read_text()) + except Exception: + pass + return None + + +def save_score_cache(item_id: str, prefs: str, category: str, score_data: dict) -> None: + """Persist a score result so future searches with same item/prefs/category hit cache.""" + SCORE_CACHE.mkdir(parents=True, exist_ok=True) + p = _score_cache_key(item_id, prefs, category) + p.write_text(json.dumps(score_data, ensure_ascii=False)) + + +# ── Helpers ─────────────────────────────────────────────────────────────────── + +def trim_text(raw: str, max_chars: int = 800) -> str: + """Cut DBA boilerplate header/footer, keep the meat.""" + # Skip past the standard navigation header + for marker in ["Varebeskrivelse", "Beskrivelse", "Specifikationer"]: + idx = raw.find(marker) + if idx != -1: + raw = raw[idx:] + break + # Trim to max length + if len(raw) > max_chars: + raw = raw[:max_chars] + "…" + return raw.strip() + + +def extract_structured_fields(raw: str) -> dict: + """Pull key structured fields out of DBA raw_text before trimming.""" + fields = {} + patterns = { + "year": r"(?:Modelår|Årstal|Årgang)[^\d]*(\d{4})", + "km": r"Kilometertal\s+([\d\.,]+ km)", + "condition": r"Stand\s*:\s*([^\n|]{3,60})", + "gear": r"Geartype\s+(\S+)", + "fuel": r"Drivmiddel\s+(\S+)", + "owners": r"Antal ejere\s+(\d+)", + } + for key, pattern in patterns.items(): + m = re.search(pattern, raw, re.IGNORECASE) + if m: + fields[key] = m.group(1).strip() + return fields + + +def listing_summary(item: dict, idx: int) -> str: + """Compact text representation of a listing for the AI prompt.""" + raw = item.get("details", {}).get("raw_text", item.get("description", "")) + fields = extract_structured_fields(raw) + text = trim_text(raw) + + meta_parts = [] + if fields.get("year"): + meta_parts.append(f"Årgang: 
{fields['year']}") + if fields.get("km"): + meta_parts.append(f"Km: {fields['km']}") + if fields.get("fuel"): + meta_parts.append(f"Brændstof: {fields['fuel']}") + if fields.get("gear"): + meta_parts.append(f"Gear: {fields['gear']}") + if fields.get("owners"): + meta_parts.append(f"Ejere: {fields['owners']}") + if fields.get("condition"): + meta_parts.append(f"Stand: {fields['condition']}") + + meta_line = " | ".join(meta_parts) + + return ( + f"--- Annonce #{idx + 1} (ID: {item['id']}) ---\n" + f"Navn: {item['name']}\n" + f"Pris: {item['price_dkk']} DKK\n" + + (f"{meta_line}\n" if meta_line else "") + + f"{text}\n" + ) + + +def detect_category(items: list[dict]) -> str: + """Detect category from item URLs and breadcrumb in raw_text.""" + if not items: + return "brugte varer" + url = items[0].get("url", "") + if "/mobility/" in url: + return "brugte biler" + + # Extract breadcrumb from raw_text to detect subcategory + raw = items[0].get("details", {}).get("raw_text", "") + m = re.search(r"Du er her\s+(.+?)(?:\n|Billedgalleri)", raw) + breadcrumb = m.group(1).lower() if m else "" + + for keywords, context_key in _CATEGORY_MAP: + if any(kw in breadcrumb for kw in keywords): + return context_key + + return "brugte varer" + + +KNOWLEDGE_CONTEXT = { + "brugte biler": ( + "- Kendte reliabilitetsproblemer (fx Peugeot 1.2 PureTech timing-kæde, VW DSG-gearkasse, BMW N47 dieselmotor)\n" + "- Km-stand og alder sat i forhold til markedsværdi for den specifikke model og variant\n" + "- Kendte stærke og svage modeller (fx Toyota/Mazda høj reliabilitet, Renault/Citroën/Fiat lavere)\n" + "- Typiske brugtpriser for modellen baseret på år og km" + ), + "elektronik": ( + "- Produktgenerationens relative ydelse og markedsværdi (fx RTX 4070 > RTX 3080, iPhone 15 > 13)\n" + "- Kendte problemer: mining-slid på GPU'er, batterinedgang på telefoner/laptops, kondensatorfejl\n" + "- Hvad er en rimelig brugtpris for dette produkt i denne stand?\n" + "- Stand er afgørende — 'Som ny' vs 'Brugt - 
def build_prompt(items: list[dict], category: str, criteria: str, prefs: str = "") -> str:
    """Assemble the scoring prompt sent to the model for one batch.

    Args:
        items: Listings in this batch; each is rendered by ``listing_summary``
            together with its position in the batch.
        category: Category label inserted into the opening sentence and used
            to pick an entry from ``KNOWLEDGE_CONTEXT`` (falling back to the
            ``"brugte varer"`` entry).
        criteria: Scoring criteria text inserted verbatim.
        prefs: Optional buyer preferences; a preferences section is added only
            when this contains non-whitespace text.

    Returns:
        The complete prompt: intro, knowledge context, optional preferences,
        criteria, the fixed scoring rubric with the JSON reply format, and
        finally the listing summaries.
    """
    rendered = [listing_summary(entry, pos) for pos, entry in enumerate(items)]
    summaries = "\n".join(rendered)

    wishes = prefs.strip()
    if wishes:
        prefs_block = (
            "\n"
            "KØBERENS EGNE PRÆFERENCER (vigtig — vej disse tungt i din scoring):\n"
            f"{wishes}\n"
            "Annoncer der strider mod disse præferencer skal have markant lavere score.\n"
        )
    else:
        prefs_block = ""

    fallback = KNOWLEDGE_CONTEXT["brugte varer"]
    knowledge = KNOWLEDGE_CONTEXT.get(category, fallback)

    # Dynamic head of the prompt.
    intro = (
        f"Du er en ekspert køberrådgiver for {category} på DBA.\n"
        "\n"
        "Brug BÅDE annonceteksten OG din egen viden om produkterne:\n"
        f"{knowledge}\n"
        f"{prefs_block}\n"
        "Scorer HVER annonce UAFHÆNGIGT på en absolut skala 1-10 baseret på disse kriterier:\n"
        f"{criteria}\n"
        "\n"
    )

    # Static rubric and reply format. This is a plain (non-f) string, so the
    # JSON braces are written singly.
    rubric = """ABSOLUT SCORESKALA (brug din viden om markedet — scoren må IKKE afhænge af de andre annoncer i denne batch):
- 9-10: Fremragende køb — markant under markedspris, pålidelig model, god stand/historik
- 7-8: Godt køb — fair pris, solid model, få eller ingen bekymringer
- 5-6: Middel — markedspris, eller visse risici/ukendte faktorer
- 3-4: Under middel — overpriset eller kendte modelproblem
- 1-2: Undgå — alvorlige røde flag, stor risiko eller klart overpriset

WARNINGS — list KUN konkrete, faktuelle røde flag der er direkte støttet af annonceteksten eller veldokumenterede modelproblemer:
- Nævn KUN ting der er bekræftet i annonceteksten (fx "sælger nævner støj", "ingen billeder", "kun afhentning")
- Eller veldokumenterede modelspecifikke problemer (fx "Turbo-variant har historisk køleproblemer")
- Skriv IKKE generiske advarsler om mining, stand etc. medmindre det eksplicit nævnes i annoncen
- Hvis ingen konkrete røde flag: tom streng ""

Returner KUN et JSON-array — ingen forklaringer udenfor JSON:
[
  {
    "id": "annonce-ID",
    "score": 8.5,
    "reason": "Begrundelse på dansk (maks 2 sætninger). Nævn gerne konkret viden om modellen.",
    "warnings": "Kun konkrete røde flag fra annonceteksten eller kendte modelproblemer. Tom streng hvis ingen."
  },
  ...
]
"""

    outro = (
        f"\nAlle {len(items)} annoncer skal med. Score er 1-10 (10 = suverænt køb).\n"
        "\n"
        "ANNONCER:\n"
        f"{summaries}"
    )

    return intro + rubric + outro
def _coerce_score(value) -> float | None:
    """Return *value* as a float, or None when it is missing or non-numeric."""
    try:
        return float(value)
    except (TypeError, ValueError):
        return None


def score_listings(
    items: list[dict],
    criteria: str,
    prefs: str = "",
    batch_size: int = 10,
    force: bool = False,
    source_file: Path | None = None,
) -> list[dict]:
    """Score listings with the AI model and return them ranked best-first.

    Items are first split into "cached" and "needs scoring". Unless *force*
    is set, an item is cached when the persistent score cache has a usable
    entry for it, or when it already carries an ``ai_score`` whose
    ``ai_prefs_hash`` matches the current preferences. The remaining items
    are sent to the model in batches that run concurrently on a thread pool
    (at most 8 workers).

    The ``ai_*`` keys are written onto the item dicts in place.

    Args:
        items: Listing dicts; each must have an ``id``.
        criteria: Scoring criteria text passed through to the prompt.
        prefs: Buyer preferences; part of the cache identity via
            ``prefs_hash``.
        batch_size: Maximum listings per model request (values below 1 are
            treated as 1).
        force: Skip both caches and re-score every item.
        source_file: When given and something was sent to the model, the
            items are written back to this file as JSON and usage metrics
            are recorded under the file's parent directory name.

    Returns:
        Items that have a score valid for the current preferences, sorted by
        ``ai_score`` descending with ``ai_rank`` renumbered from 1. Returns
        an empty list when there is nothing to score and nothing cached.
    """
    client = anthropic.Anthropic(api_key=os.environ.get("ANTHROPIC_API_KEY", API_KEY))
    category = detect_category(items)
    phash = prefs_hash(prefs)
    # range() rejects a zero step and a negative step would yield no batches.
    batch_size = max(1, batch_size)

    # ── Split: persistent score cache → in-file cache → needs AI scoring ────
    to_score, cached = [], []
    now = datetime.now().isoformat(timespec="seconds")
    for item in items:
        if not force:
            # 1. Check persistent cross-search score cache. An entry without
            #    a score is treated as a miss so the item gets re-scored
            #    instead of being hidden forever.
            sc = load_score_cache(str(item["id"]), prefs, category)
            if sc and sc.get("score") is not None:
                item["ai_score"] = sc["score"]
                item["ai_rank"] = sc.get("rank")
                item["ai_reason"] = sc.get("reason", "")
                item["ai_warnings"] = sc.get("warnings", "")
                item["ai_prefs_hash"] = phash
                item["ai_scored_at"] = sc.get("scored_at", now)
                cached.append(item)
                continue
            # 2. In-file cache (same search, already scored with these prefs)
            if item.get("ai_score") is not None and item.get("ai_prefs_hash") == phash:
                cached.append(item)
                continue
        to_score.append(item)

    if cached:
        print(f" ♻️ {len(cached)} annoncer genbruger cache", file=sys.stderr)
    if to_score:
        print(f" 🤖 {len(to_score)} annoncer sendes til AI…", file=sys.stderr)
    elif not cached:
        print(" Ingen annoncer at score.", file=sys.stderr)
        return []

    # ── Score only uncached items — parallel batches ─────────────────────────
    all_scores: dict[str, dict] = {}
    if to_score:
        batches = [to_score[i:i + batch_size] for i in range(0, len(to_score), batch_size)]
        print(f" ({len(batches)} parallelle batches à max {batch_size})", file=sys.stderr)

        def score_batch(b_idx: int, batch: list[dict]) -> tuple[dict[str, dict], int, int]:
            """Score one batch; return (scores by id, input tokens, output tokens)."""
            prompt = build_prompt(batch, category, criteria, prefs)
            response = client.messages.create(
                model=MODEL,
                max_tokens=MAX_TOKENS,
                temperature=0,
                messages=[{"role": "user", "content": prompt}],
            )
            inp = response.usage.input_tokens
            out = response.usage.output_tokens
            text = response.content[0].text.strip() if response.content else ""

            # The model is asked for a bare JSON array; pull out the outermost
            # [...] in case it wrapped the array in prose anyway.
            parsed = None
            json_m = re.search(r"\[.*\]", text, re.DOTALL)
            if json_m:
                try:
                    parsed = json.loads(json_m.group(0))
                except json.JSONDecodeError:
                    parsed = None
            if not isinstance(parsed, list):
                print(f" ⚠ Kunne ikke parse svar fra batch {b_idx + 1}:\n{text[:300]}", file=sys.stderr)
                return {}, inp, out

            # Skip entries that are not objects or lack an id rather than
            # failing the whole batch on one malformed element.
            result = {str(s["id"]): s for s in parsed if isinstance(s, dict) and "id" in s}
            print(f" ✓ Batch {b_idx + 1}/{len(batches)} færdig ({len(result)} scores, {inp}+{out} tok)", file=sys.stderr)
            return result, inp, out

        total_input = total_output = 0
        with ThreadPoolExecutor(max_workers=min(len(batches), 8)) as pool:
            futures = {pool.submit(score_batch, i, b): i for i, b in enumerate(batches)}
            for future in as_completed(futures):
                try:
                    scores, inp, out = future.result()
                except Exception as exc:
                    # Worker boundary: one failed request must not throw away
                    # the batches that succeeded. Its items stay unscored and
                    # are picked up again on the next run.
                    print(f" ⚠ Batch {futures[future] + 1} fejlede: {exc}", file=sys.stderr)
                    continue
                all_scores.update(scores)
                total_input += inp
                total_output += out

        # Write scores + cache metadata back onto items
        now = datetime.now().isoformat(timespec="seconds")
        newly_scored = 0
        for item in to_score:
            s = all_scores.get(str(item["id"]))
            score = _coerce_score(s.get("score")) if s else None
            if score is None:
                # Nothing usable came back for this item: leave it untouched
                # and, importantly, do not persist a null score.
                continue
            item["ai_score"] = score
            item["ai_rank"] = s.get("rank")
            item["ai_reason"] = s.get("reason", "")
            item["ai_warnings"] = s.get("warnings", "")
            # Persist to cross-search score cache so same item never re-scored
            save_score_cache(str(item["id"]), prefs, category, {
                "score": score,
                "rank": s.get("rank"),
                "reason": s.get("reason", ""),
                "warnings": s.get("warnings", ""),
                "scored_at": now,
            })
            item["ai_prefs_hash"] = phash
            item["ai_scored_at"] = now
            newly_scored += 1

        # Auto-save scores back into source file so cache persists next run
        if source_file:
            all_items_map = {str(i["id"]): i for i in cached + to_score}
            source_file.write_text(json.dumps(list(all_items_map.values()), ensure_ascii=False, indent=2))
            print(f" 💾 {newly_scored} nye scores gemt → {source_file}", file=sys.stderr)
            cost = calc_cost(total_input, total_output)
            print(f" 💰 {total_input}+{total_output} tokens → ${cost:.4f}", file=sys.stderr)
            update_metrics(source_file.parent.name, total_input, total_output, newly_scored)

    # ── Combine, re-sort, re-rank ────────────────────────────────────────────
    # The hash check keeps out items that failed to score this round but still
    # carry a score produced under different preferences.
    combined = [
        i for i in (cached + to_score)
        if i.get("ai_score") is not None and i.get("ai_prefs_hash") == phash
    ]
    combined.sort(key=lambda x: x["ai_score"], reverse=True)
    for rank, item in enumerate(combined, 1):
        item["ai_rank"] = rank

    return combined
in to_score: + s = all_scores.get(str(item["id"]), {}) + if s: + item["ai_score"] = s.get("score") + item["ai_rank"] = s.get("rank") + item["ai_reason"] = s.get("reason", "") + item["ai_warnings"] = s.get("warnings", "") + # Persist to cross-search score cache so same item never re-scored + save_score_cache(str(item["id"]), prefs, category, { + "score": s.get("score"), + "rank": s.get("rank"), + "reason": s.get("reason", ""), + "warnings": s.get("warnings", ""), + "scored_at": now, + }) + item["ai_prefs_hash"] = phash + item["ai_scored_at"] = now + + # Auto-save scores back into source file so cache persists next run + if source_file: + all_items_map = {str(i["id"]): i for i in cached + to_score} + source_file.write_text(json.dumps(list(all_items_map.values()), ensure_ascii=False, indent=2)) + scored_count = sum(1 for i in to_score if i.get("ai_score") is not None) + print(f" 💾 {scored_count} nye scores gemt → {source_file}", file=sys.stderr) + cost = calc_cost(total_input, total_output) + print(f" 💰 {total_input}+{total_output} tokens → ${cost:.4f}", file=sys.stderr) + update_metrics(source_file.parent.name, total_input, total_output, scored_count) + + # ── Combine, re-sort, re-rank ───────────────────────────────────────────── + combined = [i for i in (cached + to_score) if i.get("ai_score") is not None] + combined.sort(key=lambda x: x["ai_score"], reverse=True) + for rank, item in enumerate(combined, 1): + item["ai_rank"] = rank + + return combined + + +# ── Output ──────────────────────────────────────────────────────────────────── + +def print_results(ranked: list[dict], top: int | None = None) -> None: + show = ranked[:top] if top else ranked + print(f"\n{'═' * 60}") + print(f" TOP {len(show)} ANNONCER (af {len(ranked)} scoret)") + print(f"{'═' * 60}\n") + for item in show: + score = item.get("ai_score", "?") + bar = "█" * int(score) + "░" * (10 - int(score)) if isinstance(score, (int, float)) else "" + print( + f"#{item['ai_rank']:>2} [{score:4.1f}] {bar} 
{item['name']}\n" + f" Pris: {item['price_dkk']} DKK | {item['url']}\n" + f" ✅ {item.get('ai_reason','')}\n" + ) + if item.get("ai_warnings"): + print(f" ⚠️ {item['ai_warnings']}\n") + + +# ── Main ────────────────────────────────────────────────────────────────────── + +CRITERIA = { + "mobility": ( + "- Pris ift. markedsværdi for den specifikke model/år/km (brug din viden)\n" + "- Modelreliabilitet og kendte svagheder (timing-kæde, gearkasse, rust etc.)\n" + "- Km-stand og alder (Årgang og Kilometertal er angivet hvis tilgængeligt)\n" + "- Privat sælger foretrukket (forhandler = højere pris, ingen reklamationsret ved brugt)\n" + "- Servicehistorik, nysynet, tandrem nævnt?\n" + "- Udstyrsniveau og antal ejere" + ), + "recommerce": ( + "- Pris ift. aktuel markedsværdi for produktet (brug din viden om typiske priser)\n" + "- Produktgenerationens relative ydelse og værdi (fx GPU-generationer, produktionsår)\n" + "- Stand (DBA's standbeskrivelse er angivet: 'Som ny', 'Brugt - men i god stand', 'Brugt - med synlige brugsspor')\n" + "- Kendte problemer med denne model/variant\n" + "- Er varen komplet? 
def _parse_cli_args(argv: list[str]) -> tuple[int | None, str, bool, bool, list[str]]:
    """Parse command-line arguments into (top_n, prefs, force, save, positional).

    Accepts ``--top N`` / ``--top=N``, ``--prefs TEXT`` / ``--prefs=TEXT``,
    ``--force`` and ``--save``. Anything not starting with ``--`` is collected
    as a positional argument. Unknown ``--flags`` are reported on stderr and
    otherwise ignored.

    Raises:
        ValueError: If ``--top`` or ``--prefs`` has no value, or the ``--top``
            value is not an integer. The message is user-facing.
    """
    top_n = None
    prefs = ""
    force = False
    save = False
    positional = []

    i = 0
    while i < len(argv):
        arg = argv[i]
        name, has_inline, inline = arg.partition("=")
        if name in ("--top", "--prefs"):
            if has_inline:
                value = inline
                i += 1
            elif i + 1 < len(argv):
                value = argv[i + 1]
                i += 2
            else:
                raise ValueError(f"{name} kræver en værdi.")
            if name == "--top":
                try:
                    top_n = int(value)
                except ValueError:
                    raise ValueError(f"--top kræver et heltal, fik {value!r}.") from None
            else:
                prefs = value
        elif arg == "--force":
            force = True
            i += 1
        elif arg == "--save":
            save = True
            i += 1
        elif arg.startswith("--"):
            print(f"Advarsel: ukendt flag {arg} ignoreres.", file=sys.stderr)
            i += 1
        else:
            positional.append(arg)
            i += 1

    return top_n, prefs, force, save, positional


def main() -> None:
    """Command-line entry point: load a search, score it, print the ranking.

    The listings file is taken from the first positional argument (a search
    UUID under ``DATA_DIR`` or a file path); with no argument the most
    recently modified search directory containing ``listings.json`` is used.
    When stdin is a terminal and no ``--prefs`` were given, the user may add
    preferences and re-score, for up to three rounds in total.

    Exits with status 1 when the API key is missing, the arguments are
    invalid, or no listings file can be found.
    """
    if "ANTHROPIC_API_KEY" not in os.environ and not API_KEY:
        print("Fejl: ANTHROPIC_API_KEY er ikke sat.", file=sys.stderr)
        sys.exit(1)

    try:
        top_n, prefs, force, save, positional = _parse_cli_args(sys.argv[1:])
    except ValueError as err:
        print(f"Fejl: {err}", file=sys.stderr)
        sys.exit(1)

    if not positional:
        # Auto-detect: most recently modified search directory under DATA_DIR
        # that contains a listings.json.
        searches = sorted(
            (d for d in DATA_DIR.iterdir() if (d / "listings.json").exists()),
            key=lambda d: d.stat().st_mtime, reverse=True
        ) if DATA_DIR.exists() else []
        if not searches:
            print("Ingen søgninger fundet. Kør fetch_dba.py først.", file=sys.stderr)
            sys.exit(1)
        search_dir = searches[0]
        results_file = search_dir / "listings.json"
        print(f"Bruger nyeste søgning: {search_dir.name}", file=sys.stderr)
    else:
        ref = positional[0]
        if UUID_RE.match(ref):
            results_file = DATA_DIR / ref / "listings.json"
        else:
            results_file = Path(ref)
        if not results_file.exists():
            print(f"Fejl: {results_file} ikke fundet.", file=sys.stderr)
            sys.exit(1)

    items = json.loads(results_file.read_text())
    print(f"Loaded {len(items)} annoncer fra {results_file}", file=sys.stderr)

    # `or ""` covers a url that is present but null in the file.
    first_url = (items[0].get("url") or "") if items else ""
    domain = "mobility" if "/mobility/" in first_url else "recommerce"
    criteria = CRITERIA[domain]

    # ── Interactive refinement loop (up to 3 rounds) ─────────────────────────
    MAX_ROUNDS = 3
    interactive = sys.stdin.isatty() and not prefs

    for attempt in range(MAX_ROUNDS):
        if prefs:
            print(f"\n🎯 Præferencer: {prefs}", file=sys.stderr)

        ranked = score_listings(items, criteria, prefs, force=force, source_file=results_file)
        # After first run, don't force re-score on subsequent interactive rounds
        force = False
        print_results(ranked, top_n)

        if save:
            out = results_file.parent / "ranked.json"
            out.write_text(json.dumps(ranked, ensure_ascii=False, indent=2))
            print(f"\n💾 Ranked output gemt → {out}", file=sys.stderr)

        if not interactive or attempt >= MAX_ROUNDS - 1:
            break

        remaining = MAX_ROUNDS - attempt - 1
        print(f"\n{'─' * 60}")
        print(f" Tilføj præferencer for at re-score ({remaining} forsøg tilbage)")
        print(" Eks: 'Ikke franske biler' / 'Helst manuel gear' / 'Max 50 km fra Aarhus'")
        print(" (Tryk Enter for at afslutte)")
        print(f"{'─' * 60}")

        try:
            new_prefs = input(" > ").strip()
        except (EOFError, KeyboardInterrupt):
            break

        if not new_prefs:
            break

        prefs = f"{prefs}\n{new_prefs}".strip() if prefs else new_prefs
        # Force re-score when prefs change (cache hash will differ anyway, but be explicit)
        force = True
        print("\n🔄 Re-scorer med dine præferencer…\n", file=sys.stderr)


if __name__ == "__main__":
    main()
+
+ +
+ + +
+
+
+ + + + +
+ + +
+

AI-powered annonce analyse

+

+ Find den bedste
DBA-annonce. +

+ +
+ + + + +
+ + +
+
+ +

+ Paste en DBA søge-URL — AI'en gennemgår alle annoncer og rangerer dem efter pris, stand og kvalitet.
+ AI kan tage fejl. Brug det som inspiration, ikke som facit. +

+
+ + + + + + + +
+ + + + +