Add Dockerfile, Nomad spec, Gitea CI/CD, requirements and health endpoint
Some checks failed
Build and Deploy BlaaAi / build-and-deploy (push) Has been cancelled

This commit is contained in:
Henrik Jess Nielsen
2026-05-24 19:14:41 +02:00
parent 57c29456ab
commit b6e70d61f9
13 changed files with 1987 additions and 0 deletions

View File

@@ -0,0 +1,62 @@
# Gitea Actions workflow: build the BlaaAi image, push it to the private
# registry, deploy the Nomad job and probe the public health endpoint.
name: Build and Deploy BlaaAi

on:
  push:
    branches:
      - main
  workflow_dispatch:

env:
  SERVICE_NAME: blaaai

jobs:
  build-and-deploy:
    runs-on: debian-host
    env:
      PATH: /usr/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/sbin:/bin:/snap/bin
      DOCKER_HOST: unix:///var/run/docker.sock
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Log in to Docker Registry
        run: |
          echo "${{ secrets.HARBOR_ROBOT_TOKEN }}" | docker login registry.i80.dk -u "robot\$gitserver" --password-stdin

      - name: Build Docker image
        run: |
          SHA=$(git rev-parse --short HEAD)
          # Build args are quoted so an unexpected character can never split them.
          docker build \
            --build-arg "BUILD_VERSION=${{ github.run_number }}" \
            --build-arg "GIT_COMMIT=$SHA" \
            --build-arg "BUILD_TIME=$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
            -t "registry.i80.dk/gitea/${SERVICE_NAME}:latest" \
            -t "registry.i80.dk/gitea/${SERVICE_NAME}:$SHA" .

      - name: Push Docker image
        run: |
          SHA=$(git rev-parse --short HEAD)
          docker push "registry.i80.dk/gitea/${SERVICE_NAME}:latest"
          docker push "registry.i80.dk/gitea/${SERVICE_NAME}:$SHA"

      - name: Deploy to Nomad
        run: |
          nomad job validate "${SERVICE_NAME}.nomad"
          nomad job run "${SERVICE_NAME}.nomad"
        env:
          NOMAD_ADDR: "https://nomad.i80.dk:4646"

      - name: Verify deployment
        run: |
          echo "Deployment triggered — checking status..."
          sleep 20
          nomad job status "${SERVICE_NAME}"
        env:
          NOMAD_ADDR: "https://nomad.i80.dk:4646"

      - name: Health check
        run: |
          sleep 30
          # Retry transient failures (connection refused, 5xx) instead of
          # failing the pipeline on a single probe while the new
          # allocation is still starting.
          curl -sf --retry 5 --retry-delay 10 --retry-connrefused \
            "https://${SERVICE_NAME}.i80.dk/health" && echo "Health check passed"

7
.gitignore vendored Normal file
View File

@@ -0,0 +1,7 @@
__pycache__/
*.pyc
.env
.env.*
data/
*.log
.DS_Store

34
Dockerfile Normal file
View File

@@ -0,0 +1,34 @@
FROM python:3.12-slim

ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1

WORKDIR /app

# curl is kept in the runtime image because HEALTHCHECK below uses it.
RUN apt-get update && apt-get install -y --no-install-recommends \
        curl \
    && rm -rf /var/lib/apt/lists/*

# Dependency manifest first so this layer is reused until requirements change.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Install Playwright's Chromium together with its OS packages, and drop the
# apt lists that the dependency install leaves behind in the same layer.
RUN playwright install --with-deps chromium \
    && rm -rf /var/lib/apt/lists/*

# NOTE(review): make sure a .dockerignore excludes .env, data/ and .git so
# they are not copied into the image by the line below.
COPY . .
RUN mkdir -p data static templates

# Build metadata changes on every CI run (BUILD_TIME in particular), so it is
# declared after the expensive layers; declaring it at the top invalidated the
# pip and Playwright layers on every build.
ARG BUILD_VERSION=unknown
ARG GIT_COMMIT=unknown
ARG BUILD_TIME=unknown
ENV BUILD_VERSION=${BUILD_VERSION} \
    GIT_COMMIT=${GIT_COMMIT} \
    BUILD_TIME=${BUILD_TIME}

# NOTE(review): the container still runs as root. Switching to a dedicated
# USER also requires a shared PLAYWRIGHT_BROWSERS_PATH and write access to the
# /app/data volume mounted by the orchestrator — confirm before changing.

EXPOSE 8000

HEALTHCHECK --interval=10s --timeout=5s --start-period=30s --retries=3 \
    CMD curl -fsS http://localhost:${PORT:-8000}/health || exit 1

# A shell is needed to expand ${PORT}; `exec` replaces it so uvicorn becomes
# PID 1 and receives SIGTERM directly on container stop.
CMD ["sh", "-c", "exec uvicorn app:app --host 0.0.0.0 --port ${PORT:-8000}"]

45
Makefile Normal file
View File

@@ -0,0 +1,45 @@
.PHONY: dev fetch score run list install ai help

# The URL is always passed to the script inside double quotes, where "&" needs
# no escaping. A backslash before "&" would be kept literally by the shell and
# end up inside the URL, so the default is written without escapes.
URL ?= https://www.dba.dk/mobility/search/car?mileage_to=175000&price_from=15000&price_to=110000&registration_class=1&year_from=2014
UUID ?=
PREFS ?=

## Start udviklingsserver
dev:
	python3 -m uvicorn app:app --host 0.0.0.0 --port 8000 --reload

## Hent annoncer — ny søgning: make fetch URL="https://dba.dk/..."
## gen-fetch: make fetch UUID=<uuid>
fetch:
ifdef UUID
	python3 fetch_dba.py $(UUID)
else
	python3 fetch_dba.py "$(URL)"
endif

## Score annoncer — make score UUID=<uuid> [PREFS="ingen franske biler"]
score:
ifdef UUID
	python3 score.py $(UUID) $(if $(PREFS),--prefs "$(PREFS)",)
else
	python3 score.py $(if $(PREFS),--prefs "$(PREFS)",)
endif

## Hent + score i ét hak
run: fetch score

## List alle søgninger
list:
	python3 fetch_dba.py --list

## Installer Python-afhængigheder
# Install from requirements.txt so local setups match the container image.
install:
	pip install -r requirements.txt

## Genoptag Copilot session
ai:
	copilot --resume=2093191e-06df-4810-b13f-076be1f8995b

## Vis denne hjælp
# The "##" lines above are the help text printed by this target.
help:
	@grep -E '^##' Makefile | sed 's/## //'

278
app.py Normal file
View File

@@ -0,0 +1,278 @@
#!/usr/bin/env python3
"""
BlaaAi — AI-powered DBA listing analyzer
FastAPI backend
Usage:
uvicorn app:app --reload --port 8000
"""
import json, os, smtplib
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime, timezone
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from pathlib import Path
from typing import Optional
from fastapi import BackgroundTasks, FastAPI, HTTPException
from fastapi.responses import HTMLResponse
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates
from pydantic import BaseModel
from starlette.requests import Request
from fetch_dba import (
DATA_DIR, ITEM_CACHE, create_search, detect_domain, enrich_listings,
fetch_page, find_new, listings_file, load_meta, load_seen,
save_seen, seen_file, list_searches,
)
from score import CRITERIA, METRICS_FILE, score_listings
# FastAPI application object; the route decorators below register on it.
app = FastAPI(title="BlaaAi")
# Static assets and templates are resolved relative to the process working
# directory ("static" / "templates"), not relative to this file.
app.mount("/static", StaticFiles(directory="static"), name="static")
templates = Jinja2Templates(directory="templates")
# Module-level thread pool with four workers.
executor = ThreadPoolExecutor(max_workers=4)
# ── Meta helpers ──────────────────────────────────────────────────────────────
def update_meta(search_id: str, **kwargs) -> dict:
    """Merge ``kwargs`` into a search's meta.json and return the merged dict.

    The file is replaced atomically (sibling temp file, then ``os.replace``)
    so a request handler reading meta.json while the background pipeline is
    updating it never sees a half-written file.

    Args:
        search_id: Directory name of the search below ``DATA_DIR``.
        **kwargs: Fields to set or overwrite in the stored metadata.

    Returns:
        The metadata dict after the update.

    Raises:
        FileNotFoundError: If the search has no meta.json.
        json.JSONDecodeError: If the stored meta.json is not valid JSON.
    """
    import threading  # local import: only needed for a unique temp-file name

    p = DATA_DIR / search_id / "meta.json"
    meta = json.loads(p.read_text())
    meta.update(kwargs)
    payload = json.dumps(meta, ensure_ascii=False, indent=2)

    # The name is unique per process and thread, so two concurrent writers
    # never share a temp file.
    tmp = p.with_name(f"{p.name}.{os.getpid()}.{threading.get_ident()}.tmp")
    try:
        tmp.write_text(payload)
        os.replace(tmp, p)
    finally:
        # A no-op after a successful replace; cleans up after a failed write.
        tmp.unlink(missing_ok=True)
    return meta
# ── Background pipeline ───────────────────────────────────────────────────────
def run_fetch_and_score(search_id: str, prefs: str = "") -> None:
    """Fetch page 1 of a stored search, persist unseen listings, then score.

    Progress is written to the ``status`` field of the search's meta.json:
    "fetching", then "scoring", then "ready". If any step raises, the status
    becomes "error" with the exception text and the exception is re-raised.

    Args:
        search_id: Identifier of an existing search.
        prefs: Free-text user preferences forwarded to the scorer.
    """
    try:
        search_url = load_meta(search_id)["url"]

        # Fetch: page 1 only; listings already in seen.json are skipped.
        update_meta(search_id, status="fetching")
        seen_path = seen_file(search_id)
        listings_path = listings_file(search_id)
        already_seen = load_seen(seen_path)
        page_items, _total = fetch_page(search_url, 1)
        fresh = find_new(page_items, already_seen)
        if fresh:
            fresh = enrich_listings(fresh)
            if listings_path.exists():
                stored = json.loads(listings_path.read_text())
            else:
                stored = []
            stored.extend(fresh)
            listings_path.write_text(json.dumps(stored, ensure_ascii=False, indent=2))
        save_seen(seen_path, already_seen | {entry["id"] for entry in page_items})

        # Score: hand every stored listing to the scorer.
        update_meta(search_id, status="scoring")
        if listings_path.exists():
            items = json.loads(listings_path.read_text())
        else:
            items = []
        criteria = CRITERIA[detect_domain(search_url)]
        score_listings(items, criteria, prefs, force=False, source_file=listings_path)

        finished_at = datetime.now().isoformat(timespec="seconds")
        update_meta(
            search_id,
            status="ready",
            listing_count=len(items),
            last_scored_at=finished_at,
            prefs=prefs,
        )
    except Exception as exc:
        update_meta(search_id, status="error", error=str(exc))
        raise
# ── Pydantic models ───────────────────────────────────────────────────────────
class NewSearchRequest(BaseModel):
    """Request body for creating a search; also reused by the rescore route."""
    # Search URL to fetch listings from.
    url: str
    # Optional free-text preferences; handlers treat None and "" the same.
    prefs: Optional[str] = ""
class EmailRequest(BaseModel):
    """Request body for the "email me the results" route."""
    # Recipient address. NOTE(review): typed as plain str, so no address
    # format validation happens at the model level.
    email: str
# ── Routes ────────────────────────────────────────────────────────────────────
@app.get("/health")
async def health():
    """Liveness endpoint reporting build metadata taken from the environment."""
    now_utc = datetime.now(timezone.utc)
    build_version = os.getenv("BUILD_VERSION", "unknown")
    # Only the first seven characters of the commit hash are exposed.
    short_commit = os.getenv("GIT_COMMIT", "unknown")[:7]
    return {
        "status": "healthy",
        "timestamp": now_utc.isoformat(),
        "version": build_version,
        "commit": short_commit,
    }
@app.get("/", response_class=HTMLResponse)
async def index(request: Request):
    """Render the front page template."""
    page = templates.TemplateResponse(request, "index.html")
    return page
@app.get("/search/{search_id}", response_class=HTMLResponse)
async def search_view(request: Request, search_id: str):
    """Render the front page template with ``search_id`` in its context."""
    context = {"search_id": search_id}
    return templates.TemplateResponse(request, "index.html", context)
@app.post("/api/searches")
async def create(body: NewSearchRequest, background_tasks: BackgroundTasks):
    """Create a search for a dba.dk URL and queue the fetch + score pipeline.

    The URL comes from the client and is later fetched by the server, so it
    is restricted to http(s) URLs on dba.dk. Without the check the endpoint
    could be used to make the server request arbitrary hosts.

    Returns:
        ``{"id": <search id>, "status": "queued"}``.

    Raises:
        HTTPException: 400 if the URL is not an http(s) dba.dk URL.
    """
    from urllib.parse import urlparse  # local import: only used for validation

    url = body.url.strip()
    parsed = urlparse(url)
    host = (parsed.hostname or "").lower()
    on_dba = host == "dba.dk" or host.endswith(".dba.dk")
    if parsed.scheme not in ("http", "https") or not on_dba:
        raise HTTPException(400, "Ugyldig URL — kun dba.dk-søgninger understøttes")

    prefs = body.prefs or ""
    search_id = create_search(url)
    update_meta(search_id, status="queued", prefs=prefs)
    background_tasks.add_task(run_fetch_and_score, search_id, prefs)
    return {"id": search_id, "status": "queued"}
@app.get("/api/searches")
async def get_all():
    """Return the metadata of every stored search."""
    searches = list_searches()
    return searches
@app.get("/api/searches/{search_id}")
async def get_search(search_id: str):
    """Return one search's metadata plus its scored listings, best first.

    Only listings with a non-None ``ai_score`` are included in ``listings``;
    ``listing_count`` counts all stored listings. A missing ``status`` is
    derived from whether any listings exist.

    Raises:
        HTTPException: 404 if the search does not exist.
    """
    try:
        meta = load_meta(search_id)
    except FileNotFoundError:
        raise HTTPException(404, "Søgning ikke fundet")

    listings_path = listings_file(search_id)
    if listings_path.exists():
        all_items = json.loads(listings_path.read_text())
        scored = [entry for entry in all_items if entry.get("ai_score") is not None]
        scored.sort(key=lambda entry: entry["ai_score"], reverse=True)
        meta["listings"] = scored
        meta["listing_count"] = len(all_items)
        meta["scored_count"] = len(scored)
        if "status" not in meta and scored:
            meta["status"] = "ready"

    if "status" not in meta:
        has_listings = meta.get("listing_count", 0) > 0
        meta["status"] = "ready" if has_listings else "unknown"
    return meta
@app.post("/api/searches/{search_id}/rescore")
async def rescore(search_id: str, body: NewSearchRequest, background_tasks: BackgroundTasks):
    """Queue another fetch + score run for an existing search.

    Only ``body.prefs`` is used; the search URL comes from the stored metadata.

    Raises:
        HTTPException: 404 if the search does not exist.
    """
    try:
        load_meta(search_id)
    except FileNotFoundError:
        raise HTTPException(404, "Søgning ikke fundet")

    prefs = body.prefs or ""
    update_meta(search_id, status="queued", prefs=prefs)
    background_tasks.add_task(run_fetch_and_score, search_id, prefs)
    return {"id": search_id, "status": "queued"}
@app.get("/metrics")
async def get_metrics():
    """Return global scoring metrics, per-search metrics and item-cache size."""
    totals = json.loads(METRICS_FILE.read_text()) if METRICS_FILE.exists() else {}

    # Per-search breakdown, most recently modified directory first.
    per_search = []
    if DATA_DIR.exists():
        newest_first = sorted(
            DATA_DIR.iterdir(),
            key=lambda entry: entry.stat().st_mtime,
            reverse=True,
        )
        for entry in newest_first:
            metrics_path = entry / "metrics.json"
            if metrics_path.exists():
                per_search.append(json.loads(metrics_path.read_text()))

    # Number of cached item-detail files.
    if ITEM_CACHE.exists():
        cache_count = sum(1 for _ in ITEM_CACHE.glob("*.json"))
    else:
        cache_count = 0
    return {**totals, "item_cache_size": cache_count, "searches": per_search}
@app.post("/api/searches/{search_id}/email")
async def send_email(search_id: str, body: EmailRequest):
    """Email the ten best-scored listings of a finished search.

    The SMTP conversation is blocking, so it runs on the module's thread pool
    instead of stalling the event loop. Delivery failures are reported as 502
    rather than an unhandled 500.

    Raises:
        HTTPException: 404 if the search is unknown, 400 if it is not ready
            or has no results, 502 if the mail could not be delivered, plus
            whatever ``_send_email`` raises itself.
    """
    import asyncio  # local import: only this handler needs the event loop

    try:
        meta = load_meta(search_id)
    except FileNotFoundError:
        raise HTTPException(404, "Søgning ikke fundet")
    if meta.get("status") != "ready":
        raise HTTPException(400, "Analysen er ikke færdig endnu")
    lf = listings_file(search_id)
    if not lf.exists():
        raise HTTPException(400, "Ingen resultater at sende")

    items = json.loads(lf.read_text())
    ranked = sorted(
        [i for i in items if i.get("ai_score") is not None],
        key=lambda x: x["ai_score"],
        reverse=True,
    )[:10]
    html = _build_email_html(ranked, meta)

    loop = asyncio.get_running_loop()
    try:
        await loop.run_in_executor(
            executor, _send_email, body.email, "🔍 Dine DBA-resultater fra BlaaAi", html
        )
    except HTTPException:
        raise
    except (smtplib.SMTPException, OSError):
        # Connection, TLS, auth and recipient errors all end up here.
        raise HTTPException(502, "Email kunne ikke sendes")
    return {"status": "sent", "to": body.email}
# ── Email helpers ─────────────────────────────────────────────────────────────
def _build_email_html(ranked: list[dict], meta: dict) -> str:
rows = ""
for i, item in enumerate(ranked, 1):
score = item.get("ai_score", 0)
bar = "" * int(score) + "" * (10 - int(score))
reason = item.get("ai_reason", "")
warn = f'<p style="color:#dc2626;font-size:13px">⚠️ {item["ai_warnings"]}</p>' if item.get("ai_warnings") else ""
rows += f"""
<tr>
<td style="padding:16px;border-bottom:1px solid #e5e7eb">
<strong>#{i} [{score}] {item['name']}</strong><br>
<span style="color:#6b7280">{item.get('price_dkk','?')} DKK</span>
<span style="font-family:monospace;color:#6366f1;margin-left:8px">{bar}</span><br>
<p style="margin:6px 0;font-size:14px">{reason}</p>
{warn}
<a href="{item['url']}" style="color:#6366f1;font-size:13px">Se annonce →</a>
</td>
</tr>"""
return f"""
<html><body style="font-family:sans-serif;max-width:600px;margin:0 auto;padding:20px">
<h1 style="color:#1f2937">🔍 Dine DBA-resultater</h1>
<p style="color:#6b7280">Søgning: <a href="{meta['url']}">{meta['url'][:60]}…</a></p>
<table style="width:100%;border-collapse:collapse">{rows}</table>
<p style="color:#9ca3af;font-size:12px;margin-top:24px">Leveret af BlaaAi</p>
</body></html>"""
def _send_email(to: str, subject: str, html: str) -> None:
    """Send an HTML email through the SMTP server configured in the environment.

    Reads ``SMTP_HOST``, ``SMTP_PORT`` (default 587), ``SMTP_USER`` and
    ``SMTP_PASS``. The connection uses STARTTLS and a 30 second timeout so an
    unresponsive server cannot hang the caller. Login is skipped when no user
    is configured.

    Args:
        to: Recipient address (client supplied).
        subject: Subject line.
        html: HTML body.

    Raises:
        HTTPException: 503 if ``SMTP_HOST`` is not set; 400 if ``to`` contains
            a line break.
        smtplib.SMTPException, OSError: On connection or delivery failures.
    """
    host = os.environ.get("SMTP_HOST", "")
    if not host:
        raise HTTPException(503, "Email ikke konfigureret (SMTP_HOST mangler)")
    # The recipient ends up in a message header; line breaks would allow
    # injecting additional headers.
    if "\r" in to or "\n" in to:
        raise HTTPException(400, "Ugyldig emailadresse")

    user = os.environ.get("SMTP_USER", "")
    pwd = os.environ.get("SMTP_PASS", "")
    port = int(os.environ.get("SMTP_PORT", "587"))

    msg = MIMEMultipart("alternative")
    msg["Subject"] = subject
    msg["From"] = user
    msg["To"] = to
    msg.attach(MIMEText(html, "html"))

    with smtplib.SMTP(host, port, timeout=30) as s:
        s.starttls()
        if user:
            s.login(user, pwd)
        s.sendmail(user, to, msg.as_string())
# Direct execution (`python app.py`): start uvicorn on $PORT (default 8000)
# with auto-reload enabled.
if __name__ == "__main__":
    import uvicorn
    port = int(os.getenv("PORT", "8000"))
    uvicorn.run("app:app", host="0.0.0.0", port=port, reload=True)

BIN
background.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.1 MiB

123
blaaai.nomad Normal file
View File

@@ -0,0 +1,123 @@
# Nomad service job running the BlaaAi container as a single allocation.
job "blaaai" {
  region      = "global"
  datacenters = ["dc1"]
  type        = "service"

  # A fresh UUID on every submission makes each `nomad job run` differ from
  # the previous job version.
  meta {
    uuid = uuidv4()
  }

  update {
    stagger           = "30s"
    max_parallel      = 1
    auto_revert       = true
    progress_deadline = "25m"
  }

  group "blaaai-group" {
    count = 1

    # Pin the group to one specific node by name.
    constraint {
      attribute = "${node.unique.name}"
      value     = "autobox.i80.dk"
    }

    # Canary rollout: one canary, promoted automatically, reverted on failure.
    update {
      canary            = 1
      auto_promote      = true
      min_healthy_time  = "15s"
      healthy_deadline  = "20m"
      progress_deadline = "25m"
      auto_revert       = true
    }

    # Dynamic port; passed to the container as PORT in the task env below.
    network {
      port "http" {}
    }

    reschedule {
      attempts       = 5
      interval       = "10m"
      delay          = "30s"
      delay_function = "exponential"
      max_delay      = "120s"
      unlimited      = false
    }

    # Host volume mounted at /app/data in the task below.
    volume "blaaai-data" {
      type      = "host"
      source    = "blaaai-data"
      read_only = false
    }

    service {
      provider = "consul"
      name     = "blaaai"
      port     = "http"
      tags = [
        "traefik.enable=true",
        "traefik.http.routers.blaaai.rule=Host(`blaaai.i80.dk`)",
        "traefik.http.routers.blaaai.tls=true",
      ]
      # Canary allocations are registered with Traefik disabled.
      canary_tags = [
        "traefik.enable=false",
      ]
      check {
        name     = "http_health_check"
        type     = "http"
        port     = "http"
        path     = "/health"
        interval = "10s"
        timeout  = "5s"
      }
    }

    task "blaaai-task" {
      driver = "docker"

      config {
        # Always re-pull the mutable :latest tag.
        image      = "registry.i80.dk/gitea/blaaai:latest"
        ports      = ["http"]
        force_pull = true
        auth {
          username = "robot$gitserver"
          # NOTE(review): relies on HARBOR_ROBOT_TOKEN from the template below
          # being available for interpolation here — confirm.
          password = "${HARBOR_ROBOT_TOKEN}"
        }
      }

      volume_mount {
        volume      = "blaaai-data"
        destination = "/app/data"
        read_only   = false
      }

      restart {
        attempts = 10
        interval = "10m"
        delay    = "15s"
        mode     = "fail"
      }

      env {
        PORT = "${NOMAD_PORT_http}"
      }

      # Secrets rendered from the key/value store into an env file.
      # NOTE(review): with env = true both values presumably become task
      # environment variables, so the registry token would also be visible to
      # the application process — confirm this is intended.
      template {
        data = <<EOT
ANTHROPIC_API_KEY={{ key "blaaai/anthropic_api_key" }}
HARBOR_ROBOT_TOKEN={{ key "blaaai/harbor_robot_token" }}
EOT
        destination = "secrets/app.env"
        env         = true
      }

      # NOTE(review): 512 MB may be tight if the app launches Chromium via
      # Playwright — verify against real usage.
      resources {
        cpu    = 500
        memory = 512
      }
    }
  }
}

331
fetch_dba.py Normal file
View File

@@ -0,0 +1,331 @@
#!/usr/bin/env python3
"""
dba.dk universal listing monitor — works for any DBA search URL.
Usage:
python3 fetch_dba.py [URL] [--all]
URL Any dba.dk search URL (mobility/cars or recommerce/general goods).
Falls back to DEFAULT_URL if omitted.
--all Fetch all pages (default: page 1 only).
Examples:
python3 fetch_dba.py
python3 fetch_dba.py --all
python3 fetch_dba.py "https://www.dba.dk/recommerce/forsale/search?q=rtx+3090"
python3 fetch_dba.py "https://www.dba.dk/recommerce/forsale/search?q=golf+driver&price_to=3000" --all
"""
import hashlib, re, json, sys, time, math, uuid as _uuid
from datetime import datetime, timezone, timedelta
from pathlib import Path
from urllib.parse import urlparse, parse_qs
from concurrent.futures import ThreadPoolExecutor, as_completed
import requests
# Search used by the CLI when no URL argument is given.
DEFAULT_URL = (
    "https://www.dba.dk/mobility/search/car"
    "?mileage_to=175000&price_from=15000&price_to=110000"
    "&registration_class=1&year_from=2014"
)
# Sent with every HTTP request made by this module.
HEADERS = {"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36"}
BASE_DIR = Path(__file__).parent
# One sub-directory per search (named by its UUID) lives below DATA_DIR.
DATA_DIR = BASE_DIR / "data"
# Cached item detail pages, one JSON file per item id.
ITEM_CACHE = BASE_DIR / "data" / "item_cache"
CACHE_TTL_H = 24  # hours before a cached item detail is re-fetched
# Matches a lowercase, hyphenated version-4 UUID.
UUID_RE = re.compile(r"^[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$")
# ── URL helpers ───────────────────────────────────────────────────────────────
def detect_domain(url: str) -> str:
    """Classify a search URL as 'mobility' (path contains /mobility/) or 'recommerce'."""
    if "/mobility/" in url:
        return "mobility"
    return "recommerce"
def url_slug(url: str) -> str:
    """Build a short, filename-safe slug for a search URL.

    The label comes from the ``q`` query parameter or, when that is empty,
    from the last path segment. It is lower-cased, non-word characters become
    underscores, and it is capped at 30 characters. A 6-character MD5 prefix
    of the full URL is appended, or returned alone when the label is empty.
    """
    parts = urlparse(url)
    query_term = parse_qs(parts.query).get("q", [""])[0]
    last_segment = parts.path.rstrip("/").split("/")[-1]
    source = query_term or last_segment
    label = re.sub(r"[^\w]", "_", source).strip("_").lower()[:30]
    digest = hashlib.md5(url.encode()).hexdigest()[:6]
    if not label:
        return digest
    return f"{label}_{digest}"
def page_url(search_url: str, page: int) -> str:
    """Return ``search_url`` for the given page; page 1 is the URL unchanged."""
    if page <= 1:
        return search_url
    joiner = "&" if "?" in search_url else "?"
    return f"{search_url}{joiner}page={page}"
# ── Search page parsing ───────────────────────────────────────────────────────
def fetch_page(search_url: str, page: int = 1) -> tuple[list[dict], int]:
    """Download one search result page and parse it.

    Returns:
        ``(listings, total_count)`` as produced by ``parse_search_page``.

    Raises:
        requests.HTTPError: On a non-success HTTP status.
    """
    target = page_url(search_url, page)
    response = requests.get(target, headers=HEADERS, timeout=15)
    response.raise_for_status()
    return parse_search_page(response.text)
def parse_search_page(html: str) -> tuple[list[dict], int]:
    """Extract listings and the advertised total from a search result page.

    Listings are read from JSON-LD ``<script>`` blocks whose ``@type`` is
    ``CollectionPage``. Blocks that are not valid JSON, or whose structure is
    not the expected nesting of objects, are skipped instead of raising.

    Args:
        html: Raw HTML of the search result page.

    Returns:
        ``(listings, total)``. ``total`` is the number in front of the first
        "annonce" in the page text, or 0 when none is found.
    """
    listings: list[dict] = []
    total = 0

    # The number must start with a digit; the previous pattern also matched a
    # lone "." (as in "tekst. annoncer"), which made int("") raise ValueError.
    m = re.search(r"(\d[\d\.]*)\s+annonce", html)
    if m:
        total = int(m.group(1).replace(".", ""))

    def _obj(value) -> dict:
        """Return ``value`` if it is a dict, otherwise an empty dict."""
        return value if isinstance(value, dict) else {}

    for block in re.findall(
        r'<script[^>]*type="application/ld\+json"[^>]*>(.*?)</script>',
        html, re.DOTALL
    ):
        try:
            data = json.loads(block)
        except json.JSONDecodeError:
            continue
        # JSON-LD may also be a list or a scalar; only objects are relevant.
        if not isinstance(data, dict) or data.get("@type") != "CollectionPage":
            continue
        elements = _obj(data.get("mainEntity")).get("itemListElement")
        if not isinstance(elements, list):
            continue

        for element in elements:
            p = _obj(_obj(element).get("item"))
            item_url = p.get("url")
            if not isinstance(item_url, str):
                item_url = ""
            # ID is always the last numeric path segment
            item_id = re.search(r"/(\d+)/?$", item_url)

            brand = p.get("brand")
            if isinstance(brand, dict):
                brand_name = brand.get("name")
            else:
                brand_name = brand if isinstance(brand, str) else None

            condition = p.get("itemCondition")
            if not isinstance(condition, str):
                condition = ""

            listings.append({
                "id": item_id.group(1) if item_id else item_url.split("/")[-1],
                "name": p.get("name") or f"{brand_name or ''} {p.get('model') or ''}".strip(),
                "brand": brand_name,
                "model": p.get("model"),
                "description": p.get("description"),
                "price_dkk": _obj(p.get("offers")).get("price"),
                "url": item_url,
                "image": p.get("image"),
                "condition": condition.replace("https://schema.org/", ""),
            })
    return listings, total
def fetch_all_pages(search_url: str) -> list[dict]:
    """Fetch every result page of a search and return all listings.

    The page count is derived from the advertised total and the size of the
    first page (49 is assumed when the first page is empty). Fetching stops
    early at the first empty page, and pauses 0.5 s between pages.
    """
    collected, total = fetch_page(search_url, 1)
    if total == 0:
        # Try counting items directly if total not found in HTML
        total = len(collected)
    per_page = len(collected) or 49
    pages = math.ceil(total / per_page) if total else 1
    print(f"Total: {total} listings across {pages} pages", file=sys.stderr)

    page_no = 2
    while page_no <= pages:
        print(f" Fetching page {page_no}/{pages}", file=sys.stderr)
        batch, _ = fetch_page(search_url, page_no)
        if not batch:
            break
        collected.extend(batch)
        time.sleep(0.5)
        page_no += 1
    return collected
# ── Item detail fetching ──────────────────────────────────────────────────────
def page_to_text(html: str) -> str:
    """Reduce an HTML page to its visible text with footer boilerplate removed.

    Script and style blocks are dropped, remaining tags become spaces and
    whitespace is collapsed. The text is then cut at the first footer marker
    (checked in a fixed order) that occurs after position 200.
    """
    without_code = re.sub(
        r"<(script|style)[^>]*>.*?</\1>", "", html, flags=re.DOTALL | re.IGNORECASE
    )
    untagged = re.sub(r"<[^>]+>", " ", without_code)
    text = re.sub(r"\s+", " ", untagged).strip()

    # A marker found within the first 200 characters is ignored.
    for marker in ("For virksomheder", "Annoncens metadata", "DBA Boost"):
        position = text.find(marker)
        if position > 200:
            return text[:position].strip()
    return text
def fetch_item_details(item: dict) -> dict:
    """Return the visible text of an item page, using a file cache.

    A cached entry younger than ``CACHE_TTL_H`` hours is served directly.
    Otherwise the page is downloaded and the result is cached. Caching is
    best-effort: a failed cache write no longer discards text that was
    fetched successfully, and items without an id bypass the cache instead
    of sharing a single ".json" entry.

    Args:
        item: Listing dict; ``id`` and ``url`` are used.

    Returns:
        ``{"raw_text": str}`` plus ``"from_cache": True`` on a cache hit.
        ``raw_text`` is "" when the page could not be fetched.
    """
    item_id = item.get("id", "")
    cache_key = ITEM_CACHE / f"{item_id}.json" if item_id else None

    # Serve from cache if fresh enough
    if cache_key is not None:
        ITEM_CACHE.mkdir(parents=True, exist_ok=True)
        if cache_key.exists():
            try:
                cached = json.loads(cache_key.read_text())
                cached_at = datetime.fromisoformat(cached["cached_at"]).replace(tzinfo=timezone.utc)
                age_h = (datetime.now(timezone.utc) - cached_at).total_seconds() / 3600
                if age_h < CACHE_TTL_H:
                    return {"raw_text": cached["raw_text"], "from_cache": True}
            except Exception:
                pass  # corrupt cache entry → re-fetch

    try:
        resp = requests.get(item["url"], headers=HEADERS, timeout=10)
        resp.raise_for_status()
        raw_text = page_to_text(resp.text)
    except Exception:
        # Deliberately best-effort: a listing without details is still usable.
        return {"raw_text": ""}

    if cache_key is not None:
        try:
            cache_key.write_text(json.dumps({
                "id": item_id,
                "raw_text": raw_text,
                "cached_at": datetime.now(timezone.utc).isoformat(timespec="seconds"),
            }, ensure_ascii=False))
        except OSError:
            pass  # cache write failed; still return the fetched text
    return {"raw_text": raw_text}
def enrich_listings(listings: list[dict], workers: int = 8) -> list[dict]:
    """Attach a ``details`` dict to every listing, fetching pages in parallel.

    The input list is modified in place and also returned. A short summary of
    fetched versus cached items is printed to stderr.
    """
    print(f"Fetching details for {len(listings)} items…", file=sys.stderr)
    with ThreadPoolExecutor(max_workers=workers) as pool:
        index_by_future = {}
        for position, listing in enumerate(listings):
            index_by_future[pool.submit(fetch_item_details, listing)] = position
        for done in as_completed(index_by_future):
            details = done.result()
            listings[index_by_future[done]]["details"] = details

    cached = 0
    for listing in listings:
        if listing.get("details", {}).get("from_cache"):
            cached += 1
    fetched = len(listings) - cached
    print(f"{fetched} hentet fra DBA, {cached} fra cache", file=sys.stderr)
    return listings
# ── Data directory helpers ────────────────────────────────────────────────────
def search_dir(search_id: str) -> Path:
    """Return the data directory path for a search (not created here)."""
    return DATA_DIR.joinpath(search_id)
def create_search(url: str) -> str:
    """Create a directory and meta.json for a new search and return its UUID."""
    search_id = str(_uuid.uuid4())
    target = search_dir(search_id)
    target.mkdir(parents=True, exist_ok=True)
    record = dict(
        id=search_id,
        url=url,
        domain=detect_domain(url),
        created_at=datetime.now().isoformat(timespec="seconds"),
    )
    meta_path = target / "meta.json"
    meta_path.write_text(json.dumps(record, ensure_ascii=False, indent=2))
    return search_id
def load_meta(search_id: str) -> dict:
    """Load a search's meta.json.

    Raises:
        FileNotFoundError: If the search has no meta.json.
    """
    meta_path = search_dir(search_id) / "meta.json"
    if meta_path.exists():
        return json.loads(meta_path.read_text())
    raise FileNotFoundError(f"Ingen søgning med UUID {search_id}")
def listings_file(search_id: str) -> Path:
    """Return the path of the listings.json file belonging to a search."""
    return search_dir(search_id).joinpath("listings.json")
def seen_file(search_id: str) -> Path:
    """Return the path of the seen.json file belonging to a search."""
    return search_dir(search_id).joinpath("seen.json")
def list_searches() -> list[dict]:
    """Return the metadata of all searches, most recently modified first.

    Each entry is the search's meta.json content plus ``listing_count``.
    Entries below ``DATA_DIR`` without a meta.json are ignored. A search whose
    meta.json cannot be read or parsed is skipped, and an unreadable
    listings.json counts as zero listings, so one damaged search does not
    break the whole overview.
    """
    if not DATA_DIR.exists():
        return []
    results = []
    for d in sorted(DATA_DIR.iterdir(), key=lambda p: p.stat().st_mtime, reverse=True):
        meta_path = d / "meta.json"
        if not meta_path.exists():
            continue
        try:
            meta = json.loads(meta_path.read_text())
        except (OSError, ValueError):  # JSONDecodeError is a ValueError
            continue
        if not isinstance(meta, dict):
            continue
        lf = d / "listings.json"
        try:
            meta["listing_count"] = len(json.loads(lf.read_text())) if lf.exists() else 0
        except (OSError, ValueError, TypeError):
            meta["listing_count"] = 0
        results.append(meta)
    return results
def load_seen(state_file: Path) -> set[str]:
    """Read the set of already-seen listing ids; a missing file is an empty set."""
    if not state_file.exists():
        return set()
    return set(json.loads(state_file.read_text()))
def save_seen(state_file: Path, ids: set[str]) -> None:
    """Write the seen listing ids to ``state_file`` as a sorted JSON array."""
    ordered = sorted(ids)
    state_file.write_text(json.dumps(ordered))
def find_new(listings: list[dict], seen: set[str]) -> list[dict]:
    """Return the listings whose ``id`` is not in ``seen``, keeping their order."""
    unseen = []
    for listing in listings:
        if listing["id"] not in seen:
            unseen.append(listing)
    return unseen
# ── Output ────────────────────────────────────────────────────────────────────
def print_listing(item: dict) -> None:
    """Print a short, human-readable summary of one listing to stdout.

    The detail text is previewed with at most 160 characters. A truncated
    preview ends with an ellipsis; previously an empty string was appended,
    so truncation was invisible.
    """
    text = item.get("details", {}).get("raw_text", "")
    preview = (text[:160] + "…") if len(text) > 160 else text
    print(
        f"[{item['id']}] {item['name']}\n"
        f" Pris: {item['price_dkk']} DKK | {item.get('condition','')}\n"
        f" {item['url']}\n"
        f" {preview}\n"
    )
# ── Main ──────────────────────────────────────────────────────────────────────
def main() -> None:
    """Command-line entry point.

    Positional argument: an existing search UUID, a search URL, or nothing
    (falls back to ``DEFAULT_URL``). Flags: ``--all`` fetches every page,
    ``--list`` (or the word ``list``) prints the stored searches. New listings
    are enriched, appended to the search's listings.json and printed.
    """
    args = [a for a in sys.argv[1:] if not a.startswith("-")]
    flags = [a for a in sys.argv[1:] if a.startswith("-")]
    fetch_all = "--all" in flags
    first = args[0] if args else None

    # ── list existing searches ─────────────────────────────────────────────────
    if "--list" in flags or first == "list":
        searches = list_searches()
        if not searches:
            print("Ingen søgninger endnu. Kør: python fetch_dba.py <url>")
            return
        print(f"{'UUID':36} {'Oprettet':19} {'#':4} URL")
        # Visible separator under the header (was an empty string repeated).
        print("─" * 100)
        for s in searches:
            print(f"{s['id']:36} {s['created_at']:19} {s['listing_count']:4} {s['url'][:60]}")
        return

    # ── resolve search_id or create new ───────────────────────────────────────
    if first and UUID_RE.match(first):
        search_id = first
        try:
            meta = load_meta(search_id)
        except FileNotFoundError as exc:
            # Unknown UUID: report it and exit non-zero instead of a traceback.
            print(exc, file=sys.stderr)
            sys.exit(1)
        search_url = meta["url"]
        print(f"🔄 Bruger eksisterende søgning: {search_id}", file=sys.stderr)
    else:
        search_url = first if first and first.startswith("http") else DEFAULT_URL
        search_id = create_search(search_url)
        print(f"✨ Ny søgning oprettet: {search_id}", file=sys.stderr)

    domain = detect_domain(search_url)
    sf = seen_file(search_id)
    lf = listings_file(search_id)
    emoji = "🚗" if domain == "mobility" else "🛒"
    print(f"{emoji} Domain: {domain} | {'All pages' if fetch_all else 'Page 1'}", file=sys.stderr)
    print(f" URL: {search_url}", file=sys.stderr)
    print(f" Dir: data/{search_id}/", file=sys.stderr)

    seen = load_seen(sf)
    listings = fetch_all_pages(search_url) if fetch_all else fetch_page(search_url, 1)[0]
    new_listings = find_new(listings, seen)
    if not new_listings:
        print("Ingen nye annoncer siden sidst.")
        return

    new_listings = enrich_listings(new_listings)
    existing = json.loads(lf.read_text()) if lf.exists() else []
    existing.extend(new_listings)
    lf.write_text(json.dumps(existing, ensure_ascii=False, indent=2))
    print(f"💾 Gemt {len(new_listings)} nye → data/{search_id}/listings.json ({len(existing)} total)\n", file=sys.stderr)
    print(f"\n📋 UUID: {search_id}")
    print(f"{emoji} {len(new_listings)} ny(e) annonce(r):\n")
    for item in new_listings:
        print_listing(item)
    save_seen(sf, seen | {l["id"] for l in listings})


if __name__ == "__main__":
    main()

10
requirements.txt Normal file
View File

@@ -0,0 +1,10 @@
anthropic==0.104.1
beautifulsoup4==4.14.3
fastapi==0.136.3
httpx==0.28.1
jinja2>=3.1.0
playwright>=1.40.0
pydantic==2.13.3
python-multipart>=0.0.6
# fetch_dba.py imports requests and app.py imports fetch_dba, so the service
# fails at import time without it.
requests>=2.31.0
starlette==1.1.0
uvicorn==0.48.0

561
score.py Normal file
View File

@@ -0,0 +1,561 @@
#!/usr/bin/env python3
"""
AI-powered scoring of DBA listings using Claude.
Usage:
python3 score.py results_car_89a242.json
python3 score.py results_rtx_3090_623595.json
python3 score.py results_car_89a242.json --top 10 # show only top N
python3 score.py results_car_89a242.json --save # write ranked output to ranked_*.json
python3 score.py results_car_89a242.json --force # ignore cache, re-score everything
python3 score.py results_car_89a242.json --prefs "Ikke franske biler"
Scores are cached in results_*.json — only new/unscored listings call Claude.
Change --prefs to invalidate cache and re-score with new preferences.
Requires:
ANTHROPIC_API_KEY env var
pip install anthropic
"""
import hashlib, json, os, re, sys, uuid as _uuid
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime
from pathlib import Path
import anthropic
MODEL = "claude-haiku-4-5-20251001"  # fast + cheap; swap to sonnet for better ranking
# The API key is read from the environment (see "Requires" in the module
# docstring) and must never be committed. The key that was previously
# hard-coded here is in version-control history and has to be revoked.
# None when the variable is unset.
API_KEY = os.environ.get("ANTHROPIC_API_KEY")
MAX_TOKENS = 2048
BASE_DIR = Path(__file__).parent
DATA_DIR = BASE_DIR / "data"
SCORE_CACHE = BASE_DIR / "data" / "score_cache"  # persistent cross-search score cache
# Matches a lowercase, hyphenated version-4 UUID.
UUID_RE = re.compile(r"^[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$")
# Global (cross-search) scoring metrics.
METRICS_FILE = DATA_DIR / "metrics.json"
# Pricing: Claude Haiku 4.5 — https://www.anthropic.com/pricing
# The previous values ($0.80 / $4.00 per MTok) did not match the Haiku 4.5
# model selected in MODEL, so reported costs were too low. Re-check these
# against the pricing page whenever MODEL changes.
_PRICE_INPUT_PER_TOKEN = 1.00 / 1_000_000   # $1.00 per MTok
_PRICE_OUTPUT_PER_TOKEN = 5.00 / 1_000_000  # $5.00 per MTok


def calc_cost(input_tokens: int, output_tokens: int) -> float:
    """Return the USD cost of a request, rounded to six decimals.

    Args:
        input_tokens: Number of prompt tokens billed.
        output_tokens: Number of completion tokens billed.
    """
    return round(input_tokens * _PRICE_INPUT_PER_TOKEN + output_tokens * _PRICE_OUTPUT_PER_TOKEN, 6)
def update_metrics(search_id: str, input_tokens: int, output_tokens: int, listings_scored: int) -> None:
    """Record token usage and cost for one scoring run.

    Writes ``metrics.json`` into the search's directory (only if that
    directory exists) and adds the run to the running totals in the global
    metrics file. An unreadable global file is treated as empty.
    """
    cost = calc_cost(input_tokens, output_tokens)
    now = datetime.now().isoformat(timespec="seconds")

    # Per-search snapshot (overwrites the previous run's file).
    target_dir = DATA_DIR / search_id
    if target_dir.exists():
        snapshot = dict(
            search_id=search_id,
            scored_at=now,
            model=MODEL,
            listings_scored=listings_scored,
            input_tokens=input_tokens,
            output_tokens=output_tokens,
            cost_usd=cost,
        )
        (target_dir / "metrics.json").write_text(json.dumps(snapshot, indent=2))

    # Global running totals.
    totals = {}
    if METRICS_FILE.exists():
        try:
            totals = json.loads(METRICS_FILE.read_text())
        except Exception:
            pass
    increments = (
        ("total_searches", 1),
        ("total_listings_scored", listings_scored),
        ("total_input_tokens", input_tokens),
        ("total_output_tokens", output_tokens),
    )
    for key, delta in increments:
        totals[key] = totals.get(key, 0) + delta
    totals["total_cost_usd"] = round(totals.get("total_cost_usd", 0.0) + cost, 6)
    totals["last_updated"] = now
    METRICS_FILE.write_text(json.dumps(totals, indent=2))
def prefs_hash(prefs: str) -> str:
    """Return an 8-character MD5 prefix of the trimmed preferences, or 'none' if blank."""
    cleaned = prefs.strip()
    if not cleaned:
        return "none"
    return hashlib.md5(cleaned.encode()).hexdigest()[:8]
def _score_cache_key(item_id: str, prefs: str, category: str) -> Path:
    """Return the score-cache file path for an item under given prefs and category.

    The file name combines the item id, the preference hash and a 6-character
    MD5 prefix of the category.
    """
    prefs_part = prefs_hash(prefs)
    category_part = hashlib.md5(category.encode()).hexdigest()[:6]
    file_name = f"{item_id}_{prefs_part}_{category_part}.json"
    return SCORE_CACHE / file_name
def load_score_cache(item_id: str, prefs: str, category: str) -> dict | None:
    """Return the cached score for this item/prefs/category, or None.

    None is returned both when no cache file exists and when the file cannot
    be read or parsed.
    """
    cache_path = _score_cache_key(item_id, prefs, category)
    if not cache_path.exists():
        return None
    try:
        return json.loads(cache_path.read_text())
    except Exception:
        return None
def save_score_cache(item_id: str, prefs: str, category: str, score_data: dict) -> None:
    """Store a score result in the cache, creating the cache directory if needed."""
    SCORE_CACHE.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(score_data, ensure_ascii=False)
    _score_cache_key(item_id, prefs, category).write_text(serialized)
# ── Helpers ───────────────────────────────────────────────────────────────────
def trim_text(raw: str, max_chars: int = 800) -> str:
    """Drop the navigation header from item text and cap its length.

    The text is cut to start at the first marker (checked in the listed
    order) that occurs anywhere in it. Text longer than ``max_chars`` is
    truncated and marked with an ellipsis; previously an empty string was
    appended, so the truncation was invisible to the reader of the prompt.

    Args:
        raw: Visible text of an item page.
        max_chars: Maximum number of characters kept before the ellipsis.

    Returns:
        The trimmed text, stripped of surrounding whitespace.
    """
    # Skip past the standard navigation header
    for marker in ["Varebeskrivelse", "Beskrivelse", "Specifikationer"]:
        idx = raw.find(marker)
        if idx != -1:
            raw = raw[idx:]
            break
    # Trim to max length
    if len(raw) > max_chars:
        raw = raw[:max_chars] + "…"
    return raw.strip()
def extract_structured_fields(raw: str) -> dict:
    """Extract labelled facts (year, km, condition, gear, fuel, owners) from item text.

    Each field has one case-insensitive pattern; only fields whose pattern
    matches appear in the result, with the captured value stripped.
    """
    field_patterns = (
        ("year", r"(?:Modelår|Årstal|Årgang)[^\d]*(\d{4})"),
        ("km", r"Kilometertal\s+([\d\.,]+ km)"),
        ("condition", r"Stand\s*:\s*([^\n|]{3,60})"),
        ("gear", r"Geartype\s+(\S+)"),
        ("fuel", r"Drivmiddel\s+(\S+)"),
        ("owners", r"Antal ejere\s+(\d+)"),
    )
    found = (
        (key, re.search(pattern, raw, re.IGNORECASE))
        for key, pattern in field_patterns
    )
    return {key: match.group(1).strip() for key, match in found if match}
def listing_summary(item: dict, idx: int) -> str:
    """Build the compact text block describing one listing in the AI prompt.

    The body text is the fetched page text. When that is missing or empty,
    for example because the detail fetch failed, the description from the
    search result is used instead. Previously the fallback only applied when
    the ``raw_text`` key was absent, and a ``None`` description crashed the
    regex helpers.

    Args:
        item: Listing dict with ``id``, ``name`` and ``price_dkk``; ``details``
            and ``description`` are optional.
        idx: Zero-based position; shown one-based in the header line.
    """
    details = item.get("details") or {}
    raw = details.get("raw_text") or item.get("description") or ""
    fields = extract_structured_fields(raw)
    text = trim_text(raw)

    # Structured facts go on one line, in a fixed order, ahead of the free text.
    labels = (
        ("year", "Årgang"),
        ("km", "Km"),
        ("fuel", "Brændstof"),
        ("gear", "Gear"),
        ("owners", "Ejere"),
        ("condition", "Stand"),
    )
    meta_parts = [f"{label}: {fields[key]}" for key, label in labels if fields.get(key)]
    meta_line = " | ".join(meta_parts)

    return (
        f"--- Annonce #{idx + 1} (ID: {item['id']}) ---\n"
        f"Navn: {item['name']}\n"
        f"Pris: {item['price_dkk']} DKK\n"
        + (f"{meta_line}\n" if meta_line else "")
        + f"{text}\n"
    )
def detect_category(items: list[dict]) -> str:
    """Guess the knowledge-context category for a set of listings.

    Only the first listing is inspected. A URL containing "/mobility/" means
    used cars; otherwise the "Du er her" breadcrumb in the listing text is
    lower-cased and matched (by substring) against the keyword lists in
    ``_CATEGORY_MAP``. The generic category is returned when nothing matches
    or when ``items`` is empty.

    Args:
        items: Listing dicts.

    Returns:
        A category key such as "brugte biler" or "brugte varer".
    """
    fallback = "brugte varer"
    if not items:
        return fallback

    first = items[0]
    if "/mobility/" in first.get("url", ""):
        return "brugte biler"

    # The breadcrumb ends at the first newline or at the gallery heading.
    raw = first.get("details", {}).get("raw_text", "")
    crumb_match = re.search(r"Du er her\s+(.+?)(?:\n|Billedgalleri)", raw)
    breadcrumb = crumb_match.group(1).lower() if crumb_match else ""

    return next(
        (
            context_key
            for keywords, context_key in _CATEGORY_MAP
            if any(kw in breadcrumb for kw in keywords)
        ),
        fallback,
    )
# Per-category bullet lists (Danish) that build_prompt() inserts into the
# prompt as background knowledge for the model. Keys are the category names
# returned by detect_category(); "brugte varer" doubles as the generic
# fallback entry.
KNOWLEDGE_CONTEXT = {
    "brugte biler": (
        "- Kendte reliabilitetsproblemer (fx Peugeot 1.2 PureTech timing-kæde, VW DSG-gearkasse, BMW N47 dieselmotor)\n"
        "- Km-stand og alder sat i forhold til markedsværdi for den specifikke model og variant\n"
        "- Kendte stærke og svage modeller (fx Toyota/Mazda høj reliabilitet, Renault/Citroën/Fiat lavere)\n"
        "- Typiske brugtpriser for modellen baseret på år og km"
    ),
    "elektronik": (
        "- Produktgenerationens relative ydelse og markedsværdi (fx RTX 4070 > RTX 3080, iPhone 15 > 13)\n"
        "- Kendte problemer: mining-slid på GPU'er, batterinedgang på telefoner/laptops, kondensatorfejl\n"
        "- Hvad er en rimelig brugtpris for dette produkt i denne stand?\n"
        "- Stand er afgørende — 'Som ny' vs 'Brugt - med synlige brugsspor' bør veje tungt"
    ),
    "sport": (
        "- Kendte mærker og deres relative kvalitet (fx Titleist/Callaway/TaylorMade til golf, Shimano-grupper til cykler)\n"
        "- Produktets alder og teknologisk forældelse (fx ældre golfkøller med stålskaft vs moderne grafit)\n"
        "- Stand er meget afgørende for sportsudstyr — slid påvirker ydeevne direkte\n"
        "- Hvad er en rimelig brugtpris for dette udstyr i denne stand og fra dette mærke?"
    ),
    "møbler": (
        "- Kendte mærker og materialer (fx massivt træ > spånplade, dansk design har høj gensalgsværdi)\n"
        "- Stand og alder — patina kan være positivt for vintage, negativt for moderne møbler\n"
        "- Originale vs efterligninger (fx IKEA POÄNG vs original Fritz Hansen)\n"
        "- Hvad er en rimelig brugtpris baseret på stand, alder og mærke?"
    ),
    "brugte varer": (
        "- Produktets markedsværdi brugt i denne stand\n"
        "- Kendte problemer eller svagheder ved denne model/variant\n"
        "- Stand er afgørende — 'Som ny' vs 'Brugt - med synlige brugsspor' bør veje tungt\n"
        "- Er varen komplet? Mangler tilbehør eller dokumentation?"
    ),
}
# Breadcrumb keywords → knowledge context key.
# detect_category() walks this list in order and returns the key of the first
# entry with a keyword that occurs as a substring of the lower-cased
# breadcrumb, so earlier entries take precedence.
_CATEGORY_MAP = [
    (["elektronik", "computer", "grafikkort", "telefon", "mobil", "tv", "hifi", "kamera"], "elektronik"),
    (["golf", "sport", "cykel", "fitness", "jagt", "fiskeri", "friluftsliv"], "sport"),
    (["møbel", "stol", "bord", "sofa", "seng", "reol", "lampe", "bolig", "indretning"], "møbler"),
]
def build_prompt(items: list[dict], category: str, criteria: str, prefs: str = "") -> str:
    """Assemble the Danish scoring prompt for one batch of listings.

    Args:
        items: Listings to include; each is rendered with listing_summary()
            and numbered by its position in this list.
        category: Category name; selects the KNOWLEDGE_CONTEXT entry (the
            "brugte varer" entry is used for unknown categories) and is
            named in the opening sentence of the prompt.
        criteria: Scoring criteria text inserted verbatim.
        prefs: Optional buyer preferences. When non-blank, an extra section
            asking the model to weigh them heavily is included.

    Returns:
        The complete prompt. It asks for a JSON array with one object per
        listing containing "id", "score", "reason" and "warnings".
    """
    summaries = "\n".join(listing_summary(i, n) for n, i in enumerate(items))
    # The preferences section is left out entirely when prefs is blank.
    prefs_block = ""
    if prefs.strip():
        prefs_block = f"""
KØBERENS EGNE PRÆFERENCER (vigtig — vej disse tungt i din scoring):
{prefs.strip()}
Annoncer der strider mod disse præferencer skal have markant lavere score.
"""
    knowledge = KNOWLEDGE_CONTEXT.get(category, KNOWLEDGE_CONTEXT["brugte varer"])
    # Doubled braces below are literal braces in the JSON example (f-string escaping).
    return f"""Du er en ekspert køberrådgiver for {category} på DBA.
Brug BÅDE annonceteksten OG din egen viden om produkterne:
{knowledge}
{prefs_block}
Scorer HVER annonce UAFHÆNGIGT på en absolut skala 1-10 baseret på disse kriterier:
{criteria}
ABSOLUT SCORESKALA (brug din viden om markedet — scoren må IKKE afhænge af de andre annoncer i denne batch):
- 9-10: Fremragende køb — markant under markedspris, pålidelig model, god stand/historik
- 7-8: Godt køb — fair pris, solid model, få eller ingen bekymringer
- 5-6: Middel — markedspris, eller visse risici/ukendte faktorer
- 3-4: Under middel — overpriset eller kendte modelproblem
- 1-2: Undgå — alvorlige røde flag, stor risiko eller klart overpriset
WARNINGS — list KUN konkrete, faktuelle røde flag der er direkte støttet af annonceteksten eller veldokumenterede modelproblemer:
- Nævn KUN ting der er bekræftet i annonceteksten (fx "sælger nævner støj", "ingen billeder", "kun afhentning")
- Eller veldokumenterede modelspecifikke problemer (fx "Turbo-variant har historisk køleproblemer")
- Skriv IKKE generiske advarsler om mining, stand etc. medmindre det eksplicit nævnes i annoncen
- Hvis ingen konkrete røde flag: tom streng ""
Returner KUN et JSON-array — ingen forklaringer udenfor JSON:
[
{{
"id": "annonce-ID",
"score": 8.5,
"reason": "Begrundelse på dansk (maks 2 sætninger). Nævn gerne konkret viden om modellen.",
"warnings": "Kun konkrete røde flag fra annonceteksten eller kendte modelproblemer. Tom streng hvis ingen."
}},
...
]
Alle {len(items)} annoncer skal med. Score er 1-10 (10 = suverænt køb).
ANNONCER:
{summaries}"""
# ── Scoring ───────────────────────────────────────────────────────────────────
def score_listings(
    items: list[dict],
    criteria: str,
    prefs: str = "",
    batch_size: int = 10,
    force: bool = False,
    source_file: Path | None = None,
) -> list[dict]:
    """Score listings with the AI model, reusing cached scores where possible.

    Listings are split into "cached" (a persistent score-cache entry exists,
    or the item already carries a score for the same preferences hash) and
    "to score". Uncached listings are sent to the model in parallel batches
    and the results are written back onto the item dicts (``ai_score``,
    ``ai_rank``, ``ai_reason``, ``ai_warnings``, ``ai_prefs_hash``,
    ``ai_scored_at``) and into the persistent score cache.

    Args:
        items: Listing dicts; mutated in place.
        criteria: Scoring criteria text passed on to build_prompt().
        prefs: Free-text buyer preferences; part of the cache key.
        batch_size: Maximum listings per model request (values below 1 are
            treated as 1).
        force: When true, both caches are ignored and everything is re-scored.
        source_file: Optional listings file. When given, the scored items are
            written back to it and usage metrics are recorded under the name
            of its parent directory.

    Returns:
        Listings that have a score for the current preferences, sorted by
        score (highest first) with ``ai_rank`` renumbered from 1. An empty
        list when there is nothing to score and nothing cached.

    Raises:
        Whatever the API client raises for a failed request; such errors are
        not swallowed here.
    """
    client = anthropic.Anthropic(api_key=os.environ.get("ANTHROPIC_API_KEY", API_KEY))
    category = detect_category(items)
    phash = prefs_hash(prefs)
    # range() rejects a zero step, so never batch with less than one item.
    batch_size = max(1, batch_size)

    # ── Split: persistent score cache → in-file cache → needs AI scoring ────
    to_score, cached = [], []
    now = datetime.now().isoformat(timespec="seconds")
    for item in items:
        if not force:
            # 1. Check persistent cross-search score cache
            sc = load_score_cache(str(item["id"]), prefs, category)
            if sc:
                item["ai_score"] = sc["score"]
                item["ai_rank"] = sc.get("rank")
                item["ai_reason"] = sc.get("reason", "")
                item["ai_warnings"] = sc.get("warnings", "")
                item["ai_prefs_hash"] = phash
                item["ai_scored_at"] = sc.get("scored_at", now)
                cached.append(item)
                continue
            # 2. In-file cache (same search UUID, already scored)
            if item.get("ai_score") is not None and item.get("ai_prefs_hash") == phash:
                cached.append(item)
                continue
        to_score.append(item)

    if cached:
        print(f" ♻️ {len(cached)} annoncer genbruger cache", file=sys.stderr)
    if to_score:
        print(f" 🤖 {len(to_score)} annoncer sendes til AI…", file=sys.stderr)
    elif not cached:
        print(" Ingen annoncer at score.", file=sys.stderr)
        return []

    # ── Score only uncached items — parallel batches ──────────────────────────
    all_scores: dict[str, dict] = {}
    if to_score:
        batches = [to_score[i:i + batch_size] for i in range(0, len(to_score), batch_size)]
        print(f" ({len(batches)} parallelle batches à max {batch_size})", file=sys.stderr)

        def score_batch(b_idx: int, batch: list[dict]) -> tuple[dict[str, dict], int, int]:
            """Score one batch; returns (scores by id, input tokens, output tokens)."""
            prompt = build_prompt(batch, category, criteria, prefs)
            response = client.messages.create(
                model=MODEL,
                max_tokens=MAX_TOKENS,
                temperature=0,
                messages=[{"role": "user", "content": prompt}],
            )
            inp = response.usage.input_tokens
            out = response.usage.output_tokens
            text = response.content[0].text.strip()
            # The model is asked for a bare JSON array; tolerate surrounding text.
            json_m = re.search(r"\[.*\]", text, re.DOTALL)
            if not json_m:
                print(f" ⚠ Kunne ikke parse svar fra batch {b_idx + 1}:\n{text[:300]}", file=sys.stderr)
                return {}, inp, out
            try:
                parsed = json.loads(json_m.group(0))
            except json.JSONDecodeError:
                # Treat invalid JSON like an unparseable reply instead of
                # aborting every other batch; these items are retried next run.
                print(f" ⚠ Kunne ikke parse svar fra batch {b_idx + 1}:\n{text[:300]}", file=sys.stderr)
                return {}, inp, out
            result = {}
            for s in parsed:
                # Skip stray entries that are not score objects.
                if isinstance(s, dict) and "id" in s:
                    result[str(s["id"])] = s
            print(f" ✓ Batch {b_idx + 1}/{len(batches)} færdig ({len(result)} scores, {inp}+{out} tok)", file=sys.stderr)
            return result, inp, out

        total_input = total_output = 0
        with ThreadPoolExecutor(max_workers=min(len(batches), 8)) as pool:
            futures = {pool.submit(score_batch, i, b): i for i, b in enumerate(batches)}
            for future in as_completed(futures):
                scores, inp, out = future.result()
                all_scores.update(scores)
                total_input += inp
                total_output += out

        # Write scores + cache metadata back onto items
        now = datetime.now().isoformat(timespec="seconds")
        scored_count = 0
        for item in to_score:
            s = all_scores.get(str(item["id"]))
            if not s or s.get("score") is None:
                # No usable score came back: leave the item unstamped so an
                # older score is not mistaken for one matching these prefs.
                continue
            item["ai_score"] = s.get("score")
            item["ai_rank"] = s.get("rank")
            item["ai_reason"] = s.get("reason", "")
            item["ai_warnings"] = s.get("warnings", "")
            # Persist to cross-search score cache so same item never re-scored
            save_score_cache(str(item["id"]), prefs, category, {
                "score": s.get("score"),
                "rank": s.get("rank"),
                "reason": s.get("reason", ""),
                "warnings": s.get("warnings", ""),
                "scored_at": now,
            })
            item["ai_prefs_hash"] = phash
            item["ai_scored_at"] = now
            scored_count += 1

        # Auto-save scores back into source file so cache persists next run
        if source_file:
            all_items_map = {str(i["id"]): i for i in cached + to_score}
            source_file.write_text(json.dumps(list(all_items_map.values()), ensure_ascii=False, indent=2))
            print(f" 💾 {scored_count} nye scores gemt → {source_file}", file=sys.stderr)

        cost = calc_cost(total_input, total_output)
        print(f" 💰 {total_input}+{total_output} tokens → ${cost:.4f}", file=sys.stderr)
        # Metrics are keyed by the search directory, which only exists when a
        # source file was supplied.
        if source_file:
            update_metrics(source_file.parent.name, total_input, total_output, scored_count)

    # ── Combine, re-sort, re-rank ─────────────────────────────────────────────
    # Only scores that belong to the current preferences are ranked.
    combined = [
        i for i in (cached + to_score)
        if i.get("ai_score") is not None and i.get("ai_prefs_hash") == phash
    ]
    combined.sort(key=lambda x: x["ai_score"], reverse=True)
    for rank, item in enumerate(combined, 1):
        item["ai_rank"] = rank
    return combined
# ── Output ────────────────────────────────────────────────────────────────────
def print_results(ranked: list[dict], top: int | None = None) -> None:
show = ranked[:top] if top else ranked
print(f"\n{'' * 60}")
print(f" TOP {len(show)} ANNONCER (af {len(ranked)} scoret)")
print(f"{'' * 60}\n")
for item in show:
score = item.get("ai_score", "?")
bar = "" * int(score) + "" * (10 - int(score)) if isinstance(score, (int, float)) else ""
print(
f"#{item['ai_rank']:>2} [{score:4.1f}] {bar} {item['name']}\n"
f" Pris: {item['price_dkk']} DKK | {item['url']}\n"
f"{item.get('ai_reason','')}\n"
)
if item.get("ai_warnings"):
print(f" ⚠️ {item['ai_warnings']}\n")
# ── Main ──────────────────────────────────────────────────────────────────────
# Scoring criteria text (Danish) inserted into the prompt, keyed by DBA
# domain. main() selects "mobility" when the first listing URL contains
# "/mobility/" and "recommerce" otherwise.
CRITERIA = {
    "mobility": (
        "- Pris ift. markedsværdi for den specifikke model/år/km (brug din viden)\n"
        "- Modelreliabilitet og kendte svagheder (timing-kæde, gearkasse, rust etc.)\n"
        "- Km-stand og alder (Årgang og Kilometertal er angivet hvis tilgængeligt)\n"
        "- Privat sælger foretrukket (forhandler = højere pris, ingen reklamationsret ved brugt)\n"
        "- Servicehistorik, nysynet, tandrem nævnt?\n"
        "- Udstyrsniveau og antal ejere"
    ),
    "recommerce": (
        "- Pris ift. aktuel markedsværdi for produktet (brug din viden om typiske priser)\n"
        "- Produktgenerationens relative ydelse og værdi (fx GPU-generationer, produktionsår)\n"
        "- Stand (DBA's standbeskrivelse er angivet: 'Som ny', 'Brugt - men i god stand', 'Brugt - med synlige brugsspor')\n"
        "- Kendte problemer med denne model/variant\n"
        "- Er varen komplet? Mangler tilbehør?\n"
        "- Privat sælger foretrukket"
    ),
}
def _parse_cli_args(argv: list[str]) -> tuple[int | None, str, bool, bool, list[str]]:
    """Split raw CLI arguments into (top_n, prefs, force, save, positional)."""

    def to_int(value: str) -> int:
        # A bad --top value is a usage error, not a crash with a traceback.
        try:
            return int(value)
        except ValueError:
            print(f"Fejl: --top kræver et heltal (fik {value!r}).", file=sys.stderr)
            sys.exit(1)

    top_n = None
    prefs = ""
    force = False
    save = False
    positional = []
    i = 0
    while i < len(argv):
        a = argv[i]
        if a in ("--top", "--prefs") and i + 1 < len(argv):
            # Space-separated form: the value is the next argument.
            if a == "--top":
                top_n = to_int(argv[i + 1])
            else:
                prefs = argv[i + 1]
            i += 2
        elif a.startswith("--top="):
            top_n = to_int(a[6:])
            i += 1
        elif a.startswith("--prefs="):
            prefs = a[8:]
            i += 1
        elif a == "--force":
            force = True
            i += 1
        elif a == "--save":
            save = True
            i += 1
        elif not a.startswith("--"):
            positional.append(a)
            i += 1
        else:
            # Unknown options (and a trailing --top/--prefs without a value)
            # are ignored.
            i += 1
    return top_n, prefs, force, save, positional


def main() -> None:
    """Command-line entry point: load a listings file, score it and print the ranking.

    Usage: ``[<uuid>|<path>] [--top N] [--prefs TEXT] [--force] [--save]``.
    Without a positional argument the most recently modified search directory
    under DATA_DIR that contains a ``listings.json`` is used. When stdin is a
    terminal and no ``--prefs`` was given, the user may add preferences and
    re-score for up to three rounds in total.

    Exits with status 1 when the API key is missing, ``--top`` is not an
    integer, no search exists, or the listings file is not found.
    """
    if "ANTHROPIC_API_KEY" not in os.environ and not API_KEY:
        print("Fejl: ANTHROPIC_API_KEY er ikke sat.", file=sys.stderr)
        sys.exit(1)

    # Parse args properly — handles both --top 3 and --top=3
    top_n, prefs, force, save, positional = _parse_cli_args(sys.argv[1:])

    if not positional:
        # Auto-detect: most recent data/<uuid>/listings.json
        searches = sorted(
            (d for d in DATA_DIR.iterdir() if (d / "listings.json").exists()),
            key=lambda d: d.stat().st_mtime, reverse=True
        ) if DATA_DIR.exists() else []
        if not searches:
            print("Ingen søgninger fundet. Kør fetch_dba.py <url> først.", file=sys.stderr)
            sys.exit(1)
        search_dir = searches[0]
        results_file = search_dir / "listings.json"
        print(f"Bruger nyeste søgning: {search_dir.name}", file=sys.stderr)
    else:
        ref = positional[0]
        # A bare search UUID refers to data/<uuid>/listings.json; anything
        # else is taken as a file path.
        if UUID_RE.match(ref):
            results_file = DATA_DIR / ref / "listings.json"
        else:
            results_file = Path(ref)
        if not results_file.exists():
            print(f"Fejl: {results_file} ikke fundet.", file=sys.stderr)
            sys.exit(1)

    items = json.loads(results_file.read_text())
    print(f"Loaded {len(items)} annoncer fra {results_file}", file=sys.stderr)
    domain = "mobility" if items and "/mobility/" in items[0].get("url", "") else "recommerce"
    criteria = CRITERIA[domain]

    # ── Interactive refinement loop (up to 3 attempts) ───────────────────────
    MAX_ROUNDS = 3
    interactive = sys.stdin.isatty() and not prefs
    for attempt in range(MAX_ROUNDS):
        if prefs:
            print(f"\n🎯 Præferencer: {prefs}", file=sys.stderr)
        ranked = score_listings(items, criteria, prefs, force=force, source_file=results_file)
        # After first run, don't force re-score on subsequent interactive rounds
        force = False
        print_results(ranked, top_n)
        if save:
            out = results_file.parent / "ranked.json"
            out.write_text(json.dumps(ranked, ensure_ascii=False, indent=2))
            print(f"\n💾 Ranked output gemt → {out}", file=sys.stderr)
        if not interactive or attempt >= MAX_ROUNDS - 1:
            break
        remaining = MAX_ROUNDS - attempt - 1
        print(f"\n{'' * 60}")
        print(f" Tilføj præferencer for at re-score ({remaining} forsøg tilbage)")
        print(f" Eks: 'Ikke franske biler' / 'Helst manuel gear' / 'Max 50 km fra Aarhus'")
        print(f" (Tryk Enter for at afslutte)")
        print(f"{'' * 60}")
        try:
            new_prefs = input(" > ").strip()
        except (EOFError, KeyboardInterrupt):
            break
        if not new_prefs:
            break
        # Preferences accumulate across rounds, one per line.
        prefs = f"{prefs}\n{new_prefs}".strip() if prefs else new_prefs
        # Force re-score when prefs change (cache hash will differ anyway, but be explicit)
        force = True
        print(f"\n🔄 Re-scorer med dine præferencer…\n", file=sys.stderr)
# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()

BIN
static/background.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.2 MiB

BIN
static/blaaai_tutorial.mp4 Normal file

Binary file not shown.

536
templates/index.html Normal file
View File

@@ -0,0 +1,536 @@
<!DOCTYPE html>
<html lang="da">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>BlaaAi — Find den bedste annonce</title>
<link rel="preconnect" href="https://fonts.googleapis.com">
<link href="https://fonts.googleapis.com/css2?family=Space+Grotesk:wght@400;500;600;700&display=swap" rel="stylesheet">
<script src="https://cdn.tailwindcss.com"></script>
<script>
// Theme overrides for the Tailwind script loaded above: Space Grotesk as
// the default sans font and a custom "ink" colour scale.
tailwind.config = {
  theme: {
    extend: {
      fontFamily: { sans: ['Space Grotesk', 'sans-serif'] },
      colors: {
        ink: { DEFAULT: '#09090b', 50: '#f4f4f5', 100: '#e4e4e7', 200: '#27272a', 300: '#3f3f46' }
      }
    }
  }
}
</script>
<style>
body { background: #fafaf9; color: #0c0a09; font-family: 'Space Grotesk', sans-serif; }
.card-enter { animation: fadeUp .25s ease both; }
@keyframes fadeUp { from { opacity:0; transform:translateY(10px); } to { opacity:1; transform:translateY(0); } }
.spinner { animation: spin .9s linear infinite; }
@keyframes spin { to { transform: rotate(360deg); } }
.score-fill { background: #0c0a09; transition: width .6s cubic-bezier(.4,0,.2,1); }
@keyframes shimmer { 0%{background-position:-400% 0} 100%{background-position:400% 0} }
.skeleton { background:linear-gradient(90deg,#f0efed 25%,#e8e5e1 50%,#f0efed 75%);background-size:400% 100%;animation:shimmer 1.6s ease-in-out infinite;border-radius:4px; }
input, textarea {
background: #fff;
border: 1px solid #e7e5e4;
color: #0c0a09;
outline: none;
transition: border-color .15s;
}
input:focus, textarea:focus { border-color: #a8a29e; }
input::placeholder, textarea::placeholder { color: #a8a29e; }
.btn-primary {
background: #0c0a09; color: #fafaf9;
font-weight: 600; letter-spacing: -.01em;
transition: background .15s, transform .1s;
}
.btn-primary:hover:not(:disabled) { background: #292524; }
.btn-primary:active:not(:disabled) { transform: scale(.98); }
.btn-primary:disabled { opacity: .4; cursor: default; }
.card {
background: #fff;
border: 1px solid #e7e5e4;
transition: border-color .15s, box-shadow .15s;
}
.card:hover { border-color: #d6d3d1; box-shadow: 0 1px 8px rgba(0,0,0,.05); }
/* Video modal */
#video-modal { display:none; position:fixed; inset:0; z-index:50; align-items:center; justify-content:center; }
#video-modal.open { display:flex; }
#video-backdrop { position:absolute; inset:0; background:rgba(0,0,0,.7); backdrop-filter:blur(4px); }
#video-box {
position:relative; z-index:1; width:min(860px,92vw);
background:#0f172a; border-radius:16px; overflow:hidden;
box-shadow:0 24px 80px rgba(0,0,0,.6);
animation: modalIn .2s ease both;
}
@keyframes modalIn { from{opacity:0;transform:scale(.96)} to{opacity:1;transform:scale(1)} }
#video-box video { display:block; width:100%; }
#video-close {
position:absolute; top:12px; right:12px; z-index:2;
background:rgba(255,255,255,.1); border:none; color:#fff;
width:32px; height:32px; border-radius:50%; cursor:pointer;
font-size:1rem; display:flex; align-items:center; justify-content:center;
transition:background .15s;
}
#video-close:hover { background:rgba(255,255,255,.25); }
.btn-help {
display:inline-flex; align-items:center; gap:6px;
font-size:.75rem; font-weight:500; color:#78716c;
border:1px solid #e7e5e4; border-radius:8px;
padding:6px 12px; cursor:pointer; background:transparent;
transition:color .15s, border-color .15s;
}
.btn-help:hover { color:#0c0a09; border-color:#a8a29e; }
.tag-top { background:#f0fdf4; color:#15803d; border:1px solid #bbf7d0; }
.tag-good { background:#eff6ff; color:#1d4ed8; border:1px solid #bfdbfe; }
.tag-mid { background:#fefce8; color:#a16207; border:1px solid #fde68a; }
a.annonce-link {
color: #a8a29e; font-size:.75rem; letter-spacing:.02em;
text-decoration: none; transition: color .15s;
}
a.annonce-link:hover { color: #0c0a09; }
header { border-bottom: 1px solid #e7e5e4; background: #fafaf9; }
</style>
</head>
<body style="
background-color: #fafaf9;
background-image: url('/static/background.png');
background-repeat: no-repeat;
background-position: bottom center;
background-size: clamp(600px, 80vw, 1100px) auto;
background-attachment: fixed;
">
<!-- Top bar (always visible) -->
<header>
<div class="max-w-2xl mx-auto px-6 py-4 flex items-center justify-between">
<button onclick="resetForm()" class="flex items-center gap-2 hover:opacity-60 transition-opacity">
<span style="font-size:1.1rem;line-height:1"></span>
<span class="font-semibold tracking-tight">BlaaAi</span>
</button>
<div class="flex items-center gap-3">
<span id="header-status" class="text-xs" style="color:#a8a29e"></span>
<button class="btn-help" onclick="openVideoModal()">
<svg width="13" height="13" fill="none" stroke="currentColor" stroke-width="2" viewBox="0 0 24 24">
<circle cx="12" cy="12" r="10"/><polygon points="10 8 16 12 10 16 10 8" fill="currentColor" stroke="none"/>
</svg>
Video hjælp
</button>
</div>
</div>
</header>
<!-- ── VIDEO MODAL ── -->
<div id="video-modal" role="dialog" aria-modal="true" aria-label="Video hjælp">
<div id="video-backdrop" onclick="closeVideoModal()"></div>
<div id="video-box">
<button id="video-close" onclick="closeVideoModal()" aria-label="Luk video"></button>
<video id="help-video" controls preload="metadata"
poster=""
src="/static/blaaai_tutorial.mp4">
</video>
</div>
</div>
<main class="max-w-2xl mx-auto px-6" style="min-height:100vh;display:flex;flex-direction:column;justify-content:center;padding-top:2rem;padding-bottom:22rem">
<!-- ── FORM ── -->
<section id="form-section" class="py-12 relative">
<p class="text-xs font-medium tracking-widest uppercase mb-5" style="color:#a8a29e">AI-powered annonce analyse</p>
<h1 class="text-4xl font-bold tracking-tight leading-tight mb-10" style="letter-spacing:-.03em">
Find den bedste<br>DBA-annonce.
</h1>
<div class="space-y-3">
<input
id="url-input"
type="url"
placeholder="https://www.dba.dk/mobility/search/…"
class="w-full px-4 py-3 rounded-lg text-sm"
/>
<div id="prefs-section" class="hidden">
<textarea
id="prefs-input"
rows="2"
placeholder="Dine præferencer — fx 'ingen franske biler, helst automatgear'"
class="w-full px-4 py-3 rounded-lg text-sm resize-none"
></textarea>
</div>
<div class="flex items-center gap-3 pt-1">
<button
id="submit-btn"
onclick="submitSearch()"
class="btn-primary px-6 py-3 rounded-lg text-sm flex items-center gap-2"
>
<span>Analyser</span>
<svg class="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M14 5l7 7m0 0l-7 7m7-7H3"/>
</svg>
</button>
<button onclick="togglePrefs()" id="prefs-btn"
class="text-sm px-4 py-3 rounded-lg transition-colors"
style="color:#78716c;border:1px solid #e7e5e4">
+ Præferencer
</button>
</div>
</div>
<p class="text-xs mt-8 leading-relaxed" style="color:#a8a29e">
Paste en DBA søge-URL — AI'en gennemgår alle annoncer og rangerer dem efter pris, stand og kvalitet.<br>
<span style="color:#d6d3d1">AI kan tage fejl. Brug det som inspiration, ikke som facit.</span>
</p>
</section>
<!-- ── STATUS ── -->
<section id="status-section" class="hidden pt-8 pb-20">
<div class="flex items-center gap-3 mb-1">
<svg class="spinner w-4 h-4 shrink-0" fill="none" viewBox="0 0 24 24" style="color:#a8a29e">
<circle class="opacity-20" cx="12" cy="12" r="10" stroke="currentColor" stroke-width="3"/>
<path class="opacity-80" fill="currentColor" d="M4 12a8 8 0 018-8v8z"/>
</svg>
<p id="status-text" class="font-medium text-sm">Henter annoncer…</p>
<span id="status-progress" class="text-xs ml-auto" style="color:#a8a29e"></span>
</div>
<p id="status-sub" class="text-xs mb-6 ml-7" style="color:#a8a29e;min-height:1.2em"></p>
<div id="skeleton-container" class="space-y-2"></div>
</section>
<!-- ── RESULTS ── -->
<section id="results-section" class="hidden pt-8 pb-20">
<div class="flex items-baseline justify-between mb-8">
<div>
<h2 class="text-xl font-bold tracking-tight">Resultater</h2>
<p id="result-count" class="text-xs mt-1" style="color:#a8a29e"></p>
</div>
</div>
<div id="listings-container" class="space-y-2"></div>
<!-- Email -->
<div class="mt-12 pt-8" style="border-top:1px solid #e7e5e4">
<p class="text-sm font-medium mb-3">Send top-10 på email</p>
<div class="flex gap-2">
<input
id="email-input"
type="email"
placeholder="din@email.dk"
class="flex-1 px-4 py-2.5 rounded-lg text-sm"
/>
<button
onclick="sendEmail()"
id="email-btn"
class="btn-primary px-5 py-2.5 rounded-lg text-sm"
>Send</button>
</div>
<p id="email-status" class="text-xs mt-2 hidden" style="color:#71717a"></p>
</div>
</section>
</main>
<script>
/**
 * Open the help-video modal, lock page scrolling and start the video from
 * the beginning.
 */
function openVideoModal() {
  document.getElementById('video-modal').classList.add('open');
  document.body.style.overflow = 'hidden';
  const v = document.getElementById('help-video');
  v.currentTime = 0;
  // play() returns a promise that rejects when playback is blocked by the
  // browser or interrupted by pause() (e.g. the modal is closed right away).
  // Swallow that rejection: the user can still start the video with the
  // native controls. Older browsers return undefined, hence the guard.
  const playback = v.play();
  if (playback && typeof playback.catch === 'function') {
    playback.catch(() => {});
  }
}
/**
 * Hide the help-video modal, restore page scrolling and rewind the video.
 */
function closeVideoModal() {
  const modal = document.getElementById('video-modal');
  modal.classList.remove('open');
  document.body.style.overflow = '';

  const video = document.getElementById('help-video');
  video.pause();
  video.currentTime = 0;
}
// Close the modal automatically when the video finishes playing.
document.getElementById('help-video').addEventListener('ended', closeVideoModal);
// Escape closes the modal (the handler runs whether or not it is open).
document.addEventListener('keydown', e => {
  if (e.key === 'Escape') closeVideoModal();
});
// Search id rendered by the server-side template, or null when the page is
// opened without one.
const PRELOAD_ID = {{ search_id | tojson if search_id is defined else 'null' }};
// Page state shared by the functions below.
let currentSearchId = null;    // id of the search currently shown
let pollInterval = null;       // timer id of the status polling loop
let skeletonsRendered = false; // placeholder cards are only rendered once
let msgInterval = null;        // timer id of the rotating sub-messages
// Search status → [headline, sub-line] shown in the status section.
const statusMessages = {
  queued: ["Stiller i kø…", "Venter på ledig plads"],
  fetching: ["Henter annoncer fra DBA…", "Indlæser titler, priser og beskrivelser"],
  scoring: ["AI analyserer annoncerne…", ""],
  ready: ["Analyse færdig", ""],
  error: ["Noget gik galt", "Tjek søgelinket og prøv igen"],
};
// Sub-lines cycled through by startFunMessages() while scoring is running.
const funMessages = [
  "Vurderer bilers pålidelighed og historik…",
  "Sammenligner kilometertal og prisforhold…",
  "Tjekker om sælger lyder troværdig…",
  "Vurderer forventede vedligeholdelsesomkostninger…",
  "Sammenligner mod aktuelle markedspriser…",
  "Analyserer annoncebeskrivelserne for røde flag…",
  "Overvejer årstal og udstyrsgrad…",
  "Ranker efter kvalitet og pris…",
  "Tjekker km-stand mod forventet for alderen…",
  "Næsten færdig — finpudser rangeringen…",
];
/**
 * Show or hide the preferences textarea and update the toggle button label
 * to match.
 */
function togglePrefs() {
  const section = document.getElementById("prefs-section");
  const button = document.getElementById("prefs-btn");
  // toggle() returns true when the class is present afterwards.
  const nowHidden = section.classList.toggle("hidden");
  button.textContent = nowHidden ? "+ Præferencer" : " Præferencer";
}
/**
 * Validate the entered DBA URL, create a search via POST /api/searches and
 * switch to the status view with polling started.
 *
 * On any failure (network error, non-2xx response, response without an id)
 * an alert is shown and the submit button is restored.
 */
async function submitSearch() {
  const url = document.getElementById("url-input").value.trim();
  const prefs = document.getElementById("prefs-input").value.trim();

  // Check the parsed host rather than a string prefix, so look-alike hosts
  // such as "www.dba.dk.example.com" are rejected.
  let parsed = null;
  try {
    parsed = new URL(url);
  } catch (_) {
    parsed = null;
  }
  if (!parsed || parsed.protocol !== "https:" || parsed.hostname !== "www.dba.dk") {
    alert("Indsæt et gyldigt DBA søgelink");
    return;
  }

  const btn = document.getElementById("submit-btn");
  btn.disabled = true;
  btn.innerHTML = `<svg class="spinner w-4 h-4" fill="none" viewBox="0 0 24 24"><circle class="opacity-20" cx="12" cy="12" r="10" stroke="currentColor" stroke-width="3"/><path class="opacity-80" fill="currentColor" d="M4 12a8 8 0 018-8v8z"/></svg><span>Sender…</span>`;
  try {
    const res = await fetch("/api/searches", {
      method: "POST",
      headers: {"Content-Type": "application/json"},
      body: JSON.stringify({url, prefs}),
    });
    // An error page may not be JSON; fall back to an empty object.
    const data = await res.json().catch(() => ({}));
    if (!res.ok) {
      const detail = typeof data.detail === "string" ? data.detail : `HTTP ${res.status}`;
      throw new Error(detail);
    }
    if (data.id === undefined || data.id === null) {
      // Without an id there is nothing to poll.
      throw new Error("Uventet svar fra serveren");
    }
    currentSearchId = data.id;
    showStatus();
    startPolling(data.id);
  } catch(e) {
    alert("Fejl: " + e.message);
    btn.disabled = false;
    btn.innerHTML = `<span>Analyser</span><svg class="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M14 5l7 7m0 0l-7 7m7-7H3"/></svg>`;
  }
}
/**
 * Switch the page to the "working" view: hide the form and the results,
 * show the status section.
 */
function showStatus() {
  const visibility = {
    "form-section": false,
    "status-section": true,
    "results-section": false,
  };
  for (const [sectionId, visible] of Object.entries(visibility)) {
    document.getElementById(sectionId).classList.toggle("hidden", !visible);
  }
}
/**
 * Put the search id in the address bar and poll its status every 2 seconds.
 *
 * @param {string} id  Search id as returned by the API.
 */
function startPolling(id) {
  const target = `/search/${id}`;
  // Do not stack a second history entry when the page was opened directly
  // on this search (Back would otherwise appear to do nothing).
  if (window.location.pathname !== target) {
    history.pushState(null, "", target);
  }
  // Never leave an earlier polling loop running alongside the new one.
  if (pollInterval) clearInterval(pollInterval);
  pollInterval = setInterval(() => poll(id), 2000);
}
/**
 * Show the first rotating sub-message immediately, then advance to the next
 * one every 3.5 seconds, wrapping around at the end of the list.
 */
function startFunMessages() {
  const subtitle = document.getElementById("status-sub");
  let cursor = 0;
  const showCurrent = () => {
    subtitle.textContent = funMessages[cursor];
  };

  showCurrent();
  msgInterval = setInterval(() => {
    cursor = (cursor + 1) % funMessages.length;
    showCurrent();
  }, 3500);
}
/**
 * Fill the skeleton container with placeholder cards, one per expected
 * listing. Only the first call has any effect until the flag is reset.
 *
 * @param {number} count  Number of placeholder cards to render.
 */
function renderSkeletons(count) {
  if (skeletonsRendered) return;
  skeletonsRendered = true;

  // Markup shared by every placeholder card.
  const skeletonMarkup = `
<div class="flex items-start gap-4">
<div class="skeleton shrink-0 mt-1" style="width:1rem;height:.75rem;border-radius:3px"></div>
<div class="flex-1 space-y-2.5">
<div class="flex gap-2">
<div class="skeleton" style="height:.8rem;width:7rem;border-radius:3px"></div>
<div class="skeleton" style="height:.8rem;width:4rem;border-radius:3px"></div>
</div>
<div class="skeleton" style="height:.4rem;width:100%;border-radius:9999px"></div>
<div class="skeleton" style="height:.7rem;width:75%;border-radius:3px"></div>
<div class="skeleton" style="height:.7rem;width:55%;border-radius:3px"></div>
</div>
<div class="shrink-0 ml-2 space-y-2">
<div class="skeleton" style="height:.9rem;width:5rem;border-radius:3px"></div>
<div class="skeleton" style="height:.7rem;width:3rem;border-radius:3px;margin-left:auto"></div>
</div>
</div>`;

  const host = document.getElementById("skeleton-container");
  host.innerHTML = "";
  let position = 0;
  while (position < count) {
    const placeholder = document.createElement("div");
    placeholder.className = "card rounded-xl p-5";
    placeholder.style.animationDelay = `${position * 40}ms`;
    placeholder.innerHTML = skeletonMarkup;
    host.appendChild(placeholder);
    position += 1;
  }
}
/**
 * Stop the rotating sub-messages if they are running.
 */
function stopFunMessages() {
  if (!msgInterval) return;
  clearInterval(msgInterval);
  msgInterval = null;
}
/**
 * Fetch the current state of a search and update the status view.
 *
 * Stops polling when the search is ready (results are shown), has failed,
 * or is unknown to the server. Other errors are logged and retried on the
 * next tick of the polling timer.
 *
 * @param {string} id  Search id.
 */
async function poll(id) {
  try {
    const res = await fetch(`/api/searches/${id}`);
    if (res.status === 404) {
      // NOTE(review): assumes the API answers 404 for an unknown search id —
      // confirm. Such a search will never become ready, so stop polling and
      // show the generic error state.
      stopFunMessages();
      clearInterval(pollInterval);
      const [errMain, errSub] = statusMessages.error;
      document.getElementById("status-text").textContent = errMain;
      document.getElementById("status-sub").textContent = errSub;
      document.getElementById("header-status").textContent = errMain;
      return;
    }
    if (!res.ok) {
      // Possibly transient (e.g. 5xx): handled by the catch below and
      // retried on the next tick.
      throw new Error(`HTTP ${res.status}`);
    }
    const data = await res.json();
    const total = data.listing_count || 0;
    const scored = data.scored_count || 0;
    // Show skeletons as soon as we know the count
    if (total > 0) renderSkeletons(total);
    // Update status text
    const [main] = statusMessages[data.status] || ["Arbejder…"];
    document.getElementById("status-text").textContent = main;
    document.getElementById("header-status").textContent = data.status === "ready" ? "" : main;
    // Progress counter during scoring
    if (data.status === "scoring" && total > 0) {
      document.getElementById("status-progress").textContent = `${scored}/${total}`;
      if (!msgInterval) startFunMessages();
    } else if (data.status !== "scoring") {
      const [, sub] = statusMessages[data.status] || ["", ""];
      document.getElementById("status-sub").textContent = sub;
    }
    if (data.status === "ready") {
      stopFunMessages();
      clearInterval(pollInterval);
      showResults(data);
    } else if (data.status === "error") {
      stopFunMessages();
      clearInterval(pollInterval);
    }
  } catch(e) {
    console.error("Poll fejl:", e);
  }
}
/**
 * Render the scored listings as result cards and switch to the results view.
 *
 * Listing text comes from scraped third-party listings and from model
 * output, so every value is HTML-escaped before it is placed in markup and
 * links are limited to http(s) URLs.
 *
 * @param {object} data  Search payload; `data.listings` is the list of
 *                       listing objects (missing means no listings).
 */
function showResults(data) {
  // Escape the characters that are significant in HTML text and attributes.
  const escapeMap = {"&": "&amp;", "<": "&lt;", ">": "&gt;", '"': "&quot;", "'": "&#39;"};
  const esc = (value) =>
    String(value === undefined || value === null ? "" : value)
      .replace(/[&<>"']/g, (ch) => escapeMap[ch]);
  // Only allow http(s) links; anything else (e.g. javascript:) becomes "#".
  const safeUrl = (value) => {
    try {
      const u = new URL(String(value), window.location.origin);
      return (u.protocol === "http:" || u.protocol === "https:") ? u.href : "#";
    } catch (_) {
      return "#";
    }
  };

  document.getElementById("status-section").classList.add("hidden");
  document.getElementById("results-section").classList.remove("hidden");
  document.getElementById("header-status").textContent = "";
  const listings = data.listings || [];
  document.getElementById("result-count").textContent =
    `${listings.length} annoncer analyseret · sorteret efter AI-score`;
  const container = document.getElementById("listings-container");
  container.innerHTML = "";
  listings.forEach((item, i) => {
    // Coerce so a score delivered as a string cannot break toFixed().
    const score = Number(item.ai_score) || 0;
    // Keep the bar within its track even for out-of-range scores.
    const pct = Math.max(0, Math.min(100, Math.round(score * 10)));
    const warn = item.ai_warnings
      ? `<p class="text-xs mt-2" style="color:#dc2626">↑ ${esc(item.ai_warnings)}</p>` : "";
    const tag = rankTag(score);
    const card = document.createElement("div");
    card.className = "card rounded-xl p-5 card-enter";
    card.style.animationDelay = `${i * 30}ms`;
    card.innerHTML = `
<div class="flex items-start gap-4">
<span class="text-xs font-mono pt-0.5 shrink-0" style="color:#d6d3d1;min-width:1.5rem">${i+1}</span>
<div class="flex-1 min-w-0">
<div class="flex items-center gap-2 mb-2 flex-wrap">
<span class="font-semibold text-sm tracking-tight">${esc(item.name)}</span>
<span class="text-xs" style="color:#a8a29e">${esc(item.description || "")}</span>
${tag}
</div>
<div class="flex items-center gap-3 mb-3">
<div class="flex-1 rounded-full" style="background:#f5f5f4;height:3px">
<div class="score-fill rounded-full" style="height:3px;width:${pct}%"></div>
</div>
<span class="text-xs font-bold tabular-nums shrink-0">${score.toFixed(1)}<span style="color:#d6d3d1">/10</span></span>
</div>
<p class="text-xs leading-relaxed" style="color:#78716c">${esc(item.ai_reason || "")}</p>
${warn}
</div>
<div class="shrink-0 text-right ml-2">
<p class="font-semibold text-sm tabular-nums">${Number(item.price_dkk || 0).toLocaleString("da-DK")}<span class="text-xs font-normal ml-0.5" style="color:#a8a29e">kr</span></p>
<a href="${esc(safeUrl(item.url))}" target="_blank" rel="noopener noreferrer" class="annonce-link mt-1.5 block">Se →</a>
</div>
</div>`;
    container.appendChild(card);
  });
}
/**
 * Return the badge markup for a score, or an empty string when the score is
 * below the lowest tier.
 *
 * @param {number} score  AI score.
 * @returns {string} HTML for the badge.
 */
function rankTag(score) {
  // [minimum score, CSS class, label], highest tier first.
  const tiers = [
    [8.5, "tag-top", "Topvalg"],
    [7.5, "tag-good", "Godt køb"],
    [6.5, "tag-mid", "Middel"],
  ];
  const tier = tiers.find(([minimum]) => score >= minimum);
  if (!tier) return '';
  const [, cssClass, label] = tier;
  return `<span class="${cssClass} text-xs px-2 py-0.5 rounded font-medium">${label}</span>`;
}
/**
 * Send the top results of the current search to the entered email address
 * via POST /api/searches/<id>/email and report the outcome below the form.
 *
 * The send button is disabled while the request is in flight and always
 * restored afterwards, whether the request succeeded or failed.
 */
async function sendEmail() {
  const email = document.getElementById("email-input").value.trim();
  const btn = document.getElementById("email-btn");
  const status = document.getElementById("email-status");
  if (!email) { alert("Indtast en email-adresse"); return; }
  if (!currentSearchId) return;
  btn.disabled = true;
  btn.textContent = "Sender…";
  try {
    const res = await fetch(`/api/searches/${currentSearchId}/email`, {
      method: "POST",
      headers: {"Content-Type": "application/json"},
      body: JSON.stringify({email}),
    });
    // A non-JSON error body must not be reported as a network error.
    const data = await res.json().catch(() => ({}));
    status.classList.remove("hidden");
    if (res.ok) {
      status.style.color = "#4ade80";
      status.textContent = `Sendt til ${email}`;
    } else {
      status.style.color = "#f87171";
      status.textContent = typeof data.detail === "string" ? data.detail : "Fejl ved afsendelse";
    }
  } catch(e) {
    status.classList.remove("hidden");
    status.style.color = "#f87171";
    status.textContent = "Netværksfejl — prøv igen";
  } finally {
    // Also after success: otherwise the button stays disabled and keeps
    // showing "Sender…".
    btn.disabled = false;
    btn.textContent = "Send";
  }
}
/**
 * Return the page to its initial state: stop timers, clear all inputs and
 * rendered content, restore button and status texts, show the form and put
 * "/" back in the address bar.
 */
function resetForm() {
  if (pollInterval) {
    clearInterval(pollInterval);
    pollInterval = null;
  }
  stopFunMessages();
  skeletonsRendered = false;
  currentSearchId = null;
  document.getElementById("form-section").classList.remove("hidden");
  document.getElementById("status-section").classList.add("hidden");
  document.getElementById("results-section").classList.add("hidden");
  document.getElementById("skeleton-container").innerHTML = "";
  document.getElementById("status-progress").textContent = "";
  // Restore the initial status texts so the next search does not briefly
  // show the outcome of the previous one before its first poll.
  document.getElementById("status-text").textContent = "Henter annoncer…";
  document.getElementById("status-sub").textContent = "";
  document.getElementById("listings-container").innerHTML = "";
  document.getElementById("url-input").value = "";
  document.getElementById("prefs-input").value = "";
  document.getElementById("email-input").value = "";
  document.getElementById("header-status").textContent = "";
  document.getElementById("email-status").classList.add("hidden");
  // The email button may still be disabled from an earlier send.
  const emailBtn = document.getElementById("email-btn");
  emailBtn.disabled = false;
  emailBtn.textContent = "Send";
  const btn = document.getElementById("submit-btn");
  btn.disabled = false;
  btn.innerHTML = `<span>Analyser</span><svg class="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M14 5l7 7m0 0l-7 7m7-7H3"/></svg>`;
  // Avoid stacking duplicate "/" history entries when already on the start page.
  if (window.location.pathname !== "/") {
    history.pushState(null, "", "/");
  }
}
// Page opened on an existing search: show the status view, fetch the state
// once, and keep polling only if that first fetch did not already switch
// to the results view.
if (PRELOAD_ID) {
  currentSearchId = PRELOAD_ID;
  showStatus();
  (async () => {
    await poll(PRELOAD_ID);
    const statusSection = document.getElementById("status-section");
    const stillWaiting = !statusSection.classList.contains("hidden");
    if (stillWaiting) {
      startPolling(PRELOAD_ID);
    }
  })();
}
</script>
</body>
</html>