279 lines
10 KiB
Python
279 lines
10 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
BlaaAi — AI-powered DBA listing analyzer
|
|
FastAPI backend
|
|
|
|
Usage:
|
|
uvicorn app:app --reload --port 8000
|
|
"""
|
|
|
|
import json, os, smtplib
|
|
from concurrent.futures import ThreadPoolExecutor
|
|
from datetime import datetime, timezone
|
|
from email.mime.multipart import MIMEMultipart
|
|
from email.mime.text import MIMEText
|
|
from pathlib import Path
|
|
from typing import Optional
|
|
|
|
from fastapi import BackgroundTasks, FastAPI, HTTPException
|
|
from fastapi.responses import HTMLResponse
|
|
from fastapi.staticfiles import StaticFiles
|
|
from fastapi.templating import Jinja2Templates
|
|
from pydantic import BaseModel
|
|
from starlette.requests import Request
|
|
|
|
from fetch_dba import (
|
|
DATA_DIR, ITEM_CACHE, create_search, detect_domain, enrich_listings,
|
|
fetch_page, find_new, listings_file, load_meta, load_seen,
|
|
save_seen, seen_file, list_searches,
|
|
)
|
|
from score import CRITERIA, METRICS_FILE, score_listings
|
|
|
|
# ASGI application object; uvicorn loads it as "app:app".
app = FastAPI(title="BlaaAi")

# Serve the ./static directory under the /static URL prefix.
app.mount(
    "/static",
    StaticFiles(directory="static"),
    name="static",
)

# Jinja2 environment rooted at ./templates, used by the HTML routes.
templates = Jinja2Templates(directory="templates")

# Shared pool of four worker threads for blocking work.
executor = ThreadPoolExecutor(max_workers=4)
|
|
|
|
|
|
# ── Meta helpers ──────────────────────────────────────────────────────────────
|
|
|
|
def update_meta(search_id: str, **kwargs) -> dict:
    """Merge ``kwargs`` into a search's ``meta.json`` and return the result.

    The file is replaced atomically: the new content is written to a
    temporary sibling and then moved over ``meta.json`` with ``os.replace``.
    Writing in place would truncate the file first, so anything reading
    ``meta.json`` at that moment could see an empty or partial document.

    Args:
        search_id: Name of the search directory under ``DATA_DIR``.
        **kwargs: Top-level keys to set or overwrite in the metadata.

    Returns:
        The merged metadata dict, exactly as written to disk.

    Raises:
        FileNotFoundError: If ``meta.json`` does not exist for ``search_id``.
        json.JSONDecodeError: If the existing file is not valid JSON.
        TypeError: If a value in ``kwargs`` is not JSON-serialisable.
    """
    import threading  # function-scope import: only needed to name the temp file

    p = DATA_DIR / search_id / "meta.json"
    meta = json.loads(p.read_text())
    meta.update(kwargs)

    # Serialise before touching the disk so a bad value leaves no stray file.
    payload = json.dumps(meta, ensure_ascii=False, indent=2)

    # Per-process, per-thread name so concurrent writers never share a temp file.
    tmp = p.with_name(f"{p.name}.{os.getpid()}.{threading.get_ident()}.tmp")
    try:
        tmp.write_text(payload)
        os.replace(tmp, p)
    finally:
        # No-op after a successful replace; cleans up if the write failed.
        tmp.unlink(missing_ok=True)
    return meta
|
|
|
|
|
|
# ── Background pipeline ───────────────────────────────────────────────────────
|
|
|
|
def run_fetch_and_score(search_id: str, prefs: str = "") -> None:
    """Fetch page 1 of a search, store unseen listings, then score everything.

    Blocking; the routes schedule it as a background task. Progress is
    recorded in the search metadata through ``update_meta``: status moves
    through "fetching", "scoring" and "ready". On any exception the status is
    set to "error" with the exception text, and the exception is re-raised.
    """
    try:
        search_url = load_meta(search_id)["url"]

        # ── Fetch ────────────────────────────────────────────────────────────
        update_meta(search_id, status="fetching")
        seen_path = seen_file(search_id)
        listings_path = listings_file(search_id)
        already_seen = load_seen(seen_path)

        page_listings, _ = fetch_page(search_url, 1)
        fresh = find_new(page_listings, already_seen)

        if fresh:
            fresh = enrich_listings(fresh)
            if listings_path.exists():
                stored = json.loads(listings_path.read_text())
            else:
                stored = []
            stored.extend(fresh)
            listings_path.write_text(
                json.dumps(stored, ensure_ascii=False, indent=2)
            )
            # Record every id on the fetched page, not only the new ones.
            page_ids = {entry["id"] for entry in page_listings}
            save_seen(seen_path, already_seen | page_ids)

        # ── Score ────────────────────────────────────────────────────────────
        update_meta(search_id, status="scoring")
        if listings_path.exists():
            items = json.loads(listings_path.read_text())
        else:
            items = []
        criteria = CRITERIA[detect_domain(search_url)]

        score_listings(items, criteria, prefs, force=False, source_file=listings_path)

        finished_at = datetime.now().isoformat(timespec="seconds")
        update_meta(
            search_id,
            status="ready",
            listing_count=len(items),
            last_scored_at=finished_at,
            prefs=prefs,
        )

    except Exception as e:
        # Surface the failure in the metadata, then let it propagate.
        update_meta(search_id, status="error", error=str(e))
        raise
|
|
|
|
|
|
# ── Pydantic models ───────────────────────────────────────────────────────────
|
|
|
|
class NewSearchRequest(BaseModel):
    # JSON body accepted by the create and rescore routes.

    # Search URL; the create route hands it to create_search().
    url: str

    # Free-text preferences; the routes normalise a missing/empty value to "".
    prefs: Optional[str] = ""
|
|
|
|
|
|
class EmailRequest(BaseModel):
    # JSON body accepted by the email route.

    # Recipient address; used as the "To" header and the SMTP recipient.
    email: str
|
|
|
|
|
|
# ── Routes ────────────────────────────────────────────────────────────────────
|
|
|
|
@app.get("/health")
async def health():
    # Liveness probe: fixed status, current UTC time, and build identifiers
    # read from the BUILD_VERSION / GIT_COMMIT environment variables.
    checked_at = datetime.now(timezone.utc)
    build_version = os.getenv("BUILD_VERSION", "unknown")
    git_commit = os.getenv("GIT_COMMIT", "unknown")
    return {
        "status": "healthy",
        "timestamp": checked_at.isoformat(),
        "version": build_version,
        # Only the first seven characters of the commit are exposed.
        "commit": git_commit[:7],
    }
|
|
|
|
|
|
@app.get("/", response_class=HTMLResponse)
async def index(request: Request):
    # Landing page: renders index.html with no extra template context.
    page = templates.TemplateResponse(request, "index.html")
    return page
|
|
|
|
|
|
@app.get("/search/{search_id}", response_class=HTMLResponse)
async def search_view(request: Request, search_id: str):
    # Same template as the landing page, with the search id exposed to it.
    context = {"search_id": search_id}
    return templates.TemplateResponse(request, "index.html", context)
|
|
|
|
|
|
@app.post("/api/searches")
async def create(body: NewSearchRequest, background_tasks: BackgroundTasks):
    # Register a new search, mark it "queued", and schedule the fetch + score
    # pipeline to run after the response has been sent.
    search_id = create_search(body.url)
    prefs = body.prefs or ""  # a missing/None preference becomes ""

    update_meta(search_id, status="queued", prefs=prefs)
    background_tasks.add_task(run_fetch_and_score, search_id, prefs)

    response = {"id": search_id, "status": "queued"}
    return response
|
|
|
|
|
|
@app.get("/api/searches")
async def get_all():
    # Return whatever list_searches() reports, unmodified.
    searches = list_searches()
    return searches
|
|
|
|
|
|
@app.get("/api/searches/{search_id}")
async def get_search(search_id: str):
    # Return a search's metadata, enriched with its scored listings (highest
    # ai_score first) and listing counts when a listings file exists.
    # Responds 404 when the metadata cannot be found.
    try:
        meta = load_meta(search_id)
    except FileNotFoundError:
        raise HTTPException(404, "Søgning ikke fundet")

    listings_path = listings_file(search_id)
    if listings_path.exists():
        all_items = json.loads(listings_path.read_text())

        # Unscored listings are counted but not returned.
        scored = [entry for entry in all_items if entry.get("ai_score") is not None]
        scored.sort(key=lambda entry: entry["ai_score"], reverse=True)

        meta.update(
            listings=scored,
            listing_count=len(all_items),
            scored_count=len(scored),
        )
        # Metadata without a status but with scored listings counts as done.
        if scored and "status" not in meta:
            meta["status"] = "ready"

    if "status" not in meta:
        if meta.get("listing_count", 0) > 0:
            meta["status"] = "ready"
        else:
            meta["status"] = "unknown"

    return meta
|
|
|
|
|
|
@app.post("/api/searches/{search_id}/rescore")
async def rescore(search_id: str, body: NewSearchRequest, background_tasks: BackgroundTasks):
    # Re-queue an existing search with (possibly new) preferences.
    # Only body.prefs is used here; responds 404 for an unknown search.
    try:
        load_meta(search_id)  # existence check only; the result is not needed
    except FileNotFoundError:
        raise HTTPException(404, "Søgning ikke fundet")

    prefs = body.prefs or ""
    update_meta(search_id, status="queued", prefs=prefs)
    background_tasks.add_task(run_fetch_and_score, search_id, prefs)

    response = {"id": search_id, "status": "queued"}
    return response
|
|
|
|
|
|
@app.get("/metrics")
async def get_metrics():
    # Combine the global metrics file, each search directory's metrics.json
    # (most recently modified directory first) and the item-cache file count.
    totals = json.loads(METRICS_FILE.read_text()) if METRICS_FILE.exists() else {}

    # Per-search breakdown
    per_search = []
    if DATA_DIR.exists():
        newest_first = sorted(
            DATA_DIR.iterdir(),
            key=lambda entry: entry.stat().st_mtime,
            reverse=True,
        )
        metric_paths = (entry / "metrics.json" for entry in newest_first)
        per_search = [
            json.loads(path.read_text()) for path in metric_paths if path.exists()
        ]

    # Item cache stats
    if ITEM_CACHE.exists():
        cache_size = sum(1 for _ in ITEM_CACHE.glob("*.json"))
    else:
        cache_size = 0

    return {**totals, "item_cache_size": cache_size, "searches": per_search}
|
|
|
|
|
|
@app.post("/api/searches/{search_id}/email")
async def send_email(search_id: str, body: EmailRequest):
    # Email the ten highest-scoring listings of a finished search.
    #
    # Responses:
    #   404 - no metadata exists for search_id
    #   400 - the search is not in status "ready", or has no listings file
    #   503 - SMTP is not configured (raised by _send_email)
    #   200 - {"status": "sent", "to": <recipient>}
    #
    # The SMTP conversation is blocking network I/O. It runs on the module's
    # thread pool so this coroutine does not stall the event loop, and with it
    # every other request, while the mail server responds.
    import asyncio  # function-scope import: only this route needs it

    try:
        meta = load_meta(search_id)
    except FileNotFoundError:
        raise HTTPException(404, "Søgning ikke fundet")

    if meta.get("status") != "ready":
        raise HTTPException(400, "Analysen er ikke færdig endnu")

    lf = listings_file(search_id)
    if not lf.exists():
        raise HTTPException(400, "Ingen resultater at sende")

    items = json.loads(lf.read_text())
    # Unscored listings are skipped; only the top ten are mailed.
    ranked = sorted(
        [i for i in items if i.get("ai_score") is not None],
        key=lambda x: x["ai_score"],
        reverse=True,
    )[:10]

    html = _build_email_html(ranked, meta)

    # Exceptions raised in the worker (including the HTTPException for a
    # missing SMTP_HOST) are re-raised here by the await.
    loop = asyncio.get_running_loop()
    await loop.run_in_executor(
        executor,
        _send_email,
        body.email,
        "🔍 Dine DBA-resultater fra BlaaAi",
        html,
    )
    return {"status": "sent", "to": body.email}
|
|
|
|
|
|
# ── Email helpers ─────────────────────────────────────────────────────────────
|
|
|
|
def _build_email_html(ranked: list[dict], meta: dict) -> str:
|
|
rows = ""
|
|
for i, item in enumerate(ranked, 1):
|
|
score = item.get("ai_score", 0)
|
|
bar = "█" * int(score) + "░" * (10 - int(score))
|
|
reason = item.get("ai_reason", "")
|
|
warn = f'<p style="color:#dc2626;font-size:13px">⚠️ {item["ai_warnings"]}</p>' if item.get("ai_warnings") else ""
|
|
rows += f"""
|
|
<tr>
|
|
<td style="padding:16px;border-bottom:1px solid #e5e7eb">
|
|
<strong>#{i} [{score}] {item['name']}</strong><br>
|
|
<span style="color:#6b7280">{item.get('price_dkk','?')} DKK</span>
|
|
<span style="font-family:monospace;color:#6366f1;margin-left:8px">{bar}</span><br>
|
|
<p style="margin:6px 0;font-size:14px">{reason}</p>
|
|
{warn}
|
|
<a href="{item['url']}" style="color:#6366f1;font-size:13px">Se annonce →</a>
|
|
</td>
|
|
</tr>"""
|
|
|
|
return f"""
|
|
<html><body style="font-family:sans-serif;max-width:600px;margin:0 auto;padding:20px">
|
|
<h1 style="color:#1f2937">🔍 Dine DBA-resultater</h1>
|
|
<p style="color:#6b7280">Søgning: <a href="{meta['url']}">{meta['url'][:60]}…</a></p>
|
|
<table style="width:100%;border-collapse:collapse">{rows}</table>
|
|
<p style="color:#9ca3af;font-size:12px;margin-top:24px">Leveret af BlaaAi</p>
|
|
</body></html>"""
|
|
|
|
|
|
def _send_email(to: str, subject: str, html: str) -> None:
    """Send one HTML email over SMTP with STARTTLS.

    Connection settings come from the environment: ``SMTP_HOST`` (required),
    ``SMTP_PORT`` (default 587), ``SMTP_USER`` (also used as the sender
    address) and ``SMTP_PASS``.

    Args:
        to: Recipient address.
        subject: Subject line.
        html: HTML body.

    Raises:
        HTTPException: 503 when ``SMTP_HOST`` is not set.
        ValueError: If ``SMTP_PORT`` is not an integer.
        smtplib.SMTPException, OSError: If the SMTP conversation fails or
            times out.
    """
    host = os.environ.get("SMTP_HOST", "")
    # Report "not configured" before reading any other setting, so a bad
    # SMTP_PORT cannot mask the clearer error.
    if not host:
        raise HTTPException(503, "Email ikke konfigureret (SMTP_HOST mangler)")

    user = os.environ.get("SMTP_USER", "")
    pwd = os.environ.get("SMTP_PASS", "")
    port = int(os.environ.get("SMTP_PORT", "587"))

    msg = MIMEMultipart("alternative")
    msg["Subject"] = subject
    msg["From"] = user
    msg["To"] = to
    msg.attach(MIMEText(html, "html"))

    # Without a timeout an unresponsive server would block the caller forever.
    timeout_seconds = 30
    with smtplib.SMTP(host, port, timeout=timeout_seconds) as s:
        s.starttls()
        s.login(user, pwd)
        s.sendmail(user, to, msg.as_string())
|
|
|
|
|
|
if __name__ == "__main__":
    # Development entry point: `python app.py` starts uvicorn with auto-reload
    # on all interfaces; the port comes from $PORT (default 8000).
    import uvicorn

    port = int(os.environ.get("PORT", "8000"))
    uvicorn.run(
        "app:app",
        host="0.0.0.0",
        port=port,
        reload=True,
    )
|