Files
BlaaAI/app.py

279 lines
10 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
"""
BlaaAi - AI-powered DBA listing analyzer.
FastAPI backend.

Usage:
    uvicorn app:app --reload --port 8000
"""
import json, os, smtplib
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime, timezone
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from pathlib import Path
from typing import Optional
from fastapi import BackgroundTasks, FastAPI, HTTPException
from fastapi.responses import HTMLResponse
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates
from pydantic import BaseModel
from starlette.requests import Request
from fetch_dba import (
DATA_DIR, ITEM_CACHE, create_search, detect_domain, enrich_listings,
fetch_page, find_new, listings_file, load_meta, load_seen,
save_seen, seen_file, list_searches,
)
from score import CRITERIA, METRICS_FILE, score_listings
# ASGI application object; the uvicorn command in the module docstring
# targets it as "app:app".
app = FastAPI(title="BlaaAi")
# Serve files from the local "static" directory under the /static URL prefix.
app.mount("/static", StaticFiles(directory="static"), name="static")
# Jinja2 templates are loaded from the local "templates" directory.
templates = Jinja2Templates(directory="templates")
# Thread pool with four workers.
# NOTE(review): nothing in the visible part of this file submits work to it -
# confirm whether it is still needed.
executor = ThreadPoolExecutor(max_workers=4)
# ── Meta helpers ──────────────────────────────────────────────────────────────
def update_meta(search_id: str, **kwargs) -> dict:
    """Merge ``kwargs`` into a search's ``meta.json`` and return the result.

    The file ``DATA_DIR / search_id / "meta.json"`` is read, updated with the
    given keyword arguments (existing keys are overwritten) and written back
    as indented JSON with non-ASCII characters kept verbatim.

    Returns:
        The merged metadata dictionary that was written to disk.
    """
    meta_path = DATA_DIR / search_id / "meta.json"
    merged = json.loads(meta_path.read_text())
    merged.update(kwargs)
    serialized = json.dumps(merged, ensure_ascii=False, indent=2)
    meta_path.write_text(serialized)
    return merged
# ── Background pipeline ───────────────────────────────────────────────────────
def run_fetch_and_score(search_id: str, prefs: str = "") -> None:
    """Fetch new listings for a search, score them and record the progress.

    Blocking pipeline; the routes in this file schedule it as a background
    task. The search's ``status`` in ``meta.json`` moves through
    "fetching" -> "scoring" -> "ready". On any exception the status is set
    to "error" (with the message stored under ``error``) and the exception
    is re-raised.

    Args:
        search_id: Identifier of the search whose data directory is used.
        prefs: Free-text preferences passed on to ``score_listings`` and
            stored in the metadata once scoring has finished.
    """
    try:
        search_url = load_meta(search_id)["url"]

        # Fetch: only result page 1 is requested. Listings not seen before
        # are enriched and appended to the stored listings file.
        update_meta(search_id, status="fetching")
        seen_path = seen_file(search_id)
        listings_path = listings_file(search_id)
        seen_ids = load_seen(seen_path)
        page_listings, _ = fetch_page(search_url, 1)
        fresh = find_new(page_listings, seen_ids)
        if fresh:
            fresh = enrich_listings(fresh)
            stored = []
            if listings_path.exists():
                stored = json.loads(listings_path.read_text())
            stored.extend(fresh)
            listings_path.write_text(
                json.dumps(stored, ensure_ascii=False, indent=2)
            )
            # Remember every id on the fetched page, not just the new ones.
            page_ids = {entry["id"] for entry in page_listings}
            save_seen(seen_path, seen_ids | page_ids)

        # Score: everything stored for this search is handed to the scorer
        # with the criteria registered for the URL's detected domain.
        update_meta(search_id, status="scoring")
        if listings_path.exists():
            items = json.loads(listings_path.read_text())
        else:
            items = []
        criteria = CRITERIA[detect_domain(search_url)]
        score_listings(
            items, criteria, prefs, force=False, source_file=listings_path
        )

        finished_at = datetime.now().isoformat(timespec="seconds")
        update_meta(
            search_id,
            status="ready",
            listing_count=len(items),
            last_scored_at=finished_at,
            prefs=prefs,
        )
    except Exception as exc:
        # Expose the failure through the metadata, then let it propagate.
        update_meta(search_id, status="error", error=str(exc))
        raise
# ── Pydantic models ───────────────────────────────────────────────────────────
class NewSearchRequest(BaseModel):
    """Request body for creating a search; also accepted by the rescore route."""

    # Search URL handed to create_search(); the rescore route does not read it.
    url: str
    # Free-text preferences; the routes treat None or an empty value as "".
    prefs: Optional[str] = ""
class EmailRequest(BaseModel):
    """Request body for the email route."""

    # Recipient address; typed as a plain str, so no address-format
    # validation happens at the model level.
    email: str
# ── Routes ────────────────────────────────────────────────────────────────────
@app.get("/health")
async def health():
    """Report liveness plus build metadata read from the environment.

    ``version`` comes from BUILD_VERSION and ``commit`` from the first seven
    characters of GIT_COMMIT; both fall back to "unknown".
    """
    now_utc = datetime.now(timezone.utc)
    build_version = os.getenv("BUILD_VERSION", "unknown")
    short_commit = os.getenv("GIT_COMMIT", "unknown")[:7]
    return {
        "status": "healthy",
        "timestamp": now_utc.isoformat(),
        "version": build_version,
        "commit": short_commit,
    }
@app.get("/", response_class=HTMLResponse)
async def index(request: Request):
    """Serve the landing page rendered from ``index.html``."""
    page = templates.TemplateResponse(request, "index.html")
    return page
@app.get("/search/{search_id}", response_class=HTMLResponse)
async def search_view(request: Request, search_id: str):
    """Serve ``index.html`` with ``search_id`` passed in the template context."""
    context = {"search_id": search_id}
    return templates.TemplateResponse(request, "index.html", context)
@app.post("/api/searches")
async def create(body: NewSearchRequest, background_tasks: BackgroundTasks):
    """Register a new search and queue the fetch-and-score pipeline for it.

    Returns:
        A dict with the new search ``id`` and the initial ``status`` "queued".
    """
    search_id = create_search(body.url)
    # A missing or None prefs value is normalised to the empty string once.
    prefs = body.prefs or ""
    update_meta(search_id, status="queued", prefs=prefs)
    background_tasks.add_task(run_fetch_and_score, search_id, prefs)
    return {"id": search_id, "status": "queued"}
@app.get("/api/searches")
async def get_all():
    """Return whatever ``list_searches`` reports for the known searches."""
    searches = list_searches()
    return searches
@app.get("/api/searches/{search_id}")
async def get_search(search_id: str):
    """Return a search's metadata, including scored listings when present.

    When the listings file exists, ``listings`` holds the entries that have
    an ``ai_score`` (highest first), ``listing_count`` the number of stored
    entries and ``scored_count`` the number of scored ones. A missing
    ``status`` is filled in as "ready" when there is at least one listing,
    otherwise "unknown".

    Raises:
        HTTPException: 404 when the search metadata cannot be found.
    """
    try:
        meta = load_meta(search_id)
    except FileNotFoundError:
        raise HTTPException(404, "Søgning ikke fundet")

    listings_path = listings_file(search_id)
    if listings_path.exists():
        all_items = json.loads(listings_path.read_text())
        scored = [
            entry for entry in all_items if entry.get("ai_score") is not None
        ]
        scored.sort(key=lambda entry: entry["ai_score"], reverse=True)
        meta.update(
            listings=scored,
            listing_count=len(all_items),
            scored_count=len(scored),
        )

    # Metadata without a status: any scored entry implies listing_count > 0,
    # so one check on listing_count covers every "ready" case.
    if "status" not in meta:
        if meta.get("listing_count", 0) > 0:
            meta["status"] = "ready"
        else:
            meta["status"] = "unknown"
    return meta
@app.post("/api/searches/{search_id}/rescore")
async def rescore(search_id: str, body: NewSearchRequest, background_tasks: BackgroundTasks):
    """Queue the fetch-and-score pipeline again for an existing search.

    Only ``body.prefs`` is used; it replaces the stored preferences.

    Raises:
        HTTPException: 404 when the search metadata cannot be found.
    """
    try:
        load_meta(search_id)  # existence check only; the result is not needed
    except FileNotFoundError:
        raise HTTPException(404, "Søgning ikke fundet")

    prefs = body.prefs or ""
    update_meta(search_id, status="queued", prefs=prefs)
    background_tasks.add_task(run_fetch_and_score, search_id, prefs)
    return {"id": search_id, "status": "queued"}
@app.get("/metrics")
async def get_metrics():
    """Return global metrics, per-search metrics and the item-cache size.

    The response is the content of ``METRICS_FILE`` (empty when the file is
    missing) extended with ``item_cache_size`` (number of ``*.json`` files
    in ``ITEM_CACHE``) and ``searches`` (every ``metrics.json`` found one
    level below ``DATA_DIR``, most recently modified entry first).
    """
    if METRICS_FILE.exists():
        global_metrics = json.loads(METRICS_FILE.read_text())
    else:
        global_metrics = {}

    # Per-search breakdown
    searches = []
    if DATA_DIR.exists():
        newest_first = sorted(
            DATA_DIR.iterdir(),
            key=lambda entry: entry.stat().st_mtime,
            reverse=True,
        )
        metrics_paths = (entry / "metrics.json" for entry in newest_first)
        searches = [
            json.loads(path.read_text())
            for path in metrics_paths
            if path.exists()
        ]

    # Item cache stats
    if ITEM_CACHE.exists():
        cached_items = sum(1 for _ in ITEM_CACHE.glob("*.json"))
    else:
        cached_items = 0

    return {**global_metrics, "item_cache_size": cached_items, "searches": searches}
@app.post("/api/searches/{search_id}/email")
async def send_email(search_id: str, body: EmailRequest):
    """Email the ten best-scored listings of a finished search.

    Returns:
        A dict with ``status`` "sent" and the recipient under ``to``.

    Raises:
        HTTPException: 404 when the search is unknown; 400 when its status
            is not "ready" or no listings file exists. Errors raised by
            ``_send_email`` propagate unchanged.
    """
    import asyncio  # local import: only this handler needs it

    try:
        meta = load_meta(search_id)
    except FileNotFoundError:
        raise HTTPException(404, "Søgning ikke fundet")
    if meta.get("status") != "ready":
        raise HTTPException(400, "Analysen er ikke færdig endnu")

    lf = listings_file(search_id)
    if not lf.exists():
        raise HTTPException(400, "Ingen resultater at sende")

    items = json.loads(lf.read_text())
    ranked = sorted(
        [i for i in items if i.get("ai_score") is not None],
        key=lambda x: x["ai_score"],
        reverse=True,
    )[:10]
    html = _build_email_html(ranked, meta)

    # SMTP delivery is blocking network I/O. Running it directly inside this
    # coroutine would stall the event loop (and every other request) until
    # the mail server answers, so it is pushed to a worker thread instead.
    await asyncio.to_thread(
        _send_email, body.email, "🔍 Dine DBA-resultater fra BlaaAi", html
    )
    return {"status": "sent", "to": body.email}
# ── Email helpers ─────────────────────────────────────────────────────────────
def _build_email_html(ranked: list[dict], meta: dict) -> str:
rows = ""
for i, item in enumerate(ranked, 1):
score = item.get("ai_score", 0)
bar = "" * int(score) + "" * (10 - int(score))
reason = item.get("ai_reason", "")
warn = f'<p style="color:#dc2626;font-size:13px">⚠️ {item["ai_warnings"]}</p>' if item.get("ai_warnings") else ""
rows += f"""
<tr>
<td style="padding:16px;border-bottom:1px solid #e5e7eb">
<strong>#{i} [{score}] {item['name']}</strong><br>
<span style="color:#6b7280">{item.get('price_dkk','?')} DKK</span>
<span style="font-family:monospace;color:#6366f1;margin-left:8px">{bar}</span><br>
<p style="margin:6px 0;font-size:14px">{reason}</p>
{warn}
<a href="{item['url']}" style="color:#6366f1;font-size:13px">Se annonce </a>
</td>
</tr>"""
return f"""
<html><body style="font-family:sans-serif;max-width:600px;margin:0 auto;padding:20px">
<h1 style="color:#1f2937">🔍 Dine DBA-resultater</h1>
<p style="color:#6b7280">Søgning: <a href="{meta['url']}">{meta['url'][:60]}</a></p>
<table style="width:100%;border-collapse:collapse">{rows}</table>
<p style="color:#9ca3af;font-size:12px;margin-top:24px">Leveret af BlaaAi</p>
</body></html>"""
def _send_email(to: str, subject: str, html: str) -> None:
    """Send an HTML email through the SMTP server configured in the environment.

    Reads SMTP_HOST, SMTP_PORT (default 587), SMTP_USER and SMTP_PASS. The
    connection is upgraded with STARTTLS and authenticated with the
    configured user, which is also used as the sender address.

    Args:
        to: Recipient address.
        subject: Subject line.
        html: HTML body of the message.

    Raises:
        HTTPException: 503 when SMTP_HOST is not set; 400 when the recipient
            contains a line break.
    """
    host = os.environ.get("SMTP_HOST", "")
    user = os.environ.get("SMTP_USER", "")
    pwd = os.environ.get("SMTP_PASS", "")
    port = int(os.environ.get("SMTP_PORT", "587"))
    if not host:
        raise HTTPException(503, "Email ikke konfigureret (SMTP_HOST mangler)")

    # The recipient comes straight from the request body. A CR or LF in it
    # could smuggle extra headers into the message, so reject it up front.
    if "\r" in to or "\n" in to:
        raise HTTPException(400, "Ugyldig email-adresse")

    msg = MIMEMultipart("alternative")
    msg["Subject"] = subject
    msg["From"] = user
    msg["To"] = to
    msg.attach(MIMEText(html, "html"))

    # Without a timeout an unresponsive mail server would block the caller
    # indefinitely; 30 seconds bounds connect and every later socket read.
    with smtplib.SMTP(host, port, timeout=30) as s:
        s.starttls()
        s.login(user, pwd)
        s.sendmail(user, to, msg.as_string())
# Script entry point: starts the server when this file is executed directly.
if __name__ == "__main__":
    import uvicorn
    # Listen port comes from the PORT environment variable (default 8000).
    port = int(os.getenv("PORT", "8000"))
    # Binds all interfaces with auto-reload enabled.
    # NOTE(review): reload=True is normally a development setting - confirm
    # this entry point is not used for production deployments.
    uvicorn.run("app:app", host="0.0.0.0", port=port, reload=True)