Add data quality normalization for cars: mileage in thousands and leasing prices
All checks were successful
Build and Deploy BlaaAi / build-and-deploy (push) Successful in 4m24s
All checks were successful
Build and Deploy BlaaAi / build-and-deploy (push) Successful in 4m24s
- Detect mileage below 500 km on cars that are not new (e.g. '130' entered instead of '130.000').
- Detect prices below 5.000 kr. (likely a monthly leasing rate rather than the sale price).
- Include the flags in the AI prompt, which instructs the model to give a low score (1-3).
- Show the flags in an orange warning box in the UI, distinct from the AI warnings.
This commit is contained in:
62
score.py
62
score.py
@@ -143,6 +143,57 @@ def extract_structured_fields(raw: str) -> dict:
|
|||||||
return fields
|
return fields
|
||||||
|
|
||||||
|
|
||||||
|
def normalize_listing(item: dict, category: str) -> dict:
    """Flag data quality problems on used-car listings.

    Two checks are run, and only when ``category`` is ``"brugte biler"``:

    - Mileage that looks like it was entered in thousands
      (``130`` instead of ``130.000``).
    - A price so low that it is more likely a monthly leasing rate than
      a sale price.

    Args:
        item: Listing dict. ``details.raw_text`` and ``price_dkk`` are read
            when present; missing or ``None`` values are tolerated.
        category: Detected listing category.

    Returns:
        The same ``item`` object. When at least one problem is found, the
        key ``data_quality_flags`` is set (in place) to a list of Danish
        warning strings; otherwise the item is left untouched.
    """
    if category != "brugte biler":
        return item

    # "details" may be absent, None, or (defensively) not a dict at all.
    details = item.get("details")
    if not isinstance(details, dict):
        details = {}
    raw = details.get("raw_text") or ""
    fields = extract_structured_fields(raw)

    # Mileage first, then price, so the flag order stays stable.
    candidates = (
        _mileage_quality_flag(fields),
        _price_quality_flag(item.get("price_dkk", 0)),
    )
    flags = [flag for flag in candidates if flag]

    if flags:
        item["data_quality_flags"] = flags
    return item


# Mileage below this (in km) on a car that is not new is treated as a typo.
_SUSPICIOUS_KM_BELOW = 500
# Prices strictly between 0 and this (in DKK) are treated as leasing rates.
_SUSPICIOUS_PRICE_BELOW = 5000
# Age assumed when the model year is missing or unparseable, so that the
# mileage check still fires for listings without a usable year.
_UNKNOWN_CAR_AGE = 99


def _dk_thousands(value: int) -> str:
    """Format an integer with '.' as thousands separator (Danish style)."""
    return f"{value:,}".replace(",", ".")


def _mileage_quality_flag(fields: dict) -> str:
    """Return a warning if the mileage looks entered in thousands, else ''."""
    # str() guards against the extractor handing back a number.
    km_digits = re.sub(r"[^\d]", "", str(fields.get("km") or ""))
    if not km_digits:
        return ""
    km_val = int(km_digits)

    # isdecimal() (unlike isdigit()) only accepts characters int() can parse,
    # so values such as "2015²" fall back to the unknown age instead of raising.
    year_text = str(fields.get("year") or "")
    if year_text.isdecimal():
        car_age = datetime.now().year - int(year_text)
    else:
        car_age = _UNKNOWN_CAR_AGE

    # Cars from this year or last year may genuinely have very low mileage.
    if km_val >= _SUSPICIOUS_KM_BELOW or car_age <= 1:
        return ""

    corrected_km = km_val * 1000
    return (
        f"⚠️ Kilometertallet ser forkert ud: sælger har skrevet '{km_val} km' "
        f"— sandsynligvis ment {_dk_thousands(corrected_km)} km. "
        "Annoncen nedprioriteres pga. vildledende km-angivelse."
    )


def _price_quality_flag(raw_price) -> str:
    """Return a warning if the price looks like a leasing rate, else ''."""
    # NOTE(review): a decimal comma is converted to a point before parsing, so
    # a string such as "4.500" would parse as 4.5 — confirm that price_dkk is
    # numeric (or free of thousands separators) upstream.
    try:
        price = float(str(raw_price).replace(",", "."))
    except (ValueError, TypeError):
        # Unparseable price: nothing to flag.
        return ""

    if not 0 < price < _SUSPICIOUS_PRICE_BELOW:
        return ""

    return (
        f"⚠️ Prisen på {_dk_thousands(int(price))} kr. er usandsynlig lav for en bil — "
        "sandsynligvis en månedlig leasingydelse, ikke salgspris. "
        "Annoncen nedprioriteres pga. misvisende prisangivelse."
    )
|
||||||
|
|
||||||
|
|
||||||
def listing_summary(item: dict, idx: int) -> str:
|
def listing_summary(item: dict, idx: int) -> str:
|
||||||
"""Compact text representation of a listing for the AI prompt."""
|
"""Compact text representation of a listing for the AI prompt."""
|
||||||
raw = item.get("details", {}).get("raw_text", item.get("description", ""))
|
raw = item.get("details", {}).get("raw_text", item.get("description", ""))
|
||||||
@@ -165,11 +216,18 @@ def listing_summary(item: dict, idx: int) -> str:
|
|||||||
|
|
||||||
meta_line = " | ".join(meta_parts)
|
meta_line = " | ".join(meta_parts)
|
||||||
|
|
||||||
|
# Include any data quality flags so AI factors them into scoring
|
||||||
|
quality_issues = item.get("data_quality_flags", [])
|
||||||
|
quality_block = ""
|
||||||
|
if quality_issues:
|
||||||
|
quality_block = "\nDATA KVALITETSPROBLEM (giv lav score 1-3):\n" + "\n".join(quality_issues) + "\n"
|
||||||
|
|
||||||
return (
|
return (
|
||||||
f"--- Annonce #{idx + 1} (ID: {item['id']}) ---\n"
|
f"--- Annonce #{idx + 1} (ID: {item['id']}) ---\n"
|
||||||
f"Navn: {item['name']}\n"
|
f"Navn: {item['name']}\n"
|
||||||
f"Pris: {item['price_dkk']} DKK\n"
|
f"Pris: {item['price_dkk']} DKK\n"
|
||||||
+ (f"{meta_line}\n" if meta_line else "")
|
+ (f"{meta_line}\n" if meta_line else "")
|
||||||
|
+ quality_block
|
||||||
+ f"{text}\n"
|
+ f"{text}\n"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -301,6 +359,10 @@ def score_listings(
|
|||||||
category = detect_category(items)
|
category = detect_category(items)
|
||||||
phash = prefs_hash(prefs)
|
phash = prefs_hash(prefs)
|
||||||
|
|
||||||
|
# ── Normalize data quality issues before any scoring ────────────────────
|
||||||
|
for item in items:
|
||||||
|
normalize_listing(item, category)
|
||||||
|
|
||||||
# ── Split: persistent score cache → in-file cache → needs AI scoring ────
|
# ── Split: persistent score cache → in-file cache → needs AI scoring ────
|
||||||
to_score, cached = [], []
|
to_score, cached = [], []
|
||||||
now = datetime.now().isoformat(timespec="seconds")
|
now = datetime.now().isoformat(timespec="seconds")
|
||||||
|
|||||||
@@ -425,6 +425,10 @@
|
|||||||
const pct = Math.round(score * 10);
|
const pct = Math.round(score * 10);
|
||||||
const warn = item.ai_warnings
|
const warn = item.ai_warnings
|
||||||
? `<p class="text-xs mt-2" style="color:#dc2626">↑ ${item.ai_warnings}</p>` : "";
|
? `<p class="text-xs mt-2" style="color:#dc2626">↑ ${item.ai_warnings}</p>` : "";
|
||||||
|
const qualityFlags = (item.data_quality_flags || []).length > 0
|
||||||
|
? `<div class="mt-2 rounded-lg px-3 py-2" style="background:#fff7ed;border:1px solid #fed7aa">
|
||||||
|
${item.data_quality_flags.map(f => `<p class="text-xs" style="color:#c2410c">${f}</p>`).join("")}
|
||||||
|
</div>` : "";
|
||||||
const tag = rankTag(score);
|
const tag = rankTag(score);
|
||||||
|
|
||||||
const card = document.createElement("div");
|
const card = document.createElement("div");
|
||||||
@@ -447,6 +451,7 @@
|
|||||||
</div>
|
</div>
|
||||||
<p class="text-xs leading-relaxed" style="color:#78716c">${item.ai_reason || ""}</p>
|
<p class="text-xs leading-relaxed" style="color:#78716c">${item.ai_reason || ""}</p>
|
||||||
${warn}
|
${warn}
|
||||||
|
${qualityFlags}
|
||||||
</div>
|
</div>
|
||||||
<div class="shrink-0 text-right ml-2">
|
<div class="shrink-0 text-right ml-2">
|
||||||
<p class="font-semibold text-sm tabular-nums">${Number(item.price_dkk || 0).toLocaleString("da-DK")}<span class="text-xs font-normal ml-0.5" style="color:#a8a29e">kr</span></p>
|
<p class="font-semibold text-sm tabular-nums">${Number(item.price_dkk || 0).toLocaleString("da-DK")}<span class="text-xs font-normal ml-0.5" style="color:#a8a29e">kr</span></p>
|
||||||
|
|||||||
Reference in New Issue
Block a user