diff --git a/crates/kreuzberg-cli/src/logging.rs b/crates/kreuzberg-cli/src/logging.rs
index ab00fe3..bd14694 100644
--- a/crates/kreuzberg-cli/src/logging.rs
+++ b/crates/kreuzberg-cli/src/logging.rs
@@ -19,6 +19,10 @@ const QUIET_DIRECTIVES: &[&str] = &[
     "hyper_util=warn",
     "hf_hub=info",
     "tower_http=info",
+    // html5ever emits "foster parenting not implemented" warnings for malformed
+    // HTML tables found in PDF-extracted content; these are expected and unfixable
+    // at this level, so silence everything from html5ever below the `error` level.
+    "html5ever=error",
 ];
 
 /// Extract the target crate name from a directive string like `"ureq=warn"`.
@@ -219,6 +223,16 @@ mod tests {
         );
     }
 
+    #[test]
+    fn html5ever_suppressed_at_default() {
+        let filter = build_env_filter(None); // `None`: presumably no user filter — confirm
+        let directives = filter_directives(&filter);
+        assert!(
+            directives.contains("html5ever=error"),
+            "html5ever must be suppressed to error at default; got: {directives}"
+        );
+    }
+
     #[test]
     fn similar_target_name_does_not_block_suppression() {
         // A user-supplied directive for `hf_hub_server` must NOT cause the