543 lines
20 KiB
Rust
543 lines
20 KiB
Rust
|
|
//! Diff two [`ExtractionResult`] values.
|
||
|
|
//!
|
||
|
|
//! This module is gated behind the `diff` Cargo feature. Enable it by adding
//! `features = ["diff"]` to the `kreuzberg` dependency entry in your `Cargo.toml`.
|
||
|
|
//!
|
||
|
|
//! # Example
|
||
|
|
//!
|
||
|
|
//! ```rust,no_run
|
||
|
|
//! use kreuzberg::{ExtractionResult, diff::{compare, DiffOptions}};
|
||
|
|
//!
|
||
|
|
//! # fn main() {
|
||
|
|
//! let a = ExtractionResult::default();
|
||
|
|
//! let b = ExtractionResult::default();
|
||
|
|
//! let opts = DiffOptions::default();
|
||
|
|
//! let result = compare(&a, &b, &opts);
|
||
|
|
//! assert!(result.content_diff.is_empty());
|
||
|
|
//! # }
|
||
|
|
//! ```
|
||
|
|
|
||
|
|
pub mod types;
|
||
|
|
|
||
|
|
pub use types::{
|
||
|
|
CellChange, DiffHunk, DiffLine, DiffOptions, EmbeddedChanges, EmbeddedDiff, ExtractionDiff, TableDiff,
|
||
|
|
};
|
||
|
|
|
||
|
|
use similar::{ChangeTag, DiffOp, TextDiff};
|
||
|
|
|
||
|
|
use crate::types::extraction::{ArchiveEntry, ExtractionResult};
|
||
|
|
use crate::types::tables::Table;
|
||
|
|
|
||
|
|
/// Default number of context lines on each side of a changed region.
|
||
|
|
const CONTEXT_LINES: usize = 3;
|
||
|
|
|
||
|
|
/// Compare two extraction results and return a structured diff.
|
||
|
|
///
|
||
|
|
/// The comparison is purely structural — no I/O, no side effects. All fields
|
||
|
|
/// of [`ExtractionDiff`] are populated according to the provided [`DiffOptions`].
|
||
|
|
///
|
||
|
|
/// # Arguments
|
||
|
|
///
|
||
|
|
/// * `a` — the "before" extraction result
|
||
|
|
/// * `b` — the "after" extraction result
|
||
|
|
/// * `opts` — controls which sections are compared and optional truncation
|
||
|
|
///
|
||
|
|
/// # Example
|
||
|
|
///
|
||
|
|
/// ```rust,no_run
|
||
|
|
/// use kreuzberg::{ExtractionResult, diff::{compare, DiffOptions}};
|
||
|
|
///
|
||
|
|
/// # fn main() {
|
||
|
|
/// let mut a = ExtractionResult::default();
|
||
|
|
/// let mut b = ExtractionResult::default();
|
||
|
|
/// a.content = "Hello world".to_string();
|
||
|
|
/// b.content = "Hello Rust".to_string();
|
||
|
|
///
|
||
|
|
/// let diff = compare(&a, &b, &DiffOptions::default());
|
||
|
|
/// assert_eq!(diff.content_diff.len(), 1);
|
||
|
|
/// # }
|
||
|
|
/// ```
|
||
|
|
pub fn compare(a: &ExtractionResult, b: &ExtractionResult, opts: &DiffOptions) -> ExtractionDiff {
|
||
|
|
let content_diff = diff_content(&a.content, &b.content, opts);
|
||
|
|
let (tables_added, tables_removed, tables_changed) = diff_tables(&a.tables, &b.tables);
|
||
|
|
let metadata_changed = if opts.include_metadata {
|
||
|
|
diff_metadata(&a.metadata, &b.metadata)
|
||
|
|
} else {
|
||
|
|
serde_json::Value::Null
|
||
|
|
};
|
||
|
|
let embedded_changes = if opts.include_embedded {
|
||
|
|
diff_embedded(a.children.as_deref(), b.children.as_deref(), opts)
|
||
|
|
} else {
|
||
|
|
EmbeddedChanges {
|
||
|
|
added: vec![],
|
||
|
|
removed: vec![],
|
||
|
|
changed: vec![],
|
||
|
|
}
|
||
|
|
};
|
||
|
|
|
||
|
|
ExtractionDiff {
|
||
|
|
content_diff,
|
||
|
|
tables_added,
|
||
|
|
tables_removed,
|
||
|
|
tables_changed,
|
||
|
|
metadata_changed,
|
||
|
|
embedded_changes,
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
// ── Content diff ─────────────────────────────────────────────────────────────
|
||
|
|
|
||
|
|
fn diff_content(a: &str, b: &str, opts: &DiffOptions) -> Vec<DiffHunk> {
|
||
|
|
let a_text = apply_truncation(a, opts.max_content_chars);
|
||
|
|
let b_text = apply_truncation(b, opts.max_content_chars);
|
||
|
|
let a_ref: &str = a_text.as_deref().unwrap_or(a);
|
||
|
|
let b_ref: &str = b_text.as_deref().unwrap_or(b);
|
||
|
|
|
||
|
|
let text_diff = TextDiff::from_lines(a_ref, b_ref);
|
||
|
|
|
||
|
|
if text_diff.ratio() == 1.0 {
|
||
|
|
return vec![];
|
||
|
|
}
|
||
|
|
|
||
|
|
let mut hunks = Vec::new();
|
||
|
|
for group in text_diff.grouped_ops(CONTEXT_LINES) {
|
||
|
|
let hunk_from_line = hunk_old_start(&group);
|
||
|
|
let hunk_to_line = hunk_new_start(&group);
|
||
|
|
let hunk_from_count = hunk_old_len(&group);
|
||
|
|
let hunk_to_count = hunk_new_len(&group);
|
||
|
|
let mut lines = Vec::new();
|
||
|
|
|
||
|
|
for op in &group {
|
||
|
|
for change in text_diff.iter_changes(op) {
|
||
|
|
let text = change.value().trim_end_matches('\n').to_string();
|
||
|
|
let line = match change.tag() {
|
||
|
|
ChangeTag::Equal => DiffLine::Context(text),
|
||
|
|
ChangeTag::Insert => DiffLine::Added(text),
|
||
|
|
ChangeTag::Delete => DiffLine::Removed(text),
|
||
|
|
};
|
||
|
|
lines.push(line);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
if !lines.is_empty() {
|
||
|
|
hunks.push(DiffHunk {
|
||
|
|
from_line: hunk_from_line,
|
||
|
|
from_count: hunk_from_count,
|
||
|
|
to_line: hunk_to_line,
|
||
|
|
to_count: hunk_to_count,
|
||
|
|
lines,
|
||
|
|
});
|
||
|
|
}
|
||
|
|
}
|
||
|
|
hunks
|
||
|
|
}
|
||
|
|
|
||
|
|
fn hunk_old_start(ops: &[DiffOp]) -> usize {
|
||
|
|
ops.first().map_or(0, |op| op.old_range().start)
|
||
|
|
}
|
||
|
|
|
||
|
|
fn hunk_new_start(ops: &[DiffOp]) -> usize {
|
||
|
|
ops.first().map_or(0, |op| op.new_range().start)
|
||
|
|
}
|
||
|
|
|
||
|
|
fn hunk_old_len(ops: &[DiffOp]) -> usize {
|
||
|
|
let start = ops.first().map_or(0, |op| op.old_range().start);
|
||
|
|
let end = ops.last().map_or(0, |op| op.old_range().end);
|
||
|
|
end.saturating_sub(start)
|
||
|
|
}
|
||
|
|
|
||
|
|
fn hunk_new_len(ops: &[DiffOp]) -> usize {
|
||
|
|
let start = ops.first().map_or(0, |op| op.new_range().start);
|
||
|
|
let end = ops.last().map_or(0, |op| op.new_range().end);
|
||
|
|
end.saturating_sub(start)
|
||
|
|
}
|
||
|
|
|
||
|
|
/// Return an owned prefix of `text` that is at most `limit` bytes long.
///
/// Returns `None` when no limit is configured. Otherwise the cut point is the
/// largest UTF-8 character boundary that does not exceed `min(limit, text.len())`,
/// so a multi-byte character is never split.
///
/// NOTE(review): the limit is applied in bytes, while the option feeding it is
/// named `max_content_chars` — confirm which unit is intended.
fn apply_truncation(text: &str, limit: Option<usize>) -> Option<String> {
    let cap = limit?;
    let upper = cap.min(text.len());
    // Index 0 is always a character boundary, so the search cannot fail.
    let cut = (0..=upper)
        .rev()
        .find(|&idx| text.is_char_boundary(idx))
        .unwrap_or(0);
    Some(text[..cut].to_owned())
}
|
||
|
|
|
||
|
|
// ── Table diff ────────────────────────────────────────────────────────────────
|
||
|
|
|
||
|
|
fn diff_tables(a_tables: &[Table], b_tables: &[Table]) -> (Vec<Table>, Vec<Table>, Vec<TableDiff>) {
|
||
|
|
let min_len = a_tables.len().min(b_tables.len());
|
||
|
|
let mut tables_changed = Vec::new();
|
||
|
|
|
||
|
|
for idx in 0..min_len {
|
||
|
|
let a_t = &a_tables[idx];
|
||
|
|
let b_t = &b_tables[idx];
|
||
|
|
|
||
|
|
if tables_same_shape(a_t, b_t) {
|
||
|
|
let cell_changes = diff_cells(a_t, b_t);
|
||
|
|
if !cell_changes.is_empty() {
|
||
|
|
tables_changed.push(TableDiff {
|
||
|
|
from_index: idx,
|
||
|
|
to_index: idx,
|
||
|
|
cell_changes,
|
||
|
|
});
|
||
|
|
}
|
||
|
|
} else {
|
||
|
|
// Different shape — treat the pair as remove + add.
|
||
|
|
// The "removed" side is reported in tables_removed and "added" in tables_added.
|
||
|
|
// We handle this by falling through to the asymmetric slice handling below.
|
||
|
|
// But we need to signal that these shouldn't be counted as "paired" — so we
|
||
|
|
// emit them as add + remove even though they share the same index.
|
||
|
|
tables_changed.push(TableDiff {
|
||
|
|
from_index: idx,
|
||
|
|
to_index: idx,
|
||
|
|
// No cell-level changes: shapes differ; report as a structural replacement.
|
||
|
|
cell_changes: vec![],
|
||
|
|
});
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
let tables_removed: Vec<Table> = if a_tables.len() > b_tables.len() {
|
||
|
|
a_tables[min_len..].to_vec()
|
||
|
|
} else {
|
||
|
|
vec![]
|
||
|
|
};
|
||
|
|
let tables_added: Vec<Table> = if b_tables.len() > a_tables.len() {
|
||
|
|
b_tables[min_len..].to_vec()
|
||
|
|
} else {
|
||
|
|
vec![]
|
||
|
|
};
|
||
|
|
|
||
|
|
(tables_added, tables_removed, tables_changed)
|
||
|
|
}
|
||
|
|
|
||
|
|
/// Two tables are considered the same shape if and only if their row and column counts match.
|
||
|
|
///
|
||
|
|
/// Header content is NOT compared — column reordering with the same dimensions will produce
|
||
|
|
/// per-cell `CellChange` entries for every cell whose value differs, not a structural replacement.
|
||
|
|
///
|
||
|
|
/// TODO: smarter shape-matching that aligns tables by header names (instead of positional
|
||
|
|
/// index) is a follow-up; for now dimensions-only is the v1 default.
|
||
|
|
fn tables_same_shape(a: &Table, b: &Table) -> bool {
|
||
|
|
if a.cells.len() != b.cells.len() {
|
||
|
|
return false;
|
||
|
|
}
|
||
|
|
let a_cols = a.cells.first().map_or(0, Vec::len);
|
||
|
|
let b_cols = b.cells.first().map_or(0, Vec::len);
|
||
|
|
a_cols == b_cols
|
||
|
|
}
|
||
|
|
|
||
|
|
fn diff_cells(a: &Table, b: &Table) -> Vec<CellChange> {
|
||
|
|
let mut changes = Vec::new();
|
||
|
|
for (row_idx, (a_row, b_row)) in a.cells.iter().zip(b.cells.iter()).enumerate() {
|
||
|
|
for (col_idx, (a_cell, b_cell)) in a_row.iter().zip(b_row.iter()).enumerate() {
|
||
|
|
if a_cell != b_cell {
|
||
|
|
changes.push(CellChange {
|
||
|
|
row: row_idx,
|
||
|
|
col: col_idx,
|
||
|
|
from: a_cell.clone(),
|
||
|
|
to: b_cell.clone(),
|
||
|
|
});
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
changes
|
||
|
|
}
|
||
|
|
|
||
|
|
// ── Metadata diff ─────────────────────────────────────────────────────────────
|
||
|
|
|
||
|
|
/// Diff two metadata values at the level of their top-level JSON keys.
///
/// Both sides are serialised with `serde_json`; a side that fails to serialise,
/// or that does not serialise to a JSON object, is treated as an empty object.
///
/// Returns a JSON object with three members:
///
/// * `"added"` — keys present only in `b`, with their values;
/// * `"removed"` — keys present only in `a`, with their values;
/// * `"changed"` — keys present in both with unequal values, each mapped to
///   `{ "from": <a value>, "to": <b value> }`.
fn diff_metadata(a: &crate::types::metadata::Metadata, b: &crate::types::metadata::Metadata) -> serde_json::Value {
    // Serialise one side into its top-level key/value map (empty on any mismatch).
    let top_level = |meta: &crate::types::metadata::Metadata| match serde_json::to_value(meta) {
        Ok(serde_json::Value::Object(map)) => map,
        _ => serde_json::Map::new(),
    };
    let before = top_level(a);
    let after = top_level(b);

    let mut added = serde_json::Map::new();
    let mut changed = serde_json::Map::new();
    for (key, new_value) in &after {
        match before.get(key) {
            Some(old_value) if old_value == new_value => {}
            Some(old_value) => {
                changed.insert(key.clone(), serde_json::json!({ "from": old_value, "to": new_value }));
            }
            None => {
                added.insert(key.clone(), new_value.clone());
            }
        }
    }

    // Whatever `before` holds that `after` lacks has been removed.
    let removed: serde_json::Map<String, serde_json::Value> = before
        .into_iter()
        .filter(|(key, _)| !after.contains_key(key.as_str()))
        .collect();

    serde_json::json!({ "added": added, "removed": removed, "changed": changed })
}
|
||
|
|
|
||
|
|
// ── Embedded diff ─────────────────────────────────────────────────────────────
|
||
|
|
|
||
|
|
fn diff_embedded(
|
||
|
|
a_children: Option<&[ArchiveEntry]>,
|
||
|
|
b_children: Option<&[ArchiveEntry]>,
|
||
|
|
opts: &DiffOptions,
|
||
|
|
) -> EmbeddedChanges {
|
||
|
|
let a_entries = a_children.unwrap_or(&[]);
|
||
|
|
let b_entries = b_children.unwrap_or(&[]);
|
||
|
|
|
||
|
|
let mut added = Vec::new();
|
||
|
|
let mut removed = Vec::new();
|
||
|
|
let mut changed = Vec::new();
|
||
|
|
|
||
|
|
for b_entry in b_entries {
|
||
|
|
match a_entries.iter().find(|e| e.path == b_entry.path) {
|
||
|
|
None => added.push(b_entry.clone()),
|
||
|
|
Some(a_entry) => {
|
||
|
|
let child_diff = compare(&a_entry.result, &b_entry.result, opts);
|
||
|
|
if is_nonempty_diff(&child_diff) {
|
||
|
|
changed.push(EmbeddedDiff {
|
||
|
|
path: b_entry.path.clone(),
|
||
|
|
diff: Box::new(child_diff),
|
||
|
|
});
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
for a_entry in a_entries {
|
||
|
|
if !b_entries.iter().any(|e| e.path == a_entry.path) {
|
||
|
|
removed.push(a_entry.clone());
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
EmbeddedChanges {
|
||
|
|
added,
|
||
|
|
removed,
|
||
|
|
changed,
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
fn is_nonempty_diff(diff: &ExtractionDiff) -> bool {
|
||
|
|
!diff.content_diff.is_empty()
|
||
|
|
|| !diff.tables_added.is_empty()
|
||
|
|
|| !diff.tables_removed.is_empty()
|
||
|
|
|| !diff.tables_changed.is_empty()
|
||
|
|
|| !diff.embedded_changes.added.is_empty()
|
||
|
|
|| !diff.embedded_changes.removed.is_empty()
|
||
|
|
|| !diff.embedded_changes.changed.is_empty()
|
||
|
|
|| is_nonempty_metadata_diff(&diff.metadata_changed)
|
||
|
|
}
|
||
|
|
|
||
|
|
/// Report whether a metadata diff value records any change.
///
/// `Null` (metadata comparison skipped) and the canonical empty diff
/// `{ "added": {}, "removed": {}, "changed": {} }` both count as "no change";
/// every other value counts as a change.
fn is_nonempty_metadata_diff(val: &serde_json::Value) -> bool {
    match val {
        serde_json::Value::Null => false,
        other => *other != serde_json::json!({ "added": {}, "removed": {}, "changed": {} }),
    }
}
|
||
|
|
|
||
|
|
// ── Tests ─────────────────────────────────────────────────────────────────────
|
||
|
|
|
||
|
|
#[cfg(all(test, feature = "diff"))]
mod tests {
    use super::*;
    use crate::types::{extraction::ExtractionResult, tables::Table};

    // ── fixtures ──────────────────────────────────────────────────────────────

    /// An extraction result with every field at its default.
    fn empty_result() -> ExtractionResult {
        ExtractionResult::default()
    }

    /// A default extraction result carrying only the given text content.
    fn result_with_content(content: &str) -> ExtractionResult {
        ExtractionResult {
            content: content.to_owned(),
            ..ExtractionResult::default()
        }
    }

    /// A default extraction result carrying only the given tables.
    fn result_with_tables(tables: Vec<Table>) -> ExtractionResult {
        ExtractionResult {
            tables,
            ..ExtractionResult::default()
        }
    }

    /// Build a table from string-slice rows; the remaining fields are placeholders.
    fn simple_table(cells: Vec<Vec<&str>>) -> Table {
        let mut owned_rows: Vec<Vec<String>> = Vec::with_capacity(cells.len());
        for row in cells {
            owned_rows.push(row.into_iter().map(str::to_string).collect());
        }
        Table {
            cells: owned_rows,
            markdown: String::new(),
            page_number: 1,
            bounding_box: None,
        }
    }

    /// A plain-text embedded child at `path` whose result holds `content`.
    fn text_entry(path: &str, content: &str) -> ArchiveEntry {
        ArchiveEntry {
            path: path.to_string(),
            mime_type: "text/plain".to_string(),
            result: Box::new(result_with_content(content)),
        }
    }

    /// True when the hunk contains a removed line with exactly this text.
    fn contains_removed(hunk: &DiffHunk, text: &str) -> bool {
        hunk.lines
            .iter()
            .any(|line| matches!(line, DiffLine::Removed(t) if t == text))
    }

    /// True when the hunk contains an added line with exactly this text.
    fn contains_added(hunk: &DiffHunk, text: &str) -> bool {
        hunk.lines
            .iter()
            .any(|line| matches!(line, DiffLine::Added(t) if t == text))
    }

    // ── identical inputs ──────────────────────────────────────────────────────

    #[test]
    fn should_produce_empty_diff_for_identical_inputs() {
        let diff = compare(&empty_result(), &empty_result(), &DiffOptions::default());

        assert!(diff.content_diff.is_empty());
        assert!(diff.tables_added.is_empty());
        assert!(diff.tables_removed.is_empty());
        assert!(diff.tables_changed.is_empty());
        assert!(diff.embedded_changes.added.is_empty());
        assert!(diff.embedded_changes.removed.is_empty());
        assert!(diff.embedded_changes.changed.is_empty());
    }

    #[test]
    fn should_produce_empty_diff_for_both_empty_results() {
        let before = ExtractionResult::default();
        let after = ExtractionResult::default();
        let diff = compare(&before, &after, &DiffOptions::default());
        assert!(!is_nonempty_diff(&diff));
    }

    // ── content diff ─────────────────────────────────────────────────────────

    #[test]
    fn should_produce_one_hunk_for_single_line_change() {
        let diff = compare(
            &result_with_content("Hello world"),
            &result_with_content("Hello Rust"),
            &DiffOptions::default(),
        );

        assert_eq!(diff.content_diff.len(), 1, "expected exactly one hunk");
        let hunk = &diff.content_diff[0];
        assert!(
            contains_removed(hunk, "Hello world"),
            "expected 'Hello world' as Removed line"
        );
        assert!(contains_added(hunk, "Hello Rust"), "expected 'Hello Rust' as Added line");
    }

    #[test]
    fn should_report_correct_line_numbers_for_single_line_change() {
        let diff = compare(
            &result_with_content("line one\nline two\nline three"),
            &result_with_content("line one\nline TWO\nline three"),
            &DiffOptions::default(),
        );

        assert_eq!(diff.content_diff.len(), 1);
        let hunk = &diff.content_diff[0];

        // The change sits on line 1 (0-indexed) of a three-line text, so the
        // 3-line context window reaches back to the very first line.
        assert_eq!(hunk.from_line, 0);
        assert_eq!(hunk.to_line, 0);
        // The hunk spans the whole text: context, changed line, context.
        assert_eq!(hunk.from_count, 3);
        assert_eq!(hunk.to_count, 3);
        // Both sides of the changed line must be present.
        assert!(contains_removed(hunk, "line two"), "expected 'line two' as Removed line");
        assert!(contains_added(hunk, "line TWO"), "expected 'line TWO' as Added line");
    }

    #[test]
    fn should_produce_empty_content_diff_when_content_identical_but_tables_differ() {
        let a = ExtractionResult {
            content: "same text".to_string(),
            ..result_with_tables(vec![simple_table(vec![vec!["A", "B"]])])
        };
        let b = ExtractionResult {
            content: "same text".to_string(),
            ..result_with_tables(vec![simple_table(vec![vec!["A", "C"]])])
        };

        let diff = compare(&a, &b, &DiffOptions::default());
        assert!(
            diff.content_diff.is_empty(),
            "content is identical; no content hunks expected"
        );
        assert!(!diff.tables_changed.is_empty(), "table change expected");
    }

    // ── table diff ───────────────────────────────────────────────────────────

    #[test]
    fn should_detect_single_cell_change_in_same_table() {
        let before = simple_table(vec![vec!["A", "B"], vec!["C", "D"]]);
        let after = simple_table(vec![vec!["A", "B"], vec!["C", "X"]]);
        let diff = compare(
            &result_with_tables(vec![before]),
            &result_with_tables(vec![after]),
            &DiffOptions::default(),
        );

        assert_eq!(diff.tables_changed.len(), 1);
        let table_diff = &diff.tables_changed[0];
        assert_eq!(table_diff.cell_changes.len(), 1);
        let change = &table_diff.cell_changes[0];
        assert_eq!(change.row, 1);
        assert_eq!(change.col, 1);
        assert_eq!(change.from, "D");
        assert_eq!(change.to, "X");
    }

    #[test]
    fn should_put_extra_table_in_tables_added() {
        let a = result_with_tables(vec![simple_table(vec![vec!["A"]])]);
        let b = result_with_tables(vec![
            simple_table(vec![vec!["A"]]),
            simple_table(vec![vec!["NEW"]]),
        ]);
        let diff = compare(&a, &b, &DiffOptions::default());

        assert_eq!(diff.tables_added.len(), 1);
        assert_eq!(diff.tables_added[0].cells[0][0], "NEW");
        assert!(diff.tables_removed.is_empty());
    }

    #[test]
    fn should_put_missing_table_in_tables_removed() {
        let a = result_with_tables(vec![
            simple_table(vec![vec!["A"]]),
            simple_table(vec![vec!["OLD"]]),
        ]);
        let b = result_with_tables(vec![simple_table(vec![vec!["A"]])]);
        let diff = compare(&a, &b, &DiffOptions::default());

        assert_eq!(diff.tables_removed.len(), 1);
        assert_eq!(diff.tables_removed[0].cells[0][0], "OLD");
        assert!(diff.tables_added.is_empty());
    }

    // ── embedded diff ─────────────────────────────────────────────────────────

    #[test]
    fn should_detect_added_embedded_child() {
        let a = empty_result();
        let b = ExtractionResult {
            children: Some(vec![text_entry("doc.txt", "hello")]),
            ..empty_result()
        };

        let diff = compare(&a, &b, &DiffOptions::default());
        assert_eq!(diff.embedded_changes.added.len(), 1);
        assert_eq!(diff.embedded_changes.added[0].path, "doc.txt");
        assert!(diff.embedded_changes.removed.is_empty());
    }

    #[test]
    fn should_detect_removed_embedded_child() {
        let a = ExtractionResult {
            children: Some(vec![text_entry("old.txt", "old")]),
            ..empty_result()
        };
        let b = empty_result();

        let diff = compare(&a, &b, &DiffOptions::default());
        assert_eq!(diff.embedded_changes.removed.len(), 1);
        assert_eq!(diff.embedded_changes.removed[0].path, "old.txt");
        assert!(diff.embedded_changes.added.is_empty());
    }
}
|