backup: uncommitted changes from MAC-M9FQ0900T3 2026-05-17 15:52:31

2026-05-17 15:52:31 +02:00
parent fc40157a77
commit 0025043999
7 changed files with 436 additions and 4 deletions
--- a/scripts/sync_principals_catalog.py
+++ b/scripts/sync_principals_catalog.py
@@ -0,0 +1,214 @@
+#!/usr/bin/env python3
+"""
+sync_principals_catalog.py — Build principals_catalog.json by scanning .bicep files
+for array param values (GUIDs) and their inline comments.
+
+Scans configured IaC repo directories for patterns like:
+
+    additionalAccess: ['c88bf29d-...'] // LRIADMPRO-IaC-Bicep
+    additionalAccess: [
+      'c88bf29d-...' // LRIADMPRO-IaC-Bicep
+      'another-guid' // Another-SP
+    ]
+
+Usage:
+    python3 scripts/sync_principals_catalog.py
+    python3 scripts/sync_principals_catalog.py --paths ~/IdeaProjects/Bitbucket/IaC
+    python3 scripts/sync_principals_catalog.py --dry-run
+    python3 scripts/sync_principals_catalog.py --output /path/to/principals_catalog.json
+"""
+
+import argparse
+import json
+import logging
+import pathlib
+import re
+import sys
+from datetime import datetime, timezone
+from typing import Any
+
+# Console logging: severity, two spaces, then the message.
+logging.basicConfig(format="%(levelname)s  %(message)s", level=logging.INFO)
+log = logging.getLogger(__name__)
+
+# Two levels above this file (scripts/<name>.py) is the repo root; the catalog goes there.
+_REPO_ROOT = pathlib.Path(__file__).parent.parent
+_DEFAULT_OUTPUT = _REPO_ROOT.joinpath("principals_catalog.json")
+
+# Scanned when --paths is not given — adjust to match where your IaC repos
+# are checked out.
+_DEFAULT_SCAN_PATHS = ["~/IdeaProjects/Bitbucket/" + repo for repo in ("IaC", "LRU")]
+
+# Matches a UUID/GUID (8-4-4-4-12 hex digits, either case)
+_GUID_RE = re.compile(
+    r"[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}"
+)
+
+# Matches a quoted GUID that ends a line's code, plus an optional // comment.
+# Commas and a closing bracket may sit between the GUID and the comment, so
+#   'c88bf29d-...', // Some label    and    ['c88bf29d-...'] // Some label
+# both match. Group 1 is the GUID, group 2 the comment text (None if absent).
+_ITEM_RE = re.compile(
+    r"'(" + _GUID_RE.pattern + r")'[\s,\]]*(?://+\s*(.+?)\s*)?$"
+)
+
+# Matches the opening of an array param assignment:
+#   additionalAccess: [  or  additionalAccess: ['guid'
+_ARRAY_OPEN_RE = re.compile(r"^\s*(\w+)\s*:\s*\[")
+
+
+def _extract_label(comment: str | None) -> str | None:
+    """Return *comment* cleaned for use as a display label, or None if nothing is left."""
+    if not comment:
+        return None
+    # Drop trailing punctuation together with any whitespace in front of it ("x ;" -> "x").
+    return comment.strip().rstrip(".,; \t") or None
+
+
+def scan_file(path: pathlib.Path) -> dict[str, list[dict[str, Any]]]:
+    """Scan a single .bicep file and return {param_name: [{id, label, source}]}.
+
+    A line matching _ARRAY_OPEN_RE opens a block that runs until its brackets
+    balance or the file ends; each block line matching _ITEM_RE becomes one
+    entry under that param name. An unreadable file is logged and yields {}.
+    """
+    try:
+        # utf-8-sig reads plain UTF-8 too and drops a leading BOM, which would
+        # otherwise be left at the start of the first line.
+        text = path.read_text(encoding="utf-8-sig")
+    except (OSError, UnicodeDecodeError) as exc:
+        # Skipping is deliberate, but warn: this file's entries will be missing.
+        log.warning("Cannot read %s: %s", path, exc)
+        return {}
+
+    lines = text.splitlines()
+    results: dict[str, list[dict[str, Any]]] = {}
+
+    i = 0
+    while i < len(lines):
+        array_m = _ARRAY_OPEN_RE.match(lines[i])
+        if not array_m:
+            i += 1
+            continue
+
+        param_name = array_m.group(1)
+        # The first block line starts at the '[' so the param name is left out.
+        block = [lines[i][array_m.end() - 1:]]
+        depth = block[0].count("[") - block[0].count("]")
+        i += 1
+        # Keep a running depth instead of recounting the whole block per line.
+        while depth > 0 and i < len(lines):
+            block.append(lines[i])
+            depth += lines[i].count("[") - lines[i].count("]")
+            i += 1
+
+        for item_line in block:
+            m = _ITEM_RE.search(item_line)
+            if not m:
+                continue
+            guid = m.group(1).lower()
+            label = _extract_label(m.group(2))
+            entry: dict[str, Any] = {"id": guid, "label": label or guid, "source": str(path)}
+            if label:
+                entry["description"] = f"From {path.name}"
+            results.setdefault(param_name, []).append(entry)
+
+    return results
+
+
+def scan_paths(paths: list[pathlib.Path]) -> dict[str, list[dict[str, Any]]]:
+    """Collect entries from every .bicep file below *paths*, one per GUID and param.
+
+    Missing paths are skipped with a warning. A repeated GUID keeps its first
+    entry unless that one lacks a real label (missing or equal to the GUID) and
+    a later one has it. Params are returned in name order, entries by label.
+    """
+    def has_real_label(item: dict[str, Any]) -> bool:
+        return bool(item.get("label")) and item["label"] != item["id"]
+
+    by_param: dict[str, dict[str, dict[str, Any]]] = {}
+    file_count = 0
+    for root in paths:
+        if not root.exists():
+            log.warning("Path not found, skipping: %s", root)
+            continue
+        for source_file in sorted(root.rglob("*.bicep")):
+            found = scan_file(source_file)
+            file_count += 1
+            for param, items in found.items():
+                seen = by_param.setdefault(param, {})
+                for item in items:
+                    kept = seen.setdefault(item["id"], item)
+                    if has_real_label(item) and not has_real_label(kept):
+                        seen[item["id"]] = item
+    log.info("Scanned %d .bicep files across %d path(s)", file_count, len(paths))
+
+    ordered: dict[str, list[dict[str, Any]]] = {}
+    for param in sorted(by_param):
+        entries = list(by_param[param].values())
+        entries.sort(key=lambda e: e.get("label", e["id"]).lower())
+        ordered[param] = entries
+    return ordered
+
+
+def build_catalog(scan_paths_list: list[pathlib.Path]) -> dict[str, Any]:
+    """Scan *scan_paths_list* and wrap the findings in the catalog document."""
+    by_param = scan_paths(scan_paths_list)
+    entry_total = sum(map(len, by_param.values()))
+    log.info("Found %d unique entries across %d param(s)", entry_total, len(by_param))
+    # The timestamp is taken in UTC once the scan has finished.
+    catalog: dict[str, Any] = {"synced_at": datetime.now(timezone.utc).isoformat()}
+    catalog.update(entry_count=entry_total, params=by_param)
+    return catalog
+
+
+def main() -> None:
+    """Parse the command line, build the catalog, then print it or write it as JSON."""
+    parser = argparse.ArgumentParser(
+        description=__doc__,
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+    )
+    parser.add_argument(
+        "--paths",
+        nargs="+",
+        default=_DEFAULT_SCAN_PATHS,
+        metavar="PATH",
+        help="Directories to scan for .bicep files (default: %(default)s)",
+    )
+    parser.add_argument(
+        "--dry-run", action="store_true", help="Print findings without writing the catalog"
+    )
+    parser.add_argument(
+        "--output", default=str(_DEFAULT_OUTPUT), help="Output JSON file (default: %(default)s)"
+    )
+    args = parser.parse_args()
+
+    resolved = [pathlib.Path(p).expanduser().resolve() for p in args.paths]
+    catalog = build_catalog(resolved)
+
+    if args.dry_run:
+        print("\n── Principals catalog (dry-run) ──────────────────────")
+        if not catalog["params"]:
+            print("  No GUID values found in .bicep files.")
+        for param, entries in catalog["params"].items():
+            print(f"\n  param: {param}  ({len(entries)} entries)")
+            for e in entries:
+                print(f"    {e['label']:<40}  {e['id']}")
+        print(f"\n  Total: {catalog['entry_count']} entries")
+        return
+
+    # Expand "~" in --output the same way --paths is expanded above, and create
+    # missing parent directories so write_text() does not fail on a fresh location.
+    out = pathlib.Path(args.output).expanduser()
+    out.parent.mkdir(parents=True, exist_ok=True)
+    # Strip internal 'source' field — not needed at runtime
+    for entries in catalog["params"].values():
+        for e in entries:
+            e.pop("source", None)
+
+    out.write_text(json.dumps(catalog, indent=2, ensure_ascii=False), encoding="utf-8")
+    log.info("Written: %s  (%d entries)", out, catalog["entry_count"])
+
+
+if __name__ == "__main__":  # run the CLI only when executed as a script, not on import
+    main()