backup: uncommitted changes from MAC-M9FQ0900T3 2026-05-17 15:52:31

This commit is contained in:
Henrik Jess Nielsen
2026-05-17 15:52:31 +02:00
parent fc40157a77
commit 0025043999
7 changed files with 436 additions and 4 deletions

View File

@@ -0,0 +1,214 @@
#!/usr/bin/env python3
"""
sync_principals_catalog.py — Build principals_catalog.json by scanning .bicep files
for array param values (GUIDs) and their inline comments.
Scans configured IaC repo directories for patterns like:
additionalAccess: ['c88bf29d-...'] // LRIADMPRO-IaC-Bicep
additionalAccess: [
'c88bf29d-...' // LRIADMPRO-IaC-Bicep
'another-guid' // Another-SP
]
Usage:
python3 scripts/sync_principals_catalog.py
python3 scripts/sync_principals_catalog.py --paths ~/IdeaProjects/Bitbucket/IaC
python3 scripts/sync_principals_catalog.py --dry-run
python3 scripts/sync_principals_catalog.py --output /path/to/principals_catalog.json
"""
import argparse
import json
import logging
import pathlib
import re
import sys
from datetime import datetime, timezone
from typing import Any
# Configure logging at import time: INFO and above, rendered as "LEVEL message".
logging.basicConfig(format="%(levelname)s %(message)s", level=logging.INFO)
log = logging.getLogger(__name__)

# Two levels above this file, i.e. the parent of the directory holding the script.
_REPO_ROOT = pathlib.Path(__file__).parent.parent
# Catalog location used when no --output is supplied.
_DEFAULT_OUTPUT = _REPO_ROOT.joinpath("principals_catalog.json")

# Default paths to scan — adjust to match your IaC repo locations.
# Kept as plain strings; "~" is expanded by the caller before scanning.
_DEFAULT_SCAN_PATHS = [
    "~/IdeaProjects/Bitbucket/IaC",
    "~/IdeaProjects/Bitbucket/LRU",
]
# Matches a UUID/GUID
_GUID_RE = re.compile(
r"[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}"
)
# Matches a single quoted GUID optionally followed by an inline comment:
# 'c88bf29d-...' // Some label text
# 'c88bf29d-...' // or with hash comments
_ITEM_RE = re.compile(
r"'(" + _GUID_RE.pattern + r")'\s*(?://+\s*(.+?)\s*)?$",
re.IGNORECASE,
)
# Matches the opening of an array param assignment:
# additionalAccess: [ or additionalAccess: ['guid'
_ARRAY_OPEN_RE = re.compile(r"^\s*(\w+)\s*:\s*\[")
def _extract_label(comment: str | None) -> str | None:
"""Clean up an inline comment to use as a display label."""
if not comment:
return None
# Strip trailing punctuation and whitespace
return comment.strip().rstrip(".,;")
def scan_file(path: pathlib.Path) -> dict[str, list[dict[str, Any]]]:
"""Scan a single .bicep file and return {param_name: [{id, label, source}]}."""
try:
text = path.read_text(encoding="utf-8")
except Exception as exc:
log.debug("Cannot read %s: %s", path, exc)
return {}
lines = text.splitlines()
results: dict[str, list[dict[str, Any]]] = {}
i = 0
while i < len(lines):
line = lines[i]
array_m = _ARRAY_OPEN_RE.match(line)
if not array_m:
i += 1
continue
param_name = array_m.group(1)
# Collect all characters on this and subsequent lines until array closes
collected = line[array_m.end() - 1:] # from '[' onwards
j = i + 1
# If the array doesn't close on the same line, keep accumulating
while collected.count("[") > collected.count("]") and j < len(lines):
collected += "\n" + lines[j]
j += 1
# Extract all GUID items from the collected block
for item_line in collected.splitlines():
m = _ITEM_RE.search(item_line)
if not m:
continue
guid = m.group(1).lower()
label = _extract_label(m.group(2))
entry: dict[str, Any] = {
"id": guid,
"label": label or guid,
"source": str(path),
}
if label:
entry["description"] = f"From {path.name}"
results.setdefault(param_name, [])
results[param_name].append(entry)
i = j
return results
def scan_paths(paths: list[pathlib.Path]) -> dict[str, list[dict[str, Any]]]:
    """Collect entries from every ``*.bicep`` file below *paths*.

    Each GUID is kept once per param name. When it is seen again, the stored
    entry is replaced only if it has no real label (empty, or equal to the
    GUID) and the new entry does have one.

    Args:
        paths: Base directories to search recursively. Paths that do not
            exist are skipped with a warning.

    Returns:
        Param names in sorted order, each mapped to its entries sorted
        case-insensitively by label.
    """
    # param name -> guid -> entry; the inner dict gives per-param dedup.
    by_param: dict[str, dict[str, dict[str, Any]]] = {}
    files_scanned = 0

    for root in paths:
        if not root.exists():
            log.warning("Path not found, skipping: %s", root)
            continue

        for bicep_path in sorted(root.rglob("*.bicep")):
            found = scan_file(bicep_path)
            files_scanned += 1

            for param_name, candidates in found.items():
                seen = by_param.setdefault(param_name, {})
                for candidate in candidates:
                    guid = candidate["id"]
                    current = seen.get(guid)
                    if current is None:
                        seen[guid] = candidate
                        continue

                    # A "real" label is non-empty and not just the GUID itself.
                    candidate_labelled = (
                        bool(candidate.get("label")) and candidate["label"] != guid
                    )
                    current_labelled = (
                        bool(current.get("label")) and current.get("label") != guid
                    )
                    if candidate_labelled and not current_labelled:
                        seen[guid] = candidate

    log.info("Scanned %d .bicep files across %d path(s)", files_scanned, len(paths))

    # Flatten the per-param dicts back into label-sorted lists.
    flattened: dict[str, list[dict[str, Any]]] = {}
    for param_name in sorted(by_param):
        entries = list(by_param[param_name].values())
        entries.sort(key=lambda e: e.get("label", e["id"]).lower())
        flattened[param_name] = entries
    return flattened
def build_catalog(scan_paths_list: list[pathlib.Path]) -> dict[str, Any]:
    """Scan the given paths and wrap the findings in a catalog document.

    Args:
        scan_paths_list: Directories handed on to ``scan_paths``.

    Returns:
        A dict with ``synced_at`` (current UTC time in ISO 8601 form),
        ``entry_count`` (number of entries summed over all params) and
        ``params`` (the mapping returned by ``scan_paths``).
    """
    found_by_param = scan_paths(scan_paths_list)

    entry_total = 0
    for entries in found_by_param.values():
        entry_total += len(entries)

    log.info(
        "Found %d unique entries across %d param(s)",
        entry_total,
        len(found_by_param),
    )

    synced_at = datetime.now(timezone.utc).isoformat()
    return {
        "synced_at": synced_at,
        "entry_count": entry_total,
        "params": found_by_param,
    }
def main() -> None:
    """Command-line entry point: build the catalog, then print or write it.

    Options:
        --paths    Directories to scan; ``~`` is expanded and each path is
                   resolved before scanning.
        --dry-run  Print the findings to stdout and write nothing.
        --output   Destination JSON file; ``~`` is expanded and missing
                   parent directories are created.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "--paths",
        nargs="+",
        default=_DEFAULT_SCAN_PATHS,
        metavar="PATH",
        help="Directories to scan for .bicep files (default: %(default)s)",
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Print findings without writing the catalog",
    )
    parser.add_argument(
        "--output",
        default=str(_DEFAULT_OUTPUT),
        help="Output JSON file (default: %(default)s)",
    )
    args = parser.parse_args()

    resolved = [pathlib.Path(p).expanduser().resolve() for p in args.paths]
    catalog = build_catalog(resolved)

    if args.dry_run:
        print("\n── Principals catalog (dry-run) ──────────────────────")
        if not catalog["params"]:
            print("  No GUID values found in .bicep files.")
        for param, entries in catalog["params"].items():
            print(f"\n  param: {param}  ({len(entries)} entries)")
            for e in entries:
                print(f"    {e['label']:<40}  {e['id']}")
        print(f"\n  Total: {catalog['entry_count']} entries")
        return

    # Expand "~" the same way --paths does; the shell does not expand it in
    # forms such as --output=~/catalog.json.
    out = pathlib.Path(args.output).expanduser()
    # Create the target directory up front so write_text does not fail on it.
    out.parent.mkdir(parents=True, exist_ok=True)

    # Strip internal 'source' field — not needed at runtime
    for entries in catalog["params"].values():
        for e in entries:
            e.pop("source", None)

    out.write_text(json.dumps(catalog, indent=2, ensure_ascii=False), encoding="utf-8")
    log.info("Written: %s (%d entries)", out, catalog["entry_count"])


if __name__ == "__main__":
    main()