# ignition_scripts/COMPARE_SVG_DWG/compare_dwg_vs_svg.py

import csv
import xml.etree.ElementTree as ET
from pathlib import Path
import re
import sys
from datetime import datetime

from openpyxl import Workbook
from openpyxl.styles import Font, PatternFill, Alignment
from openpyxl.utils import get_column_letter

# -----------------------------
# SVG suffixes that mean "same device"
# -----------------------------
# An SVG element id such as "..._RECT-5-18" names one drawn part of a device.
# normalize_name() removes trailing numeric chunks and then strips one of these
# suffixes from the end of the id, so all parts collapse to one device name.
SVG_SUFFIXES = (
    "_ASSEMBLY",
    "_END",
    "_LINE",
    "_OUT",
    "_RECT",
    "_CIRCLE",
    "_TRIBOTLEFT",
    "_TRITOPRIGHT",
)

# -----------------------------
# DEVICE TYPES TO IGNORE
# (substring match after normalization)
# -----------------------------
# is_ignored() excludes any normalized name that contains one of these
# substrings anywhere in it; the match is not anchored to the end of the name.
IGNORE_CONTAINS = (
    "_BCN",
    "_ENC",
    "_SOL",
)

# Output folder for reports.
# The path is relative, so it resolves against the current working directory;
# write_excel_report() creates the folder when it does not exist yet.
OUTPUT_DIR = Path("reports")

# -----------------------------
# NORMALIZATION HELPERS
# -----------------------------
# Matches one or more "_<digits>" groups anchored at the end of the string
# (e.g. "_5_18"); used to drop the numeric tail of SVG part ids.
_TRAILING_NUMERIC_CHUNKS_RE = re.compile(r"(?:_\d+)+$")


def _contains_svg_suffix_token(n_with_underscores: str) -> bool:
    """
    Report whether the name carries one of the SVG part suffixes.

    A suffix counts when the name ends with it, or when it is immediately
    followed by an underscore somewhere in the name
    (e.g. "..._RECT_5_18" carries the "_RECT" suffix).
    """
    for part_suffix in SVG_SUFFIXES:
        if n_with_underscores.endswith(part_suffix):
            return True
        if f"{part_suffix}_" in n_with_underscores:
            return True
    return False


# -----------------------------
# NORMALIZATION
# -----------------------------
def normalize_name(name: str) -> str:
    """
    Reduce a raw DWG tag or SVG element id to a comparable device name.

    The name is stripped of surrounding whitespace and upper-cased, a trailing
    "_VFD1" becomes "_VFD", and dashes become underscores. When the result
    carries an SVG part suffix, its trailing numeric chunks are removed and
    then a single part suffix is stripped from the end.

    Returns "" for an empty (or otherwise falsy) input.
    """
    if not name:
        return ""

    # DWG writes "VFD1" where the SVG writes "VFD"; unify on the SVG spelling.
    upper = re.sub(r"_VFD1$", "_VFD", name.strip().upper())

    # SVG ids may look like "..._RECT-5-18"; use underscores throughout so the
    # suffix and numeric-tail checks only have one separator to deal with.
    candidate = upper.replace("-", "_")

    # Names without an SVG part suffix need no further cleaning.
    if not _contains_svg_suffix_token(candidate):
        return candidate

    # Drop the numeric tail first (e.g. _5_18, _1_0_01) ...
    candidate = _TRAILING_NUMERIC_CHUNKS_RE.sub("", candidate)

    # ... then strip the first listed suffix that the name now ends with.
    matched = next((s for s in SVG_SUFFIXES if candidate.endswith(s)), None)
    if matched is not None:
        candidate = candidate[: -len(matched)]

    return candidate


def is_ignored(normalized_name: str) -> bool:
    """
    Tell whether a normalized device name is excluded from the comparison.

    Empty names are always ignored; any other name is ignored when it contains
    at least one of the IGNORE_CONTAINS substrings.
    """
    if normalized_name:
        for marker in IGNORE_CONTAINS:
            if marker in normalized_name:
                return True
        return False
    return True


# -----------------------------
# FILE PICKER
# -----------------------------
def pick_file(ext: str, label: str) -> Path:
    """
    Scan the current folder for files with the given extension and pick one.

    ext is compared case-insensitively against each file's suffix
    (example: ".txt" or ".svg"); label is only used in the printed prompts.

    A single match is selected automatically. With several matches the user is
    shown a numbered list and prompted until a valid number is entered.

    Returns the chosen file path.
    Raises RuntimeError when no file with that extension exists in the folder.
    """
    cwd = Path.cwd()
    wanted = ext.lower()
    files = sorted(p for p in cwd.iterdir() if p.is_file() and p.suffix.lower() == wanted)

    if not files:
        raise RuntimeError(f"No {ext} files found in: {cwd}")

    # Auto pick if only one file exists
    if len(files) == 1:
        print(f"\nAuto-selected {label}: {files[0].name}")
        return files[0]

    print(f"\nSelect {label} ({ext}):")
    for i, p in enumerate(files, start=1):
        print(f"  {i}) {p.name}")

    while True:
        choice = input(f"Enter number (1-{len(files)}): ").strip()
        # isdecimal() rather than isdigit(): isdigit() also accepts characters
        # such as "²" that int() cannot parse, which would crash the prompt
        # with ValueError instead of asking again.
        if choice.isdecimal():
            idx = int(choice)
            if 1 <= idx <= len(files):
                return files[idx - 1]
        print("Invalid choice. Try again.")


# -----------------------------
# LOAD DWG (TXT export)
# -----------------------------
def load_dwg_devices(path: Path) -> tuple[set[str], list[str]]:
    """
    Read device names from the P_TAG1 column of a tab-separated DWG export.

    Blank values and the placeholder "<>" are skipped. Every other value is
    normalized with normalize_name(); names rejected by is_ignored() are
    collected separately instead of being treated as devices.

    Returns (devices, ignored): the set of normalized device names and the
    list of normalized names that were ignored (duplicates kept).
    Raises RuntimeError when the header row cannot be read or has no P_TAG1
    column.
    """
    devices: set[str] = set()
    ignored: list[str] = []

    # "utf-8-sig" decodes plain UTF-8 unchanged but drops a leading byte-order
    # mark; with plain "utf-8" the BOM sticks to the first header name, so a
    # file whose first column is P_TAG1 is wrongly rejected below.
    # newline="" lets the csv module do its own line-ending handling, as its
    # documentation requires for file objects.
    with path.open("r", encoding="utf-8-sig", errors="ignore", newline="") as f:
        reader = csv.DictReader(f, delimiter="\t")

        if not reader.fieldnames:
            raise RuntimeError(f"Could not read header from file: {path.name}")

        if "P_TAG1" not in reader.fieldnames:
            raise RuntimeError(f"P_TAG1 column not found in {path.name}. Found: {reader.fieldnames}")

        for row in reader:
            # Short rows yield None for missing columns; treat that as blank.
            raw = (row.get("P_TAG1") or "").strip()
            if not raw or raw == "<>":
                continue

            normalized = normalize_name(raw)

            if is_ignored(normalized):
                ignored.append(normalized)
                continue

            devices.add(normalized)

    return devices, ignored


# -----------------------------
# LOAD SVG
# -----------------------------
def load_svg_devices(path: Path) -> tuple[set[str], list[str]]:
    """
    Collect device names from the "id" attributes of an SVG file.

    Every element in the document is visited; elements without an id (or with
    an empty one) are skipped. Each id is normalized with normalize_name();
    names rejected by is_ignored() are collected separately.

    Returns (devices, ignored): the set of normalized device names and the
    list of normalized names that were ignored (duplicates kept).
    """
    kept: set[str] = set()
    skipped: list[str] = []

    svg_root = ET.parse(path).getroot()
    element_ids = (node.attrib.get("id") for node in svg_root.iter())

    # filter(None, ...) drops both missing and empty ids.
    for raw_id in filter(None, element_ids):
        device_name = normalize_name(raw_id)
        if is_ignored(device_name):
            skipped.append(device_name)
        else:
            kept.add(device_name)

    return kept, skipped


# -----------------------------
# EXCEL REPORT
# -----------------------------
def _style_header_row(ws, header_fill_hex: str):
    """
    Format row 1 of the worksheet as a header.

    Each cell in the first row gets a solid fill in header_fill_hex, bold
    white text, and centered horizontal and vertical alignment.
    """
    bold_white = Font(bold=True, color="FFFFFF")
    background = PatternFill("solid", fgColor=header_fill_hex)
    for header_cell in ws[1]:
        header_cell.alignment = Alignment(horizontal="center", vertical="center")
        header_cell.font = bold_white
        header_cell.fill = background


def _autosize_columns(ws, max_width: int = 60):
    """
    Set each column's width from the longest value it contains.

    The width is the longest stringified cell value plus 2, but never less
    than 12 and never more than max_width. Empty (None) cells are not counted.
    """
    for col_idx in range(1, ws.max_column + 1):
        column_values = (
            ws.cell(row=row_idx, column=col_idx).value
            for row_idx in range(1, ws.max_row + 1)
        )
        longest = max(
            (len(str(value)) for value in column_values if value is not None),
            default=0,
        )
        width = min(max(longest + 2, 12), max_width)
        ws.column_dimensions[get_column_letter(col_idx)].width = width


def write_excel_report(
    dwg_file: Path,
    svg_file: Path,
    dwg_devices: set[str],
    svg_devices: set[str],
    dwg_ignored: list[str],
    svg_ignored: list[str],
    missing_in_svg: list[str],
    extra_in_svg: list[str],
) -> Path:
    """
    Write the comparison result to a timestamped .xlsx file in OUTPUT_DIR.

    The workbook holds a "Summary" sheet (file names, counts and the active
    filters) followed by one single-column sheet each for the missing, extra,
    DWG-ignored and SVG-ignored names. The missing/extra lists are written in
    the order given; the ignored lists are de-duplicated and sorted.

    Returns the path of the saved workbook.
    """
    OUTPUT_DIR.mkdir(exist_ok=True)

    stamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    out_path = OUTPUT_DIR / f"dwg_vs_svg_report_{stamp}.xlsx"

    wb = Workbook()

    # ---------- Summary (reuses the sheet a new workbook starts with) ----------
    summary = wb.active
    summary.title = "Summary"
    summary.append(["Field", "Value"])
    _style_header_row(summary, "404040")

    summary_rows = (
        ["DWG file", dwg_file.name],
        ["SVG file", svg_file.name],
        ["DWG logical devices", len(dwg_devices)],
        ["SVG logical devices", len(svg_devices)],
        ["Missing in SVG", len(missing_in_svg)],
        ["Extra in SVG", len(extra_in_svg)],
        ["Ignored in DWG", len(dwg_ignored)],
        ["Ignored in SVG", len(svg_ignored)],
        ["Ignore filters", ", ".join(IGNORE_CONTAINS)],
        ["SVG suffixes", ", ".join(SVG_SUFFIXES)],
    )
    for summary_row in summary_rows:
        summary.append(summary_row)

    _autosize_columns(summary)

    # ---------- Single-column list sheets ----------
    # (sheet title, header text, header fill colour, names to list)
    list_sheets = (
        ("Missing_in_SVG", "Missing Device", "C00000", missing_in_svg),  # red
        ("Extra_in_SVG", "Extra Device", "E69138", extra_in_svg),  # orange
        ("Ignored_DWG", "Ignored Device", "6A1B9A", sorted(set(dwg_ignored))),  # purple
        ("Ignored_SVG", "Ignored Device", "6A1B9A", sorted(set(svg_ignored))),  # purple
    )
    for sheet_title, heading, fill_hex, names in list_sheets:
        sheet = wb.create_sheet(sheet_title)
        sheet.append([heading])
        _style_header_row(sheet, fill_hex)
        for device_name in names:
            sheet.append([device_name])
        _autosize_columns(sheet)

    wb.save(out_path)
    return out_path


# -----------------------------
# MAIN
# -----------------------------
def main():
    """
    Run the DWG-vs-SVG device comparison from the current folder.

    Picks one .txt DWG export and one .svg file, loads the device names from
    both, writes the Excel report and prints a short console summary.
    Exits with status 1 after printing an error when no input file is found,
    the DWG export has no usable P_TAG1 header, or the SVG is not valid XML.
    """
    # Loading sits inside the same try as picking: load_dwg_devices() raises
    # RuntimeError for a bad header and ET.parse() raises ParseError for a
    # malformed SVG, and both are input problems that deserve the same short
    # message as a missing file rather than a traceback.
    try:
        dwg_txt_path = pick_file(".txt", "DWG export text file (DWG devices)")
        svg_path = pick_file(".svg", "SVG file (SVG devices)")

        dwg_devices, dwg_ignored = load_dwg_devices(dwg_txt_path)
        svg_devices, svg_ignored = load_svg_devices(svg_path)
    except (RuntimeError, ET.ParseError) as e:
        print(f"Error: {e}")
        sys.exit(1)

    # Sorted so the report sheets are stable between runs.
    missing_in_svg = sorted(dwg_devices - svg_devices)
    extra_in_svg = sorted(svg_devices - dwg_devices)

    report_path = write_excel_report(
        dwg_txt_path,
        svg_path,
        dwg_devices,
        svg_devices,
        dwg_ignored,
        svg_ignored,
        missing_in_svg,
        extra_in_svg,
    )

    # Console summary (short)
    print("\n====== DWG vs SVG DEVICE CHECK ======")
    print(f"DWG devices: {len(dwg_devices)}   ({dwg_txt_path.name})")
    print(f"SVG devices: {len(svg_devices)}   ({svg_path.name})")
    print(f"Missing in SVG: {len(missing_in_svg)}")
    print(f"Extra in SVG:   {len(extra_in_svg)}")
    print(f"Ignored DWG:    {len(dwg_ignored)}  filters={IGNORE_CONTAINS}")
    print(f"Ignored SVG:    {len(svg_ignored)}  filters={IGNORE_CONTAINS}")
    print(f"\n✅ Excel report saved to: {report_path}")
    print("=====================================")


# Run the comparison only when this file is executed as a script, so that
# importing it (e.g. to reuse normalize_name) has no side effects.
if __name__ == "__main__":
    main()