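# ignition_scripts/COMPARE_SVG_DWG/compare_dwg_vs_svg.py
#
# Compares device tags from a tab-delimited DWG text export (P_TAG1 column)
# against element ids found in an SVG file and writes the differences to an
# Excel report.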

import csv
import xml.etree.ElementTree as ET
from pathlib import Path
import re
import sys
from datetime import datetime
from openpyxl import Workbook
from openpyxl.styles import Font, PatternFill, Alignment
from openpyxl.utils import get_column_letter
# -----------------------------
# SVG suffixes that mean "same device"
# -----------------------------
SVG_SUFFIXES = (
    "_ASSEMBLY",
    "_END",
    "_LINE",
    "_OUT",
    "_RECT",
    "_CIRCLE",
    "_TRIBOTLEFT",
    "_TRITOPRIGHT",
)

# -----------------------------
# DEVICE TYPES TO IGNORE
# A normalized name containing any of these substrings is excluded
# from the comparison.
# -----------------------------
IGNORE_CONTAINS = (
    "_BCN",
    "_ENC",
    "_SOL",
)

# Folder the Excel reports are written to (relative to the working directory).
OUTPUT_DIR = Path("reports")
# -----------------------------
# NORMALIZATION HELPERS
# -----------------------------
# Matches one or more trailing "_<digits>" chunks, e.g. "_5_18" or "_1_0_01".
_TRAILING_NUMERIC_CHUNKS_RE = re.compile(r"(?:_\d+)+$")


def _contains_svg_suffix_token(
    n_with_underscores: str,
    suffixes: "tuple[str, ...] | None" = None,
) -> bool:
    """
    Return True if any SVG suffix appears as a token boundary:
    e.g. "..._RECT_5_18" contains "_RECT" token

    A suffix counts when it is followed by "_" anywhere in the name or when
    the name ends with it. ``suffixes`` defaults to ``SVG_SUFFIXES``.
    """
    if suffixes is None:
        suffixes = SVG_SUFFIXES
    return any(
        suffix
        and ((suffix + "_") in n_with_underscores or n_with_underscores.endswith(suffix))
        for suffix in suffixes
    )


# -----------------------------
# NORMALIZATION
# -----------------------------
def normalize_name(name: str, suffixes: "tuple[str, ...] | None" = None) -> str:
    """
    Return the canonical, upper-case form of a DWG tag or SVG element id.

    Steps, in order:
      1. strip surrounding whitespace and upper-case the name;
      2. rewrite a trailing "_VFD1" to "_VFD";
      3. replace every "-" with "_";
      4. if the name is an SVG part id, i.e. it ends with one of ``suffixes``
         optionally followed by "_<digits>" chunks ("X_RECT_5_18"), drop the
         numeric chunks and that suffix ("X").

    Args:
        name: raw tag / id; an empty or None value yields "".
        suffixes: part suffixes to strip; defaults to ``SVG_SUFFIXES``.

    Returns:
        The normalized name (may be "" if nothing is left).
    """
    if not name:
        return ""
    if suffixes is None:
        suffixes = SVG_SUFFIXES

    n = name.strip().upper()
    # ---- VFD1 (DWG) vs VFD (SVG)
    n = re.sub(r"_VFD1$", "_VFD", n)
    # For SVG junk like "..._RECT-5-18", normalize dashes so we can parse consistently
    n_u = n.replace("-", "_")

    # Cheap pre-filter: names without any suffix token are returned untouched.
    if not _contains_svg_suffix_token(n_u, suffixes):
        return n_u

    # Remove trailing numeric chunks (e.g. _5_18, _1_0_01, etc.) on a copy, so
    # they are only discarded when a part suffix really precedes them.
    base = _TRAILING_NUMERIC_CHUNKS_RE.sub("", n_u)
    for suffix in suffixes:
        if suffix and base.endswith(suffix):
            return base[: -len(suffix)]

    # The suffix token sits in the middle of the name (e.g. "PS1_END_STOP_2"):
    # this is not a part id, so the trailing number is kept because it
    # distinguishes the device.
    return n_u
def is_ignored(
    normalized_name: str,
    ignore_contains: "tuple[str, ...] | None" = None,
) -> bool:
    """Return True if this normalized device name should be ignored.

    An empty name is always ignored. Otherwise the name is ignored when it
    contains any of the ``ignore_contains`` substrings, which default to the
    module-level ``IGNORE_CONTAINS`` filters.
    """
    if not normalized_name:
        return True
    if ignore_contains is None:
        ignore_contains = IGNORE_CONTAINS
    return any(token in normalized_name for token in ignore_contains)
# -----------------------------
# FILE PICKER
# -----------------------------
def pick_file(ext: str, label: str) -> Path:
    """
    Scan current folder for files with given extension and let user choose.
    ext example: ".txt" or ".svg"

    The extension is compared case-insensitively. If exactly one file
    matches it is selected automatically; otherwise a numbered menu is
    printed and the user is prompted until a valid number is entered.

    Raises:
        RuntimeError: if no matching file exists, or if standard input is
            closed before a valid choice was made.
    """
    cwd = Path.cwd()
    wanted = ext.lower()
    files = sorted(p for p in cwd.iterdir() if p.is_file() and p.suffix.lower() == wanted)
    if not files:
        raise RuntimeError(f"No {ext} files found in: {cwd}")

    # Auto pick if only one file exists
    if len(files) == 1:
        print(f"\nAuto-selected {label}: {files[0].name}")
        return files[0]

    print(f"\nSelect {label} ({ext}):")
    for i, p in enumerate(files, start=1):
        print(f" {i}) {p.name}")

    while True:
        try:
            choice = input(f"Enter number (1-{len(files)}): ").strip()
        except EOFError as exc:
            # Non-interactive run / closed stdin: fail with the error type the
            # caller already reports instead of an unhandled EOFError.
            raise RuntimeError(f"No selection made for {label}: input ended") from exc
        # isdecimal() rather than isdigit(): isdigit() is also True for
        # characters such as "²", which int() rejects with ValueError.
        if choice.isdecimal():
            idx = int(choice)
            if 1 <= idx <= len(files):
                return files[idx - 1]
        print("Invalid choice. Try again.")
# -----------------------------
# LOAD DWG (TXT export)
# -----------------------------
def load_dwg_devices(path: Path) -> tuple[set[str], list[str]]:
    """
    Read device tags from the P_TAG1 column of a tab-delimited DWG export.

    Empty values and the placeholder "<>" are skipped. Every other value is
    normalized with normalize_name(); names rejected by is_ignored() are
    collected separately.

    Returns:
        (devices, ignored): the set of normalized device names kept for the
        comparison, and the list of normalized names that were ignored
        (one entry per row, so it may contain duplicates).

    Raises:
        RuntimeError: if the file has no header row or no P_TAG1 column.
    """
    devices: set[str] = set()
    ignored: list[str] = []
    # utf-8-sig drops a leading byte-order mark so it cannot end up glued to
    # the first column name; newline="" lets the csv module handle line
    # endings itself, as its documentation requires.
    with path.open("r", encoding="utf-8-sig", errors="ignore", newline="") as f:
        reader = csv.DictReader(f, delimiter="\t")
        if not reader.fieldnames:
            raise RuntimeError(f"Could not read header from file: {path.name}")
        if "P_TAG1" not in reader.fieldnames:
            raise RuntimeError(f"P_TAG1 column not found in {path.name}. Found: {reader.fieldnames}")
        for row in reader:
            raw = (row.get("P_TAG1") or "").strip()
            if not raw or raw == "<>":
                continue
            normalized = normalize_name(raw)
            if is_ignored(normalized):
                ignored.append(normalized)
                continue
            devices.add(normalized)
    return devices, ignored
# -----------------------------
# LOAD SVG
# -----------------------------
def load_svg_devices(path: Path) -> tuple[set[str], list[str]]:
    """
    Collect device names from the "id" attributes of every element in an SVG.

    Elements without an id (or with an empty one) are skipped. Each id is
    normalized with normalize_name(); names rejected by is_ignored() go to
    the ignored list instead of the device set.

    Returns:
        (devices, ignored): the set of normalized names kept for comparison
        and the list of normalized names that were ignored (may contain
        duplicates).
    """
    kept: set[str] = set()
    skipped: list[str] = []
    for node in ET.parse(path).getroot().iter():
        raw_id = node.attrib.get("id")
        if raw_id:
            name = normalize_name(raw_id)
            if is_ignored(name):
                skipped.append(name)
            else:
                kept.add(name)
    return kept, skipped
# -----------------------------
# EXCEL REPORT
# -----------------------------
def _style_header_row(ws, header_fill_hex: str):
    """Format row 1 of ``ws`` as a header.

    Every cell in the first row gets a solid background of
    ``header_fill_hex``, bold white text and centered alignment.
    """
    background = PatternFill("solid", fgColor=header_fill_hex)
    white_bold = Font(bold=True, color="FFFFFF")
    for header_cell in ws[1]:
        header_cell.fill = background
        header_cell.font = white_bold
        header_cell.alignment = Alignment(horizontal="center", vertical="center")
def _autosize_columns(ws, max_width: int = 60):
    """Set each column's width from its longest cell value.

    The width is the longest ``str(value)`` in the column plus 2, clamped
    to the range 12..``max_width``. Cells whose value is None are skipped.
    """
    for col_idx in range(1, ws.max_column + 1):
        values = (
            ws.cell(row=row_idx, column=col_idx).value
            for row_idx in range(1, ws.max_row + 1)
        )
        longest = max((len(str(v)) for v in values if v is not None), default=0)

        width = longest + 2
        if width < 12:
            width = 12
        if width > max_width:
            width = max_width
        ws.column_dimensions[get_column_letter(col_idx)].width = width
def _add_device_sheet(wb, title: str, header: str, fill_hex: str, devices) -> None:
    """Append a single-column sheet listing ``devices`` under a styled header."""
    sheet = wb.create_sheet(title)
    sheet.append([header])
    _style_header_row(sheet, fill_hex)
    for d in devices:
        sheet.append([d])
    _autosize_columns(sheet)


def write_excel_report(
    dwg_file: Path,
    svg_file: Path,
    dwg_devices: set[str],
    svg_devices: set[str],
    dwg_ignored: list[str],
    svg_ignored: list[str],
    missing_in_svg: list[str],
    extra_in_svg: list[str],
) -> Path:
    """
    Write the comparison result to a timestamped .xlsx file in OUTPUT_DIR.

    The workbook contains five sheets:
      * Summary        - file names, counts, and the active filters/suffixes
      * Missing_in_SVG - entries of ``missing_in_svg`` in the given order
      * Extra_in_SVG   - entries of ``extra_in_svg`` in the given order
      * Ignored_DWG    - unique, sorted entries of ``dwg_ignored``
      * Ignored_SVG    - unique, sorted entries of ``svg_ignored``

    The "Ignored in ..." counts on the Summary sheet are the lengths of the
    ignored lists as passed in (duplicates included), whereas the Ignored_*
    sheets list each name once.

    Returns:
        Path of the saved workbook.
    """
    # parents=True keeps this working if OUTPUT_DIR is ever set to a nested path.
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    ts = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    out_path = OUTPUT_DIR / f"dwg_vs_svg_report_{ts}.xlsx"
    wb = Workbook()

    # ---------- Summary ----------
    ws = wb.active
    ws.title = "Summary"
    ws.append(["Field", "Value"])
    _style_header_row(ws, "404040")
    summary_rows = [
        ("DWG file", dwg_file.name),
        ("SVG file", svg_file.name),
        ("DWG logical devices", len(dwg_devices)),
        ("SVG logical devices", len(svg_devices)),
        ("Missing in SVG", len(missing_in_svg)),
        ("Extra in SVG", len(extra_in_svg)),
        ("Ignored in DWG", len(dwg_ignored)),
        ("Ignored in SVG", len(svg_ignored)),
        ("Ignore filters", ", ".join(IGNORE_CONTAINS)),
        ("SVG suffixes", ", ".join(SVG_SUFFIXES)),
    ]
    for k, v in summary_rows:
        ws.append([k, v])
    _autosize_columns(ws)

    # ---------- Detail sheets (created in this order) ----------
    _add_device_sheet(wb, "Missing_in_SVG", "Missing Device", "C00000", missing_in_svg)  # red
    _add_device_sheet(wb, "Extra_in_SVG", "Extra Device", "E69138", extra_in_svg)  # orange
    _add_device_sheet(wb, "Ignored_DWG", "Ignored Device", "6A1B9A", sorted(set(dwg_ignored)))  # purple
    _add_device_sheet(wb, "Ignored_SVG", "Ignored Device", "6A1B9A", sorted(set(svg_ignored)))  # purple

    wb.save(out_path)
    return out_path
# -----------------------------
# MAIN
# -----------------------------
def main():
    """
    Run the DWG-vs-SVG device comparison interactively.

    Picks the DWG text export and the SVG from the current folder, loads and
    normalizes the device names from both, writes the Excel report and
    prints a short console summary. Exits with status 1 and a one-line
    message if a file cannot be selected, read or parsed.
    """
    try:
        dwg_txt_path = pick_file(".txt", "DWG export text file (DWG devices)")
        svg_path = pick_file(".svg", "SVG file (SVG devices)")
        # Loading is inside the same handler: load_dwg_devices raises
        # RuntimeError for a missing header / P_TAG1 column, and reading or
        # parsing either file can fail with OSError / ET.ParseError.
        dwg_devices, dwg_ignored = load_dwg_devices(dwg_txt_path)
        svg_devices, svg_ignored = load_svg_devices(svg_path)
    except (RuntimeError, OSError, ET.ParseError) as e:
        print(f"Error: {e}")
        sys.exit(1)

    missing_in_svg = sorted(dwg_devices - svg_devices)
    extra_in_svg = sorted(svg_devices - dwg_devices)
    report_path = write_excel_report(
        dwg_txt_path,
        svg_path,
        dwg_devices,
        svg_devices,
        dwg_ignored,
        svg_ignored,
        missing_in_svg,
        extra_in_svg,
    )

    # Console summary (short)
    print("\n====== DWG vs SVG DEVICE CHECK ======")
    print(f"DWG devices: {len(dwg_devices)} ({dwg_txt_path.name})")
    print(f"SVG devices: {len(svg_devices)} ({svg_path.name})")
    print(f"Missing in SVG: {len(missing_in_svg)}")
    print(f"Extra in SVG: {len(extra_in_svg)}")
    print(f"Ignored DWG: {len(dwg_ignored)} filters={IGNORE_CONTAINS}")
    print(f"Ignored SVG: {len(svg_ignored)} filters={IGNORE_CONTAINS}")
    print(f"\n✅ Excel report saved to: {report_path}")
    print("=====================================")


if __name__ == "__main__":
    main()