"""Compare device tags from a DWG text export against element ids in an SVG.

The script picks a ``.txt`` (tab-separated DWG export) and a ``.svg`` file
from the current working directory, normalizes the device names found in
each, and writes an Excel report listing devices that are missing from or
extra in the SVG.
"""

import csv
import re
import sys
import xml.etree.ElementTree as ET
from datetime import datetime
from pathlib import Path

from openpyxl import Workbook
from openpyxl.styles import Alignment, Font, PatternFill
from openpyxl.utils import get_column_letter

# -----------------------------
# SVG suffixes that mean "same device"
# -----------------------------
SVG_SUFFIXES = (
    "_ASSEMBLY",
    "_END",
    "_LINE",
    "_OUT",
    "_RECT",
    "_CIRCLE",
    "_TRIBOTLEFT",
    "_TRITOPRIGHT",
)

# -----------------------------
# DEVICE TYPES TO IGNORE
# (substring match after normalization)
# -----------------------------
IGNORE_CONTAINS = (
    "_BCN",
    "_ENC",
    "_SOL",
)

# Output folder for reports
OUTPUT_DIR = Path("reports")

# -----------------------------
# NORMALIZATION HELPERS
# -----------------------------
_TRAILING_NUMERIC_CHUNKS_RE = re.compile(r"(?:_\d+)+$")


def _contains_svg_suffix_token(n_with_underscores: str) -> bool:
    """Return True if any SVG suffix appears on a token boundary.

    A suffix counts when the name ends with it or when it is immediately
    followed by an underscore, e.g. "..._RECT_5_18" contains the "_RECT"
    token.
    """
    if n_with_underscores.endswith(SVG_SUFFIXES):
        return True
    return any(f"{suffix}_" in n_with_underscores for suffix in SVG_SUFFIXES)


# -----------------------------
# NORMALIZATION
# -----------------------------
def normalize_name(name: str) -> str:
    """Return the canonical device name used for DWG/SVG comparison.

    Steps, in order: strip and upper-case; rewrite a trailing "_VFD1" to
    "_VFD"; turn dashes into underscores; for names containing an SVG
    suffix token, drop trailing numeric chunks; finally strip one trailing
    SVG suffix. An empty or None name yields "".
    """
    if not name:
        return ""

    n = name.strip().upper()

    # ---- VFD1 (DWG) vs VFD (SVG)
    n = re.sub(r"_VFD1$", "_VFD", n)

    # For SVG junk like "..._RECT-5-18", normalize dashes so we can parse
    # consistently.
    n_u = n.replace("-", "_")

    # If this looks like an SVG "part id" (RECT/CIRCLE/TRI...), remove the
    # trailing numeric chunks (e.g. _5_18, _1_0_01, etc.).
    if _contains_svg_suffix_token(n_u):
        n_u = _TRAILING_NUMERIC_CHUNKS_RE.sub("", n_u)

    # Strip at most one SVG part suffix.
    for suffix in SVG_SUFFIXES:
        if n_u.endswith(suffix):
            n_u = n_u[: -len(suffix)]
            break

    return n_u


def is_ignored(normalized_name: str) -> bool:
    """Return True if this normalized device name should be ignored.

    Empty names are ignored, as is any name containing one of the
    IGNORE_CONTAINS substrings.
    """
    if not normalized_name:
        return True
    return any(token in normalized_name for token in IGNORE_CONTAINS)


# -----------------------------
# FILE PICKER
# -----------------------------
def pick_file(ext: str, label: str) -> Path:
    """Scan the current folder for files with ``ext`` and let the user choose.

    Args:
        ext: File extension including the dot, e.g. ".txt" or ".svg".
            Matching is case-insensitive.
        label: Human-readable description shown in the prompts.

    Returns:
        The selected path. A single candidate is selected automatically.

    Raises:
        RuntimeError: If no matching file exists, or if standard input ends
            before a valid choice is entered.
    """
    cwd = Path.cwd()
    wanted = ext.lower()
    files = sorted(
        p for p in cwd.iterdir() if p.is_file() and p.suffix.lower() == wanted
    )

    if not files:
        raise RuntimeError(f"No {ext} files found in: {cwd}")

    # Auto pick if only one file exists
    if len(files) == 1:
        print(f"\nAuto-selected {label}: {files[0].name}")
        return files[0]

    print(f"\nSelect {label} ({ext}):")
    for i, p in enumerate(files, start=1):
        print(f" {i}) {p.name}")

    while True:
        try:
            choice = input(f"Enter number (1-{len(files)}): ").strip()
        except EOFError:
            # No more input will ever arrive; fail instead of crashing with
            # a traceback or looping forever.
            raise RuntimeError(f"No selection made for {label}") from None

        # isdecimal() (not isdigit()) so that characters such as "²", which
        # int() cannot parse, are rejected here instead of raising ValueError.
        if choice.isdecimal():
            idx = int(choice)
            if 1 <= idx <= len(files):
                return files[idx - 1]
        print("Invalid choice. Try again.")


# -----------------------------
# LOAD DWG (TXT export)
# -----------------------------
def load_dwg_devices(path: Path) -> tuple[set[str], list[str]]:
    """Read device tags from the P_TAG1 column of a tab-separated export.

    Blank values and the literal "<>" are skipped.

    Args:
        path: Tab-separated text file with a header row.

    Returns:
        ``(devices, ignored)``: the set of normalized device names, and the
        list of normalized names rejected by ``is_ignored`` (duplicates
        kept).

    Raises:
        RuntimeError: If the header is missing or has no P_TAG1 column.
    """
    devices: set[str] = set()
    ignored: list[str] = []

    # utf-8-sig drops a leading BOM so it cannot become part of the first
    # header name; newline="" is what the csv module expects from its input.
    with path.open("r", encoding="utf-8-sig", errors="ignore", newline="") as f:
        reader = csv.DictReader(f, delimiter="\t")

        if not reader.fieldnames:
            raise RuntimeError(f"Could not read header from file: {path.name}")

        if "P_TAG1" not in reader.fieldnames:
            raise RuntimeError(
                f"P_TAG1 column not found in {path.name}. "
                f"Found: {reader.fieldnames}"
            )

        for row in reader:
            raw = (row.get("P_TAG1") or "").strip()
            if not raw or raw == "<>":
                continue

            normalized = normalize_name(raw)
            if is_ignored(normalized):
                ignored.append(normalized)
            else:
                devices.add(normalized)

    return devices, ignored


# -----------------------------
# LOAD SVG
# -----------------------------
def load_svg_devices(path: Path) -> tuple[set[str], list[str]]:
    """Collect normalized device names from every ``id`` attribute in an SVG.

    Args:
        path: SVG (XML) file to parse.

    Returns:
        ``(devices, ignored)`` with the same meaning as in
        ``load_dwg_devices``.
    """
    devices: set[str] = set()
    ignored: list[str] = []

    root = ET.parse(path).getroot()

    for elem in root.iter():
        elem_id = elem.attrib.get("id")
        if not elem_id:
            continue

        normalized = normalize_name(elem_id)
        if is_ignored(normalized):
            ignored.append(normalized)
        else:
            devices.add(normalized)

    return devices, ignored


# -----------------------------
# EXCEL REPORT
# -----------------------------
def _style_header_row(ws, header_fill_hex: str):
    """Make row 1 bold white on a solid ``header_fill_hex`` fill, centered."""
    header_fill = PatternFill("solid", fgColor=header_fill_hex)
    header_font = Font(bold=True, color="FFFFFF")
    header_alignment = Alignment(horizontal="center", vertical="center")

    for cell in ws[1]:
        cell.fill = header_fill
        cell.font = header_font
        cell.alignment = header_alignment


def _autosize_columns(ws, max_width: int = 60):
    """Set each column width to its longest value plus 2.

    The width is never below 12 and never above ``max_width``.
    """
    for col in range(1, ws.max_column + 1):
        best = 0
        for row in range(1, ws.max_row + 1):
            v = ws.cell(row=row, column=col).value
            if v is not None:
                best = max(best, len(str(v)))
        width = min(max(best + 2, 12), max_width)
        ws.column_dimensions[get_column_letter(col)].width = width


def _add_list_sheet(wb, title: str, header: str, header_fill_hex: str, items):
    """Append a one-column sheet: a styled header followed by ``items``."""
    ws = wb.create_sheet(title)
    ws.append([header])
    _style_header_row(ws, header_fill_hex)
    for item in items:
        ws.append([item])
    _autosize_columns(ws)


def write_excel_report(
    dwg_file: Path,
    svg_file: Path,
    dwg_devices: set[str],
    svg_devices: set[str],
    dwg_ignored: list[str],
    svg_ignored: list[str],
    missing_in_svg: list[str],
    extra_in_svg: list[str],
) -> Path:
    """Write the comparison workbook into OUTPUT_DIR and return its path.

    The workbook has a "Summary" sheet plus one sheet each for missing,
    extra, ignored-DWG and ignored-SVG devices. Ignored names are
    de-duplicated and sorted on their sheets, while the summary counts use
    the raw list lengths. The file name carries a local timestamp.
    """
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

    ts = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    out_path = OUTPUT_DIR / f"dwg_vs_svg_report_{ts}.xlsx"

    wb = Workbook()

    # ---------- Summary ----------
    ws = wb.active
    ws.title = "Summary"
    ws.append(["Field", "Value"])
    _style_header_row(ws, "404040")

    summary_rows = [
        ("DWG file", dwg_file.name),
        ("SVG file", svg_file.name),
        ("DWG logical devices", len(dwg_devices)),
        ("SVG logical devices", len(svg_devices)),
        ("Missing in SVG", len(missing_in_svg)),
        ("Extra in SVG", len(extra_in_svg)),
        ("Ignored in DWG", len(dwg_ignored)),
        ("Ignored in SVG", len(svg_ignored)),
        ("Ignore filters", ", ".join(IGNORE_CONTAINS)),
        ("SVG suffixes", ", ".join(SVG_SUFFIXES)),
    ]
    for k, v in summary_rows:
        ws.append([k, v])

    _autosize_columns(ws)

    # ---------- Detail sheets ----------
    _add_list_sheet(wb, "Missing_in_SVG", "Missing Device", "C00000", missing_in_svg)  # red
    _add_list_sheet(wb, "Extra_in_SVG", "Extra Device", "E69138", extra_in_svg)  # orange
    _add_list_sheet(wb, "Ignored_DWG", "Ignored Device", "6A1B9A", sorted(set(dwg_ignored)))  # purple
    _add_list_sheet(wb, "Ignored_SVG", "Ignored Device", "6A1B9A", sorted(set(svg_ignored)))  # purple

    wb.save(out_path)
    return out_path


# -----------------------------
# MAIN
# -----------------------------
def main():
    """Pick the input files, compare their devices and write the report.

    Exits with status 1 after printing a one-line message if a file cannot
    be picked, read or parsed, or if the report cannot be written.
    """
    try:
        dwg_txt_path = pick_file(".txt", "DWG export text file (DWG devices)")
        svg_path = pick_file(".svg", "SVG file (SVG devices)")

        dwg_devices, dwg_ignored = load_dwg_devices(dwg_txt_path)
        svg_devices, svg_ignored = load_svg_devices(svg_path)

        missing_in_svg = sorted(dwg_devices - svg_devices)
        extra_in_svg = sorted(svg_devices - dwg_devices)

        report_path = write_excel_report(
            dwg_txt_path,
            svg_path,
            dwg_devices,
            svg_devices,
            dwg_ignored,
            svg_ignored,
            missing_in_svg,
            extra_in_svg,
        )
    except (RuntimeError, OSError, ET.ParseError) as e:
        # Top-level boundary: report the problem briefly, no traceback.
        print(f"Error: {e}")
        sys.exit(1)

    # Console summary (short)
    print("\n====== DWG vs SVG DEVICE CHECK ======")
    print(f"DWG devices: {len(dwg_devices)} ({dwg_txt_path.name})")
    print(f"SVG devices: {len(svg_devices)} ({svg_path.name})")
    print(f"Missing in SVG: {len(missing_in_svg)}")
    print(f"Extra in SVG: {len(extra_in_svg)}")
    print(f"Ignored DWG: {len(dwg_ignored)} filters={IGNORE_CONTAINS}")
    print(f"Ignored SVG: {len(svg_ignored)} filters={IGNORE_CONTAINS}")
    print(f"\n✅ Excel report saved to: {report_path}")
    print("=====================================")


if __name__ == "__main__":
    main()