import argparse import re import sys from pathlib import Path import pandas as pd # ==== CONFIG YOU CAN TWEAK IF NEEDED ==== # IO rows matching these regex patterns will be ignored SKIP_PATTERNS = [ r"^PDP.*_CB", # PDP*_CB* pattern r"^PDP.*_FWM", # PDP*_FWM* pattern r"^PDP.*_PWM", # PDP*_PWM* pattern r".*_.*_VSU$", # *_*_VSU pattern ] def canonical(tag: str) -> str: """ Apply all agreed normalization rules to a device/assigned tag. """ if not isinstance(tag, str): return None t = tag.strip() if not t: return None # 1) EPC: keep up to ..._EPC # Examples: # NCP1_3_EPC1_1 -> NCP1_3_EPC1 # NCP1_3_EPC2_99 -> NCP1_3_EPC2 m = re.search(r"(.*_EPC\d+)_.*", t) if m: t = m.group(1) # 2) VFD variants -> base _VFD # UL23_25_VFD_DISC -> UL23_25_VFD # S02_1_VFD_STO1 -> S02_1_VFD t = re.sub(r"_VFD_DISC$", "_VFD", t) t = re.sub(r"_VFD_DSIC$", "_VFD", t) t = re.sub(r"_VFD_STO\d*$", "_VFD", t) # 3) SS buttons: SPB_LT / SPB / STPB -> base SS # NCS2_12_SS2_SPB_LT -> NCS2_12_SS2 # NCR1_2_SS1_SPB -> NCR1_2_SS1 # NCR1_2_SS1_STPB -> NCR1_2_SS1 t = re.sub(r"_SPB_LT$", "", t) t = re.sub(r"_STPB$", "", t) t = re.sub(r"_SPB$", "", t) # 4) Generic PB: PB_LT / PB -> base # NCP1_3_JR1_PB -> NCP1_3_JR1 # NCP1_3_JR1_PB_LT -> NCP1_3_JR1 t = re.sub(r"_PB_LT$", "", t) t = re.sub(r"_PB$", "", t) # 5) STO suffix on SR tags -> base SR # _SR1_STO1 -> _SR1 t = re.sub(r"(_SR\d+)_STO\d+$", r"\1", t) # 6) R/S endings -> base # S02_2_LRPE6_R -> S02_2_LRPE6 # S02_2_LRPE6_S -> S02_2_LRPE6 t = re.sub(r"_(R|S)$", "", t) # 7) EX_ESTOP variants -> base EX # *_*_EX_ESTOP* -> *_*_EX t = re.sub(r"_EX_ESTOP.*$", "_EX", t) return t or None def load_io_series(io_path: Path, io_column: str | None) -> pd.Series: """ Load IO Excel and return a cleaned Series of Assigned Device strings. If io_column is None, uses the first column. """ df = pd.read_excel(io_path) if io_column: if io_column not in df.columns: raise SystemExit( f"ERROR: IO column '{io_column}' not found. Available: {list(df.columns)}" ) series = df[io_column] else: # Use first column first_col = df.columns[0] series = df[first_col] series = series.astype(str).str.strip() # Drop SPARE rows mask_spare = series.str.contains("SPARE", case=False, na=False) # Drop rows matching SKIP_PATTERNS (e.g. PDP*_CB*, PDP*_FWM*) mask_skip_pattern = False for pattern in SKIP_PATTERNS: mask_skip_pattern = mask_skip_pattern | series.str.match(pattern, na=False) filtered = series[~(mask_spare | mask_skip_pattern)] return filtered def load_device_tags(dev_path: Path, dev_column: str | None) -> pd.Series: """ Load device Excel and return Series of device tags. Default column is 'P_TAG1' if exists, else first column. Filters out VSU devices (matching *_*_VSU pattern). """ df = pd.read_excel(dev_path) if dev_column: if dev_column not in df.columns: raise SystemExit( f"ERROR: Device column '{dev_column}' not found. Available: {list(df.columns)}" ) series = df[dev_column] else: if "P_TAG1" in df.columns: series = df["P_TAG1"] else: first_col = df.columns[0] series = df[first_col] series = series.astype(str).str.strip() # Filter out VSU devices (matching *_*_VSU pattern) mask_vsu = series.str.match(r".*_.*_VSU$", na=False) filtered = series[~mask_vsu] return filtered def build_io_map(assigned_series: pd.Series) -> dict[str, set[str]]: """ Build mapping: canonical_tag -> set of raw IO assigned device strings. """ io_canon = assigned_series.map(canonical) io_map: dict[str, set[str]] = {} for raw, c in zip(assigned_series, io_canon): if not c: continue io_map.setdefault(c, set()).add(raw) return io_map def detect_duplicate_assignments(assigned_series: pd.Series) -> pd.DataFrame: """ Return dataframe of raw IO assigned device strings that appear more than once. This keeps PB/PB_LT, EPC variants, etc. separate and only flags exact duplicates. """ cleaned = assigned_series.dropna().str.strip() cleaned = cleaned[cleaned != ""] counts = cleaned.value_counts() duplicates = counts[counts > 1] if duplicates.empty: return pd.DataFrame(columns=["Assigned_Device", "Occurrences"]) rows = [] for tag_value, occ in duplicates.items(): rows.append({"Assigned_Device": tag_value, "Occurrences": occ}) return pd.DataFrame(rows) def find_io_without_devices(io_map: dict[str, set[str]], dev_canon: pd.Series) -> pd.DataFrame: """ Return dataframe of IO canonical tags that do not exist in the device list. """ dev_canon_set = set(dev_canon.dropna()) rows = [] for canon_tag, raw_values in sorted(io_map.items()): if canon_tag not in dev_canon_set: rows.append( { "Canonical_Tag": canon_tag, "IO_Assigned_Devices": ", ".join(sorted(raw_values)), "Occurrences": len(raw_values), } ) return pd.DataFrame(rows) def compare(io_path: Path, dev_path: Path, out_path: Path, io_column: str | None = None, dev_column: str | None = None) -> None: """ Main compare routine: IO vs Devices, export Excel, print summary. """ print(f"IO file: {io_path}") print(f"Devices file: {dev_path}") print(f"Output file: {out_path}") print("Loading data...") assigned_series = load_io_series(io_path, io_column) dev_tags = load_device_tags(dev_path, dev_column) print(f"IO rows after filters: {len(assigned_series)}") print(f"Device tags: {len(dev_tags)}") io_map = build_io_map(assigned_series) duplicate_df = detect_duplicate_assignments(assigned_series) dev_canon = dev_tags.map(canonical) io_only_df = find_io_without_devices(io_map, dev_canon) present_flags: list[str] = [] matching_assigned: list[str] = [] for d_tag, d_c in zip(dev_tags, dev_canon): if d_c in io_map: present_flags.append("YES") matching_assigned.append(", ".join(sorted(io_map[d_c]))) else: present_flags.append("NO") matching_assigned.append("") res_df = pd.DataFrame({ "Device_Tag": dev_tags, "Canonical_Tag_Used_For_Check": dev_canon, "Present_In_IO": present_flags, "Matching_IO_Assigned_Devices": matching_assigned, }) # Save result (main sheet + duplicates if any) out_path = out_path.with_suffix(".xlsx") with pd.ExcelWriter(out_path, engine="openpyxl") as writer: res_df.to_excel(writer, index=False, sheet_name="Devices_vs_IO") if not duplicate_df.empty: duplicate_df.to_excel( writer, index=False, sheet_name="Duplicate_IO_Assignments" ) if not io_only_df.empty: io_only_df.to_excel( writer, index=False, sheet_name="IO_Only_Assignments" ) print(f"\nResult saved to: {out_path}") # Summary missing_df = res_df[res_df["Present_In_IO"] == "NO"] total_devices = len(res_df) missing_count = len(missing_df) found_count = total_devices - missing_count print("\n===== SUMMARY =====") print(f"Total devices: {total_devices}") print(f"Found in IO: {found_count}") print(f"Missing: {missing_count}") print(f"IO-only tags: {len(io_only_df)}") if missing_count > 0: print("\nDevices NOT found in IO (after all rules):") for tag in sorted(missing_df["Device_Tag"].tolist()): print(" -", tag) if duplicate_df.empty: print("\nDuplicate IO assignments: none detected.") else: print("\nDuplicate IO assignments detected (same raw value repeated):") for _, row in duplicate_df.sort_values("Assigned_Device").iterrows(): print( f" - {row['Assigned_Device']} ({row['Occurrences']} occurrences)" ) if io_only_df.empty: print("\nIO-only assignments: none detected.") else: print("\nIO-only assignments (no matching device tag):") for _, row in io_only_df.iterrows(): print( f" - {row['Canonical_Tag']} ({row['Occurrences']} occurrences): " f"{row['IO_Assigned_Devices']}" ) def main(argv=None): parser = argparse.ArgumentParser( description="Check that all devices from device list are present in IO file, " "using EPC/VFD/PB/SS/R-S rules." ) parser.add_argument("io_file", help="Input/Output Excel file (IO)") parser.add_argument("devices_file", help="Devices Excel file") parser.add_argument("output_file", help="Output Excel file path (will be .xlsx)") parser.add_argument( "--io-column", help="Column name in IO file to use as Assigned Device (default: first column)", default=None, ) parser.add_argument( "--dev-column", help="Column name in devices file (default: P_TAG1 if exists, else first column)", default=None, ) args = parser.parse_args(argv) io_path = Path(args.io_file) dev_path = Path(args.devices_file) out_path = Path(args.output_file) if not io_path.is_file(): raise SystemExit(f"ERROR: IO file not found: {io_path}") if not dev_path.is_file(): raise SystemExit(f"ERROR: Devices file not found: {dev_path}") compare( io_path=io_path, dev_path=dev_path, out_path=out_path, io_column=args.io_column, dev_column=args.dev_column, ) if __name__ == "__main__": main()