# Scripts/AutoCAD/Compare/DPM/check_dpm_devices.py

#!/usr/bin/env python3
"""
Script to compare device lists from two Excel files.
- File 1: Extract all values from P_TAG1 column
- File 2: Extract all devices, ignoring those containing "SPARE"
- Output: Excel file with all devices and a YES/NO column indicating if device is in both files
"""

import argparse
import sys
import pandas as pd
from pathlib import Path


def extract_from_p_tag1_file(file_path, sheet_name=0):
    """
    Read one sheet of an Excel workbook and return its P_TAG1 entries.

    Missing cells are dropped, every remaining value is converted to a
    string and stripped of surrounding whitespace, and values that end up
    empty are discarded. The original letter case is kept.

    Any failure (missing file, missing P_TAG1 column, unreadable workbook)
    prints an error message and terminates the process with exit status 1.

    Args:
        file_path: Path to Excel file with P_TAG1 column
        sheet_name: Sheet name or index to read

    Returns:
        List of device names (preserving original case)
    """
    tag_column = 'P_TAG1'
    try:
        sheet = pd.read_excel(file_path, sheet_name=sheet_name)

        if tag_column in sheet.columns:
            # Normalise the cells to trimmed strings, then drop blank ones.
            cleaned = sheet[tag_column].dropna().astype(str).str.strip()
            return [tag for tag in cleaned if tag != '']

        # Column is absent: show what is available to help the user.
        print(f"Error: Column 'P_TAG1' not found in '{file_path}'")
        print(f"Available columns: {list(sheet.columns)}")
        sys.exit(1)
    except FileNotFoundError:
        print(f"Error: File '{file_path}' not found.")
        sys.exit(1)
    except Exception as read_error:
        print(f"Error reading '{file_path}': {read_error}")
        sys.exit(1)


def extract_from_devices_file(file_path, sheet_name=0):
    """
    Collect device names from the first column of an Excel sheet.

    Cells are converted to stripped strings and blank ones are dropped. The
    column header itself is appended as a device when it contains an
    underscore, is longer than three characters, is not "P_TAG1"
    (case-insensitive) and is not already in the list. Finally every name
    containing "SPARE" (case-insensitive) is removed.

    Any failure while reading prints an error message and terminates the
    process with exit status 1.

    Args:
        file_path: Path to Excel file with device list
        sheet_name: Sheet name or index to read

    Returns:
        List of device names (preserving original case)
    """
    try:
        sheet = pd.read_excel(file_path, sheet_name=sheet_name)

        # Devices are assumed to live in the first column.
        first_column = sheet.iloc[:, 0].dropna().astype(str).str.strip()
        names = first_column[first_column != ''].tolist()

        # A header that looks like a device name is treated as a device.
        header = str(sheet.columns[0]).strip()
        header_is_device = (
            '_' in header
            and len(header) > 3
            and header.lower() != 'p_tag1'
        )
        if header_is_device and header not in names:
            names.append(header)

        # Drop spare entries regardless of letter case.
        return [name for name in names if 'spare' not in name.lower()]
    except FileNotFoundError:
        print(f"Error: File '{file_path}' not found.")
        sys.exit(1)
    except Exception as read_error:
        print(f"Error reading '{file_path}': {read_error}")
        sys.exit(1)


def compare_and_create_output(file1_path, file2_path, output_path, sheet1=0, sheet2=0):
    """
    Compare device lists from two Excel files and write the result to Excel.

    Devices are matched case-insensitively. The output has one row per
    unique device (sorted by lower-cased name) with four columns: the device
    name, one YES/NO column per input file, and 'In Both Files'.

    The per-file columns are headed by the input file names. When both files
    share the same name, or a name equals one of the fixed headers, the
    per-file headers get " (file 1)" / " (file 2)" appended so that no
    column is silently overwritten.

    The output is always written as .xlsx: a missing extension is added and
    any other extension (including legacy .xls, which the openpyxl engine
    used here cannot write) is replaced.

    Exits the process with status 1 if openpyxl is missing or the file
    cannot be written.

    Args:
        file1_path: Path to file with P_TAG1 column
        file2_path: Path to file with device list
        output_path: Path to output Excel file
        sheet1: Sheet name or index for first file
        sheet2: Sheet name or index for second file
    """
    # Get file names (without path) for column headers
    file1_name = Path(file1_path).name
    file2_name = Path(file2_path).name

    # Headers double as dict keys below, so they must all be distinct.
    fixed_headers = {'Device', 'In Both Files'}
    if file1_name == file2_name or fixed_headers & {file1_name, file2_name}:
        file1_header = f"{file1_name} (file 1)"
        file2_header = f"{file2_name} (file 2)"
    else:
        file1_header, file2_header = file1_name, file2_name

    print(f"Reading '{file1_path}' (looking for P_TAG1 column)...")
    devices1 = extract_from_p_tag1_file(file1_path, sheet_name=sheet1)
    print(f"  Found {len(devices1)} devices in P_TAG1 column")

    print(f"Reading '{file2_path}' (extracting all devices, ignoring SPARE)...")
    devices2 = extract_from_devices_file(file2_path, sheet_name=sheet2)
    print(f"  Found {len(devices2)} devices (after filtering SPARE)")

    # Map lower-cased name -> original spelling for case-insensitive matching
    devices1_lower = {d.lower(): d for d in devices1}
    devices2_lower = {d.lower(): d for d in devices2}

    result_data = []
    for device_lower in sorted(devices1_lower.keys() | devices2_lower.keys()):
        in_file1 = device_lower in devices1_lower
        in_file2 = device_lower in devices2_lower

        # Prefer the spelling from file 1, fall back to file 2
        if in_file1:
            device_original = devices1_lower[device_lower]
        else:
            device_original = devices2_lower[device_lower]

        result_data.append({
            'Device': device_original,
            file1_header: "YES" if in_file1 else "NO",
            file2_header: "YES" if in_file2 else "NO",
            'In Both Files': "YES" if in_file1 and in_file2 else "NO",
        })

    # Passing the columns explicitly keeps the header row (and the summary
    # below) working even when neither file contributed any device.
    result_df = pd.DataFrame(
        result_data,
        columns=['Device', file1_header, file2_header, 'In Both Files'],
    )

    # Validate output file extension: only .xlsx is written
    output_path_obj = Path(output_path)
    if output_path_obj.suffix.lower() != '.xlsx':
        if not output_path_obj.suffix:
            output_path = str(output_path_obj) + '.xlsx'
            print(f"Note: Output file extension added: {output_path}")
        else:
            print("Warning: Output file should be .xlsx format. Converting to .xlsx")
            output_path = str(output_path_obj.with_suffix('.xlsx'))

    # Ensure openpyxl is available before attempting the write
    try:
        import openpyxl  # noqa: F401  (imported only to verify availability)
    except ImportError:
        print("Error: openpyxl library is required. Install it with: pip install openpyxl")
        sys.exit(1)

    # Only the write itself is guarded, so a later failure is never
    # misreported as a write error.
    try:
        result_df.to_excel(output_path, index=False, engine='openpyxl')
    except Exception as e:
        print(f"Error writing to Excel file: {e}")
        print("Make sure the output path is valid and you have write permissions.")
        sys.exit(1)

    total_count = len(result_df)
    both_count = int((result_df['In Both Files'] == 'YES').sum())

    print(f"\nOutput saved to: {output_path}")
    print(f"Total devices: {total_count}")
    print(f"Devices in both files: {both_count}")
    print(f"Devices only in one file: {total_count - both_count}")
    print("\nColumns in output file:")
    print("  - Device: Device name")
    print(f"  - {file1_header}: YES if device is in this file")
    print(f"  - {file2_header}: YES if device is in this file")
    print("  - In Both Files: YES if device is in both files")


def main():
    """
    Command-line entry point: parse arguments and run the comparison.

    Positional arguments are the two input workbooks and the output path.
    --sheet1/--sheet2 select the sheet to read in each input; a value made
    only of decimal digits is converted to an integer so it is treated as a
    sheet index, anything else is passed on as a sheet name.

    Exits with status 1 if either input file does not exist.
    """
    def sheet_arg(value):
        """Return an int index for all-digit input, else the sheet name."""
        # argparse always hands over strings; without this conversion a
        # value such as "1" would be passed on as a sheet *named* "1".
        return int(value) if value.isdecimal() else value

    parser = argparse.ArgumentParser(
        description='Compare device lists from two Excel files and create Excel output',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python check_dpm_devices.py file1.xlsx file2.xlsx output.xlsx
  python check_dpm_devices.py file1.xlsx file2.xlsx output.xlsx --sheet1 "Sheet1" --sheet2 "Sheet2"
        """
    )

    parser.add_argument('file1', type=str,
                       help='Path to first Excel file (must have P_TAG1 column)')
    parser.add_argument('file2', type=str,
                       help='Path to second Excel file (device list, SPARE devices will be ignored)')
    parser.add_argument('output', type=str,
                       help='Path to output Excel file (must be .xlsx format)')
    # The int default is left untouched by argparse (type= only applies to
    # string values), so the default still selects the first sheet.
    parser.add_argument('--sheet1', type=sheet_arg, default=0,
                       help='Sheet name or index for first file (default: 0)')
    parser.add_argument('--sheet2', type=sheet_arg, default=0,
                       help='Sheet name or index for second file (default: 0)')

    args = parser.parse_args()

    # Validate files exist before doing any work
    for input_path in (args.file1, args.file2):
        if not Path(input_path).exists():
            print(f"Error: File '{input_path}' does not exist.")
            sys.exit(1)

    # Compare and create output
    compare_and_create_output(
        args.file1,
        args.file2,
        args.output,
        sheet1=args.sheet1,
        sheet2=args.sheet2
    )


if __name__ == '__main__':
    main()