# Exported 2025-08-26 21:29:02 +04:00 — 450 lines, 20 KiB, Python

#!/usr/bin/env python3
"""
Excel Data Processor for IO Configuration
==========================================
Processes Excel data with columns: TAGNAME, IP, PARTNUMBER, IO_PATH, DESC, TERM, SIGNAL
Handles FIO, FIOH, ZMX, DPM, VFD(APF) modules and maps IO_PATH to DESC comments.
"""
import pandas as pd
import re
from typing import Dict, List, Tuple, Optional
from dataclasses import dataclass, field
from collections import defaultdict
def _is_hub_name_in_desc(desc: str) -> bool:
"""Check if description contains hub patterns like FIO1H1, FIO2H3, etc."""
if not desc:
return False
# Match patterns like FIO1H1, FIO2H3, etc. in description
hub_pattern = re.compile(r'FIO\d+H\d+', re.IGNORECASE)
return bool(hub_pattern.search(desc))
@dataclass
class IOPathMapping:
    """Represents an IO path mapping from Excel data.

    One instance corresponds to one row of the DESC_IP sheet; field names
    mirror the sheet's columns.
    """
    tagname: str      # Module tag the row belongs to (TAGNAME column)
    terminal: str     # From TERM column (e.g., "I0", "SI1", "IO0")
    io_path: str      # From IO_PATH column (e.g., "FL3024_2_VFD1:I.In_0")
    description: str  # From DESC column
    ip_address: str   # From IP column
    part_number: str  # From PARTNUMBER column
    signal: str       # From SIGNAL column (e.g., "O", "I", "IOLink")
    desb: str = ""    # From DESB column (beacon-specific descriptions); optional column
@dataclass
class ModuleData:
    """Represents a complete module with all its IO mappings.

    Aggregates every DESC_IP row sharing one TAGNAME, plus metadata derived
    during processing (FIOH parentage, per-point comments).
    """
    tagname: str
    ip_address: str
    part_number: str
    # One IOPathMapping per Excel row belonging to this module.
    io_mappings: List[IOPathMapping] = field(default_factory=list)
    parent_module: Optional[str] = None  # For FIOH modules: TAGNAME of the parent module
    unknown_part_number: bool = False    # True when part_number is not a known catalog number
    terminal: str = ""  # For FIOH modules - stores the terminal they're found on (IO4/IO12)
    comments: Dict[int, str] = field(default_factory=dict)  # Per-point comments {point_index: description}
class ExcelDataProcessor:
    """Processes Excel data for IO configuration generation.

    Workflow: ``load_data()`` reads the 'DESC_IP' sheet and validates the
    required columns; ``process_data()`` then groups rows by TAGNAME into
    :class:`ModuleData` records, infers missing part numbers from TAGNAME
    suffixes, and discovers FIOH hub modules referenced in DESC text.
    """

    # Known part number mappings: catalog number -> module metadata.
    PART_NUMBER_MAP = {
        # APF modules
        "35S-6D1-P001": {"type": "APF", "hp": "1"},
        "35S-6D2-P101": {"type": "APF", "hp": "2"},
        "35S-6D3-P101": {"type": "APF", "hp": "3"},
        "35S-6D4-P111": {"type": "APF", "hp": "5"},
        "35S-6D5-P111": {"type": "APF", "hp": "7.5"},
        "35S-6D6-P111": {"type": "APF", "hp": "10"},
        # FIOH modules (Turck Hubs)
        "TBIL-M1-16DXP": {"type": "FIOH"},
        # DPM modules (to be implemented later)
        "OS30-002404-2S": {"type": "DPM"},
        # ZMX modules
        "ZMX-3DE2500HF-Q7-AMZCHD": {"type": "ZMX"},
        # EXTENDO modules (Siemens ET 200SP - CALJAN network)
        "6ES7 158-3MU10-0XA0": {"type": "EXTENDO"},
        "CALJAN": {"type": "EXTENDO"},
        # IO-Link Master modules
        "5032-8IOLM12DR": {"type": "IOLM"},
        "1734-IB16": {"type": "IB16"},
        "1734-IB16S": {"type": "IB16S"},
        "1734-OB16E": {"type": "OB16E"},
        # ControlLogix I/O modules (1756 series)
        "1756-IB16": {"type": "IB16"},
        "1756-IB16S": {"type": "IB16S"},
        "1756-OB16E": {"type": "OB16E"},
        # PMM modules (Power Monitoring Module)
        "1420-V2-ENT": {"type": "PMM"},
        # SIO modules (Safety Input/Output)
        "0980SSL3131-121-007D-202": {"type": "SIO"},
    }

    # Pre-compiled patterns, hoisted out of the per-row loops.
    _TERM_POINT_RE = re.compile(r'^[IO](\d{1,2})$')       # 'I0' -> 0, 'O15' -> 15
    _FIOH_NAME_RE = re.compile(r'([A-Z0-9_]+FIOH\d*)')    # e.g. 'FL1014_FIOH1'
    _HUB_NAME_RE = re.compile(r'([A-Z0-9_]+FIO\d+H\d+)')  # e.g. 'S02_1_FIO1H1'

    def __init__(self, excel_file_path: str = "Data.xlsx"):
        """
        Args:
            excel_file_path: Path to the workbook; must contain a 'DESC_IP' sheet.
        """
        self.excel_file_path = excel_file_path
        self.raw_data = None  # pandas DataFrame once load_data() succeeds
        self.modules: Dict[str, "ModuleData"] = {}
        self.unknown_part_numbers: List[str] = []
        self.fio_fioh_relationships: Dict[str, str] = {}  # FIOH -> FIO parent mapping

    def load_data(self) -> bool:
        """Load data from the Excel file.

        Returns:
            True on success; False (after printing diagnostics) when the file
            cannot be read or required columns are missing.
        """
        try:
            # Read Excel file from DESC_IP sheet (contains processed data with all required columns)
            self.raw_data = pd.read_excel(self.excel_file_path, sheet_name='DESC_IP')
            # Validate required columns ('DESB' is optional: beacon-specific descriptions)
            required_columns = ['TAGNAME', 'IP', 'PARTNUMBER', 'IO_PATH', 'DESC', 'TERM', 'SIGNAL']
            missing_columns = [col for col in required_columns if col not in self.raw_data.columns]
            if missing_columns:
                print(f"ERROR: Missing required columns: {missing_columns}")
                return False
            initial_count = len(self.raw_data)
            # Find rows with empty TAGNAME or PARTNUMBER for reporting
            empty_tagnames = self.raw_data[self.raw_data['TAGNAME'].isna()]
            empty_partnumbers = self.raw_data[self.raw_data['PARTNUMBER'].isna()]
            # Report empty TAGNAME rows (these are dropped below)
            if len(empty_tagnames) > 0:
                print(f"WARNING: Removing {len(empty_tagnames)} rows with empty TAGNAME:")
                self._print_row_details(empty_tagnames)
            # Report empty PARTNUMBER rows (but don't remove them)
            if len(empty_partnumbers) > 0:
                print(f"INFO: Found {len(empty_partnumbers)} rows with empty PARTNUMBER (will attempt inference):")
                self._print_row_details(empty_partnumbers)
            # Only remove rows with empty TAGNAME (empty PARTNUMBER is kept for inference)
            self.raw_data = self.raw_data.dropna(subset=['TAGNAME'])
            final_count = len(self.raw_data)
            # Check for TAGNAMEs that appear more than 16 times (potential data issues)
            tagname_counts = self.raw_data['TAGNAME'].value_counts()
            excessive_tagnames = tagname_counts[tagname_counts > 16]
            if len(excessive_tagnames) > 0:
                print(f"WARNING: Found {len(excessive_tagnames)} TAGNAMEs with more than 16 entries:")
                for tagname, count in excessive_tagnames.items():
                    partnumber = self.raw_data[self.raw_data['TAGNAME'] == tagname]['PARTNUMBER'].iloc[0]
                    print(f" {tagname}: {count} entries (PARTNUMBER: {partnumber})")
                print(" Note: Most modules should have ≤16 IO channels. Review these for potential data issues.")
            if initial_count != final_count:
                print(f"WARNING: Removed {initial_count - final_count} rows with empty TAGNAME")
            return True
        except Exception as e:
            print(f"ERROR: Loading Excel file: {e}")
            return False

    @staticmethod
    def _print_row_details(rows) -> None:
        """Print one diagnostic line per DataFrame row.

        ``idx+2`` converts the 0-based frame index into the 1-based Excel row
        number, accounting for the header row.
        """
        for idx, row in rows.iterrows():
            tagname = str(row['TAGNAME']) if pd.notna(row['TAGNAME']) else "[EMPTY]"
            partnumber = str(row['PARTNUMBER']) if pd.notna(row['PARTNUMBER']) else "[EMPTY]"
            desc = str(row['DESC']) if pd.notna(row['DESC']) else ""
            term = str(row['TERM']) if pd.notna(row['TERM']) else ""
            print(f" Row {idx+2}: TAGNAME='{tagname}', PARTNUMBER='{partnumber}', DESC='{desc}', TERM='{term}'")

    def process_data(self) -> bool:
        """Process the loaded data and organize it by modules.

        Groups rows by TAGNAME into ModuleData entries, infers part numbers
        from TAGNAME suffixes when the PARTNUMBER cell is empty, and then
        discovers FIOH hubs and their parent links.

        Returns:
            False when no data has been loaded, True otherwise.
        """
        if self.raw_data is None:
            print("ERROR: No data loaded. Call load_data() first.")
            return False
        # Group data by TAGNAME to create modules
        for tagname, group in self.raw_data.groupby('TAGNAME'):
            # First row supplies module-level info (IP, part number)
            first_row = group.iloc[0]
            part_number = str(first_row['PARTNUMBER']) if pd.notna(first_row['PARTNUMBER']) else ""
            # Infer part number from the TAGNAME suffix when the cell is empty.
            # str() guards against non-string TAGNAME cells (e.g. numeric tags).
            if not part_number:
                tag_str = str(tagname)
                if tag_str.endswith("_IB16"):
                    part_number = "1734-IB16"
                elif tag_str.endswith("_IB16S"):
                    part_number = "1734-IB16S"
                elif tag_str.endswith("_OB16E"):
                    part_number = "1734-OB16E"
            # Handle IP address from data sheet
            ip_address = str(first_row['IP']).strip() if pd.notna(first_row['IP']) else ""
            if ip_address and not self._is_valid_ip(ip_address):
                print(f"WARNING: Invalid IP address format '{ip_address}' for module {tagname}")
                ip_address = ""
            # Record unknown part numbers once, for the summary report
            unknown_part = part_number not in self.PART_NUMBER_MAP
            if unknown_part and part_number not in self.unknown_part_numbers:
                self.unknown_part_numbers.append(part_number)
            module = ModuleData(
                tagname=tagname,
                ip_address=ip_address,
                part_number=part_number,
                unknown_part_number=unknown_part
            )
            # Process each IO mapping for this module
            for _, row in group.iterrows():
                term = str(row['TERM']) if pd.notna(row['TERM']) else ""
                desc = str(row['DESC']) if pd.notna(row['DESC']) else ""
                module.io_mappings.append(IOPathMapping(
                    tagname=tagname,
                    terminal=term,
                    io_path=str(row['IO_PATH']) if pd.notna(row['IO_PATH']) else "",
                    description=desc,
                    ip_address=ip_address,
                    part_number=part_number,
                    signal=str(row['SIGNAL']) if pd.notna(row['SIGNAL']) else "",
                    desb=str(row['DESB']) if 'DESB' in row and pd.notna(row['DESB']) else ""
                ))
                # Per-point comments: map TERM like 'I0'/'O15' to point index 0..15
                if term and desc:
                    match = self._TERM_POINT_RE.match(term.upper())
                    if match:
                        point_index = int(match.group(1))
                        if 0 <= point_index <= 15:
                            module.comments[point_index] = desc
            self.modules[tagname] = module
        # Find FIOH modules based on TERM IO4/IO12 and DESC containing FIOH,
        # then record FIO-FIOH parent relationships
        self._find_fioh_modules()
        self._find_fio_fioh_relationships()
        return True

    def _find_fioh_modules(self):
        """Find FIOH modules based on TERM IO4/IO12 and DESC containing FIOH.

        A hub is referenced by a row whose TERM is one of the hub channels and
        whose DESC names the hub (literal 'FIOH' text or an FIO<n>H<n>
        pattern). A synthetic ModuleData is created per hub, parented to the
        TAGNAME on which the reference was found.
        """
        fioh_modules_to_create = {}  # hub TAGNAME -> (parent module TAGNAME, terminal)
        # Scan all data for TERM IO4 or IO12 with FIOH in DESC.
        # IO6/IO06/IO14 are temporarily accepted for backward compatibility,
        # and both zero-padded (IO04) and plain (IO4) spellings are handled.
        for _, row in self.raw_data.iterrows():
            term = str(row['TERM']) if pd.notna(row['TERM']) else ""
            desc = str(row['DESC']) if pd.notna(row['DESC']) else ""
            tagname = str(row['TAGNAME']) if pd.notna(row['TAGNAME']) else ""
            if term.upper() in ["IO4", "IO04", "IO12", "IO6", "IO06", "IO14"] and ("FIOH" in desc.upper() or _is_hub_name_in_desc(desc)):
                # Extract the hub name, e.g. 'FL1014_FIOH1' or 'S02_1_FIO1H1'
                desc_upper = desc.upper()
                fioh_match = self._FIOH_NAME_RE.search(desc_upper)
                hub_match = self._HUB_NAME_RE.search(desc_upper)
                if fioh_match:
                    fioh_name = fioh_match.group(1)
                elif hub_match:
                    fioh_name = hub_match.group(1)
                else:
                    continue  # No valid hub pattern found
                # First reference wins: remember the parent TAGNAME and terminal
                if fioh_name not in fioh_modules_to_create:
                    fioh_modules_to_create[fioh_name] = (tagname, term.upper())
        # Create FIOH modules based on findings
        for fioh_name, (parent_module, terminal) in fioh_modules_to_create.items():
            fioh_module = ModuleData(
                tagname=fioh_name,
                ip_address="",  # FIOHs don't have IP addresses
                part_number="TBIL-M1-16DXP",  # Known FIOH part number
                parent_module=parent_module
            )
            # Terminal drives port assignment downstream (IO6 -> address 6, IO14 -> address 14)
            fioh_module.terminal = terminal
            # Collect the hub's own IO rows (rows whose TAGNAME is the hub name)
            for _, row in self.raw_data.iterrows():
                row_tagname = str(row['TAGNAME']) if pd.notna(row['TAGNAME']) else ""
                if row_tagname.upper() == fioh_name.upper():
                    fioh_module.io_mappings.append(IOPathMapping(
                        tagname=fioh_name,
                        terminal=str(row['TERM']) if pd.notna(row['TERM']) else "",
                        io_path=str(row['IO_PATH']) if pd.notna(row['IO_PATH']) else "",
                        description=str(row['DESC']) if pd.notna(row['DESC']) else "",
                        ip_address="",
                        part_number="TBIL-M1-16DXP",
                        signal=str(row['SIGNAL']) if pd.notna(row['SIGNAL']) else ""
                    ))
            # NOTE(review): this replaces any module process_data() created under
            # the same TAGNAME, re-classifying it as a FIOH hub (and discarding
            # its comments dict) — presumed intentional; confirm.
            self.modules[fioh_name] = fioh_module
        if fioh_modules_to_create:
            print(f"Created {len(fioh_modules_to_create)} FIOH modules based on TERM analysis")

    def _find_fio_fioh_relationships(self):
        """Find parent-child relationships between FIO and FIOH modules.

        Populates ``fio_fioh_relationships`` from modules that already carry a
        ``parent_module`` (set by _find_fioh_modules).
        """
        for module_name, module in self.modules.items():
            if (module.part_number == "TBIL-M1-16DXP" and  # FIOH modules
                    module.parent_module):                  # that have a parent
                self.fio_fioh_relationships[module_name] = module.parent_module

    def parse_io_path(self, io_path: str) -> Tuple[str, str, str]:
        """Parse an IO_PATH string into (tagname, channel, terminal).

        Examples:
            - "FL3024_2_VFD1:I.In_0" -> ("FL3024_2_VFD1", "I", "In_0")
            - "FL3024_2_VFD1:SI.In01Data" -> ("FL3024_2_VFD1", "SI", "In01Data")

        Malformed input degrades gracefully: a missing colon yields
        ("", "", ""); a missing dot yields (tagname, "", "").
        """
        try:
            # Split on colon to separate tagname from path
            parts = io_path.split(":", 1)
            if len(parts) != 2:
                return "", "", ""
            tagname, path_part = parts
            # Split path part on dot to get channel and terminal
            path_parts = path_part.split(".", 1)
            if len(path_parts) != 2:
                return tagname, "", ""
            channel, terminal = path_parts
            return tagname, channel, terminal
        except Exception:
            return "", "", ""

    def get_modules_by_type(self, module_type: str) -> List["ModuleData"]:
        """Get all modules of a specific type (e.g. "APF", "FIOH", "ZMX")."""
        result = []
        for module in self.modules.values():
            if (module.part_number in self.PART_NUMBER_MAP and
                    self.PART_NUMBER_MAP[module.part_number]["type"] == module_type):
                result.append(module)
        return result

    def get_comments_for_module(self, tagname: str) -> Dict[str, str]:
        """Get comment mappings for a specific module.

        Returns:
            Dict mapping terminal -> description; spare points are normalized
            to the literal uppercase string "SPARE". Empty dict for unknown
            modules.
        """
        if tagname not in self.modules:
            return {}
        comments = {}
        for io_mapping in self.modules[tagname].io_mappings:
            if io_mapping.terminal and io_mapping.description:
                # Normalize SPARE entries regardless of original casing
                if io_mapping.description.upper() == "SPARE":
                    comments[io_mapping.terminal] = "SPARE"
                else:
                    comments[io_mapping.terminal] = io_mapping.description
        return comments

    def print_summary(self):
        """Print processing summary: counts by type, FIOH links, unknowns, samples."""
        print("\n" + "=" * 60)
        print("Excel Data Processing Summary")
        print("=" * 60)
        print(f"Total modules processed: {len(self.modules)}")
        # Count modules by mapped type; unmapped part numbers count as UNKNOWN
        type_counts = defaultdict(int)
        for module in self.modules.values():
            if module.part_number in self.PART_NUMBER_MAP:
                module_type = self.PART_NUMBER_MAP[module.part_number]["type"]
                type_counts[module_type] += 1
            else:
                type_counts["UNKNOWN"] += 1
        print("\nModules by type:")
        for module_type, count in type_counts.items():
            print(f" {module_type}: {count}")
        print(f"\nFIO-FIOH relationships found: {len(self.fio_fioh_relationships)}")
        for fioh, fio in self.fio_fioh_relationships.items():
            print(f" {fioh} -> {fio}")
        if self.unknown_part_numbers:
            print(f"\nUnknown part numbers ({len(self.unknown_part_numbers)}):")
            for part_num in self.unknown_part_numbers:
                print(f" {part_num}")
        print("\nSample modules:")
        for i, (tagname, module) in enumerate(self.modules.items()):
            if i >= 5:  # Show first 5 modules
                break
            print(f" {tagname}: {module.part_number} ({len(module.io_mappings)} IO mappings)")

    def _is_valid_ip(self, ip: str) -> bool:
        """Validate dotted-quad IPv4 format: four all-digit octets, each 0-255.

        The isdigit() check rejects octets with signs or whitespace (e.g.
        '+1', ' 1') that a bare int() conversion would silently accept.
        """
        try:
            parts = ip.split('.')
            if len(parts) != 4:
                return False
            return all(part.isdigit() and 0 <= int(part) <= 255 for part in parts)
        except (AttributeError, TypeError, ValueError):
            return False
def main():
    """Example usage of the Excel data processor."""
    print("Excel Data Processor Test")
    print("=" * 40)
    processor = ExcelDataProcessor("MCM04_Data.xlsx")
    # Load, then process, then report; bail out with a message on failure.
    if not processor.load_data():
        print("Failed to load data")
        return
    if not processor.process_data():
        print("Failed to process data")
        return
    processor.print_summary()
    # Example: show the per-terminal comments of the first module found
    module_names = list(processor.modules.keys())
    if module_names:
        sample_module = module_names[0]
        print(f"\nSample comments for {sample_module}:")
        for terminal, desc in processor.get_comments_for_module(sample_module).items():
            print(f" {terminal}: {desc}")


if __name__ == "__main__":
    main()