import pandas as pd
import sys
import os
import re

def extract_fio_fioh_modules(input_file='MCM04_DESC_IP_MERGED.xlsx', output_file='FIO_FIOH_OUTPUT.csv'):
    """
    Extract FIO and FIOH modules from the DESC_IP sheet and write them to a CSV.

    Output columns: TAGNAME, ADDR, TERM, TERMDESC, DESCA, DESCB, DPM.

    Every module whose TAGNAME contains "FIO" and whose DEVICE_TYPE is 'M12DR'
    or 'Hub' is expanded to 16 rows (IO0..IO15). Channels that have no matching
    TERM row in the sheet are emitted with DESCA 'SPARE'. The DPM column holds
    the NETWORK-sheet DPM value for 'M12DR' modules and, for 'Hub' modules, the
    TAGNAME of the first row whose DESCA references that hub.

    Args:
        input_file: Path of the workbook containing the 'DESC_IP' sheet and,
            optionally, a sheet whose name contains 'NETWORK'.
        output_file: Path of the CSV file to write.

    Returns:
        The output DataFrame, or None when the workbook cannot be read, a
        required DESC_IP column is missing, or no rows could be generated.
    """
    required_columns = ['TAGNAME', 'TERM', 'DESCA', 'DESCB', 'DEVICE_TYPE']
    output_columns = ['TAGNAME', 'ADDR', 'TERM', 'TERMDESC', 'DESCA', 'DESCB', 'DPM']

    # Both supported device types expose the same 16 channels (IO0..IO15).
    io_channels = [f'IO{i}' for i in range(16)]
    channels_by_device = {
        'M12DR': io_channels,  # FIO devices
        'Hub': io_channels,    # FIOH devices
    }

    try:
        print(f"Reading input file: {input_file}")
        # The context manager closes the workbook handle even on failure;
        # read_excel() does not close an ExcelFile that is passed in.
        with pd.ExcelFile(input_file) as xl:
            # The DESC/IP data is always read from the sheet named 'DESC_IP'.
            df = pd.read_excel(xl, sheet_name='DESC_IP')
            print(f"Total rows in DESC_IP sheet: {len(df)}")

            # --- Load NETWORK sheet for DPM mapping ------------------------
            network_sheet = next(
                (sheet for sheet in xl.sheet_names if 'NETWORK' in sheet.upper()),
                None,
            )
            if network_sheet:
                network_df = pd.read_excel(xl, sheet_name=network_sheet)
                print(f"Loaded NETWORK sheet: {network_sheet} ({len(network_df)} rows)")
                # Build mapping from Name -> DPM (blank-safe)
                names = network_df['Name'].astype(str).str.strip()
                dpms = network_df['DPM'].fillna('').astype(str).str.strip()
                name_to_dpm = dict(zip(names, dpms))
            else:
                print("WARNING: NETWORK sheet not found in workbook – DPM column will be blank for masters")
                name_to_dpm = {}

    except FileNotFoundError:
        print(f"ERROR: File {input_file} not found!")
        return
    except Exception as e:
        print(f"ERROR: Failed to read {input_file}: {str(e)}")
        return

    # Report a malformed sheet the same way as other input errors instead of
    # failing later with a bare KeyError.
    missing_columns = [col for col in required_columns if col not in df.columns]
    if missing_columns:
        print(f"ERROR: DESC_IP sheet is missing required column(s): {', '.join(missing_columns)}")
        return

    # ---------------------------------------------------------------------
    # Build a mapping of FIOH tag -> its MASTER FIO tag by scanning DESC_IP.
    # Rows whose DESCA contains 'FIOH' are treated as references to a hub from
    # the row's own TAGNAME (the master).
    fioh_master_map = {}
    desca_text = df['DESCA'].astype(str)
    fioh_refs = df[desca_text.str.contains('FIOH', case=False, na=False)]
    for fioh_tag, master_tag in zip(
        fioh_refs['DESCA'].astype(str).str.strip(),
        fioh_refs['TAGNAME'].astype(str).str.strip(),
    ):
        # Keep the first master encountered to avoid overriding inconsistencies
        fioh_master_map.setdefault(fioh_tag, master_tag)

    # Filter for FIO and FIOH modules (TAGNAME containing "FIO"). Tag names are
    # normalised to stripped strings so that they match the (stripped) keys of
    # name_to_dpm and fioh_master_map, and so a non-text column cannot break
    # the .str accessor.
    tagnames = df['TAGNAME'].astype(str).str.strip()
    fio_mask = tagnames.str.contains('FIO', case=False, na=False)
    fio_fioh_data = df[fio_mask].copy()
    fio_fioh_data['TAGNAME'] = tagnames[fio_mask]

    print(f"Found {len(fio_fioh_data)} FIO/FIOH entries")

    if fio_fioh_data.empty:
        print("No FIO/FIOH modules found in the data!")
        return

    print(f"Found {fio_fioh_data['TAGNAME'].nunique()} unique FIO/FIOH modules")

    # Prepare output data
    output_rows = []

    # sort=False keeps modules in order of first appearance; one groupby pass
    # replaces re-filtering the whole frame for every module.
    for module_name, module_data in fio_fioh_data.groupby('TAGNAME', sort=False):
        # Device type is taken from the module's first row
        device_type = module_data.iloc[0]['DEVICE_TYPE']
        channels = channels_by_device.get(device_type, [])

        print(f"Processing {module_name} ({device_type}) - {len(channels)} channels")

        if not channels:
            # Unsupported device type: nothing to emit for this module.
            continue

        # Existing descriptions keyed by TERM; a later duplicate TERM wins.
        existing_data = {}
        for term, desca, descb in zip(
            module_data['TERM'], module_data['DESCA'], module_data['DESCB']
        ):
            existing_data[str(term).strip()] = (
                desca if pd.notna(desca) else '',
                descb if pd.notna(descb) else '',
            )

        # DPM depends only on the module, so resolve it once per module.
        if device_type == 'M12DR':  # Master FIO
            dpm_value = name_to_dpm.get(module_name, '')
        else:  # 'Hub' (FIOH) – use its master
            dpm_value = fioh_master_map.get(module_name, '')

        for channel in channels:
            # Channels without existing data default to SPARE
            desca, descb = existing_data.get(channel, ('SPARE', ''))
            output_rows.append({
                'TAGNAME': module_name,
                'ADDR': f"{module_name}_{channel}",
                'TERM': channel,
                'TERMDESC': '',  # Intentionally left empty
                'DESCA': desca,
                'DESCB': descb,
                'DPM': dpm_value,
            })

    if not output_rows:
        # Without this guard the empty frame has no columns and sorting fails.
        print("No output rows generated - no module has a supported DEVICE_TYPE ('M12DR' or 'Hub')")
        return

    # Create output DataFrame with DPM as the last column
    output_df = pd.DataFrame(output_rows, columns=output_columns)

    # Each module's rows were appended in IO0..IO15 order, so a stable sort on
    # TAGNAME alone yields TAGNAME order with numerically ordered channels.
    output_df = output_df.sort_values('TAGNAME', kind='mergesort')

    print(f"\nGenerated {len(output_df)} output rows")
    print(f"Saving to: {output_file}")

    # Replace any NaN values with empty strings for clean output
    output_df = output_df.fillna('')

    # Save to CSV
    output_df.to_csv(output_file, index=False)

    print(f"\nSample output:")
    print(output_df.head(15))

    print(f"\nOutput saved successfully to {output_file}")
    return output_df

if __name__ == "__main__":
    # Optional positional CLI arguments: [input workbook] [output CSV].
    # Either one falls back to its default file name when omitted.
    cli_args = sys.argv[1:]
    input_file = cli_args[0] if len(cli_args) >= 1 else 'MCM04_DESC_IP_MERGED.xlsx'
    output_file = cli_args[1] if len(cli_args) >= 2 else 'FIO_FIOH_OUTPUT.csv'

    extract_fio_fioh_modules(input_file, output_file)