import pandas as pd
import os
import sys
import re

from io_paths import load_io_path_mappings
from process import process_data
from post_process import post_process_io_data

def _read_plc_sheets(excel_file):
    """Read the DESC_PLC and NETWORK_PLC sheets from *excel_file*.

    A sheet matches when its upper-cased name contains the marker text; if
    several sheets match, the last one in workbook order wins.  The workbook
    handle is closed before returning so later steps can reopen the file.

    Returns:
        Tuple ``(desc_df, network_df)`` of the two sheets as DataFrames.

    Raises:
        SystemExit: with code 1 when either required sheet is missing.
    """
    # The context manager guarantees the file handle is released, even when
    # we bail out through sys.exit() or pandas raises while parsing.
    with pd.ExcelFile(excel_file) as xl:
        print(f"Available sheets: {xl.sheet_names}")

        desc_sheet = None
        network_sheet = None
        for sheet in xl.sheet_names:
            sheet_upper = sheet.upper()
            if 'DESC_PLC' in sheet_upper:
                desc_sheet = sheet
            if 'NETWORK_PLC' in sheet_upper:
                network_sheet = sheet

        print(f"Found DESC sheet: {desc_sheet}")
        print(f"Found NETWORK sheet: {network_sheet}")

        if not desc_sheet or not network_sheet:
            print("CRITICAL: Required sheets 'DESC_PLC' and 'NETWORK_PLC' not found in the Excel file.")
            sys.exit(1)

        desc_df = pd.read_excel(xl, sheet_name=desc_sheet)
        network_df = pd.read_excel(xl, sheet_name=network_sheet)

    return desc_df, network_df


def _extract_subsystem_name(excel_file):
    """Derive the subsystem name (e.g. MTN6_MCM04) from the Excel file path.

    Tries the full ``<PROJECT>_MCM<nn>`` pattern first, then a bare
    ``MCM<nn>`` pattern, and finally falls back to the literal ``"MCM"``.
    The result is always upper-cased.
    """
    print(f"DEBUG: Processing Excel file: {excel_file}")
    project_match = re.search(r"([A-Z0-9]+_MCM\d+)", excel_file, re.IGNORECASE)
    if project_match:
        subsystem = project_match.group(1).upper()
        print(f"DEBUG: Full project pattern matched: {subsystem}")
    else:
        # Fallback to MCM-only pattern for backward compatibility
        mcm_match = re.search(r"(MCM\d+)", excel_file, re.IGNORECASE)
        subsystem = mcm_match.group(1).upper() if mcm_match else "MCM"
        print(f"DEBUG: Fallback pattern used: {subsystem}")

    print(f"DEBUG: Final subsystem name: {subsystem}")
    return subsystem


def create_desc_ip_sheet():
    """Command-line entry point: build the DESC/IP workbook and post-process it.

    Reads the Excel path from ``sys.argv[1]``, loads the IO path mappings,
    reads and sorts the DESC_PLC / NETWORK_PLC sheets, hands them to
    ``process_data``, then runs ``post_process_io_data`` on
    ``<SUBSYSTEM>_DESC_IP_MERGED.xlsx`` (writing ``<SUBSYSTEM>_OUTPUT.csv``)
    and copies that workbook to the fixed name ``DESC_IP_MERGED.xlsx``.

    Raises:
        SystemExit: with code 1 on a missing argument, missing input file,
            missing sheets/columns, or any exception during processing.
    """
    import shutil

    # Get Excel file path from command line arguments
    if len(sys.argv) < 2:
        print("Usage: python main.py <excel_file_path>")
        sys.exit(1)

    excel_file = sys.argv[1]

    if not os.path.exists(excel_file):
        print(f"CRITICAL: Excel file not found: {excel_file}")
        sys.exit(1)

    # Load IO path mappings
    print("Loading IO path mappings...")
    apf_df, m12dr_df, hub_df, sorter_hub_df, sio_df, ib16_df, ob16e_df, ib16s_df = load_io_path_mappings()

    try:
        desc_df, network_df = _read_plc_sheets(excel_file)

        print(f"\nDESC columns: {list(desc_df.columns)}")
        print(f"NETWORK columns: {list(network_df.columns)}")

        # Fail with a clear message instead of a bare KeyError when the
        # NETWORK sheet lacks a column the sort below depends on.
        sort_columns = ['PartNumber', 'DPM', 'Name']
        missing_columns = [col for col in sort_columns if col not in network_df.columns]
        if missing_columns:
            print(f"CRITICAL: NETWORK sheet is missing required column(s): {missing_columns}")
            sys.exit(1)

        # Sort network data by PartNumber, DPM, and then Name
        network_df['PartNumber'] = network_df['PartNumber'].fillna('')  # Handle NaN in PartNumber
        network_df['DPM'] = network_df['DPM'].fillna('')  # Handle NaN in DPM
        network_df = network_df.sort_values(by=sort_columns)

        # Process the data based on user requirements
        process_data(
            desc_df,
            network_df,
            excel_file,
            apf_df,
            m12dr_df,
            hub_df,
            sorter_hub_df,
            sio_df,
            ib16_df,
            ob16e_df,
            ib16s_df
        )

        # Extract full project name (e.g. MTN6_MCM04) from the Excel file path
        # This includes the project prefix for better organization
        subsystem = _extract_subsystem_name(excel_file)

        # Now run post-processing on the freshly generated workbook
        new_file = f"{subsystem}_DESC_IP_MERGED.xlsx"
        output_file = f"{subsystem}_OUTPUT.csv"
        post_process_io_data(new_file, output_file)

        # Copy the output file to the standard name expected by streamlined generator
        if os.path.exists(new_file):
            shutil.copy2(new_file, "DESC_IP_MERGED.xlsx")
            print("Created standard output file: DESC_IP_MERGED.xlsx")

            # Do not append legacy safety/ZONES sheets. Step 2 extracts from DESC_IP and uses zones.json.
        else:
            # Make the skipped copy visible; a stale DESC_IP_MERGED.xlsx
            # from an earlier run would otherwise be picked up unnoticed.
            print(f"WARNING: {new_file} not found; DESC_IP_MERGED.xlsx was not updated")

    except Exception as e:
        # Top-level boundary: report and exit non-zero.  SystemExit raised
        # above is not an Exception subclass, so it passes through untouched.
        print(f"Error occurred during processing: {e}")
        sys.exit(1)

# Script entry point: run the workflow only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    create_desc_ip_sheet()