#!/usr/bin/env python
"""
Convert Excel format from network structure format to standardized format.

The source file has network structure data that needs to be extracted and reformatted
to match the target format with columns: DPM, DPM_IP, Name, PartNumber, IP

Usage:
    python format.py <source_file.xlsx> [output_file.xlsx]
    
Examples:
    python format.py "Amazon CDW5_IP Addresses_Local.xlsx"
    python format.py "input.xlsx" "output.xlsx"
"""

import argparse
import pandas as pd
from pathlib import Path
from openpyxl import load_workbook
from openpyxl.styles import Font, Alignment, PatternFill, Border, Side
from openpyxl.utils import get_column_letter


# Column order of the generated sheet. Passing it to the DataFrame constructor
# guarantees the header row is written even when no data rows were extracted.
_OUTPUT_COLUMNS = ['DPM', 'DPM_IP', 'Name', 'PartNumber', 'IP']


def _find_source_columns(columns):
    """Return a dict mapping each output field to its source column label.

    Headers are matched by exact (whitespace-stripped) name first. If the
    'DPM' header is not found and the sheet has at least 8 columns, all five
    fields fall back to the positional layout (column indices 3-7).

    Args:
        columns: Column labels of the source DataFrame.

    Returns:
        Dict keyed by the names in ``_OUTPUT_COLUMNS``; a value is ``None``
        when no source column could be determined for that field.
    """
    cols = list(columns)
    found = dict.fromkeys(_OUTPUT_COLUMNS)

    for col in cols:
        label = str(col).strip()
        if label == 'DPM':
            found['DPM'] = col
        elif label == 'IP' and found['DPM_IP'] is None:
            # The first 'IP' header belongs to the DPM itself.
            found['DPM_IP'] = col
        elif label == 'Assigned Device':
            found['Name'] = col
        elif label == 'Part Number':
            found['PartNumber'] = col
        elif label in ('IP.1', 'IP'):
            # Second IP column is the device IP. pandas renames a duplicate
            # 'IP' header to 'IP.1'; a bare 'IP' only reaches this branch
            # once DPM_IP has already been assigned.
            found['IP'] = col

    # Fallback: use column indices if names don't match.
    # Based on the structure: columns are at indices 3, 4, 5, 6, 7.
    if found['DPM'] is None and len(cols) >= 8:
        found = dict(zip(_OUTPUT_COLUMNS, cols[3:8]))

    return found


def _extract_rows(source_df, mapping):
    """Build the output DataFrame from rows that have both a DPM and a Name.

    Args:
        source_df: Source DataFrame (DPM columns already forward-filled).
        mapping: Field-to-column dict as returned by ``_find_source_columns``;
            the 'DPM' and 'Name' entries must not be ``None``.

    Returns:
        DataFrame with exactly the columns in ``_OUTPUT_COLUMNS``; every
        value is a stripped string, and missing cells become ''.
    """
    dpm_col = mapping['DPM']
    name_col = mapping['Name']

    def cell_text(row, field):
        col = mapping[field]
        if col is None:
            return ''
        value = row[col]
        return str(value).strip() if pd.notna(value) else ''

    records = []
    for _, row in source_df.iterrows():
        # Only keep rows that have at least DPM and Name (the essential data)
        if pd.isna(row[dpm_col]) or pd.isna(row[name_col]):
            continue
        records.append({field: cell_text(row, field) for field in _OUTPUT_COLUMNS})

    return pd.DataFrame(records, columns=_OUTPUT_COLUMNS)


def _write_formatted_workbook(output_df, output_file):
    """Write ``output_df`` to sheet 'NETWORK_PLC' and apply cell formatting.

    The header row is bold, grey-filled and centered; data cells are
    left-aligned except the two IP columns, which are centered. All written
    cells get a thin border.
    """
    column_widths = {
        'DPM': 20,
        'DPM_IP': 16,
        'Name': 30,
        'PartNumber': 18,
        'IP': 16,
    }
    header_fill = PatternFill(start_color="D3D3D3", end_color="D3D3D3", fill_type="solid")
    header_font = Font(bold=True, size=11)
    centered = Alignment(horizontal="center", vertical="center")
    left_aligned = Alignment(horizontal="left", vertical="center")
    thin = Side(style="thin")
    thin_border = Border(left=thin, right=thin, top=thin, bottom=thin)

    # 1-based worksheet column numbers of the IP columns (DPM_IP and IP)
    centered_columns = {
        _OUTPUT_COLUMNS.index('DPM_IP') + 1,
        _OUTPUT_COLUMNS.index('IP') + 1,
    }

    with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
        output_df.to_excel(writer, sheet_name='NETWORK_PLC', index=False)
        output_ws = writer.sheets['NETWORK_PLC']

        n_cols = len(output_df.columns)
        for col_idx, field in enumerate(_OUTPUT_COLUMNS, start=1):
            output_ws.column_dimensions[get_column_letter(col_idx)].width = column_widths[field]

        # Header row
        for col_idx in range(1, n_cols + 1):
            header_cell = output_ws.cell(row=1, column=col_idx)
            header_cell.font = header_font
            header_cell.fill = header_fill
            header_cell.alignment = centered
            header_cell.border = thin_border

        # Data rows (worksheet row 2 onwards, row 1 being the header)
        for row_idx in range(2, len(output_df) + 2):
            for col_idx in range(1, n_cols + 1):
                data_cell = output_ws.cell(row=row_idx, column=col_idx)
                data_cell.alignment = centered if col_idx in centered_columns else left_aligned
                data_cell.border = thin_border


def convert_format(source_file, output_file):
    """Convert a network-structure Excel sheet to the standardized format.

    Reads ``source_file`` (column names are taken from the third spreadsheet
    row), locates the DPM / IP / Assigned Device / Part Number columns,
    forward-fills the DPM and DPM IP values, and writes one row per device
    to sheet 'NETWORK_PLC' of ``output_file`` with the columns
    DPM, DPM_IP, Name, PartNumber, IP. Progress is printed to stdout.

    Args:
        source_file: Path of the Excel file to read.
        output_file: Path of the Excel file to create (overwritten if present).

    Raises:
        ValueError: If the DPM or Name column cannot be determined.
    """
    print(f"Reading source file: {source_file}")
    # header=2: rows 0 and 1 are skipped and row 2 (0-based) supplies the column names
    source_df = pd.read_excel(source_file, header=2)

    print(f"Source file shape: {source_df.shape}")
    print(f"Source columns: {list(source_df.columns)}")

    print("\nFirst few rows of source:")
    print(source_df.head(10).to_string())

    mapping = _find_source_columns(source_df.columns)
    dpm_col = mapping['DPM']
    dpm_ip_col = mapping['DPM_IP']
    name_col = mapping['Name']

    print("\nColumn mapping:")
    for field in _OUTPUT_COLUMNS:
        print(f"  {field}: {mapping[field]}")

    # Compare against None rather than relying on truthiness, so a falsy
    # column label (e.g. a numeric 0 header) is not mistaken for "not found".
    if dpm_col is None or name_col is None:
        raise ValueError(f"Could not find required columns. Found: DPM={dpm_col}, Name={name_col}")

    # Forward-fill DPM and DPM_IP values (they're merged across multiple rows)
    source_df[dpm_col] = source_df[dpm_col].ffill()
    if dpm_ip_col is not None:
        source_df[dpm_ip_col] = source_df[dpm_ip_col].ffill()

    output_df = _extract_rows(source_df, mapping)

    print(f"\nExtracted {len(output_df)} rows of network data")
    print("\nFirst few rows of output:")
    print(output_df.head(10).to_string())

    print(f"\nWriting output to: {output_file}")
    _write_formatted_workbook(output_df, output_file)

    print("\nConversion complete!")
    print(f"  Output saved to: {output_file}")
    print(f"  Total rows: {len(output_df)}")


def main():
    """Parse command-line arguments and run the conversion.

    Positional arguments are the source workbook and an optional output
    workbook. Relative paths given on the command line are resolved against
    the directory containing this script, not the current working directory.
    When no output path is given, the output is written next to the source
    file as ``<stem>_formatted<suffix>``.

    Exits with status 1 if the source file does not exist or the conversion
    raises an exception.
    """
    # sys.exit is used instead of the bare exit() builtin: exit() is injected
    # by the site module for interactive use and is not guaranteed to exist
    # (e.g. under `python -S` or in frozen executables).
    import sys

    parser = argparse.ArgumentParser(
        description="Convert Excel network structure format to standardized format",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python format.py "Amazon CDW5_IP Addresses_Local.xlsx"
  python format.py "input.xlsx" "output.xlsx"
  python format.py "C:\\path\\to\\file.xlsx"
        """
    )
    parser.add_argument(
        "source_file",
        help="Path to the source Excel file to convert"
    )
    parser.add_argument(
        "output_file",
        nargs="?",
        default=None,
        help="Path to the output Excel file (default: source_file with '_formatted' suffix)"
    )

    args = parser.parse_args()
    script_dir = Path(__file__).parent

    # Resolve file paths
    source_file = Path(args.source_file)
    if not source_file.is_absolute():
        # If relative, assume it's in the same directory as the script
        source_file = script_dir / source_file

    if not source_file.exists():
        print(f"ERROR: Source file not found: {source_file}")
        sys.exit(1)

    # Determine output file
    if args.output_file:
        output_file = Path(args.output_file)
        if not output_file.is_absolute():
            output_file = script_dir / output_file
    else:
        # Default: add "_formatted" before the extension
        output_file = source_file.parent / f"{source_file.stem}_formatted{source_file.suffix}"

    print("=" * 60)
    print("CONVERTING EXCEL FORMAT")
    print("=" * 60)
    print(f"Source: {source_file}")
    print(f"Output: {output_file}")
    print("=" * 60)

    try:
        convert_format(source_file, output_file)
    except Exception as e:
        # Top-level boundary: report the failure with a traceback and signal
        # it through the exit status.
        print(f"\nERROR: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)


# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()