#!/usr/bin/env python
"""
Convert Excel format from network structure format to standardized format.

The source file has network structure data that needs to be extracted and
reformatted to match the target format with columns:
DPM, DPM_IP, Name, PartNumber, IP

Usage:
    python format.py <source_file.xlsx> [output_file.xlsx]

Examples:
    python format.py "Amazon CDW5_IP Addresses_Local.xlsx"
    python format.py "input.xlsx" "output.xlsx"
"""

import argparse
import sys
import traceback
from pathlib import Path

import pandas as pd
from openpyxl import load_workbook
from openpyxl.styles import Font, Alignment, PatternFill, Border, Side
from openpyxl.utils import get_column_letter

# Output columns, in the order they are written to the target sheet.
OUTPUT_COLUMNS = ['DPM', 'DPM_IP', 'Name', 'PartNumber', 'IP']

# Display width for each output column.
_COLUMN_WIDTHS = {'DPM': 20, 'DPM_IP': 16, 'Name': 30, 'PartNumber': 18, 'IP': 16}

# Output columns whose data cells are centred instead of left-aligned.
_CENTERED_COLUMNS = frozenset({'DPM_IP', 'IP'})


def _find_columns(columns):
    """Map each output field to the source column label that feeds it.

    Columns are matched on their stripped header text first.  When no 'DPM'
    header is found and at least eight columns exist, the columns at
    positions 3-7 are used instead, in output-field order.

    Args:
        columns: Iterable of source column labels.

    Returns:
        Dict keyed by the names in OUTPUT_COLUMNS; a value is None when no
        source column was found for that field.
    """
    labels = list(columns)
    found = dict.fromkeys(OUTPUT_COLUMNS)

    for label in labels:
        text = str(label).strip()
        if text == 'DPM':
            found['DPM'] = label
        elif text == 'IP' and found['DPM_IP'] is None:
            # The first IP column belongs to the DPM itself.
            found['DPM_IP'] = label
        elif text == 'Assigned Device':
            found['Name'] = label
        elif text == 'Part Number':
            found['PartNumber'] = label
        elif text in ('IP', 'IP.1'):
            # Any later IP column is the device IP (pandas renames a
            # duplicate 'IP' header to 'IP.1').
            found['IP'] = label

    # Positional fallback when the headers did not match by name.
    if found['DPM'] is None and len(labels) >= 8:
        found = dict(zip(OUTPUT_COLUMNS, labels[3:8]))

    return found


def _cell_text(value):
    """Return the stripped string form of a cell, or '' when it is missing."""
    return '' if pd.isna(value) else str(value).strip()


def _extract_rows(source_df, mapping):
    """Build the output frame from the source frame.

    Args:
        source_df: Source data as read from the workbook (not modified).
        mapping: Result of `_find_columns`; 'DPM' and 'Name' must be set.

    Returns:
        DataFrame with exactly the OUTPUT_COLUMNS columns (even when no rows
        qualify), holding one row per source row that has both a DPM and a
        Name value.
    """
    dpm_col = mapping['DPM']
    name_col = mapping['Name']
    dpm_ip_col = mapping['DPM_IP']

    frame = source_df.copy()
    # DPM / DPM_IP appear once per group of rows, so carry them downwards.
    # NOTE(review): DPM_IP is filled independently of DPM, so a DPM that has
    # no IP of its own inherits the previous DPM's IP - confirm acceptable.
    frame[dpm_col] = frame[dpm_col].ffill()
    if dpm_ip_col is not None:
        frame[dpm_ip_col] = frame[dpm_ip_col].ffill()

    optional_fields = ('DPM_IP', 'PartNumber', 'IP')
    records = []
    for _, row in frame.iterrows():
        dpm = row[dpm_col]
        name = row[name_col]
        # Only keep rows that have the essential data.
        if pd.isna(dpm) or pd.isna(name):
            continue
        record = {'DPM': _cell_text(dpm), 'Name': _cell_text(name)}
        for field in optional_fields:
            column = mapping[field]
            record[field] = _cell_text(row[column]) if column is not None else ''
        records.append(record)

    # Explicit columns keep the header row present when nothing was extracted.
    return pd.DataFrame(records, columns=OUTPUT_COLUMNS)


def _style_worksheet(worksheet, row_count):
    """Apply column widths, header styling and cell borders/alignment.

    Args:
        worksheet: openpyxl worksheet holding a header row followed by
            `row_count` data rows in OUTPUT_COLUMNS order.
        row_count: Number of data rows below the header.
    """
    for col_idx, field in enumerate(OUTPUT_COLUMNS, start=1):
        worksheet.column_dimensions[get_column_letter(col_idx)].width = _COLUMN_WIDTHS[field]

    thin = Side(style="thin")
    border = Border(left=thin, right=thin, top=thin, bottom=thin)
    header_fill = PatternFill(start_color="D3D3D3", end_color="D3D3D3", fill_type="solid")
    header_font = Font(bold=True, size=11)
    centered = Alignment(horizontal="center", vertical="center")
    left_aligned = Alignment(horizontal="left", vertical="center")

    for col_idx, field in enumerate(OUTPUT_COLUMNS, start=1):
        header_cell = worksheet.cell(row=1, column=col_idx)
        header_cell.font = header_font
        header_cell.fill = header_fill
        header_cell.alignment = centered
        header_cell.border = border

        # IP columns are centred; everything else is left-aligned.
        alignment = centered if field in _CENTERED_COLUMNS else left_aligned
        for row_idx in range(2, row_count + 2):
            data_cell = worksheet.cell(row=row_idx, column=col_idx)
            data_cell.alignment = alignment
            data_cell.border = border


def convert_format(source_file, output_file):
    """Convert source Excel format to target format.

    Reads the first sheet of `source_file`, extracts the DPM / device rows and
    writes them to a formatted 'NETWORK_PLC' sheet in `output_file`.
    Progress information is printed to stdout.

    Args:
        source_file: Path of the workbook to read.
        output_file: Path of the workbook to create.

    Raises:
        ValueError: If the DPM or device-name column cannot be located.
    """
    print(f"Reading source file: {source_file}")

    # header=2 is zero-based: the third sheet row supplies the column names
    # and the two rows above it are skipped.
    source_df = pd.read_excel(source_file, header=2)

    print(f"Source file shape: {source_df.shape}")
    print(f"Source columns: {list(source_df.columns)}")

    print("\nFirst few rows of source:")
    print(source_df.head(10).to_string())

    mapping = _find_columns(source_df.columns)

    print("\nColumn mapping:")
    for field in OUTPUT_COLUMNS:
        print(f" {field}: {mapping[field]}")

    # Compare against None: a positional fallback label may be falsy (e.g. 0).
    dpm_col = mapping['DPM']
    name_col = mapping['Name']
    if dpm_col is None or name_col is None:
        raise ValueError(f"Could not find required columns. Found: DPM={dpm_col}, Name={name_col}")

    output_df = _extract_rows(source_df, mapping)

    print(f"\nExtracted {len(output_df)} rows of network data")
    print("\nFirst few rows of output:")
    print(output_df.head(10).to_string())

    print(f"\nWriting output to: {output_file}")
    with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
        output_df.to_excel(writer, sheet_name='NETWORK_PLC', index=False)
        # Styling must happen before the writer closes and saves the file.
        _style_worksheet(writer.sheets['NETWORK_PLC'], len(output_df))

    print("\nConversion complete!")
    print(f" Output saved to: {output_file}")
    print(f" Total rows: {len(output_df)}")


def main():
    """Parse command-line arguments and run the conversion.

    Relative paths are resolved against the directory containing this script
    (not the current working directory).  Exits with status 1 when the source
    file is missing or the conversion fails.
    """
    parser = argparse.ArgumentParser(
        description="Convert Excel network structure format to standardized format",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python format.py "Amazon CDW5_IP Addresses_Local.xlsx"
  python format.py "input.xlsx" "output.xlsx"
  python format.py "C:\\path\\to\\file.xlsx"
        """
    )
    parser.add_argument(
        "source_file",
        help="Path to the source Excel file to convert"
    )
    parser.add_argument(
        "output_file",
        nargs="?",
        default=None,
        help="Path to the output Excel file (default: source_file with '_formatted' suffix)"
    )

    args = parser.parse_args()

    script_dir = Path(__file__).parent

    # Resolve file paths; relative paths are taken to be next to the script.
    source_file = Path(args.source_file)
    if not source_file.is_absolute():
        source_file = script_dir / source_file

    if not source_file.exists():
        print(f"ERROR: Source file not found: {source_file}")
        sys.exit(1)

    # Determine output file
    if args.output_file:
        output_file = Path(args.output_file)
        if not output_file.is_absolute():
            output_file = script_dir / output_file
    else:
        # Default: add "_formatted" before the extension
        output_file = source_file.with_name(f"{source_file.stem}_formatted{source_file.suffix}")

    print("=" * 60)
    print("CONVERTING EXCEL FORMAT")
    print("=" * 60)
    print(f"Source: {source_file}")
    print(f"Output: {output_file}")
    print("=" * 60)

    # Top-level boundary: report any failure and exit non-zero.
    try:
        convert_format(source_file, output_file)
    except Exception as e:
        print(f"\nERROR: {e}")
        traceback.print_exc()
        sys.exit(1)


if __name__ == "__main__":
    main()