#!/usr/bin/env python
"""
Convert Excel format from network structure format to standardized format.

The source file has network structure data that needs to be extracted and reformatted
to match the target format with columns: DPM, DPM_IP, Name, PartNumber, IP

Usage:
    python format.py <source_file.xlsx> [output_file.xlsx]

Examples:
    python format.py "Amazon CDW5_IP Addresses_Local.xlsx"
    python format.py "input.xlsx" "output.xlsx"
"""

import argparse
import sys
import traceback
from pathlib import Path

import pandas as pd
from openpyxl import load_workbook  # noqa: F401  (kept from the original import list)
from openpyxl.styles import Font, Alignment, PatternFill, Border, Side
from openpyxl.utils import get_column_letter

# Name of the single worksheet written to the output workbook.
_SHEET_NAME = 'NETWORK_PLC'

# Output header -> column width, in output order.
_COLUMN_WIDTHS = {
    'DPM': 20,
    'DPM_IP': 16,
    'Name': 30,
    'PartNumber': 18,
    'IP': 16,
}
_OUTPUT_COLUMNS = list(_COLUMN_WIDTHS)

# Output columns whose data cells are centered instead of left-aligned.
_CENTERED_COLUMNS = frozenset({'DPM_IP', 'IP'})

# Logical fields, in the order they appear in the positional fallback layout.
_FIELDS = ('dpm', 'dpm_ip', 'name', 'part_number', 'ip')


def _find_columns(columns):
    """Map the source header labels to the five logical fields.

    Labels are matched by exact (whitespace-stripped) name first. The first
    'IP' column is taken as the DPM's IP; a later 'IP' / 'IP.1' column (pandas
    renames duplicate headers with a '.1' suffix) is the device IP.

    If no 'DPM' header is found and there are at least 8 columns, the fixed
    positional layout (indices 3-7) is used for all five fields instead.

    Returns a dict keyed by the names in ``_FIELDS``; a value is ``None`` when
    the corresponding column could not be located.
    """
    cols = list(columns)
    found = dict.fromkeys(_FIELDS)

    for col in cols:
        label = str(col).strip()
        if label == 'DPM':
            found['dpm'] = col
        elif label == 'IP' and found['dpm_ip'] is None:
            found['dpm_ip'] = col
        elif label == 'Assigned Device':
            found['name'] = col
        elif label == 'Part Number':
            found['part_number'] = col
        elif label in ('IP', 'IP.1'):
            found['ip'] = col

    if found['dpm'] is None and len(cols) >= 8:
        # Positional fallback: DPM, IP, Assigned Device, Part Number, IP.1
        found = dict(zip(_FIELDS, cols[3:8]))

    return found


def _clean(value):
    """Return a cell value as stripped text, or '' for an empty cell.

    Integral floats are rendered without the trailing '.0': pandas reads an
    integer column that contains blanks as float64, so a part number of 1234
    would otherwise come out as '1234.0'.
    """
    if value is None or pd.isna(value):
        return ''
    if isinstance(value, float) and value.is_integer():
        value = int(value)
    return str(value).strip()


def _extract_rows(source_df, columns):
    """Build the standardized output frame from the raw source frame.

    DPM values are forward-filled because they sit in merged cells that span
    several device rows. DPM_IP is forward-filled only *within* a DPM block so
    that a DPM with a blank IP does not inherit the previous DPM's address.

    Rows without both a DPM and a Name are dropped. The returned frame always
    has the five output columns, even when no rows qualify.
    """
    row_count = len(source_df)
    dpm_series = source_df[columns['dpm']].ffill()

    def texts(series):
        return [_clean(value) for value in series]

    def optional(field):
        col = columns[field]
        return [''] * row_count if col is None else texts(source_df[col])

    if columns['dpm_ip'] is None:
        dpm_ips = [''] * row_count
    else:
        # A new block starts wherever the (filled) DPM value changes.
        block_id = dpm_series.ne(dpm_series.shift()).cumsum()
        dpm_ips = texts(source_df[columns['dpm_ip']].groupby(block_id).ffill())

    rows = zip(
        texts(dpm_series),
        dpm_ips,
        optional('name'),
        optional('part_number'),
        optional('ip'),
    )
    # Only keep rows that have at least DPM and Name (the essential data).
    records = [row for row in rows if row[0] and row[2]]
    return pd.DataFrame(records, columns=_OUTPUT_COLUMNS)


def _write_output(output_df, output_file):
    """Write *output_df* to *output_file* as a formatted single-sheet workbook."""
    header_fill = PatternFill(start_color="D3D3D3", end_color="D3D3D3", fill_type="solid")
    header_font = Font(bold=True, size=11)
    centered = Alignment(horizontal="center", vertical="center")
    left_aligned = Alignment(horizontal="left", vertical="center")
    thin = Side(style="thin")
    thin_border = Border(left=thin, right=thin, top=thin, bottom=thin)

    last_row = len(output_df) + 1  # row 1 is the header

    # Styling must happen inside the writer context so it is saved on exit.
    with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
        output_df.to_excel(writer, sheet_name=_SHEET_NAME, index=False)
        worksheet = writer.sheets[_SHEET_NAME]

        for col_idx, col_name in enumerate(output_df.columns, start=1):
            letter = get_column_letter(col_idx)
            worksheet.column_dimensions[letter].width = _COLUMN_WIDTHS[col_name]

            header_cell = worksheet.cell(row=1, column=col_idx)
            header_cell.font = header_font
            header_cell.fill = header_fill
            header_cell.alignment = centered
            header_cell.border = thin_border

            data_alignment = centered if col_name in _CENTERED_COLUMNS else left_aligned
            for row_idx in range(2, last_row + 1):
                data_cell = worksheet.cell(row=row_idx, column=col_idx)
                data_cell.alignment = data_alignment
                data_cell.border = thin_border


def convert_format(source_file, output_file):
    """Convert source Excel format to target format.

    Reads the first sheet of *source_file* using the third spreadsheet row as
    the header, extracts the DPM / DPM_IP / Name / PartNumber / IP fields and
    writes them to *output_file* as a formatted 'NETWORK_PLC' sheet. Progress
    and a preview of the data are printed to stdout.

    Args:
        source_file: Path of the Excel workbook to read.
        output_file: Path of the Excel workbook to create or overwrite.

    Raises:
        ValueError: If the DPM or the device-name column cannot be located.
    """
    print(f"Reading source file: {source_file}")
    # header=2 skips the first 2 rows and uses row 2 (0-based) as the header
    source_df = pd.read_excel(source_file, header=2)

    print(f"Source file shape: {source_df.shape}")
    print(f"Source columns: {list(source_df.columns)}")

    print("\nFirst few rows of source:")
    print(source_df.head(10).to_string())

    columns = _find_columns(source_df.columns)
    dpm_col = columns['dpm']
    name_col = columns['name']

    print("\nColumn mapping:")
    print(f"  DPM: {dpm_col}")
    print(f"  DPM_IP: {columns['dpm_ip']}")
    print(f"  Name: {name_col}")
    print(f"  PartNumber: {columns['part_number']}")
    print(f"  IP: {columns['ip']}")

    # Compare against None: a legitimate column label may itself be falsy.
    if dpm_col is None or name_col is None:
        raise ValueError(
            f"Could not find required columns. Found: DPM={dpm_col}, Name={name_col}"
        )

    output_df = _extract_rows(source_df, columns)

    print(f"\nExtracted {len(output_df)} rows of network data")
    print("\nFirst few rows of output:")
    print(output_df.head(10).to_string())

    print(f"\nWriting output to: {output_file}")
    _write_output(output_df, output_file)

    print("\nConversion complete!")
    print(f"  Output saved to: {output_file}")
    print(f"  Total rows: {len(output_df)}")


def main():
    """Command-line entry point.

    Relative source and output paths are resolved against the directory that
    contains this script (not the current working directory). When no output
    path is given, the output is written next to the source file with
    '_formatted' appended to the file stem. Exits with status 1 if the source
    file is missing or the conversion fails.
    """
    parser = argparse.ArgumentParser(
        description="Convert Excel network structure format to standardized format",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python format.py "Amazon CDW5_IP Addresses_Local.xlsx"
  python format.py "input.xlsx" "output.xlsx"
  python format.py "C:\\path\\to\\file.xlsx"
        """
    )
    parser.add_argument(
        "source_file",
        help="Path to the source Excel file to convert"
    )
    parser.add_argument(
        "output_file",
        nargs="?",
        default=None,
        help="Path to the output Excel file (default: source_file with '_formatted' suffix)"
    )

    args = parser.parse_args()
    script_dir = Path(__file__).parent

    # Resolve file paths: relative paths are assumed to live beside the script
    source_file = Path(args.source_file)
    if not source_file.is_absolute():
        source_file = script_dir / source_file

    if not source_file.exists():
        print(f"ERROR: Source file not found: {source_file}", file=sys.stderr)
        # sys.exit, not the site-provided exit(), which is absent under -S
        sys.exit(1)

    # Determine output file
    if args.output_file:
        output_file = Path(args.output_file)
        if not output_file.is_absolute():
            output_file = script_dir / output_file
    else:
        # Default: add "_formatted" before the extension
        output_file = source_file.parent / f"{source_file.stem}_formatted{source_file.suffix}"

    print("=" * 60)
    print("CONVERTING EXCEL FORMAT")
    print("=" * 60)
    print(f"Source: {source_file}")
    print(f"Output: {output_file}")
    print("=" * 60)

    try:
        convert_format(source_file, output_file)
    except Exception as e:
        # Top-level boundary: report the failure with a traceback, then exit non-zero.
        print(f"\nERROR: {e}", file=sys.stderr)
        traceback.print_exc()
        sys.exit(1)


if __name__ == "__main__":
    main()