Merge pull request 'Updated Scripts' (#1 ) from master into main

Reviewed-on: #1
Updated Scripts
2026-01-04 09:41:52 +00:00 · 2026-01-04 13:38:21 +04:00
5 changed files with 232 additions and 0 deletions
--- a/Addresses_Local.xlsx
+++ b/Addresses_Local.xlsx
--- a/Addresses_Local_formatted.xlsx
+++ b/Addresses_Local_formatted.xlsx
--- a/Additional/format.py
+++ b/Additional/format.py
@ -0,0 +1,232 @@
+#!/usr/bin/env python
+"""
+Convert an Excel workbook from the network-structure layout to a standardized layout.
+
+The source file has network structure data that needs to be extracted and reformatted
+to match the target format with columns: DPM, DPM_IP, Name, PartNumber, IP
+
+Usage:
+    python format.py <source_file.xlsx> [output_file.xlsx]
+    
+Examples:
+    python format.py "Amazon CDW5_IP Addresses_Local.xlsx"
+    python format.py "input.xlsx" "output.xlsx"
+"""
+
+import argparse
+import pandas as pd
+from pathlib import Path
+from openpyxl import load_workbook
+from openpyxl.styles import Font, Alignment, PatternFill, Border, Side
+from openpyxl.utils import get_column_letter
+
+
+def _cell_text(value):
+    """Return a cell value as stripped text, or '' when the cell is empty.
+
+    pandas stores a numeric column that contains blanks as floats, so an
+    integer cell such as 1756 arrives here as 1756.0. Integral floats are
+    rendered without the trailing '.0' so they match the spreadsheet.
+    """
+    if value is None or pd.isna(value):
+        return ''
+    if isinstance(value, float) and value.is_integer():
+        value = int(value)
+    return str(value).strip()
+
+
+def convert_format(source_file, output_file):
+    """Convert a network-structure workbook into the standardized layout.
+
+    Reads the first sheet of ``source_file``, locates the DPM / IP /
+    'Assigned Device' / 'Part Number' columns, forward-fills the DPM and
+    DPM IP columns, and writes one formatted sheet named 'NETWORK_PLC' to
+    ``output_file`` with the columns DPM, DPM_IP, Name, PartNumber, IP.
+    Progress and a preview of the data are printed to stdout.
+
+    Args:
+        source_file: Path of the Excel file to read.
+        output_file: Path of the Excel file to create or overwrite.
+
+    Raises:
+        ValueError: If neither the header names nor the positional fallback
+            yield both a DPM column and a device-name column.
+    """
+    output_columns = ['DPM', 'DPM_IP', 'Name', 'PartNumber', 'IP']
+
+    print(f"Reading source file: {source_file}")
+    # header=2 is a 0-based row index: the third spreadsheet row supplies the
+    # column names and the two rows above it are skipped.
+    source_df = pd.read_excel(source_file, header=2)
+
+    print(f"Source file shape: {source_df.shape}")
+    print(f"Source columns: {list(source_df.columns)}")
+
+    print("\nFirst few rows of source:")
+    print(source_df.head(10).to_string())
+
+    cols = list(source_df.columns)
+
+    # Locate the source columns by their (stripped) header text.
+    dpm_col = None
+    dpm_ip_col = None
+    name_col = None
+    part_number_col = None
+    ip_col = None
+
+    for col in cols:
+        label = str(col).strip()
+        if label == 'DPM':
+            dpm_col = col
+        elif label == 'IP' and dpm_ip_col is None:
+            # The first IP column belongs to the DPM.
+            dpm_ip_col = col
+        elif label == 'Assigned Device':
+            name_col = col
+        elif label == 'Part Number':
+            part_number_col = col
+        elif label in ('IP', 'IP.1'):
+            # A later IP column is the device IP; pandas renames a duplicate
+            # 'IP' header to 'IP.1'.
+            ip_col = col
+
+    # Fallback: if no 'DPM' header was found, assume the five fields sit at
+    # positions 3-7 of the sheet.
+    if dpm_col is None and len(cols) >= 8:
+        dpm_col, dpm_ip_col, name_col, part_number_col, ip_col = cols[3:8]
+
+    print("\nColumn mapping:")
+    print(f"  DPM: {dpm_col}")
+    print(f"  DPM_IP: {dpm_ip_col}")
+    print(f"  Name: {name_col}")
+    print(f"  PartNumber: {part_number_col}")
+    print(f"  IP: {ip_col}")
+
+    # Compare against None rather than truthiness so that a falsy column
+    # label (e.g. the integer 0) is not mistaken for "column not found".
+    if dpm_col is None or name_col is None:
+        raise ValueError(f"Could not find required columns. Found: DPM={dpm_col}, Name={name_col}")
+
+    # DPM and DPM_IP are merged across several rows in the source, so only the
+    # first row of each group carries a value; propagate it downwards.
+    source_df[dpm_col] = source_df[dpm_col].ffill()
+    if dpm_ip_col is not None:
+        source_df[dpm_ip_col] = source_df[dpm_ip_col].ffill()
+
+    def field(row, col):
+        """Text of ``row[col]``, or '' when the column was not located."""
+        return _cell_text(row[col]) if col is not None else ''
+
+    # Keep only rows that have both a DPM and a device name.
+    output_data = []
+    for _, row in source_df.iterrows():
+        if pd.isna(row[dpm_col]) or pd.isna(row[name_col]):
+            continue
+        output_data.append({
+            'DPM': field(row, dpm_col),
+            'DPM_IP': field(row, dpm_ip_col),
+            'Name': field(row, name_col),
+            'PartNumber': field(row, part_number_col),
+            'IP': field(row, ip_col),
+        })
+
+    # Passing the column list explicitly keeps the header row in the output
+    # even when no data rows were extracted.
+    output_df = pd.DataFrame(output_data, columns=output_columns)
+
+    print(f"\nExtracted {len(output_df)} rows of network data")
+    print("\nFirst few rows of output:")
+    print(output_df.head(10).to_string())
+
+    print(f"\nWriting output to: {output_file}")
+
+    with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
+        output_df.to_excel(writer, sheet_name='NETWORK_PLC', index=False)
+        output_ws = writer.sheets['NETWORK_PLC']
+
+        # Column widths (reasonable defaults), keyed by output column name.
+        column_widths = {
+            'DPM': 20,
+            'DPM_IP': 16,
+            'Name': 30,
+            'PartNumber': 18,
+            'IP': 16,
+        }
+        for col_idx, col_name in enumerate(output_columns, start=1):
+            col_letter = get_column_letter(col_idx)
+            output_ws.column_dimensions[col_letter].width = column_widths[col_name]
+
+        header_fill = PatternFill(start_color="D3D3D3", end_color="D3D3D3", fill_type="solid")
+        header_font = Font(bold=True, size=11)
+        center_alignment = Alignment(horizontal="center", vertical="center")
+        left_alignment = Alignment(horizontal="left", vertical="center")
+        thin_border = Border(
+            left=Side(style="thin"),
+            right=Side(style="thin"),
+            top=Side(style="thin"),
+            bottom=Side(style="thin")
+        )
+
+        # Header row: bold, grey fill, centred, bordered.
+        for col_idx in range(1, len(output_columns) + 1):
+            header_cell = output_ws.cell(row=1, column=col_idx)
+            header_cell.font = header_font
+            header_cell.fill = header_fill
+            header_cell.alignment = center_alignment
+            header_cell.border = thin_border
+
+        # Data rows: bordered, left-aligned except for the two IP columns,
+        # which are centred.
+        centered_columns = {
+            col_idx
+            for col_idx, col_name in enumerate(output_columns, start=1)
+            if col_name in ('DPM_IP', 'IP')
+        }
+        for row_idx in range(2, len(output_df) + 2):
+            for col_idx in range(1, len(output_columns) + 1):
+                data_cell = output_ws.cell(row=row_idx, column=col_idx)
+                if col_idx in centered_columns:
+                    data_cell.alignment = center_alignment
+                else:
+                    data_cell.alignment = left_alignment
+                data_cell.border = thin_border
+
+    print("\nConversion complete!")
+    print(f"  Output saved to: {output_file}")
+    print(f"  Total rows: {len(output_df)}")
+
+
+def main():
+    """Command-line entry point: parse arguments and run the conversion.
+
+    A relative source path is looked up next to this script first and, if it
+    is not found there, relative to the current working directory. A relative
+    output path is placed next to this script. When no output path is given,
+    the output is written beside the source file with '_formatted' appended
+    to the file name.
+
+    Exits with status 1 if the source file does not exist or the conversion
+    raises an exception.
+    """
+    # Local import so this function does not rely on the `exit` builtin, which
+    # is only injected by the `site` module and is meant for interactive use.
+    import sys
+
+    parser = argparse.ArgumentParser(
+        description="Convert Excel network structure format to standardized format",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+  python format.py "Amazon CDW5_IP Addresses_Local.xlsx"
+  python format.py "input.xlsx" "output.xlsx"
+  python format.py "C:\\path\\to\\file.xlsx"
+        """
+    )
+    parser.add_argument(
+        "source_file",
+        help="Path to the source Excel file to convert"
+    )
+    parser.add_argument(
+        "output_file",
+        nargs="?",
+        default=None,
+        help="Path to the output Excel file (default: source_file with '_formatted' suffix)"
+    )
+
+    args = parser.parse_args()
+    script_dir = Path(__file__).parent
+
+    # Resolve the source path. The script directory keeps priority (the
+    # historical behaviour); the working directory is only a fallback so a
+    # file that exists where the user is standing is not reported missing.
+    source_file = Path(args.source_file)
+    if not source_file.is_absolute():
+        script_relative = script_dir / source_file
+        if script_relative.exists() or not source_file.exists():
+            source_file = script_relative
+        else:
+            source_file = source_file.resolve()
+
+    if not source_file.exists():
+        print(f"ERROR: Source file not found: {source_file}", file=sys.stderr)
+        sys.exit(1)
+
+    # Determine the output path.
+    if args.output_file:
+        output_file = Path(args.output_file)
+        if not output_file.is_absolute():
+            output_file = script_dir / output_file
+    else:
+        # Default: insert "_formatted" before the extension.
+        output_file = source_file.parent / f"{source_file.stem}_formatted{source_file.suffix}"
+
+    print("=" * 60)
+    print("CONVERTING EXCEL FORMAT")
+    print("=" * 60)
+    print(f"Source: {source_file}")
+    print(f"Output: {output_file}")
+    print("=" * 60)
+
+    try:
+        convert_format(source_file, output_file)
+    except Exception as e:
+        # Top-level boundary: report the failure with its traceback and turn
+        # it into a non-zero exit status.
+        import traceback
+        print(f"\nERROR: {e}", file=sys.stderr)
+        traceback.print_exc()
+        sys.exit(1)
+
+
+# Run the command-line interface only when this file is executed as a script,
+# not when it is imported as a module.
+if __name__ == "__main__":
+    main()
--- a/Additional/test_output.xlsx
+++ b/Additional/test_output.xlsx
--- a/Addresses_Local_formatted.xlsx
+++ b/Addresses_Local_formatted.xlsx
Author	SHA1	Message	Date
gigi.mamaladze	752115a1fc	Merge pull request 'Updated Scripts' (#1 ) from master into main Reviewed-on: #1	2026-01-04 09:41:52 +00:00
gigi mamaladze	4035dc0588	Updated Scripts	2026-01-04 13:38:21 +04:00