Merge pull request 'Updated Scripts' (#1) from master into main

Reviewed-on: #1
This commit is contained in:
gigi.mamaladze 2026-01-04 09:41:52 +00:00
commit 752115a1fc
5 changed files with 232 additions and 0 deletions

Binary file not shown.

232
Additional/format.py Normal file
View File

@ -0,0 +1,232 @@
#!/usr/bin/env python
"""
Convert Excel format from network structure format to standardized format.
The source file has network structure data that needs to be extracted and reformatted
to match the target format with columns: DPM, DPM_IP, Name, PartNumber, IP
Usage:
python format.py <source_file.xlsx> [output_file.xlsx]
Examples:
python format.py "Amazon CDW5_IP Addresses_Local.xlsx"
python format.py "input.xlsx" "output.xlsx"
"""
import argparse
import pandas as pd
from pathlib import Path
from openpyxl import load_workbook
from openpyxl.styles import Font, Alignment, PatternFill, Border, Side
from openpyxl.utils import get_column_letter
# Column headers of the standardized NETWORK_PLC sheet, in output order.
_OUTPUT_COLUMNS = ['DPM', 'DPM_IP', 'Name', 'PartNumber', 'IP']


def _cell_text(value):
    """Return a source cell as stripped text, or '' when the cell is blank."""
    if pd.isna(value):
        return ''
    # pandas stores whole numbers as floats in any column that also contains
    # blanks, so 1756 would otherwise be written out as "1756.0".
    if isinstance(value, float) and value.is_integer():
        value = int(value)
    return str(value).strip()


def _locate_columns(columns):
    """Map each output field to its source column label (None when absent)."""
    found = dict.fromkeys(_OUTPUT_COLUMNS)
    for col in columns:
        label = str(col).strip()
        if label == 'DPM':
            found['DPM'] = col
        elif label == 'IP' and found['DPM_IP'] is None:
            # First IP column is DPM_IP
            found['DPM_IP'] = col
        elif label == 'Assigned Device':
            found['Name'] = col
        elif label == 'Part Number':
            found['PartNumber'] = col
        elif label in ('IP.1', 'IP'):
            # Second IP column is the device IP: pandas renames an exact
            # duplicate to 'IP.1'; a plain 'IP' only reaches this branch once
            # DPM_IP has already been assigned.
            found['IP'] = col
    # Fallback: when the DPM header was not recognised, take every field by
    # position (indices 3..7), as the source layout is expected to have them.
    if found['DPM'] is None and len(columns) >= 8:
        found = dict(zip(_OUTPUT_COLUMNS, columns[3:8]))
    return found


def _write_formatted(output_df, output_file):
    """Write ``output_df`` to a styled 'NETWORK_PLC' sheet in ``output_file``."""
    # Column widths (reasonable defaults) for DPM, DPM_IP, Name, PartNumber, IP
    column_widths = {'A': 20, 'B': 16, 'C': 30, 'D': 18, 'E': 16}
    header_fill = PatternFill(start_color="D3D3D3", end_color="D3D3D3", fill_type="solid")
    header_font = Font(bold=True, size=11)
    centered = Alignment(horizontal="center", vertical="center")
    left_aligned = Alignment(horizontal="left", vertical="center")
    thin = Side(style="thin")
    thin_border = Border(left=thin, right=thin, top=thin, bottom=thin)
    column_count = len(output_df.columns)
    # 1-based sheet columns that hold IP addresses; these are centered.
    centered_columns = {
        position
        for position, name in enumerate(output_df.columns, start=1)
        if name in ('DPM_IP', 'IP')
    }

    # The styling must happen inside the ``with`` block: the workbook is
    # saved when the writer is closed.
    with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
        output_df.to_excel(writer, sheet_name='NETWORK_PLC', index=False)
        sheet = writer.sheets['NETWORK_PLC']

        for letter, width in column_widths.items():
            sheet.column_dimensions[letter].width = width

        # Header row
        for col_idx in range(1, column_count + 1):
            header_cell = sheet.cell(row=1, column=col_idx)
            header_cell.font = header_font
            header_cell.fill = header_fill
            header_cell.alignment = centered
            header_cell.border = thin_border

        # Data rows start at sheet row 2 (row 1 is the header).
        for row_idx in range(2, len(output_df) + 2):
            for col_idx in range(1, column_count + 1):
                data_cell = sheet.cell(row=row_idx, column=col_idx)
                data_cell.alignment = (
                    centered if col_idx in centered_columns else left_aligned
                )
                data_cell.border = thin_border


def convert_format(source_file, output_file):
    """Convert source Excel format to target format.

    Reads the first sheet of ``source_file`` using the third spreadsheet row
    as the header, forward-fills the DPM and DPM_IP columns (their values are
    merged across several rows in the source), keeps every row that has both
    a DPM and an assigned device name, and writes the result to a formatted
    'NETWORK_PLC' sheet with the columns DPM, DPM_IP, Name, PartNumber, IP.

    Args:
        source_file: Path of the Excel workbook to read.
        output_file: Path of the Excel workbook to create or overwrite.

    Raises:
        ValueError: If the DPM or the device-name column cannot be located.
    """
    print(f"Reading source file: {source_file}")
    # header=2 skips the first two rows and uses the third one as the header
    source_df = pd.read_excel(source_file, header=2)
    print(f"Source file shape: {source_df.shape}")
    print(f"Source columns: {list(source_df.columns)}")
    print("\nFirst few rows of source:")
    print(source_df.head(10).to_string())

    mapping = _locate_columns(list(source_df.columns))
    dpm_col = mapping['DPM']
    dpm_ip_col = mapping['DPM_IP']
    name_col = mapping['Name']
    print("\nColumn mapping:")
    print(f" DPM: {dpm_col}")
    print(f" DPM_IP: {dpm_ip_col}")
    print(f" Name: {name_col}")
    print(f" PartNumber: {mapping['PartNumber']}")
    print(f" IP: {mapping['IP']}")

    # Compare against None rather than relying on truthiness: a column label
    # can legitimately be falsy (for example 0 or an empty string).
    if dpm_col is None or name_col is None:
        raise ValueError(f"Could not find required columns. Found: DPM={dpm_col}, Name={name_col}")

    # Forward-fill DPM and DPM_IP values (they're merged across multiple rows)
    source_df[dpm_col] = source_df[dpm_col].ffill()
    if dpm_ip_col is not None:
        source_df[dpm_ip_col] = source_df[dpm_ip_col].ffill()

    # Only keep rows that have at least DPM and Name (the essential data)
    output_data = []
    for _, row in source_df.iterrows():
        if pd.isna(row[dpm_col]) or pd.isna(row[name_col]):
            continue
        output_data.append({
            target: '' if source is None else _cell_text(row[source])
            for target, source in mapping.items()
        })

    # Passing the columns explicitly keeps the header row in the output even
    # when no source row qualified.
    output_df = pd.DataFrame(output_data, columns=_OUTPUT_COLUMNS)
    print(f"\nExtracted {len(output_df)} rows of network data")
    print("\nFirst few rows of output:")
    print(output_df.head(10).to_string())

    print(f"\nWriting output to: {output_file}")
    _write_formatted(output_df, output_file)

    print("\nConversion complete!")
    print(f" Output saved to: {output_file}")
    print(f" Total rows: {len(output_df)}")
def main():
    """Parse the command line and run the Excel format conversion.

    Relative source and output paths are resolved against the directory that
    contains this script, not the current working directory. When no output
    path is given, the result is written next to the source file with
    ``_formatted`` appended to the file stem.

    Exits with status 1 when the source file does not exist or when the
    conversion raises an exception.
    """
    # ``exit`` is injected by the ``site`` module for interactive sessions and
    # is not guaranteed to exist (e.g. ``python -S``); sys.exit always does.
    import sys

    parser = argparse.ArgumentParser(
        description="Convert Excel network structure format to standardized format",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
python format.py "Amazon CDW5_IP Addresses_Local.xlsx"
python format.py "input.xlsx" "output.xlsx"
python format.py "C:\\path\\to\\file.xlsx"
"""
    )
    parser.add_argument(
        "source_file",
        help="Path to the source Excel file to convert"
    )
    parser.add_argument(
        "output_file",
        nargs="?",
        default=None,
        help="Path to the output Excel file (default: source_file with '_formatted' suffix)"
    )
    args = parser.parse_args()

    # Resolve file paths
    script_dir = Path(__file__).parent
    source_file = Path(args.source_file)
    if not source_file.is_absolute():
        # If relative, assume it's in the same directory as the script
        source_file = script_dir / source_file
    if not source_file.exists():
        print(f"ERROR: Source file not found: {source_file}")
        sys.exit(1)

    # Determine output file
    if args.output_file:
        output_file = Path(args.output_file)
        if not output_file.is_absolute():
            output_file = script_dir / output_file
    else:
        # Default: add "_formatted" before the extension
        output_file = source_file.parent / f"{source_file.stem}_formatted{source_file.suffix}"

    print("=" * 60)
    print("CONVERTING EXCEL FORMAT")
    print("=" * 60)
    print(f"Source: {source_file}")
    print(f"Output: {output_file}")
    print("=" * 60)

    # Top-level boundary: report any failure with its traceback and turn it
    # into a non-zero exit status.
    try:
        convert_format(source_file, output_file)
    except Exception as e:
        print(f"\nERROR: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)


# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

BIN
Additional/test_output.xlsx Normal file

Binary file not shown.