Compare commits
2 Commits
3582a67422
...
752115a1fc
| Author | SHA1 | Date | |
|---|---|---|---|
| 752115a1fc | |||
| 4035dc0588 |
BIN
Additional/Amazon CDW5_IP Addresses_Local.xlsx
Normal file
BIN
Additional/Amazon CDW5_IP Addresses_Local.xlsx
Normal file
Binary file not shown.
BIN
Additional/Amazon CDW5_IP Addresses_Local_formatted.xlsx
Normal file
BIN
Additional/Amazon CDW5_IP Addresses_Local_formatted.xlsx
Normal file
Binary file not shown.
232
Additional/format.py
Normal file
232
Additional/format.py
Normal file
@ -0,0 +1,232 @@
|
||||
#!/usr/bin/env python
|
||||
"""
|
||||
Convert an Excel workbook from the network-structure layout to the standardized layout.
|
||||
|
||||
The source file has network structure data that needs to be extracted and reformatted
|
||||
to match the target format with columns: DPM, DPM_IP, Name, PartNumber, IP
|
||||
|
||||
Usage:
|
||||
python format.py <source_file.xlsx> [output_file.xlsx]
|
||||
|
||||
Examples:
|
||||
python format.py "Amazon CDW5_IP Addresses_Local.xlsx"
|
||||
python format.py "input.xlsx" "output.xlsx"
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import pandas as pd
|
||||
from pathlib import Path
|
||||
from openpyxl import load_workbook
|
||||
from openpyxl.styles import Font, Alignment, PatternFill, Border, Side
|
||||
from openpyxl.utils import get_column_letter
|
||||
|
||||
|
||||
# Column order of the standardized output sheet.
_TARGET_COLUMNS = ['DPM', 'DPM_IP', 'Name', 'PartNumber', 'IP']


def _cell_text(value):
    """Return a cell value as stripped text, or '' when the cell is empty."""
    if pd.isna(value):
        return ''
    # A numeric column that contains blank cells is loaded as float, so a
    # whole number such as 12345 would otherwise be rendered as "12345.0".
    if isinstance(value, float) and value.is_integer():
        return str(int(value))
    return str(value).strip()


def _find_columns(columns):
    """Locate the source columns that feed the five target columns.

    Headers are matched by exact (stripped) name.  The first 'IP' header is
    taken as the DPM's IP; a second 'IP' (which pandas renames to 'IP.1') is
    taken as the device IP.  If no 'DPM' header is found and the sheet has at
    least eight columns, the positional layout (indices 3-7) is assumed.

    Returns:
        Tuple ``(dpm, dpm_ip, name, part_number, ip)`` of column labels;
        any entry may be ``None`` when the column could not be located.
    """
    dpm_col = dpm_ip_col = name_col = part_number_col = ip_col = None

    for col in columns:
        label = str(col).strip()
        if label == 'DPM':
            dpm_col = col
        elif label == 'IP' and dpm_ip_col is None:
            # First IP column is DPM_IP
            dpm_ip_col = col
        elif label == 'Assigned Device':
            name_col = col
        elif label == 'Part Number':
            part_number_col = col
        elif label in ('IP', 'IP.1'):
            # Second IP column is the device IP
            ip_col = col

    # Fallback: use column positions if the DPM header was not recognised.
    if dpm_col is None and len(columns) >= 8:
        dpm_col, dpm_ip_col, name_col, part_number_col, ip_col = columns[3:8]

    return dpm_col, dpm_ip_col, name_col, part_number_col, ip_col


def _style_sheet(output_ws, row_count, column_count):
    """Apply column widths, header styling, borders and alignment."""
    # Reasonable default widths for DPM, DPM_IP, Name, PartNumber, IP.
    for col_letter, width in {'A': 20, 'B': 16, 'C': 30, 'D': 18, 'E': 16}.items():
        output_ws.column_dimensions[col_letter].width = width

    edge = Side(style="thin")
    thin_border = Border(left=edge, right=edge, top=edge, bottom=edge)
    header_fill = PatternFill(start_color="D3D3D3", end_color="D3D3D3", fill_type="solid")
    header_font = Font(bold=True, size=11)
    centered = Alignment(horizontal="center", vertical="center")
    left_aligned = Alignment(horizontal="left", vertical="center")

    for col_idx in range(1, column_count + 1):
        header_cell = output_ws.cell(row=1, column=col_idx)
        header_cell.font = header_font
        header_cell.fill = header_fill
        header_cell.alignment = centered
        header_cell.border = thin_border

    # The IP columns (DPM_IP = 2, IP = 5) are centred; everything else is
    # left-aligned.  Data starts on row 2, directly below the header.
    centered_columns = {2, 5}
    for row_idx in range(2, row_count + 2):
        for col_idx in range(1, column_count + 1):
            data_cell = output_ws.cell(row=row_idx, column=col_idx)
            data_cell.alignment = centered if col_idx in centered_columns else left_aligned
            data_cell.border = thin_border


def convert_format(source_file, output_file):
    """Convert a network-structure workbook into the standardized layout.

    The source sheet is read with its header on the third row (``header=2``).
    DPM and DPM_IP values are forward-filled because they appear once per
    group of rows (merged cells).  Every row that has both a DPM and a device
    name is written to a sheet named 'NETWORK_PLC' with the columns
    DPM, DPM_IP, Name, PartNumber, IP.  The header row is always written,
    even when no data rows qualify.

    Args:
        source_file: Path of the Excel workbook to read.
        output_file: Path of the Excel workbook to create or overwrite.

    Raises:
        ValueError: If the DPM or device-name column cannot be located.
    """
    print(f"Reading source file: {source_file}")
    # header=2: the first two rows are skipped and the third row is the header
    source_df = pd.read_excel(source_file, header=2)

    print(f"Source file shape: {source_df.shape}")
    print(f"Source columns: {list(source_df.columns)}")

    print("\nFirst few rows of source:")
    print(source_df.head(10).to_string())

    dpm_col, dpm_ip_col, name_col, part_number_col, ip_col = _find_columns(
        list(source_df.columns)
    )

    print("\nColumn mapping:")
    print(f"  DPM: {dpm_col}")
    print(f"  DPM_IP: {dpm_ip_col}")
    print(f"  Name: {name_col}")
    print(f"  PartNumber: {part_number_col}")
    print(f"  IP: {ip_col}")

    # Validate that we found the essential columns
    if dpm_col is None or name_col is None:
        raise ValueError(f"Could not find required columns. Found: DPM={dpm_col}, Name={name_col}")

    # Forward-fill DPM and DPM_IP values (they're merged across multiple rows)
    source_df[dpm_col] = source_df[dpm_col].ffill()
    if dpm_ip_col is not None:
        source_df[dpm_ip_col] = source_df[dpm_ip_col].ffill()

    def optional(row, col):
        """Text of an optional column, '' when the column was not found."""
        return _cell_text(row[col]) if col is not None else ''

    output_data = []
    for _, row in source_df.iterrows():
        # Only keep rows that have at least DPM and Name (the essential data)
        if pd.isna(row[dpm_col]) or pd.isna(row[name_col]):
            continue
        output_data.append({
            'DPM': _cell_text(row[dpm_col]),
            'DPM_IP': optional(row, dpm_ip_col),
            'Name': _cell_text(row[name_col]),
            'PartNumber': optional(row, part_number_col),
            'IP': optional(row, ip_col),
        })

    # Passing the columns explicitly keeps the header row when nothing matched.
    output_df = pd.DataFrame(output_data, columns=_TARGET_COLUMNS)

    print(f"\nExtracted {len(output_df)} rows of network data")
    print("\nFirst few rows of output:")
    print(output_df.head(10).to_string())

    print(f"\nWriting output to: {output_file}")

    with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
        output_df.to_excel(writer, sheet_name='NETWORK_PLC', index=False)
        _style_sheet(writer.sheets['NETWORK_PLC'], len(output_df), len(output_df.columns))

    print("\nConversion complete!")
    print(f"  Output saved to: {output_file}")
    print(f"  Total rows: {len(output_df)}")
|
||||
|
||||
|
||||
def _resolve_against_script_dir(raw_path):
    """Return ``raw_path`` as a Path; relative paths are anchored at this script's directory."""
    path = Path(raw_path)
    if path.is_absolute():
        return path
    return Path(__file__).parent / path


def main():
    """Command-line entry point: parse arguments and run the conversion.

    Relative source and output paths are resolved against the directory that
    contains this script.  When no output path is given, the output is written
    next to the source as ``<stem>_formatted.xlsx``.

    Exits with status 1 when the source file does not exist or when the
    conversion raises; the traceback is printed in the latter case.
    """
    parser = argparse.ArgumentParser(
        description="Convert Excel network structure format to standardized format",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python format.py "Amazon CDW5_IP Addresses_Local.xlsx"
  python format.py "input.xlsx" "output.xlsx"
  python format.py "C:\\path\\to\\file.xlsx"
        """
    )
    parser.add_argument(
        "source_file",
        help="Path to the source Excel file to convert"
    )
    parser.add_argument(
        "output_file",
        nargs="?",
        default=None,
        help="Path to the output Excel file (default: source_file with '_formatted' suffix)"
    )

    args = parser.parse_args()

    source_file = _resolve_against_script_dir(args.source_file)
    if not source_file.exists():
        print(f"ERROR: Source file not found: {source_file}")
        # SystemExit instead of the `exit` builtin: `exit` is injected by the
        # `site` module and is missing under `python -S` or in frozen builds.
        raise SystemExit(1)

    if args.output_file:
        output_file = _resolve_against_script_dir(args.output_file)
    else:
        # Default: add "_formatted" before the extension.  The writer always
        # produces xlsx content, so any other source extension (.xls, .xlsm)
        # is replaced by .xlsx to keep name and content consistent.
        suffix = source_file.suffix if source_file.suffix.lower() == ".xlsx" else ".xlsx"
        output_file = source_file.parent / f"{source_file.stem}_formatted{suffix}"

    banner = "=" * 60
    print(banner)
    print("CONVERTING EXCEL FORMAT")
    print(banner)
    print(f"Source: {source_file}")
    print(f"Output: {output_file}")
    print(banner)

    try:
        convert_format(source_file, output_file)
    except Exception as e:
        # Top-level boundary: report the failure with its traceback and
        # signal it to the caller through the exit status.
        print(f"\nERROR: {e}")
        import traceback
        traceback.print_exc()
        raise SystemExit(1)


if __name__ == "__main__":
    main()
|
||||
BIN
Additional/test_output.xlsx
Normal file
BIN
Additional/test_output.xlsx
Normal file
Binary file not shown.
BIN
Additional/~$Amazon CDW5_IP Addresses_Local_formatted.xlsx
Normal file
BIN
Additional/~$Amazon CDW5_IP Addresses_Local_formatted.xlsx
Normal file
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user