Compare commits
No commits in common. "752115a1fcfcedc94202dc02ac41d511517b887a" and "3582a67422ae7f63c49757669048126ecee359e0" have entirely different histories.
752115a1fc
...
3582a67422
Binary file not shown.
Binary file not shown.
@ -1,232 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
"""
Convert an Excel workbook from the network-structure layout to the standardized format.

The source file holds network structure data that is extracted and reformatted
to match the target format with columns: DPM, DPM_IP, Name, PartNumber, IP

Usage:
    python format.py <source_file.xlsx> [output_file.xlsx]

Examples:
    python format.py "Amazon CDW5_IP Addresses_Local.xlsx"
    python format.py "input.xlsx" "output.xlsx"
"""
|
|
||||||
|
|
||||||
import argparse
|
|
||||||
import pandas as pd
|
|
||||||
from pathlib import Path
|
|
||||||
from openpyxl import load_workbook
|
|
||||||
from openpyxl.styles import Font, Alignment, PatternFill, Border, Side
|
|
||||||
from openpyxl.utils import get_column_letter
|
|
||||||
|
|
||||||
|
|
||||||
def convert_format(source_file, output_file):
    """Convert a network-structure Excel sheet to the standardized layout.

    Reads ``source_file`` using the third spreadsheet row as the header,
    identifies the DPM, DPM IP, device name, part number and device IP
    columns, forward-fills the two DPM columns, and writes every row that
    has both a DPM and a device name to a formatted ``NETWORK_PLC`` sheet in
    ``output_file``. Progress information is printed to stdout.

    Args:
        source_file: Path of the Excel workbook to read.
        output_file: Path of the Excel workbook to write.

    Raises:
        ValueError: If the DPM or device-name column cannot be identified.
    """
    print(f"Reading source file: {source_file}")
    # header=2 skips the first two rows and uses row index 2 as the header.
    source_df = pd.read_excel(source_file, header=2)

    print(f"Source file shape: {source_df.shape}")
    print(f"Source columns: {list(source_df.columns)}")
    print("\nFirst few rows of source:")
    print(source_df.head(10).to_string())

    dpm_col, dpm_ip_col, name_col, part_number_col, ip_col = _find_columns(
        list(source_df.columns)
    )

    print("\nColumn mapping:")
    print(f" DPM: {dpm_col}")
    print(f" DPM_IP: {dpm_ip_col}")
    print(f" Name: {name_col}")
    print(f" PartNumber: {part_number_col}")
    print(f" IP: {ip_col}")

    # Compare against None rather than truthiness so that a falsy column
    # label (e.g. the integer 0) is not mistaken for "not found".
    if dpm_col is None or name_col is None:
        raise ValueError(f"Could not find required columns. Found: DPM={dpm_col}, Name={name_col}")

    # DPM and DPM_IP are only present on the first row of each group
    # (merged cells in the source), so propagate them downwards.
    source_df[dpm_col] = source_df[dpm_col].ffill()
    if dpm_ip_col is not None:
        source_df[dpm_ip_col] = source_df[dpm_ip_col].ffill()

    output_data = []
    for _, row in source_df.iterrows():
        # Only keep rows that carry the essential data: a DPM and a name.
        if pd.isna(row[dpm_col]) or pd.isna(row[name_col]):
            continue
        output_data.append({
            'DPM': str(row[dpm_col]).strip(),
            'DPM_IP': _cell_text(row, dpm_ip_col),
            'Name': str(row[name_col]).strip(),
            'PartNumber': _cell_text(row, part_number_col),
            'IP': _cell_text(row, ip_col),
        })

    # Passing the columns explicitly keeps the header row in the output even
    # when no source row qualified (an empty list would otherwise yield a
    # frame without any columns).
    output_df = pd.DataFrame(
        output_data,
        columns=['DPM', 'DPM_IP', 'Name', 'PartNumber', 'IP'],
    )

    print(f"\nExtracted {len(output_df)} rows of network data")
    print("\nFirst few rows of output:")
    print(output_df.head(10).to_string())

    print(f"\nWriting output to: {output_file}")

    with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
        output_df.to_excel(writer, sheet_name='NETWORK_PLC', index=False)
        _style_sheet(
            writer.sheets['NETWORK_PLC'],
            len(output_df),
            len(output_df.columns),
        )

    print("\nConversion complete!")
    print(f" Output saved to: {output_file}")
    print(f" Total rows: {len(output_df)}")


def _find_columns(cols):
    """Return the (dpm, dpm_ip, name, part_number, ip) labels found in *cols*.

    Labels are matched by their stripped string form. Any label that is not
    found is returned as ``None``, unless the positional fallback applies.
    """
    dpm_col = dpm_ip_col = name_col = part_number_col = ip_col = None

    for col in cols:
        label = str(col).strip()
        if label == 'DPM':
            dpm_col = col
        elif label == 'IP' and dpm_ip_col is None:
            # First IP column is DPM_IP
            dpm_ip_col = col
        elif label == 'Assigned Device':
            name_col = col
        elif label == 'Part Number':
            part_number_col = col
        elif label in ('IP', 'IP.1'):
            # Second IP column is the device IP (pandas renames duplicate
            # headers to 'IP.1'; a plain 'IP' reaching this branch means the
            # DPM_IP column was already assigned above).
            ip_col = col

    # Fallback: if the DPM header was not recognised, assume the known
    # layout where the five columns sit at indices 3..7.
    if dpm_col is None and len(cols) >= 8:
        dpm_col = cols[3]          # 'DPM'
        dpm_ip_col = cols[4]       # 'IP'
        name_col = cols[5]         # 'Assigned Device'
        part_number_col = cols[6]  # 'Part Number'
        ip_col = cols[7]           # 'IP.1' (second IP column)

    return dpm_col, dpm_ip_col, name_col, part_number_col, ip_col


def _cell_text(row, col):
    """Return the stripped text of ``row[col]``, or '' if missing or unmapped."""
    if col is None:
        return ''
    value = row[col]
    return str(value).strip() if pd.notna(value) else ''


def _style_sheet(output_ws, n_rows, n_cols):
    """Apply column widths, header styling, borders and alignment to the sheet."""
    # Set column widths (reasonable defaults)
    column_widths = {
        'A': 20,  # DPM
        'B': 16,  # DPM_IP
        'C': 30,  # Name
        'D': 18,  # PartNumber
        'E': 16,  # IP
    }
    for col_letter, width in column_widths.items():
        output_ws.column_dimensions[col_letter].width = width

    header_fill = PatternFill(start_color="D3D3D3", end_color="D3D3D3", fill_type="solid")
    header_font = Font(bold=True, size=11)
    centered = Alignment(horizontal="center", vertical="center")
    left_aligned = Alignment(horizontal="left", vertical="center")
    thin_border = Border(
        left=Side(style="thin"),
        right=Side(style="thin"),
        top=Side(style="thin"),
        bottom=Side(style="thin"),
    )

    # Header row
    for col_idx in range(1, n_cols + 1):
        header_cell = output_ws.cell(row=1, column=col_idx)
        header_cell.font = header_font
        header_cell.fill = header_fill
        header_cell.alignment = centered
        header_cell.border = thin_border

    # Data rows: left-aligned, except the two IP columns which are centered.
    ip_columns = (2, 5)  # DPM_IP and IP
    for row_idx in range(2, n_rows + 2):
        for col_idx in range(1, n_cols + 1):
            data_cell = output_ws.cell(row=row_idx, column=col_idx)
            data_cell.alignment = centered if col_idx in ip_columns else left_aligned
            data_cell.border = thin_border
|
|
||||||
|
|
||||||
|
|
||||||
def main(argv=None):
    """Command-line entry point: parse arguments and run the conversion.

    Relative source and output paths are resolved against the directory
    containing this script, not the current working directory. When no
    output path is given, the output is written next to the source file with
    ``_formatted`` appended to the file stem.

    Args:
        argv: Optional list of command-line arguments (without the program
            name). ``None`` lets argparse read them from ``sys.argv``.

    Raises:
        SystemExit: With code 1 if the source file does not exist or the
            conversion fails (argparse may also exit on invalid arguments).
    """
    parser = argparse.ArgumentParser(
        description="Convert Excel network structure format to standardized format",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python format.py "Amazon CDW5_IP Addresses_Local.xlsx"
  python format.py "input.xlsx" "output.xlsx"
  python format.py "C:\\path\\to\\file.xlsx"
"""
    )
    parser.add_argument(
        "source_file",
        help="Path to the source Excel file to convert"
    )
    parser.add_argument(
        "output_file",
        nargs="?",
        default=None,
        help="Path to the output Excel file (default: source_file with '_formatted' suffix)"
    )

    args = parser.parse_args(argv)

    script_dir = Path(__file__).parent

    # Resolve file paths
    source_file = Path(args.source_file)
    if not source_file.is_absolute():
        # If relative, assume it's in the same directory as the script
        source_file = script_dir / source_file

    if not source_file.exists():
        print(f"ERROR: Source file not found: {source_file}")
        # SystemExit instead of the site-provided exit() helper, which is
        # not guaranteed to exist (e.g. `python -S`, frozen executables).
        raise SystemExit(1)

    # Determine output file
    if args.output_file:
        output_file = Path(args.output_file)
        if not output_file.is_absolute():
            output_file = script_dir / output_file
    else:
        # Default: add "_formatted" before the extension
        output_file = source_file.parent / f"{source_file.stem}_formatted{source_file.suffix}"

    print("=" * 60)
    print("CONVERTING EXCEL FORMAT")
    print("=" * 60)
    print(f"Source: {source_file}")
    print(f"Output: {output_file}")
    print("=" * 60)

    try:
        convert_format(source_file, output_file)
    except Exception as e:
        # Top-level boundary: report the failure with a traceback and exit
        # with a non-zero status.
        print(f"\nERROR: {e}")
        import traceback
        traceback.print_exc()
        raise SystemExit(1)


if __name__ == "__main__":
    main()
|
|
||||||
Binary file not shown.
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user