2025-12-06 19:16:58 +04:00

397 lines
15 KiB
Python

import pandas as pd
import re
from pathlib import Path
# TERM format for FIOM controllers: "X<number>_<0|1>". The pattern is only
# anchored at the start of the string, so trailing text after the suffix is ignored.
_FIOM_TERM_PATTERN = re.compile(r'X(\d+)_([01])')

# X number -> position in the required output order.
_FIOM_PRIORITY = {
    3: 1, 2: 2, 1: 3, 0: 4,  # First group: X3, X2, X1, X0
    7: 5, 6: 6, 5: 7, 4: 8,  # Second group: X7, X6, X5, X4
}


def sort_key_fiom(term):
    """
    Sort key for FIOM controllers with X format (X0_0, X0_1, etc.)

    Sort order: X3_0, X3_1, X2_0, X2_1, X1_0, X1_1, X0_0, X0_1,
                X7_0, X7_1, X6_0, X6_1, X5_0, X5_1, X4_0, X4_1

    Args:
        term: TERM cell value (string, NaN/None, or any other object).

    Returns:
        A 2-tuple usable as a sort key:
          * (priority, suffix) for a term in X format; X numbers that are not
            in the priority table get priority 999.
          * (999, 999) for missing (NaN/None) or non-string values.
          * (1000, term) for strings that are not in X format.

        Every key whose second element is a string has first element 1000 and
        every other key has an integer second element, so any two keys can be
        compared without a str/int TypeError. Unrecognised strings sort last,
        alphabetically.
    """
    if pd.isna(term) or not isinstance(term, str):
        return (999, 999)

    match = _FIOM_TERM_PATTERN.match(term)
    if match is None:
        # A dedicated priority keeps text suffixes from ever being compared
        # with the integer suffixes used by the 999 bucket.
        return (1000, term)

    x_num = int(match.group(1))
    suffix = int(match.group(2))  # 0 sorts before 1
    return (_FIOM_PRIORITY.get(x_num, 999), suffix)
# TERM format for FIOH controllers: "C<number>_<A|B>". The pattern is only
# anchored at the start of the string, so trailing text after the letter is ignored.
_FIOH_TERM_PATTERN = re.compile(r'C(\d+)_([AB])')

# C number -> position in the required output order.
_FIOH_PRIORITY = {
    7: 1, 5: 2, 3: 3, 1: 4,  # Odd numbers, descending, first
    8: 5, 6: 6, 4: 7, 2: 8,  # Even numbers, descending, second
}


def sort_key_fioh(term):
    """
    Sort key for FIOH controllers with C format (C1_A, C1_B, etc.)

    Sort order: C7_A, C7_B, C5_A, C5_B, C3_A, C3_B, C1_A, C1_B,
                C8_A, C8_B, C6_A, C6_B, C4_A, C4_B, C2_A, C2_B

    Args:
        term: TERM cell value (string, NaN/None, or any other object).

    Returns:
        A 2-tuple usable as a sort key:
          * (priority, letter_priority) for a term in C format, where
            letter_priority is 0 for 'A' and 1 for 'B'; C numbers that are not
            in the priority table get priority 999.
          * (999, 999) for missing (NaN/None) or non-string values.
          * (1000, term) for strings that are not in C format.

        Every key whose second element is a string has first element 1000 and
        every other key has an integer second element, so any two keys can be
        compared without a str/int TypeError. Unrecognised strings sort last,
        alphabetically.
    """
    if pd.isna(term) or not isinstance(term, str):
        return (999, 999)

    match = _FIOH_TERM_PATTERN.match(term)
    if match is None:
        # A dedicated priority keeps text suffixes from ever being compared
        # with the integer suffixes used by the 999 bucket.
        return (1000, term)

    num = int(match.group(1))
    letter_priority = 0 if match.group(2) == 'A' else 1  # A before B
    return (_FIOH_PRIORITY.get(num, 999), letter_priority)
def is_fiom_controller(tagname):
    """Return True when ``tagname`` is a string containing 'FIOM' in any letter case."""
    # Missing values (NaN/None) and non-string cells never count as a match.
    is_text = not pd.isna(tagname) and isinstance(tagname, str)
    return is_text and 'FIOM' in str(tagname).upper()
def is_fioh_controller(tagname):
    """Return True when ``tagname`` is a string containing 'FIOH' in any letter case."""
    if not pd.isna(tagname) and isinstance(tagname, str):
        return 'FIOH' in str(tagname).upper()
    # Missing values (NaN/None) and non-string cells never count as a match.
    return False
def is_vfd_controller(tagname):
    """Return True when ``tagname`` is a string containing 'VFD' in any letter case."""
    # Missing values (NaN/None) and non-string cells never count as a match.
    unusable = pd.isna(tagname) or not isinstance(tagname, str)
    return False if unusable else 'VFD' in str(tagname).upper()
def get_base_prefix(tagname):
    """
    Return the part of a controller name that precedes its type marker.

    The markers '_FIOM', '_FIOH' and '_VFD' are searched for in that order,
    ignoring letter case; the text before the first marker found is returned
    with its original casing.

    Examples:
        PDP17_FIOM1 -> PDP17
        PDP17_FIOH  -> PDP17
        BYAD_3_VFD  -> BYAD_3
        BYAD_3_FIOM -> BYAD_3

    Missing (NaN/None) or non-string input yields ''. A name without any
    marker is returned unchanged.
    """
    if pd.isna(tagname) or not isinstance(tagname, str):
        return ''

    original = str(tagname)
    uppercased = original.upper()
    for marker in ('_FIOM', '_FIOH', '_VFD'):
        position = uppercased.find(marker)
        if position != -1:
            # Slice the original string so the prefix keeps its casing.
            return original[:position]

    return original
def get_column_mapping(xlsx_df):
    """
    Map original XLSX columns to new CSV column names.

    For each output column the lookup is:
      1. the first source column whose name, upper-cased and stripped, equals
         the output name itself (e.g. 'TAGNAME');
      2. otherwise the first alias (in the order listed below) that equals a
         source column name after lower-casing and stripping.

    Args:
        xlsx_df: DataFrame read from the spreadsheet; only ``.columns`` is used.

    Returns:
        dict with the keys 'TAGNAME', 'ADDR', 'TERM', 'DESCA', 'DESCB' (in that
        order). Each value is the matching source column label exactly as it
        appears in ``xlsx_df.columns``, or None when nothing matched.
    """
    # Output column -> accepted source header variations, most specific first.
    aliases_by_target = (
        ('TAGNAME', ('controller name', 'controller', 'tagname', 'tag name')),
        ('ADDR', ('address name', 'address', 'addr')),
        ('TERM', ('signal type', 'signal', 'term')),
        ('DESCA', ('assigned device', 'assigned', 'device')),
        ('DESCB', ('description', 'desc')),
    )

    columns = list(xlsx_df.columns)
    # str() guards against non-string headers (e.g. numeric column labels),
    # which have no .lower()/.upper() methods.
    cols_lower = {str(col).lower().strip(): col for col in columns}

    column_mapping = {}
    for target, aliases in aliases_by_target:
        # An already-correct column name wins over any alias.
        found = next(
            (col for col in columns if str(col).upper().strip() == target),
            None,
        )
        if found is None:
            found = next(
                (cols_lower[alias] for alias in aliases if alias in cols_lower),
                None,
            )
        column_mapping[target] = found

    return column_mapping
# Output columns, in the order required for the CSV.
_CSV_COLUMNS = ['TAGNAME', 'ADDR', 'TERM', 'TERMDESC', 'DESCA', 'DESCB',
                'DESCC', 'DESCD', 'DESCE', 'INST', 'LOC']

# Numeric stand-in for a textual TERM suffix; larger than every numeric
# suffix the sort-key functions produce (0, 1, 999), so text sorts last.
_TEXT_SUFFIX_RANK = 1000


def _select_sheet(xlsx_path):
    """Choose the worksheet to convert when the caller did not name one."""
    # The context manager closes the workbook handle once the choice is made.
    with pd.ExcelFile(xlsx_path) as xls:
        sheet_names = list(xls.sheet_names)

        # Look for any MCM sheet (MCM02, MCM03, MCM14, etc.)
        mcm_sheets = [s for s in sheet_names if s.upper().startswith('MCM')]
        if mcm_sheets:
            # MCM14 takes precedence; otherwise use the highest numbered MCM sheet.
            if 'MCM14' in mcm_sheets:
                print("Found MCM14 sheet, reading from it...")
                return 'MCM14'

            def extract_mcm_number(sheet):
                match = re.match(r'MCM(\d+)', sheet.upper())
                return int(match.group(1)) if match else 0

            # max() returns the first sheet among equals, like a stable descending sort.
            sheet_name = max(mcm_sheets, key=extract_mcm_number)
            print(f"Found MCM sheet: '{sheet_name}', reading from it...")
            return sheet_name

        # No MCM sheet: take the first sheet whose header row mentions an expected column.
        keywords = ('controller', 'signal', 'address', 'assigned', 'description')
        for candidate in sheet_names:
            header_df = pd.read_excel(xls, sheet_name=candidate, nrows=1)
            # str() guards against non-string headers (e.g. numeric labels).
            header_text = ' '.join(str(c).lower().strip() for c in header_df.columns)
            if any(keyword in header_text for keyword in keywords):
                print(f"Found sheet with expected columns: '{candidate}', reading from it...")
                return candidate

        # Fallback to first sheet
        sheet_name = sheet_names[0]
        print(f"No MCM sheet found, reading from first sheet: '{sheet_name}'...")
        return sheet_name


def _build_output_frame(df, column_mapping):
    """Copy the mapped source columns into a frame with the CSV column layout."""
    new_df = pd.DataFrame(index=df.index)

    tagname_source = column_mapping['TAGNAME']
    if tagname_source is not None:
        # .values copies by position, preserving the spreadsheet row order.
        new_df['TAGNAME'] = df[tagname_source].values
    else:
        print("Warning: TAGNAME column not found, using first column")
        new_df['TAGNAME'] = df.iloc[:, 0].values if len(df.columns) > 0 else ''

    for target in ('ADDR', 'TERM', 'DESCA', 'DESCB'):
        source = column_mapping[target]
        new_df[target] = df[source].values if source is not None else ''

    # Columns that are always written empty.
    for blank in ('TERMDESC', 'DESCC', 'DESCD', 'DESCE', 'INST', 'LOC'):
        new_df[blank] = ''

    # Reordering columns does not change row order; the index is made sequential.
    return new_df[_CSV_COLUMNS].reset_index(drop=True)


def _controller_type_priority(tagname):
    """Return priority: VFD=1, FIOM=2, FIOH=3, Other=4"""
    if is_vfd_controller(tagname):
        return 1
    if is_fiom_controller(tagname):
        return 2
    if is_fioh_controller(tagname):
        return 3
    return 4


def _sort_rows(new_df):
    """Order rows by base-prefix group, controller type, TAGNAME and TERM."""
    helper_columns = ['_base_rank', '_base_prefix', '_ctrl_type_priority',
                      '_sort_priority', '_sort_suffix', '_sort_text']

    # PDP prefixes rank first; within a rank, prefixes sort alphabetically.
    # Two scalar columns give the same ordering as one (rank, prefix) tuple.
    new_df['_base_prefix'] = new_df['TAGNAME'].apply(get_base_prefix)
    new_df['_base_rank'] = new_df['_base_prefix'].apply(
        lambda prefix: 1 if prefix.upper().startswith('PDP') else 2
    )
    new_df['_ctrl_type_priority'] = new_df['TAGNAME'].apply(_controller_type_priority)

    # Defaults for rows that are neither FIOM nor FIOH. All rows of one TAGNAME
    # share the same controller type, so this constant never reorders anything.
    new_df['_sort_priority'] = 999
    new_df['_sort_suffix'] = 999
    new_df['_sort_text'] = ''

    term_sorters = (
        # FIOM: X3_0, X3_1, X2_0, X2_1, X1_0, X1_1, X0_0, X0_1,
        #       X7_0, X7_1, X6_0, X6_1, X5_0, X5_1, X4_0, X4_1
        ('FIOM', new_df['TAGNAME'].apply(is_fiom_controller), sort_key_fiom),
        # FIOH: C7_A, C7_B, C5_A, C5_B, C3_A, C3_B, C1_A, C1_B,
        #       C8_A, C8_B, C6_A, C6_B, C4_A, C4_B, C2_A, C2_B
        ('FIOH', new_df['TAGNAME'].apply(is_fioh_controller), sort_key_fioh),
    )
    for label, mask, key_func in term_sorters:
        if not mask.any():
            continue
        keys = [key_func(term) for term in new_df.loc[mask, 'TERM']]
        new_df.loc[mask, '_sort_priority'] = [key[0] for key in keys]
        # A key's second element may be text (an unrecognised TERM). Numbers and
        # text are kept in separate columns so no sort column mixes types:
        # numeric suffixes order first, then text suffixes alphabetically.
        new_df.loc[mask, '_sort_suffix'] = [
            _TEXT_SUFFIX_RANK if isinstance(key[1], str) else key[1] for key in keys
        ]
        new_df.loc[mask, '_sort_text'] = [
            key[1] if isinstance(key[1], str) else '' for key in keys
        ]
        print(f"Sorted {mask.sum()} {label} rows by TERM")

    final_df = new_df.sort_values([
        '_base_rank',            # PDPs first
        '_base_prefix',          # then base prefixes alphabetically
        '_ctrl_type_priority',   # VFD first, then FIOM, then FIOH
        'TAGNAME',               # Keep same controller together
        '_sort_priority',        # Sort TERM by custom order
        '_sort_suffix',
        '_sort_text',
    ])
    return final_df.drop(columns=helper_columns).reset_index(drop=True)


def convert_xlsx_to_csv(xlsx_path, csv_path=None, sheet_name=None):
    """
    Convert XLSX file to CSV with specified column mappings and transformations.

    Rows are ordered with PDP base prefixes first, then the remaining base
    prefixes alphabetically; within that by controller type (VFD, FIOM, FIOH,
    other), then TAGNAME, then the FIOM/FIOH-specific TERM order.

    Args:
        xlsx_path: Path to input XLSX file
        csv_path: Path to output CSV file (default: same name as XLSX but .csv extension)
        sheet_name: Name of the sheet to read. If None, the sheet is chosen
            automatically: 'MCM14' if present, else the highest numbered sheet
            whose name starts with 'MCM', else the first sheet whose header row
            mentions an expected column name, else the first sheet.

    Returns:
        The sorted DataFrame that was written to the CSV file.

    Raises:
        PermissionError: the CSV file could not be written (re-raised after a
            hint is printed). Any other error raised while writing the CSV is
            also printed and re-raised.
    """
    print(f"Reading XLSX file: {xlsx_path}")
    if sheet_name is None:
        sheet_name = _select_sheet(xlsx_path)

    df = pd.read_excel(xlsx_path, sheet_name=sheet_name)
    print(f"Original columns: {list(df.columns)}")
    print(f"Total rows: {len(df)}")

    column_mapping = get_column_mapping(df)
    print(f"Column mapping: {column_mapping}")

    new_df = _build_output_frame(df, column_mapping)

    print("Sorting data by controller groups and TERM order...")
    final_df = _sort_rows(new_df)
    print("Grouped controllers by base prefix and sorted within groups")

    # Determine output path
    if csv_path is None:
        csv_path = Path(xlsx_path).with_suffix('.csv')

    print(f"Writing CSV file: {csv_path}")
    try:
        final_df.to_csv(csv_path, index=False)
        print(f"Conversion complete! Total rows: {len(final_df)}")
    except PermissionError:
        print(f"\nERROR: Cannot write to '{csv_path}' - file is likely open in another program.")
        print("Please close the CSV file in Excel or any other program and try again.")
        raise
    except Exception as e:
        print(f"\nERROR writing CSV file: {e}")
        raise

    return final_df
def main(argv):
    """
    Command-line entry point.

    Args:
        argv: Argument list in ``sys.argv`` layout: ``argv[1]`` is the optional
            input XLSX path (a built-in default file name is used when absent)
            and ``argv[2]`` is the optional sheet name.

    Returns:
        Process exit status: 0 after a successful conversion, 1 when the input
        file does not exist.
    """
    input_file = argv[1] if len(argv) > 1 else "Amazon CDW5_Devices IO (1).xlsx"
    sheet_name_arg = argv[2] if len(argv) > 2 else None

    if not Path(input_file).exists():
        print(f"Error: File '{input_file}' not found!")
        print("Please ensure the XLSX file is in the same directory as this script.")
        print("\nUsage: python xlsx_to_csv.py [filename.xlsx] [sheet_name]")
        print("Example: python xlsx_to_csv.py 'file.xlsx' 'MCM14'")
        # A non-zero status lets shells and calling scripts detect the failure.
        return 1

    result_df = convert_xlsx_to_csv(input_file, sheet_name=sheet_name_arg)
    print("\nSummary:")
    print(f"- Total rows in output: {len(result_df)}")
    return 0


if __name__ == "__main__":
    import sys

    sys.exit(main(sys.argv))