import pandas as pd
import re
from pathlib import Path


def sort_key_fiom(term):
    """
    Sort key for FIOM controllers with X format (X0_0, X0_1, etc.)
    Sort order: X3_0, X3_1, X2_0, X2_1, X1_0, X1_1, X0_0, X0_1,
                X7_0, X7_1, X6_0, X6_1, X5_0, X5_1, X4_0, X4_1
    """
    if pd.isna(term) or not isinstance(term, str):
        return (999, 999)

    match = re.match(r'X(\d+)_([01])', term)
    if not match:
        # Use a numeric placeholder rather than the raw term: mixing strings
        # and ints in the suffix sort column would make the later
        # sort_values() call raise a TypeError
        return (999, 999)

    x_num = int(match.group(1))
    suffix = int(match.group(2))  # 0 or 1

    # Sort priority: X3->1, X2->2, X1->3, X0->4, X7->5, X6->6, X5->7, X4->8
    sort_priority = {
        3: 1, 2: 2, 1: 3, 0: 4,  # First group
        7: 5, 6: 6, 5: 7, 4: 8   # Second group
    }

    priority = sort_priority.get(x_num, 999)
    # Suffix 0 sorts before suffix 1
    return (priority, suffix)
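
# A quick sanity check of the FIOM ordering (hypothetical TERM values):
#
#     sorted(['X0_1', 'X4_0', 'X3_0', 'X7_1'], key=sort_key_fiom)
#     # -> ['X3_0', 'X0_1', 'X7_1', 'X4_0']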


def sort_key_fioh(term):
    """
    Sort key for FIOH controllers with C format (C1_A, C1_B, etc.)
    Sort order: C7_A, C7_B, C5_A, C5_B, C3_A, C3_B, C1_A, C1_B,
                C8_A, C8_B, C6_A, C6_B, C4_A, C4_B, C2_A, C2_B
    """
    if pd.isna(term) or not isinstance(term, str):
        return (999, 999)

    match = re.match(r'C(\d+)_([AB])', term)
    if not match:
        # Numeric placeholder, for the same reason as in sort_key_fiom
        return (999, 999)

    num = int(match.group(1))
    letter = match.group(2)

    # Sort priority: C7->1, C5->2, C3->3, C1->4, C8->5, C6->6, C4->7, C2->8
    sort_priority = {
        7: 1, 5: 2, 3: 3, 1: 4,  # Odd numbers descending first
        8: 5, 6: 6, 4: 7, 2: 8   # Even numbers descending second
    }

    priority = sort_priority.get(num, 999)
    letter_priority = 0 if letter == 'A' else 1  # A before B
    return (priority, letter_priority)
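
# And the FIOH ordering, checked the same way (hypothetical TERM values):
#
#     sorted(['C2_B', 'C7_B', 'C1_A', 'C8_A'], key=sort_key_fioh)
#     # -> ['C7_B', 'C1_A', 'C8_A', 'C2_B']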


def is_fiom_controller(tagname):
    """Check if controller name contains FIOM"""
    if pd.isna(tagname) or not isinstance(tagname, str):
        return False
    return 'FIOM' in tagname.upper()


def is_fioh_controller(tagname):
    """Check if controller name contains FIOH"""
    if pd.isna(tagname) or not isinstance(tagname, str):
        return False
    return 'FIOH' in tagname.upper()


def is_vfd_controller(tagname):
    """Check if controller name contains VFD"""
    if pd.isna(tagname) or not isinstance(tagname, str):
        return False
    return 'VFD' in tagname.upper()


def get_base_prefix(tagname):
    """
    Extract base prefix from controller name (before the controller type).
    Examples:
        PDP17_FIOM1 -> PDP17
        PDP17_FIOH  -> PDP17
        BYAD_3_VFD  -> BYAD_3
        BYAD_3_FIOM -> BYAD_3
    """
    if pd.isna(tagname) or not isinstance(tagname, str):
        return ''

    tagname_upper = tagname.upper()

    # Cut the name at the first controller-type marker found
    for controller_type in ['_FIOM', '_FIOH', '_VFD']:
        if controller_type in tagname_upper:
            idx = tagname_upper.index(controller_type)
            return tagname[:idx]

    # If no controller type marker is found, return the whole name
    return tagname


def get_column_mapping(xlsx_df):
    """
    Map original XLSX columns to new CSV column names.
    Attempts to find columns by common name variations.
    """
    # Candidate source-column names for each target, checked in order
    source_variations = {
        'TAGNAME': ['controller name', 'controller', 'tagname', 'tag name'],
        'ADDR': ['address name', 'address', 'addr'],
        'TERM': ['signal type', 'signal', 'term'],
        'DESCA': ['assigned device', 'assigned', 'device'],
        'DESCB': ['description', 'desc'],
    }

    cols_lower = {col.lower().strip(): col for col in xlsx_df.columns}
    column_mapping = {}

    for target, sources in source_variations.items():
        column_mapping[target] = None
        # Prefer an exact match on the target name (case-insensitive)
        for col in xlsx_df.columns:
            if col.upper().strip() == target:
                column_mapping[target] = col
                break
        # Otherwise fall back to the known name variations
        if column_mapping[target] is None:
            for src in sources:
                if src in cols_lower:
                    column_mapping[target] = cols_lower[src]
                    break

    return column_mapping
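
# Example mapping (hypothetical header row): for source columns
# 'Controller Name', 'Address Name', 'Signal Type', 'Assigned Device',
# 'Description', get_column_mapping returns:
#
#     {'TAGNAME': 'Controller Name', 'ADDR': 'Address Name',
#      'TERM': 'Signal Type', 'DESCA': 'Assigned Device',
#      'DESCB': 'Description'}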


def convert_xlsx_to_csv(xlsx_path, csv_path=None, sheet_name=None):
    """
    Convert XLSX file to CSV with specified column mappings and transformations.

    Args:
        xlsx_path: Path to input XLSX file
        csv_path: Path to output CSV file (default: same name as the XLSX
            but with a .csv extension)
        sheet_name: Name of the sheet to read. If None, auto-detects: prefers
            'MCM14', then the highest-numbered MCM sheet, then any sheet with
            the expected columns, and finally falls back to the first sheet.
    """
    # Read XLSX file
    print(f"Reading XLSX file: {xlsx_path}")

    # Try to find the right sheet - look for any MCM sheet (MCM02, MCM03, MCM14, etc.)
    if sheet_name is None:
        xls = pd.ExcelFile(xlsx_path)
        # Find any sheet whose name starts with 'MCM'
        mcm_sheets = [s for s in xls.sheet_names if s.upper().startswith('MCM')]
        if mcm_sheets:
            # Prioritize MCM14 if it exists, otherwise use the highest-numbered MCM sheet
            if 'MCM14' in mcm_sheets:
                sheet_name = 'MCM14'
                print("Found MCM14 sheet, reading from it...")
            else:
                def extract_mcm_number(sheet):
                    match = re.match(r'MCM(\d+)', sheet.upper())
                    return int(match.group(1)) if match else 0

                sheet_name = max(mcm_sheets, key=extract_mcm_number)
                print(f"Found MCM sheet: '{sheet_name}', reading from it...")
        else:
            # If no MCM sheet, try to find a sheet with the expected columns
            for s in xls.sheet_names:
                test_df = pd.read_excel(xlsx_path, sheet_name=s, nrows=1)
                cols_lower = [c.lower().strip() for c in test_df.columns]
                # Check whether it has any of our expected column names
                if any(key in ' '.join(cols_lower) for key in
                       ['controller', 'signal', 'address', 'assigned', 'description']):
                    sheet_name = s
                    print(f"Found sheet with expected columns: '{sheet_name}', reading from it...")
                    break
            else:
                # Fallback to the first sheet
                sheet_name = xls.sheet_names[0]
                print(f"No MCM sheet found, reading from first sheet: '{sheet_name}'...")

    df = pd.read_excel(xlsx_path, sheet_name=sheet_name)
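
    # Sheet-selection illustration (hypothetical sheet names): for a workbook
    # with sheets ['Cover', 'MCM02', 'MCM07'] and sheet_name=None, there is no
    # 'MCM14', so the highest-numbered MCM sheet, 'MCM07', is the one read.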
print(f"Original columns: {list(df.columns)}")
|
|
print(f"Total rows: {len(df)}")
|
|
|
|
# Get column mapping
|
|
column_mapping = get_column_mapping(df)
|
|
print(f"Column mapping: {column_mapping}")
|
|
|
|
# Create new DataFrame - preserve exact row order from Excel
|
|
new_df = pd.DataFrame()
|
|
new_df.index = df.index # Preserve original index order
|
|
|
|
# Map and copy data - preserve all rows in original order
|
|
if column_mapping['TAGNAME']:
|
|
new_df['TAGNAME'] = df[column_mapping['TAGNAME']].values
|
|
else:
|
|
print("Warning: TAGNAME column not found, using first column")
|
|
new_df['TAGNAME'] = df.iloc[:, 0].values if len(df.columns) > 0 else ''
|
|
|
|
if column_mapping['ADDR']:
|
|
new_df['ADDR'] = df[column_mapping['ADDR']].values
|
|
else:
|
|
new_df['ADDR'] = ''
|
|
|
|
if column_mapping['TERM']:
|
|
new_df['TERM'] = df[column_mapping['TERM']].values # Use .values to preserve exact order
|
|
else:
|
|
new_df['TERM'] = ''
|
|
|
|
# Empty column TERMDESC
|
|
new_df['TERMDESC'] = ''
|
|
|
|
if column_mapping['DESCA']:
|
|
new_df['DESCA'] = df[column_mapping['DESCA']].values
|
|
else:
|
|
new_df['DESCA'] = ''
|
|
|
|
if column_mapping['DESCB']:
|
|
new_df['DESCB'] = df[column_mapping['DESCB']].values
|
|
else:
|
|
new_df['DESCB'] = ''
|
|
|
|
# Empty columns: DESCC, DESCD, DESCE, INST, LOC
|
|
new_df['DESCC'] = ''
|
|
new_df['DESCD'] = ''
|
|
new_df['DESCE'] = ''
|
|
new_df['INST'] = ''
|
|
new_df['LOC'] = ''
|
|
|
|
# Reorder columns to match required output format (doesn't change row order)
|
|
new_df = new_df[['TAGNAME', 'ADDR', 'TERM', 'TERMDESC', 'DESCA', 'DESCB',
|
|
'DESCC', 'DESCD', 'DESCE', 'INST', 'LOC']]
|
|
|
|
# Reset index to ensure clean sequential order, but preserve row sequence
|
|
new_df = new_df.reset_index(drop=True)
|
|
|
|
print("Sorting data by controller groups and TERM order...")
|
|
|
|
# Add base prefix column for grouping
|
|
new_df['_base_prefix'] = new_df['TAGNAME'].apply(get_base_prefix)
|
|
|
|
# Add base prefix priority to sort PDPs first
|
|
def get_base_prefix_priority(base_prefix):
|
|
"""Return priority: PDP=1, others=2 (sorted alphabetically)"""
|
|
if base_prefix.upper().startswith('PDP'):
|
|
return (1, base_prefix) # PDPs first, then sorted by name
|
|
else:
|
|
return (2, base_prefix) # Others after PDPs, sorted by name
|
|
|
|
new_df['_base_priority'] = new_df['_base_prefix'].apply(get_base_prefix_priority)
|
|
|
|
# Add controller type priority for ordering within each base prefix group
|
|
def get_controller_type_priority(tagname):
|
|
"""Return priority: VFD=1, FIOM=2, FIOH=3, Other=4"""
|
|
if is_vfd_controller(tagname):
|
|
return 1
|
|
elif is_fiom_controller(tagname):
|
|
return 2
|
|
elif is_fioh_controller(tagname):
|
|
return 3
|
|
else:
|
|
return 4
|
|
|
|
new_df['_ctrl_type_priority'] = new_df['TAGNAME'].apply(get_controller_type_priority)
|
|
|
|
# Separate rows by controller type for TERM sorting
|
|
fiom_mask = new_df['TAGNAME'].apply(is_fiom_controller)
|
|
fioh_mask = new_df['TAGNAME'].apply(is_fioh_controller)
|
|
vfd_mask = new_df['TAGNAME'].apply(is_vfd_controller)
|
|
other_mask = ~(fiom_mask | fioh_mask | vfd_mask)
|
|
|
|

    # Default TERM sort keys: FIOM/FIOH rows get real keys below; everything
    # else keeps 999 so unmatched rows sort after matched ones
    new_df['_sort_priority'] = 999
    new_df['_sort_suffix'] = 999

    # FIOM TERM order: X3_0, X3_1, X2_0, X2_1, X1_0, X1_1, X0_0, X0_1,
    # X7_0, X7_1, X6_0, X6_1, X5_0, X5_1, X4_0, X4_1
    if fiom_mask.any():
        fiom_keys = new_df.loc[fiom_mask, 'TERM'].apply(sort_key_fiom)
        new_df.loc[fiom_mask, '_sort_priority'] = fiom_keys.str[0]
        new_df.loc[fiom_mask, '_sort_suffix'] = fiom_keys.str[1]
        print(f"Computed TERM sort keys for {fiom_mask.sum()} FIOM rows")

    # FIOH TERM order: C7_A, C7_B, C5_A, C5_B, C3_A, C3_B, C1_A, C1_B,
    # C8_A, C8_B, C6_A, C6_B, C4_A, C4_B, C2_A, C2_B
    if fioh_mask.any():
        fioh_keys = new_df.loc[fioh_mask, 'TERM'].apply(sort_key_fioh)
        new_df.loc[fioh_mask, '_sort_priority'] = fioh_keys.str[0]
        new_df.loc[fioh_mask, '_sort_suffix'] = fioh_keys.str[1]
        print(f"Computed TERM sort keys for {fioh_mask.sum()} FIOH rows")

    # Final sort: PDPs first, then by base prefix, then by controller type
    # (VFD, FIOM, FIOH), then by TAGNAME, then by the custom TERM order
    final_df = new_df.sort_values([
        '_base_priority',        # PDPs first, then others alphabetically
        '_ctrl_type_priority',   # VFD first, then FIOM, then FIOH
        'TAGNAME',               # Keep rows for the same controller together
        '_sort_priority',        # Sort TERM by the custom order
        '_sort_suffix'
    ])

    # Drop temporary sorting columns
    final_df = final_df.drop(columns=['_base_prefix', '_base_priority',
                                      '_ctrl_type_priority', '_sort_priority',
                                      '_sort_suffix'])
    final_df = final_df.reset_index(drop=True)

    print("Grouped controllers by base prefix and sorted within groups")
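
    # Ordering illustration (hypothetical TAGNAMEs): within 'PDP17', rows for
    # 'PDP17_VFD' come before 'PDP17_FIOM1', which come before 'PDP17_FIOH';
    # every PDP* group precedes, say, 'BYAD_3_*' because PDP prefixes get
    # base priority 1 and everything else gets 2.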

    # Determine output path
    if csv_path is None:
        csv_path = Path(xlsx_path).with_suffix('.csv')

    # Write to CSV
    print(f"Writing CSV file: {csv_path}")
    try:
        final_df.to_csv(csv_path, index=False)
        print(f"Conversion complete! Total rows: {len(final_df)}")
    except PermissionError:
        print(f"\nERROR: Cannot write to '{csv_path}' - the file is likely open in another program.")
        print("Please close the CSV file in Excel or any other program and try again.")
        raise
    except Exception as e:
        print(f"\nERROR writing CSV file: {e}")
        raise

    return final_df
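
# Programmatic usage sketch (hypothetical file and sheet names), e.g. from
# another script:
#
#     df = convert_xlsx_to_csv('plant_io.xlsx', csv_path='plant_io_sorted.csv',
#                              sheet_name='MCM14')
#     print(df.head())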
if __name__ == "__main__":
|
|
import sys
|
|
|
|
# Get input file and optional sheet name from command line arguments
|
|
input_file = None
|
|
sheet_name_arg = None
|
|
|
|
if len(sys.argv) > 1:
|
|
input_file = sys.argv[1]
|
|
if len(sys.argv) > 2:
|
|
sheet_name_arg = sys.argv[2]
|
|
if input_file is None:
|
|
input_file = "Amazon CDW5_Devices IO (1).xlsx"
|
|
|
|
# Check if file exists
|
|
if Path(input_file).exists():
|
|
result_df = convert_xlsx_to_csv(input_file, sheet_name=sheet_name_arg)
|
|
print(f"\nSummary:")
|
|
print(f"- Total rows in output: {len(result_df)}")
|
|
else:
|
|
print(f"Error: File '{input_file}' not found!")
|
|
print("Please ensure the XLSX file is in the same directory as this script.")
|
|
print("\nUsage: python xlsx_to_csv.py [filename.xlsx] [sheet_name]")
|
|
print("Example: python xlsx_to_csv.py 'file.xlsx' 'MCM14'")
|