"""Convert a controller I/O workbook (XLSX) into a sorted CSV file.

Usage: python xlsx_to_csv.py [filename.xlsx] [sheet_name]
"""

import re
import sys
from pathlib import Path

import pandas as pd

# Sort value for anything that has no explicit position in an ordering.
_UNRANKED = 999

# Terminal formats. Both are matched from the start of the string only, so
# characters following the suffix are tolerated.
_FIOM_TERM = re.compile(r'X(\d+)_([01])')
_FIOH_TERM = re.compile(r'C(\d+)_([AB])')

# FIOM: X3, X2, X1, X0 first, then X7, X6, X5, X4.
_FIOM_ORDER = {3: 1, 2: 2, 1: 3, 0: 4, 7: 5, 6: 6, 5: 7, 4: 8}
# FIOH: odd connectors descending (C7..C1), then even descending (C8..C2).
_FIOH_ORDER = {7: 1, 5: 2, 3: 3, 1: 4, 8: 5, 6: 6, 4: 7, 2: 8}

# Controller-type markers, tried in this order by get_base_prefix().
_CONTROLLER_MARKERS = ('_FIOM', '_FIOH', '_VFD')

# Output column -> accepted source header names (lower-case, stripped).
_COLUMN_ALIASES = {
    'TAGNAME': ('controller name', 'controller', 'tagname', 'tag name'),
    'ADDR': ('address name', 'address', 'addr'),
    'TERM': ('signal type', 'signal', 'term'),
    'DESCA': ('assigned device', 'assigned', 'device'),
    'DESCB': ('description', 'desc'),
}

# Column order of the generated CSV.
_OUTPUT_COLUMNS = [
    'TAGNAME', 'ADDR', 'TERM', 'TERMDESC', 'DESCA', 'DESCB',
    'DESCC', 'DESCD', 'DESCE', 'INST', 'LOC',
]

# Header fragments that identify a usable sheet when no MCM sheet exists.
_SHEET_HINTS = ('controller', 'signal', 'address', 'assigned', 'description')

# Sheet that wins over every other MCM sheet when present (exact name).
_PREFERRED_SHEET = 'MCM14'

# Workbook used when no file name is given on the command line.
_DEFAULT_INPUT = "Amazon CDW5_Devices IO (1).xlsx"


def sort_key_fiom(term):
    """
    Sort key for FIOM controllers with X format (X0_0, X0_1, etc.)

    Sort order: X3_0, X3_1, X2_0, X2_1, X1_0, X1_1, X0_0, X0_1,
                X7_0, X7_1, X6_0, X6_1, X5_0, X5_1, X4_0, X4_1

    Returns:
        (priority, suffix). Non-string input (None, NaN, numbers) yields
        (999, 999). A string that does not start with the X format yields
        (999, term), so such terms sort after all recognised ones and among
        themselves by their text.
    """
    if not isinstance(term, str):
        return (_UNRANKED, _UNRANKED)

    match = _FIOM_TERM.match(term)
    if match is None:
        return (_UNRANKED, term)

    priority = _FIOM_ORDER.get(int(match.group(1)), _UNRANKED)
    # Suffix 0 sorts before suffix 1.
    return (priority, int(match.group(2)))


def sort_key_fioh(term):
    """
    Sort key for FIOH controllers with C format (C1_A, C1_B, etc.)

    Sort order: C7_A, C7_B, C5_A, C5_B, C3_A, C3_B, C1_A, C1_B,
                C8_A, C8_B, C6_A, C6_B, C4_A, C4_B, C2_A, C2_B

    Returns:
        (priority, letter_priority) with A -> 0 and B -> 1. Non-string input
        yields (999, 999); a string that does not start with the C format
        yields (999, term).
    """
    if not isinstance(term, str):
        return (_UNRANKED, _UNRANKED)

    match = _FIOH_TERM.match(term)
    if match is None:
        return (_UNRANKED, term)

    priority = _FIOH_ORDER.get(int(match.group(1)), _UNRANKED)
    letter_priority = 0 if match.group(2) == 'A' else 1  # A before B
    return (priority, letter_priority)


def _name_contains(tagname, marker):
    """Return True when tagname is a string containing marker (case-insensitive)."""
    return isinstance(tagname, str) and marker in tagname.upper()


def is_fiom_controller(tagname):
    """Check if controller name contains FIOM (False for non-string input)."""
    return _name_contains(tagname, 'FIOM')


def is_fioh_controller(tagname):
    """Check if controller name contains FIOH (False for non-string input)."""
    return _name_contains(tagname, 'FIOH')


def is_vfd_controller(tagname):
    """Check if controller name contains VFD (False for non-string input)."""
    return _name_contains(tagname, 'VFD')


def get_base_prefix(tagname):
    """
    Extract base prefix from controller name (before the controller type).

    Examples:
        PDP17_FIOM1 -> PDP17
        PDP17_FIOH  -> PDP17
        BYAD_3_VFD  -> BYAD_3
        BYAD_3_FIOM -> BYAD_3

    Markers are tried in the order _FIOM, _FIOH, _VFD and the first one found
    decides the cut. A name without any marker is returned unchanged;
    non-string input returns ''.
    """
    if not isinstance(tagname, str):
        return ''

    upper_name = tagname.upper()
    for marker in _CONTROLLER_MARKERS:
        position = upper_name.find(marker)
        if position != -1:
            return tagname[:position]

    return tagname


def _find_column(columns, by_lower, target, aliases):
    """Return the source column for one output column, or None if absent."""
    # A column already carrying the target name wins (first match).
    for col in columns:
        if str(col).strip().upper() == target:
            return col
    # Otherwise take the first alias that names an existing column.
    for alias in aliases:
        if alias in by_lower:
            return by_lower[alias]
    return None


def get_column_mapping(xlsx_df):
    """
    Map original XLSX columns to new CSV column names.

    Matching is case-insensitive and ignores surrounding whitespace. Column
    labels are compared through str(), so numeric or date header cells are
    simply never matched instead of raising.

    Args:
        xlsx_df: DataFrame whose columns are searched.

    Returns:
        dict with keys TAGNAME, ADDR, TERM, DESCA, DESCB; each value is the
        original column label, or None when no matching column exists.
    """
    columns = list(xlsx_df.columns)
    # When two labels normalise to the same key, the later column wins.
    by_lower = {str(col).lower().strip(): col for col in columns}

    return {
        target: _find_column(columns, by_lower, target, aliases)
        for target, aliases in _COLUMN_ALIASES.items()
    }


def _mcm_number(sheet):
    """Return the number following a leading 'MCM' in a sheet name, else 0."""
    match = re.match(r'MCM(\d+)', sheet.upper())
    return int(match.group(1)) if match else 0


def _choose_sheet(xls):
    """Pick the sheet to convert from an open pd.ExcelFile."""
    sheets = xls.sheet_names
    mcm_sheets = [s for s in sheets if s.upper().startswith('MCM')]

    if _PREFERRED_SHEET in mcm_sheets:
        print(f"Found {_PREFERRED_SHEET} sheet, reading from it...")
        return _PREFERRED_SHEET

    if mcm_sheets:
        # max() keeps the first sheet among equal numbers, like a stable
        # descending sort would.
        chosen = max(mcm_sheets, key=_mcm_number)
        print(f"Found MCM sheet: '{chosen}', reading from it...")
        return chosen

    # No MCM sheet: look for a sheet whose header mentions an expected column.
    for sheet in sheets:
        header = pd.read_excel(xls, sheet_name=sheet, nrows=1)
        joined = ' '.join(str(col).lower().strip() for col in header.columns)
        if any(hint in joined for hint in _SHEET_HINTS):
            print(f"Found sheet with expected columns: '{sheet}', reading from it...")
            return sheet

    fallback = sheets[0]
    print(f"No MCM sheet found, reading from first sheet: '{fallback}'...")
    return fallback


def _build_output_frame(df, column_mapping):
    """Copy the mapped columns of df into a frame laid out like the CSV."""
    out = pd.DataFrame(index=pd.RangeIndex(len(df)))

    for target in _OUTPUT_COLUMNS:
        source = column_mapping.get(target)
        if source is not None:
            # .values drops the index so rows are copied purely by position.
            out[target] = df[source].values
        elif target == 'TAGNAME':
            print("Warning: TAGNAME column not found, using first column")
            out[target] = df.iloc[:, 0].values if len(df.columns) > 0 else ''
        else:
            # Unmapped and always-empty columns are written as blanks.
            out[target] = ''

    return out


def _controller_rank(tagname):
    """Return priority: VFD=1, FIOM=2, FIOH=3, Other=4."""
    if is_vfd_controller(tagname):
        return 1
    if is_fiom_controller(tagname):
        return 2
    if is_fioh_controller(tagname):
        return 3
    return 4


def _term_sort_key(tagname, term):
    """Return the TERM sort key matching the controller type of tagname."""
    # FIOH is checked first: a name containing both markers uses the FIOH key.
    if is_fioh_controller(tagname):
        return sort_key_fioh(term)
    if is_fiom_controller(tagname):
        return sort_key_fiom(term)
    return (_UNRANKED, _UNRANKED)


def _sort_rows(frame):
    """Return frame sorted by controller group and TERM order, reindexed."""
    work = frame.copy()
    tags = list(work['TAGNAME'])
    terms = list(work['TERM'])

    prefixes = [get_base_prefix(tag) for tag in tags]
    term_keys = [_term_sort_key(tag, term) for tag, term in zip(tags, terms)]

    # PDPs first, then everything else; both groups ordered by base prefix.
    work['_not_pdp'] = [0 if p.upper().startswith('PDP') else 1 for p in prefixes]
    work['_base_prefix'] = prefixes
    work['_ctrl_type_priority'] = [_controller_rank(tag) for tag in tags]
    work['_sort_priority'] = [key[0] for key in term_keys]
    work['_sort_suffix'] = [key[1] for key in term_keys]

    fiom_rows = sum(1 for tag in tags if is_fiom_controller(tag))
    if fiom_rows:
        print(f"Sorted {fiom_rows} FIOM rows by TERM")
    fioh_rows = sum(1 for tag in tags if is_fioh_controller(tag))
    if fioh_rows:
        print(f"Sorted {fioh_rows} FIOH rows by TERM")

    helper_columns = [
        '_not_pdp',              # PDPs first
        '_base_prefix',          # then alphabetically by base prefix
        '_ctrl_type_priority',   # VFD first, then FIOM, then FIOH
        'TAGNAME',               # keep same controller together
        '_sort_priority',        # sort TERM by custom order
        '_sort_suffix',
    ]
    ordered = work.sort_values(helper_columns)
    ordered = ordered.drop(columns=[c for c in helper_columns if c != 'TAGNAME'])
    return ordered.reset_index(drop=True)


def convert_xlsx_to_csv(xlsx_path, csv_path=None, sheet_name=None):
    """
    Convert XLSX file to CSV with specified column mappings and transformations.

    Args:
        xlsx_path: Path to input XLSX file
        csv_path: Path to output CSV file (default: same name as XLSX but
            .csv extension)
        sheet_name: Name of the sheet to read. If None, the sheet is chosen
            in this order: 'MCM14'; the MCM sheet with the highest number;
            the first sheet whose header contains an expected column name;
            the first sheet of the workbook.

    Returns:
        The sorted DataFrame that was written to the CSV file.

    Raises:
        PermissionError: the CSV file cannot be written (reported, then
            re-raised). Any other error from writing is likewise reported
            and re-raised.
    """
    print(f"Reading XLSX file: {xlsx_path}")

    # Open the workbook once and make sure the handle is released afterwards.
    with pd.ExcelFile(xlsx_path) as xls:
        if sheet_name is None:
            sheet_name = _choose_sheet(xls)
        df = pd.read_excel(xls, sheet_name=sheet_name)

    print(f"Original columns: {list(df.columns)}")
    print(f"Total rows: {len(df)}")

    column_mapping = get_column_mapping(df)
    print(f"Column mapping: {column_mapping}")

    new_df = _build_output_frame(df, column_mapping)

    print("Sorting data by controller groups and TERM order...")
    final_df = _sort_rows(new_df)
    print("Grouped controllers by base prefix and sorted within groups")

    if csv_path is None:
        csv_path = Path(xlsx_path).with_suffix('.csv')

    print(f"Writing CSV file: {csv_path}")
    try:
        final_df.to_csv(csv_path, index=False)
        print(f"Conversion complete! Total rows: {len(final_df)}")
    except PermissionError:
        print(f"\nERROR: Cannot write to '{csv_path}' - file is likely open in another program.")
        print("Please close the CSV file in Excel or any other program and try again.")
        raise
    except Exception as e:
        print(f"\nERROR writing CSV file: {e}")
        raise

    return final_df


def main(argv=None):
    """
    Run the conversion from command line arguments.

    Args:
        argv: Arguments without the program name: [filename.xlsx] [sheet_name].
            Defaults to sys.argv[1:].

    Returns:
        0 after a successful conversion, 1 when the input file does not exist.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    input_file = args[0] if len(args) > 0 else _DEFAULT_INPUT
    sheet_name_arg = args[1] if len(args) > 1 else None

    if not Path(input_file).exists():
        print(f"Error: File '{input_file}' not found!")
        print("Please ensure the XLSX file is in the same directory as this script.")
        print("\nUsage: python xlsx_to_csv.py [filename.xlsx] [sheet_name]")
        print("Example: python xlsx_to_csv.py 'file.xlsx' 'MCM14'")
        return 1

    result_df = convert_xlsx_to_csv(input_file, sheet_name=sheet_name_arg)
    print("\nSummary:")
    print(f"- Total rows in output: {len(result_df)}")
    return 0


if __name__ == "__main__":
    sys.exit(main())