import os import threading import time import json import git import csv import re from flask import Flask, render_template, jsonify, Response app = Flask(__name__) # --- Configuration --- REPO_URL = "http://192.168.5.191:3000/LCI/MTN6" REPO_DIR = "./cloned_repo" # Directory to clone the repo into BRANCH = "main" CSV_FILENAME = "MTN6 Equipment Manifest REV6(Conveyor List).csv" VIEWS_DIR_RELATIVE = "MTN6_SCADA/com.inductiveautomation.perspective/views/Detailed-Views" TEXT_OUTPUT_FOLDER = "./extracted_texts" # Added: Directory with .txt files CHECK_INTERVAL_SECONDS = 60 # --- Column Names from CSV (Adjust if necessary) --- CSV_ALIAS_COL = 'Alias' CSV_PANEL_COL = 'Control Panel' CSV_EQ_TYPE_COL = 'Equipment Type' # Optional, for details modal CSV_CONV_TYPE_COL = 'Type of Conveyor' # Optional, for details modal # --- Global state --- last_commit_hash = None # New detailed progress data structure progress_data = { "overall": { "total_csv": 0, "found_both": 0, "found_scada_only": 0, "found_drawing_only": 0, "missing_both": 0, "percentage_found_both": 0, "missing_list": [], "found_scada_only_list": [], "found_drawing_only_list": [], "found_both_list": [] }, "panels": {} # Populated dynamically } status_message = "Initializing..." repo_lock = threading.Lock() # Lock for accessing repo and shared data data_updated_event = threading.Event() # Event to signal data updates # --- Helper Functions --- def get_repo_path(): return os.path.abspath(REPO_DIR) def get_csv_path(): script_dir = os.path.dirname(os.path.abspath(__file__)) return os.path.join(script_dir, CSV_FILENAME) def get_views_dir_path(): return os.path.join(get_repo_path(), VIEWS_DIR_RELATIVE) def get_text_output_dir_path(): # Construct absolute path based on the script's directory script_dir = os.path.dirname(os.path.abspath(__file__)) # Use os.path.join to handle path separators correctly and avoid './' return os.path.abspath(os.path.join(script_dir, TEXT_OUTPUT_FOLDER)) def normalize(text): """Normalize string for comparison: lowercase, treat '-' and '_' the same, remove all whitespace.""" if not isinstance(text, str): return "" text = text.lower() # Convert to lowercase text = text.replace('-', '_') # Replace hyphens with underscores text = re.sub(r'\s+', '', text) # Remove ALL whitespace characters (including newlines) return text def read_manifest(csv_filepath): """Reads the manifest CSV into a list of dictionaries.""" manifest_items = [] # Only require Alias and Panel now for basic grouping required_cols = {CSV_ALIAS_COL, CSV_PANEL_COL} optional_cols = {CSV_EQ_TYPE_COL, CSV_CONV_TYPE_COL} try: # Revert back to 'utf-8-sig' to handle potential BOM from Excel with open(csv_filepath, mode='r', newline='', encoding='utf-8-sig') as infile: reader = csv.DictReader(infile) headers = set(h.strip() for h in reader.fieldnames) # Check for required columns missing_required = required_cols - headers if missing_required: print(f"Error: Missing required columns in CSV '{csv_filepath}': {', '.join(missing_required)}") print(f"Available columns: {', '.join(headers)}") return None for row in reader: alias = row.get(CSV_ALIAS_COL, "").strip() panel = row.get(CSV_PANEL_COL, "").strip() # unit_number = row.get('Unit Number', "").strip() # No longer needed for filename # Add if Alias and Control Panel are present (Panel needed for grouping results later) if alias and panel: item = { "alias": alias, "normalized_alias": normalize(alias), "control_panel": panel, # "unit_number": unit_number, # Removed # "expected_drawing_filename": f"MTN6_SYSDL-{unit_number}.txt", # Removed # Add optional data if columns exist "equipment_type": row.get(CSV_EQ_TYPE_COL, "").strip() if CSV_EQ_TYPE_COL in headers else "N/A", "conveyor_type": row.get(CSV_CONV_TYPE_COL, "").strip() if CSV_CONV_TYPE_COL in headers else "N/A", # Status fields to be filled later "found_scada": False, "found_drawing": False } manifest_items.append(item) # elif alias and panel: # If Unit Number is missing but others are present # Condition removed # print(f"Warning: Alias '{alias}' in Panel '{panel}' is missing 'Unit Number' in CSV. Skipping drawing check for this item.") elif alias and not panel: print(f"Warning: Alias '{alias}' found in CSV but is missing its '{CSV_PANEL_COL}'. Skipping.") # Add other specific warnings if needed except FileNotFoundError: print(f"Error: Manifest file not found at {csv_filepath}") return None except Exception as e: print(f"Error reading CSV file {csv_filepath}: {e}") return None print(f"Read {len(manifest_items)} valid items from manifest.") return manifest_items def check_scada(manifest_data, views_dir): """Checks for aliases in SCADA JSON view files.""" if not manifest_data: return print(f"Starting SCADA check in directory: {views_dir}...") found_count = 0 processed_files = 0 # Create a quick lookup map of normalized_alias -> list of manifest items (handles duplicate aliases) alias_map = {} for item in manifest_data: na = item['normalized_alias'] if na not in alias_map: alias_map[na] = [] alias_map[na].append(item) try: for root, _, files in os.walk(views_dir): for filename in files: if filename == 'view.json': filepath = os.path.join(root, filename) processed_files += 1 try: with open(filepath, 'r', encoding='utf-8') as f: # Read the whole file, normalize it for substring search content = f.read() normalized_content = normalize(content) # Check manifest aliases against this file's normalized content for norm_alias, items in alias_map.items(): if norm_alias in normalized_content: for item in items: if not item['found_scada']: # Update only if not already found elsewhere item['found_scada'] = True found_count += 1 # Count unique aliases found except Exception as e: print(f" Warning: Could not read or process JSON file {filepath}: {e}") except Exception as e: print(f"Error walking SCADA views directory {views_dir}: {e}") print(f"SCADA check finished. Processed {processed_files} view.json files. Found {found_count} manifest aliases.") def check_drawings(manifest_data, text_output_dir): """Checks if aliases from manifest exist in *any* extracted drawing text file.""" if not manifest_data: return print(f"Starting Drawings check: Scanning all .txt files in directory: {text_output_dir}...") all_normalized_content = "" # Combine all text content here processed_files = 0 found_files = [] try: # Step 1: Read and combine content of all .txt files in the directory for filename in os.listdir(text_output_dir): if filename.lower().endswith('.txt'): filepath = os.path.join(text_output_dir, filename) processed_files += 1 try: with open(filepath, 'r', encoding='utf-8') as f: content = f.read() # Add a separator to prevent false matches across file boundaries all_normalized_content += normalize(content) + "\n--file-separator--\n" found_files.append(filename) except Exception as e: print(f" Warning: Could not read or process text file {filepath}: {e}") if processed_files == 0: print(" Warning: No .txt files found in the directory. Cannot perform drawing check.") return else: print(f" Successfully read and normalized content from {len(found_files)} out of {processed_files} .txt files found.") # Step 2: Check each manifest alias against the combined content found_count = 0 for item in manifest_data: normalized_alias = item['normalized_alias'] if normalized_alias and normalized_alias in all_normalized_content: item['found_drawing'] = True found_count += 1 # else: item['found_drawing'] is already False by default print(f"Drawings check finished. Found {found_count} manifest aliases within the combined text content.") except FileNotFoundError: print(f" Error: Drawings text directory not found: {text_output_dir}") except Exception as e: print(f" Error during drawings check: {e}") def calculate_combined_progress(manifest_data): """Calculates the combined progress based on scada/drawing status.""" print("Calculating combined progress statistics...") results = { "overall": { "total_csv": 0, "found_both": 0, "found_scada_only": 0, "found_drawing_only": 0, "missing_both": 0, "percentage_found_both": 0, "missing_list": [], "found_scada_only_list": [], "found_drawing_only_list": [], "found_both_list": [] }, "panels": {} } if not manifest_data: print("Warning: No manifest data to calculate progress from.") return results results["overall"]["total_csv"] = len(manifest_data) for item in manifest_data: panel = item['control_panel'] # Initialize panel data if not present if panel not in results["panels"]: results["panels"][panel] = { "total": 0, "found_both": 0, "found_scada_only": 0, "found_drawing_only": 0, "missing_both": 0, "percentage_found_both": 0, "missing_list": [], "found_scada_only_list": [], "found_drawing_only_list": [], "found_both_list": [] } results["panels"][panel]["total"] += 1 # Categorize and add to lists item_detail = {k: v for k, v in item.items() if k not in ['normalized_alias']} # Don't need normalized in output if item['found_scada'] and item['found_drawing']: results["overall"]["found_both"] += 1 results["panels"][panel]["found_both"] += 1 results["overall"]["found_both_list"].append(item_detail) results["panels"][panel]["found_both_list"].append(item_detail) elif item['found_scada'] and not item['found_drawing']: results["overall"]["found_scada_only"] += 1 results["panels"][panel]["found_scada_only"] += 1 results["overall"]["found_scada_only_list"].append(item_detail) results["panels"][panel]["found_scada_only_list"].append(item_detail) elif not item['found_scada'] and item['found_drawing']: results["overall"]["found_drawing_only"] += 1 results["panels"][panel]["found_drawing_only"] += 1 results["overall"]["found_drawing_only_list"].append(item_detail) results["panels"][panel]["found_drawing_only_list"].append(item_detail) else: # Missing both results["overall"]["missing_both"] += 1 results["panels"][panel]["missing_both"] += 1 results["overall"]["missing_list"].append(item_detail) results["panels"][panel]["missing_list"].append(item_detail) # Calculate percentages if results["overall"]["total_csv"] > 0: results["overall"]["percentage_found_both"] = round( (results["overall"]["found_both"] / results["overall"]["total_csv"]) * 100, 1 ) for panel_data in results["panels"].values(): if panel_data["total"] > 0: panel_data["percentage_found_both"] = round( (panel_data["found_both"] / panel_data["total"]) * 100, 1 ) print("Combined progress calculation finished.") # print(json.dumps(results, indent=2)) # DEBUG: Print structure return results # --- Core Logic --- def update_progress_data(): """Reads manifest, runs both checks, combines results, and updates global state.""" global progress_data, status_message csv_path = get_csv_path() views_dir = get_views_dir_path() text_dir = get_text_output_dir_path() current_status = "" new_data_calculated = None # 1. Read Manifest status_message = "Reading manifest file..." print(f"Reading manifest: {csv_path}") manifest_data = read_manifest(csv_path) if manifest_data is None: current_status = f"Error: Failed to read or process manifest file {csv_path}" print(current_status) status_message = current_status data_updated_event.set(); data_updated_event.clear() return # Cannot proceed without manifest # 2. Check SCADA (JSON files) status_message = "Checking SCADA views..." if not os.path.exists(views_dir): current_status = f"Warning: SCADA Views directory not found at {views_dir}. Skipping SCADA check." print(current_status) # Mark all as not found in SCADA? Or just skip update? Skipping update is safer. else: check_scada(manifest_data, views_dir) # 3. Check Drawings (TXT files) status_message = "Checking drawing text files..." if not os.path.exists(text_dir): current_status = f"Warning: Extracted Text directory not found at {text_dir}. Skipping Drawings check." print(current_status) # Mark all as not found in Drawings? Or skip? Skipping update. else: check_drawings(manifest_data, text_dir) # 4. Calculate Combined Progress status_message = "Calculating combined progress..." try: new_data_calculated = calculate_combined_progress(manifest_data) if new_data_calculated: current_status = f"Analysis complete at {time.strftime('%Y-%m-%d %H:%M:%S')}" else: # This case shouldn't happen if manifest_data was valid current_status = "Error: Failed to calculate combined progress." except Exception as e: current_status = f"Error during progress calculation: {e}" print(f"Detailed Calculation Error: {e}", exc_info=True) # Log stack trace new_data_calculated = None # Ensure no partial data update # Update global state print(current_status) status_message = current_status # Update status regardless of calculation success/failure if new_data_calculated is not None: progress_data = new_data_calculated # Signal that an update attempt finished WITH new data data_updated_event.set() data_updated_event.clear() # --- Git Repo Handling (Modified slightly to use updated status messages) --- def check_and_update_repo(): global last_commit_hash, status_message repo_path = get_repo_path() did_update = False # Flag to track if files were actually updated initial_hash = last_commit_hash # Store hash before check with repo_lock: try: repo_existed = os.path.exists(os.path.join(repo_path, ".git")) if not repo_existed: print(f"Cloning repository {REPO_URL} into {repo_path}...") status_message = f"Cloning repository {REPO_URL}..." git.Repo.clone_from(REPO_URL, repo_path, branch=BRANCH) repo = git.Repo(repo_path) last_commit_hash = repo.head.commit.hexsha print(f"Initial clone complete. Commit: {last_commit_hash}") did_update = True # Cloned, so considered an update else: repo = git.Repo(repo_path) print("Fetching updates from remote...") current_local_commit = repo.head.commit.hexsha # Update hash *before* fetch in case fetch fails but commit was readable if last_commit_hash is None: last_commit_hash = current_local_commit origin = repo.remotes.origin fetch_info = origin.fetch() # Check if fetch actually brought new data for the target branch # fetched_new_commits = any(info.flags & info.NEW_HEAD for info in fetch_info if info.name == f'origin/{BRANCH}') # More precise check if needed current_remote_commit = repo.commit(f'origin/{BRANCH}').hexsha print(f"Local commit: {current_local_commit}, Remote commit: {current_remote_commit}") if current_local_commit != current_remote_commit: print("New commit detected! Pulling changes...") status_message = "Pulling updates..." try: pull_info = origin.pull() new_commit_hash = repo.head.commit.hexsha print(f"Pull successful. New commit: {new_commit_hash}") last_commit_hash = new_commit_hash did_update = True # Pulled, so considered an update except git.GitCommandError as e: status_message = f"Error pulling repository: {e}" print(status_message) # Revert hash if pull failed last_commit_hash = current_local_commit else: print("No new commits detected.") # Update status if it wasn't an error before if not status_message.startswith("Error"): status_message = f"Checked repo at {time.strftime('%Y-%m-%d %H:%M:%S')}. No changes." # Run analysis IF the repo was updated (cloned or pulled) if did_update: # Status will be updated within update_progress_data update_progress_data() # If no git update, signal any status change (e.g., "No changes" or error) # else: # REMOVED block that signaled event for no changes # REMOVED: data_updated_event.set() # Signal status change event # REMOVED: data_updated_event.clear() # Status message is still updated globally, just won't trigger event except git.GitCommandError as e: status_message = f"Git command error: {e}" print(status_message) # Try to get commit hash even if failed try: if os.path.exists(os.path.join(repo_path, ".git")): repo = git.Repo(repo_path) # Use previous hash if available, else try to read current if last_commit_hash is None: last_commit_hash = repo.head.commit.hexsha except Exception: if last_commit_hash is None: last_commit_hash = "Error reading commit" # REMOVED: data_updated_event.set() # Signal error status change # REMOVED: data_updated_event.clear() except Exception as e: status_message = f"Error checking repository: {e}" print(status_message) if last_commit_hash is None: last_commit_hash = "Error checking repo" # REMOVED: data_updated_event.set() # Signal error status change # REMOVED: data_updated_event.clear() # Return true if analysis was run (because repo changed), false otherwise return did_update def periodic_repo_check(): """Runs the check_and_update_repo function periodically.""" while True: print(f"\nStarting periodic repository check (Interval: {CHECK_INTERVAL_SECONDS}s)...") repo_changed = check_and_update_repo() # If repo didn't change, analysis wasn't triggered, but we might want to run it anyway? # For now, analysis only runs if repo changes or on initial startup. # If you want analysis *every* interval regardless of git changes, add a call here: # if not repo_changed: # print("Repo unchanged, triggering analysis anyway...") # update_progress_data() print(f"Check finished. Sleeping...") time.sleep(CHECK_INTERVAL_SECONDS) # --- Flask Routes (Largely unchanged, rely on updated global state) --- @app.route('/') def index(): return render_template('index.html') @app.route('/drawings') def drawings_page(): # Render the main index template which now contains all content return render_template('index.html') @app.route('/conflicts') def conflicts_page(): # Render the main index template which now contains all content return render_template('index.html') @app.route('/stream') def stream(): def event_stream(): last_sent_hash_to_client = None # Track hash sent to *this specific client* # Send initial state immediately on connection with repo_lock: current_global_hash = last_commit_hash current_global_status = status_message current_global_progress = progress_data initial_payload = json.dumps({ "status": current_global_status, "progress": current_global_progress, "last_commit": current_global_hash }) yield f"data: {initial_payload}\n\n" last_sent_hash_to_client = current_global_hash # Record that we sent the initial state for this client print(f"Sent initial state to new client (Hash: {last_sent_hash_to_client})") # Now wait for subsequent updates signaled by the event while True: data_updated_event.wait() # Wait for background thread to signal completion with repo_lock: # Re-acquire lock to get the latest state current_global_hash = last_commit_hash current_global_status = status_message current_global_progress = progress_data # Send update to the client IF the data is different from what they last received # Check hash first as primary indicator of change in underlying data if current_global_hash != last_sent_hash_to_client: print(f"Data updated (Hash changed: {last_sent_hash_to_client} -> {current_global_hash}). Sending update to client.") data_payload = json.dumps({ "status": current_global_status, "progress": current_global_progress, "last_commit": current_global_hash }) yield f"data: {data_payload}\n\n" last_sent_hash_to_client = current_global_hash # Update the hash sent to this client # else: # No need for the else block logging here anymore, as the event shouldn't trigger if hash is same # If hash is the same, maybe only the status message changed (e.g., error occurred) # Option: Send update only if status is different from last sent status? # For simplicity now, we only send if hash differs. Client UI shows last known status. # print(f"Data updated event triggered, but hash {current_global_hash} unchanged for this client. Status: '{current_global_status}'") # Removed log return Response(event_stream(), mimetype="text/event-stream") # --- Main Execution --- if __name__ == '__main__': # Ensure repo and text directories exist (optional for text dir if PDFs are pre-processed) if not os.path.exists(REPO_DIR): os.makedirs(REPO_DIR) if not os.path.exists(TEXT_OUTPUT_FOLDER): print(f"Warning: Text output folder '{TEXT_OUTPUT_FOLDER}' not found. Drawing check might fail unless PDF extraction runs first or files are manually placed.") # os.makedirs(TEXT_OUTPUT_FOLDER) # Optionally create it # Perform initial check/clone and data load print("Performing initial repository check and data load...") # Run check_and_update_repo which calls update_progress_data if repo updated initial_update_done = check_and_update_repo() # If repo existed and was up-to-date on first check, analysis wasn't run yet. Run it now. if not initial_update_done: print("Repository present and up-to-date. Running initial analysis...") # No need for lock here as background thread isn't running yet update_progress_data() # Run the full analysis else: print("Initial analysis was triggered by repo clone/pull.") # Start the background thread for periodic checks print("Starting background repository check thread...") repo_check_thread = threading.Thread(target=periodic_repo_check, daemon=True) repo_check_thread.start() # Run the Flask app print("Starting Flask server on port 5050...") # Use threaded=True for SSE background sending, debug=False for production/stability app.run(host='0.0.0.0', port=5050, debug=False, threaded=True)