import os import threading import time import json import git import csv import re from flask import Flask, render_template, jsonify, Response # --- Configuration Loading --- # Determine the directory where app.py resides _script_dir = os.path.dirname(os.path.abspath(__file__)) CONFIG_FILE_ABS = os.path.join(_script_dir, 'config.json') # Absolute path def load_config(): """Loads configuration from JSON file using an absolute path.""" try: print(f"Attempting to load config from absolute path: {CONFIG_FILE_ABS}") # Add log with open(CONFIG_FILE_ABS, 'r') as f: # Use the absolute path config = json.load(f) # Basic validation if 'projects' not in config or not isinstance(config['projects'], dict): raise ValueError("Config missing 'projects' dictionary.") if 'check_interval_seconds' not in config or not isinstance(config['check_interval_seconds'], int): raise ValueError("Config missing 'check_interval_seconds' integer.") print(f"Configuration loaded successfully from {CONFIG_FILE_ABS}") return config except FileNotFoundError: # Also print the current working directory for debugging cwd = os.getcwd() print(f"ERROR: Configuration file not found at absolute path: {CONFIG_FILE_ABS}") print(f"Current working directory when error occurred: {cwd}") exit(1) except json.JSONDecodeError: print(f"ERROR: Could not decode JSON from '{CONFIG_FILE_ABS}'. Check its format.") exit(1) except ValueError as e: print(f"ERROR: Invalid configuration in '{CONFIG_FILE_ABS}': {e}") exit(1) except Exception as e: cwd = os.getcwd() print(f"ERROR: An unexpected error occurred loading config from {CONFIG_FILE_ABS}: {e}") print(f"Current working directory during error: {cwd}") exit(1) config = load_config() CHECK_INTERVAL_SECONDS = config['check_interval_seconds'] # --- Project Class --- class Project: """Encapsulates configuration, state, and operations for a single project.""" def __init__(self, project_id, project_config): self.id = project_id self.config = project_config self.name = project_config.get('name', project_id) # Project-specific state self.last_commit_hash = None self.progress_data = { # Default empty structure "overall": {"total_csv": 0, "found_both": 0, "found_scada_only": 0, "found_drawing_only": 0, "missing_both": 0, "percentage_found_both": 0, "missing_list": [], "found_scada_only_list": [], "found_drawing_only_list": [], "found_both_list": []}, "panels": {} } self.status_message = "Initializing..." self.lock = threading.Lock() # Lock for accessing project-specific repo and data self.data_updated_event = threading.Event() # Event to signal data updates for this project # --- Pre-calculate absolute paths --- self._script_dir = os.path.dirname(os.path.abspath(__file__)) self._repo_dir_abs = os.path.abspath(os.path.join(self._script_dir, self.config.get('repo_dir', f'./cloned_repos/{project_id}'))) # Added default self._csv_path_abs = os.path.abspath(os.path.join(self._script_dir, self.config.get('manifest_csv', f'./manifests/{project_id}_manifest.csv'))) # Added default self._views_dir_abs = os.path.join(self._repo_dir_abs, self.config.get('scada_views_dir', '')) self._texts_dir_abs = os.path.abspath(os.path.join(self._script_dir, self.config.get('drawing_texts_dir', f'./extracted_texts/{project_id}'))) # Added default pdf_source_dir_rel = self.config.get('pdf_source_dir') self._pdf_dir_abs = os.path.abspath(os.path.join(self._script_dir, pdf_source_dir_rel)) if pdf_source_dir_rel else None print(f"Initialized Project '{self.id}' ({self.name}):") print(f" Repo Dir: {self._repo_dir_abs}") print(f" CSV Path: {self._csv_path_abs}") if self._views_dir_abs: print(f" Views Dir: {self._views_dir_abs}") if self._texts_dir_abs: print(f" Texts Dir: {self._texts_dir_abs}") if self._pdf_dir_abs: print(f" PDF Dir: {self._pdf_dir_abs}") # --- Path Helper Methods --- def get_repo_path(self): return self._repo_dir_abs def get_csv_path(self): return self._csv_path_abs def get_views_dir_path(self): return self._views_dir_abs def get_text_output_dir_path(self): return self._texts_dir_abs def get_pdf_source_dir_path(self): return self._pdf_dir_abs # --- Core Logic Methods --- def _update_progress_data(self): """(Internal) Reads manifest, runs checks, combines results, and updates project state.""" current_status = "" new_data_calculated = None manifest_data = None # Initialize with self.lock: try: # 1. Read Manifest self.status_message = f"[{self.id}] Reading manifest..." print(f"[{self.id}] Reading manifest: {self.get_csv_path()}") manifest_data = read_manifest(self) # Pass self (project instance) if manifest_data is None: current_status = f"[{self.id}] Error: Failed manifest read {self.get_csv_path()}" print(current_status) self.status_message = current_status # Don't return, proceed to calculate progress with empty data if needed # or handle error state appropriately manifest_data = [] # Ensure it's an empty list for calculation # 2. Check SCADA (If views dir is configured) if self.get_views_dir_path(): self.status_message = f"[{self.id}] Checking SCADA..." check_scada(self, manifest_data) # Pass self (project instance) else: print(f"[{self.id}] Skipping SCADA check (no views_dir configured).") # 3. Check Drawings (If texts dir is configured) if self.get_text_output_dir_path(): self.status_message = f"[{self.id}] Checking drawings..." check_drawings(self, manifest_data) # Pass self (project instance) else: print(f"[{self.id}] Skipping Drawing check (no drawing_texts_dir configured).") # 4. Calculate Combined Progress self.status_message = f"[{self.id}] Calculating progress..." new_data_calculated = calculate_combined_progress(self, manifest_data) # Pass self if new_data_calculated: if not manifest_data and new_data_calculated['overall']['total_csv'] == 0: # Check if manifest read failed/was empty current_status = f"[{self.id}] Analysis complete (Manifest empty/read failed) @ {time.strftime('%H:%M:%S')}" else: current_status = f"[{self.id}] Analysis complete @ {time.strftime('%H:%M:%S')}" self.progress_data = new_data_calculated # Update project's data else: # This case should ideally not happen if calculate_combined_progress always returns a dict current_status = f"[{self.id}] Error: Failed progress calculation." # Keep previous progress_data? except Exception as e: current_status = f"[{self.id}] CRITICAL Error during analysis: {e}" print(f"[{self.id}] Detailed Analysis Error: {e}", exc_info=True) # Optionally reset progress data or keep old? # self.progress_data = { ... } # Reset to default empty # Update status and signal completion (inside lock) print(current_status) self.status_message = current_status self.data_updated_event.set() self.data_updated_event.clear() def _check_and_update_repo(self): """(Internal) Checks and updates the Git repository for this project.""" did_update = False repo_path = self.get_repo_path() repo_url = self.config.get('repo_url') branch = self.config.get('branch', 'main') # Default to main if not specified if not repo_url: print(f"[{self.id}] Skipping repo check: repo_url not configured.") self.status_message = f"[{self.id}] Repo check skipped (no URL)" self.data_updated_event.set(); self.data_updated_event.clear() # Signal status change return False # No update occurred # --- Lock is acquired by the calling method (update_repo_and_analyze) --- try: # Ensure parent directory exists before cloning repo_parent_dir = os.path.dirname(repo_path) if not os.path.exists(repo_parent_dir): print(f"[{self.id}] Creating parent directory for repo: {repo_parent_dir}") os.makedirs(repo_parent_dir) repo_existed = os.path.exists(os.path.join(repo_path, ".git")) if not repo_existed: print(f"[{self.id}] Cloning repository {repo_url} (branch: {branch}) into {repo_path}...") self.status_message = f"[{self.id}] Cloning repository..." git.Repo.clone_from(repo_url, repo_path, branch=branch) repo = git.Repo(repo_path) self.last_commit_hash = repo.head.commit.hexsha print(f"[{self.id}] Initial clone complete. Commit: {self.last_commit_hash}") did_update = True else: repo = git.Repo(repo_path) print(f"[{self.id}] Fetching updates from remote...") current_local_commit = repo.head.commit.hexsha # Use current local hash if global is still None (e.g. first run after restart) if self.last_commit_hash is None: self.last_commit_hash = current_local_commit origin = repo.remotes.origin current_remote_commit = None # Initialize try: print(f"[{self.id}] Running fetch...") origin.fetch() print(f"[{self.id}] Fetch complete. Getting remote commit...") current_remote_commit = repo.commit(f'origin/{branch}').hexsha print(f"[{self.id}] Remote commit for origin/{branch}: {current_remote_commit}") except git.GitCommandError as fetch_err: print(f"[{self.id}] Warning: Could not fetch from remote/find branch '{branch}': {fetch_err}") # Keep status as is, will signal update later except Exception as fetch_err_other: print(f"[{self.id}] Warning: Unexpected error during fetch: {fetch_err_other}") # Keep status as is print(f"[{self.id}] Local commit: {current_local_commit}, Remote commit: {current_remote_commit or 'Fetch Failed/Not Found'}") if current_remote_commit and current_local_commit != current_remote_commit: print(f"[{self.id}] New commit detected! Pulling changes (Branch: {branch})...") self.status_message = f"[{self.id}] Pulling updates..." try: # Ensure working directory is clean before pull? # Add checkout if needed: repo.git.checkout(branch) pull_info = origin.pull() # Verify pull info if needed (e.g., pull_info[0].flags) new_commit_hash = repo.head.commit.hexsha print(f"[{self.id}] Pull successful. New commit: {new_commit_hash}") self.last_commit_hash = new_commit_hash did_update = True except git.GitCommandError as e_pull: self.status_message = f"[{self.id}] Error pulling repository: {e_pull}" print(self.status_message) self.last_commit_hash = current_local_commit # Keep old hash on failed pull except Exception as e_pull_other: self.status_message = f"[{self.id}] Unexpected error pulling repository: {e_pull_other}" print(self.status_message) self.last_commit_hash = current_local_commit # Keep old hash elif current_remote_commit: print(f"[{self.id}] No new commits detected.") # Only update status if no pull error occurred previously if not self.status_message.startswith(f"[{self.id}] Error pulling"): self.status_message = f"[{self.id}] Repo up-to-date @ {time.strftime('%H:%M:%S')}" else: # Fetch failed # Keep previous status message (e.g., Analysis complete, Cloning, Error pulling, etc.) print(f"[{self.id}] Keeping previous status due to fetch failure.") # Ensure last_commit_hash is set if it was None if self.last_commit_hash is None: self.last_commit_hash = current_local_commit or "Unknown (Fetch Failed)" except git.GitCommandError as e_git: error_msg = f"[{self.id}] Git command error: {e_git}" print(error_msg) self.status_message = error_msg # Try reading existing hash even on error try: if os.path.exists(os.path.join(repo_path, ".git")): repo = git.Repo(repo_path) # Don't overwrite existing hash if we have one if self.last_commit_hash is None: self.last_commit_hash = repo.head.commit.hexsha except Exception as e_read_hash: print(f"[{self.id}] Additionally failed to read hash after Git error: {e_read_hash}") if self.last_commit_hash is None: self.last_commit_hash = "Error reading commit" except Exception as e_other: error_msg = f"[{self.id}] Error checking repository: {e_other}" print(error_msg, exc_info=True) self.status_message = error_msg if self.last_commit_hash is None: self.last_commit_hash = "Error checking repo" # --- Lock is released by the calling method --- return did_update # Return whether the repo content was changed def update_repo_and_analyze(self, force_analysis=False): """Checks the repo for updates, pulls if necessary, and runs analysis if changed or forced.""" print(f"[{self.id}] Starting update_repo_and_analyze (force_analysis={force_analysis})...") repo_changed = False initial_hash = self.last_commit_hash # Store hash before check with self.lock: repo_changed = self._check_and_update_repo() if repo_changed: print(f"[{self.id}] Repo changed, triggering analysis...") # Analysis reads files, recalculates progress, updates status, and signals event self._update_progress_data() elif force_analysis: print(f"[{self.id}] Forcing analysis even though repo didn't change...") self._update_progress_data() else: # If repo didn't change and analysis not forced, still signal potentially changed status message from repo check print(f"[{self.id}] Repo unchanged, analysis not forced. Signaling potential status update.") self.data_updated_event.set() self.data_updated_event.clear() final_hash = self.last_commit_hash print(f"[{self.id}] Finished update_repo_and_analyze. Repo Changed: {repo_changed}. Hash: {initial_hash} -> {final_hash}. Status: '{self.status_message}'") return repo_changed # --- Global Application State --- app = Flask(__name__) projects = {} # Dictionary to hold Project instances {project_id: Project_instance} global_lock = threading.Lock() # Lock for modifying the projects dictionary itself (if needed later) # Instantiate projects from config for project_id, project_conf in config.get('projects', {}).items(): projects[project_id] = Project(project_id, project_conf) if not projects: print("ERROR: No projects defined in configuration. Exiting.") exit(1) # --- Original Global Variables (To be removed or refactored) --- # REPO_URL = "http://192.168.5.191:3000/LCI/MTN6" # REMOVE # REPO_DIR = "./cloned_repo" # REMOVE # BRANCH = "main" # REMOVE # CSV_FILENAME = "MTN6 Equipment Manifest REV6(Conveyor List).csv" # REMOVE # VIEWS_DIR_RELATIVE = "MTN6_SCADA/com.inductiveautomation.perspective/views/Detailed-Views" # REMOVE # TEXT_OUTPUT_FOLDER = "./extracted_texts" # REMOVE # CHECK_INTERVAL_SECONDS = 60 # MOVED to config load # CSV_ALIAS_COL = 'Alias' # MOVE to project config? Or keep global if consistent? -> Moved to config # CSV_PANEL_COL = 'Control Panel' # MOVE to project config? -> Moved to config # CSV_EQ_TYPE_COL = 'Equipment Type' # MOVE to project config? -> Moved to config # CSV_CONV_TYPE_COL = 'Type of Conveyor' # MOVE to project config? -> Moved to config # last_commit_hash = None # MOVED to Project class # progress_data = { ... } # MOVED to Project class # status_message = "Initializing..." # MOVED to Project class # repo_lock = threading.Lock() # MOVED to Project class (per-project lock) # data_updated_event = threading.Event() # MOVED to Project class (per-project event) # --- Helper Functions (To be refactored or become Project methods) --- # def get_repo_path(): # BECOMES Project method # return os.path.abspath(REPO_DIR) # def get_csv_path(): # BECOMES Project method # script_dir = os.path.dirname(os.path.abspath(__file__)) # return os.path.join(script_dir, CSV_FILENAME) # def get_views_dir_path(): # BECOMES Project method # return os.path.join(get_repo_path(), VIEWS_DIR_RELATIVE) # def get_text_output_dir_path(): # BECOMES Project method # script_dir = os.path.dirname(os.path.abspath(__file__)) # return os.path.abspath(os.path.join(script_dir, TEXT_OUTPUT_FOLDER)) def normalize(text): """Normalize string for comparison: lowercase, treat '-' and '_' the same, remove all whitespace.""" # This seems general enough to remain a standalone helper if not isinstance(text, str): return "" text = text.lower() # Convert to lowercase text = text.replace('-', '_') # Replace hyphens with underscores text = re.sub(r'\s+', '', text) # Remove ALL whitespace characters (including newlines) return text def read_manifest(project: Project): # Takes Project instance """Reads the manifest CSV for a specific project into a list of dictionaries.""" manifest_items = [] csv_filepath = project.get_csv_path() # Get column names from project config cols = project.config.get('csv_cols', {}) col_alias = cols.get('alias', 'Alias') # Default fallback col_panel = cols.get('panel', 'Control Panel') col_eq_type = cols.get('eq_type', 'Equipment Type') col_conv_type = cols.get('conv_type', 'Type of Conveyor') required_cols = {col_alias, col_panel} optional_cols = {col_eq_type, col_conv_type} try: with open(csv_filepath, mode='r', newline='', encoding='utf-8-sig') as infile: reader = csv.DictReader(infile) headers = set(h.strip() for h in reader.fieldnames) missing_required = required_cols - headers if missing_required: print(f"[{project.id}] Error: Missing required columns in CSV '{csv_filepath}': {', '.join(missing_required)}") print(f"[{project.id}] Available columns: {', '.join(headers)}") return None for row in reader: alias = row.get(col_alias, "").strip() panel = row.get(col_panel, "").strip() if alias and panel: item = { "alias": alias, "normalized_alias": normalize(alias), "control_panel": panel, "equipment_type": row.get(col_eq_type, "").strip() if col_eq_type in headers else "N/A", "conveyor_type": row.get(col_conv_type, "").strip() if col_conv_type in headers else "N/A", "found_scada": False, "found_drawing": False } manifest_items.append(item) elif alias and not panel: print(f"[{project.id}] Warning: Alias '{alias}' found in CSV but is missing its '{col_panel}'. Skipping.") except FileNotFoundError: print(f"[{project.id}] Error: Manifest file not found at {csv_filepath}") return None except Exception as e: print(f"[{project.id}] Error reading CSV file {csv_filepath}: {e}") return None print(f"[{project.id}] Read {len(manifest_items)} valid items from manifest.") return manifest_items def check_scada(project: Project, manifest_data): # Takes Project instance """Checks for aliases in SCADA JSON view files for a specific project.""" if not manifest_data: return views_dir = project.get_views_dir_path() print(f"[{project.id}] Starting SCADA check in directory: {views_dir}...") found_count = 0 processed_files = 0 alias_map = {} for item in manifest_data: na = item['normalized_alias'] if na not in alias_map: alias_map[na] = [] alias_map[na].append(item) try: # Check if views dir exists BEFORE walking if not os.path.exists(views_dir): print(f"[{project.id}] Warning: SCADA Views directory not found at {views_dir}. Skipping SCADA check.") return # Exit function early for root, _, files in os.walk(views_dir): for filename in files: if filename == 'view.json': filepath = os.path.join(root, filename) processed_files += 1 try: with open(filepath, 'r', encoding='utf-8') as f: content = f.read() normalized_content = normalize(content) for norm_alias, items in alias_map.items(): if norm_alias in normalized_content: for item in items: if not item['found_scada']: item['found_scada'] = True found_count += 1 except Exception as e: print(f"[{project.id}] Warning: Could not read or process JSON file {filepath}: {e}") except Exception as e: print(f"[{project.id}] Error walking SCADA views directory {views_dir}: {e}") print(f"[{project.id}] SCADA check finished. Processed {processed_files} view.json files. Found {found_count} manifest aliases.") def check_drawings(project: Project, manifest_data): # Takes Project instance """Checks if aliases from manifest exist in *any* extracted drawing text file for a specific project.""" if not manifest_data: return text_output_dir = project.get_text_output_dir_path() print(f"[{project.id}] Starting Drawings check: Scanning all .txt files in directory: {text_output_dir}...") all_normalized_content = "" processed_files = 0 found_files = [] try: # Check if text dir exists BEFORE listing if not os.path.exists(text_output_dir): print(f"[{project.id}] Warning: Drawings text directory not found: {text_output_dir}. Skipping Drawings check.") return # Exit function early if not os.path.isdir(text_output_dir): print(f"[{project.id}] Error: Path specified for Drawings text is not a directory: {text_output_dir}. Skipping Drawings check.") return # Exit function early for filename in os.listdir(text_output_dir): if filename.lower().endswith('.txt'): filepath = os.path.join(text_output_dir, filename) processed_files += 1 try: with open(filepath, 'r', encoding='utf-8') as f: content = f.read() all_normalized_content += normalize(content) + "\\n--file-separator--\\n" found_files.append(filename) except Exception as e: print(f"[{project.id}] Warning: Could not read or process text file {filepath}: {e}") if processed_files == 0: print(f"[{project.id}] Warning: No .txt files found in the directory {text_output_dir}. Cannot perform drawing check.") return else: print(f"[{project.id}] Successfully read and normalized content from {len(found_files)} out of {processed_files} .txt files found.") found_count = 0 for item in manifest_data: normalized_alias = item['normalized_alias'] if normalized_alias and normalized_alias in all_normalized_content: item['found_drawing'] = True found_count += 1 print(f"[{project.id}] Drawings check finished. Found {found_count} manifest aliases within the combined text content.") except Exception as e: print(f"[{project.id}] Error during drawings check in {text_output_dir}: {e}") def calculate_combined_progress(project: Project, manifest_data): # Takes Project instance """Calculates the combined progress based on scada/drawing status for a specific project.""" print(f"[{project.id}] Calculating combined progress statistics...") results = { "overall": { "total_csv": 0, "found_both": 0, "found_scada_only": 0, "found_drawing_only": 0, "missing_both": 0, "percentage_found_both": 0, "missing_list": [], "found_scada_only_list": [], "found_drawing_only_list": [], "found_both_list": [] }, "panels": {} } if not manifest_data: print(f"[{project.id}] Warning: No manifest data to calculate progress from.") return results # Return default empty structure results["overall"]["total_csv"] = len(manifest_data) for item in manifest_data: panel = item['control_panel'] if panel not in results["panels"]: results["panels"][panel] = { "total": 0, "found_both": 0, "found_scada_only": 0, "found_drawing_only": 0, "missing_both": 0, "percentage_found_both": 0, "missing_list": [], "found_scada_only_list": [], "found_drawing_only_list": [], "found_both_list": [] } results["panels"][panel]["total"] += 1 item_detail = {k: v for k, v in item.items() if k not in ['normalized_alias']} if item['found_scada'] and item['found_drawing']: results["overall"]["found_both"] += 1 results["panels"][panel]["found_both"] += 1 results["overall"]["found_both_list"].append(item_detail) results["panels"][panel]["found_both_list"].append(item_detail) elif item['found_scada'] and not item['found_drawing']: results["overall"]["found_scada_only"] += 1 results["panels"][panel]["found_scada_only"] += 1 results["overall"]["found_scada_only_list"].append(item_detail) results["panels"][panel]["found_scada_only_list"].append(item_detail) elif not item['found_scada'] and item['found_drawing']: results["overall"]["found_drawing_only"] += 1 results["panels"][panel]["found_drawing_only"] += 1 results["overall"]["found_drawing_only_list"].append(item_detail) results["panels"][panel]["found_drawing_only_list"].append(item_detail) else: # Missing both results["overall"]["missing_both"] += 1 results["panels"][panel]["missing_both"] += 1 results["overall"]["missing_list"].append(item_detail) results["panels"][panel]["missing_list"].append(item_detail) if results["overall"]["total_csv"] > 0: results["overall"]["percentage_found_both"] = round( (results["overall"]["found_both"] / results["overall"]["total_csv"]) * 100, 1 ) for panel_data in results["panels"].values(): if panel_data["total"] > 0: panel_data["percentage_found_both"] = round( (panel_data["found_both"] / panel_data["total"]) * 100, 1 ) print(f"[{project.id}] Combined progress calculation finished.") return results # --- Background Task --- def periodic_repo_check(): """Runs the check_and_update_repo method periodically for all projects sequentially.""" # Small delay at startup to allow Flask server to bind port before first check cycle prints time.sleep(2) while True: print(f"\n--- Starting Periodic Project Checks (Interval: {CHECK_INTERVAL_SECONDS}s) ---") project_ids = list(projects.keys()) for project_id in project_ids: project = projects.get(project_id) if not project: print(f"Warning: Project ID '{project_id}' found in keys but not in projects dict during periodic check.") continue print(f"--- Checking Project: {project.id} ({project.name}) ---") try: # Use the public method which handles locking and analysis triggering project.update_repo_and_analyze() except Exception as e: print(f"!! CRITICAL ERROR during periodic check for project '{project_id}': {e}", exc_info=True) # Update status via lock if possible with project.lock: project.status_message = f"[{project.id}] CRITICAL check cycle error: {e}" if project.last_commit_hash is None: project.last_commit_hash = "Unknown (Check Error)" project.data_updated_event.set() # Signal error project.data_updated_event.clear() print(f"--- Finished check for project: {project.id} ---") print(f"--- All project checks finished. Sleeping for {CHECK_INTERVAL_SECONDS}s... ---") time.sleep(CHECK_INTERVAL_SECONDS) # --- Flask Routes --- @app.route('/') def index(): # Pass project list sorted by name to template for selector project_list = sorted( [{"id": pid, "name": p.name} for pid, p in projects.items()], key=lambda x: x['name'] ) return render_template('index.html', projects=project_list) @app.route('/stream/') def stream(project_id): project = projects.get(project_id) if not project: return jsonify({"error": "Project not found"}), 404 def event_stream(target_project: Project): last_sent_hash_to_client = None print(f"SSE Client connected for project: {target_project.id}") # ... (rest of stream function remains the same) ... # Send initial state immediately with target_project.lock: current_hash = target_project.last_commit_hash current_status = target_project.status_message current_progress = target_project.progress_data initial_payload = json.dumps({ "status": current_status, "progress": current_progress, "last_commit": current_hash }) yield f"data: {initial_payload}\\n\\n" last_sent_hash_to_client = current_hash print(f"[{target_project.id}] Sent initial SSE state (Hash: {last_sent_hash_to_client})") # Wait for project-specific updates while True: # Add a timeout to prevent hangs if event logic fails? event_fired = target_project.data_updated_event.wait(timeout=CHECK_INTERVAL_SECONDS * 2) if not event_fired: # If timeout occurs, maybe send a keep-alive or re-send current state? print(f"[{target_project.id}] SSE wait timeout. Re-checking state.") # Optionally send a keep-alive comment: yield ": keepalive\n\n" # Or just continue to re-evaluate state below pass # Fall through to check state with target_project.lock: current_hash = target_project.last_commit_hash current_status = target_project.status_message current_progress = target_project.progress_data # Send update ONLY if hash changed or if the wait timed out (to ensure client syncs) if current_hash != last_sent_hash_to_client or not event_fired: print(f"[{target_project.id}] SSE Data updated (Hash: {last_sent_hash_to_client} -> {current_hash}, Event Fired: {event_fired}). Sending.") data_payload = json.dumps({ "status": current_status, "progress": current_progress, "last_commit": current_hash }) yield f"data: {data_payload}\\n\\n" last_sent_hash_to_client = current_hash # else: # print(f"[{target_project.id}] SSE Event triggered, hash {current_hash} unchanged.") return Response(event_stream(project), mimetype="text/event-stream") # --- Main Execution --- if __name__ == '__main__': # Ensure base directories exist (safer to do this before initializing projects) # Create parent dirs for repos, manifests, texts, pdfs based on config dirs_to_ensure = set() for pid, pconf in config.get('projects', {}).items(): script_dir = os.path.dirname(os.path.abspath(__file__)) dirs_to_ensure.add(os.path.dirname(os.path.abspath(os.path.join(script_dir, pconf.get('repo_dir', f'./cloned_repos/{pid}'))))) dirs_to_ensure.add(os.path.dirname(os.path.abspath(os.path.join(script_dir, pconf.get('manifest_csv', f'./manifests/{pid}_manifest.csv'))))) dirs_to_ensure.add(os.path.dirname(os.path.abspath(os.path.join(script_dir, pconf.get('drawing_texts_dir', f'./extracted_texts/{pid}'))))) pdf_dir = pconf.get('pdf_source_dir') if pdf_dir: dirs_to_ensure.add(os.path.dirname(os.path.abspath(os.path.join(script_dir, pdf_dir)))) for d in dirs_to_ensure: if d and not os.path.exists(d): print(f"Creating necessary directory: {d}") try: os.makedirs(d, exist_ok=True) except OSError as e: print(f"ERROR: Could not create directory {d}: {e}") # Decide if this is fatal? Probably depends on which dir failed. # exit(1) # Exit if repo/manifest dirs can't be made? # Initialize Project objects AFTER ensuring directories for project_id, project_conf in config.get('projects', {}).items(): if project_id not in projects: # Avoid re-init if already done (though shouldn't happen here) projects[project_id] = Project(project_id, project_conf) if not projects: print("ERROR: No projects defined or initialized. Exiting.") exit(1) # Perform initial check/clone and data load for ALL projects print("--- Performing Initial Project Analysis ---") # ====================================================================== # === TEMPORARILY DISABLED INITIAL ANALYSIS THREADING FOR DEBUGGING === # initial_threads = [] # for project_id, project in projects.items(): # print(f"--- Starting initial analysis for: {project.id} ({project.name}) ---") # # Run initial analysis in parallel threads for faster startup? # # Set force_analysis=True to ensure data is loaded even if repo exists and hasn't changed since last run # thread = threading.Thread(target=project.update_repo_and_analyze, kwargs={'force_analysis': True}) # initial_threads.append(thread) # thread.start() # # # Wait for all initial analyses to complete # print("Waiting for initial analyses to complete...") # for thread in initial_threads: # thread.join() # print("--- Initial Analyses Complete ---") print("SKIPPING initial analysis for debugging port binding issue.") print("NOTE: Project data will be loaded on the first periodic check.") # ====================================================================== # Start the background thread for periodic checks AFTER initial load print("Starting background periodic check thread...") repo_check_thread = threading.Thread(target=periodic_repo_check, daemon=True) repo_check_thread.start() # Run the Flask app print(f"Starting Flask server: http://0.0.0.0:5050") try: app.run(host='0.0.0.0', port=5050, debug=False, threaded=True) except Exception as e: print(f"FATAL ERROR during app.run: {e}", exc_info=True) exit(1)