import os
import threading
import time
import json
import git
import re  # Import re for project name validation
from flask import Flask, render_template, jsonify, Response, request  # Add request
from werkzeug.utils import secure_filename  # For securing filenames
from concurrent.futures import ThreadPoolExecutor  # Import ThreadPoolExecutor

# Import configurations and new modules
import config
import utils
from manifest_reader import read_manifest
from scada_checker import check_scada
from drawing_checker import check_drawings
from progress_calculator import calculate_combined_progress

app = Flask(__name__)

# --- Global state (Per-Project) ---
# Dictionaries keyed by project name
project_last_commit = {}
project_progress_data = {}
project_status = {}
all_projects = utils.discover_projects()  # Discover projects at startup


# Initialize state for discovered projects
def get_default_progress():
    # Helper to return a fresh copy of the default progress structure
    return {
        "overall": {
            "total_csv": 0,
            "found_both": 0,
            "found_scada_only": 0,
            "found_drawing_only": 0,
            "missing_both": 0,
            "percentage_found_both": 0,
            "missing_list": [],
            "found_scada_only_list": [],
            "found_drawing_only_list": [],
            "found_both_list": []
        },
        "panels": {}
    }


for proj_name in all_projects:
    project_last_commit[proj_name] = None
    project_progress_data[proj_name] = get_default_progress()
    project_status[proj_name] = "Initializing..."

repo_lock = threading.Lock()  # Lock remains global for now, managing access to shared dicts
data_updated_event = threading.Event()  # Event signals ANY project update

# Define max workers for thread pools
MAX_INITIAL_CHECK_WORKERS = 5  # Adjust as needed
MAX_PERIODIC_CHECK_WORKERS = 5  # Adjust as needed


# --- Core Logic Orchestration (Per-Project) ---
def update_progress_data(project_name):
    """Reads manifest, runs checks, combines results for a specific project."""
    global project_progress_data, project_status  # Reference the global dicts
    current_status = ""
    new_data_calculated = None

    print(f"[{project_name}] Starting analysis workflow...")

    # 1. Read Manifest
    set_status(project_name, "Reading manifest file...")
    manifest_data = read_manifest(project_name)
    if manifest_data is None:
        current_status = f"[{project_name}] Error: Failed to read or process manifest file."
        print(current_status)
        set_status(project_name, current_status)
        # Reset progress data for this project on manifest error
        with repo_lock:
            project_progress_data[project_name] = get_default_progress()
        data_updated_event.set(); data_updated_event.clear()  # Signal update (error status + reset data)
        return  # Cannot proceed without manifest

    # 2. Check SCADA (JSON files)
    set_status(project_name, "Checking SCADA views...")
    check_scada(project_name, manifest_data)

    # 3. Check Drawings (TXT files)
    set_status(project_name, "Checking drawing text files...")
    check_drawings(project_name, manifest_data)

    # 4. Calculate Combined Progress
    set_status(project_name, "Calculating combined progress...")
    try:
        new_data_calculated = calculate_combined_progress(project_name, manifest_data)
        if new_data_calculated:
            current_status = f"[{project_name}] Analysis complete at {time.strftime('%Y-%m-%d %H:%M:%S')}"
        else:
            current_status = f"[{project_name}] Warning: Progress calculation yielded no results (manifest might be empty)."
            new_data_calculated = get_default_progress()  # Reset to default empty structure
    except Exception as e:
        current_status = f"[{project_name}] Error during progress calculation: {e}"
        print(f"Detailed Calculation Error: {e}")  # Log stack trace (removed exc_info)
        new_data_calculated = None  # Ensure no partial data update

    # Update global state atomically for this project
    with repo_lock:
        print(current_status)
        # Update status first (always)
        project_status[project_name] = current_status
        # Update progress data only if calculation was successful or yielded default empty
        if new_data_calculated is not None:
            project_progress_data[project_name] = new_data_calculated
    # Signal update regardless of calculation success if status changed or data changed
    data_updated_event.set()
    data_updated_event.clear()


def set_status(project_name, message):
    """Helper to update status message for a project and signal change."""
    global project_status
    with repo_lock:
        if project_status.get(project_name) != message:
            print(f"[{project_name}] Status: {message}")
            project_status[project_name] = message
            data_updated_event.set()
            data_updated_event.clear()


# --- Git Repo Handling (Per-Project) ---
def check_and_update_repo(project_name):
    """Checks and updates the Git repository for a specific project, minimizing lock contention."""
    global project_last_commit, project_status  # Reference global dicts
    repo_path = utils.get_repo_path(project_name)
    repo_url = config.REPO_URL  # Assuming global for now
    branch = config.BRANCH  # Assuming global for now
    did_update = False  # Flag to track if files were actually updated
    initial_hash = None
    with repo_lock:  # Briefly lock to get initial hash
        initial_hash = project_last_commit.get(project_name)

    try:
        project_base_path = utils.get_project_base_path(project_name)
        if not os.path.exists(project_base_path):
            # Use set_status which handles locking
            set_status(project_name, f"Error: Project directory not found: {project_base_path}")
            return False  # Cannot proceed

        # Ensure parent directory exists (outside lock)
        os.makedirs(os.path.dirname(repo_path), exist_ok=True)

        repo_existed = os.path.exists(os.path.join(repo_path, ".git"))
        if not repo_existed:
            print(f"[{project_name}] Cloning repository {repo_url} into {repo_path}...")
            set_status(project_name, "Cloning repository...")
            # --- Clone happens OUTSIDE lock ---
            try:
                git.Repo.clone_from(repo_url, repo_path, branch=branch)
                repo = git.Repo(repo_path)
                new_commit_hash = repo.head.commit.hexsha
                with repo_lock:  # Lock ONLY to update shared state
                    project_last_commit[project_name] = new_commit_hash
                print(f"[{project_name}] Initial clone complete. Commit: {new_commit_hash}")
                did_update = True
            except git.GitCommandError as clone_err:
                set_status(project_name, f"Error cloning repository: {clone_err}")
                print(f"[{project_name}] Git clone error: {clone_err}")
                # Ensure commit state reflects error if needed
                with repo_lock:
                    if project_last_commit.get(project_name) is None:
                        project_last_commit[project_name] = "Clone Error"
                return False  # Indicate no update occurred
            # --- End Clone ---
        else:
            # --- Fetch/Pull Logic ---
            repo = git.Repo(repo_path)
            current_local_commit = repo.head.commit.hexsha
            # Ensure initial hash is set if missing (brief lock)
            with repo_lock:
                if project_last_commit.get(project_name) is None:
                    project_last_commit[project_name] = current_local_commit
                    initial_hash = current_local_commit  # Update local var too

            print(f"[{project_name}] Fetching updates from remote...")
            set_status(project_name, "Checking for updates...")
            origin = repo.remotes.origin
            # --- Fetch happens OUTSIDE lock ---
            try:
                fetch_info = origin.fetch()
            except git.GitCommandError as fetch_err:
                set_status(project_name, f"Error fetching remote: {fetch_err}")
                print(f"[{project_name}] Git fetch error: {fetch_err}")
                return False  # No update occurred
            # --- End Fetch ---

            # --- Check commits (brief lock) ---
            current_remote_commit = None
            pull_needed = False
            try:
                # Must read remote commit *after* fetch
                current_remote_commit = repo.commit(f'origin/{branch}').hexsha
                # Check if pull is needed inside the try block after getting remote commit
                if current_local_commit != current_remote_commit:
                    pull_needed = True
            except git.GitCommandError as commit_err:
                set_status(project_name, f"Error accessing remote branch origin/{branch}: {commit_err}")
                print(f"[{project_name}] Error accessing remote branch: {commit_err}")
                return False  # Cannot compare/pull
            # --- End Check commits ---

            print(f"[{project_name}] Local commit: {current_local_commit}, Remote commit (origin/{branch}): {current_remote_commit}")

            if pull_needed:
                print(f"[{project_name}] New commit detected! Pulling changes...")
                set_status(project_name, "Pulling updates...")
                # --- Pull happens OUTSIDE lock ---
                try:
                    pull_info = origin.pull()
                    new_commit_hash = repo.head.commit.hexsha  # Get hash after pull
                    with repo_lock:  # Lock ONLY to update shared state
                        project_last_commit[project_name] = new_commit_hash
                    print(f"[{project_name}] Pull successful. New commit: {new_commit_hash}")
                    did_update = True
                except git.GitCommandError as pull_err:
                    set_status(project_name, f"Error pulling repository: {pull_err}")
                    print(f"[{project_name}] Git pull error: {pull_err}")
                    # Revert shared state hash if pull failed? Safest is to keep the pre-pull local commit.
                    with repo_lock:
                        project_last_commit[project_name] = current_local_commit  # Revert to known local state before pull attempt
                    # Keep did_update = False
                # --- End Pull ---
            else:
                print(f"[{project_name}] No new commits detected.")
                # Update status only if it wasn't an error before (set_status handles lock)
                current_status = project_status.get(project_name, "")
                if not current_status.startswith("Error"):
                    set_status(project_name, f"Checked repo at {time.strftime('%Y-%m-%d %H:%M:%S')}. No changes.")
            # --- End Fetch/Pull Logic ---

        # --- Run analysis IF repo was updated (outside lock) ---
        if did_update:
            print(f"[{project_name}] Repository updated. Triggering analysis...")
            update_progress_data(project_name)  # Calls the orchestrator function

    except git.InvalidGitRepositoryError:
        msg = f"Error: Directory '{repo_path}' exists but is not a valid Git repository. Consider deleting it and restarting."
        set_status(project_name, msg)  # Handles lock
        print(f"[{project_name}] {msg}")
        with repo_lock:  # Lock to update commit state
            project_last_commit[project_name] = "Invalid Repository"
    except git.GitCommandError as e:
        # General Git command error (if not caught above)
        msg = f"Git command error: {e}"
        set_status(project_name, msg)  # Handles lock
        print(f"[{project_name}] {msg}")
        # Try to set commit hash state even on error (brief lock)
        with repo_lock:
            if project_last_commit.get(project_name) is None:
                # Only set if not already set (e.g., by failed pull)
                try:
                    if os.path.exists(os.path.join(repo_path, ".git")):
                        repo = git.Repo(repo_path)
                        project_last_commit[project_name] = repo.head.commit.hexsha
                    else:
                        project_last_commit[project_name] = "Error (No repo)"
                except Exception:
                    project_last_commit[project_name] = "Error reading commit"
    except Exception as e:
        # Catch-all for other unexpected errors
        msg = f"Unexpected error checking repository: {e}"
        set_status(project_name, msg)  # Handles lock
        print(f"[{project_name}] {msg}")
        # Log stack trace for unexpected errors
        with repo_lock:  # Lock to update commit state
            if project_last_commit.get(project_name) is None:
                project_last_commit[project_name] = "Error checking repo"

    # Return true if analysis was run (because repo changed), false otherwise
    return did_update


def periodic_repo_check():
    """Runs the check_and_update_repo function periodically for all projects using a thread pool."""
    global all_projects
    # Use a ThreadPoolExecutor to manage periodic checks concurrently
    with ThreadPoolExecutor(max_workers=MAX_PERIODIC_CHECK_WORKERS) as executor:
        while True:
            print(f"\nStarting periodic check cycle for all projects (Interval: {config.CHECK_INTERVAL_SECONDS}s)...")
            current_projects = list(all_projects)  # Copy list in case it changes
            futures = []
            for project_name in current_projects:
                print(f"--- Submitting periodic check for project: {project_name} ---")
                # Submit check_and_update_repo to the thread pool
                futures.append(executor.submit(run_check_and_log_errors, project_name, "periodic"))
            # Wait briefly for tasks to start, but don't block the loop long
            # time.sleep(1)  # Optional: short sleep if needed
            print(f"Periodic check cycle submitted. Sleeping for {config.CHECK_INTERVAL_SECONDS}s...")
            time.sleep(config.CHECK_INTERVAL_SECONDS)
            # Note: We don't explicitly wait for futures to complete here.
            # The pool manages threads, and the loop continues periodically.


def run_check_and_log_errors(project_name, check_type="initial"):
    """Wrapper to run check_and_update_repo and log any exceptions."""
    try:
        print(f"--- [{check_type.capitalize()}] Running check for project: {project_name} ---")
        check_and_update_repo(project_name)
        print(f"--- [{check_type.capitalize()}] Finished check for project: {project_name} ---")
    except Exception as e:
        err_msg = f"Critical error during {check_type} check for {project_name}: {e}"
        print(err_msg)
        # Use set_status which handles locking and event signaling
        set_status(project_name, f"Error during {check_type} check: {e}")


def initial_project_setup_and_analysis(project_name):
    """Performs initial repo check/update AND ensures initial analysis runs."""
    try:
        print(f"--- [Initial Setup] Starting for project: {project_name} ---")
        # Run check_and_update_repo first. It returns True if it triggered an update/analysis.
        update_occurred = check_and_update_repo(project_name)
        # If no update occurred (repo was cloned before or was already up-to-date),
        # we still need to run the analysis once on startup.
        if not update_occurred:
            print(f"--- [Initial Analysis] Repo up-to-date or non-git. Running analysis for project: {project_name} ---")
            update_progress_data(project_name)  # Run the analysis explicitly
        print(f"--- [Initial Setup] Finished for project: {project_name} ---")
    except Exception as e:
        err_msg = f"Critical error during initial setup/analysis for {project_name}: {e}"
        print(err_msg)
        set_status(project_name, f"Error during initial setup: {e}")


# --- Flask Routes ---
@app.route('/')
def index():
    # Pass the list of projects and initial statuses to the template
    with repo_lock:
        initial_statuses = dict(project_status)  # Get a consistent snapshot
        project_list = list(all_projects)
    return render_template('index.html', projects=project_list, initial_statuses=initial_statuses)

# Removed redundant routes for /drawings and /conflicts as index.html handles tabs


@app.route('/stream')
def stream():
    def event_stream():
        # Track state sent to *this specific client* (using a copy of global state)
        last_sent_state = {}

        # Send initial state immediately on connection
        with repo_lock:
            # Send data for all known projects
            current_global_state = {
                "projects": list(all_projects),
                "status": dict(project_status),
                "progress": dict(project_progress_data),
                "last_commit": dict(project_last_commit)
            }
        initial_payload = json.dumps(current_global_state)
        yield f"data: {initial_payload}\n\n"
        last_sent_state = current_global_state  # Store the state sent to this client
        print(f"Sent initial state to new client for projects: {last_sent_state.get('projects')}")

        # Now wait for subsequent updates signaled by the event
        while True:
            data_updated_event.wait()  # Wait for ANY background thread signal
            with repo_lock:  # Re-acquire lock to get the latest state
                current_global_state = {
                    "projects": list(all_projects),
                    "status": dict(project_status),
                    "progress": dict(project_progress_data),
                    "last_commit": dict(project_last_commit)
                }
            # Basic check: Compare entire state dictionaries (can be refined if needed)
            # Using json.dumps for a quick deep comparison, might be slow for huge data
            current_state_json = json.dumps(current_global_state, sort_keys=True)
            last_sent_state_json = json.dumps(last_sent_state, sort_keys=True)
            if current_state_json != last_sent_state_json:
                print(f"Global state changed. Sending update to client.")
                # print(f"Debug: Old state: {last_sent_state_json}")  # Optional debug
                # print(f"Debug: New state: {current_state_json}")  # Optional debug
                yield f"data: {current_state_json}\n\n"
                last_sent_state = current_global_state  # Update the state sent to this client
            # else:  # Log if event triggered but nothing changed
            #     print(f"Data update event triggered, but state unchanged for this client.")

    return Response(event_stream(), mimetype="text/event-stream")


# --- NEW: Add Project Endpoint ---
ALLOWED_PROJECT_NAME_REGEX = re.compile(r'^[a-zA-Z0-9_-]+$')


@app.route('/add_project', methods=['POST'])
def add_project():
    if 'projectName' not in request.form:
        return jsonify(success=False, message="Missing project name."), 400
    if 'repoUrl' not in request.form:
        # We receive it but don't use it for cloning yet
        return jsonify(success=False, message="Missing repository URL."), 400
    if 'manifestFile' not in request.files:
        return jsonify(success=False, message="Missing manifest CSV file."), 400

    project_name_raw = request.form['projectName'].strip()
    repo_url = request.form['repoUrl'].strip()
    manifest_file = request.files['manifestFile']
    pdf_files = request.files.getlist('pdfFiles')  # Use getlist for multiple files

    # --- Validation ---
    if not project_name_raw:
        return jsonify(success=False, message="Project name cannot be empty."), 400
    if not ALLOWED_PROJECT_NAME_REGEX.match(project_name_raw):
        return jsonify(success=False, message="Invalid Project Name. Use only letters, numbers, underscores, or hyphens."), 400
    if not manifest_file.filename or not manifest_file.filename.lower().endswith('.csv'):
        return jsonify(success=False, message="Manifest file must be a .csv file."), 400
    if not pdf_files or all(not f.filename for f in pdf_files):
        # Check if list is empty or contains only empty filenames
        return jsonify(success=False, message="At least one PDF file must be provided."), 400
    for pdf_file in pdf_files:
        if not pdf_file.filename or not pdf_file.filename.lower().endswith('.pdf'):
            return jsonify(success=False, message=f"Invalid file type uploaded: {pdf_file.filename}. Only PDF files allowed."), 400

    # Use secure_filename for the project name used in paths
    # Although we validated with regex, this adds another layer against path traversal etc.
    safe_project_name = secure_filename(project_name_raw)
    if safe_project_name != project_name_raw:
        # Extra check if secure_filename modified it unexpectedly (e.g., spaces removed)
        print(f"Warning: Project name sanitized from '{project_name_raw}' to '{safe_project_name}'")
        # Optionally reject here, or proceed with the sanitized name

    project_base_path = os.path.join(config.PROJECTS_ROOT_DIR, safe_project_name)
    pdf_dir_path = os.path.join(project_base_path, 'pdfs')
    repo_dir_path = os.path.join(project_base_path, 'repo')  # Create repo dir, but don't clone yet

    # --- Check if project already exists ---
    if os.path.exists(project_base_path):
        return jsonify(success=False, message=f"Project '{safe_project_name}' already exists."), 400

    # --- Create Directories ---
    try:
        print(f"Creating directory structure for project: {safe_project_name}")
        os.makedirs(project_base_path, exist_ok=False)  # Base dir first, fail if exists
        os.makedirs(pdf_dir_path, exist_ok=True)
        os.makedirs(repo_dir_path, exist_ok=True)
    except OSError as e:
        print(f"Error creating directories for {safe_project_name}: {e}")
        return jsonify(success=False, message=f"Server error creating project directories: {e}"), 500

    # --- Save Manifest File ---
    try:
        manifest_filename = secure_filename(manifest_file.filename)
        manifest_save_path = os.path.join(project_base_path, manifest_filename)
        print(f"Saving manifest file to: {manifest_save_path}")
        manifest_file.save(manifest_save_path)
    except Exception as e:
        print(f"Error saving manifest file for {safe_project_name}: {e}")
        # Clean up created directories on error?
        # shutil.rmtree(project_base_path, ignore_errors=True)
        return jsonify(success=False, message=f"Error saving manifest file: {e}"), 500

    # --- Save PDF Files ---
    saved_pdfs = []
    try:
        for pdf_file in pdf_files:
            if pdf_file and pdf_file.filename:  # Check again if file is valid
                pdf_filename = secure_filename(pdf_file.filename)
                pdf_save_path = os.path.join(pdf_dir_path, pdf_filename)
                print(f"Saving PDF file to: {pdf_save_path}")
                pdf_file.save(pdf_save_path)
                saved_pdfs.append(pdf_filename)
    except Exception as e:
        print(f"Error saving PDF files for {safe_project_name}: {e}")
        # Clean up potentially partially saved files and directories?
        # shutil.rmtree(project_base_path, ignore_errors=True)
        return jsonify(success=False, message=f"Error saving PDF files: {e}"), 500

    # --- Store Repo URL (optional, e.g., in a simple info file) ---
    try:
        info_file_path = os.path.join(project_base_path, 'project_info.txt')
        with open(info_file_path, 'w') as f:
            f.write(f"ProjectName: {safe_project_name}\n")
            f.write(f"RepoURL: {repo_url}\n")
        print(f"Saved project info (including repo URL) to: {info_file_path}")
    except Exception as e:
        print(f"Warning: Could not save project_info.txt for {safe_project_name}: {e}")
        # Don't treat this as a fatal error for the add operation itself

    print(f"Successfully added project '{safe_project_name}' with {len(saved_pdfs)} PDF(s).")
    # NOTE: Server needs restart for this new project to be discovered and processed.
    return jsonify(success=True, message=f"Project '{safe_project_name}' created successfully.")

# --- Main Execution ---
if __name__ == '__main__':
    # Ensure project-specific directories (like text output) exist if needed
    # This is now handled within drawing_checker

    # Perform initial check/clone and data load FOR EACH PROJECT in parallel
    print("--- Performing initial checks and analysis for all discovered projects in background threads ---")
    if not all_projects:
        print("Warning: No projects discovered in projects directory.")
    else:
        # Use a ThreadPoolExecutor for initial setup. It is deliberately NOT wrapped in a
        # `with` block: exiting `with ThreadPoolExecutor(...)` calls shutdown(wait=True),
        # which would block startup until every initial check finished. Created this way,
        # the submitted tasks keep running in the background while the server starts, and
        # we never call shutdown(wait=True).
        initial_executor = ThreadPoolExecutor(max_workers=MAX_INITIAL_CHECK_WORKERS, thread_name_prefix='InitialCheck')
        for proj_name in all_projects:
            print(f"--- Submitting initial setup for project: {proj_name} ---")
            # Submit the combined setup and analysis function to the pool
            initial_executor.submit(initial_project_setup_and_analysis, proj_name)

    # Start the background thread for PERIODIC checks (now uses its own thread pool internally)
    print("--- Starting background periodic check manager thread ---")
    # This thread now manages submitting tasks to its own pool
    repo_check_thread = threading.Thread(target=periodic_repo_check, daemon=True, name="PeriodicCheckManager")
    repo_check_thread.start()

    # Run the Flask app - This will start *before* initial checks might be complete
    print(f"--- Starting Flask server on http://0.0.0.0:5050 ... ---")
    # Ensure Flask runs threaded to handle multiple requests (like SSE connections)
    app.run(host='0.0.0.0', port=5050, debug=False, threaded=True)
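
# ---------------------------------------------------------------------------
# Reference notes (comments only, not executed).
#
# Running the app -- a minimal sketch, assuming this file is saved as app.py:
#     python app.py
# then browse to http://localhost:5050. The page served by index() subscribes to
# the /stream SSE endpoint for live updates, and new projects can be submitted
# via POST /add_project (multipart form with the projectName, repoUrl,
# manifestFile and pdfFiles fields read above).
#
# This module assumes the local `config` module exposes the attributes used
# above (REPO_URL, BRANCH, CHECK_INTERVAL_SECONDS, PROJECTS_ROOT_DIR) and that
# `utils` provides discover_projects(), get_repo_path(project_name) and
# get_project_base_path(project_name); their concrete values and return types
# depend on the deployment and are not shown here.
# ---------------------------------------------------------------------------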