import os
import threading
import time
import json
import git
import csv
import re
import traceback
from flask import Flask, render_template, jsonify, Response
# --- Configuration Loading ---
# Determine the directory where app.py resides
_script_dir = os.path.dirname(os.path.abspath(__file__))
CONFIG_FILE_ABS = os.path.join(_script_dir, 'config.json') # Absolute path
def load_config():
"""Loads configuration from JSON file using an absolute path."""
try:
print(f"Attempting to load config from absolute path: {CONFIG_FILE_ABS}") # Add log
with open(CONFIG_FILE_ABS, 'r') as f: # Use the absolute path
config = json.load(f)
# Basic validation
if 'projects' not in config or not isinstance(config['projects'], dict):
raise ValueError("Config missing 'projects' dictionary.")
if 'check_interval_seconds' not in config or not isinstance(config['check_interval_seconds'], int):
raise ValueError("Config missing 'check_interval_seconds' integer.")
print(f"Configuration loaded successfully from {CONFIG_FILE_ABS}")
return config
except FileNotFoundError:
# Also print the current working directory for debugging
cwd = os.getcwd()
print(f"ERROR: Configuration file not found at absolute path: {CONFIG_FILE_ABS}")
print(f"Current working directory when error occurred: {cwd}")
exit(1)
except json.JSONDecodeError:
print(f"ERROR: Could not decode JSON from '{CONFIG_FILE_ABS}'. Check its format.")
exit(1)
except ValueError as e:
print(f"ERROR: Invalid configuration in '{CONFIG_FILE_ABS}': {e}")
exit(1)
except Exception as e:
cwd = os.getcwd()
print(f"ERROR: An unexpected error occurred loading config from {CONFIG_FILE_ABS}: {e}")
print(f"Current working directory during error: {cwd}")
exit(1)
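# Illustrative config.json shape (a sketch assembled from the keys this file
# reads elsewhere; every value below is a placeholder, not the real
# deployment configuration):
# {
#   "check_interval_seconds": 60,
#   "projects": {
#     "example_project": {
#       "name": "Example Project",
#       "repo_url": "http://git.example.com/org/example-repo",
#       "branch": "main",
#       "repo_dir": "./cloned_repos/example_project",
#       "manifest_csv": "./manifests/example_project_manifest.csv",
#       "scada_views_dir": "SCADA/com.inductiveautomation.perspective/views",
#       "drawing_texts_dir": "./extracted_texts/example_project",
#       "pdf_source_dir": "./pdfs/example_project",
#       "csv_cols": {
#         "alias": "Alias",
#         "panel": "Control Panel",
#         "eq_type": "Equipment Type",
#         "conv_type": "Type of Conveyor"
#       }
#     }
#   }
# }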
config = load_config()
CHECK_INTERVAL_SECONDS = config['check_interval_seconds']
# --- Project Class ---
class Project:
"""Encapsulates configuration, state, and operations for a single project."""
def __init__(self, project_id, project_config):
self.id = project_id
self.config = project_config
self.name = project_config.get('name', project_id)
# Project-specific state
self.last_commit_hash = None
self.progress_data = { # Default empty structure
"overall": {"total_csv": 0, "found_both": 0, "found_scada_only": 0, "found_drawing_only": 0, "missing_both": 0, "percentage_found_both": 0, "missing_list": [], "found_scada_only_list": [], "found_drawing_only_list": [], "found_both_list": []},
"panels": {}
}
self.status_message = "Initializing..."
self.lock = threading.Lock() # Lock for accessing project-specific repo and data
self.data_updated_event = threading.Event() # Event to signal data updates for this project
# --- Pre-calculate absolute paths ---
self._script_dir = os.path.dirname(os.path.abspath(__file__))
self._repo_dir_abs = os.path.abspath(os.path.join(self._script_dir, self.config.get('repo_dir', f'./cloned_repos/{project_id}'))) # Added default
self._csv_path_abs = os.path.abspath(os.path.join(self._script_dir, self.config.get('manifest_csv', f'./manifests/{project_id}_manifest.csv'))) # Added default
scada_views_dir_rel = self.config.get('scada_views_dir')
self._views_dir_abs = os.path.join(self._repo_dir_abs, scada_views_dir_rel) if scada_views_dir_rel else None # None when unset so the SCADA check is skipped
self._texts_dir_abs = os.path.abspath(os.path.join(self._script_dir, self.config.get('drawing_texts_dir', f'./extracted_texts/{project_id}'))) # Added default
pdf_source_dir_rel = self.config.get('pdf_source_dir')
self._pdf_dir_abs = os.path.abspath(os.path.join(self._script_dir, pdf_source_dir_rel)) if pdf_source_dir_rel else None
print(f"Initialized Project '{self.id}' ({self.name}):")
print(f" Repo Dir: {self._repo_dir_abs}")
print(f" CSV Path: {self._csv_path_abs}")
if self._views_dir_abs: print(f" Views Dir: {self._views_dir_abs}")
if self._texts_dir_abs: print(f" Texts Dir: {self._texts_dir_abs}")
if self._pdf_dir_abs: print(f" PDF Dir: {self._pdf_dir_abs}")
# --- Path Helper Methods ---
def get_repo_path(self):
return self._repo_dir_abs
def get_csv_path(self):
return self._csv_path_abs
def get_views_dir_path(self):
return self._views_dir_abs
def get_text_output_dir_path(self):
return self._texts_dir_abs
def get_pdf_source_dir_path(self):
return self._pdf_dir_abs
# --- Core Logic Methods ---
def _update_progress_data(self):
"""(Internal) Reads manifest, runs checks, combines results, and updates project state."""
current_status = ""
new_data_calculated = None
manifest_data = None # Initialize
with self.lock:
try:
# 1. Read Manifest
self.status_message = f"[{self.id}] Reading manifest..."
print(f"[{self.id}] Reading manifest: {self.get_csv_path()}")
manifest_data = read_manifest(self) # Pass self (project instance)
if manifest_data is None:
current_status = f"[{self.id}] Error: Failed manifest read {self.get_csv_path()}"
print(current_status)
self.status_message = current_status
# Don't return, proceed to calculate progress with empty data if needed
# or handle error state appropriately
manifest_data = [] # Ensure it's an empty list for calculation
# 2. Check SCADA (If views dir is configured)
if self.get_views_dir_path():
self.status_message = f"[{self.id}] Checking SCADA..."
check_scada(self, manifest_data) # Pass self (project instance)
else:
print(f"[{self.id}] Skipping SCADA check (no views_dir configured).")
# 3. Check Drawings (If texts dir is configured)
if self.get_text_output_dir_path():
self.status_message = f"[{self.id}] Checking drawings..."
check_drawings(self, manifest_data) # Pass self (project instance)
else:
print(f"[{self.id}] Skipping Drawing check (no drawing_texts_dir configured).")
# 4. Calculate Combined Progress
self.status_message = f"[{self.id}] Calculating progress..."
new_data_calculated = calculate_combined_progress(self, manifest_data) # Pass self
if new_data_calculated:
if not manifest_data and new_data_calculated['overall']['total_csv'] == 0: # Check if manifest read failed/was empty
current_status = f"[{self.id}] Analysis complete (Manifest empty/read failed) @ {time.strftime('%H:%M:%S')}"
else:
current_status = f"[{self.id}] Analysis complete @ {time.strftime('%H:%M:%S')}"
self.progress_data = new_data_calculated # Update project's data
else:
# This case should ideally not happen if calculate_combined_progress always returns a dict
current_status = f"[{self.id}] Error: Failed progress calculation."
# Keep previous progress_data?
except Exception as e:
current_status = f"[{self.id}] CRITICAL Error during analysis: {e}"
print(f"[{self.id}] Detailed Analysis Error: {e}", exc_info=True)
# Optionally reset progress data or keep old?
# self.progress_data = { ... } # Reset to default empty
# Update status and signal completion (inside lock)
print(current_status)
self.status_message = current_status
self.data_updated_event.set()
self.data_updated_event.clear()
def _check_and_update_repo(self):
"""(Internal) Checks and updates the Git repository for this project."""
did_update = False
repo_path = self.get_repo_path()
repo_url = self.config.get('repo_url')
branch = self.config.get('branch', 'main') # Default to main if not specified
if not repo_url:
print(f"[{self.id}] Skipping repo check: repo_url not configured.")
self.status_message = f"[{self.id}] Repo check skipped (no URL)"
self.data_updated_event.set(); self.data_updated_event.clear() # Signal status change
return False # No update occurred
# --- Lock is acquired by the calling method (update_repo_and_analyze) ---
try:
# Ensure parent directory exists before cloning
repo_parent_dir = os.path.dirname(repo_path)
if not os.path.exists(repo_parent_dir):
print(f"[{self.id}] Creating parent directory for repo: {repo_parent_dir}")
os.makedirs(repo_parent_dir)
repo_existed = os.path.exists(os.path.join(repo_path, ".git"))
if not repo_existed:
print(f"[{self.id}] Cloning repository {repo_url} (branch: {branch}) into {repo_path}...")
self.status_message = f"[{self.id}] Cloning repository..."
git.Repo.clone_from(repo_url, repo_path, branch=branch)
repo = git.Repo(repo_path)
self.last_commit_hash = repo.head.commit.hexsha
print(f"[{self.id}] Initial clone complete. Commit: {self.last_commit_hash}")
did_update = True
else:
repo = git.Repo(repo_path)
print(f"[{self.id}] Fetching updates from remote...")
current_local_commit = repo.head.commit.hexsha
# Use current local hash if global is still None (e.g. first run after restart)
if self.last_commit_hash is None: self.last_commit_hash = current_local_commit
origin = repo.remotes.origin
current_remote_commit = None # Initialize
try:
print(f"[{self.id}] Running fetch...")
origin.fetch()
print(f"[{self.id}] Fetch complete. Getting remote commit...")
current_remote_commit = repo.commit(f'origin/{branch}').hexsha
print(f"[{self.id}] Remote commit for origin/{branch}: {current_remote_commit}")
except git.GitCommandError as fetch_err:
print(f"[{self.id}] Warning: Could not fetch from remote/find branch '{branch}': {fetch_err}")
# Keep status as is, will signal update later
except Exception as fetch_err_other:
print(f"[{self.id}] Warning: Unexpected error during fetch: {fetch_err_other}")
# Keep status as is
print(f"[{self.id}] Local commit: {current_local_commit}, Remote commit: {current_remote_commit or 'Fetch Failed/Not Found'}")
if current_remote_commit and current_local_commit != current_remote_commit:
print(f"[{self.id}] New commit detected! Pulling changes (Branch: {branch})...")
self.status_message = f"[{self.id}] Pulling updates..."
try:
# Ensure working directory is clean before pull?
# Add checkout if needed: repo.git.checkout(branch)
pull_info = origin.pull()
# Verify pull info if needed (e.g., pull_info[0].flags)
new_commit_hash = repo.head.commit.hexsha
print(f"[{self.id}] Pull successful. New commit: {new_commit_hash}")
self.last_commit_hash = new_commit_hash
did_update = True
except git.GitCommandError as e_pull:
self.status_message = f"[{self.id}] Error pulling repository: {e_pull}"
print(self.status_message)
self.last_commit_hash = current_local_commit # Keep old hash on failed pull
except Exception as e_pull_other:
self.status_message = f"[{self.id}] Unexpected error pulling repository: {e_pull_other}"
print(self.status_message)
self.last_commit_hash = current_local_commit # Keep old hash
elif current_remote_commit:
print(f"[{self.id}] No new commits detected.")
# Only update status if no pull error occurred previously
if not self.status_message.startswith(f"[{self.id}] Error pulling"):
self.status_message = f"[{self.id}] Repo up-to-date @ {time.strftime('%H:%M:%S')}"
else: # Fetch failed
# Keep previous status message (e.g., Analysis complete, Cloning, Error pulling, etc.)
print(f"[{self.id}] Keeping previous status due to fetch failure.")
# Ensure last_commit_hash is set if it was None
if self.last_commit_hash is None: self.last_commit_hash = current_local_commit or "Unknown (Fetch Failed)"
except git.GitCommandError as e_git:
error_msg = f"[{self.id}] Git command error: {e_git}"
print(error_msg)
self.status_message = error_msg
# Try reading existing hash even on error
try:
if os.path.exists(os.path.join(repo_path, ".git")):
repo = git.Repo(repo_path)
# Don't overwrite existing hash if we have one
if self.last_commit_hash is None: self.last_commit_hash = repo.head.commit.hexsha
except Exception as e_read_hash:
print(f"[{self.id}] Additionally failed to read hash after Git error: {e_read_hash}")
if self.last_commit_hash is None: self.last_commit_hash = "Error reading commit"
except Exception as e_other:
error_msg = f"[{self.id}] Error checking repository: {e_other}"
print(error_msg)
traceback.print_exc()
self.status_message = error_msg
if self.last_commit_hash is None: self.last_commit_hash = "Error checking repo"
# --- Lock is released by the calling method ---
return did_update # Return whether the repo content was changed
def update_repo_and_analyze(self, force_analysis=False):
"""Checks the repo for updates, pulls if necessary, and runs analysis if changed or forced."""
print(f"[{self.id}] Starting update_repo_and_analyze (force_analysis={force_analysis})...")
repo_changed = False
initial_hash = self.last_commit_hash # Store hash before check
with self.lock:
repo_changed = self._check_and_update_repo()
if repo_changed:
print(f"[{self.id}] Repo changed, triggering analysis...")
# Analysis reads files, recalculates progress, updates status, and signals event
self._update_progress_data()
elif force_analysis:
print(f"[{self.id}] Forcing analysis even though repo didn't change...")
self._update_progress_data()
else:
# If repo didn't change and analysis not forced, still signal potentially changed status message from repo check
print(f"[{self.id}] Repo unchanged, analysis not forced. Signaling potential status update.")
self.data_updated_event.set()
self.data_updated_event.clear()
final_hash = self.last_commit_hash
print(f"[{self.id}] Finished update_repo_and_analyze. Repo Changed: {repo_changed}. Hash: {initial_hash} -> {final_hash}. Status: '{self.status_message}'")
return repo_changed
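# Typical lifecycle of a Project instance (descriptive sketch): it is built
# from one entry of config["projects"], and update_repo_and_analyze() is then
# invoked by the periodic background thread (and, when re-enabled, once at
# startup with force_analysis=True) to refresh last_commit_hash, progress_data
# and status_message under the per-project lock.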
# --- Global Application State ---
app = Flask(__name__)
projects = {} # Dictionary to hold Project instances {project_id: Project_instance}
global_lock = threading.Lock() # Lock for modifying the projects dictionary itself (if needed later)
# Instantiate projects from config
for project_id, project_conf in config.get('projects', {}).items():
projects[project_id] = Project(project_id, project_conf)
if not projects:
print("ERROR: No projects defined in configuration. Exiting.")
exit(1)
# --- Original Global Variables (To be removed or refactored) ---
# REPO_URL = "http://192.168.5.191:3000/LCI/MTN6" # REMOVE
# REPO_DIR = "./cloned_repo" # REMOVE
# BRANCH = "main" # REMOVE
# CSV_FILENAME = "MTN6 Equipment Manifest REV6(Conveyor List).csv" # REMOVE
# VIEWS_DIR_RELATIVE = "MTN6_SCADA/com.inductiveautomation.perspective/views/Detailed-Views" # REMOVE
# TEXT_OUTPUT_FOLDER = "./extracted_texts" # REMOVE
# CHECK_INTERVAL_SECONDS = 60 # MOVED to config load
# CSV_ALIAS_COL = 'Alias' # MOVE to project config? Or keep global if consistent? -> Moved to config
# CSV_PANEL_COL = 'Control Panel' # MOVE to project config? -> Moved to config
# CSV_EQ_TYPE_COL = 'Equipment Type' # MOVE to project config? -> Moved to config
# CSV_CONV_TYPE_COL = 'Type of Conveyor' # MOVE to project config? -> Moved to config
# last_commit_hash = None # MOVED to Project class
# progress_data = { ... } # MOVED to Project class
# status_message = "Initializing..." # MOVED to Project class
# repo_lock = threading.Lock() # MOVED to Project class (per-project lock)
# data_updated_event = threading.Event() # MOVED to Project class (per-project event)
# --- Helper Functions (To be refactored or become Project methods) ---
# def get_repo_path(): # BECOMES Project method
# return os.path.abspath(REPO_DIR)
# def get_csv_path(): # BECOMES Project method
# script_dir = os.path.dirname(os.path.abspath(__file__))
# return os.path.join(script_dir, CSV_FILENAME)
# def get_views_dir_path(): # BECOMES Project method
# return os.path.join(get_repo_path(), VIEWS_DIR_RELATIVE)
# def get_text_output_dir_path(): # BECOMES Project method
# script_dir = os.path.dirname(os.path.abspath(__file__))
# return os.path.abspath(os.path.join(script_dir, TEXT_OUTPUT_FOLDER))
def normalize(text):
"""Normalize string for comparison: lowercase, treat '-' and '_' the same, remove all whitespace."""
# This seems general enough to remain a standalone helper
if not isinstance(text, str):
return ""
text = text.lower() # Convert to lowercase
text = text.replace('-', '_') # Replace hyphens with underscores
text = re.sub(r'\s+', '', text) # Remove ALL whitespace characters (including newlines)
return text
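# Illustrative behaviour of normalize() (the inputs are made-up examples):
#   normalize("CV-101 Motor") -> "cv_101motor"
#   normalize("MTN6_CONV 01") -> "mtn6_conv01"
#   normalize(None)           -> ""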
def read_manifest(project: Project): # Takes Project instance
"""Reads the manifest CSV for a specific project into a list of dictionaries."""
manifest_items = []
csv_filepath = project.get_csv_path()
# Get column names from project config
cols = project.config.get('csv_cols', {})
col_alias = cols.get('alias', 'Alias') # Default fallback
col_panel = cols.get('panel', 'Control Panel')
col_eq_type = cols.get('eq_type', 'Equipment Type')
col_conv_type = cols.get('conv_type', 'Type of Conveyor')
required_cols = {col_alias, col_panel}
optional_cols = {col_eq_type, col_conv_type}
try:
with open(csv_filepath, mode='r', newline='', encoding='utf-8-sig') as infile:
reader = csv.DictReader(infile)
headers = set(h.strip() for h in (reader.fieldnames or [])) # Guard against an empty CSV with no header row
missing_required = required_cols - headers
if missing_required:
print(f"[{project.id}] Error: Missing required columns in CSV '{csv_filepath}': {', '.join(missing_required)}")
print(f"[{project.id}] Available columns: {', '.join(headers)}")
return None
for row in reader:
alias = row.get(col_alias, "").strip()
panel = row.get(col_panel, "").strip()
if alias and panel:
item = {
"alias": alias,
"normalized_alias": normalize(alias),
"control_panel": panel,
"equipment_type": row.get(col_eq_type, "").strip() if col_eq_type in headers else "N/A",
"conveyor_type": row.get(col_conv_type, "").strip() if col_conv_type in headers else "N/A",
"found_scada": False,
"found_drawing": False
}
manifest_items.append(item)
elif alias and not panel:
print(f"[{project.id}] Warning: Alias '{alias}' found in CSV but is missing its '{col_panel}'. Skipping.")
except FileNotFoundError:
print(f"[{project.id}] Error: Manifest file not found at {csv_filepath}")
return None
except Exception as e:
print(f"[{project.id}] Error reading CSV file {csv_filepath}: {e}")
return None
print(f"[{project.id}] Read {len(manifest_items)} valid items from manifest.")
return manifest_items
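# Illustrative manifest row and its parsed form (column names are the defaults
# above; real headers come from each project's csv_cols config, and the values
# here are made up):
#   Alias,Control Panel,Equipment Type,Type of Conveyor
#   CV-101,CP-01,Conveyor,Belt
# becomes:
#   {"alias": "CV-101", "normalized_alias": "cv_101", "control_panel": "CP-01",
#    "equipment_type": "Conveyor", "conveyor_type": "Belt",
#    "found_scada": False, "found_drawing": False}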
def check_scada(project: Project, manifest_data): # Takes Project instance
"""Checks for aliases in SCADA JSON view files for a specific project."""
if not manifest_data: return
views_dir = project.get_views_dir_path()
print(f"[{project.id}] Starting SCADA check in directory: {views_dir}...")
found_count = 0
processed_files = 0
alias_map = {}
for item in manifest_data:
na = item['normalized_alias']
if na not in alias_map:
alias_map[na] = []
alias_map[na].append(item)
try:
# Check if views dir exists BEFORE walking
if not os.path.exists(views_dir):
print(f"[{project.id}] Warning: SCADA Views directory not found at {views_dir}. Skipping SCADA check.")
return # Exit function early
for root, _, files in os.walk(views_dir):
for filename in files:
if filename == 'view.json':
filepath = os.path.join(root, filename)
processed_files += 1
try:
with open(filepath, 'r', encoding='utf-8') as f:
content = f.read()
normalized_content = normalize(content)
for norm_alias, items in alias_map.items():
if norm_alias in normalized_content:
for item in items:
if not item['found_scada']:
item['found_scada'] = True
found_count += 1
except Exception as e:
print(f"[{project.id}] Warning: Could not read or process JSON file {filepath}: {e}")
except Exception as e:
print(f"[{project.id}] Error walking SCADA views directory {views_dir}: {e}")
print(f"[{project.id}] SCADA check finished. Processed {processed_files} view.json files. Found {found_count} manifest aliases.")
def check_drawings(project: Project, manifest_data): # Takes Project instance
"""Checks if aliases from manifest exist in *any* extracted drawing text file for a specific project."""
if not manifest_data: return
text_output_dir = project.get_text_output_dir_path()
print(f"[{project.id}] Starting Drawings check: Scanning all .txt files in directory: {text_output_dir}...")
all_normalized_content = ""
processed_files = 0
found_files = []
try:
# Check if text dir exists BEFORE listing
if not os.path.exists(text_output_dir):
print(f"[{project.id}] Warning: Drawings text directory not found: {text_output_dir}. Skipping Drawings check.")
return # Exit function early
if not os.path.isdir(text_output_dir):
print(f"[{project.id}] Error: Path specified for Drawings text is not a directory: {text_output_dir}. Skipping Drawings check.")
return # Exit function early
for filename in os.listdir(text_output_dir):
if filename.lower().endswith('.txt'):
filepath = os.path.join(text_output_dir, filename)
processed_files += 1
try:
with open(filepath, 'r', encoding='utf-8') as f:
content = f.read()
all_normalized_content += normalize(content) + "\n--file-separator--\n"
found_files.append(filename)
except Exception as e:
print(f"[{project.id}] Warning: Could not read or process text file {filepath}: {e}")
if processed_files == 0:
print(f"[{project.id}] Warning: No .txt files found in the directory {text_output_dir}. Cannot perform drawing check.")
return
else:
print(f"[{project.id}] Successfully read and normalized content from {len(found_files)} out of {processed_files} .txt files found.")
found_count = 0
for item in manifest_data:
normalized_alias = item['normalized_alias']
if normalized_alias and normalized_alias in all_normalized_content:
item['found_drawing'] = True
found_count += 1
print(f"[{project.id}] Drawings check finished. Found {found_count} manifest aliases within the combined text content.")
except Exception as e:
print(f"[{project.id}] Error during drawings check in {text_output_dir}: {e}")
def calculate_combined_progress(project: Project, manifest_data): # Takes Project instance
"""Calculates the combined progress based on scada/drawing status for a specific project."""
print(f"[{project.id}] Calculating combined progress statistics...")
results = {
"overall": {
"total_csv": 0, "found_both": 0, "found_scada_only": 0, "found_drawing_only": 0, "missing_both": 0,
"percentage_found_both": 0,
"missing_list": [], "found_scada_only_list": [], "found_drawing_only_list": [], "found_both_list": []
},
"panels": {}
}
if not manifest_data:
print(f"[{project.id}] Warning: No manifest data to calculate progress from.")
return results # Return default empty structure
results["overall"]["total_csv"] = len(manifest_data)
for item in manifest_data:
panel = item['control_panel']
if panel not in results["panels"]:
results["panels"][panel] = {
"total": 0, "found_both": 0, "found_scada_only": 0, "found_drawing_only": 0, "missing_both": 0,
"percentage_found_both": 0,
"missing_list": [], "found_scada_only_list": [], "found_drawing_only_list": [], "found_both_list": []
}
results["panels"][panel]["total"] += 1
item_detail = {k: v for k, v in item.items() if k not in ['normalized_alias']}
if item['found_scada'] and item['found_drawing']:
results["overall"]["found_both"] += 1
results["panels"][panel]["found_both"] += 1
results["overall"]["found_both_list"].append(item_detail)
results["panels"][panel]["found_both_list"].append(item_detail)
elif item['found_scada'] and not item['found_drawing']:
results["overall"]["found_scada_only"] += 1
results["panels"][panel]["found_scada_only"] += 1
results["overall"]["found_scada_only_list"].append(item_detail)
results["panels"][panel]["found_scada_only_list"].append(item_detail)
elif not item['found_scada'] and item['found_drawing']:
results["overall"]["found_drawing_only"] += 1
results["panels"][panel]["found_drawing_only"] += 1
results["overall"]["found_drawing_only_list"].append(item_detail)
results["panels"][panel]["found_drawing_only_list"].append(item_detail)
else: # Missing both
results["overall"]["missing_both"] += 1
results["panels"][panel]["missing_both"] += 1
results["overall"]["missing_list"].append(item_detail)
results["panels"][panel]["missing_list"].append(item_detail)
if results["overall"]["total_csv"] > 0:
results["overall"]["percentage_found_both"] = round(
(results["overall"]["found_both"] / results["overall"]["total_csv"]) * 100, 1
)
for panel_data in results["panels"].values():
if panel_data["total"] > 0:
panel_data["percentage_found_both"] = round(
(panel_data["found_both"] / panel_data["total"]) * 100, 1
)
print(f"[{project.id}] Combined progress calculation finished.")
return results
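# Worked example (made-up numbers): a panel with total=8, found_both=6,
# found_scada_only=1 and missing_both=1 reports
# percentage_found_both == round((6 / 8) * 100, 1) == 75.0.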
# --- Background Task ---
def periodic_repo_check():
"""Runs the check_and_update_repo method periodically for all projects sequentially."""
# Small delay at startup to allow Flask server to bind port before first check cycle prints
time.sleep(2)
while True:
print(f"\n--- Starting Periodic Project Checks (Interval: {CHECK_INTERVAL_SECONDS}s) ---")
project_ids = list(projects.keys())
for project_id in project_ids:
project = projects.get(project_id)
if not project:
print(f"Warning: Project ID '{project_id}' found in keys but not in projects dict during periodic check.")
continue
print(f"--- Checking Project: {project.id} ({project.name}) ---")
try:
# Use the public method which handles locking and analysis triggering
project.update_repo_and_analyze()
except Exception as e:
print(f"!! CRITICAL ERROR during periodic check for project '{project_id}': {e}", exc_info=True)
# Update status via lock if possible
with project.lock:
project.status_message = f"[{project.id}] CRITICAL check cycle error: {e}"
if project.last_commit_hash is None: project.last_commit_hash = "Unknown (Check Error)"
project.data_updated_event.set() # Signal error
project.data_updated_event.clear()
print(f"--- Finished check for project: {project.id} ---")
print(f"--- All project checks finished. Sleeping for {CHECK_INTERVAL_SECONDS}s... ---")
time.sleep(CHECK_INTERVAL_SECONDS)
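# Note: the sleep only starts once every project has been checked, so the
# effective cycle time is CHECK_INTERVAL_SECONDS plus however long the repo
# checks and analyses themselves take.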
# --- Flask Routes ---
@app.route('/')
def index():
# Pass project list sorted by name to template for selector
project_list = sorted(
[{"id": pid, "name": p.name} for pid, p in projects.items()],
key=lambda x: x['name']
)
return render_template('index.html', projects=project_list)
@app.route('/stream/<project_id>')
def stream(project_id):
project = projects.get(project_id)
if not project:
return jsonify({"error": "Project not found"}), 404
def event_stream(target_project: Project):
last_sent_hash_to_client = None
print(f"SSE Client connected for project: {target_project.id}")
# Send initial state immediately
with target_project.lock:
current_hash = target_project.last_commit_hash
current_status = target_project.status_message
current_progress = target_project.progress_data
initial_payload = json.dumps({
"status": current_status,
"progress": current_progress,
"last_commit": current_hash
})
yield f"data: {initial_payload}\\n\\n"
last_sent_hash_to_client = current_hash
print(f"[{target_project.id}] Sent initial SSE state (Hash: {last_sent_hash_to_client})")
# Wait for project-specific updates
while True:
# Add a timeout to prevent hangs if event logic fails?
event_fired = target_project.data_updated_event.wait(timeout=CHECK_INTERVAL_SECONDS * 2)
if not event_fired:
# If timeout occurs, maybe send a keep-alive or re-send current state?
print(f"[{target_project.id}] SSE wait timeout. Re-checking state.")
# Optionally send a keep-alive comment: yield ": keepalive\n\n"
# Or just continue to re-evaluate state below
pass # Fall through to check state
with target_project.lock:
current_hash = target_project.last_commit_hash
current_status = target_project.status_message
current_progress = target_project.progress_data
# Send update ONLY if hash changed or if the wait timed out (to ensure client syncs)
if current_hash != last_sent_hash_to_client or not event_fired:
print(f"[{target_project.id}] SSE Data updated (Hash: {last_sent_hash_to_client} -> {current_hash}, Event Fired: {event_fired}). Sending.")
data_payload = json.dumps({
"status": current_status,
"progress": current_progress,
"last_commit": current_hash
})
yield f"data: {data_payload}\\n\\n"
last_sent_hash_to_client = current_hash
# else:
# print(f"[{target_project.id}] SSE Event triggered, hash {current_hash} unchanged.")
return Response(event_stream(project), mimetype="text/event-stream")
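# Consuming the stream (sketch; "example_project" is a placeholder id):
#   curl -N http://localhost:5050/stream/example_project
# Each SSE message carries a JSON payload of the form
#   {"status": "...", "progress": {...}, "last_commit": "..."}
# where "progress" follows the structure built by calculate_combined_progress().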
# --- Main Execution ---
if __name__ == '__main__':
# Ensure base directories exist (safer to do this before initializing projects)
# Create parent dirs for repos, manifests, texts, pdfs based on config
dirs_to_ensure = set()
for pid, pconf in config.get('projects', {}).items():
script_dir = os.path.dirname(os.path.abspath(__file__))
dirs_to_ensure.add(os.path.dirname(os.path.abspath(os.path.join(script_dir, pconf.get('repo_dir', f'./cloned_repos/{pid}')))))
dirs_to_ensure.add(os.path.dirname(os.path.abspath(os.path.join(script_dir, pconf.get('manifest_csv', f'./manifests/{pid}_manifest.csv')))))
dirs_to_ensure.add(os.path.dirname(os.path.abspath(os.path.join(script_dir, pconf.get('drawing_texts_dir', f'./extracted_texts/{pid}')))))
pdf_dir = pconf.get('pdf_source_dir')
if pdf_dir:
dirs_to_ensure.add(os.path.dirname(os.path.abspath(os.path.join(script_dir, pdf_dir))))
for d in dirs_to_ensure:
if d and not os.path.exists(d):
print(f"Creating necessary directory: {d}")
try:
os.makedirs(d, exist_ok=True)
except OSError as e:
print(f"ERROR: Could not create directory {d}: {e}")
# Decide if this is fatal? Probably depends on which dir failed.
# exit(1) # Exit if repo/manifest dirs can't be made?
# Ensure Project objects exist AFTER the directories above have been created.
# NOTE: projects were already instantiated at import time further up, so this
# loop only creates an entry that is somehow still missing.
for project_id, project_conf in config.get('projects', {}).items():
if project_id not in projects: # Normally already created at module level
projects[project_id] = Project(project_id, project_conf)
if not projects:
print("ERROR: No projects defined or initialized. Exiting.")
exit(1)
# Perform initial check/clone and data load for ALL projects
print("--- Performing Initial Project Analysis ---")
# ======================================================================
# === TEMPORARILY DISABLED INITIAL ANALYSIS THREADING FOR DEBUGGING ===
# initial_threads = []
# for project_id, project in projects.items():
# print(f"--- Starting initial analysis for: {project.id} ({project.name}) ---")
# # Run initial analysis in parallel threads for faster startup?
# # Set force_analysis=True to ensure data is loaded even if repo exists and hasn't changed since last run
# thread = threading.Thread(target=project.update_repo_and_analyze, kwargs={'force_analysis': True})
# initial_threads.append(thread)
# thread.start()
#
# # Wait for all initial analyses to complete
# print("Waiting for initial analyses to complete...")
# for thread in initial_threads:
# thread.join()
# print("--- Initial Analyses Complete ---")
print("SKIPPING initial analysis for debugging port binding issue.")
print("NOTE: Project data will be loaded on the first periodic check.")
# ======================================================================
# Start the background thread for periodic checks AFTER initial load
print("Starting background periodic check thread...")
repo_check_thread = threading.Thread(target=periodic_repo_check, daemon=True)
repo_check_thread.start()
# Run the Flask app
print(f"Starting Flask server: http://0.0.0.0:5050")
try:
app.run(host='0.0.0.0', port=5050, debug=False, threaded=True)
except Exception as e:
print(f"FATAL ERROR during app.run: {e}", exc_info=True)
exit(1)