diff --git a/app.py b/app.py
index b981e3c..55da423 100644
--- a/app.py
+++ b/app.py
@@ -7,58 +7,343 @@
 import csv
 import re
 from flask import Flask, render_template, jsonify, Response
+# --- Configuration Loading ---
+# Determine the directory where app.py resides
+_script_dir = os.path.dirname(os.path.abspath(__file__))
+CONFIG_FILE_ABS = os.path.join(_script_dir, 'config.json')  # Absolute path
+
+def load_config():
+    """Loads configuration from JSON file using an absolute path."""
+    try:
+        print(f"Attempting to load config from absolute path: {CONFIG_FILE_ABS}")  # Add log
+        with open(CONFIG_FILE_ABS, 'r') as f:  # Use the absolute path
+            config = json.load(f)
+        # Basic validation
+        if 'projects' not in config or not isinstance(config['projects'], dict):
+            raise ValueError("Config missing 'projects' dictionary.")
+        if 'check_interval_seconds' not in config or not isinstance(config['check_interval_seconds'], int):
+            raise ValueError("Config missing 'check_interval_seconds' integer.")
+        print(f"Configuration loaded successfully from {CONFIG_FILE_ABS}")
+        return config
+    except FileNotFoundError:
+        # Also print the current working directory for debugging
+        cwd = os.getcwd()
+        print(f"ERROR: Configuration file not found at absolute path: {CONFIG_FILE_ABS}")
+        print(f"Current working directory when error occurred: {cwd}")
+        exit(1)
+    except json.JSONDecodeError:
+        print(f"ERROR: Could not decode JSON from '{CONFIG_FILE_ABS}'. Check its format.")
+        exit(1)
+    except ValueError as e:
+        print(f"ERROR: Invalid configuration in '{CONFIG_FILE_ABS}': {e}")
+        exit(1)
+    except Exception as e:
+        cwd = os.getcwd()
+        print(f"ERROR: An unexpected error occurred loading config from {CONFIG_FILE_ABS}: {e}")
+        print(f"Current working directory during error: {cwd}")
+        exit(1)
+
+config = load_config()
+CHECK_INTERVAL_SECONDS = config['check_interval_seconds']
+
+# --- Project Class ---
+class Project:
+    """Encapsulates configuration, state, and operations for a single project."""
+    def __init__(self, project_id, project_config):
+        self.id = project_id
+        self.config = project_config
+        self.name = project_config.get('name', project_id)
+
+        # Project-specific state
+        self.last_commit_hash = None
+        self.progress_data = {  # Default empty structure
+            "overall": {"total_csv": 0, "found_both": 0, "found_scada_only": 0, "found_drawing_only": 0, "missing_both": 0, "percentage_found_both": 0, "missing_list": [], "found_scada_only_list": [], "found_drawing_only_list": [], "found_both_list": []},
+            "panels": {}
+        }
+        self.status_message = "Initializing..."
+ self.lock = threading.Lock() # Lock for accessing project-specific repo and data + self.data_updated_event = threading.Event() # Event to signal data updates for this project + + # --- Pre-calculate absolute paths --- + self._script_dir = os.path.dirname(os.path.abspath(__file__)) + self._repo_dir_abs = os.path.abspath(os.path.join(self._script_dir, self.config.get('repo_dir', f'./cloned_repos/{project_id}'))) # Added default + self._csv_path_abs = os.path.abspath(os.path.join(self._script_dir, self.config.get('manifest_csv', f'./manifests/{project_id}_manifest.csv'))) # Added default + self._views_dir_abs = os.path.join(self._repo_dir_abs, self.config.get('scada_views_dir', '')) + self._texts_dir_abs = os.path.abspath(os.path.join(self._script_dir, self.config.get('drawing_texts_dir', f'./extracted_texts/{project_id}'))) # Added default + pdf_source_dir_rel = self.config.get('pdf_source_dir') + self._pdf_dir_abs = os.path.abspath(os.path.join(self._script_dir, pdf_source_dir_rel)) if pdf_source_dir_rel else None + + print(f"Initialized Project '{self.id}' ({self.name}):") + print(f" Repo Dir: {self._repo_dir_abs}") + print(f" CSV Path: {self._csv_path_abs}") + if self._views_dir_abs: print(f" Views Dir: {self._views_dir_abs}") + if self._texts_dir_abs: print(f" Texts Dir: {self._texts_dir_abs}") + if self._pdf_dir_abs: print(f" PDF Dir: {self._pdf_dir_abs}") + + # --- Path Helper Methods --- + def get_repo_path(self): + return self._repo_dir_abs + + def get_csv_path(self): + return self._csv_path_abs + + def get_views_dir_path(self): + return self._views_dir_abs + + def get_text_output_dir_path(self): + return self._texts_dir_abs + + def get_pdf_source_dir_path(self): + return self._pdf_dir_abs + + # --- Core Logic Methods --- + def _update_progress_data(self): + """(Internal) Reads manifest, runs checks, combines results, and updates project state.""" + current_status = "" + new_data_calculated = None + manifest_data = None # Initialize + + with self.lock: + try: + # 1. Read Manifest + self.status_message = f"[{self.id}] Reading manifest..." + print(f"[{self.id}] Reading manifest: {self.get_csv_path()}") + manifest_data = read_manifest(self) # Pass self (project instance) + if manifest_data is None: + current_status = f"[{self.id}] Error: Failed manifest read {self.get_csv_path()}" + print(current_status) + self.status_message = current_status + # Don't return, proceed to calculate progress with empty data if needed + # or handle error state appropriately + manifest_data = [] # Ensure it's an empty list for calculation + + # 2. Check SCADA (If views dir is configured) + if self.get_views_dir_path(): + self.status_message = f"[{self.id}] Checking SCADA..." + check_scada(self, manifest_data) # Pass self (project instance) + else: + print(f"[{self.id}] Skipping SCADA check (no views_dir configured).") + + # 3. Check Drawings (If texts dir is configured) + if self.get_text_output_dir_path(): + self.status_message = f"[{self.id}] Checking drawings..." + check_drawings(self, manifest_data) # Pass self (project instance) + else: + print(f"[{self.id}] Skipping Drawing check (no drawing_texts_dir configured).") + + # 4. Calculate Combined Progress + self.status_message = f"[{self.id}] Calculating progress..." 
+ new_data_calculated = calculate_combined_progress(self, manifest_data) # Pass self + + if new_data_calculated: + if not manifest_data and new_data_calculated['overall']['total_csv'] == 0: # Check if manifest read failed/was empty + current_status = f"[{self.id}] Analysis complete (Manifest empty/read failed) @ {time.strftime('%H:%M:%S')}" + else: + current_status = f"[{self.id}] Analysis complete @ {time.strftime('%H:%M:%S')}" + self.progress_data = new_data_calculated # Update project's data + else: + # This case should ideally not happen if calculate_combined_progress always returns a dict + current_status = f"[{self.id}] Error: Failed progress calculation." + # Keep previous progress_data? + + except Exception as e: + current_status = f"[{self.id}] CRITICAL Error during analysis: {e}" + print(f"[{self.id}] Detailed Analysis Error: {e}", exc_info=True) + # Optionally reset progress data or keep old? + # self.progress_data = { ... } # Reset to default empty + + # Update status and signal completion (inside lock) + print(current_status) + self.status_message = current_status + self.data_updated_event.set() + self.data_updated_event.clear() + + def _check_and_update_repo(self): + """(Internal) Checks and updates the Git repository for this project.""" + did_update = False + repo_path = self.get_repo_path() + repo_url = self.config.get('repo_url') + branch = self.config.get('branch', 'main') # Default to main if not specified + + if not repo_url: + print(f"[{self.id}] Skipping repo check: repo_url not configured.") + self.status_message = f"[{self.id}] Repo check skipped (no URL)" + self.data_updated_event.set(); self.data_updated_event.clear() # Signal status change + return False # No update occurred + + # --- Lock is acquired by the calling method (update_repo_and_analyze) --- + try: + # Ensure parent directory exists before cloning + repo_parent_dir = os.path.dirname(repo_path) + if not os.path.exists(repo_parent_dir): + print(f"[{self.id}] Creating parent directory for repo: {repo_parent_dir}") + os.makedirs(repo_parent_dir) + + repo_existed = os.path.exists(os.path.join(repo_path, ".git")) + if not repo_existed: + print(f"[{self.id}] Cloning repository {repo_url} (branch: {branch}) into {repo_path}...") + self.status_message = f"[{self.id}] Cloning repository..." + git.Repo.clone_from(repo_url, repo_path, branch=branch) + repo = git.Repo(repo_path) + self.last_commit_hash = repo.head.commit.hexsha + print(f"[{self.id}] Initial clone complete. Commit: {self.last_commit_hash}") + did_update = True + else: + repo = git.Repo(repo_path) + print(f"[{self.id}] Fetching updates from remote...") + current_local_commit = repo.head.commit.hexsha + # Use current local hash if global is still None (e.g. first run after restart) + if self.last_commit_hash is None: self.last_commit_hash = current_local_commit + + origin = repo.remotes.origin + current_remote_commit = None # Initialize + try: + print(f"[{self.id}] Running fetch...") + origin.fetch() + print(f"[{self.id}] Fetch complete. 
Getting remote commit...") + current_remote_commit = repo.commit(f'origin/{branch}').hexsha + print(f"[{self.id}] Remote commit for origin/{branch}: {current_remote_commit}") + except git.GitCommandError as fetch_err: + print(f"[{self.id}] Warning: Could not fetch from remote/find branch '{branch}': {fetch_err}") + # Keep status as is, will signal update later + except Exception as fetch_err_other: + print(f"[{self.id}] Warning: Unexpected error during fetch: {fetch_err_other}") + # Keep status as is + + print(f"[{self.id}] Local commit: {current_local_commit}, Remote commit: {current_remote_commit or 'Fetch Failed/Not Found'}") + + if current_remote_commit and current_local_commit != current_remote_commit: + print(f"[{self.id}] New commit detected! Pulling changes (Branch: {branch})...") + self.status_message = f"[{self.id}] Pulling updates..." + try: + # Ensure working directory is clean before pull? + # Add checkout if needed: repo.git.checkout(branch) + pull_info = origin.pull() + # Verify pull info if needed (e.g., pull_info[0].flags) + new_commit_hash = repo.head.commit.hexsha + print(f"[{self.id}] Pull successful. New commit: {new_commit_hash}") + self.last_commit_hash = new_commit_hash + did_update = True + except git.GitCommandError as e_pull: + self.status_message = f"[{self.id}] Error pulling repository: {e_pull}" + print(self.status_message) + self.last_commit_hash = current_local_commit # Keep old hash on failed pull + except Exception as e_pull_other: + self.status_message = f"[{self.id}] Unexpected error pulling repository: {e_pull_other}" + print(self.status_message) + self.last_commit_hash = current_local_commit # Keep old hash + elif current_remote_commit: + print(f"[{self.id}] No new commits detected.") + # Only update status if no pull error occurred previously + if not self.status_message.startswith(f"[{self.id}] Error pulling"): + self.status_message = f"[{self.id}] Repo up-to-date @ {time.strftime('%H:%M:%S')}" + else: # Fetch failed + # Keep previous status message (e.g., Analysis complete, Cloning, Error pulling, etc.) 
+ print(f"[{self.id}] Keeping previous status due to fetch failure.") + # Ensure last_commit_hash is set if it was None + if self.last_commit_hash is None: self.last_commit_hash = current_local_commit or "Unknown (Fetch Failed)" + + except git.GitCommandError as e_git: + error_msg = f"[{self.id}] Git command error: {e_git}" + print(error_msg) + self.status_message = error_msg + # Try reading existing hash even on error + try: + if os.path.exists(os.path.join(repo_path, ".git")): + repo = git.Repo(repo_path) + # Don't overwrite existing hash if we have one + if self.last_commit_hash is None: self.last_commit_hash = repo.head.commit.hexsha + except Exception as e_read_hash: + print(f"[{self.id}] Additionally failed to read hash after Git error: {e_read_hash}") + if self.last_commit_hash is None: self.last_commit_hash = "Error reading commit" + except Exception as e_other: + error_msg = f"[{self.id}] Error checking repository: {e_other}" + print(error_msg, exc_info=True) + self.status_message = error_msg + if self.last_commit_hash is None: self.last_commit_hash = "Error checking repo" + + # --- Lock is released by the calling method --- + return did_update # Return whether the repo content was changed + + def update_repo_and_analyze(self, force_analysis=False): + """Checks the repo for updates, pulls if necessary, and runs analysis if changed or forced.""" + print(f"[{self.id}] Starting update_repo_and_analyze (force_analysis={force_analysis})...") + repo_changed = False + initial_hash = self.last_commit_hash # Store hash before check + + with self.lock: + repo_changed = self._check_and_update_repo() + + if repo_changed: + print(f"[{self.id}] Repo changed, triggering analysis...") + # Analysis reads files, recalculates progress, updates status, and signals event + self._update_progress_data() + elif force_analysis: + print(f"[{self.id}] Forcing analysis even though repo didn't change...") + self._update_progress_data() + else: + # If repo didn't change and analysis not forced, still signal potentially changed status message from repo check + print(f"[{self.id}] Repo unchanged, analysis not forced. Signaling potential status update.") + self.data_updated_event.set() + self.data_updated_event.clear() + + final_hash = self.last_commit_hash + print(f"[{self.id}] Finished update_repo_and_analyze. Repo Changed: {repo_changed}. Hash: {initial_hash} -> {final_hash}. 
Status: '{self.status_message}'") + return repo_changed + + +# --- Global Application State --- app = Flask(__name__) +projects = {} # Dictionary to hold Project instances {project_id: Project_instance} +global_lock = threading.Lock() # Lock for modifying the projects dictionary itself (if needed later) -# --- Configuration --- -REPO_URL = "http://192.168.5.191:3000/LCI/MTN6" -REPO_DIR = "./cloned_repo" # Directory to clone the repo into -BRANCH = "main" -CSV_FILENAME = "MTN6 Equipment Manifest REV6(Conveyor List).csv" -VIEWS_DIR_RELATIVE = "MTN6_SCADA/com.inductiveautomation.perspective/views/Detailed-Views" -TEXT_OUTPUT_FOLDER = "./extracted_texts" # Added: Directory with .txt files -CHECK_INTERVAL_SECONDS = 60 +# Instantiate projects from config +for project_id, project_conf in config.get('projects', {}).items(): + projects[project_id] = Project(project_id, project_conf) -# --- Column Names from CSV (Adjust if necessary) --- -CSV_ALIAS_COL = 'Alias' -CSV_PANEL_COL = 'Control Panel' -CSV_EQ_TYPE_COL = 'Equipment Type' # Optional, for details modal -CSV_CONV_TYPE_COL = 'Type of Conveyor' # Optional, for details modal +if not projects: + print("ERROR: No projects defined in configuration. Exiting.") + exit(1) -# --- Global state --- -last_commit_hash = None -# New detailed progress data structure -progress_data = { - "overall": { - "total_csv": 0, "found_both": 0, "found_scada_only": 0, "found_drawing_only": 0, "missing_both": 0, - "percentage_found_both": 0, - "missing_list": [], "found_scada_only_list": [], "found_drawing_only_list": [], "found_both_list": [] - }, - "panels": {} # Populated dynamically -} -status_message = "Initializing..." -repo_lock = threading.Lock() # Lock for accessing repo and shared data -data_updated_event = threading.Event() # Event to signal data updates +# --- Original Global Variables (To be removed or refactored) --- +# REPO_URL = "http://192.168.5.191:3000/LCI/MTN6" # REMOVE +# REPO_DIR = "./cloned_repo" # REMOVE +# BRANCH = "main" # REMOVE +# CSV_FILENAME = "MTN6 Equipment Manifest REV6(Conveyor List).csv" # REMOVE +# VIEWS_DIR_RELATIVE = "MTN6_SCADA/com.inductiveautomation.perspective/views/Detailed-Views" # REMOVE +# TEXT_OUTPUT_FOLDER = "./extracted_texts" # REMOVE +# CHECK_INTERVAL_SECONDS = 60 # MOVED to config load -# --- Helper Functions --- +# CSV_ALIAS_COL = 'Alias' # MOVE to project config? Or keep global if consistent? -> Moved to config +# CSV_PANEL_COL = 'Control Panel' # MOVE to project config? -> Moved to config +# CSV_EQ_TYPE_COL = 'Equipment Type' # MOVE to project config? -> Moved to config +# CSV_CONV_TYPE_COL = 'Type of Conveyor' # MOVE to project config? -> Moved to config -def get_repo_path(): - return os.path.abspath(REPO_DIR) +# last_commit_hash = None # MOVED to Project class +# progress_data = { ... } # MOVED to Project class +# status_message = "Initializing..." 
# MOVED to Project class +# repo_lock = threading.Lock() # MOVED to Project class (per-project lock) +# data_updated_event = threading.Event() # MOVED to Project class (per-project event) -def get_csv_path(): - script_dir = os.path.dirname(os.path.abspath(__file__)) - return os.path.join(script_dir, CSV_FILENAME) +# --- Helper Functions (To be refactored or become Project methods) --- -def get_views_dir_path(): - return os.path.join(get_repo_path(), VIEWS_DIR_RELATIVE) +# def get_repo_path(): # BECOMES Project method +# return os.path.abspath(REPO_DIR) -def get_text_output_dir_path(): - # Construct absolute path based on the script's directory - script_dir = os.path.dirname(os.path.abspath(__file__)) - # Use os.path.join to handle path separators correctly and avoid './' - return os.path.abspath(os.path.join(script_dir, TEXT_OUTPUT_FOLDER)) +# def get_csv_path(): # BECOMES Project method +# script_dir = os.path.dirname(os.path.abspath(__file__)) +# return os.path.join(script_dir, CSV_FILENAME) + +# def get_views_dir_path(): # BECOMES Project method +# return os.path.join(get_repo_path(), VIEWS_DIR_RELATIVE) + +# def get_text_output_dir_path(): # BECOMES Project method +# script_dir = os.path.dirname(os.path.abspath(__file__)) +# return os.path.abspath(os.path.join(script_dir, TEXT_OUTPUT_FOLDER)) def normalize(text): """Normalize string for comparison: lowercase, treat '-' and '_' the same, remove all whitespace.""" + # This seems general enough to remain a standalone helper if not isinstance(text, str): return "" text = text.lower() # Convert to lowercase @@ -66,69 +351,66 @@ def normalize(text): text = re.sub(r'\s+', '', text) # Remove ALL whitespace characters (including newlines) return text -def read_manifest(csv_filepath): - """Reads the manifest CSV into a list of dictionaries.""" +def read_manifest(project: Project): # Takes Project instance + """Reads the manifest CSV for a specific project into a list of dictionaries.""" manifest_items = [] - # Only require Alias and Panel now for basic grouping - required_cols = {CSV_ALIAS_COL, CSV_PANEL_COL} - optional_cols = {CSV_EQ_TYPE_COL, CSV_CONV_TYPE_COL} + csv_filepath = project.get_csv_path() + # Get column names from project config + cols = project.config.get('csv_cols', {}) + col_alias = cols.get('alias', 'Alias') # Default fallback + col_panel = cols.get('panel', 'Control Panel') + col_eq_type = cols.get('eq_type', 'Equipment Type') + col_conv_type = cols.get('conv_type', 'Type of Conveyor') + + required_cols = {col_alias, col_panel} + optional_cols = {col_eq_type, col_conv_type} + try: - # Revert back to 'utf-8-sig' to handle potential BOM from Excel with open(csv_filepath, mode='r', newline='', encoding='utf-8-sig') as infile: reader = csv.DictReader(infile) headers = set(h.strip() for h in reader.fieldnames) - # Check for required columns missing_required = required_cols - headers if missing_required: - print(f"Error: Missing required columns in CSV '{csv_filepath}': {', '.join(missing_required)}") - print(f"Available columns: {', '.join(headers)}") + print(f"[{project.id}] Error: Missing required columns in CSV '{csv_filepath}': {', '.join(missing_required)}") + print(f"[{project.id}] Available columns: {', '.join(headers)}") return None for row in reader: - alias = row.get(CSV_ALIAS_COL, "").strip() - panel = row.get(CSV_PANEL_COL, "").strip() - # unit_number = row.get('Unit Number', "").strip() # No longer needed for filename + alias = row.get(col_alias, "").strip() + panel = row.get(col_panel, "").strip() - # Add if Alias and 
Control Panel are present (Panel needed for grouping results later) if alias and panel: item = { "alias": alias, "normalized_alias": normalize(alias), "control_panel": panel, - # "unit_number": unit_number, # Removed - # "expected_drawing_filename": f"MTN6_SYSDL-{unit_number}.txt", # Removed - # Add optional data if columns exist - "equipment_type": row.get(CSV_EQ_TYPE_COL, "").strip() if CSV_EQ_TYPE_COL in headers else "N/A", - "conveyor_type": row.get(CSV_CONV_TYPE_COL, "").strip() if CSV_CONV_TYPE_COL in headers else "N/A", - # Status fields to be filled later + "equipment_type": row.get(col_eq_type, "").strip() if col_eq_type in headers else "N/A", + "conveyor_type": row.get(col_conv_type, "").strip() if col_conv_type in headers else "N/A", "found_scada": False, "found_drawing": False } manifest_items.append(item) - # elif alias and panel: # If Unit Number is missing but others are present # Condition removed - # print(f"Warning: Alias '{alias}' in Panel '{panel}' is missing 'Unit Number' in CSV. Skipping drawing check for this item.") elif alias and not panel: - print(f"Warning: Alias '{alias}' found in CSV but is missing its '{CSV_PANEL_COL}'. Skipping.") - # Add other specific warnings if needed + print(f"[{project.id}] Warning: Alias '{alias}' found in CSV but is missing its '{col_panel}'. Skipping.") except FileNotFoundError: - print(f"Error: Manifest file not found at {csv_filepath}") + print(f"[{project.id}] Error: Manifest file not found at {csv_filepath}") return None except Exception as e: - print(f"Error reading CSV file {csv_filepath}: {e}") + print(f"[{project.id}] Error reading CSV file {csv_filepath}: {e}") return None - print(f"Read {len(manifest_items)} valid items from manifest.") + print(f"[{project.id}] Read {len(manifest_items)} valid items from manifest.") return manifest_items -def check_scada(manifest_data, views_dir): - """Checks for aliases in SCADA JSON view files.""" +def check_scada(project: Project, manifest_data): # Takes Project instance + """Checks for aliases in SCADA JSON view files for a specific project.""" if not manifest_data: return - print(f"Starting SCADA check in directory: {views_dir}...") + views_dir = project.get_views_dir_path() + print(f"[{project.id}] Starting SCADA check in directory: {views_dir}...") found_count = 0 processed_files = 0 - # Create a quick lookup map of normalized_alias -> list of manifest items (handles duplicate aliases) alias_map = {} for item in manifest_data: na = item['normalized_alias'] @@ -137,6 +419,11 @@ def check_scada(manifest_data, views_dir): alias_map[na].append(item) try: + # Check if views dir exists BEFORE walking + if not os.path.exists(views_dir): + print(f"[{project.id}] Warning: SCADA Views directory not found at {views_dir}. 
Skipping SCADA check.")
+            return # Exit function early
+
         for root, _, files in os.walk(views_dir):
             for filename in files:
                 if filename == 'view.json':
@@ -144,36 +431,40 @@
                     processed_files += 1
                     try:
                         with open(filepath, 'r', encoding='utf-8') as f:
-                            # Read the whole file, normalize it for substring search
                             content = f.read()
                             normalized_content = normalize(content)
-
-                        # Check manifest aliases against this file's normalized content
                         for norm_alias, items in alias_map.items():
                             if norm_alias in normalized_content:
                                 for item in items:
-                                    if not item['found_scada']: # Update only if not already found elsewhere
+                                    if not item['found_scada']:
                                         item['found_scada'] = True
-                                        found_count += 1 # Count unique aliases found
+                                        found_count += 1
                     except Exception as e:
-                        print(f" Warning: Could not read or process JSON file {filepath}: {e}")
+                        print(f"[{project.id}] Warning: Could not read or process JSON file {filepath}: {e}")
     except Exception as e:
-        print(f"Error walking SCADA views directory {views_dir}: {e}")
+        print(f"[{project.id}] Error walking SCADA views directory {views_dir}: {e}")
-    print(f"SCADA check finished. Processed {processed_files} view.json files. Found {found_count} manifest aliases.")
+    print(f"[{project.id}] SCADA check finished. Processed {processed_files} view.json files. Found {found_count} manifest aliases.")
-
-def check_drawings(manifest_data, text_output_dir):
-    """Checks if aliases from manifest exist in *any* extracted drawing text file."""
+def check_drawings(project: Project, manifest_data): # Takes Project instance
+    """Checks if aliases from manifest exist in *any* extracted drawing text file for a specific project."""
     if not manifest_data: return
-    print(f"Starting Drawings check: Scanning all .txt files in directory: {text_output_dir}...")
+    text_output_dir = project.get_text_output_dir_path()
+    print(f"[{project.id}] Starting Drawings check: Scanning all .txt files in directory: {text_output_dir}...")
-    all_normalized_content = "" # Combine all text content here
+    all_normalized_content = ""
     processed_files = 0
     found_files = []
     try:
-        # Step 1: Read and combine content of all .txt files in the directory
+        # Check if text dir exists BEFORE listing
+        if not os.path.exists(text_output_dir):
+            print(f"[{project.id}] Warning: Drawings text directory not found: {text_output_dir}. Skipping Drawings check.")
+            return # Exit function early
+        if not os.path.isdir(text_output_dir):
+            print(f"[{project.id}] Error: Path specified for Drawings text is not a directory: {text_output_dir}. Skipping Drawings check.")
+            return # Exit function early
+
         for filename in os.listdir(text_output_dir):
             if filename.lower().endswith('.txt'):
                 filepath = os.path.join(text_output_dir, filename)
@@ -181,38 +472,32 @@
                 try:
                     with open(filepath, 'r', encoding='utf-8') as f:
                         content = f.read()
-                        # Add a separator to prevent false matches across file boundaries
-                        all_normalized_content += normalize(content) + "\n--file-separator--\n"
+                        all_normalized_content += normalize(content) + "\\n--file-separator--\\n"
                         found_files.append(filename)
                 except Exception as e:
-                    print(f" Warning: Could not read or process text file {filepath}: {e}")
+                    print(f"[{project.id}] Warning: Could not read or process text file {filepath}: {e}")
         if processed_files == 0:
-            print(" Warning: No .txt files found in the directory. Cannot perform drawing check.")
+            print(f"[{project.id}] Warning: No .txt files found in the directory {text_output_dir}. 
Cannot perform drawing check.") return else: - print(f" Successfully read and normalized content from {len(found_files)} out of {processed_files} .txt files found.") + print(f"[{project.id}] Successfully read and normalized content from {len(found_files)} out of {processed_files} .txt files found.") - # Step 2: Check each manifest alias against the combined content found_count = 0 for item in manifest_data: normalized_alias = item['normalized_alias'] if normalized_alias and normalized_alias in all_normalized_content: item['found_drawing'] = True found_count += 1 - # else: item['found_drawing'] is already False by default - print(f"Drawings check finished. Found {found_count} manifest aliases within the combined text content.") + print(f"[{project.id}] Drawings check finished. Found {found_count} manifest aliases within the combined text content.") - except FileNotFoundError: - print(f" Error: Drawings text directory not found: {text_output_dir}") except Exception as e: - print(f" Error during drawings check: {e}") + print(f"[{project.id}] Error during drawings check in {text_output_dir}: {e}") - -def calculate_combined_progress(manifest_data): - """Calculates the combined progress based on scada/drawing status.""" - print("Calculating combined progress statistics...") +def calculate_combined_progress(project: Project, manifest_data): # Takes Project instance + """Calculates the combined progress based on scada/drawing status for a specific project.""" + print(f"[{project.id}] Calculating combined progress statistics...") results = { "overall": { "total_csv": 0, "found_both": 0, "found_scada_only": 0, "found_drawing_only": 0, "missing_both": 0, @@ -222,15 +507,14 @@ def calculate_combined_progress(manifest_data): "panels": {} } if not manifest_data: - print("Warning: No manifest data to calculate progress from.") - return results + print(f"[{project.id}] Warning: No manifest data to calculate progress from.") + return results # Return default empty structure results["overall"]["total_csv"] = len(manifest_data) for item in manifest_data: panel = item['control_panel'] - # Initialize panel data if not present if panel not in results["panels"]: results["panels"][panel] = { "total": 0, "found_both": 0, "found_scada_only": 0, "found_drawing_only": 0, "missing_both": 0, @@ -239,9 +523,7 @@ def calculate_combined_progress(manifest_data): } results["panels"][panel]["total"] += 1 - - # Categorize and add to lists - item_detail = {k: v for k, v in item.items() if k not in ['normalized_alias']} # Don't need normalized in output + item_detail = {k: v for k, v in item.items() if k not in ['normalized_alias']} if item['found_scada'] and item['found_drawing']: results["overall"]["found_both"] += 1 @@ -264,7 +546,6 @@ def calculate_combined_progress(manifest_data): results["overall"]["missing_list"].append(item_detail) results["panels"][panel]["missing_list"].append(item_detail) - # Calculate percentages if results["overall"]["total_csv"] > 0: results["overall"]["percentage_found_both"] = round( (results["overall"]["found_both"] / results["overall"]["total_csv"]) * 100, 1 @@ -275,269 +556,174 @@ def calculate_combined_progress(manifest_data): (panel_data["found_both"] / panel_data["total"]) * 100, 1 ) - print("Combined progress calculation finished.") - # print(json.dumps(results, indent=2)) # DEBUG: Print structure + print(f"[{project.id}] Combined progress calculation finished.") return results -# --- Core Logic --- - -def update_progress_data(): - """Reads manifest, runs both checks, combines results, 
and updates global state.""" - global progress_data, status_message - csv_path = get_csv_path() - views_dir = get_views_dir_path() - text_dir = get_text_output_dir_path() - current_status = "" - new_data_calculated = None - - # 1. Read Manifest - status_message = "Reading manifest file..." - print(f"Reading manifest: {csv_path}") - manifest_data = read_manifest(csv_path) - if manifest_data is None: - current_status = f"Error: Failed to read or process manifest file {csv_path}" - print(current_status) - status_message = current_status - data_updated_event.set(); data_updated_event.clear() - return # Cannot proceed without manifest - - # 2. Check SCADA (JSON files) - status_message = "Checking SCADA views..." - if not os.path.exists(views_dir): - current_status = f"Warning: SCADA Views directory not found at {views_dir}. Skipping SCADA check." - print(current_status) - # Mark all as not found in SCADA? Or just skip update? Skipping update is safer. - else: - check_scada(manifest_data, views_dir) - - # 3. Check Drawings (TXT files) - status_message = "Checking drawing text files..." - if not os.path.exists(text_dir): - current_status = f"Warning: Extracted Text directory not found at {text_dir}. Skipping Drawings check." - print(current_status) - # Mark all as not found in Drawings? Or skip? Skipping update. - else: - check_drawings(manifest_data, text_dir) - - # 4. Calculate Combined Progress - status_message = "Calculating combined progress..." - try: - new_data_calculated = calculate_combined_progress(manifest_data) - if new_data_calculated: - current_status = f"Analysis complete at {time.strftime('%Y-%m-%d %H:%M:%S')}" - else: - # This case shouldn't happen if manifest_data was valid - current_status = "Error: Failed to calculate combined progress." - except Exception as e: - current_status = f"Error during progress calculation: {e}" - print(f"Detailed Calculation Error: {e}", exc_info=True) # Log stack trace - new_data_calculated = None # Ensure no partial data update - - # Update global state - print(current_status) - status_message = current_status # Update status regardless of calculation success/failure - if new_data_calculated is not None: - progress_data = new_data_calculated - # Signal that an update attempt finished WITH new data - data_updated_event.set() - data_updated_event.clear() - -# --- Git Repo Handling (Modified slightly to use updated status messages) --- - -def check_and_update_repo(): - global last_commit_hash, status_message - repo_path = get_repo_path() - did_update = False # Flag to track if files were actually updated - initial_hash = last_commit_hash # Store hash before check - - with repo_lock: - try: - repo_existed = os.path.exists(os.path.join(repo_path, ".git")) - if not repo_existed: - print(f"Cloning repository {REPO_URL} into {repo_path}...") - status_message = f"Cloning repository {REPO_URL}..." - git.Repo.clone_from(REPO_URL, repo_path, branch=BRANCH) - repo = git.Repo(repo_path) - last_commit_hash = repo.head.commit.hexsha - print(f"Initial clone complete. 
Commit: {last_commit_hash}") - did_update = True # Cloned, so considered an update - else: - repo = git.Repo(repo_path) - print("Fetching updates from remote...") - current_local_commit = repo.head.commit.hexsha - # Update hash *before* fetch in case fetch fails but commit was readable - if last_commit_hash is None: last_commit_hash = current_local_commit - origin = repo.remotes.origin - fetch_info = origin.fetch() - - # Check if fetch actually brought new data for the target branch - # fetched_new_commits = any(info.flags & info.NEW_HEAD for info in fetch_info if info.name == f'origin/{BRANCH}') # More precise check if needed - - current_remote_commit = repo.commit(f'origin/{BRANCH}').hexsha - - print(f"Local commit: {current_local_commit}, Remote commit: {current_remote_commit}") - - if current_local_commit != current_remote_commit: - print("New commit detected! Pulling changes...") - status_message = "Pulling updates..." - try: - pull_info = origin.pull() - new_commit_hash = repo.head.commit.hexsha - print(f"Pull successful. New commit: {new_commit_hash}") - last_commit_hash = new_commit_hash - did_update = True # Pulled, so considered an update - except git.GitCommandError as e: - status_message = f"Error pulling repository: {e}" - print(status_message) - # Revert hash if pull failed - last_commit_hash = current_local_commit - else: - print("No new commits detected.") - # Update status if it wasn't an error before - if not status_message.startswith("Error"): - status_message = f"Checked repo at {time.strftime('%Y-%m-%d %H:%M:%S')}. No changes." - - # Run analysis IF the repo was updated (cloned or pulled) - if did_update: - # Status will be updated within update_progress_data - update_progress_data() - # If no git update, signal any status change (e.g., "No changes" or error) - # else: # REMOVED block that signaled event for no changes - # REMOVED: data_updated_event.set() # Signal status change event - # REMOVED: data_updated_event.clear() - # Status message is still updated globally, just won't trigger event - - except git.GitCommandError as e: - status_message = f"Git command error: {e}" - print(status_message) - # Try to get commit hash even if failed - try: - if os.path.exists(os.path.join(repo_path, ".git")): - repo = git.Repo(repo_path) - # Use previous hash if available, else try to read current - if last_commit_hash is None: last_commit_hash = repo.head.commit.hexsha - except Exception: - if last_commit_hash is None: last_commit_hash = "Error reading commit" - # REMOVED: data_updated_event.set() # Signal error status change - # REMOVED: data_updated_event.clear() - except Exception as e: - status_message = f"Error checking repository: {e}" - print(status_message) - if last_commit_hash is None: last_commit_hash = "Error checking repo" - # REMOVED: data_updated_event.set() # Signal error status change - # REMOVED: data_updated_event.clear() - - # Return true if analysis was run (because repo changed), false otherwise - return did_update +# --- Background Task --- def periodic_repo_check(): - """Runs the check_and_update_repo function periodically.""" + """Runs the check_and_update_repo method periodically for all projects sequentially.""" + # Small delay at startup to allow Flask server to bind port before first check cycle prints + time.sleep(2) while True: - print(f"\nStarting periodic repository check (Interval: {CHECK_INTERVAL_SECONDS}s)...") - repo_changed = check_and_update_repo() - # If repo didn't change, analysis wasn't triggered, but we might want to run it anyway? 
- # For now, analysis only runs if repo changes or on initial startup. - # If you want analysis *every* interval regardless of git changes, add a call here: - # if not repo_changed: - # print("Repo unchanged, triggering analysis anyway...") - # update_progress_data() - print(f"Check finished. Sleeping...") + print(f"\n--- Starting Periodic Project Checks (Interval: {CHECK_INTERVAL_SECONDS}s) ---") + project_ids = list(projects.keys()) + for project_id in project_ids: + project = projects.get(project_id) + if not project: + print(f"Warning: Project ID '{project_id}' found in keys but not in projects dict during periodic check.") + continue + + print(f"--- Checking Project: {project.id} ({project.name}) ---") + try: + # Use the public method which handles locking and analysis triggering + project.update_repo_and_analyze() + except Exception as e: + print(f"!! CRITICAL ERROR during periodic check for project '{project_id}': {e}", exc_info=True) + # Update status via lock if possible + with project.lock: + project.status_message = f"[{project.id}] CRITICAL check cycle error: {e}" + if project.last_commit_hash is None: project.last_commit_hash = "Unknown (Check Error)" + project.data_updated_event.set() # Signal error + project.data_updated_event.clear() + print(f"--- Finished check for project: {project.id} ---") + + print(f"--- All project checks finished. Sleeping for {CHECK_INTERVAL_SECONDS}s... ---") time.sleep(CHECK_INTERVAL_SECONDS) -# --- Flask Routes (Largely unchanged, rely on updated global state) --- +# --- Flask Routes --- @app.route('/') def index(): - return render_template('index.html') + # Pass project list sorted by name to template for selector + project_list = sorted( + [{"id": pid, "name": p.name} for pid, p in projects.items()], + key=lambda x: x['name'] + ) + return render_template('index.html', projects=project_list) -@app.route('/drawings') -def drawings_page(): - # Render the main index template which now contains all content - return render_template('index.html') +@app.route('/stream/') +def stream(project_id): + project = projects.get(project_id) + if not project: + return jsonify({"error": "Project not found"}), 404 -@app.route('/conflicts') -def conflicts_page(): - # Render the main index template which now contains all content - return render_template('index.html') - -@app.route('/stream') -def stream(): - def event_stream(): - last_sent_hash_to_client = None # Track hash sent to *this specific client* - - # Send initial state immediately on connection - with repo_lock: - current_global_hash = last_commit_hash - current_global_status = status_message - current_global_progress = progress_data + def event_stream(target_project: Project): + last_sent_hash_to_client = None + print(f"SSE Client connected for project: {target_project.id}") + # ... (rest of stream function remains the same) ... 
+        # Send initial state immediately
+        with target_project.lock:
+            current_hash = target_project.last_commit_hash
+            current_status = target_project.status_message
+            current_progress = target_project.progress_data
         initial_payload = json.dumps({
-            "status": current_global_status,
-            "progress": current_global_progress,
-            "last_commit": current_global_hash
+            "status": current_status,
+            "progress": current_progress,
+            "last_commit": current_hash
         })
-        yield f"data: {initial_payload}\n\n"
-        last_sent_hash_to_client = current_global_hash # Record that we sent the initial state for this client
-        print(f"Sent initial state to new client (Hash: {last_sent_hash_to_client})")
+        yield f"data: {initial_payload}\n\n"
+        last_sent_hash_to_client = current_hash
+        print(f"[{target_project.id}] Sent initial SSE state (Hash: {last_sent_hash_to_client})")
-        # Now wait for subsequent updates signaled by the event
+        # Wait for project-specific updates
         while True:
-            data_updated_event.wait() # Wait for background thread to signal completion
+            # Add a timeout to prevent hangs if event logic fails?
+            event_fired = target_project.data_updated_event.wait(timeout=CHECK_INTERVAL_SECONDS * 2)
+            if not event_fired:
+                # If timeout occurs, maybe send a keep-alive or re-send current state?
+                print(f"[{target_project.id}] SSE wait timeout. Re-checking state.")
+                # Optionally send a keep-alive comment: yield ": keepalive\n\n"
+                # Or just continue to re-evaluate state below
+                pass # Fall through to check state
-            with repo_lock: # Re-acquire lock to get the latest state
-                current_global_hash = last_commit_hash
-                current_global_status = status_message
-                current_global_progress = progress_data
+            with target_project.lock:
+                current_hash = target_project.last_commit_hash
+                current_status = target_project.status_message
+                current_progress = target_project.progress_data
-            # Send update to the client IF the data is different from what they last received
-            # Check hash first as primary indicator of change in underlying data
-            if current_global_hash != last_sent_hash_to_client:
-                print(f"Data updated (Hash changed: {last_sent_hash_to_client} -> {current_global_hash}). Sending update to client.")
+            # Send update ONLY if hash changed or if the wait timed out (to ensure client syncs)
+            if current_hash != last_sent_hash_to_client or not event_fired:
+                print(f"[{target_project.id}] SSE Data updated (Hash: {last_sent_hash_to_client} -> {current_hash}, Event Fired: {event_fired}). Sending.")
                 data_payload = json.dumps({
-                    "status": current_global_status,
-                    "progress": current_global_progress,
-                    "last_commit": current_global_hash
+                    "status": current_status,
+                    "progress": current_progress,
+                    "last_commit": current_hash
                 })
-                yield f"data: {data_payload}\n\n"
-                last_sent_hash_to_client = current_global_hash # Update the hash sent to this client
-            # else: # No need for the else block logging here anymore, as the event shouldn't trigger if hash is same
-                # If hash is the same, maybe only the status message changed (e.g., error occurred)
-                # Option: Send update only if status is different from last sent status?
-                # For simplicity now, we only send if hash differs. Client UI shows last known status.
-                # print(f"Data updated event triggered, but hash {current_global_hash} unchanged for this client. Status: '{current_global_status}'") # Removed log
+                yield f"data: {data_payload}\n\n"
+                last_sent_hash_to_client = current_hash
+            # else:
+            #     print(f"[{target_project.id}] SSE Event triggered, hash {current_hash} unchanged.")
+    return Response(event_stream(project), mimetype="text/event-stream")
-    return Response(event_stream(), mimetype="text/event-stream")
-
-# --- Main Execution ---
+# --- Main Execution ---
 if __name__ == '__main__':
-    # Ensure repo and text directories exist (optional for text dir if PDFs are pre-processed)
-    if not os.path.exists(REPO_DIR):
-        os.makedirs(REPO_DIR)
-    if not os.path.exists(TEXT_OUTPUT_FOLDER):
-        print(f"Warning: Text output folder '{TEXT_OUTPUT_FOLDER}' not found. Drawing check might fail unless PDF extraction runs first or files are manually placed.")
-        # os.makedirs(TEXT_OUTPUT_FOLDER) # Optionally create it
+    # Ensure base directories exist (safer to do this before initializing projects)
+    # Create parent dirs for repos, manifests, texts, pdfs based on config
+    dirs_to_ensure = set()
+    for pid, pconf in config.get('projects', {}).items():
+        script_dir = os.path.dirname(os.path.abspath(__file__))
+        dirs_to_ensure.add(os.path.dirname(os.path.abspath(os.path.join(script_dir, pconf.get('repo_dir', f'./cloned_repos/{pid}')))))
+        dirs_to_ensure.add(os.path.dirname(os.path.abspath(os.path.join(script_dir, pconf.get('manifest_csv', f'./manifests/{pid}_manifest.csv')))))
+        dirs_to_ensure.add(os.path.dirname(os.path.abspath(os.path.join(script_dir, pconf.get('drawing_texts_dir', f'./extracted_texts/{pid}')))))
+        pdf_dir = pconf.get('pdf_source_dir')
+        if pdf_dir:
+            dirs_to_ensure.add(os.path.dirname(os.path.abspath(os.path.join(script_dir, pdf_dir))))
-    # Perform initial check/clone and data load
-    print("Performing initial repository check and data load...")
-    # Run check_and_update_repo which calls update_progress_data if repo updated
-    initial_update_done = check_and_update_repo()
-    # If repo existed and was up-to-date on first check, analysis wasn't run yet. Run it now.
-    if not initial_update_done:
-        print("Repository present and up-to-date. Running initial analysis...")
-        # No need for lock here as background thread isn't running yet
-        update_progress_data() # Run the full analysis
-    else:
-        print("Initial analysis was triggered by repo clone/pull.")
+    for d in dirs_to_ensure:
+        if d and not os.path.exists(d):
+            print(f"Creating necessary directory: {d}")
+            try:
+                os.makedirs(d, exist_ok=True)
+            except OSError as e:
+                print(f"ERROR: Could not create directory {d}: {e}")
+                # Decide if this is fatal? Probably depends on which dir failed.
+                # exit(1) # Exit if repo/manifest dirs can't be made?
-    # Start the background thread for periodic checks
-    print("Starting background repository check thread...")
+    # Initialize Project objects AFTER ensuring directories
+    for project_id, project_conf in config.get('projects', {}).items():
+        if project_id not in projects: # Avoid re-init if already done (though shouldn't happen here)
+            projects[project_id] = Project(project_id, project_conf)
+
+    if not projects:
+        print("ERROR: No projects defined or initialized. Exiting.")
+        exit(1)
+
+    # Perform initial check/clone and data load for ALL projects
+    print("--- Performing Initial Project Analysis ---")
+    # ======================================================================
+    # === TEMPORARILY DISABLED INITIAL ANALYSIS THREADING FOR DEBUGGING ===
+    # initial_threads = []
+    # for project_id, project in projects.items():
+    #     print(f"--- Starting initial analysis for: {project.id} ({project.name}) ---")
+    #     # Run initial analysis in parallel threads for faster startup?
+    #     # Set force_analysis=True to ensure data is loaded even if repo exists and hasn't changed since last run
+    #     thread = threading.Thread(target=project.update_repo_and_analyze, kwargs={'force_analysis': True})
+    #     initial_threads.append(thread)
+    #     thread.start()
+    #
+    # # Wait for all initial analyses to complete
+    # print("Waiting for initial analyses to complete...")
+    # for thread in initial_threads:
+    #     thread.join()
+    # print("--- Initial Analyses Complete ---")
+    print("SKIPPING initial analysis for debugging port binding issue.")
+    print("NOTE: Project data will be loaded on the first periodic check.")
+    # ======================================================================
+
+    # Start the background thread for periodic checks AFTER initial load
+    print("Starting background periodic check thread...")
     repo_check_thread = threading.Thread(target=periodic_repo_check, daemon=True)
     repo_check_thread.start()
     # Run the Flask app
-    print("Starting Flask server on port 5050...")
-    # Use threaded=True for SSE background sending, debug=False for production/stability
-    app.run(host='0.0.0.0', port=5050, debug=False, threaded=True)
+    print(f"Starting Flask server: http://0.0.0.0:5050")
+    try:
+        app.run(host='0.0.0.0', port=5050, debug=False, threaded=True)
+    except Exception as e:
+        print(f"FATAL ERROR during app.run: {e}")
+        exit(1)
diff --git a/cloned_repos/mtn6 b/cloned_repos/mtn6
new file mode 160000
index 0000000..456de12
--- /dev/null
+++ b/cloned_repos/mtn6
@@ -0,0 +1 @@
+Subproject commit 456de12cca56c09bc1881660b163ac3b5dff593a
diff --git a/config.json b/config.json
new file mode 100644
index 0000000..06dfaf0
--- /dev/null
+++ b/config.json
@@ -0,0 +1,37 @@
+{
+  "check_interval_seconds": 60,
+  "projects": {
+    "mtn6": {
+      "name": "Mountain Top 6 (MTN6)",
+      "repo_url": "http://192.168.5.191:3000/LCI/MTN6",
+      "repo_dir": "./cloned_repos/mtn6",
+      "branch": "main",
+      "manifest_csv": "./manifests/MTN6 Equipment Manifest REV6(Conveyor List).csv",
+      "scada_views_dir": "MTN6_SCADA/com.inductiveautomation.perspective/views/Detailed-Views",
+      "drawing_texts_dir": "./extracted_texts/mtn6",
+      "pdf_source_dir": "./pdfs/mtn6",
+      "csv_cols": {
+        "alias": "Alias",
+        "panel": "Control Panel",
+        "eq_type": "Equipment Type",
+        "conv_type": "Type of Conveyor"
+      }
+    },
+    "another_project": {
+      "name": "Another Example Project",
+      "repo_url": "http://example.com/git/another_project.git",
+      "repo_dir": "./cloned_repos/another",
+      "branch": "develop",
+      "manifest_csv": "./manifests/another_manifest.csv",
+      "scada_views_dir": "perspective/views/Area51",
+      "drawing_texts_dir": "./extracted_texts/another",
+      "pdf_source_dir": null,
+      "csv_cols": {
+        "alias": "DeviceTag",
+        "panel": "Cabinet ID",
+        "eq_type": "Description",
+        "conv_type": "Category"
+      }
+    }
+  }
+}
\ No newline at end of file
diff --git a/templates/index.html b/templates/index.html
index 42d8986..20d3901 100644
--- a/templates/index.html
+++ b/templates/index.html
@@ -52,11 +52,29 @@
- -
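
Note on the matching rule shared by check_scada() and check_drawings() above: a manifest alias counts as "found" when its normalize()'d form appears as a substring of the normalize()'d file content. The standalone sketch below re-implements normalize() from its docstring (the exact '-'/'_' handling in app.py is assumed to be a plain replace) and uses made-up alias/view values; it is illustrative only, not part of the patch.

    # Sketch of the alias-matching rule, assuming normalize() lowercases,
    # maps '-' to '_', and strips all whitespace as its docstring states.
    import re

    def normalize(text):
        """Normalize string for comparison: lowercase, treat '-' and '_' the same, remove all whitespace."""
        if not isinstance(text, str):
            return ""
        text = text.lower()
        text = text.replace('-', '_')          # assumed implementation of the '-'/'_' rule
        return re.sub(r'\s+', '', text)        # remove ALL whitespace characters

    if __name__ == '__main__':
        alias = "CV-101_A"                                   # hypothetical manifest alias
        view_json = '{ "name": "cv_101_a Motor Status" }'    # hypothetical view.json snippet
        print(normalize(alias) in normalize(view_json))      # prints: True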
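The templates/index.html hunk is cut off above, so for reference here is a minimal sketch of consuming the new per-project stream endpoint (GET /stream/<project_id>) from outside the browser. It assumes the server from this diff is running on port 5050, uses the 'mtn6' project id from config.json, and relies on the requests package; the helper name follow_project_stream is illustrative, not part of the patch.

    # Minimal SSE consumer for the per-project endpoint added in this diff.
    import json
    import requests

    def follow_project_stream(project_id, host="http://localhost:5050"):
        url = f"{host}/stream/{project_id}"
        with requests.get(url, stream=True) as resp:
            resp.raise_for_status()
            for raw_line in resp.iter_lines(decode_unicode=True):
                # Only parse the 'data:' lines; skip blank separators and keep-alive comments.
                if not raw_line or not raw_line.startswith("data:"):
                    continue
                payload = json.loads(raw_line[len("data:"):].strip())
                print(payload["last_commit"], payload["status"],
                      payload["progress"]["overall"]["percentage_found_both"])

    if __name__ == '__main__':
        follow_project_stream("mtn6")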