import os
import threading
import time
import json
import git
import csv
import re
import traceback
from flask import Flask, render_template, jsonify, Response
# --- Configuration Loading ---
# Determine the directory where app.py resides
_script_dir = os.path.dirname(os.path.abspath(__file__))
CONFIG_FILE_ABS = os.path.join(_script_dir, 'config.json') # Absolute path
def load_config():
"""Loads configuration from JSON file using an absolute path."""
try:
print(f"Attempting to load config from absolute path: {CONFIG_FILE_ABS}") # Add log
with open(CONFIG_FILE_ABS, 'r') as f: # Use the absolute path
config = json.load(f)
# Basic validation
if 'projects' not in config or not isinstance(config['projects'], dict):
raise ValueError("Config missing 'projects' dictionary.")
if 'check_interval_seconds' not in config or not isinstance(config['check_interval_seconds'], int):
raise ValueError("Config missing 'check_interval_seconds' integer.")
print(f"Configuration loaded successfully from {CONFIG_FILE_ABS}")
return config
except FileNotFoundError:
# Also print the current working directory for debugging
cwd = os.getcwd()
print(f"ERROR: Configuration file not found at absolute path: {CONFIG_FILE_ABS}")
print(f"Current working directory when error occurred: {cwd}")
exit(1)
except json.JSONDecodeError:
print(f"ERROR: Could not decode JSON from '{CONFIG_FILE_ABS}'. Check its format.")
exit(1)
except ValueError as e:
print(f"ERROR: Invalid configuration in '{CONFIG_FILE_ABS}': {e}")
exit(1)
except Exception as e:
cwd = os.getcwd()
print(f"ERROR: An unexpected error occurred loading config from {CONFIG_FILE_ABS}: {e}")
print(f"Current working directory during error: {cwd}")
exit(1)
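# Illustrative config.json shape (a sketch assembled from the keys this file
# reads elsewhere; every value below is a placeholder, not the real
# deployment configuration):
# {
#   "check_interval_seconds": 60,
#   "projects": {
#     "example_project": {
#       "name": "Example Project",
#       "repo_url": "http://git.example.com/org/example-repo",
#       "branch": "main",
#       "repo_dir": "./cloned_repos/example_project",
#       "manifest_csv": "./manifests/example_project_manifest.csv",
#       "scada_views_dir": "SCADA/com.inductiveautomation.perspective/views",
#       "drawing_texts_dir": "./extracted_texts/example_project",
#       "pdf_source_dir": "./pdfs/example_project",
#       "csv_cols": {
#         "alias": "Alias",
#         "panel": "Control Panel",
#         "eq_type": "Equipment Type",
#         "conv_type": "Type of Conveyor"
#       }
#     }
#   }
# }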
config = load_config()
CHECK_INTERVAL_SECONDS = config['check_interval_seconds']
# --- Project Class ---
class Project:
"""Encapsulates configuration, state, and operations for a single project."""
def __init__(self, project_id, project_config):
self.id = project_id
self.config = project_config
self.name = project_config.get('name', project_id)
# Project-specific state
self.last_commit_hash = None
self.progress_data = { # Default empty structure
"overall": {"total_csv": 0, "found_both": 0, "found_scada_only": 0, "found_drawing_only": 0, "missing_both": 0, "percentage_found_both": 0, "missing_list": [], "found_scada_only_list": [], "found_drawing_only_list": [], "found_both_list": []},
"panels": {}
}
self.status_message = "Initializing..."
self.lock = threading.Lock() # Lock for accessing project-specific repo and data
self.data_updated_event = threading.Event() # Event to signal data updates for this project
# --- Pre-calculate absolute paths ---
self._script_dir = os.path.dirname(os.path.abspath(__file__))
self._repo_dir_abs = os.path.abspath(os.path.join(self._script_dir, self.config.get('repo_dir', f'./cloned_repos/{project_id}'))) # Added default
self._csv_path_abs = os.path.abspath(os.path.join(self._script_dir, self.config.get('manifest_csv', f'./manifests/{project_id}_manifest.csv'))) # Added default
scada_views_dir_rel = self.config.get('scada_views_dir')
self._views_dir_abs = os.path.join(self._repo_dir_abs, scada_views_dir_rel) if scada_views_dir_rel else None # None when unset so the SCADA check is skipped
self._texts_dir_abs = os.path.abspath(os.path.join(self._script_dir, self.config.get('drawing_texts_dir', f'./extracted_texts/{project_id}'))) # Added default
pdf_source_dir_rel = self.config.get('pdf_source_dir')
self._pdf_dir_abs = os.path.abspath(os.path.join(self._script_dir, pdf_source_dir_rel)) if pdf_source_dir_rel else None
print(f"Initialized Project '{self.id}' ({self.name}):")
print(f" Repo Dir: {self._repo_dir_abs}")
print(f" CSV Path: {self._csv_path_abs}")
if self._views_dir_abs: print(f" Views Dir: {self._views_dir_abs}")
if self._texts_dir_abs: print(f" Texts Dir: {self._texts_dir_abs}")
if self._pdf_dir_abs: print(f" PDF Dir: {self._pdf_dir_abs}")
# --- Path Helper Methods ---
def get_repo_path(self):
return self._repo_dir_abs
def get_csv_path(self):
return self._csv_path_abs
def get_views_dir_path(self):
return self._views_dir_abs
def get_text_output_dir_path(self):
return self._texts_dir_abs
def get_pdf_source_dir_path(self):
return self._pdf_dir_abs
# --- Core Logic Methods ---
def _update_progress_data(self):
"""(Internal) Reads manifest, runs checks, combines results, and updates project state."""
current_status = ""
new_data_calculated = None
manifest_data = None # Initialize
with self.lock:
try:
# 1. Read Manifest
self.status_message = f"[{self.id}] Reading manifest..."
print(f"[{self.id}] Reading manifest: {self.get_csv_path()}")
manifest_data = read_manifest(self) # Pass self (project instance)
if manifest_data is None:
current_status = f"[{self.id}] Error: Failed manifest read {self.get_csv_path()}"
print(current_status)
self.status_message = current_status
# Don't return, proceed to calculate progress with empty data if needed
# or handle error state appropriately
manifest_data = [] # Ensure it's an empty list for calculation
# 2. Check SCADA (If views dir is configured)
if self.get_views_dir_path():
self.status_message = f"[{self.id}] Checking SCADA..."
check_scada(self, manifest_data) # Pass self (project instance)
else:
print(f"[{self.id}] Skipping SCADA check (no views_dir configured).")
# 3. Check Drawings (If texts dir is configured)
if self.get_text_output_dir_path():
self.status_message = f"[{self.id}] Checking drawings..."
check_drawings(self, manifest_data) # Pass self (project instance)
else:
print(f"[{self.id}] Skipping Drawing check (no drawing_texts_dir configured).")
# 4. Calculate Combined Progress
self.status_message = f"[{self.id}] Calculating progress..."
new_data_calculated = calculate_combined_progress(self, manifest_data) # Pass self
if new_data_calculated:
if not manifest_data and new_data_calculated['overall']['total_csv'] == 0: # Check if manifest read failed/was empty
current_status = f"[{self.id}] Analysis complete (Manifest empty/read failed) @ {time.strftime('%H:%M:%S')}"
else:
current_status = f"[{self.id}] Analysis complete @ {time.strftime('%H:%M:%S')}"
self.progress_data = new_data_calculated # Update project's data
else:
# This case should ideally not happen if calculate_combined_progress always returns a dict
current_status = f"[{self.id}] Error: Failed progress calculation."
# Keep previous progress_data?
except Exception as e:
current_status = f"[{self.id}] CRITICAL Error during analysis: {e}"
print(f"[{self.id}] Detailed Analysis Error: {e}", exc_info=True)
# Optionally reset progress data or keep old?
# self.progress_data = { ... } # Reset to default empty
# Update status and signal completion (inside lock)
print(current_status)
self.status_message = current_status
self.data_updated_event.set()
self.data_updated_event.clear()
def _check_and_update_repo(self):
"""(Internal) Checks and updates the Git repository for this project."""
did_update = False
repo_path = self.get_repo_path()
repo_url = self.config.get('repo_url')
branch = self.config.get('branch', 'main') # Default to main if not specified
if not repo_url:
print(f"[{self.id}] Skipping repo check: repo_url not configured.")
self.status_message = f"[{self.id}] Repo check skipped (no URL)"
self.data_updated_event.set(); self.data_updated_event.clear() # Signal status change
return False # No update occurred
# --- Lock is acquired by the calling method (update_repo_and_analyze) ---
try:
# Ensure parent directory exists before cloning
repo_parent_dir = os.path.dirname(repo_path)
if not os.path.exists(repo_parent_dir):
print(f"[{self.id}] Creating parent directory for repo: {repo_parent_dir}")
os.makedirs(repo_parent_dir)
repo_existed = os.path.exists(os.path.join(repo_path, ".git"))
if not repo_existed:
print(f"[{self.id}] Cloning repository {repo_url} (branch: {branch}) into {repo_path}...")
self.status_message = f"[{self.id}] Cloning repository..."
git.Repo.clone_from(repo_url, repo_path, branch=branch)
repo = git.Repo(repo_path)
self.last_commit_hash = repo.head.commit.hexsha
print(f"[{self.id}] Initial clone complete. Commit: {self.last_commit_hash}")
did_update = True
else:
repo = git.Repo(repo_path)
print(f"[{self.id}] Fetching updates from remote...")
current_local_commit = repo.head.commit.hexsha
# Use current local hash if global is still None (e.g. first run after restart)
if self.last_commit_hash is None: self.last_commit_hash = current_local_commit
origin = repo.remotes.origin
current_remote_commit = None # Initialize
try:
print(f"[{self.id}] Running fetch...")
origin.fetch()
print(f"[{self.id}] Fetch complete. Getting remote commit...")
current_remote_commit = repo.commit(f'origin/{branch}').hexsha
print(f"[{self.id}] Remote commit for origin/{branch}: {current_remote_commit}")
except git.GitCommandError as fetch_err:
print(f"[{self.id}] Warning: Could not fetch from remote/find branch '{branch}': {fetch_err}")
# Keep status as is, will signal update later
except Exception as fetch_err_other:
print(f"[{self.id}] Warning: Unexpected error during fetch: {fetch_err_other}")
# Keep status as is
print(f"[{self.id}] Local commit: {current_local_commit}, Remote commit: {current_remote_commit or 'Fetch Failed/Not Found'}")
if current_remote_commit and current_local_commit != current_remote_commit:
print(f"[{self.id}] New commit detected! Pulling changes (Branch: {branch})...")
self.status_message = f"[{self.id}] Pulling updates..."
try:
# Ensure working directory is clean before pull?
# Add checkout if needed: repo.git.checkout(branch)
pull_info = origin.pull()
# Verify pull info if needed (e.g., pull_info[0].flags)
new_commit_hash = repo.head.commit.hexsha
print(f"[{self.id}] Pull successful. New commit: {new_commit_hash}")
self.last_commit_hash = new_commit_hash
did_update = True
except git.GitCommandError as e_pull:
self.status_message = f"[{self.id}] Error pulling repository: {e_pull}"
print(self.status_message)
self.last_commit_hash = current_local_commit # Keep old hash on failed pull
except Exception as e_pull_other:
self.status_message = f"[{self.id}] Unexpected error pulling repository: {e_pull_other}"
print(self.status_message)
self.last_commit_hash = current_local_commit # Keep old hash
elif current_remote_commit:
print(f"[{self.id}] No new commits detected.")
# Only update status if no pull error occurred previously
if not self.status_message.startswith(f"[{self.id}] Error pulling"):
self.status_message = f"[{self.id}] Repo up-to-date @ {time.strftime('%H:%M:%S')}"
else: # Fetch failed
# Keep previous status message (e.g., Analysis complete, Cloning, Error pulling, etc.)
print(f"[{self.id}] Keeping previous status due to fetch failure.")
# Ensure last_commit_hash is set if it was None
if self.last_commit_hash is None: self.last_commit_hash = current_local_commit or "Unknown (Fetch Failed)"
except git.GitCommandError as e_git:
error_msg = f"[{self.id}] Git command error: {e_git}"
print(error_msg)
self.status_message = error_msg
# Try reading existing hash even on error
try:
if os.path.exists(os.path.join(repo_path, ".git")):
repo = git.Repo(repo_path)
# Don't overwrite existing hash if we have one
if self.last_commit_hash is None: self.last_commit_hash = repo.head.commit.hexsha
except Exception as e_read_hash:
print(f"[{self.id}] Additionally failed to read hash after Git error: {e_read_hash}")
if self.last_commit_hash is None: self.last_commit_hash = "Error reading commit"
except Exception as e_other:
error_msg = f"[{self.id}] Error checking repository: {e_other}"
print(error_msg)
traceback.print_exc()
self.status_message = error_msg
if self.last_commit_hash is None: self.last_commit_hash = "Error checking repo"
# --- Lock is released by the calling method ---
return did_update # Return whether the repo content was changed
def update_repo_and_analyze(self, force_analysis=False):
"""Checks the repo for updates, pulls if necessary, and runs analysis if changed or forced."""
print(f"[{self.id}] Starting update_repo_and_analyze (force_analysis={force_analysis})...")
repo_changed = False
initial_hash = self.last_commit_hash # Store hash before check
with self.lock:
repo_changed = self._check_and_update_repo()
if repo_changed:
print(f"[{self.id}] Repo changed, triggering analysis...")
# Analysis reads files, recalculates progress, updates status, and signals event
self._update_progress_data()
elif force_analysis:
print(f"[{self.id}] Forcing analysis even though repo didn't change...")
self._update_progress_data()
else:
# If repo didn't change and analysis not forced, still signal potentially changed status message from repo check
print(f"[{self.id}] Repo unchanged, analysis not forced. Signaling potential status update.")
self.data_updated_event.set()
self.data_updated_event.clear()
final_hash = self.last_commit_hash
print(f"[{self.id}] Finished update_repo_and_analyze. Repo Changed: {repo_changed}. Hash: {initial_hash} -> {final_hash}. Status: '{self.status_message}'")
return repo_changed
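# Typical lifecycle of a Project instance (descriptive sketch): it is built
# from one entry of config["projects"], and update_repo_and_analyze() is then
# invoked by the periodic background thread (and, when re-enabled, once at
# startup with force_analysis=True) to refresh last_commit_hash, progress_data
# and status_message under the per-project lock.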
# --- Global Application State ---
app = Flask(__name__)
projects = {} # Dictionary to hold Project instances {project_id: Project_instance}
global_lock = threading.Lock() # Lock for modifying the projects dictionary itself (if needed later)
# Instantiate projects from config
for project_id, project_conf in config.get('projects', {}).items():
projects[project_id] = Project(project_id, project_conf)
if not projects:
print("ERROR: No projects defined in configuration. Exiting.")
exit(1)
# --- Original Global Variables (To be removed or refactored) ---
# REPO_URL = "http://192.168.5.191:3000/LCI/MTN6" # REMOVE
# REPO_DIR = "./cloned_repo" # REMOVE
# BRANCH = "main" # REMOVE
# CSV_FILENAME = "MTN6 Equipment Manifest REV6(Conveyor List).csv" # REMOVE
# VIEWS_DIR_RELATIVE = "MTN6_SCADA/com.inductiveautomation.perspective/views/Detailed-Views" # REMOVE
# TEXT_OUTPUT_FOLDER = "./extracted_texts" # REMOVE
# CHECK_INTERVAL_SECONDS = 60 # MOVED to config load
# CSV_ALIAS_COL = 'Alias' # MOVE to project config? Or keep global if consistent? -> Moved to config
# CSV_PANEL_COL = 'Control Panel' # MOVE to project config? -> Moved to config
# CSV_EQ_TYPE_COL = 'Equipment Type' # MOVE to project config? -> Moved to config
# CSV_CONV_TYPE_COL = 'Type of Conveyor' # MOVE to project config? -> Moved to config
# last_commit_hash = None # MOVED to Project class
# progress_data = { ... } # MOVED to Project class
# status_message = "Initializing..." # MOVED to Project class
# repo_lock = threading.Lock() # MOVED to Project class (per-project lock)
# data_updated_event = threading.Event() # MOVED to Project class (per-project event)
# --- Helper Functions (To be refactored or become Project methods) ---
# def get_repo_path(): # BECOMES Project method
# return os.path.abspath(REPO_DIR)
# def get_csv_path(): # BECOMES Project method
# script_dir = os.path.dirname(os.path.abspath(__file__))
# return os.path.join(script_dir, CSV_FILENAME)
# def get_views_dir_path(): # BECOMES Project method
# return os.path.join(get_repo_path(), VIEWS_DIR_RELATIVE)
# def get_text_output_dir_path(): # BECOMES Project method
# script_dir = os.path.dirname(os.path.abspath(__file__))
# return os.path.abspath(os.path.join(script_dir, TEXT_OUTPUT_FOLDER))
def normalize(text):
"""Normalize string for comparison: lowercase, treat '-' and '_' the same, remove all whitespace."""
# This seems general enough to remain a standalone helper
if not isinstance(text, str):
return ""
text = text.lower() # Convert to lowercase
text = text.replace('-', '_') # Replace hyphens with underscores
text = re.sub(r'\s+', '', text) # Remove ALL whitespace characters (including newlines)
return text
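# Illustrative behaviour of normalize() (the inputs are made-up examples):
#   normalize("CV-101 Motor") -> "cv_101motor"
#   normalize("MTN6_CONV 01") -> "mtn6_conv01"
#   normalize(None)           -> ""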
def read_manifest(project: Project): # Takes Project instance
"""Reads the manifest CSV for a specific project into a list of dictionaries."""
manifest_items = []
csv_filepath = project.get_csv_path()
# Get column names from project config
cols = project.config.get('csv_cols', {})
col_alias = cols.get('alias', 'Alias') # Default fallback
col_panel = cols.get('panel', 'Control Panel')
col_eq_type = cols.get('eq_type', 'Equipment Type')
col_conv_type = cols.get('conv_type', 'Type of Conveyor')
required_cols = {col_alias, col_panel}
optional_cols = {col_eq_type, col_conv_type}
try:
with open(csv_filepath, mode='r', newline='', encoding='utf-8-sig') as infile:
reader = csv.DictReader(infile)
headers = set(h.strip() for h in (reader.fieldnames or [])) # Guard against an empty CSV with no header row
missing_required = required_cols - headers
if missing_required:
print(f"[{project.id}] Error: Missing required columns in CSV '{csv_filepath}': {', '.join(missing_required)}")
print(f"[{project.id}] Available columns: {', '.join(headers)}")
return None
for row in reader:
alias = row.get(col_alias, "").strip()
panel = row.get(col_panel, "").strip()
if alias and panel:
item = {
"alias": alias,
"normalized_alias": normalize(alias),
"control_panel": panel,
"equipment_type": row.get(col_eq_type, "").strip() if col_eq_type in headers else "N/A",
"conveyor_type": row.get(col_conv_type, "").strip() if col_conv_type in headers else "N/A",
"found_scada": False,
"found_drawing": False
}
manifest_items.append(item)
elif alias and not panel:
print(f"[{project.id}] Warning: Alias '{alias}' found in CSV but is missing its '{col_panel}'. Skipping.")
except FileNotFoundError:
print(f"[{project.id}] Error: Manifest file not found at {csv_filepath}")
return None
except Exception as e:
print(f"[{project.id}] Error reading CSV file {csv_filepath}: {e}")
return None
print(f"[{project.id}] Read {len(manifest_items)} valid items from manifest.")
return manifest_items
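# Illustrative manifest row and its parsed form (column names are the defaults
# above; real headers come from each project's csv_cols config, and the values
# here are made up):
#   Alias,Control Panel,Equipment Type,Type of Conveyor
#   CV-101,CP-01,Conveyor,Belt
# becomes:
#   {"alias": "CV-101", "normalized_alias": "cv_101", "control_panel": "CP-01",
#    "equipment_type": "Conveyor", "conveyor_type": "Belt",
#    "found_scada": False, "found_drawing": False}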
def check_scada(project: Project, manifest_data): # Takes Project instance
"""Checks for aliases in SCADA JSON view files for a specific project."""
if not manifest_data: return
views_dir = project.get_views_dir_path()
print(f"[{project.id}] Starting SCADA check in directory: {views_dir}...")
found_count = 0
processed_files = 0
alias_map = {}
for item in manifest_data:
na = item['normalized_alias']
if na not in alias_map:
alias_map[na] = []
alias_map[na].append(item)
try:
# Check if views dir exists BEFORE walking
if not os.path.exists(views_dir):
print(f"[{project.id}] Warning: SCADA Views directory not found at {views_dir}. Skipping SCADA check.")
return # Exit function early
for root, _, files in os.walk(views_dir):
for filename in files:
if filename == 'view.json':
filepath = os.path.join(root, filename)
processed_files += 1
try:
with open(filepath, 'r', encoding='utf-8') as f:
content = f.read()
normalized_content = normalize(content)
for norm_alias, items in alias_map.items():
if norm_alias in normalized_content:
for item in items:
if not item['found_scada']:
item['found_scada'] = True
found_count += 1
except Exception as e:
print(f"[{project.id}] Warning: Could not read or process JSON file {filepath}: {e}")
except Exception as e:
print(f"[{project.id}] Error walking SCADA views directory {views_dir}: {e}")
print(f"[{project.id}] SCADA check finished. Processed {processed_files} view.json files. Found {found_count} manifest aliases.")
def check_drawings(project: Project, manifest_data): # Takes Project instance
"""Checks if aliases from manifest exist in *any* extracted drawing text file for a specific project."""
if not manifest_data: return
text_output_dir = project.get_text_output_dir_path()
print(f"[{project.id}] Starting Drawings check: Scanning all .txt files in directory: {text_output_dir}...")
all_normalized_content = ""
processed_files = 0
found_files = []
try:
# Check if text dir exists BEFORE listing
if not os.path.exists(text_output_dir):
print(f"[{project.id}] Warning: Drawings text directory not found: {text_output_dir}. Skipping Drawings check.")
return # Exit function early
if not os.path.isdir(text_output_dir):
print(f"[{project.id}] Error: Path specified for Drawings text is not a directory: {text_output_dir}. Skipping Drawings check.")
return # Exit function early
for filename in os.listdir(text_output_dir):
if filename.lower().endswith('.txt'):
filepath = os.path.join(text_output_dir, filename)
processed_files += 1
try:
with open(filepath, 'r', encoding='utf-8') as f:
content = f.read()
all_normalized_content += normalize(content) + "\n--file-separator--\n"
found_files.append(filename)
except Exception as e:
print(f"[{project.id}] Warning: Could not read or process text file {filepath}: {e}")
if processed_files == 0:
print(f"[{project.id}] Warning: No .txt files found in the directory {text_output_dir}. Cannot perform drawing check.")
return
else:
print(f"[{project.id}] Successfully read and normalized content from {len(found_files)} out of {processed_files} .txt files found.")
found_count = 0
for item in manifest_data:
normalized_alias = item['normalized_alias']
if normalized_alias and normalized_alias in all_normalized_content:
item['found_drawing'] = True
found_count += 1
print(f"[{project.id}] Drawings check finished. Found {found_count} manifest aliases within the combined text content.")
except Exception as e:
print(f"[{project.id}] Error during drawings check in {text_output_dir}: {e}")
def calculate_combined_progress(project: Project, manifest_data): # Takes Project instance
"""Calculates the combined progress based on scada/drawing status for a specific project."""
print(f"[{project.id}] Calculating combined progress statistics...")
results = {
"overall": {
"total_csv": 0, "found_both": 0, "found_scada_only": 0, "found_drawing_only": 0, "missing_both": 0,
"percentage_found_both": 0,
"missing_list": [], "found_scada_only_list": [], "found_drawing_only_list": [], "found_both_list": []
},
"panels": {}
}
if not manifest_data:
print(f"[{project.id}] Warning: No manifest data to calculate progress from.")
return results # Return default empty structure
results["overall"]["total_csv"] = len(manifest_data)
for item in manifest_data:
panel = item['control_panel']
if panel not in results["panels"]:
results["panels"][panel] = {
"total": 0, "found_both": 0, "found_scada_only": 0, "found_drawing_only": 0, "missing_both": 0,
"percentage_found_both": 0,
"missing_list": [], "found_scada_only_list": [], "found_drawing_only_list": [], "found_both_list": []
}
results["panels"][panel]["total"] += 1
item_detail = {k: v for k, v in item.items() if k not in ['normalized_alias']}
if item['found_scada'] and item['found_drawing']:
results["overall"]["found_both"] += 1
results["panels"][panel]["found_both"] += 1
results["overall"]["found_both_list"].append(item_detail)
results["panels"][panel]["found_both_list"].append(item_detail)
elif item['found_scada'] and not item['found_drawing']:
results["overall"]["found_scada_only"] += 1
results["panels"][panel]["found_scada_only"] += 1
results["overall"]["found_scada_only_list"].append(item_detail)
results["panels"][panel]["found_scada_only_list"].append(item_detail)
elif not item['found_scada'] and item['found_drawing']:
results["overall"]["found_drawing_only"] += 1
results["panels"][panel]["found_drawing_only"] += 1
results["overall"]["found_drawing_only_list"].append(item_detail)
results["panels"][panel]["found_drawing_only_list"].append(item_detail)
else: # Missing both
results["overall"]["missing_both"] += 1
results["panels"][panel]["missing_both"] += 1
results["overall"]["missing_list"].append(item_detail)
results["panels"][panel]["missing_list"].append(item_detail)
if results["overall"]["total_csv"] > 0:
results["overall"]["percentage_found_both"] = round(
(results["overall"]["found_both"] / results["overall"]["total_csv"]) * 100, 1
)
for panel_data in results["panels"].values():
if panel_data["total"] > 0:
panel_data["percentage_found_both"] = round(
(panel_data["found_both"] / panel_data["total"]) * 100, 1
)
print(f"[{project.id}] Combined progress calculation finished.")
return results
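# Worked example (made-up numbers): a panel with total=8, found_both=6,
# found_scada_only=1 and missing_both=1 reports
# percentage_found_both == round((6 / 8) * 100, 1) == 75.0.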
# --- Background Task ---
def periodic_repo_check():
"""Runs the check_and_update_repo method periodically for all projects sequentially."""
# Small delay at startup to allow Flask server to bind port before first check cycle prints
time.sleep(2)
while True:
print(f"\n--- Starting Periodic Project Checks (Interval: {CHECK_INTERVAL_SECONDS}s) ---")
project_ids = list(projects.keys())
for project_id in project_ids:
project = projects.get(project_id)
if not project:
print(f"Warning: Project ID '{project_id}' found in keys but not in projects dict during periodic check.")
continue
print(f"--- Checking Project: {project.id} ({project.name}) ---")
try:
# Use the public method which handles locking and analysis triggering
project.update_repo_and_analyze()
except Exception as e:
print(f"!! CRITICAL ERROR during periodic check for project '{project_id}': {e}", exc_info=True)
# Update status via lock if possible
with project.lock:
project.status_message = f"[{project.id}] CRITICAL check cycle error: {e}"
if project.last_commit_hash is None: project.last_commit_hash = "Unknown (Check Error)"
project.data_updated_event.set() # Signal error
project.data_updated_event.clear()
print(f"--- Finished check for project: {project.id} ---")
print(f"--- All project checks finished. Sleeping for {CHECK_INTERVAL_SECONDS}s... ---")
time.sleep(CHECK_INTERVAL_SECONDS)
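# Note: the sleep only starts once every project has been checked, so the
# effective cycle time is CHECK_INTERVAL_SECONDS plus however long the repo
# checks and analyses themselves take.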
# --- Flask Routes ---
@app.route('/')
def index():
# Pass project list sorted by name to template for selector
project_list = sorted(
[{"id": pid, "name": p.name} for pid, p in projects.items()],
key=lambda x: x['name']
)
return render_template('index.html', projects=project_list)
@app.route('/stream/<project_id>')
def stream(project_id):
project = projects.get(project_id)
if not project:
return jsonify({"error": "Project not found"}), 404
def event_stream(target_project: Project):
last_sent_hash_to_client = None
print(f"SSE Client connected for project: {target_project.id}")
# Send initial state immediately
with target_project.lock:
current_hash = target_project.last_commit_hash
current_status = target_project.status_message
current_progress = target_project.progress_data
initial_payload = json.dumps({
"status": current_status,
"progress": current_progress,
"last_commit": current_hash
})
yield f"data: {initial_payload}\\n\\n"
last_sent_hash_to_client = current_hash
print(f"[{target_project.id}] Sent initial SSE state (Hash: {last_sent_hash_to_client})")
# Wait for project-specific updates
while True:
# Add a timeout to prevent hangs if event logic fails?
event_fired = target_project.data_updated_event.wait(timeout=CHECK_INTERVAL_SECONDS * 2)
if not event_fired:
# If timeout occurs, maybe send a keep-alive or re-send current state?
print(f"[{target_project.id}] SSE wait timeout. Re-checking state.")
# Optionally send a keep-alive comment: yield ": keepalive\n\n"
# Or just continue to re-evaluate state below
pass # Fall through to check state
with target_project.lock:
current_hash = target_project.last_commit_hash
current_status = target_project.status_message
current_progress = target_project.progress_data
# Send update ONLY if hash changed or if the wait timed out (to ensure client syncs)
if current_hash != last_sent_hash_to_client or not event_fired:
print(f"[{target_project.id}] SSE Data updated (Hash: {last_sent_hash_to_client} -> {current_hash}, Event Fired: {event_fired}). Sending.")
data_payload = json.dumps({
"status": current_status,
"progress": current_progress,
"last_commit": current_hash
})
yield f"data: {data_payload}\\n\\n"
last_sent_hash_to_client = current_hash
# else:
# print(f"[{target_project.id}] SSE Event triggered, hash {current_hash} unchanged.")
return Response(event_stream(project), mimetype="text/event-stream")
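# Consuming the stream (sketch; "example_project" is a placeholder id):
#   curl -N http://localhost:5050/stream/example_project
# Each SSE message carries a JSON payload of the form
#   {"status": "...", "progress": {...}, "last_commit": "..."}
# where "progress" follows the structure built by calculate_combined_progress().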
# --- Main Execution ---
if __name__ == '__main__':
# Ensure base directories exist (safer to do this before initializing projects)
# Create parent dirs for repos, manifests, texts, pdfs based on config
dirs_to_ensure = set()
for pid, pconf in config.get('projects', {}).items():
script_dir = os.path.dirname(os.path.abspath(__file__))
dirs_to_ensure.add(os.path.dirname(os.path.abspath(os.path.join(script_dir, pconf.get('repo_dir', f'./cloned_repos/{pid}')))))
dirs_to_ensure.add(os.path.dirname(os.path.abspath(os.path.join(script_dir, pconf.get('manifest_csv', f'./manifests/{pid}_manifest.csv')))))
dirs_to_ensure.add(os.path.dirname(os.path.abspath(os.path.join(script_dir, pconf.get('drawing_texts_dir', f'./extracted_texts/{pid}')))))
pdf_dir = pconf.get('pdf_source_dir')
if pdf_dir:
dirs_to_ensure.add(os.path.dirname(os.path.abspath(os.path.join(script_dir, pdf_dir))))
for d in dirs_to_ensure:
if d and not os.path.exists(d):
print(f"Creating necessary directory: {d}")
try:
os.makedirs(d, exist_ok=True)
except OSError as e:
print(f"ERROR: Could not create directory {d}: {e}")
# Decide if this is fatal? Probably depends on which dir failed.
# exit(1) # Exit if repo/manifest dirs can't be made?
# Ensure Project objects exist AFTER the directories above have been created.
# NOTE: projects were already instantiated at import time further up, so this
# loop only creates an entry that is somehow still missing.
for project_id, project_conf in config.get('projects', {}).items():
if project_id not in projects: # Normally already created at module level
projects[project_id] = Project(project_id, project_conf)
if not projects:
print("ERROR: No projects defined or initialized. Exiting.")
exit(1)
# Perform initial check/clone and data load for ALL projects
print("--- Performing Initial Project Analysis ---")
# ======================================================================
# === TEMPORARILY DISABLED INITIAL ANALYSIS THREADING FOR DEBUGGING ===
# initial_threads = []
# for project_id, project in projects.items():
# print(f"--- Starting initial analysis for: {project.id} ({project.name}) ---")
# # Run initial analysis in parallel threads for faster startup?
# # Set force_analysis=True to ensure data is loaded even if repo exists and hasn't changed since last run
# thread = threading.Thread(target=project.update_repo_and_analyze, kwargs={'force_analysis': True})
# initial_threads.append(thread)
# thread.start()
#
# # Wait for all initial analyses to complete
# print("Waiting for initial analyses to complete...")
# for thread in initial_threads:
# thread.join()
# print("--- Initial Analyses Complete ---")
print("SKIPPING initial analysis for debugging port binding issue.")
print("NOTE: Project data will be loaded on the first periodic check.")
# ======================================================================
# Start the background thread for periodic checks AFTER initial load
print("Starting background periodic check thread...")
repo_check_thread = threading.Thread(target=periodic_repo_check, daemon=True)
repo_check_thread.start()
# Run the Flask app
print(f"Starting Flask server: http://0.0.0.0:5050")
try:
app.run(host='0.0.0.0', port=5050, debug=False, threaded=True)
except Exception as e:
print(f"FATAL ERROR during app.run: {e}", exc_info=True)
exit(1)