import os
import threading
import time
import json
import git
import re # Import re for project name validation
from flask import Flask, render_template, jsonify, Response, request # Add request
from werkzeug.utils import secure_filename # For securing filenames
from concurrent.futures import ThreadPoolExecutor # Import ThreadPoolExecutor
# Import configurations and new modules
import config
import utils
from manifest_reader import read_manifest
from scada_checker import check_scada
from drawing_checker import check_drawings
from progress_calculator import calculate_combined_progress
app = Flask(__name__)
# --- Global state (Per-Project) ---
# Dictionaries keyed by project name
project_last_commit = {}
project_progress_data = {}
project_status = {}
all_projects = utils.discover_projects() # Discover projects at startup
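# NOTE: discover_projects() presumably scans config.PROJECTS_ROOT_DIR for project
# subdirectories (an assumption; see utils). Projects added later via the
# /add_project endpoint are only picked up after a server restart.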
# Initialize state for discovered projects
def get_default_progress():
    """Return a fresh copy of the default progress structure."""
    return {
        "overall": {"total_csv": 0, "found_both": 0, "found_scada_only": 0, "found_drawing_only": 0, "missing_both": 0, "percentage_found_both": 0, "missing_list": [], "found_scada_only_list": [], "found_drawing_only_list": [], "found_both_list": []},
        "panels": {}
    }

for proj_name in all_projects:
    project_last_commit[proj_name] = None
    project_progress_data[proj_name] = get_default_progress()
    project_status[proj_name] = "Initializing..."

repo_lock = threading.Lock() # Lock remains global for now, managing access to shared dicts
data_updated_event = threading.Event() # Event signals ANY project update
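# The event is "pulsed": set() immediately followed by clear(). Threads already
# blocked in wait() are released; a thread that starts waiting after the clear()
# simply blocks until the next pulse, so a busy SSE client may coalesce several
# rapid updates into one.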
# Define max workers for thread pools
MAX_INITIAL_CHECK_WORKERS = 5 # Adjust as needed
MAX_PERIODIC_CHECK_WORKERS = 5 # Adjust as needed

# --- Core Logic Orchestration (Per-Project) ---
def update_progress_data(project_name):
    """Reads manifest, runs checks, combines results for a specific project."""
    global project_progress_data, project_status  # Reference the global dicts
    current_status = ""
    new_data_calculated = None
    print(f"[{project_name}] Starting analysis workflow...")

    # 1. Read Manifest
    set_status(project_name, "Reading manifest file...")
    manifest_data = read_manifest(project_name)
    if manifest_data is None:
        current_status = f"[{project_name}] Error: Failed to read or process manifest file."
        print(current_status)
        set_status(project_name, current_status)
        # Reset progress data for this project on manifest error
        with repo_lock:
            project_progress_data[project_name] = get_default_progress()
        data_updated_event.set(); data_updated_event.clear()  # Signal update (error status + reset data)
        return  # Cannot proceed without manifest

    # 2. Check SCADA (JSON files)
    set_status(project_name, "Checking SCADA views...")
    check_scada(project_name, manifest_data)

    # 3. Check Drawings (TXT files)
    set_status(project_name, "Checking drawing text files...")
    check_drawings(project_name, manifest_data)

    # 4. Calculate Combined Progress
    set_status(project_name, "Calculating combined progress...")
    try:
        new_data_calculated = calculate_combined_progress(project_name, manifest_data)
        if new_data_calculated:
            current_status = f"[{project_name}] Analysis complete at {time.strftime('%Y-%m-%d %H:%M:%S')}"
        else:
            current_status = f"[{project_name}] Warning: Progress calculation yielded no results (manifest might be empty)."
            new_data_calculated = get_default_progress()  # Reset to default empty structure
    except Exception as e:
        current_status = f"[{project_name}] Error during progress calculation: {e}"
        print(f"Detailed Calculation Error: {e}")  # Log stack trace (removed exc_info)
        new_data_calculated = None  # Ensure no partial data update

    # Update global state atomically for this project
    with repo_lock:
        print(current_status)
        # Update status first (always)
        project_status[project_name] = current_status
        # Update progress data only if calculation was successful or yielded default empty
        if new_data_calculated is not None:
            project_progress_data[project_name] = new_data_calculated
    # Signal update regardless of calculation success if status changed or data changed
    data_updated_event.set()
    data_updated_event.clear()

def set_status(project_name, message):
    """Helper to update status message for a project and signal change."""
    global project_status
    with repo_lock:
        if project_status.get(project_name) != message:
            print(f"[{project_name}] Status: {message}")
            project_status[project_name] = message
            data_updated_event.set()
            data_updated_event.clear()

# --- Git Repo Handling (Per-Project) ---
def check_and_update_repo(project_name):
    """Checks and updates the Git repository for a specific project, minimizing lock contention."""
    global project_last_commit, project_status  # Reference global dicts
    repo_path = utils.get_repo_path(project_name)
    repo_url = config.REPO_URL  # Assuming global for now
    branch = config.BRANCH  # Assuming global for now
    did_update = False  # Flag to track if files were actually updated
    initial_hash = None
    with repo_lock:  # Briefly lock to get initial hash
        initial_hash = project_last_commit.get(project_name)

    try:
        project_base_path = utils.get_project_base_path(project_name)
        if not os.path.exists(project_base_path):
            # Use set_status which handles locking
            set_status(project_name, f"Error: Project directory not found: {project_base_path}")
            return False  # Cannot proceed

        # Ensure parent directory exists (outside lock)
        os.makedirs(os.path.dirname(repo_path), exist_ok=True)
        repo_existed = os.path.exists(os.path.join(repo_path, ".git"))
        if not repo_existed:
            print(f"[{project_name}] Cloning repository {repo_url} into {repo_path}...")
            set_status(project_name, "Cloning repository...")
            # --- Clone happens OUTSIDE lock ---
            try:
                git.Repo.clone_from(repo_url, repo_path, branch=branch)
                repo = git.Repo(repo_path)
                new_commit_hash = repo.head.commit.hexsha
                with repo_lock:  # Lock ONLY to update shared state
                    project_last_commit[project_name] = new_commit_hash
                print(f"[{project_name}] Initial clone complete. Commit: {new_commit_hash}")
                did_update = True
            except git.GitCommandError as clone_err:
                set_status(project_name, f"Error cloning repository: {clone_err}")
                print(f"[{project_name}] Git clone error: {clone_err}")
                # Ensure commit state reflects error if needed
                with repo_lock:
                    if project_last_commit.get(project_name) is None:
                        project_last_commit[project_name] = "Clone Error"
                return False  # Indicate no update occurred
            # --- End Clone ---
        else:
            # --- Fetch/Pull Logic ---
            repo = git.Repo(repo_path)
            current_local_commit = repo.head.commit.hexsha
            # Ensure initial hash is set if missing (brief lock)
            with repo_lock:
                if project_last_commit.get(project_name) is None:
                    project_last_commit[project_name] = current_local_commit
                    initial_hash = current_local_commit  # Update local var too

            print(f"[{project_name}] Fetching updates from remote...")
            set_status(project_name, "Checking for updates...")
            origin = repo.remotes.origin
            # --- Fetch happens OUTSIDE lock ---
            try:
                fetch_info = origin.fetch()
            except git.GitCommandError as fetch_err:
                set_status(project_name, f"Error fetching remote: {fetch_err}")
                print(f"[{project_name}] Git fetch error: {fetch_err}")
                return False  # No update occurred
            # --- End Fetch ---
            # --- Check commits (brief lock) ---
            current_remote_commit = None
            pull_needed = False
            try:
                # Must read remote commit *after* fetch
                current_remote_commit = repo.commit(f'origin/{branch}').hexsha
                # Check if pull is needed inside the try block after getting remote commit
                if current_local_commit != current_remote_commit:
                    pull_needed = True
            except git.GitCommandError as commit_err:
                set_status(project_name, f"Error accessing remote branch origin/{branch}: {commit_err}")
                print(f"[{project_name}] Error accessing remote branch: {commit_err}")
                return False  # Cannot compare/pull
            # --- End Check commits ---

            print(f"[{project_name}] Local commit: {current_local_commit}, Remote commit (origin/{branch}): {current_remote_commit}")
            if pull_needed:
                print(f"[{project_name}] New commit detected! Pulling changes...")
                set_status(project_name, "Pulling updates...")
                # --- Pull happens OUTSIDE lock ---
                try:
                    pull_info = origin.pull()
                    new_commit_hash = repo.head.commit.hexsha  # Get hash after pull
                    with repo_lock:  # Lock ONLY to update shared state
                        project_last_commit[project_name] = new_commit_hash
                    print(f"[{project_name}] Pull successful. New commit: {new_commit_hash}")
                    did_update = True
                except git.GitCommandError as pull_err:
                    set_status(project_name, f"Error pulling repository: {pull_err}")
                    print(f"[{project_name}] Git pull error: {pull_err}")
                    # Revert shared state hash if pull failed? Safest is to keep the pre-pull local commit.
                    with repo_lock:
                        project_last_commit[project_name] = current_local_commit  # Revert to known local state before pull attempt
                    # Keep did_update = False
                # --- End Pull ---
            else:
                print(f"[{project_name}] No new commits detected.")
                # Update status only if it wasn't an error before (set_status handles lock)
                current_status = project_status.get(project_name, "")
                if not current_status.startswith("Error"):
                    set_status(project_name, f"Checked repo at {time.strftime('%Y-%m-%d %H:%M:%S')}. No changes.")
            # --- End Fetch/Pull Logic ---

        # --- Run analysis IF repo was updated (outside lock) ---
        if did_update:
            print(f"[{project_name}] Repository updated. Triggering analysis...")
            update_progress_data(project_name)  # Calls the orchestrator function
    except git.InvalidGitRepositoryError:
        msg = f"Error: Directory '{repo_path}' exists but is not a valid Git repository. Consider deleting it and restarting."
        set_status(project_name, msg)  # Handles lock
        print(f"[{project_name}] {msg}")
        with repo_lock:  # Lock to update commit state
            project_last_commit[project_name] = "Invalid Repository"
    except git.GitCommandError as e:
        # General Git command error (if not caught above)
        msg = f"Git command error: {e}"
        set_status(project_name, msg)  # Handles lock
        print(f"[{project_name}] {msg}")
        # Try to set commit hash state even on error (brief lock)
        with repo_lock:
            if project_last_commit.get(project_name) is None:  # Only set if not already set (e.g., by failed pull)
                try:
                    if os.path.exists(os.path.join(repo_path, ".git")):
                        repo = git.Repo(repo_path)
                        project_last_commit[project_name] = repo.head.commit.hexsha
                    else:
                        project_last_commit[project_name] = "Error (No repo)"
                except Exception:
                    project_last_commit[project_name] = "Error reading commit"
    except Exception as e:
        # Catch-all for other unexpected errors
        msg = f"Unexpected error checking repository: {e}"
        set_status(project_name, msg)  # Handles lock
        print(f"[{project_name}] {msg}")  # Only the message is logged here; no stack trace
        with repo_lock:  # Lock to update commit state
            if project_last_commit.get(project_name) is None:
                project_last_commit[project_name] = "Error checking repo"

    # Return true if analysis was run (because repo changed), false otherwise
    return did_update

def periodic_repo_check():
    """Runs the check_and_update_repo function periodically for all projects using a thread pool."""
    global all_projects
    # Use a ThreadPoolExecutor to manage periodic checks concurrently
    with ThreadPoolExecutor(max_workers=MAX_PERIODIC_CHECK_WORKERS) as executor:
        while True:
            print(f"\nStarting periodic check cycle for all projects (Interval: {config.CHECK_INTERVAL_SECONDS}s)...")
            current_projects = list(all_projects)  # Copy list in case it changes
            futures = []
            for project_name in current_projects:
                print(f"--- Submitting periodic check for project: {project_name} ---")
                # Submit check_and_update_repo to the thread pool
                futures.append(executor.submit(run_check_and_log_errors, project_name, "periodic"))
            # Wait briefly for tasks to start, but don't block the loop long
            # time.sleep(1)  # Optional: short sleep if needed
            print(f"Periodic check cycle submitted. Sleeping for {config.CHECK_INTERVAL_SECONDS}s...")
            time.sleep(config.CHECK_INTERVAL_SECONDS)
            # Note: We don't explicitly wait for futures to complete here.
            # The pool manages threads, and the loop continues periodically.
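
# Note on periodic_repo_check: futures are intentionally not awaited, so a check that
# takes longer than CHECK_INTERVAL_SECONDS can overlap with the next cycle for the
# same project; concurrency is bounded by MAX_PERIODIC_CHECK_WORKERS.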

def run_check_and_log_errors(project_name, check_type="initial"):
    """Wrapper to run check_and_update_repo and log any exceptions."""
    try:
        print(f"--- [{check_type.capitalize()}] Running check for project: {project_name} ---")
        check_and_update_repo(project_name)
        print(f"--- [{check_type.capitalize()}] Finished check for project: {project_name} ---")
    except Exception as e:
        err_msg = f"Critical error during {check_type} check for {project_name}: {e}"
        print(err_msg)
        # Use set_status which handles locking and event signaling
        set_status(project_name, f"Error during {check_type} check: {e}")

def initial_project_setup_and_analysis(project_name):
    """Performs initial repo check/update AND ensures initial analysis runs."""
    try:
        print(f"--- [Initial Setup] Starting for project: {project_name} ---")
        # Run check_and_update_repo first. It returns True if it triggered an update/analysis.
        update_occurred = check_and_update_repo(project_name)
        # If no update occurred (repo was cloned before or was already up-to-date),
        # we still need to run the analysis once on startup.
        if not update_occurred:
            print(f"--- [Initial Analysis] Repo up-to-date or non-git. Running analysis for project: {project_name} ---")
            update_progress_data(project_name)  # Run the analysis explicitly
        print(f"--- [Initial Setup] Finished for project: {project_name} ---")
    except Exception as e:
        err_msg = f"Critical error during initial setup/analysis for {project_name}: {e}"
        print(err_msg)
        set_status(project_name, f"Error during initial setup: {e}")

# --- Flask Routes ---
@app.route('/')
def index():
    # Pass the list of projects and initial statuses to the template
    with repo_lock:
        initial_statuses = dict(project_status)  # Get a consistent snapshot
        project_list = list(all_projects)
    return render_template('index.html', projects=project_list, initial_statuses=initial_statuses)
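
# index() only renders the initial snapshot; after page load the browser receives
# live updates through the /stream SSE endpoint below.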
# Removed redundant routes for /drawings and /conflicts as index.html handles tabs

@app.route('/stream')
def stream():
    def event_stream():
        # Track state sent to *this specific client* (using a copy of global state)
        last_sent_state = {}
        # Send initial state immediately on connection
        with repo_lock:
            # Send data for all known projects
            current_global_state = {
                "projects": list(all_projects),
                "status": dict(project_status),
                "progress": dict(project_progress_data),
                "last_commit": dict(project_last_commit)
            }
            initial_payload = json.dumps(current_global_state)
        yield f"data: {initial_payload}\n\n"
        last_sent_state = current_global_state  # Store the state sent to this client
        print(f"Sent initial state to new client for projects: {last_sent_state.get('projects')}")

        # Now wait for subsequent updates signaled by the event
        while True:
            data_updated_event.wait()  # Wait for ANY background thread signal
            with repo_lock:  # Re-acquire lock to get the latest state
                current_global_state = {
                    "projects": list(all_projects),
                    "status": dict(project_status),
                    "progress": dict(project_progress_data),
                    "last_commit": dict(project_last_commit)
                }
                # Basic check: Compare entire state dictionaries (can be refined if needed)
                # Using json.dumps for a quick deep comparison, might be slow for huge data
                current_state_json = json.dumps(current_global_state, sort_keys=True)
                last_sent_state_json = json.dumps(last_sent_state, sort_keys=True)
            if current_state_json != last_sent_state_json:
                print("Global state changed. Sending update to client.")
                # print(f"Debug: Old state: {last_sent_state_json}")  # Optional debug
                # print(f"Debug: New state: {current_state_json}")  # Optional debug
                yield f"data: {current_state_json}\n\n"
                last_sent_state = current_global_state  # Update the state sent to this client
            # else:  # Log if event triggered but nothing changed
            #     print("Data update event triggered, but state unchanged for this client.")

    return Response(event_stream(), mimetype="text/event-stream")
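
# Note: with the built-in Flask server, each connected SSE client occupies one request
# thread for the lifetime of its connection; app.run(..., threaded=True) at the bottom
# of this file is what allows several clients (plus normal page loads) at once.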
# --- NEW: Add Project Endpoint ---
ALLOWED_PROJECT_NAME_REGEX = re.compile(r'^[a-zA-Z0-9_-]+$')
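# For example, 'Plant_42-North' is accepted; names containing spaces, dots, slashes
# or any other characters are rejected.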

@app.route('/add_project', methods=['POST'])
def add_project():
    if 'projectName' not in request.form:
        return jsonify(success=False, message="Missing project name."), 400
    if 'repoUrl' not in request.form:  # We receive it but don't use it for cloning yet
        return jsonify(success=False, message="Missing repository URL."), 400
    if 'manifestFile' not in request.files:
        return jsonify(success=False, message="Missing manifest CSV file."), 400

    project_name_raw = request.form['projectName'].strip()
    repo_url = request.form['repoUrl'].strip()
    manifest_file = request.files['manifestFile']
    pdf_files = request.files.getlist('pdfFiles')  # Use getlist for multiple files

    # --- Validation ---
    if not project_name_raw:
        return jsonify(success=False, message="Project name cannot be empty."), 400
    if not ALLOWED_PROJECT_NAME_REGEX.match(project_name_raw):
        return jsonify(success=False, message="Invalid Project Name. Use only letters, numbers, underscores, or hyphens."), 400
    if not manifest_file.filename or not manifest_file.filename.lower().endswith('.csv'):
        return jsonify(success=False, message="Manifest file must be a .csv file."), 400
    if not pdf_files or all(not f.filename for f in pdf_files):  # Check if list is empty or contains only empty filenames
        return jsonify(success=False, message="At least one PDF file must be provided."), 400
    for pdf_file in pdf_files:
        if not pdf_file.filename or not pdf_file.filename.lower().endswith('.pdf'):
            return jsonify(success=False, message=f"Invalid file type uploaded: {pdf_file.filename}. Only PDF files allowed."), 400

    # Use secure_filename for the project name used in paths.
    # Although we validated with regex, this adds another layer against path traversal etc.
    safe_project_name = secure_filename(project_name_raw)
    if safe_project_name != project_name_raw:  # Extra check if secure_filename modified it unexpectedly (e.g., spaces removed)
        print(f"Warning: Project name sanitized from '{project_name_raw}' to '{safe_project_name}'")
        # Optionally reject here, or proceed with the sanitized name

    project_base_path = os.path.join(config.PROJECTS_ROOT_DIR, safe_project_name)
    pdf_dir_path = os.path.join(project_base_path, 'pdfs')
    repo_dir_path = os.path.join(project_base_path, 'repo')  # Create repo dir, but don't clone yet

    # --- Check if project already exists ---
    if os.path.exists(project_base_path):
        return jsonify(success=False, message=f"Project '{safe_project_name}' already exists."), 400
    # --- Create Directories ---
    try:
        print(f"Creating directory structure for project: {safe_project_name}")
        os.makedirs(project_base_path, exist_ok=False)  # Base dir first, fail if exists
        os.makedirs(pdf_dir_path, exist_ok=True)
        os.makedirs(repo_dir_path, exist_ok=True)
    except OSError as e:
        print(f"Error creating directories for {safe_project_name}: {e}")
        return jsonify(success=False, message=f"Server error creating project directories: {e}"), 500

    # --- Save Manifest File ---
    try:
        manifest_filename = secure_filename(manifest_file.filename)
        manifest_save_path = os.path.join(project_base_path, manifest_filename)
        print(f"Saving manifest file to: {manifest_save_path}")
        manifest_file.save(manifest_save_path)
    except Exception as e:
        print(f"Error saving manifest file for {safe_project_name}: {e}")
        # Clean up created directories on error?
        # shutil.rmtree(project_base_path, ignore_errors=True)
        return jsonify(success=False, message=f"Error saving manifest file: {e}"), 500

    # --- Save PDF Files ---
    saved_pdfs = []
    try:
        for pdf_file in pdf_files:
            if pdf_file and pdf_file.filename:  # Check again if file is valid
                pdf_filename = secure_filename(pdf_file.filename)
                pdf_save_path = os.path.join(pdf_dir_path, pdf_filename)
                print(f"Saving PDF file to: {pdf_save_path}")
                pdf_file.save(pdf_save_path)
                saved_pdfs.append(pdf_filename)
    except Exception as e:
        print(f"Error saving PDF files for {safe_project_name}: {e}")
        # Clean up potentially partially saved files and directories?
        # shutil.rmtree(project_base_path, ignore_errors=True)
        return jsonify(success=False, message=f"Error saving PDF files: {e}"), 500

    # --- Store Repo URL (optional, e.g., in a simple info file) ---
    try:
        info_file_path = os.path.join(project_base_path, 'project_info.txt')
        with open(info_file_path, 'w') as f:
            f.write(f"ProjectName: {safe_project_name}\n")
            f.write(f"RepoURL: {repo_url}\n")
        print(f"Saved project info (including repo URL) to: {info_file_path}")
    except Exception as e:
        print(f"Warning: Could not save project_info.txt for {safe_project_name}: {e}")
        # Don't treat this as a fatal error for the add operation itself

    print(f"Successfully added project '{safe_project_name}' with {len(saved_pdfs)} PDF(s).")
    # NOTE: Server needs restart for this new project to be discovered and processed.
    return jsonify(success=True, message=f"Project '{safe_project_name}' created successfully.")
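
# NOTE: the uploaded repoUrl is currently only recorded in project_info.txt;
# cloning in check_and_update_repo() still uses the global config.REPO_URL.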

# --- Main Execution ---
if __name__ == '__main__':
    # Ensure project-specific directories (like text output) exist if needed
    # This is now handled within drawing_checker

    # Perform initial check/clone and data load FOR EACH PROJECT in parallel
    print("--- Performing initial checks and analysis for all discovered projects in background threads ---")
    if not all_projects:
        print("Warning: No projects discovered in projects directory.")
    else:
        # Use a ThreadPoolExecutor for initial setup.
        # NOTE: a 'with' block is deliberately avoided here -- exiting it would call
        # shutdown(wait=True) and block until every initial check finished, delaying
        # server startup. shutdown(wait=False) lets the already-submitted tasks keep
        # running in the background while Flask starts.
        initial_executor = ThreadPoolExecutor(max_workers=MAX_INITIAL_CHECK_WORKERS, thread_name_prefix='InitialCheck')
        for proj_name in all_projects:
            print(f"--- Submitting initial setup for project: {proj_name} ---")
            # Submit the combined setup and analysis function to the pool
            initial_executor.submit(initial_project_setup_and_analysis, proj_name)
        initial_executor.shutdown(wait=False)

    # Start the background thread for PERIODIC checks (now uses its own thread pool internally)
    print("--- Starting background periodic check manager thread ---")
    # This thread now manages submitting tasks to its own pool
    repo_check_thread = threading.Thread(target=periodic_repo_check, daemon=True, name="PeriodicCheckManager")
    repo_check_thread.start()

    # Run the Flask app - This will start *before* initial checks might be complete
    print("--- Starting Flask server on http://0.0.0.0:5050 ... ---")
    # Ensure Flask runs threaded to handle multiple requests (like SSE connections)
    app.run(host='0.0.0.0', port=5050, debug=False, threaded=True)