import os
import threading
import time
import json
import traceback
import git
import csv
import re
from flask import Flask, render_template, jsonify, Response
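
# Third-party dependencies: Flask and GitPython (imported as `git`).
# Assumed install command (not stated in the original source): pip install flask gitpython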

# --- Configuration Loading ---
# Determine the directory where app.py resides
_script_dir = os.path.dirname(os.path.abspath(__file__))
CONFIG_FILE_ABS = os.path.join(_script_dir, 'config.json')  # Absolute path


def load_config():
    """Loads configuration from JSON file using an absolute path."""
    try:
        print(f"Attempting to load config from absolute path: {CONFIG_FILE_ABS}")
        with open(CONFIG_FILE_ABS, 'r') as f:  # Use the absolute path
            config = json.load(f)
        # Basic validation
        if 'projects' not in config or not isinstance(config['projects'], dict):
            raise ValueError("Config missing 'projects' dictionary.")
        if 'check_interval_seconds' not in config or not isinstance(config['check_interval_seconds'], int):
            raise ValueError("Config missing 'check_interval_seconds' integer.")
        print(f"Configuration loaded successfully from {CONFIG_FILE_ABS}")
        return config
    except FileNotFoundError:
        # Also print the current working directory for debugging
        cwd = os.getcwd()
        print(f"ERROR: Configuration file not found at absolute path: {CONFIG_FILE_ABS}")
        print(f"Current working directory when error occurred: {cwd}")
        exit(1)
    except json.JSONDecodeError:
        print(f"ERROR: Could not decode JSON from '{CONFIG_FILE_ABS}'. Check its format.")
        exit(1)
    except ValueError as e:
        print(f"ERROR: Invalid configuration in '{CONFIG_FILE_ABS}': {e}")
        exit(1)
    except Exception as e:
        cwd = os.getcwd()
        print(f"ERROR: An unexpected error occurred loading config from {CONFIG_FILE_ABS}: {e}")
        print(f"Current working directory during error: {cwd}")
        exit(1)


config = load_config()
CHECK_INTERVAL_SECONDS = config['check_interval_seconds']
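
# Illustrative config.json shape, inferred from the keys this module reads.
# The project id, names, paths, and URL below are placeholders, not values
# from any real deployment:
#
# {
#     "check_interval_seconds": 60,
#     "projects": {
#         "example_project": {
#             "name": "Example Project",
#             "repo_url": "http://git.example.local/org/repo",
#             "branch": "main",
#             "repo_dir": "./cloned_repos/example_project",
#             "manifest_csv": "./manifests/example_project_manifest.csv",
#             "scada_views_dir": "PROJECT_SCADA/com.inductiveautomation.perspective/views",
#             "drawing_texts_dir": "./extracted_texts/example_project",
#             "pdf_source_dir": "./pdfs/example_project",
#             "csv_cols": {
#                 "alias": "Alias",
#                 "panel": "Control Panel",
#                 "eq_type": "Equipment Type",
#                 "conv_type": "Type of Conveyor"
#             }
#         }
#     }
# }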


# --- Project Class ---
class Project:
    """Encapsulates configuration, state, and operations for a single project."""

    def __init__(self, project_id, project_config):
        self.id = project_id
        self.config = project_config
        self.name = project_config.get('name', project_id)

        # Project-specific state
        self.last_commit_hash = None
        self.progress_data = {  # Default empty structure
            "overall": {"total_csv": 0, "found_both": 0, "found_scada_only": 0, "found_drawing_only": 0, "missing_both": 0, "percentage_found_both": 0, "missing_list": [], "found_scada_only_list": [], "found_drawing_only_list": [], "found_both_list": []},
            "panels": {}
        }
        self.status_message = "Initializing..."
        self.lock = threading.Lock()  # Lock for accessing project-specific repo and data
        self.data_updated_event = threading.Event()  # Event to signal data updates for this project

        # --- Pre-calculate absolute paths ---
        self._script_dir = os.path.dirname(os.path.abspath(__file__))
        self._repo_dir_abs = os.path.abspath(os.path.join(self._script_dir, self.config.get('repo_dir', f'./cloned_repos/{project_id}')))  # Added default
        self._csv_path_abs = os.path.abspath(os.path.join(self._script_dir, self.config.get('manifest_csv', f'./manifests/{project_id}_manifest.csv')))  # Added default
        self._views_dir_abs = os.path.join(self._repo_dir_abs, self.config.get('scada_views_dir', ''))
        self._texts_dir_abs = os.path.abspath(os.path.join(self._script_dir, self.config.get('drawing_texts_dir', f'./extracted_texts/{project_id}')))  # Added default
        pdf_source_dir_rel = self.config.get('pdf_source_dir')
        self._pdf_dir_abs = os.path.abspath(os.path.join(self._script_dir, pdf_source_dir_rel)) if pdf_source_dir_rel else None

        print(f"Initialized Project '{self.id}' ({self.name}):")
        print(f"  Repo Dir:  {self._repo_dir_abs}")
        print(f"  CSV Path:  {self._csv_path_abs}")
        if self._views_dir_abs: print(f"  Views Dir: {self._views_dir_abs}")
        if self._texts_dir_abs: print(f"  Texts Dir: {self._texts_dir_abs}")
        if self._pdf_dir_abs: print(f"  PDF Dir:   {self._pdf_dir_abs}")

    # --- Path Helper Methods ---
    def get_repo_path(self):
        return self._repo_dir_abs

    def get_csv_path(self):
        return self._csv_path_abs

    def get_views_dir_path(self):
        return self._views_dir_abs

    def get_text_output_dir_path(self):
        return self._texts_dir_abs

    def get_pdf_source_dir_path(self):
        return self._pdf_dir_abs

    # --- Core Logic Methods ---
    def _update_progress_data(self):
        """(Internal) Reads manifest, runs checks, combines results, and updates project state."""
        current_status = ""
        new_data_calculated = None
        manifest_data = None  # Initialize

        with self.lock:
            try:
                # 1. Read Manifest
                self.status_message = f"[{self.id}] Reading manifest..."
                print(f"[{self.id}] Reading manifest: {self.get_csv_path()}")
                manifest_data = read_manifest(self)  # Pass self (project instance)
                if manifest_data is None:
                    current_status = f"[{self.id}] Error: Failed manifest read {self.get_csv_path()}"
                    print(current_status)
                    self.status_message = current_status
                    # Don't return; proceed to calculate progress with empty data
                    manifest_data = []  # Ensure it's an empty list for calculation

                # 2. Check SCADA (if a views dir is configured)
                if self.get_views_dir_path():
                    self.status_message = f"[{self.id}] Checking SCADA..."
                    check_scada(self, manifest_data)  # Pass self (project instance)
                else:
                    print(f"[{self.id}] Skipping SCADA check (no views_dir configured).")

                # 3. Check Drawings (if a texts dir is configured)
                if self.get_text_output_dir_path():
                    self.status_message = f"[{self.id}] Checking drawings..."
                    check_drawings(self, manifest_data)  # Pass self (project instance)
                else:
                    print(f"[{self.id}] Skipping Drawing check (no drawing_texts_dir configured).")

                # 4. Calculate Combined Progress
                self.status_message = f"[{self.id}] Calculating progress..."
                new_data_calculated = calculate_combined_progress(self, manifest_data)  # Pass self

                if new_data_calculated:
                    if not manifest_data and new_data_calculated['overall']['total_csv'] == 0:  # Manifest read failed or was empty
                        current_status = f"[{self.id}] Analysis complete (Manifest empty/read failed) @ {time.strftime('%H:%M:%S')}"
                    else:
                        current_status = f"[{self.id}] Analysis complete @ {time.strftime('%H:%M:%S')}"
                    self.progress_data = new_data_calculated  # Update project's data
                else:
                    # This case should not happen if calculate_combined_progress always returns a dict
                    current_status = f"[{self.id}] Error: Failed progress calculation."
                    # Keep previous progress_data

            except Exception as e:
                current_status = f"[{self.id}] CRITICAL Error during analysis: {e}"
                print(f"[{self.id}] Detailed Analysis Error: {e}")
                traceback.print_exc()
                # Keep the previous progress data rather than resetting it to the default empty structure

            # Update status and signal completion (inside lock)
            print(current_status)
            self.status_message = current_status
            self.data_updated_event.set()
            self.data_updated_event.clear()

    def _check_and_update_repo(self):
        """(Internal) Checks and updates the Git repository for this project."""
        did_update = False
        repo_path = self.get_repo_path()
        repo_url = self.config.get('repo_url')
        branch = self.config.get('branch', 'main')  # Default to main if not specified

        if not repo_url:
            print(f"[{self.id}] Skipping repo check: repo_url not configured.")
            self.status_message = f"[{self.id}] Repo check skipped (no URL)"
            self.data_updated_event.set(); self.data_updated_event.clear()  # Signal status change
            return False  # No update occurred

        # --- Lock is acquired by the calling method (update_repo_and_analyze) ---
        try:
            # Ensure the parent directory exists before cloning
            repo_parent_dir = os.path.dirname(repo_path)
            if not os.path.exists(repo_parent_dir):
                print(f"[{self.id}] Creating parent directory for repo: {repo_parent_dir}")
                os.makedirs(repo_parent_dir)

            repo_existed = os.path.exists(os.path.join(repo_path, ".git"))
            if not repo_existed:
                print(f"[{self.id}] Cloning repository {repo_url} (branch: {branch}) into {repo_path}...")
                self.status_message = f"[{self.id}] Cloning repository..."
                git.Repo.clone_from(repo_url, repo_path, branch=branch)
                repo = git.Repo(repo_path)
                self.last_commit_hash = repo.head.commit.hexsha
                print(f"[{self.id}] Initial clone complete. Commit: {self.last_commit_hash}")
                did_update = True
            else:
                repo = git.Repo(repo_path)
                print(f"[{self.id}] Fetching updates from remote...")
                current_local_commit = repo.head.commit.hexsha
                # Use the current local hash if the stored hash is still None (e.g. first run after restart)
                if self.last_commit_hash is None: self.last_commit_hash = current_local_commit

                origin = repo.remotes.origin
                current_remote_commit = None  # Initialize
                try:
                    print(f"[{self.id}] Running fetch...")
                    origin.fetch()
                    print(f"[{self.id}] Fetch complete. Getting remote commit...")
                    current_remote_commit = repo.commit(f'origin/{branch}').hexsha
                    print(f"[{self.id}] Remote commit for origin/{branch}: {current_remote_commit}")
                except git.GitCommandError as fetch_err:
                    print(f"[{self.id}] Warning: Could not fetch from remote/find branch '{branch}': {fetch_err}")
                    # Keep status as is; an update will be signalled later
                except Exception as fetch_err_other:
                    print(f"[{self.id}] Warning: Unexpected error during fetch: {fetch_err_other}")
                    # Keep status as is

                print(f"[{self.id}] Local commit: {current_local_commit}, Remote commit: {current_remote_commit or 'Fetch Failed/Not Found'}")

                if current_remote_commit and current_local_commit != current_remote_commit:
                    print(f"[{self.id}] New commit detected! Pulling changes (Branch: {branch})...")
                    self.status_message = f"[{self.id}] Pulling updates..."
                    try:
                        # Ensure working directory is clean before pull?
                        # Add checkout if needed: repo.git.checkout(branch)
                        pull_info = origin.pull()
                        # Verify pull info if needed (e.g., pull_info[0].flags)
                        new_commit_hash = repo.head.commit.hexsha
                        print(f"[{self.id}] Pull successful. New commit: {new_commit_hash}")
                        self.last_commit_hash = new_commit_hash
                        did_update = True
                    except git.GitCommandError as e_pull:
                        self.status_message = f"[{self.id}] Error pulling repository: {e_pull}"
                        print(self.status_message)
                        self.last_commit_hash = current_local_commit  # Keep old hash on failed pull
                    except Exception as e_pull_other:
                        self.status_message = f"[{self.id}] Unexpected error pulling repository: {e_pull_other}"
                        print(self.status_message)
                        self.last_commit_hash = current_local_commit  # Keep old hash
                elif current_remote_commit:
                    print(f"[{self.id}] No new commits detected.")
                    # Only update status if no pull error occurred previously
                    if not self.status_message.startswith(f"[{self.id}] Error pulling"):
                        self.status_message = f"[{self.id}] Repo up-to-date @ {time.strftime('%H:%M:%S')}"
                else:  # Fetch failed
                    # Keep the previous status message (e.g. Analysis complete, Cloning, Error pulling, etc.)
                    print(f"[{self.id}] Keeping previous status due to fetch failure.")
                    # Ensure last_commit_hash is set if it was None
                    if self.last_commit_hash is None: self.last_commit_hash = current_local_commit or "Unknown (Fetch Failed)"

        except git.GitCommandError as e_git:
            error_msg = f"[{self.id}] Git command error: {e_git}"
            print(error_msg)
            self.status_message = error_msg
            # Try reading the existing hash even on error
            try:
                if os.path.exists(os.path.join(repo_path, ".git")):
                    repo = git.Repo(repo_path)
                    # Don't overwrite an existing hash if we have one
                    if self.last_commit_hash is None: self.last_commit_hash = repo.head.commit.hexsha
            except Exception as e_read_hash:
                print(f"[{self.id}] Additionally failed to read hash after Git error: {e_read_hash}")
                if self.last_commit_hash is None: self.last_commit_hash = "Error reading commit"
        except Exception as e_other:
            error_msg = f"[{self.id}] Error checking repository: {e_other}"
            print(error_msg)
            traceback.print_exc()
            self.status_message = error_msg
            if self.last_commit_hash is None: self.last_commit_hash = "Error checking repo"

        # --- Lock is released by the calling method ---
        return did_update  # Return whether the repo content was changed

    def update_repo_and_analyze(self, force_analysis=False):
        """Checks the repo for updates, pulls if necessary, and runs analysis if changed or forced."""
        print(f"[{self.id}] Starting update_repo_and_analyze (force_analysis={force_analysis})...")
        repo_changed = False
        initial_hash = self.last_commit_hash  # Store hash before check

        with self.lock:
            repo_changed = self._check_and_update_repo()

        if repo_changed:
            print(f"[{self.id}] Repo changed, triggering analysis...")
            # Analysis reads files, recalculates progress, updates status, and signals the event
            self._update_progress_data()
        elif force_analysis:
            print(f"[{self.id}] Forcing analysis even though repo didn't change...")
            self._update_progress_data()
        else:
            # If the repo didn't change and analysis isn't forced, still signal the possibly changed status message from the repo check
            print(f"[{self.id}] Repo unchanged, analysis not forced. Signaling potential status update.")
            self.data_updated_event.set()
            self.data_updated_event.clear()

        final_hash = self.last_commit_hash
        print(f"[{self.id}] Finished update_repo_and_analyze. Repo Changed: {repo_changed}. Hash: {initial_hash} -> {final_hash}. Status: '{self.status_message}'")
        return repo_changed


# --- Global Application State ---
app = Flask(__name__)
projects = {}  # Dictionary to hold Project instances {project_id: Project_instance}
global_lock = threading.Lock()  # Lock for modifying the projects dictionary itself (if needed later)

# Instantiate projects from config
for project_id, project_conf in config.get('projects', {}).items():
    projects[project_id] = Project(project_id, project_conf)

if not projects:
    print("ERROR: No projects defined in configuration. Exiting.")
    exit(1)

# --- Original Global Variables (To be removed or refactored) ---
# REPO_URL = "http://192.168.5.191:3000/LCI/MTN6"  # REMOVE
# REPO_DIR = "./cloned_repo"  # REMOVE
# BRANCH = "main"  # REMOVE
# CSV_FILENAME = "MTN6 Equipment Manifest REV6(Conveyor List).csv"  # REMOVE
# VIEWS_DIR_RELATIVE = "MTN6_SCADA/com.inductiveautomation.perspective/views/Detailed-Views"  # REMOVE
# TEXT_OUTPUT_FOLDER = "./extracted_texts"  # REMOVE
# CHECK_INTERVAL_SECONDS = 60  # MOVED to config load

# CSV_ALIAS_COL = 'Alias'  # MOVED to project config
# CSV_PANEL_COL = 'Control Panel'  # MOVED to project config
# CSV_EQ_TYPE_COL = 'Equipment Type'  # MOVED to project config
# CSV_CONV_TYPE_COL = 'Type of Conveyor'  # MOVED to project config

# last_commit_hash = None  # MOVED to Project class
# progress_data = { ... }  # MOVED to Project class
# status_message = "Initializing..."  # MOVED to Project class
# repo_lock = threading.Lock()  # MOVED to Project class (per-project lock)
# data_updated_event = threading.Event()  # MOVED to Project class (per-project event)

# --- Helper Functions (refactored into Project methods or kept standalone) ---

# def get_repo_path():  # BECAME a Project method
#     return os.path.abspath(REPO_DIR)

# def get_csv_path():  # BECAME a Project method
#     script_dir = os.path.dirname(os.path.abspath(__file__))
#     return os.path.join(script_dir, CSV_FILENAME)

# def get_views_dir_path():  # BECAME a Project method
#     return os.path.join(get_repo_path(), VIEWS_DIR_RELATIVE)

# def get_text_output_dir_path():  # BECAME a Project method
#     script_dir = os.path.dirname(os.path.abspath(__file__))
#     return os.path.abspath(os.path.join(script_dir, TEXT_OUTPUT_FOLDER))


def normalize(text):
    """Normalize a string for comparison: lowercase, treat '-' and '_' the same, remove all whitespace."""
    # General enough to remain a standalone helper
    if not isinstance(text, str):
        return ""
    text = text.lower()              # Convert to lowercase
    text = text.replace('-', '_')    # Replace hyphens with underscores
    text = re.sub(r'\s+', '', text)  # Remove ALL whitespace characters (including newlines)
    return text
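
# Example of the normalization above (illustrative values, not from a real manifest):
#   normalize("CV-101 A") -> "cv_101a"
#   normalize("cv_101a")  -> "cv_101a"
# so a manifest alias matches its SCADA/drawing occurrences regardless of case,
# hyphen/underscore choice, or embedded whitespace.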


def read_manifest(project: Project):  # Takes a Project instance
    """Reads the manifest CSV for a specific project into a list of dictionaries."""
    manifest_items = []
    csv_filepath = project.get_csv_path()
    # Get column names from the project config
    cols = project.config.get('csv_cols', {})
    col_alias = cols.get('alias', 'Alias')  # Default fallback
    col_panel = cols.get('panel', 'Control Panel')
    col_eq_type = cols.get('eq_type', 'Equipment Type')
    col_conv_type = cols.get('conv_type', 'Type of Conveyor')

    required_cols = {col_alias, col_panel}
    optional_cols = {col_eq_type, col_conv_type}

    try:
        with open(csv_filepath, mode='r', newline='', encoding='utf-8-sig') as infile:
            reader = csv.DictReader(infile)
            headers = set(h.strip() for h in reader.fieldnames)

            missing_required = required_cols - headers
            if missing_required:
                print(f"[{project.id}] Error: Missing required columns in CSV '{csv_filepath}': {', '.join(missing_required)}")
                print(f"[{project.id}] Available columns: {', '.join(headers)}")
                return None

            for row in reader:
                alias = row.get(col_alias, "").strip()
                panel = row.get(col_panel, "").strip()

                if alias and panel:
                    item = {
                        "alias": alias,
                        "normalized_alias": normalize(alias),
                        "control_panel": panel,
                        "equipment_type": row.get(col_eq_type, "").strip() if col_eq_type in headers else "N/A",
                        "conveyor_type": row.get(col_conv_type, "").strip() if col_conv_type in headers else "N/A",
                        "found_scada": False,
                        "found_drawing": False
                    }
                    manifest_items.append(item)
                elif alias and not panel:
                    print(f"[{project.id}] Warning: Alias '{alias}' found in CSV but is missing its '{col_panel}'. Skipping.")

    except FileNotFoundError:
        print(f"[{project.id}] Error: Manifest file not found at {csv_filepath}")
        return None
    except Exception as e:
        print(f"[{project.id}] Error reading CSV file {csv_filepath}: {e}")
        return None
    print(f"[{project.id}] Read {len(manifest_items)} valid items from manifest.")
    return manifest_items
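
# Illustrative manifest layout using the default column names above (the rows
# are placeholders, not taken from a real project manifest):
#
#   Alias,Control Panel,Equipment Type,Type of Conveyor
#   CV-101,CP-01,Conveyor,Belt
#   CV-102,CP-01,Conveyor,Roller
#
# Rows that have an Alias but no Control Panel value are skipped with a warning.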


def check_scada(project: Project, manifest_data):  # Takes a Project instance
    """Checks for aliases in SCADA JSON view files for a specific project."""
    if not manifest_data: return
    views_dir = project.get_views_dir_path()
    print(f"[{project.id}] Starting SCADA check in directory: {views_dir}...")
    found_count = 0
    processed_files = 0

    alias_map = {}
    for item in manifest_data:
        na = item['normalized_alias']
        if na not in alias_map:
            alias_map[na] = []
        alias_map[na].append(item)

    try:
        # Check that the views dir exists BEFORE walking
        if not os.path.exists(views_dir):
            print(f"[{project.id}] Warning: SCADA Views directory not found at {views_dir}. Skipping SCADA check.")
            return  # Exit early

        for root, _, files in os.walk(views_dir):
            for filename in files:
                if filename == 'view.json':
                    filepath = os.path.join(root, filename)
                    processed_files += 1
                    try:
                        with open(filepath, 'r', encoding='utf-8') as f:
                            content = f.read()
                        normalized_content = normalize(content)
                        for norm_alias, items in alias_map.items():
                            if norm_alias in normalized_content:
                                for item in items:
                                    if not item['found_scada']:
                                        item['found_scada'] = True
                                        found_count += 1
                    except Exception as e:
                        print(f"[{project.id}] Warning: Could not read or process JSON file {filepath}: {e}")
    except Exception as e:
        print(f"[{project.id}] Error walking SCADA views directory {views_dir}: {e}")

    print(f"[{project.id}] SCADA check finished. Processed {processed_files} view.json files. Found {found_count} manifest aliases.")


def check_drawings(project: Project, manifest_data):  # Takes a Project instance
    """Checks whether aliases from the manifest exist in *any* extracted drawing text file for a specific project."""
    if not manifest_data: return
    text_output_dir = project.get_text_output_dir_path()
    print(f"[{project.id}] Starting Drawings check: Scanning all .txt files in directory: {text_output_dir}...")

    all_normalized_content = ""
    processed_files = 0
    found_files = []

    try:
        # Check that the text dir exists BEFORE listing
        if not os.path.exists(text_output_dir):
            print(f"[{project.id}] Warning: Drawings text directory not found: {text_output_dir}. Skipping Drawings check.")
            return  # Exit early
        if not os.path.isdir(text_output_dir):
            print(f"[{project.id}] Error: Path specified for Drawings text is not a directory: {text_output_dir}. Skipping Drawings check.")
            return  # Exit early

        for filename in os.listdir(text_output_dir):
            if filename.lower().endswith('.txt'):
                filepath = os.path.join(text_output_dir, filename)
                processed_files += 1
                try:
                    with open(filepath, 'r', encoding='utf-8') as f:
                        content = f.read()
                    # Separate files with a marker so an alias cannot match across a file boundary
                    all_normalized_content += normalize(content) + "\n--file-separator--\n"
                    found_files.append(filename)
                except Exception as e:
                    print(f"[{project.id}] Warning: Could not read or process text file {filepath}: {e}")

        if processed_files == 0:
            print(f"[{project.id}] Warning: No .txt files found in the directory {text_output_dir}. Cannot perform drawing check.")
            return
        else:
            print(f"[{project.id}] Successfully read and normalized content from {len(found_files)} out of {processed_files} .txt files found.")

        found_count = 0
        for item in manifest_data:
            normalized_alias = item['normalized_alias']
            if normalized_alias and normalized_alias in all_normalized_content:
                item['found_drawing'] = True
                found_count += 1

        print(f"[{project.id}] Drawings check finished. Found {found_count} manifest aliases within the combined text content.")

    except Exception as e:
        print(f"[{project.id}] Error during drawings check in {text_output_dir}: {e}")


def calculate_combined_progress(project: Project, manifest_data):  # Takes a Project instance
    """Calculates the combined progress based on SCADA/drawing status for a specific project."""
    print(f"[{project.id}] Calculating combined progress statistics...")
    results = {
        "overall": {
            "total_csv": 0, "found_both": 0, "found_scada_only": 0, "found_drawing_only": 0, "missing_both": 0,
            "percentage_found_both": 0,
            "missing_list": [], "found_scada_only_list": [], "found_drawing_only_list": [], "found_both_list": []
        },
        "panels": {}
    }
    if not manifest_data:
        print(f"[{project.id}] Warning: No manifest data to calculate progress from.")
        return results  # Return the default empty structure

    results["overall"]["total_csv"] = len(manifest_data)

    for item in manifest_data:
        panel = item['control_panel']

        if panel not in results["panels"]:
            results["panels"][panel] = {
                "total": 0, "found_both": 0, "found_scada_only": 0, "found_drawing_only": 0, "missing_both": 0,
                "percentage_found_both": 0,
                "missing_list": [], "found_scada_only_list": [], "found_drawing_only_list": [], "found_both_list": []
            }

        results["panels"][panel]["total"] += 1
        item_detail = {k: v for k, v in item.items() if k not in ['normalized_alias']}

        if item['found_scada'] and item['found_drawing']:
            results["overall"]["found_both"] += 1
            results["panels"][panel]["found_both"] += 1
            results["overall"]["found_both_list"].append(item_detail)
            results["panels"][panel]["found_both_list"].append(item_detail)
        elif item['found_scada'] and not item['found_drawing']:
            results["overall"]["found_scada_only"] += 1
            results["panels"][panel]["found_scada_only"] += 1
            results["overall"]["found_scada_only_list"].append(item_detail)
            results["panels"][panel]["found_scada_only_list"].append(item_detail)
        elif not item['found_scada'] and item['found_drawing']:
            results["overall"]["found_drawing_only"] += 1
            results["panels"][panel]["found_drawing_only"] += 1
            results["overall"]["found_drawing_only_list"].append(item_detail)
            results["panels"][panel]["found_drawing_only_list"].append(item_detail)
        else:  # Missing from both
            results["overall"]["missing_both"] += 1
            results["panels"][panel]["missing_both"] += 1
            results["overall"]["missing_list"].append(item_detail)
            results["panels"][panel]["missing_list"].append(item_detail)

    if results["overall"]["total_csv"] > 0:
        results["overall"]["percentage_found_both"] = round(
            (results["overall"]["found_both"] / results["overall"]["total_csv"]) * 100, 1
        )
    for panel_data in results["panels"].values():
        if panel_data["total"] > 0:
            panel_data["percentage_found_both"] = round(
                (panel_data["found_both"] / panel_data["total"]) * 100, 1
            )

    print(f"[{project.id}] Combined progress calculation finished.")
    return results


# --- Background Task ---

def periodic_repo_check():
    """Periodically runs update_repo_and_analyze for all projects, sequentially."""
    # Small delay at startup to allow the Flask server to bind its port before the first check cycle prints
    time.sleep(2)
    while True:
        print(f"\n--- Starting Periodic Project Checks (Interval: {CHECK_INTERVAL_SECONDS}s) ---")
        project_ids = list(projects.keys())
        for project_id in project_ids:
            project = projects.get(project_id)
            if not project:
                print(f"Warning: Project ID '{project_id}' found in keys but not in projects dict during periodic check.")
                continue

            print(f"--- Checking Project: {project.id} ({project.name}) ---")
            try:
                # Use the public method, which handles locking and analysis triggering
                project.update_repo_and_analyze()
            except Exception as e:
                print(f"!! CRITICAL ERROR during periodic check for project '{project_id}': {e}")
                traceback.print_exc()
                # Update status via the lock if possible
                with project.lock:
                    project.status_message = f"[{project.id}] CRITICAL check cycle error: {e}"
                    if project.last_commit_hash is None: project.last_commit_hash = "Unknown (Check Error)"
                    project.data_updated_event.set()  # Signal the error
                    project.data_updated_event.clear()
            print(f"--- Finished check for project: {project.id} ---")

        print(f"--- All project checks finished. Sleeping for {CHECK_INTERVAL_SECONDS}s... ---")
        time.sleep(CHECK_INTERVAL_SECONDS)


# --- Flask Routes ---

@app.route('/')
def index():
    # Pass the project list, sorted by name, to the template for the selector
    project_list = sorted(
        [{"id": pid, "name": p.name} for pid, p in projects.items()],
        key=lambda x: x['name']
    )
    return render_template('index.html', projects=project_list)


@app.route('/stream/<project_id>')
def stream(project_id):
    project = projects.get(project_id)
    if not project:
        return jsonify({"error": "Project not found"}), 404

    def event_stream(target_project: Project):
        last_sent_hash_to_client = None
        print(f"SSE Client connected for project: {target_project.id}")

        # Send the initial state immediately
        with target_project.lock:
            current_hash = target_project.last_commit_hash
            current_status = target_project.status_message
            current_progress = target_project.progress_data

        initial_payload = json.dumps({
            "status": current_status,
            "progress": current_progress,
            "last_commit": current_hash
        })
        yield f"data: {initial_payload}\n\n"
        last_sent_hash_to_client = current_hash
        print(f"[{target_project.id}] Sent initial SSE state (Hash: {last_sent_hash_to_client})")

        # Wait for project-specific updates
        while True:
            # Use a timeout to prevent hangs if the event logic fails
            event_fired = target_project.data_updated_event.wait(timeout=CHECK_INTERVAL_SECONDS * 2)
            if not event_fired:
                # On timeout, fall through and re-send the current state so the client stays in sync
                print(f"[{target_project.id}] SSE wait timeout. Re-checking state.")
                # Optionally send a keep-alive comment: yield ": keepalive\n\n"
                pass

            with target_project.lock:
                current_hash = target_project.last_commit_hash
                current_status = target_project.status_message
                current_progress = target_project.progress_data

            # Send an update ONLY if the hash changed or the wait timed out (to ensure the client syncs)
            if current_hash != last_sent_hash_to_client or not event_fired:
                print(f"[{target_project.id}] SSE Data updated (Hash: {last_sent_hash_to_client} -> {current_hash}, Event Fired: {event_fired}). Sending.")
                data_payload = json.dumps({
                    "status": current_status,
                    "progress": current_progress,
                    "last_commit": current_hash
                })
                yield f"data: {data_payload}\n\n"
                last_sent_hash_to_client = current_hash
            # else:
            #     print(f"[{target_project.id}] SSE Event triggered, hash {current_hash} unchanged.")

    return Response(event_stream(project), mimetype="text/event-stream")
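
# The /stream/<project_id> endpoint emits standard Server-Sent Events. A quick
# way to watch it from a shell (host/port match app.run below; the project id
# is a placeholder):
#
#   curl -N http://localhost:5050/stream/example_project
#
# Presumably templates/index.html consumes the same URL with a browser
# EventSource, though that template is not part of this file.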


# --- Main Execution ---

if __name__ == '__main__':
    # Ensure base directories exist (safer to do this before initializing projects).
    # Create parent dirs for repos, manifests, texts, and PDFs based on the config.
    dirs_to_ensure = set()
    for pid, pconf in config.get('projects', {}).items():
        script_dir = os.path.dirname(os.path.abspath(__file__))
        dirs_to_ensure.add(os.path.dirname(os.path.abspath(os.path.join(script_dir, pconf.get('repo_dir', f'./cloned_repos/{pid}')))))
        dirs_to_ensure.add(os.path.dirname(os.path.abspath(os.path.join(script_dir, pconf.get('manifest_csv', f'./manifests/{pid}_manifest.csv')))))
        dirs_to_ensure.add(os.path.dirname(os.path.abspath(os.path.join(script_dir, pconf.get('drawing_texts_dir', f'./extracted_texts/{pid}')))))
        pdf_dir = pconf.get('pdf_source_dir')
        if pdf_dir:
            dirs_to_ensure.add(os.path.dirname(os.path.abspath(os.path.join(script_dir, pdf_dir))))

    for d in dirs_to_ensure:
        if d and not os.path.exists(d):
            print(f"Creating necessary directory: {d}")
            try:
                os.makedirs(d, exist_ok=True)
            except OSError as e:
                print(f"ERROR: Could not create directory {d}: {e}")
                # Whether this is fatal depends on which directory failed;
                # exit(1) here if the repo/manifest directories are required.

    # Initialize Project objects AFTER ensuring directories
    for project_id, project_conf in config.get('projects', {}).items():
        if project_id not in projects:  # Avoid re-init if already done at module level
            projects[project_id] = Project(project_id, project_conf)

    if not projects:
        print("ERROR: No projects defined or initialized. Exiting.")
        exit(1)

    # Perform the initial check/clone and data load for ALL projects
    print("--- Performing Initial Project Analysis ---")
    # ======================================================================
    # === TEMPORARILY DISABLED INITIAL ANALYSIS THREADING FOR DEBUGGING ===
    # initial_threads = []
    # for project_id, project in projects.items():
    #     print(f"--- Starting initial analysis for: {project.id} ({project.name}) ---")
    #     # Run initial analyses in parallel threads for faster startup.
    #     # force_analysis=True ensures data is loaded even if the repo exists and hasn't changed since the last run.
    #     thread = threading.Thread(target=project.update_repo_and_analyze, kwargs={'force_analysis': True})
    #     initial_threads.append(thread)
    #     thread.start()
    #
    # # Wait for all initial analyses to complete
    # print("Waiting for initial analyses to complete...")
    # for thread in initial_threads:
    #     thread.join()
    # print("--- Initial Analyses Complete ---")
    print("SKIPPING initial analysis for debugging port binding issue.")
    print("NOTE: Project data will be loaded on the first periodic check.")
    # ======================================================================

    # Start the background thread for periodic checks AFTER the initial load
    print("Starting background periodic check thread...")
    repo_check_thread = threading.Thread(target=periodic_repo_check, daemon=True)
    repo_check_thread.start()

    # Run the Flask app
    print("Starting Flask server: http://0.0.0.0:5050")
    try:
        app.run(host='0.0.0.0', port=5050, debug=False, threaded=True)
    except Exception as e:
        print(f"FATAL ERROR during app.run: {e}")
        traceback.print_exc()
        exit(1)