import os
import threading
import time
import json
import traceback

import git
import csv
import re
from flask import Flask, render_template, jsonify, Response

app = Flask(__name__)

# --- Configuration ---
REPO_URL = "http://192.168.5.191:3000/LCI/MTN6"
REPO_DIR = "./cloned_repo" # Directory to clone the repo into
BRANCH = "main"
CSV_FILENAME = "MTN6 Equipment Manifest REV6(Conveyor List).csv"
VIEWS_DIR_RELATIVE = "MTN6_SCADA/com.inductiveautomation.perspective/views/Detailed-Views"
TEXT_OUTPUT_FOLDER = "./extracted_texts" # Added: Directory with .txt files
CHECK_INTERVAL_SECONDS = 60
# --- Column Names from CSV (Adjust if necessary) ---
CSV_ALIAS_COL = 'Alias'
CSV_PANEL_COL = 'Control Panel'
CSV_EQ_TYPE_COL = 'Equipment Type' # Optional, for details modal
CSV_CONV_TYPE_COL = 'Type of Conveyor' # Optional, for details modal
# --- Global state ---
last_commit_hash = None
# New detailed progress data structure
progress_data = {
    "overall": {
        "total_csv": 0, "found_both": 0, "found_scada_only": 0, "found_drawing_only": 0, "missing_both": 0,
        "percentage_found_both": 0,
        "missing_list": [], "found_scada_only_list": [], "found_drawing_only_list": [], "found_both_list": []
    },
    "panels": {}  # Populated dynamically
}
status_message = "Initializing..."
repo_lock = threading.Lock()  # Lock for accessing repo and shared data
data_updated_event = threading.Event()  # Event to signal data updates
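# Threading model: the periodic git-check thread writes last_commit_hash, status_message and
# progress_data; the /stream SSE handlers read them. repo_lock guards those reads and writes,
# and data_updated_event wakes any connected /stream clients after a successful analysis.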
# --- Helper Functions ---
def get_repo_path():
    return os.path.abspath(REPO_DIR)


def get_csv_path():
    script_dir = os.path.dirname(os.path.abspath(__file__))
    return os.path.join(script_dir, CSV_FILENAME)


def get_views_dir_path():
    return os.path.join(get_repo_path(), VIEWS_DIR_RELATIVE)


def get_text_output_dir_path():
    # Construct absolute path based on the script's directory
    script_dir = os.path.dirname(os.path.abspath(__file__))
    # Use os.path.join to handle path separators correctly and avoid './'
    return os.path.abspath(os.path.join(script_dir, TEXT_OUTPUT_FOLDER))


def normalize(text):
    """Normalize string for comparison: lowercase, treat '-' and '_' the same, remove all whitespace."""
    if not isinstance(text, str):
        return ""
    text = text.lower()              # Convert to lowercase
    text = text.replace('-', '_')    # Replace hyphens with underscores
    text = re.sub(r'\s+', '', text)  # Remove ALL whitespace characters (including newlines)
    return text
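# For example, normalize("CV-101 A"), normalize("cv_101 a") and normalize("CV_101A") all
# return "cv_101a", so aliases match regardless of hyphen/underscore style or spacing.
# (These alias values are illustrative only, not taken from the manifest.)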
def read_manifest(csv_filepath):
    """Reads the manifest CSV into a list of dictionaries."""
    manifest_items = []
    # Only require Alias and Panel now for basic grouping
    required_cols = {CSV_ALIAS_COL, CSV_PANEL_COL}
    optional_cols = {CSV_EQ_TYPE_COL, CSV_CONV_TYPE_COL}
    try:
        # Revert back to 'utf-8-sig' to handle potential BOM from Excel
        with open(csv_filepath, mode='r', newline='', encoding='utf-8-sig') as infile:
            reader = csv.DictReader(infile)
            headers = set(h.strip() for h in reader.fieldnames)
            # Check for required columns
            missing_required = required_cols - headers
            if missing_required:
                print(f"Error: Missing required columns in CSV '{csv_filepath}': {', '.join(missing_required)}")
                print(f"Available columns: {', '.join(headers)}")
                return None
            for row in reader:
                alias = row.get(CSV_ALIAS_COL, "").strip()
                panel = row.get(CSV_PANEL_COL, "").strip()
                # unit_number = row.get('Unit Number', "").strip() # No longer needed for filename
                # Add if Alias and Control Panel are present (Panel needed for grouping results later)
                if alias and panel:
                    item = {
                        "alias": alias,
                        "normalized_alias": normalize(alias),
                        "control_panel": panel,
                        # "unit_number": unit_number, # Removed
                        # "expected_drawing_filename": f"MTN6_SYSDL-{unit_number}.txt", # Removed
                        # Add optional data if columns exist
                        "equipment_type": row.get(CSV_EQ_TYPE_COL, "").strip() if CSV_EQ_TYPE_COL in headers else "N/A",
                        "conveyor_type": row.get(CSV_CONV_TYPE_COL, "").strip() if CSV_CONV_TYPE_COL in headers else "N/A",
                        # Status fields to be filled later
                        "found_scada": False,
                        "found_drawing": False
                    }
                    manifest_items.append(item)
                # elif alias and panel: # If Unit Number is missing but others are present # Condition removed
                #     print(f"Warning: Alias '{alias}' in Panel '{panel}' is missing 'Unit Number' in CSV. Skipping drawing check for this item.")
                elif alias and not panel:
                    print(f"Warning: Alias '{alias}' found in CSV but is missing its '{CSV_PANEL_COL}'. Skipping.")
                # Add other specific warnings if needed
    except FileNotFoundError:
        print(f"Error: Manifest file not found at {csv_filepath}")
        return None
    except Exception as e:
        print(f"Error reading CSV file {csv_filepath}: {e}")
        return None
    print(f"Read {len(manifest_items)} valid items from manifest.")
    return manifest_items
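# The manifest CSV is expected to start with a header row containing at least the
# CSV_ALIAS_COL ('Alias') and CSV_PANEL_COL ('Control Panel') columns, e.g.:
#   Alias,Control Panel,Equipment Type,Type of Conveyor
# Rows that lack either required value are skipped.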
def check_scada(manifest_data, views_dir):
    """Checks for aliases in SCADA JSON view files."""
    if not manifest_data:
        return
    print(f"Starting SCADA check in directory: {views_dir}...")
    found_count = 0
    processed_files = 0
    # Create a quick lookup map of normalized_alias -> list of manifest items (handles duplicate aliases)
    alias_map = {}
    for item in manifest_data:
        na = item['normalized_alias']
        if na not in alias_map:
            alias_map[na] = []
        alias_map[na].append(item)
    try:
        for root, _, files in os.walk(views_dir):
            for filename in files:
                if filename == 'view.json':
                    filepath = os.path.join(root, filename)
                    processed_files += 1
                    try:
                        with open(filepath, 'r', encoding='utf-8') as f:
                            # Read the whole file, normalize it for substring search
                            content = f.read()
                        normalized_content = normalize(content)
                        # Check manifest aliases against this file's normalized content
                        for norm_alias, items in alias_map.items():
                            if norm_alias in normalized_content:
                                for item in items:
                                    if not item['found_scada']:  # Update only if not already found elsewhere
                                        item['found_scada'] = True
                                        found_count += 1  # Count unique aliases found
                    except Exception as e:
                        print(f" Warning: Could not read or process JSON file {filepath}: {e}")
    except Exception as e:
        print(f"Error walking SCADA views directory {views_dir}: {e}")
    print(f"SCADA check finished. Processed {processed_files} view.json files. Found {found_count} manifest aliases.")
def check_drawings(manifest_data, text_output_dir):
    """Checks if aliases from manifest exist in *any* extracted drawing text file."""
    if not manifest_data:
        return
    print(f"Starting Drawings check: Scanning all .txt files in directory: {text_output_dir}...")
    all_normalized_content = ""  # Combine all text content here
    processed_files = 0
    found_files = []
    try:
        # Step 1: Read and combine content of all .txt files in the directory
        for filename in os.listdir(text_output_dir):
            if filename.lower().endswith('.txt'):
                filepath = os.path.join(text_output_dir, filename)
                processed_files += 1
                try:
                    with open(filepath, 'r', encoding='utf-8') as f:
                        content = f.read()
                    # Add a separator to prevent false matches across file boundaries
                    all_normalized_content += normalize(content) + "\n--file-separator--\n"
                    found_files.append(filename)
                except Exception as e:
                    print(f" Warning: Could not read or process text file {filepath}: {e}")
        if processed_files == 0:
            print(" Warning: No .txt files found in the directory. Cannot perform drawing check.")
            return
        else:
            print(f" Successfully read and normalized content from {len(found_files)} out of {processed_files} .txt files found.")
        # Step 2: Check each manifest alias against the combined content
        found_count = 0
        for item in manifest_data:
            normalized_alias = item['normalized_alias']
            if normalized_alias and normalized_alias in all_normalized_content:
                item['found_drawing'] = True
                found_count += 1
            # else: item['found_drawing'] is already False by default
        print(f"Drawings check finished. Found {found_count} manifest aliases within the combined text content.")
    except FileNotFoundError:
        print(f" Error: Drawings text directory not found: {text_output_dir}")
    except Exception as e:
        print(f" Error during drawings check: {e}")
def calculate_combined_progress(manifest_data):
    """Calculates the combined progress based on scada/drawing status."""
    print("Calculating combined progress statistics...")
    results = {
        "overall": {
            "total_csv": 0, "found_both": 0, "found_scada_only": 0, "found_drawing_only": 0, "missing_both": 0,
            "percentage_found_both": 0,
            "missing_list": [], "found_scada_only_list": [], "found_drawing_only_list": [], "found_both_list": []
        },
        "panels": {}
    }
    if not manifest_data:
        print("Warning: No manifest data to calculate progress from.")
        return results
    results["overall"]["total_csv"] = len(manifest_data)
    for item in manifest_data:
        panel = item['control_panel']
        # Initialize panel data if not present
        if panel not in results["panels"]:
            results["panels"][panel] = {
                "total": 0, "found_both": 0, "found_scada_only": 0, "found_drawing_only": 0, "missing_both": 0,
                "percentage_found_both": 0,
                "missing_list": [], "found_scada_only_list": [], "found_drawing_only_list": [], "found_both_list": []
            }
        results["panels"][panel]["total"] += 1
        # Categorize and add to lists
        item_detail = {k: v for k, v in item.items() if k != 'normalized_alias'}  # Don't need normalized in output
        if item['found_scada'] and item['found_drawing']:
            results["overall"]["found_both"] += 1
            results["panels"][panel]["found_both"] += 1
            results["overall"]["found_both_list"].append(item_detail)
            results["panels"][panel]["found_both_list"].append(item_detail)
        elif item['found_scada'] and not item['found_drawing']:
            results["overall"]["found_scada_only"] += 1
            results["panels"][panel]["found_scada_only"] += 1
            results["overall"]["found_scada_only_list"].append(item_detail)
            results["panels"][panel]["found_scada_only_list"].append(item_detail)
        elif not item['found_scada'] and item['found_drawing']:
            results["overall"]["found_drawing_only"] += 1
            results["panels"][panel]["found_drawing_only"] += 1
            results["overall"]["found_drawing_only_list"].append(item_detail)
            results["panels"][panel]["found_drawing_only_list"].append(item_detail)
        else:  # Missing both
            results["overall"]["missing_both"] += 1
            results["panels"][panel]["missing_both"] += 1
            results["overall"]["missing_list"].append(item_detail)
            results["panels"][panel]["missing_list"].append(item_detail)
    # Calculate percentages
    if results["overall"]["total_csv"] > 0:
        results["overall"]["percentage_found_both"] = round(
            (results["overall"]["found_both"] / results["overall"]["total_csv"]) * 100, 1
        )
    for panel_data in results["panels"].values():
        if panel_data["total"] > 0:
            panel_data["percentage_found_both"] = round(
                (panel_data["found_both"] / panel_data["total"]) * 100, 1
            )
    print("Combined progress calculation finished.")
    # print(json.dumps(results, indent=2))  # DEBUG: Print structure
    return results
# --- Core Logic ---
def update_progress_data():
    """Reads manifest, runs both checks, combines results, and updates global state."""
    global progress_data, status_message
    csv_path = get_csv_path()
    views_dir = get_views_dir_path()
    text_dir = get_text_output_dir_path()
    current_status = ""
    new_data_calculated = None
    # 1. Read Manifest
    status_message = "Reading manifest file..."
    print(f"Reading manifest: {csv_path}")
    manifest_data = read_manifest(csv_path)
    if manifest_data is None:
        current_status = f"Error: Failed to read or process manifest file {csv_path}"
        print(current_status)
        status_message = current_status
        data_updated_event.set()
        data_updated_event.clear()
        return  # Cannot proceed without manifest
    # 2. Check SCADA (JSON files)
    status_message = "Checking SCADA views..."
    if not os.path.exists(views_dir):
        current_status = f"Warning: SCADA Views directory not found at {views_dir}. Skipping SCADA check."
        print(current_status)
        # Mark all as not found in SCADA? Or just skip update? Skipping update is safer.
    else:
        check_scada(manifest_data, views_dir)
    # 3. Check Drawings (TXT files)
    status_message = "Checking drawing text files..."
    if not os.path.exists(text_dir):
        current_status = f"Warning: Extracted Text directory not found at {text_dir}. Skipping Drawings check."
        print(current_status)
        # Mark all as not found in Drawings? Or skip? Skipping update.
    else:
        check_drawings(manifest_data, text_dir)
    # 4. Calculate Combined Progress
    status_message = "Calculating combined progress..."
    try:
        new_data_calculated = calculate_combined_progress(manifest_data)
        if new_data_calculated:
            current_status = f"Analysis complete at {time.strftime('%Y-%m-%d %H:%M:%S')}"
        else:
            # This case shouldn't happen if manifest_data was valid
            current_status = "Error: Failed to calculate combined progress."
    except Exception as e:
        current_status = f"Error during progress calculation: {e}"
        traceback.print_exc()  # print() has no exc_info parameter; log the stack trace explicitly
        new_data_calculated = None  # Ensure no partial data update
    # Update global state
    print(current_status)
    status_message = current_status  # Update status regardless of calculation success/failure
    if new_data_calculated is not None:
        progress_data = new_data_calculated
        # Signal that an update attempt finished WITH new data
        data_updated_event.set()
        data_updated_event.clear()
# --- Git Repo Handling (Modified slightly to use updated status messages) ---
def check_and_update_repo():
    global last_commit_hash, status_message
    repo_path = get_repo_path()
    did_update = False  # Flag to track if files were actually updated
    initial_hash = last_commit_hash  # Store hash before check
    with repo_lock:
        try:
            repo_existed = os.path.exists(os.path.join(repo_path, ".git"))
            if not repo_existed:
                print(f"Cloning repository {REPO_URL} into {repo_path}...")
                status_message = f"Cloning repository {REPO_URL}..."
                git.Repo.clone_from(REPO_URL, repo_path, branch=BRANCH)
                repo = git.Repo(repo_path)
                last_commit_hash = repo.head.commit.hexsha
                print(f"Initial clone complete. Commit: {last_commit_hash}")
                did_update = True  # Cloned, so considered an update
            else:
                repo = git.Repo(repo_path)
                print("Fetching updates from remote...")
                current_local_commit = repo.head.commit.hexsha
                # Update hash *before* fetch in case fetch fails but commit was readable
                if last_commit_hash is None:
                    last_commit_hash = current_local_commit
                origin = repo.remotes.origin
                fetch_info = origin.fetch()
                # Check if fetch actually brought new data for the target branch
                # fetched_new_commits = any(info.flags & info.NEW_HEAD for info in fetch_info if info.name == f'origin/{BRANCH}')  # More precise check if needed
                current_remote_commit = repo.commit(f'origin/{BRANCH}').hexsha
                print(f"Local commit: {current_local_commit}, Remote commit: {current_remote_commit}")
                if current_local_commit != current_remote_commit:
                    print("New commit detected! Pulling changes...")
                    status_message = "Pulling updates..."
                    try:
                        pull_info = origin.pull()
                        new_commit_hash = repo.head.commit.hexsha
                        print(f"Pull successful. New commit: {new_commit_hash}")
                        last_commit_hash = new_commit_hash
                        did_update = True  # Pulled, so considered an update
                    except git.GitCommandError as e:
                        status_message = f"Error pulling repository: {e}"
                        print(status_message)
                        # Revert hash if pull failed
                        last_commit_hash = current_local_commit
                else:
                    print("No new commits detected.")
                    # Update status if it wasn't an error before
                    if not status_message.startswith("Error"):
                        status_message = f"Checked repo at {time.strftime('%Y-%m-%d %H:%M:%S')}. No changes."
            # Run analysis IF the repo was updated (cloned or pulled). If there was no git
            # update, the status message is still refreshed above, but no event is signalled,
            # so connected clients are not re-sent identical data.
            if did_update:
                # Status will be updated within update_progress_data
                update_progress_data()
        except git.GitCommandError as e:
            status_message = f"Git command error: {e}"
            print(status_message)
            # Try to get commit hash even if the command failed
            try:
                if os.path.exists(os.path.join(repo_path, ".git")):
                    repo = git.Repo(repo_path)
                    # Use previous hash if available, else try to read current
                    if last_commit_hash is None:
                        last_commit_hash = repo.head.commit.hexsha
            except Exception:
                if last_commit_hash is None:
                    last_commit_hash = "Error reading commit"
        except Exception as e:
            status_message = f"Error checking repository: {e}"
            print(status_message)
            if last_commit_hash is None:
                last_commit_hash = "Error checking repo"
    # Return True if analysis was run (because the repo changed), False otherwise
    return did_update
def periodic_repo_check():
    """Runs the check_and_update_repo function periodically."""
    while True:
        print(f"\nStarting periodic repository check (Interval: {CHECK_INTERVAL_SECONDS}s)...")
        repo_changed = check_and_update_repo()
        # If the repo didn't change, analysis wasn't triggered. For now, analysis only runs
        # when the repo changes or on initial startup. If you want analysis to run *every*
        # interval regardless of git changes, add a call here:
        # if not repo_changed:
        #     print("Repo unchanged, triggering analysis anyway...")
        #     update_progress_data()
        print("Check finished. Sleeping...")
        time.sleep(CHECK_INTERVAL_SECONDS)
# --- Flask Routes (Largely unchanged, rely on updated global state) ---
@app.route('/')
def index():
    return render_template('index.html')


@app.route('/drawings')
def drawings_page():
    # Render the main index template which now contains all content
    return render_template('index.html')


@app.route('/conflicts')
def conflicts_page():
    # Render the main index template which now contains all content
    return render_template('index.html')


@app.route('/stream')
def stream():
    def event_stream():
        last_sent_hash_to_client = None  # Track hash sent to *this specific client*
        # Send initial state immediately on connection
        with repo_lock:
            current_global_hash = last_commit_hash
            current_global_status = status_message
            current_global_progress = progress_data
        initial_payload = json.dumps({
            "status": current_global_status,
            "progress": current_global_progress,
            "last_commit": current_global_hash
        })
        yield f"data: {initial_payload}\n\n"
        last_sent_hash_to_client = current_global_hash  # Record that we sent the initial state for this client
        print(f"Sent initial state to new client (Hash: {last_sent_hash_to_client})")
        # Now wait for subsequent updates signaled by the event
        while True:
            data_updated_event.wait()  # Wait for background thread to signal completion
            with repo_lock:  # Re-acquire lock to get the latest state
                current_global_hash = last_commit_hash
                current_global_status = status_message
                current_global_progress = progress_data
            # Send an update to this client only if the data differs from what it last received.
            # The commit hash is the primary indicator of a change in the underlying data; if only
            # the status message changed, the client simply keeps showing its last known status.
            if current_global_hash != last_sent_hash_to_client:
                print(f"Data updated (Hash changed: {last_sent_hash_to_client} -> {current_global_hash}). Sending update to client.")
                data_payload = json.dumps({
                    "status": current_global_status,
                    "progress": current_global_progress,
                    "last_commit": current_global_hash
                })
                yield f"data: {data_payload}\n\n"
                last_sent_hash_to_client = current_global_hash  # Update the hash sent to this client
    return Response(event_stream(), mimetype="text/event-stream")
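# Each message pushed on /stream is a single SSE "data:" line containing JSON of the form
#   {"status": "...", "progress": {...}, "last_commit": "<commit sha>"}
# A browser can subscribe with EventSource('/stream'); a new message is pushed to a given
# client only when the commit hash differs from what that client last received.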
# --- Main Execution ---
if __name__ == '__main__':
    # Ensure repo and text directories exist (optional for text dir if PDFs are pre-processed)
    if not os.path.exists(REPO_DIR):
        os.makedirs(REPO_DIR)
    if not os.path.exists(TEXT_OUTPUT_FOLDER):
        print(f"Warning: Text output folder '{TEXT_OUTPUT_FOLDER}' not found. Drawing check might fail unless PDF extraction runs first or files are manually placed.")
        # os.makedirs(TEXT_OUTPUT_FOLDER)  # Optionally create it
    # Perform initial check/clone and data load
    print("Performing initial repository check and data load...")
    # Run check_and_update_repo, which calls update_progress_data if the repo was updated
    initial_update_done = check_and_update_repo()
    # If the repo existed and was already up to date on the first check, analysis hasn't run yet. Run it now.
    if not initial_update_done:
        print("Repository present and up-to-date. Running initial analysis...")
        # No need for the lock here as the background thread isn't running yet
        update_progress_data()  # Run the full analysis
    else:
        print("Initial analysis was triggered by repo clone/pull.")
    # Start the background thread for periodic checks
    print("Starting background repository check thread...")
    repo_check_thread = threading.Thread(target=periodic_repo_check, daemon=True)
    repo_check_thread.start()
    # Run the Flask app
    print("Starting Flask server on port 5050...")
    # threaded=True so each SSE client is served on its own thread; debug=False for production/stability
    app.run(host='0.0.0.0', port=5050, debug=False, threaded=True)