import os
import threading
import time
import json
import traceback

import git
import csv
import re
from flask import Flask, render_template, jsonify, Response

app = Flask(__name__)

# --- Configuration ---
REPO_URL = "http://192.168.5.191:3000/LCI/MTN6"
REPO_DIR = "./cloned_repo" # Directory to clone the repo into
BRANCH = "main"
CSV_FILENAME = "MTN6 Equipment Manifest REV6(Conveyor List).csv"
VIEWS_DIR_RELATIVE = "MTN6_SCADA/com.inductiveautomation.perspective/views/Detailed-Views"
TEXT_OUTPUT_FOLDER = "./extracted_texts" # Added: Directory with .txt files
CHECK_INTERVAL_SECONDS = 60
# --- Column Names from CSV (Adjust if necessary) ---
CSV_ALIAS_COL = 'Alias'
CSV_PANEL_COL = 'Control Panel'
CSV_EQ_TYPE_COL = 'Equipment Type' # Optional, for details modal
CSV_CONV_TYPE_COL = 'Type of Conveyor' # Optional, for details modal
# --- Global state ---
last_commit_hash = None
# New detailed progress data structure
progress_data = {
    "overall": {
        "total_csv": 0, "found_both": 0, "found_scada_only": 0, "found_drawing_only": 0, "missing_both": 0,
        "percentage_found_both": 0,
        "missing_list": [], "found_scada_only_list": [], "found_drawing_only_list": [], "found_both_list": []
    },
    "panels": {}  # Populated dynamically
}
status_message = "Initializing..."
repo_lock = threading.Lock()  # Lock for accessing repo and shared data
data_updated_event = threading.Event()  # Event to signal data updates
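# Threading model: the periodic git-check thread writes last_commit_hash, status_message and
# progress_data; the /stream SSE handlers read them. repo_lock guards those reads and writes,
# and data_updated_event wakes any connected /stream clients after a successful analysis.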
# --- Helper Functions ---
def get_repo_path():
    return os.path.abspath(REPO_DIR)


def get_csv_path():
    script_dir = os.path.dirname(os.path.abspath(__file__))
    return os.path.join(script_dir, CSV_FILENAME)


def get_views_dir_path():
    return os.path.join(get_repo_path(), VIEWS_DIR_RELATIVE)


def get_text_output_dir_path():
    # Construct absolute path based on the script's directory
    script_dir = os.path.dirname(os.path.abspath(__file__))
    # Use os.path.join to handle path separators correctly and avoid './'
    return os.path.abspath(os.path.join(script_dir, TEXT_OUTPUT_FOLDER))


def normalize(text):
    """Normalize string for comparison: lowercase, treat '-' and '_' the same, remove all whitespace."""
    if not isinstance(text, str):
        return ""
    text = text.lower()              # Convert to lowercase
    text = text.replace('-', '_')    # Replace hyphens with underscores
    text = re.sub(r'\s+', '', text)  # Remove ALL whitespace characters (including newlines)
    return text
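# For example, normalize("CV-101 A"), normalize("cv_101 a") and normalize("CV_101A") all
# return "cv_101a", so aliases match regardless of hyphen/underscore style or spacing.
# (These alias values are illustrative only, not taken from the manifest.)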
def read_manifest(csv_filepath):
    """Reads the manifest CSV into a list of dictionaries."""
    manifest_items = []
    # Only require Alias and Panel now for basic grouping
    required_cols = {CSV_ALIAS_COL, CSV_PANEL_COL}
    optional_cols = {CSV_EQ_TYPE_COL, CSV_CONV_TYPE_COL}
    try:
        # Revert back to 'utf-8-sig' to handle potential BOM from Excel
        with open(csv_filepath, mode='r', newline='', encoding='utf-8-sig') as infile:
            reader = csv.DictReader(infile)
            headers = set(h.strip() for h in reader.fieldnames)
            # Check for required columns
            missing_required = required_cols - headers
            if missing_required:
                print(f"Error: Missing required columns in CSV '{csv_filepath}': {', '.join(missing_required)}")
                print(f"Available columns: {', '.join(headers)}")
                return None
            for row in reader:
                alias = row.get(CSV_ALIAS_COL, "").strip()
                panel = row.get(CSV_PANEL_COL, "").strip()
                # unit_number = row.get('Unit Number', "").strip() # No longer needed for filename
                # Add if Alias and Control Panel are present (Panel needed for grouping results later)
                if alias and panel:
                    item = {
                        "alias": alias,
                        "normalized_alias": normalize(alias),
                        "control_panel": panel,
                        # "unit_number": unit_number, # Removed
                        # "expected_drawing_filename": f"MTN6_SYSDL-{unit_number}.txt", # Removed
                        # Add optional data if columns exist
                        "equipment_type": row.get(CSV_EQ_TYPE_COL, "").strip() if CSV_EQ_TYPE_COL in headers else "N/A",
                        "conveyor_type": row.get(CSV_CONV_TYPE_COL, "").strip() if CSV_CONV_TYPE_COL in headers else "N/A",
                        # Status fields to be filled later
                        "found_scada": False,
                        "found_drawing": False
                    }
                    manifest_items.append(item)
                # elif alias and panel: # If Unit Number is missing but others are present # Condition removed
                #     print(f"Warning: Alias '{alias}' in Panel '{panel}' is missing 'Unit Number' in CSV. Skipping drawing check for this item.")
                elif alias and not panel:
                    print(f"Warning: Alias '{alias}' found in CSV but is missing its '{CSV_PANEL_COL}'. Skipping.")
                # Add other specific warnings if needed
    except FileNotFoundError:
        print(f"Error: Manifest file not found at {csv_filepath}")
        return None
    except Exception as e:
        print(f"Error reading CSV file {csv_filepath}: {e}")
        return None
    print(f"Read {len(manifest_items)} valid items from manifest.")
    return manifest_items
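# The manifest CSV is expected to start with a header row containing at least the
# CSV_ALIAS_COL ('Alias') and CSV_PANEL_COL ('Control Panel') columns, e.g.:
#   Alias,Control Panel,Equipment Type,Type of Conveyor
# Rows that lack either required value are skipped.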
def check_scada(manifest_data, views_dir):
    """Checks for aliases in SCADA JSON view files."""
    if not manifest_data:
        return
    print(f"Starting SCADA check in directory: {views_dir}...")
    found_count = 0
    processed_files = 0
    # Create a quick lookup map of normalized_alias -> list of manifest items (handles duplicate aliases)
    alias_map = {}
    for item in manifest_data:
        na = item['normalized_alias']
        if na not in alias_map:
            alias_map[na] = []
        alias_map[na].append(item)
    try:
        for root, _, files in os.walk(views_dir):
            for filename in files:
                if filename == 'view.json':
                    filepath = os.path.join(root, filename)
                    processed_files += 1
                    try:
                        with open(filepath, 'r', encoding='utf-8') as f:
                            # Read the whole file, normalize it for substring search
                            content = f.read()
                        normalized_content = normalize(content)
                        # Check manifest aliases against this file's normalized content
                        for norm_alias, items in alias_map.items():
                            if norm_alias in normalized_content:
                                for item in items:
                                    if not item['found_scada']:  # Update only if not already found elsewhere
                                        item['found_scada'] = True
                                        found_count += 1  # Count unique aliases found
                    except Exception as e:
                        print(f" Warning: Could not read or process JSON file {filepath}: {e}")
    except Exception as e:
        print(f"Error walking SCADA views directory {views_dir}: {e}")
    print(f"SCADA check finished. Processed {processed_files} view.json files. Found {found_count} manifest aliases.")
def check_drawings(manifest_data, text_output_dir):
    """Checks if aliases from manifest exist in *any* extracted drawing text file."""
    if not manifest_data:
        return
    print(f"Starting Drawings check: Scanning all .txt files in directory: {text_output_dir}...")
    all_normalized_content = ""  # Combine all text content here
    processed_files = 0
    found_files = []
    try:
        # Step 1: Read and combine content of all .txt files in the directory
        for filename in os.listdir(text_output_dir):
            if filename.lower().endswith('.txt'):
                filepath = os.path.join(text_output_dir, filename)
                processed_files += 1
                try:
                    with open(filepath, 'r', encoding='utf-8') as f:
                        content = f.read()
                    # Add a separator to prevent false matches across file boundaries
                    all_normalized_content += normalize(content) + "\n--file-separator--\n"
                    found_files.append(filename)
                except Exception as e:
                    print(f" Warning: Could not read or process text file {filepath}: {e}")
        if processed_files == 0:
            print(" Warning: No .txt files found in the directory. Cannot perform drawing check.")
            return
        else:
            print(f" Successfully read and normalized content from {len(found_files)} out of {processed_files} .txt files found.")
        # Step 2: Check each manifest alias against the combined content
        found_count = 0
        for item in manifest_data:
            normalized_alias = item['normalized_alias']
            if normalized_alias and normalized_alias in all_normalized_content:
                item['found_drawing'] = True
                found_count += 1
            # else: item['found_drawing'] is already False by default
        print(f"Drawings check finished. Found {found_count} manifest aliases within the combined text content.")
    except FileNotFoundError:
        print(f" Error: Drawings text directory not found: {text_output_dir}")
    except Exception as e:
        print(f" Error during drawings check: {e}")
def calculate_combined_progress(manifest_data):
    """Calculates the combined progress based on scada/drawing status."""
    print("Calculating combined progress statistics...")
    results = {
        "overall": {
            "total_csv": 0, "found_both": 0, "found_scada_only": 0, "found_drawing_only": 0, "missing_both": 0,
            "percentage_found_both": 0,
            "missing_list": [], "found_scada_only_list": [], "found_drawing_only_list": [], "found_both_list": []
        },
        "panels": {}
    }
    if not manifest_data:
        print("Warning: No manifest data to calculate progress from.")
        return results
    results["overall"]["total_csv"] = len(manifest_data)
    for item in manifest_data:
        panel = item['control_panel']
        # Initialize panel data if not present
        if panel not in results["panels"]:
            results["panels"][panel] = {
                "total": 0, "found_both": 0, "found_scada_only": 0, "found_drawing_only": 0, "missing_both": 0,
                "percentage_found_both": 0,
                "missing_list": [], "found_scada_only_list": [], "found_drawing_only_list": [], "found_both_list": []
            }
        results["panels"][panel]["total"] += 1
        # Categorize and add to lists
        item_detail = {k: v for k, v in item.items() if k != 'normalized_alias'}  # Don't need normalized in output
        if item['found_scada'] and item['found_drawing']:
            results["overall"]["found_both"] += 1
            results["panels"][panel]["found_both"] += 1
            results["overall"]["found_both_list"].append(item_detail)
            results["panels"][panel]["found_both_list"].append(item_detail)
        elif item['found_scada'] and not item['found_drawing']:
            results["overall"]["found_scada_only"] += 1
            results["panels"][panel]["found_scada_only"] += 1
            results["overall"]["found_scada_only_list"].append(item_detail)
            results["panels"][panel]["found_scada_only_list"].append(item_detail)
        elif not item['found_scada'] and item['found_drawing']:
            results["overall"]["found_drawing_only"] += 1
            results["panels"][panel]["found_drawing_only"] += 1
            results["overall"]["found_drawing_only_list"].append(item_detail)
            results["panels"][panel]["found_drawing_only_list"].append(item_detail)
        else:  # Missing both
            results["overall"]["missing_both"] += 1
            results["panels"][panel]["missing_both"] += 1
            results["overall"]["missing_list"].append(item_detail)
            results["panels"][panel]["missing_list"].append(item_detail)
    # Calculate percentages
    if results["overall"]["total_csv"] > 0:
        results["overall"]["percentage_found_both"] = round(
            (results["overall"]["found_both"] / results["overall"]["total_csv"]) * 100, 1
        )
    for panel_data in results["panels"].values():
        if panel_data["total"] > 0:
            panel_data["percentage_found_both"] = round(
                (panel_data["found_both"] / panel_data["total"]) * 100, 1
            )
    print("Combined progress calculation finished.")
    # print(json.dumps(results, indent=2))  # DEBUG: Print structure
    return results
# --- Core Logic ---
def update_progress_data():
    """Reads manifest, runs both checks, combines results, and updates global state."""
    global progress_data, status_message
    csv_path = get_csv_path()
    views_dir = get_views_dir_path()
    text_dir = get_text_output_dir_path()
    current_status = ""
    new_data_calculated = None
    # 1. Read Manifest
    status_message = "Reading manifest file..."
    print(f"Reading manifest: {csv_path}")
    manifest_data = read_manifest(csv_path)
    if manifest_data is None:
        current_status = f"Error: Failed to read or process manifest file {csv_path}"
        print(current_status)
        status_message = current_status
        data_updated_event.set()
        data_updated_event.clear()
        return  # Cannot proceed without manifest
    # 2. Check SCADA (JSON files)
    status_message = "Checking SCADA views..."
    if not os.path.exists(views_dir):
        current_status = f"Warning: SCADA Views directory not found at {views_dir}. Skipping SCADA check."
        print(current_status)
        # Mark all as not found in SCADA? Or just skip update? Skipping update is safer.
    else:
        check_scada(manifest_data, views_dir)
    # 3. Check Drawings (TXT files)
    status_message = "Checking drawing text files..."
    if not os.path.exists(text_dir):
        current_status = f"Warning: Extracted Text directory not found at {text_dir}. Skipping Drawings check."
        print(current_status)
        # Mark all as not found in Drawings? Or skip? Skipping update.
    else:
        check_drawings(manifest_data, text_dir)
    # 4. Calculate Combined Progress
    status_message = "Calculating combined progress..."
    try:
        new_data_calculated = calculate_combined_progress(manifest_data)
        if new_data_calculated:
            current_status = f"Analysis complete at {time.strftime('%Y-%m-%d %H:%M:%S')}"
        else:
            # This case shouldn't happen if manifest_data was valid
            current_status = "Error: Failed to calculate combined progress."
    except Exception as e:
        current_status = f"Error during progress calculation: {e}"
        traceback.print_exc()  # print() has no exc_info parameter; log the stack trace explicitly
        new_data_calculated = None  # Ensure no partial data update
    # Update global state
    print(current_status)
    status_message = current_status  # Update status regardless of calculation success/failure
    if new_data_calculated is not None:
        progress_data = new_data_calculated
        # Signal that an update attempt finished WITH new data
        data_updated_event.set()
        data_updated_event.clear()
# --- Git Repo Handling (Modified slightly to use updated status messages) ---
def check_and_update_repo():
    global last_commit_hash, status_message
    repo_path = get_repo_path()
    did_update = False  # Flag to track if files were actually updated
    initial_hash = last_commit_hash  # Store hash before check
    with repo_lock:
        try:
            repo_existed = os.path.exists(os.path.join(repo_path, ".git"))
            if not repo_existed:
                print(f"Cloning repository {REPO_URL} into {repo_path}...")
                status_message = f"Cloning repository {REPO_URL}..."
                git.Repo.clone_from(REPO_URL, repo_path, branch=BRANCH)
                repo = git.Repo(repo_path)
                last_commit_hash = repo.head.commit.hexsha
                print(f"Initial clone complete. Commit: {last_commit_hash}")
                did_update = True  # Cloned, so considered an update
            else:
                repo = git.Repo(repo_path)
                print("Fetching updates from remote...")
                current_local_commit = repo.head.commit.hexsha
                # Update hash *before* fetch in case fetch fails but commit was readable
                if last_commit_hash is None:
                    last_commit_hash = current_local_commit
                origin = repo.remotes.origin
                fetch_info = origin.fetch()
                # Check if fetch actually brought new data for the target branch
                # fetched_new_commits = any(info.flags & info.NEW_HEAD for info in fetch_info if info.name == f'origin/{BRANCH}')  # More precise check if needed
                current_remote_commit = repo.commit(f'origin/{BRANCH}').hexsha
                print(f"Local commit: {current_local_commit}, Remote commit: {current_remote_commit}")
                if current_local_commit != current_remote_commit:
                    print("New commit detected! Pulling changes...")
                    status_message = "Pulling updates..."
                    try:
                        pull_info = origin.pull()
                        new_commit_hash = repo.head.commit.hexsha
                        print(f"Pull successful. New commit: {new_commit_hash}")
                        last_commit_hash = new_commit_hash
                        did_update = True  # Pulled, so considered an update
                    except git.GitCommandError as e:
                        status_message = f"Error pulling repository: {e}"
                        print(status_message)
                        # Revert hash if pull failed
                        last_commit_hash = current_local_commit
                else:
                    print("No new commits detected.")
                    # Update status if it wasn't an error before
                    if not status_message.startswith("Error"):
                        status_message = f"Checked repo at {time.strftime('%Y-%m-%d %H:%M:%S')}. No changes."
            # Run analysis IF the repo was updated (cloned or pulled). If there was no git
            # update, the status message is still refreshed above, but no event is signalled,
            # so connected clients are not re-sent identical data.
            if did_update:
                # Status will be updated within update_progress_data
                update_progress_data()
        except git.GitCommandError as e:
            status_message = f"Git command error: {e}"
            print(status_message)
            # Try to get commit hash even if the command failed
            try:
                if os.path.exists(os.path.join(repo_path, ".git")):
                    repo = git.Repo(repo_path)
                    # Use previous hash if available, else try to read current
                    if last_commit_hash is None:
                        last_commit_hash = repo.head.commit.hexsha
            except Exception:
                if last_commit_hash is None:
                    last_commit_hash = "Error reading commit"
        except Exception as e:
            status_message = f"Error checking repository: {e}"
            print(status_message)
            if last_commit_hash is None:
                last_commit_hash = "Error checking repo"
    # Return True if analysis was run (because the repo changed), False otherwise
    return did_update
def periodic_repo_check():
    """Runs the check_and_update_repo function periodically."""
    while True:
        print(f"\nStarting periodic repository check (Interval: {CHECK_INTERVAL_SECONDS}s)...")
        repo_changed = check_and_update_repo()
        # If the repo didn't change, analysis wasn't triggered. For now, analysis only runs
        # when the repo changes or on initial startup. If you want analysis to run *every*
        # interval regardless of git changes, add a call here:
        # if not repo_changed:
        #     print("Repo unchanged, triggering analysis anyway...")
        #     update_progress_data()
        print("Check finished. Sleeping...")
        time.sleep(CHECK_INTERVAL_SECONDS)
# --- Flask Routes (Largely unchanged, rely on updated global state) ---
@app.route('/')
def index():
    return render_template('index.html')


@app.route('/drawings')
def drawings_page():
    # Render the main index template which now contains all content
    return render_template('index.html')


@app.route('/conflicts')
def conflicts_page():
    # Render the main index template which now contains all content
    return render_template('index.html')


@app.route('/stream')
def stream():
    def event_stream():
        last_sent_hash_to_client = None  # Track hash sent to *this specific client*
        # Send initial state immediately on connection
        with repo_lock:
            current_global_hash = last_commit_hash
            current_global_status = status_message
            current_global_progress = progress_data
        initial_payload = json.dumps({
            "status": current_global_status,
            "progress": current_global_progress,
            "last_commit": current_global_hash
        })
        yield f"data: {initial_payload}\n\n"
        last_sent_hash_to_client = current_global_hash  # Record that we sent the initial state for this client
        print(f"Sent initial state to new client (Hash: {last_sent_hash_to_client})")
        # Now wait for subsequent updates signaled by the event
        while True:
            data_updated_event.wait()  # Wait for background thread to signal completion
            with repo_lock:  # Re-acquire lock to get the latest state
                current_global_hash = last_commit_hash
                current_global_status = status_message
                current_global_progress = progress_data
            # Send an update to this client only if the data differs from what it last received.
            # The commit hash is the primary indicator of a change in the underlying data; if only
            # the status message changed, the client simply keeps showing its last known status.
            if current_global_hash != last_sent_hash_to_client:
                print(f"Data updated (Hash changed: {last_sent_hash_to_client} -> {current_global_hash}). Sending update to client.")
                data_payload = json.dumps({
                    "status": current_global_status,
                    "progress": current_global_progress,
                    "last_commit": current_global_hash
                })
                yield f"data: {data_payload}\n\n"
                last_sent_hash_to_client = current_global_hash  # Update the hash sent to this client
    return Response(event_stream(), mimetype="text/event-stream")
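# Each message pushed on /stream is a single SSE "data:" line containing JSON of the form
#   {"status": "...", "progress": {...}, "last_commit": "<commit sha>"}
# A browser can subscribe with EventSource('/stream'); a new message is pushed to a given
# client only when the commit hash differs from what that client last received.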
# --- Main Execution ---
if __name__ == '__main__':
    # Ensure repo and text directories exist (optional for text dir if PDFs are pre-processed)
    if not os.path.exists(REPO_DIR):
        os.makedirs(REPO_DIR)
    if not os.path.exists(TEXT_OUTPUT_FOLDER):
        print(f"Warning: Text output folder '{TEXT_OUTPUT_FOLDER}' not found. Drawing check might fail unless PDF extraction runs first or files are manually placed.")
        # os.makedirs(TEXT_OUTPUT_FOLDER)  # Optionally create it
    # Perform initial check/clone and data load
    print("Performing initial repository check and data load...")
    # Run check_and_update_repo, which calls update_progress_data if the repo was updated
    initial_update_done = check_and_update_repo()
    # If the repo existed and was already up to date on the first check, analysis hasn't run yet. Run it now.
    if not initial_update_done:
        print("Repository present and up-to-date. Running initial analysis...")
        # No need for the lock here as the background thread isn't running yet
        update_progress_data()  # Run the full analysis
    else:
        print("Initial analysis was triggered by repo clone/pull.")
    # Start the background thread for periodic checks
    print("Starting background repository check thread...")
    repo_check_thread = threading.Thread(target=periodic_repo_check, daemon=True)
    repo_check_thread.start()
    # Run the Flask app
    print("Starting Flask server on port 5050...")
    # threaded=True so each SSE client is served on its own thread; debug=False for production/stability
    app.run(host='0.0.0.0', port=5050, debug=False, threaded=True)