vendor_report/api_server.py

#!/usr/bin/env python3
"""
Web API Server for On-Demand Report Generation

Provides REST API endpoints to trigger report generation on demand.
"""

import logging
from pathlib import Path
from typing import Optional, List, Dict
import json
from datetime import datetime
import shutil
import os

try:
    from flask import Flask, jsonify, request, send_from_directory
    from flask_cors import CORS
    from werkzeug.utils import secure_filename
    FLASK_AVAILABLE = True
except ImportError:
    FLASK_AVAILABLE = False
    logging.warning("Flask not installed. API server features disabled.")

from config import load_config
from report_generator import generate_report
from sharepoint_downloader import download_from_sharepoint

logger = logging.getLogger(__name__)

app = None
config = None
scheduler_thread = None


def cleanup_old_reports(output_dir: Path, reports_dir: Path, max_reports: int = 10):
    """
    Cleanup old reports and Excel files, keeping only the last max_reports.

    Args:
        output_dir: Directory containing report HTML/JSON files
        reports_dir: Directory containing Excel files
        max_reports: Maximum number of reports to keep
    """
    try:
        # Get all report HTML files sorted by modification time (newest first)
        html_files = sorted(output_dir.glob('report-*.html'), key=lambda p: p.stat().st_mtime, reverse=True)

        if len(html_files) <= max_reports:
            return  # No cleanup needed

        # Get reports to delete (oldest ones)
        reports_to_delete = html_files[max_reports:]

        deleted_count = 0
        for html_file in reports_to_delete:
            report_id = html_file.stem

            # Delete HTML file
            try:
                html_file.unlink()
                logger.info(f"Deleted old report HTML: {html_file.name}")
                deleted_count += 1
            except Exception as e:
                logger.warning(f"Failed to delete {html_file.name}: {e}")

            # Delete corresponding JSON file
            json_file = output_dir / f"{report_id}.json"
            if json_file.exists():
                try:
                    json_file.unlink()
                    logger.info(f"Deleted old report JSON: {json_file.name}")
                except Exception as e:
                    logger.warning(f"Failed to delete {json_file.name}: {e}")

        # Cleanup Excel files - keep only files associated with remaining reports
        if reports_dir.exists():
            excel_files = list(reports_dir.glob('*.xlsx')) + list(reports_dir.glob('*.xls'))

            if len(excel_files) > max_reports:
                # Sort by modification time and delete oldest
                excel_files_sorted = sorted(excel_files, key=lambda p: p.stat().st_mtime, reverse=True)
                excel_to_delete = excel_files_sorted[max_reports:]

                for excel_file in excel_to_delete:
                    try:
                        excel_file.unlink()
                        logger.info(f"Deleted old Excel file: {excel_file.name}")
                    except Exception as e:
                        logger.warning(f"Failed to delete {excel_file.name}: {e}")

        logger.info(f"Cleanup completed: deleted {deleted_count} old report(s)")

    except Exception as e:
        logger.error(f"Error during cleanup: {e}", exc_info=True)


def create_app(config_path: Optional[str] = None):
    """Create and configure Flask app."""
    global app, config

    if not FLASK_AVAILABLE:
        raise ImportError(
            "Flask is required for API server. "
            "Install it with: pip install flask flask-cors"
        )

    app = Flask(__name__)
    CORS(app)  # Enable CORS for all routes

    config = load_config(config_path)
    api_config = config.get('api', {})
    sharepoint_config = config.get('sharepoint', {})
    report_config = config.get('report', {})

    # Resolve paths relative to script location, not current working directory
    script_dir = Path(__file__).parent.absolute()

    # Convert relative paths to absolute paths relative to script directory
    if 'output_dir' in report_config and report_config['output_dir']:
        output_dir = Path(report_config['output_dir'])
        if not output_dir.is_absolute():
            report_config['output_dir'] = str(script_dir / output_dir)

    if 'reports_dir' in report_config and report_config['reports_dir']:
        reports_dir = Path(report_config['reports_dir'])
        if not reports_dir.is_absolute():
            report_config['reports_dir'] = str(script_dir / reports_dir)

    # Store config in app context
    app.config['API_KEY'] = api_config.get('api_key')
    app.config['SHAREPOINT_CONFIG'] = sharepoint_config
    app.config['REPORT_CONFIG'] = report_config

    @app.route('/health', methods=['GET'])
    def health():
        """Health check endpoint."""
        return jsonify({
            'status': 'healthy',
            'service': 'vendor-report-generator'
        })

    @app.route('/api/generate', methods=['POST'])
    def generate_report_endpoint():
        """
        Generate report on demand.

        Request body (optional):
        {
            "download_from_sharepoint": true,
            "reports_dir": "reports",
            "output_file": "output/report.json"
        }
        """
        # Check API key if configured
        api_key = app.config.get('API_KEY')
        if api_key:
            provided_key = request.headers.get('X-API-Key') or request.json.get('api_key') if request.json else None
            if provided_key != api_key:
                return jsonify({'error': 'Invalid API key'}), 401

        try:
            request_data = request.json or {}
            download_from_sp = request_data.get('download_from_sharepoint', True)  # Default to True for backward compatibility
            downloaded_files = []  # Initialize here for scope

            # Get report config early - needed for error handling
            report_config = app.config['REPORT_CONFIG']

            # Download from SharePoint if requested AND no manual upload happened
            # If download_from_sharepoint is False, it means manual upload was used
            if download_from_sp:
                sp_config = app.config['SHAREPOINT_CONFIG']
                if not sp_config.get('enabled'):
                    return jsonify({
                        'error': 'SharePoint is not enabled in configuration'
                    }), 400

                logger.info("Downloading files from SharePoint...")
                try:
                    downloaded = download_from_sharepoint(
                        site_url=sp_config['site_url'],
                        folder_path=sp_config.get('folder_path'),
                        file_path=sp_config.get('file_path'),
                        local_dir=sp_config.get('local_dir', 'reports'),
                        tenant_id=sp_config.get('tenant_id'),
                        client_id=sp_config.get('client_id'),
                        client_secret=sp_config.get('client_secret'),
                        use_app_authentication=sp_config.get('use_app_authentication', True),
                        file_pattern=sp_config.get('file_pattern'),
                        overwrite=sp_config.get('overwrite', True)
                    )
                    downloaded_files = downloaded if downloaded else []
                    logger.info(f"Downloaded {len(downloaded_files)} file(s) from SharePoint: {downloaded_files}")

                    # If SharePoint download failed (no files downloaded), check if we have existing files
                    if len(downloaded_files) == 0:
                        logger.warning("SharePoint download returned 0 files. This could mean:")
                        logger.warning("1. SharePoint permissions issue (401/403 error)")
                        logger.warning("2. No files found in the specified folder")
                        logger.warning("3. Site access not granted (Resource-Specific Consent needed)")
                        logger.warning("Checking if existing files are available in reports directory...")

                        # Check if there are existing files we can use
                        reports_dir_path = Path(report_config.get('reports_dir', 'reports'))
                        if not reports_dir_path.is_absolute():
                            script_dir = Path(__file__).parent.absolute()
                            reports_dir_path = script_dir / reports_dir_path

                        if reports_dir_path.exists():
                            existing_files = list(reports_dir_path.glob('*.xlsx')) + list(reports_dir_path.glob('*.xls'))
                            if existing_files:
                                logger.warning(f"Found {len(existing_files)} existing file(s) in reports directory. Will use these instead.")
                                logger.warning("NOTE: These may be old files. Consider using manual upload for fresh data.")
                            else:
                                logger.error("No files available - neither from SharePoint nor existing files.")
                                return jsonify({
                                    'error': 'SharePoint download failed and no existing files found',
                                    'details': 'SharePoint access may require Resource-Specific Consent (RSC). Please use manual file upload or fix SharePoint permissions.',
                                    'sharepoint_error': True
                                }), 500
                except Exception as e:
                    error_msg = str(e)
                    logger.error(f"Failed to download from SharePoint: {error_msg}", exc_info=True)

                    # Check if this is a locked file error
                    is_locked_file_error = 'locked' in error_msg.lower() or 'cannot access the file' in error_msg.lower() or 'being used by another process' in error_msg.lower()

                    if is_locked_file_error:
                        # Extract filename from error if possible
                        locked_file_match = None
                        import re
                        # Try to find filename in error message
                        match = re.search(r"['\"]([^'\"]*\.xlsx?)['\"]", error_msg)
                        if match:
                            locked_file_match = match.group(1)

                        locked_file_info = f" ({locked_file_match})" if locked_file_match else ""
                        return jsonify({
                            'error': f'Cannot download from SharePoint: File is locked{locked_file_info}',
                            'details': f'A file in the reports directory is being used by another program (likely Excel). Please close Excel and any other programs that might have this file open, then try again. Error: {error_msg}',
                            'instructions': [
                                '1. Close Microsoft Excel completely',
                                '2. Close any file explorer windows showing the reports folder',
                                '3. Wait a few seconds',
                                '4. Try generating the report again',
                                '',
                                'Alternatively, use manual file upload instead of SharePoint download.'
                            ],
                            'sharepoint_error': True,
                            'locked_file_error': True
                        }), 500

                    # Check if we have existing files as fallback (only for non-locked errors)
                    reports_dir_path = Path(report_config.get('reports_dir', 'reports'))
                    if not reports_dir_path.is_absolute():
                        script_dir = Path(__file__).parent.absolute()
                        reports_dir_path = script_dir / reports_dir_path

                    if reports_dir_path.exists():
                        existing_files = list(reports_dir_path.glob('*.xlsx')) + list(reports_dir_path.glob('*.xls'))
                        if existing_files:
                            logger.warning(f"SharePoint download failed, but found {len(existing_files)} existing file(s). Will use these.")
                            downloaded_files = []  # Continue with existing files
                        else:
                            return jsonify({
                                'error': f'SharePoint download failed: {error_msg}',
                                'details': 'No existing files found. Please use manual file upload or fix SharePoint permissions.',
                                'sharepoint_error': True
                            }), 500
                    else:
                        return jsonify({
                            'error': f'SharePoint download failed: {error_msg}',
                            'details': 'Reports directory does not exist. Please use manual file upload or fix SharePoint permissions.',
                            'sharepoint_error': True
                        }), 500

            # Generate report with timestamp
            reports_dir = request_data.get('reports_dir', report_config.get('reports_dir', 'reports'))
            output_dir_str = report_config.get('output_dir', 'output')
            output_dir = Path(output_dir_str)
            if not output_dir.is_absolute():
                script_dir = Path(__file__).parent.absolute()
                output_dir = script_dir / output_dir

            # Create timestamped filename
            timestamp = datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
            report_id = f"report-{timestamp}"
            output_file = str(output_dir / f"{report_id}.json")

            # Log which files will be used for generation
            reports_dir_path = Path(reports_dir)
            if not reports_dir_path.is_absolute():
                script_dir = Path(__file__).parent.absolute()
                reports_dir_path = script_dir / reports_dir_path

            logger.info(f"Generating report from {reports_dir_path.absolute()}...")
            logger.info(f"Reports directory exists: {reports_dir_path.exists()}")

            # Determine which files to use for generation
            # CRITICAL: Only use files that were just downloaded/uploaded, not old ones
            if downloaded_files:
                # Files were downloaded from SharePoint - use only those
                logger.info(f"Using {len(downloaded_files)} file(s) downloaded from SharePoint")
                # Verify that reports_dir only contains the downloaded files (should be empty of old files)
                all_files = list(reports_dir_path.glob('*.xlsx')) + list(reports_dir_path.glob('*.xls'))
                downloaded_file_paths = [Path(f).name for f in downloaded_files]  # Get just filenames
                if len(all_files) != len(downloaded_files):
                    logger.warning(f"WARNING: Found {len(all_files)} file(s) in reports_dir but only {len(downloaded_files)} were downloaded!")
                    logger.warning("This might indicate old files weren't cleared. Clearing now...")
                    for file in all_files:
                        if file.name not in downloaded_file_paths:
                            try:
                                file.unlink()
                                logger.info(f"Cleared unexpected file: {file.name}")
                            except Exception as e:
                                logger.error(f"Failed to clear unexpected file {file.name}: {e}")
            elif not download_from_sp:
                # Manual upload was used (download_from_sharepoint=False)
                # Upload endpoint should have cleared old files before saving new ones
                # Use ALL files in the directory (they should all be from the recent upload)
                if reports_dir_path.exists():
                    excel_files = list(reports_dir_path.glob('*.xlsx')) + list(reports_dir_path.glob('*.xls'))
                    current_time = datetime.now().timestamp()
                    recent_files = []

                    logger.info(f"Manual upload generation: Found {len(excel_files)} file(s) in reports directory")

                    # Only use files modified in the last 2 minutes (very recent = just uploaded)
                    # This ensures we don't accidentally use SharePoint-downloaded files
                    for excel_file in excel_files:
                        mtime = excel_file.stat().st_mtime
                        age_seconds = current_time - mtime
                        # Only use files uploaded in the last 2 minutes (120 seconds)
                        # This is tight enough to catch only the most recent upload
                        if age_seconds < 120:  # 2 minutes
                            recent_files.append(excel_file)
                            mtime_str = datetime.fromtimestamp(mtime).strftime('%Y-%m-%d %H:%M:%S')
                            logger.info(f"  - {excel_file.name} (modified: {mtime_str}, age: {age_seconds:.1f}s) - will be used for manual upload generation")
                        else:
                            logger.warning(f"  - {excel_file.name} (modified: {datetime.fromtimestamp(mtime).strftime('%Y-%m-%d %H:%M:%S')}, age: {age_seconds:.1f}s) - skipping (too old, might be from SharePoint download)")

                    # Clear any files that are too old (likely from SharePoint)
                    if len(recent_files) < len(excel_files):
                        logger.warning(f"Found {len(excel_files)} total file(s), but only {len(recent_files)} are recent (< 2 min old). Clearing old files...")
                        for excel_file in excel_files:
                            if excel_file not in recent_files:
                                try:
                                    excel_file.unlink()
                                    logger.info(f"Cleared old file (likely from SharePoint): {excel_file.name}")
                                except Exception as e:
                                    logger.warning(f"Failed to clear old file {excel_file.name}: {e}")

                    if len(recent_files) == 0:
                        logger.error("Manual upload was used but no recent files (< 2 min old) found in reports directory!")
                        logger.error("This might mean:")
                        logger.error("1. Files were not uploaded successfully")
                        logger.error("2. Upload happened more than 2 minutes ago")
                        logger.error("3. File modification times are incorrect")
                        logger.error("4. SharePoint download happened after upload")
                        return jsonify({
                            'error': 'No recent files found for manual upload generation',
                            'details': 'Files were uploaded but not found or are too old. Please try uploading again and generating immediately.',
                            'manual_upload_error': True
                        }), 400

                    logger.info(f"Will generate report from {len(recent_files)} recently uploaded file(s)")
                else:
                    logger.error("Manual upload was used but reports directory does not exist!")
                    return jsonify({
                        'error': 'Reports directory does not exist',
                        'details': 'Cannot generate report from manual upload - reports directory is missing.',
                        'manual_upload_error': True
                    }), 500
            else:
                # SharePoint download was attempted but failed - this shouldn't happen if download_from_sp=True
                # But if it does, we should NOT use existing files as they might be old
                logger.error("SharePoint download was requested but failed, and no manual upload was used!")
                logger.error("This should not happen - refusing to use potentially old files")
                return jsonify({
                    'error': 'SharePoint download failed and no manual upload provided',
                    'details': 'Cannot generate report - no data source available. Please try again or use manual upload.',
                    'sharepoint_error': True
                }), 400

            # FINAL VERIFICATION: Before generation, ensure only expected files exist
            final_files = list(reports_dir_path.glob('*.xlsx')) + list(reports_dir_path.glob('*.xls'))
            if len(final_files) > 1:
                logger.error(f"CRITICAL: Found {len(final_files)} Excel file(s) before generation!")
                logger.error("This will cause data mixing. Files found:")
                for f in final_files:
                    mtime = f.stat().st_mtime
                    mtime_str = datetime.fromtimestamp(mtime).strftime('%Y-%m-%d %H:%M:%S')
                    logger.error(f"  - {f.name} (modified: {mtime_str})")
                logger.error("Attempting to keep only the most recent file...")

                # Keep only the newest file
                final_files_sorted = sorted(final_files, key=lambda f: f.stat().st_mtime, reverse=True)
                newest_file = final_files_sorted[0]
                for old_file in final_files_sorted[1:]:
                    try:
                        old_file.unlink()
                        logger.info(f"Removed older file before generation: {old_file.name}")
                    except Exception as e:
                        logger.error(f"Failed to remove {old_file.name}: {e}")
                        return jsonify({
                            'error': f'Multiple Excel files found and cannot remove old ones',
                            'details': f'Found {len(final_files)} files. Please ensure only one file exists. Files may be locked.',
                            'files_found': [f.name for f in final_files]
                        }), 400

                logger.warning(f"Proceeding with only the newest file: {newest_file.name}")

            report_data = generate_report(
                reports_dir=str(reports_dir_path),
                output_file=output_file,
                verbose=False  # Don't print to console in API mode
            )

            if report_data:
                # Generate HTML with same timestamp
                html_file = output_dir / f"{report_id}.html"
                from html_generator import generate_html_report
                generate_html_report(output_file, str(html_file))

                # Cleanup old reports (keep only last 10)
                # Ensure reports_dir is a Path object
                reports_dir_for_cleanup = Path(reports_dir) if isinstance(reports_dir, str) else reports_dir
                cleanup_old_reports(output_dir, reports_dir_for_cleanup, max_reports=10)

                return jsonify({
                    'status': 'success',
                    'message': 'Report generated successfully',
                    'report_id': report_id,
                    'report_date': timestamp,
                    'output_file': output_file,
                    'summary': report_data.get('summary', {}),
                    'vendors_count': len(report_data.get('vendors', [])),
                    'downloaded_files': len(downloaded_files) if download_from_sp else 0,
                    'downloaded_file_names': [Path(f).name for f in downloaded_files] if download_from_sp else []
                })
            else:
                return jsonify({
                    'error': 'Report generation failed'
                }), 500

        except Exception as e:
            logger.error(f"Error generating report: {e}", exc_info=True)
            return jsonify({
                'error': f'Report generation failed: {str(e)}'
            }), 500

    @app.route('/api/upload', methods=['POST'])
    def upload_files():
        """Upload Excel files manually. Clears old files before uploading new ones."""
        logger.info("=== MANUAL UPLOAD REQUEST RECEIVED ===")
        try:
            if 'files' not in request.files:
                logger.error("Upload request missing 'files' field")
                return jsonify({'error': 'No files provided'}), 400

            files = request.files.getlist('files')
            logger.info(f"Received {len(files)} file(s) in upload request")
            if not files or all(f.filename == '' for f in files):
                logger.error("No valid files in upload request")
                return jsonify({'error': 'No files selected'}), 400

            report_config = app.config['REPORT_CONFIG']
            reports_dir_str = report_config.get('reports_dir', 'reports')
            reports_dir = Path(reports_dir_str)
            if not reports_dir.is_absolute():
                script_dir = Path(__file__).parent.absolute()
                reports_dir = script_dir / reports_dir

            # Ensure reports directory exists
            reports_dir.mkdir(parents=True, exist_ok=True)

            # ALWAYS clear ALL old Excel files from reports directory before uploading new ones
            # CRITICAL: This prevents combining multiple files in report generation
            old_excel_files = list(reports_dir.glob('*.xlsx')) + list(reports_dir.glob('*.xls'))
            cleared_count = 0
            failed_to_clear = []

            for old_file in old_excel_files:
                try:
                    # On Windows, files might be locked - try multiple times with increasing delays
                    max_retries = 5
                    retry_count = 0
                    cleared_this_file = False

                    while retry_count < max_retries and not cleared_this_file:
                        try:
                            old_file.unlink()
                            cleared_count += 1
                            cleared_this_file = True
                            logger.info(f"Cleared old file before upload: {old_file.name}")
                            break
                        except PermissionError as pe:
                            retry_count += 1
                            if retry_count < max_retries:
                                # Increasing delay: 0.5s, 1s, 2s, 3s
                                import time
                                delay = min(0.5 * (2 ** retry_count), 3.0)
                                logger.warning(f"File {old_file.name} is locked (attempt {retry_count}/{max_retries}), waiting {delay}s...")
                                time.sleep(delay)
                            else:
                                # Last attempt failed - try renaming instead of deleting
                                logger.warning(f"Cannot delete {old_file.name}, trying to rename instead...")
                                try:
                                    import time
                                    timestamp = int(time.time())
                                    backup_name = f"{old_file.stem}_backup_{timestamp}{old_file.suffix}"
                                    backup_path = old_file.parent / backup_name
                                    old_file.rename(backup_path)
                                    cleared_count += 1
                                    cleared_this_file = True
                                    logger.info(f"Renamed locked file to backup: {old_file.name} -> {backup_name}")
                                except Exception as rename_error:
                                    logger.error(f"Could not rename file either: {rename_error}")
                                    raise pe  # Raise original PermissionError
                        except Exception as e:
                            if retry_count >= max_retries - 1:
                                raise
                            retry_count += 1
                            import time
                            time.sleep(1)

                    if not cleared_this_file:
                        failed_to_clear.append(old_file.name)
                        logger.error(f"Failed to clear old file {old_file.name} after {max_retries} attempts")
                except Exception as e:
                    if old_file.name not in failed_to_clear:
                        failed_to_clear.append(old_file.name)
                    logger.error(f"Failed to clear old file {old_file.name}: {e}")

            # If any files failed to clear, fail the upload to prevent mixing old and new data
            if failed_to_clear:
                logger.error(f"CRITICAL: Failed to clear {len(failed_to_clear)} file(s) before upload: {failed_to_clear}")
                locked_files_list = ', '.join(failed_to_clear)
                return jsonify({
                    'error': f'Cannot upload: {len(failed_to_clear)} file(s) are locked',
                    'failed_files': failed_to_clear,
                    'details': f'File(s) {locked_files_list} are being used by another program (likely Excel). Please close Excel and any other programs that might have these files open, then try again.',
                    'instructions': [
                        '1. Close Microsoft Excel completely',
                        '2. Close any file explorer windows showing these files',
                        '3. Wait a few seconds',
                        '4. Try uploading again'
                    ]
                }), 500

            if cleared_count > 0:
                logger.info(f"Cleared {cleared_count} old Excel file(s) before upload")
            else:
                logger.info("No old Excel files found to clear (reports directory was empty)")

            # VERIFY: Double-check that all Excel files are actually gone
            remaining_files = list(reports_dir.glob('*.xlsx')) + list(reports_dir.glob('*.xls'))
            if remaining_files:
                logger.error(f"CRITICAL: After clearing, {len(remaining_files)} file(s) still exist: {[f.name for f in remaining_files]}")
                logger.error("These files are likely locked. Attempting force removal...")
                force_failed = []
                for remaining_file in remaining_files:
                    try:
                        remaining_file.unlink()
                        logger.info(f"Force-removed locked file: {remaining_file.name}")
                    except Exception as e:
                        force_failed.append(remaining_file.name)
                        logger.error(f"CRITICAL: Cannot remove locked file {remaining_file.name}: {e}")

                if force_failed:
                    logger.error(f"CRITICAL: {len(force_failed)} file(s) still locked after force removal: {force_failed}")
                    return jsonify({
                        'error': f'Cannot upload: {len(force_failed)} file(s) are locked and cannot be deleted',
                        'failed_files': force_failed,
                        'details': 'Please close Excel or any other program using these files, then try again.'
                    }), 500

            logger.info("✓ Verified: All old Excel files cleared successfully before upload")

            uploaded_count = 0
            uploaded_files = []

            for file in files:
                if file.filename == '':
                    continue

                # Check if it's an Excel file
                filename = secure_filename(file.filename)
                if not (filename.endswith('.xlsx') or filename.endswith('.xls')):
                    logger.warning(f"Skipping non-Excel file: {filename}")
                    continue

                # Save file to reports directory
                file_path = reports_dir / filename
                logger.info(f"Saving uploaded file: {filename} -> {file_path}")
                file.save(str(file_path))

                # Verify file was saved and get its modification time
                if file_path.exists():
                    mtime = file_path.stat().st_mtime
                    mtime_str = datetime.fromtimestamp(mtime).strftime('%Y-%m-%d %H:%M:%S')
                    file_size = file_path.stat().st_size
                    logger.info(f"Successfully saved file: {filename} (size: {file_size} bytes, modified: {mtime_str})")
                    uploaded_count += 1
                    uploaded_files.append(filename)
                else:
                    logger.error(f"CRITICAL: File was not saved! {file_path} does not exist after save()")
                    raise Exception(f"Failed to save file {filename}")

            if uploaded_count == 0:
                return jsonify({'error': 'No valid Excel files uploaded'}), 400

            # Warn if multiple files uploaded - reports should be generated from ONE file
            if uploaded_count > 1:
                logger.warning(f"WARNING: {uploaded_count} files uploaded. Reports should be generated from a single file. Only the newest file will be used.")

            return jsonify({
                'status': 'success',
                'message': f'Successfully uploaded {uploaded_count} file(s)',
                'uploaded_count': uploaded_count,
                'uploaded_files': uploaded_files,
                'cleared_old_files': cleared_count,
                'warning': f'{uploaded_count} file(s) uploaded - only the newest will be used for report generation' if uploaded_count > 1 else None
            })
        except Exception as e:
            logger.error(f"Error uploading files: {e}", exc_info=True)
            return jsonify({'error': f'Failed to upload files: {str(e)}'}), 500

    @app.route('/api/status', methods=['GET'])
    def status():
        """Get service status and configuration."""
        return jsonify({
            'status': 'running',
            'sharepoint_enabled': app.config['SHAREPOINT_CONFIG'].get('enabled', False),
            'reports_dir': app.config['REPORT_CONFIG'].get('reports_dir', 'reports'),
            'output_dir': app.config['REPORT_CONFIG'].get('output_dir', 'output')
        })

    @app.route('/api/report/json', methods=['GET'])
    def get_report_json():
        """Get latest report JSON file."""
        try:
            report_config = app.config['REPORT_CONFIG']
            output_dir_str = report_config.get('output_dir', 'output')
            output_dir = Path(output_dir_str)
            if not output_dir.is_absolute():
                script_dir = Path(__file__).parent.absolute()
                output_dir = script_dir / output_dir
            report_file = output_dir / 'report.json'

            if not report_file.exists():
                return jsonify({'error': 'Report not found. Generate a report first.'}), 404

            with open(report_file, 'r', encoding='utf-8') as f:
                report_data = json.load(f)

            return jsonify(report_data)
        except Exception as e:
            logger.error(f"Error reading report JSON: {e}", exc_info=True)
            return jsonify({'error': f'Failed to read report: {str(e)}'}), 500

    @app.route('/api/report/html', methods=['GET'])
    def get_report_html():
        """Get report HTML file by report_id (or latest if not specified)."""
        try:
            from flask import send_from_directory

            report_config = app.config['REPORT_CONFIG']
            output_dir_str = report_config.get('output_dir', 'output')
            output_dir = Path(output_dir_str)
            if not output_dir.is_absolute():
                script_dir = Path(__file__).parent.absolute()
                output_dir = script_dir / output_dir

            # Get report_id from query parameter, default to latest
            report_id = request.args.get('report_id')

            if report_id:
                # Check if it's a timestamped report or legacy report
                html_file = output_dir / f"{report_id}.html"

                # If not found and it starts with "report-", might be a legacy report with generated ID
                if not html_file.exists() and report_id.startswith('report-'):
                    # Try legacy report.html
                    legacy_file = output_dir / 'report.html'
                    if legacy_file.exists():
                        html_file = legacy_file
                    else:
                        return jsonify({'error': f'Report {report_id} not found.'}), 404
                elif not html_file.exists():
                    return jsonify({'error': f'Report {report_id} not found.'}), 404
            else:
                # Get latest report (check both timestamped and legacy)
                timestamped_files = list(output_dir.glob('report-*.html'))
                legacy_file = output_dir / 'report.html'

                html_files = []
                if legacy_file.exists():
                    html_files.append(legacy_file)
                html_files.extend(timestamped_files)

                if not html_files:
                    return jsonify({'error': 'No reports found. Generate a report first.'}), 404

                html_file = sorted(html_files, key=lambda p: p.stat().st_mtime, reverse=True)[0]

            return send_from_directory(str(output_dir), html_file.name, mimetype='text/html')
        except Exception as e:
            logger.error(f"Error reading report HTML: {e}", exc_info=True)
            return jsonify({'error': f'Failed to read report HTML: {str(e)}'}), 500

    @app.route('/api/reports/list', methods=['GET'])
    def list_reports():
        """List all available reports (last 10)."""
        try:
            report_config = app.config['REPORT_CONFIG']
            output_dir_str = report_config.get('output_dir', 'output')
            output_dir = Path(output_dir_str)

            # Ensure absolute path
            if not output_dir.is_absolute():
                script_dir = Path(__file__).parent.absolute()
                output_dir = script_dir / output_dir

            # Log for debugging
            logger.info(f"Looking for reports in: {output_dir.absolute()}")
            logger.info(f"Output directory exists: {output_dir.exists()}")
            if output_dir.exists():
                logger.info(f"Files in output directory: {list(output_dir.glob('*'))}")

            # Find all report HTML files (both timestamped and non-timestamped)
            timestamped_files = list(output_dir.glob('report-*.html'))
            legacy_file = output_dir / 'report.html'

            logger.info(f"Found {len(timestamped_files)} timestamped report files")
            logger.info(f"Legacy report.html exists: {legacy_file.exists()}")
            if legacy_file.exists():
                logger.info(f"Legacy report.html path: {legacy_file.absolute()}")

            html_files = []

            # Add legacy report.html if it exists
            if legacy_file.exists():
                html_files.append(legacy_file)
                logger.info("Added legacy report.html to list")

            # Add timestamped files
            html_files.extend(timestamped_files)
            logger.info(f"Total HTML files found: {len(html_files)}")

            reports = []
            for html_file in sorted(html_files, key=lambda p: p.stat().st_mtime, reverse=True)[:10]:
                report_id = html_file.stem  # e.g., "report-2025-11-08-11-25-46" or "report"

                # Handle legacy report.html
                if report_id == 'report':
                    # Use file modification time as timestamp
                    mtime = html_file.stat().st_mtime
                    dt = datetime.fromtimestamp(mtime)
                    timestamp_str = dt.strftime('%Y-%m-%d-%H-%M-%S')
                    date_str = dt.strftime('%Y-%m-%d %H:%M:%S')
                    report_id = f"report-{timestamp_str}"
                else:
                    # Timestamped report
                    timestamp_str = report_id.replace('report-', '')
                    try:
                        # Parse timestamp to create readable date
                        dt = datetime.strptime(timestamp_str, '%Y-%m-%d-%H-%M-%S')
                        date_str = dt.strftime('%Y-%m-%d %H:%M:%S')
                    except:
                        date_str = timestamp_str

                # Get file size
                file_size = html_file.stat().st_size

                reports.append({
                    'report_id': report_id,
                    'date': date_str,
                    'timestamp': timestamp_str,
                    'file_size': file_size
                })

            return jsonify({
                'reports': reports,
                'count': len(reports)
            })
        except Exception as e:
            logger.error(f"Error listing reports: {e}", exc_info=True)
            return jsonify({'error': f'Failed to list reports: {str(e)}'}), 500

    return app


def start_scheduler(config_path: Optional[str] = None):
    """Start the scheduler in a background thread."""
    global scheduler_thread

    scheduler_config = config.get('scheduler', {})
    if not scheduler_config.get('enabled'):
        logger.info("Scheduler is disabled in configuration")
        return

    try:
        from scheduler import ReportScheduler
        import threading

        def run_scheduler():
            try:
                scheduler = ReportScheduler(config_path=config_path)
                scheduler.start()
            except Exception as e:
                logger.error(f"Scheduler error: {e}", exc_info=True)

        scheduler_thread = threading.Thread(target=run_scheduler, daemon=True)
        scheduler_thread.start()
        logger.info("Scheduler started in background thread")
    except ImportError:
        logger.warning("Scheduler module not available. Install apscheduler to enable scheduling.")
    except Exception as e:
        logger.error(f"Failed to start scheduler: {e}", exc_info=True)


def run_server(config_path: Optional[str] = None, host: Optional[str] = None, port: Optional[int] = None):
    """Run the API server."""
    global app, config

    app = create_app(config_path)

    # Start scheduler if enabled
    start_scheduler(config_path)

    api_config = config.get('api', {})
    server_host = host or api_config.get('host', '0.0.0.0')
    server_port = port or api_config.get('port', 8080)

    logger.info(f"Starting API server on {server_host}:{server_port}")
    app.run(host=server_host, port=server_port, debug=False)


if __name__ == "__main__":
    import sys

    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    )

    config_path = sys.argv[1] if len(sys.argv) > 1 else None

    # Check if API is enabled
    config = load_config(config_path)
    if not config.get('api', {}).get('enabled', False):
        logger.warning("API is disabled in configuration. Set api.enabled=true to enable.")
        logger.info("Starting API server anyway (for testing)...")

    run_server(config_path=config_path)