This commit is contained in:
nika fartenadze 2025-11-08 15:44:43 +04:00
parent 1d82024a58
commit 460f2b2592
31 changed files with 147869 additions and 2595 deletions

View File

@@ -7,12 +7,16 @@ Provides REST API endpoints to trigger report generation on demand.
 import logging
 from pathlib import Path
-from typing import Optional
+from typing import Optional, List, Dict
 import json
+from datetime import datetime
+import shutil
+import os
 
 try:
-    from flask import Flask, jsonify, request
+    from flask import Flask, jsonify, request, send_from_directory
     from flask_cors import CORS
+    from werkzeug.utils import secure_filename
     FLASK_AVAILABLE = True
 except ImportError:
     FLASK_AVAILABLE = False
@@ -28,6 +32,68 @@ app = None
 config = None
 
+
+def cleanup_old_reports(output_dir: Path, reports_dir: Path, max_reports: int = 10):
+    """
+    Cleanup old reports and Excel files, keeping only the last max_reports.
+
+    Args:
+        output_dir: Directory containing report HTML/JSON files
+        reports_dir: Directory containing Excel files
+        max_reports: Maximum number of reports to keep
+    """
+    try:
+        # Get all report HTML files sorted by modification time (newest first)
+        html_files = sorted(output_dir.glob('report-*.html'), key=lambda p: p.stat().st_mtime, reverse=True)
+
+        if len(html_files) <= max_reports:
+            return  # No cleanup needed
+
+        # Get reports to delete (oldest ones)
+        reports_to_delete = html_files[max_reports:]
+        deleted_count = 0
+
+        for html_file in reports_to_delete:
+            report_id = html_file.stem
+
+            # Delete HTML file
+            try:
+                html_file.unlink()
+                logger.info(f"Deleted old report HTML: {html_file.name}")
+                deleted_count += 1
+            except Exception as e:
+                logger.warning(f"Failed to delete {html_file.name}: {e}")
+
+            # Delete corresponding JSON file
+            json_file = output_dir / f"{report_id}.json"
+            if json_file.exists():
+                try:
+                    json_file.unlink()
+                    logger.info(f"Deleted old report JSON: {json_file.name}")
+                except Exception as e:
+                    logger.warning(f"Failed to delete {json_file.name}: {e}")
+
+        # Cleanup Excel files - keep only files associated with remaining reports
+        if reports_dir.exists():
+            excel_files = list(reports_dir.glob('*.xlsx')) + list(reports_dir.glob('*.xls'))
+            if len(excel_files) > max_reports:
+                # Sort by modification time and delete oldest
+                excel_files_sorted = sorted(excel_files, key=lambda p: p.stat().st_mtime, reverse=True)
+                excel_to_delete = excel_files_sorted[max_reports:]
+                for excel_file in excel_to_delete:
+                    try:
+                        excel_file.unlink()
+                        logger.info(f"Deleted old Excel file: {excel_file.name}")
+                    except Exception as e:
+                        logger.warning(f"Failed to delete {excel_file.name}: {e}")
+
+        logger.info(f"Cleanup completed: deleted {deleted_count} old report(s)")
+    except Exception as e:
+        logger.error(f"Error during cleanup: {e}", exc_info=True)
+
+
 def create_app(config_path: Optional[str] = None):
     """Create and configure Flask app."""
     global app, config
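Editor's note: for orientation, a minimal usage sketch of the retention helper added above. The paths are illustrative; the max_reports=10 value matches the call site later in this diff.

from pathlib import Path

# Keep the ten newest report-*.html files (plus their JSON twins) in
# output/, and trim the Excel inputs in reports/ to the same count.
cleanup_old_reports(Path("output"), Path("reports"), max_reports=10)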
@@ -46,6 +112,20 @@ def create_app(config_path: Optional[str] = None):
     sharepoint_config = config.get('sharepoint', {})
     report_config = config.get('report', {})
 
+    # Resolve paths relative to script location, not current working directory
+    script_dir = Path(__file__).parent.absolute()
+
+    # Convert relative paths to absolute paths relative to script directory
+    if 'output_dir' in report_config and report_config['output_dir']:
+        output_dir = Path(report_config['output_dir'])
+        if not output_dir.is_absolute():
+            report_config['output_dir'] = str(script_dir / output_dir)
+
+    if 'reports_dir' in report_config and report_config['reports_dir']:
+        reports_dir = Path(report_config['reports_dir'])
+        if not reports_dir.is_absolute():
+            report_config['reports_dir'] = str(script_dir / reports_dir)
+
     # Store config in app context
     app.config['API_KEY'] = api_config.get('api_key')
     app.config['SHAREPOINT_CONFIG'] = sharepoint_config
@@ -80,10 +160,14 @@ def create_app(config_path: Optional[str] = None):
         try:
             request_data = request.json or {}
-            download_from_sp = request_data.get('download_from_sharepoint', False)
+            download_from_sp = request_data.get('download_from_sharepoint', True)  # Default to True for backward compatibility
             downloaded_files = []  # Initialize here for scope
 
-            # Download from SharePoint if requested
+            # Get report config early - needed for error handling
+            report_config = app.config['REPORT_CONFIG']
+
+            # Download from SharePoint if requested AND no manual upload happened
+            # If download_from_sharepoint is False, it means manual upload was used
             if download_from_sp:
                 sp_config = app.config['SHAREPOINT_CONFIG']
                 if not sp_config.get('enabled'):
@@ -98,39 +182,204 @@
                         folder_path=sp_config.get('folder_path'),
                         file_path=sp_config.get('file_path'),
                         local_dir=sp_config.get('local_dir', 'reports'),
-                        username=sp_config.get('username'),
-                        password=sp_config.get('password'),
+                        tenant_id=sp_config.get('tenant_id'),
                         client_id=sp_config.get('client_id'),
                         client_secret=sp_config.get('client_secret'),
-                        use_app_authentication=sp_config.get('use_app_authentication', False),
+                        use_app_authentication=sp_config.get('use_app_authentication', True),
                         file_pattern=sp_config.get('file_pattern'),
                         overwrite=sp_config.get('overwrite', True)
                     )
                     downloaded_files = downloaded if downloaded else []
                     logger.info(f"Downloaded {len(downloaded_files)} file(s) from SharePoint: {downloaded_files}")
+
+                    # If SharePoint download failed (no files downloaded), check if we have existing files
+                    if len(downloaded_files) == 0:
+                        logger.warning("SharePoint download returned 0 files. This could mean:")
+                        logger.warning("1. SharePoint permissions issue (401/403 error)")
+                        logger.warning("2. No files found in the specified folder")
+                        logger.warning("3. Site access not granted (Resource-Specific Consent needed)")
+                        logger.warning("Checking if existing files are available in reports directory...")
+
+                        # Check if there are existing files we can use
+                        reports_dir_path = Path(report_config.get('reports_dir', 'reports'))
+                        if not reports_dir_path.is_absolute():
+                            script_dir = Path(__file__).parent.absolute()
+                            reports_dir_path = script_dir / reports_dir_path
+
+                        if reports_dir_path.exists():
+                            existing_files = list(reports_dir_path.glob('*.xlsx')) + list(reports_dir_path.glob('*.xls'))
+                            if existing_files:
+                                logger.warning(f"Found {len(existing_files)} existing file(s) in reports directory. Will use these instead.")
+                                logger.warning("NOTE: These may be old files. Consider using manual upload for fresh data.")
+                            else:
+                                logger.error("No files available - neither from SharePoint nor existing files.")
+                                return jsonify({
+                                    'error': 'SharePoint download failed and no existing files found',
+                                    'details': 'SharePoint access may require Resource-Specific Consent (RSC). Please use manual file upload or fix SharePoint permissions.',
+                                    'sharepoint_error': True
+                                }), 500
                 except Exception as e:
                     logger.error(f"Failed to download from SharePoint: {e}", exc_info=True)
-                    return jsonify({
-                        'error': f'SharePoint download failed: {str(e)}'
-                    }), 500
+
+                    # Check if we have existing files as fallback
+                    reports_dir_path = Path(report_config.get('reports_dir', 'reports'))
+                    if not reports_dir_path.is_absolute():
+                        script_dir = Path(__file__).parent.absolute()
+                        reports_dir_path = script_dir / reports_dir_path
+
+                    if reports_dir_path.exists():
+                        existing_files = list(reports_dir_path.glob('*.xlsx')) + list(reports_dir_path.glob('*.xls'))
+                        if existing_files:
+                            logger.warning(f"SharePoint download failed, but found {len(existing_files)} existing file(s). Will use these.")
+                            downloaded_files = []  # Continue with existing files
+                        else:
+                            return jsonify({
+                                'error': f'SharePoint download failed: {str(e)}',
+                                'details': 'No existing files found. Please use manual file upload or fix SharePoint permissions.',
+                                'sharepoint_error': True
+                            }), 500
+                    else:
+                        return jsonify({
+                            'error': f'SharePoint download failed: {str(e)}',
+                            'details': 'Reports directory does not exist. Please use manual file upload or fix SharePoint permissions.',
+                            'sharepoint_error': True
+                        }), 500
 
-            # Generate report
-            report_config = app.config['REPORT_CONFIG']
+            # Generate report with timestamp
             reports_dir = request_data.get('reports_dir', report_config.get('reports_dir', 'reports'))
-            output_file = request_data.get('output_file',
-                str(Path(report_config.get('output_dir', 'output')) / 'report.json'))
+            output_dir_str = report_config.get('output_dir', 'output')
+            output_dir = Path(output_dir_str)
+            if not output_dir.is_absolute():
+                script_dir = Path(__file__).parent.absolute()
+                output_dir = script_dir / output_dir
+
+            # Create timestamped filename
+            timestamp = datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
+            report_id = f"report-{timestamp}"
+            output_file = str(output_dir / f"{report_id}.json")
+
+            # Log which files will be used for generation
+            reports_dir_path = Path(reports_dir)
+            if not reports_dir_path.is_absolute():
+                script_dir = Path(__file__).parent.absolute()
+                reports_dir_path = script_dir / reports_dir_path
+            logger.info(f"Generating report from {reports_dir_path.absolute()}...")
+            logger.info(f"Reports directory exists: {reports_dir_path.exists()}")
+
+            # Determine which files to use for generation
+            # CRITICAL: Only use files that were just downloaded/uploaded, not old ones
+            if downloaded_files:
+                # Files were downloaded from SharePoint - use only those
+                logger.info(f"Using {len(downloaded_files)} file(s) downloaded from SharePoint")
+                # Verify that reports_dir only contains the downloaded files (should be empty of old files)
+                all_files = list(reports_dir_path.glob('*.xlsx')) + list(reports_dir_path.glob('*.xls'))
+                downloaded_file_paths = [Path(f).name for f in downloaded_files]  # Get just filenames
+                if len(all_files) != len(downloaded_files):
+                    logger.warning(f"WARNING: Found {len(all_files)} file(s) in reports_dir but only {len(downloaded_files)} were downloaded!")
+                    logger.warning("This might indicate old files weren't cleared. Clearing now...")
+                    for file in all_files:
+                        if file.name not in downloaded_file_paths:
+                            try:
+                                file.unlink()
+                                logger.info(f"Cleared unexpected file: {file.name}")
+                            except Exception as e:
+                                logger.error(f"Failed to clear unexpected file {file.name}: {e}")
+            elif not download_from_sp:
+                # Manual upload was used (download_from_sharepoint=False)
+                # Upload endpoint should have cleared old files, but double-check
+                # Only use files uploaded in the last 10 minutes to avoid combining with old files
+                if reports_dir_path.exists():
+                    excel_files = list(reports_dir_path.glob('*.xlsx')) + list(reports_dir_path.glob('*.xls'))
+                    current_time = datetime.now().timestamp()
+                    recent_files = []
+                    for excel_file in excel_files:
+                        mtime = excel_file.stat().st_mtime
+                        # Only use files modified in the last 10 minutes (should be the uploaded ones)
+                        # Increased from 5 to 10 minutes to account for upload + generation delay
+                        if current_time - mtime < 600:  # 10 minutes
+                            recent_files.append(excel_file)
+                            mtime_str = datetime.fromtimestamp(mtime).strftime('%Y-%m-%d %H:%M:%S')
+                            logger.info(f"  - {excel_file.name} (modified: {mtime_str}) - will be used for manual upload generation")
+                        else:
+                            logger.warning(f"  - {excel_file.name} (modified: {datetime.fromtimestamp(mtime).strftime('%Y-%m-%d %H:%M:%S')}) - skipping (too old, might be from previous run)")
+
+                    if len(recent_files) < len(excel_files):
+                        logger.warning(f"Found {len(excel_files)} total file(s), but only {len(recent_files)} are recent. Clearing old files to avoid combining...")
+                        # Clear old files to ensure we only use the manually uploaded ones
+                        for excel_file in excel_files:
+                            if excel_file not in recent_files:
+                                try:
+                                    excel_file.unlink()
+                                    logger.info(f"Cleared old file: {excel_file.name}")
+                                except Exception as e:
+                                    logger.warning(f"Failed to clear old file {excel_file.name}: {e}")
+
+                    if len(recent_files) == 0:
+                        logger.error("Manual upload was used but no recent files found in reports directory!")
+                        logger.error("This might mean:")
+                        logger.error("1. Files were not uploaded successfully")
+                        logger.error("2. Files were uploaded but cleared before generation")
+                        logger.error("3. File modification times are incorrect")
+                        return jsonify({
+                            'error': 'No files found for manual upload generation',
+                            'details': 'Files were uploaded but not found in reports directory. Please try uploading again.',
+                            'manual_upload_error': True
+                        }), 400
+
+                    # Verify we only have the recently uploaded files
+                    all_files = list(reports_dir_path.glob('*.xlsx')) + list(reports_dir_path.glob('*.xls'))
+                    if len(all_files) != len(recent_files):
+                        logger.warning(f"WARNING: Found {len(all_files)} file(s) but only {len(recent_files)} are recent!")
+                        logger.warning("Clearing old files to ensure only uploaded files are used...")
+                        for file in all_files:
+                            if file not in recent_files:
+                                try:
+                                    file.unlink()
+                                    logger.info(f"Cleared unexpected old file: {file.name}")
+                                except Exception as e:
+                                    logger.error(f"Failed to clear unexpected file {file.name}: {e}")
+
+                    logger.info(f"Will generate report from {len(recent_files)} recently uploaded file(s)")
+                else:
+                    logger.error("Manual upload was used but reports directory does not exist!")
+                    return jsonify({
+                        'error': 'Reports directory does not exist',
+                        'details': 'Cannot generate report from manual upload - reports directory is missing.',
+                        'manual_upload_error': True
+                    }), 500
+            else:
+                # SharePoint download was attempted but failed - this shouldn't happen if download_from_sp=True
+                # But if it does, we should NOT use existing files as they might be old
+                logger.error("SharePoint download was requested but failed, and no manual upload was used!")
+                logger.error("This should not happen - refusing to use potentially old files")
+                return jsonify({
+                    'error': 'SharePoint download failed and no manual upload provided',
+                    'details': 'Cannot generate report - no data source available. Please try again or use manual upload.',
+                    'sharepoint_error': True
+                }), 400
 
-            logger.info(f"Generating report from {reports_dir}...")
             report_data = generate_report(
-                reports_dir=reports_dir,
+                reports_dir=str(reports_dir_path),
                 output_file=output_file,
                 verbose=False  # Don't print to console in API mode
             )
 
             if report_data:
+                # Generate HTML with same timestamp
+                html_file = output_dir / f"{report_id}.html"
+                from html_generator import generate_html_report
+                generate_html_report(output_file, str(html_file))
+
+                # Cleanup old reports (keep only last 10)
+                # Ensure reports_dir is a Path object
+                reports_dir_for_cleanup = Path(reports_dir) if isinstance(reports_dir, str) else reports_dir
+                cleanup_old_reports(output_dir, reports_dir_for_cleanup, max_reports=10)
+
                 return jsonify({
                     'status': 'success',
                     'message': 'Report generated successfully',
+                    'report_id': report_id,
+                    'report_date': timestamp,
                     'output_file': output_file,
                     'summary': report_data.get('summary', {}),
                     'vendors_count': len(report_data.get('vendors', [])),
@@ -148,6 +397,108 @@
                 'error': f'Report generation failed: {str(e)}'
             }), 500
 
+    @app.route('/api/upload', methods=['POST'])
+    def upload_files():
+        """Upload Excel files manually. Clears old files before uploading new ones."""
+        try:
+            if 'files' not in request.files:
+                return jsonify({'error': 'No files provided'}), 400
+
+            files = request.files.getlist('files')
+            if not files or all(f.filename == '' for f in files):
+                return jsonify({'error': 'No files selected'}), 400
+
+            report_config = app.config['REPORT_CONFIG']
+            reports_dir_str = report_config.get('reports_dir', 'reports')
+            reports_dir = Path(reports_dir_str)
+            if not reports_dir.is_absolute():
+                script_dir = Path(__file__).parent.absolute()
+                reports_dir = script_dir / reports_dir
+
+            # Ensure reports directory exists
+            reports_dir.mkdir(parents=True, exist_ok=True)
+
+            # ALWAYS clear ALL old Excel files from reports directory before uploading new ones
+            # CRITICAL: This prevents combining multiple files in report generation
+            old_excel_files = list(reports_dir.glob('*.xlsx')) + list(reports_dir.glob('*.xls'))
+            cleared_count = 0
+            failed_to_clear = []
+            for old_file in old_excel_files:
+                try:
+                    # On Windows, files might be locked - try multiple times
+                    max_retries = 3
+                    retry_count = 0
+                    while retry_count < max_retries:
+                        try:
+                            old_file.unlink()
+                            cleared_count += 1
+                            logger.info(f"Cleared old file before upload: {old_file.name}")
+                            break
+                        except PermissionError:
+                            retry_count += 1
+                            if retry_count < max_retries:
+                                import time
+                                time.sleep(0.5)  # Wait 500ms before retry
+                            else:
+                                raise
+                except Exception as e:
+                    failed_to_clear.append(old_file.name)
+                    logger.error(f"Failed to clear old file {old_file.name}: {e}")
+
+            # If any files failed to clear, fail the upload to prevent mixing old and new data
+            if failed_to_clear:
+                logger.error(f"CRITICAL: Failed to clear {len(failed_to_clear)} file(s) before upload: {failed_to_clear}")
+                return jsonify({
+                    'error': f'Failed to clear {len(failed_to_clear)} old file(s) before upload. Please ensure files are not locked or in use.',
+                    'failed_files': failed_to_clear,
+                    'details': 'Old files must be cleared before upload to ensure report generation uses only the new file(s). Files may be locked by Excel or another process.'
+                }), 500
+
+            if cleared_count > 0:
+                logger.info(f"Cleared {cleared_count} old Excel file(s) before upload")
+            else:
+                logger.info("No old Excel files found to clear (reports directory was empty)")
+
+            uploaded_count = 0
+            uploaded_files = []
+            for file in files:
+                if file.filename == '':
+                    continue
+
+                # Check if it's an Excel file
+                filename = secure_filename(file.filename)
+                if not (filename.endswith('.xlsx') or filename.endswith('.xls')):
+                    logger.warning(f"Skipping non-Excel file: {filename}")
+                    continue
+
+                # Save file to reports directory
+                file_path = reports_dir / filename
+                file.save(str(file_path))
+                uploaded_count += 1
+                uploaded_files.append(filename)
+                logger.info(f"Uploaded file: {filename} -> {file_path}")
+
+            if uploaded_count == 0:
+                return jsonify({'error': 'No valid Excel files uploaded'}), 400
+
+            # Warn if multiple files uploaded - reports should be generated from ONE file
+            if uploaded_count > 1:
+                logger.warning(f"WARNING: {uploaded_count} files uploaded. Reports should be generated from a single file. Only the newest file will be used.")
+
+            return jsonify({
+                'status': 'success',
+                'message': f'Successfully uploaded {uploaded_count} file(s)',
+                'uploaded_count': uploaded_count,
+                'uploaded_files': uploaded_files,
+                'cleared_old_files': cleared_count,
+                'warning': f'{uploaded_count} file(s) uploaded - only the newest will be used for report generation' if uploaded_count > 1 else None
+            })
+        except Exception as e:
+            logger.error(f"Error uploading files: {e}", exc_info=True)
+            return jsonify({'error': f'Failed to upload files: {str(e)}'}), 500
+
     @app.route('/api/status', methods=['GET'])
     def status():
         """Get service status and configuration."""
@@ -163,7 +514,11 @@
         """Get latest report JSON file."""
         try:
             report_config = app.config['REPORT_CONFIG']
-            output_dir = Path(report_config.get('output_dir', 'output'))
+            output_dir_str = report_config.get('output_dir', 'output')
+            output_dir = Path(output_dir_str)
+            if not output_dir.is_absolute():
+                script_dir = Path(__file__).parent.absolute()
+                output_dir = script_dir / output_dir
             report_file = output_dir / 'report.json'
 
             if not report_file.exists():
@@ -179,22 +534,133 @@
     @app.route('/api/report/html', methods=['GET'])
     def get_report_html():
-        """Get latest report HTML file."""
+        """Get report HTML file by report_id (or latest if not specified)."""
         try:
             from flask import send_from_directory
             report_config = app.config['REPORT_CONFIG']
-            output_dir = Path(report_config.get('output_dir', 'output'))
-            html_file = output_dir / 'report.html'
+            output_dir_str = report_config.get('output_dir', 'output')
+            output_dir = Path(output_dir_str)
+            if not output_dir.is_absolute():
+                script_dir = Path(__file__).parent.absolute()
+                output_dir = script_dir / output_dir
 
-            if not html_file.exists():
-                return jsonify({'error': 'Report HTML not found. Generate a report first.'}), 404
-            return send_from_directory(str(output_dir), 'report.html', mimetype='text/html')
+            # Get report_id from query parameter, default to latest
+            report_id = request.args.get('report_id')
+
+            if report_id:
+                # Check if it's a timestamped report or legacy report
+                html_file = output_dir / f"{report_id}.html"
+                # If not found and it starts with "report-", might be a legacy report with generated ID
+                if not html_file.exists() and report_id.startswith('report-'):
+                    # Try legacy report.html
+                    legacy_file = output_dir / 'report.html'
+                    if legacy_file.exists():
+                        html_file = legacy_file
+                    else:
+                        return jsonify({'error': f'Report {report_id} not found.'}), 404
+                elif not html_file.exists():
+                    return jsonify({'error': f'Report {report_id} not found.'}), 404
+            else:
+                # Get latest report (check both timestamped and legacy)
+                timestamped_files = list(output_dir.glob('report-*.html'))
+                legacy_file = output_dir / 'report.html'
+                html_files = []
+                if legacy_file.exists():
+                    html_files.append(legacy_file)
+                html_files.extend(timestamped_files)
+                if not html_files:
+                    return jsonify({'error': 'No reports found. Generate a report first.'}), 404
+                html_file = sorted(html_files, key=lambda p: p.stat().st_mtime, reverse=True)[0]
+
+            return send_from_directory(str(output_dir), html_file.name, mimetype='text/html')
         except Exception as e:
             logger.error(f"Error reading report HTML: {e}", exc_info=True)
             return jsonify({'error': f'Failed to read report HTML: {str(e)}'}), 500
 
+    @app.route('/api/reports/list', methods=['GET'])
+    def list_reports():
+        """List all available reports (last 10)."""
+        try:
+            report_config = app.config['REPORT_CONFIG']
+            output_dir_str = report_config.get('output_dir', 'output')
+            output_dir = Path(output_dir_str)
+            # Ensure absolute path
+            if not output_dir.is_absolute():
+                script_dir = Path(__file__).parent.absolute()
+                output_dir = script_dir / output_dir
+
+            # Log for debugging
+            logger.info(f"Looking for reports in: {output_dir.absolute()}")
+            logger.info(f"Output directory exists: {output_dir.exists()}")
+            if output_dir.exists():
+                logger.info(f"Files in output directory: {list(output_dir.glob('*'))}")
+
+            # Find all report HTML files (both timestamped and non-timestamped)
+            timestamped_files = list(output_dir.glob('report-*.html'))
+            legacy_file = output_dir / 'report.html'
+
+            logger.info(f"Found {len(timestamped_files)} timestamped report files")
+            logger.info(f"Legacy report.html exists: {legacy_file.exists()}")
+            if legacy_file.exists():
+                logger.info(f"Legacy report.html path: {legacy_file.absolute()}")
+
+            html_files = []
+            # Add legacy report.html if it exists
+            if legacy_file.exists():
+                html_files.append(legacy_file)
+                logger.info("Added legacy report.html to list")
+            # Add timestamped files
+            html_files.extend(timestamped_files)
+
+            logger.info(f"Total HTML files found: {len(html_files)}")
+
+            reports = []
+            for html_file in sorted(html_files, key=lambda p: p.stat().st_mtime, reverse=True)[:10]:
+                report_id = html_file.stem  # e.g., "report-2025-11-08-11-25-46" or "report"
+
+                # Handle legacy report.html
+                if report_id == 'report':
+                    # Use file modification time as timestamp
+                    mtime = html_file.stat().st_mtime
+                    dt = datetime.fromtimestamp(mtime)
+                    timestamp_str = dt.strftime('%Y-%m-%d-%H-%M-%S')
+                    date_str = dt.strftime('%Y-%m-%d %H:%M:%S')
+                    report_id = f"report-{timestamp_str}"
+                else:
+                    # Timestamped report
+                    timestamp_str = report_id.replace('report-', '')
+                    try:
+                        # Parse timestamp to create readable date
+                        dt = datetime.strptime(timestamp_str, '%Y-%m-%d-%H-%M-%S')
+                        date_str = dt.strftime('%Y-%m-%d %H:%M:%S')
+                    except:
+                        date_str = timestamp_str
+
+                # Get file size
+                file_size = html_file.stat().st_size
+
+                reports.append({
+                    'report_id': report_id,
+                    'date': date_str,
+                    'timestamp': timestamp_str,
+                    'file_size': file_size
+                })
+
+            return jsonify({
+                'reports': reports,
+                'count': len(reports)
+            })
+        except Exception as e:
+            logger.error(f"Error listing reports: {e}", exc_info=True)
+            return jsonify({'error': f'Failed to list reports: {str(e)}'}), 500
+
     return app
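Editor's note: a sketch of reading reports back out through the two retrieval endpoints added above. The host and port are again illustrative; the JSON field names (reports, report_id, date, file_size) come from the list_reports response.

import requests

BASE = "http://localhost:5000"  # illustrative host/port

# List the retained reports (the endpoint caps the listing at 10).
listing = requests.get(f"{BASE}/api/reports/list").json()
for entry in listing["reports"]:
    print(entry["report_id"], entry["date"], entry["file_size"])

# Fetch one report's HTML by id; omitting report_id returns the latest.
if listing["reports"]:
    latest = listing["reports"][0]["report_id"]
    html = requests.get(f"{BASE}/api/report/html",
                        params={"report_id": latest}).text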

View File

@@ -71,9 +71,46 @@ def load_config(config_path: Optional[str] = None) -> Dict[str, Any]:
         parent_env = Path(__file__).parent.parent / "taskboard" / ".env"
         if parent_env.exists():
             env_file = parent_env
+            logging.info(f"Found .env file in taskboard directory: {env_file}")
+        else:
+            logging.warning(f".env file not found in vendor_report or taskboard directory")
+            logging.warning(f"Checked: {Path(__file__).parent / '.env'}")
+            logging.warning(f"Checked: {parent_env}")
+    else:
+        logging.info(f"Found .env file in vendor_report directory: {env_file}")
 
     if env_file.exists():
-        load_dotenv(env_file)
-        logging.info(f"Loaded environment variables from {env_file}")
+        load_dotenv(env_file, override=True)  # override=True ensures env vars take precedence
+        logging.info(f"Loaded environment variables from {env_file.absolute()}")
+
+        # Log which SharePoint env vars were found (checking both SHAREPOINT_* and AZURE_AD_* fallbacks)
+        sp_vars = ['SHAREPOINT_ENABLED', 'SHAREPOINT_SITE_URL', 'SHAREPOINT_FOLDER_PATH']
+        found_vars = [var for var in sp_vars if os.getenv(var)]
+
+        # Check credentials (with fallback)
+        client_id = os.getenv('SHAREPOINT_CLIENT_ID') or os.getenv('AZURE_AD_CLIENT_ID')
+        tenant_id = os.getenv('SHAREPOINT_TENANT_ID') or os.getenv('AZURE_AD_TENANT_ID')
+        client_secret = os.getenv('SHAREPOINT_CLIENT_SECRET') or os.getenv('AZURE_AD_CLIENT_SECRET')
+
+        if client_id:
+            found_vars.append('CLIENT_ID (from SHAREPOINT_CLIENT_ID or AZURE_AD_CLIENT_ID)')
+        if tenant_id:
+            found_vars.append('TENANT_ID (from SHAREPOINT_TENANT_ID or AZURE_AD_TENANT_ID)')
+        if client_secret:
+            found_vars.append('CLIENT_SECRET (from SHAREPOINT_CLIENT_SECRET or AZURE_AD_CLIENT_SECRET)')
+
+        logging.info(f"Found SharePoint environment variables: {', '.join(found_vars)}")
+
+        missing_vars = []
+        if not client_id:
+            missing_vars.append('CLIENT_ID (SHAREPOINT_CLIENT_ID or AZURE_AD_CLIENT_ID)')
+        if not tenant_id:
+            missing_vars.append('TENANT_ID (SHAREPOINT_TENANT_ID or AZURE_AD_TENANT_ID)')
+        if not client_secret:
+            missing_vars.append('CLIENT_SECRET (SHAREPOINT_CLIENT_SECRET or AZURE_AD_CLIENT_SECRET)')
+        if missing_vars:
+            logging.warning(f"Missing SharePoint credentials: {', '.join(missing_vars)}")
 
     if config_path is None:
         config_path = Path(__file__).parent / "config.yaml"
@@ -134,6 +171,12 @@ def _load_from_env(config: Dict) -> Dict:
     elif os.getenv('AZURE_AD_CLIENT_SECRET'):
         config['sharepoint']['client_secret'] = os.getenv('AZURE_AD_CLIENT_SECRET')
 
+    # Tenant ID (required for Microsoft Graph API)
+    if os.getenv('SHAREPOINT_TENANT_ID'):
+        config['sharepoint']['tenant_id'] = os.getenv('SHAREPOINT_TENANT_ID')
+    elif os.getenv('AZURE_AD_TENANT_ID'):
+        config['sharepoint']['tenant_id'] = os.getenv('AZURE_AD_TENANT_ID')
+
     if os.getenv('SHAREPOINT_USE_APP_AUTH'):
         config['sharepoint']['use_app_authentication'] = os.getenv('SHAREPOINT_USE_APP_AUTH').lower() == 'true'
     elif os.getenv('SHAREPOINT_USE_APP_AUTH') is None and os.getenv('AZURE_AD_CLIENT_ID'):
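Editor's note: for reference, a minimal .env sketch using the variable names this loader now checks. Every value below is a placeholder, and the AZURE_AD_* spellings work as fallbacks for the three credential entries.

SHAREPOINT_ENABLED=true
SHAREPOINT_SITE_URL=https://yourcompany.sharepoint.com/sites/YourSite
SHAREPOINT_FOLDER_PATH=Shared Documents/General
SHAREPOINT_TENANT_ID=<tenant-guid>
SHAREPOINT_CLIENT_ID=<app-client-id>
SHAREPOINT_CLIENT_SECRET=<app-secret>
SHAREPOINT_USE_APP_AUTH=true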

View File

@@ -1,7 +1,7 @@
 PREPROCESSED EXCEL DATA
 ================================================================================
-Current Date (Baltimore/Eastern): 2025-11-06 16:50:13 EST
-Total Items: 162
+Current Date (Baltimore/Eastern): 2025-11-08 06:42:53 EST
+Total Items: 180
 
 VENDOR: Amazon
 --------------------------------------------------------------------------------
@@ -20,25 +20,31 @@ ALL ITEMS:
 VENDOR: Autstand
 --------------------------------------------------------------------------------
-Total Items: 74
-Closed: 67
+Total Items: 91
+Closed: 78
 Open: 3
-Monitor: 4
+Monitor: 10
+
+RECENT UPDATES (Yesterday's Date):
+ADDED: NCP1-6-TPE1 did not alarm correctly on SCADA when jammed | 2025-11-07 00:00:00 | Incomplete
+CLOSED: SCADA: We need some type of refrence in SCADA. Suggest adding dock doors that correlate with their belt | 2025-11-07 00:00:00 | Complete
 
 OLDEST UNADDRESSED (Top 3):
-Estops are getting damaged on the UL lane | Age: 2 days | 2025-11-04 00:00:00 | Incomplete
-Raise the fill height ob the DTC's approx 2 " | Age: 2 days | 2025-11-04 00:00:00 | Incomplete
+NCP1-6-TPE1 did not alarm correctly on SCADA when jammed | Age: 1 days | 2025-11-07 00:00:00 | Incomplete
 3:1 merge code update | Age: None days | | Incomplete
+Estops are getting damaged on the UL lane | Age: None days | 45965 | Incomplete
 
-VERY HIGH PRIORITY (6 items):
+VERY HIGH PRIORITY (8 items):
 Semi-Auto Exception Arm Logic | Complete | 10/14/25
 PS Conveyor chute clearing Issues | Complete | 10/14/25
 Replicate logic timers from semi VS-D to the rest of the semis | Monitor | 10/14/25
 Tipper timer | Complete | 10/14/25
 SCADA Accurate Status Reads | Complete | 10/14/25
 NC boxes are diverting to xbelt causing jams particullary at bypass curves | Complete | 10/17
+NCP1-6-TPE1 did not alarm correctly on SCADA when jammed | Incomplete | 2025-11-07 00:00:00
+Replicate logic timers from semi VS-D to the rest of the semis | Monitor | 10/14/25
 
-HIGH PRIORITY (53 items):
+HIGH PRIORITY (67 items):
 Problem Solve dead rollers | Complete | 10/14/25
 Jam Reset Button needed at end of NC Jackpots | Complete | 10/15/25
 Jam Reset buttons on Bulk divert platforms to be relocated | Complete | 10/15/25
@@ -91,9 +97,25 @@ The PLC is not reporting S04 message divert fails properly (no reads to jackpot,
 Encoder failure (4x) + 2 x | Complete | 2025-10-10 00:00:00
 SCADA performance issue | Complete |
 7:1 merge code update | Complete |
+3:1 merge code update | Monitor |
+Estops are getting damaged on the UL lane | Complete | 2025-11-04 00:00:00
+Raise the fill height ob the DTC's approx 2 " | Complete | 2025-11-04 00:00:00
+Fix tippers faulting mid-dump sequence | Complete | 2025-11-06 00:00:00
+PS8-5 Jammed but not showing on SCADA | Complete | 2025-11-06 00:00:00
+Bypass improvements | Complete | 2025-11-03 00:00:00
+Sorter statistics on SCADA | Monitor | 2025-11-03 00:00:00
+Update SCADA to show inch and store status of conveyors with such functionality | Complete | 2025-11-07 00:00:00
+Logic for Semi induct D is off very low throughput see video | Monitor | 10/16/2025
+SCADA: We need some type of refrence in SCADA. Suggest adding dock doors that correlate with their belt | Complete | 10/16/2025
+PS11-11CH6NC Intralox Sorter (S02) | Complete | 10/17/2025
+One major issue and one minor issue with the non-con system:
+No-reads are really frequent
+The PLC is not reporting S04 message divert fails properly (no reads to jackpot, lost container, failed to divert, wrong buildings) | Monitor | 10/17/2025
+Encoder failure (4x) + 2 x | Complete | 45940
+7:1 merge code update | Complete |
 3:1 merge code update | Incomplete |
-Estops are getting damaged on the UL lane | Incomplete | 2025-11-04 00:00:00
-Raise the fill height ob the DTC's approx 2 " | Incomplete | 2025-11-04 00:00:00
+Estops are getting damaged on the UL lane | Incomplete | 45965
+Raise the fill height ob the DTC's approx 2 " | Complete | 45965
 
 ALL ITEMS:
 Semi-Auto Exception Arm Logic | Vendor: Autstand | Priority: (1) Very High (very_high) | Status: Complete (CLOSED) | Date: 10/14/25 | Description: Exception chute arm disengaged prior to all carton...
@@ -168,24 +190,44 @@ The PLC is not reporting S04 message divert fails properly (no reads to jackpot,
 Encoder failure (4x) + 2 x | Vendor: Autstand | Priority: (2) High (high) | Status: Complete (CLOSED) | Date: 2025-10-10 00:00:00 | Description: UL8-7 UL11-7 Problem with port on APF...
 SCADA performance issue | Vendor: Autstand | Priority: (2) High (high) | Status: Complete (CLOSED) | Date: | Description: report export crashed system...
 7:1 merge code update | Vendor: Autstand | Priority: (2) High (high) | Status: Complete (CLOSED) | Date: | Description: N/A...
-3:1 merge code update | Vendor: Autstand | Priority: (2) High (high) | Status: Incomplete (OPEN) | Date: | Description: mcm02 by monday 11/4. mcm01 ul 1-3 done. ...
+3:1 merge code update | Vendor: Autstand | Priority: (2) High (high) | Status: Monitor (OPEN) | Date: | Description: mcm02 by monday 11/4. mcm01 ul 1-3 done. ...
 gap control at non con sorter. | Vendor: Autstand | Priority: (None) | Status: Complete (CLOSED) | Date: 2025-10-30 00:00:00 | Description: code change/ help with box tracking....
-Estops are getting damaged on the UL lane | Vendor: Autstand | Priority: (2) High (high) | Status: Incomplete (OPEN) | Date: 2025-11-04 00:00:00 | Description: UL16-1, UL15-3, UL10-2 (both sides) UL8-1 , UL7-3...
-Raise the fill height ob the DTC's approx 2 " | Vendor: Autstand | Priority: (2) High (high) | Status: Incomplete (OPEN) | Date: 2025-11-04 00:00:00 | Description: N/A...
+Estops are getting damaged on the UL lane | Vendor: Autstand | Priority: (2) High (high) | Status: Complete (CLOSED) | Date: 2025-11-04 00:00:00 | Description: UL16-1, UL15-3, UL10-2 (both sides) UL8-1 , UL7-3...
+Raise the fill height ob the DTC's approx 2 " | Vendor: Autstand | Priority: (2) High (high) | Status: Complete (CLOSED) | Date: 2025-11-04 00:00:00 | Description: N/A...
+Fix tippers faulting mid-dump sequence | Vendor: Autstand | Priority: (2) High (high) | Status: Complete (CLOSED) | Date: 2025-11-06 00:00:00 | Description: Tippers that are flow-stopped during the dumping s...
+PS8-5 Jammed but not showing on SCADA | Vendor: Autstand | Priority: (2) High (high) | Status: Complete (CLOSED) | Date: 2025-11-06 00:00:00 | Description: KK 11/6 - Per Chris' Twi sort 11.6 report - PS8-5 ...
+Bypass improvements | Vendor: Autstand | Priority: (2) High (high) | Status: Complete (CLOSED) | Date: 2025-11-03 00:00:00 | Description: Bypass showing lane unavailable when not running, ...
+Sorter statistics on SCADA | Vendor: Autstand | Priority: (2) High (high) | Status: Monitor (OPEN) | Date: 2025-11-03 00:00:00 | Description: Add crossbelt sorter statistics to ignition SCADA ...
+Update SCADA to show inch and store status of conveyors with such functionality | Vendor: Autstand | Priority: (2) High (high) | Status: Complete (CLOSED) | Date: 2025-11-07 00:00:00 | Description: Update SCADA to show inch and store conveyors as Y...
+NCP1-6-TPE1 did not alarm correctly on SCADA when jammed | Vendor: Autstand | Priority: (1) Very High (very_high) | Status: Incomplete (OPEN) | Date: 2025-11-07 00:00:00 | Description: Jiffy got to noncon and stuck in a T-plate causing...
+Replicate logic timers from semi VS-D to the rest of the semis | Vendor: Autstand | Priority: (1) Very High (very_high) | Status: Monitor (OPEN) | Date: 10/14/25 | Description: Logic timers from semi-auto at all virtual sorters...
+Logic for Semi induct D is off very low throughput see video | Vendor: Autstand | Priority: (2) High (high) | Status: Monitor (OPEN) | Date: 10/16/2025 | Description: Semi Auto D induct...
+SCADA: We need some type of refrence in SCADA. Suggest adding dock doors that correlate with their belt | Vendor: Autstand | Priority: (2) High (high) | Status: Complete (CLOSED) | Date: 10/16/2025 | Description: Flow Desk...
+PS11-11CH6NC Intralox Sorter (S02) | Vendor: Autstand | Priority: (2) High (high) | Status: Complete (CLOSED) | Date: 10/17/2025 | Description: PS11-11CH6NC Intralox Sorter (S02)...
+One major issue and one minor issue with the non-con system:
+No-reads are really frequent
+The PLC is not reporting S04 message divert fails properly (no reads to jackpot, lost container, failed to divert, wrong buildings) | Vendor: Autstand | Priority: (2) High (high) | Status: Monitor (OPEN) | Date: 10/17/2025 | Description: NON con sorter 1 and 2 not diverting...
+DTC chutes on VS-B is randomly disabling | Vendor: Autstand | Priority: (None) | Status: Monitor (OPEN) | Date: 45952 | Description: N/A...
+Encoder failure (4x) + 2 x | Vendor: Autstand | Priority: (2) High (high) | Status: Complete (CLOSED) | Date: 45940 | Description: UL8-7 UL11-7 Problem with port on APF...
+7:1 merge code update | Vendor: Autstand | Priority: (2) High (high) | Status: Complete (CLOSED) | Date: | Description: N/A...
+3:1 merge code update | Vendor: Autstand | Priority: (2) High (high) | Status: Incomplete (OPEN) | Date: | Description: mcm02 by monday 11/4. mcm01 ul 1-3 done. ...
+Estops are getting damaged on the UL lane | Vendor: Autstand | Priority: (2) High (high) | Status: Incomplete (OPEN) | Date: 45965 | Description: UL16-1, UL15-3, UL10-2 (both sides) UL8-1 , UL7-3...
+Raise the fill height ob the DTC's approx 2 " | Vendor: Autstand | Priority: (2) High (high) | Status: Complete (CLOSED) | Date: 45965 | Description: N/A...
 ================================================================================
 
 VENDOR: Autstand/Beumer
 --------------------------------------------------------------------------------
-Total Items: 3
-Closed: 2
+Total Items: 4
+Closed: 4
 Open: 0
-Monitor: 1
+Monitor: 0
 
 ALL ITEMS:
 Semi induct D - light not allumintating green | Vendor: Autstand/Beumer | Priority: (None) | Status: Complete (CLOSED) | Date: 10/15/2025 | Description: N/A...
-Bypasses are showing "lane unavailble" at a high rate. should always be available... is it in energy saving mode? or other reason | Vendor: Autstand/Beumer | Priority: (None) | Status: Monitor (OPEN) | Date: 2025-10-22 00:00:00 | Description: N/A...
+Bypasses are showing "lane unavailble" at a high rate. should always be available... is it in energy saving mode? or other reason | Vendor: Autstand/Beumer | Priority: (None) | Status: Complete (CLOSED) | Date: 2025-10-22 00:00:00 | Description: N/A...
 pe missing prob solve ak chute | Vendor: Autstand/Beumer | Priority: (3) Medium (medium) | Status: Complete (CLOSED) | Date: 2025-10-29 00:00:00 | Description: N/A...
+Bypasses are showing "lane unavailble" at a high rate. should always be available... is it in energy saving mode? or other reason | Vendor: Autstand/Beumer | Priority: (None) | Status: Complete (CLOSED) | Date: 45952 | Description: N/A...
 ================================================================================
@@ -299,14 +341,13 @@ https://t.corp.amazon.com/V1969041198 | Vendor: Caljan | Priority: (2) High (hig
 VENDOR: DCS
 --------------------------------------------------------------------------------
 Total Items: 25
-Closed: 20
-Open: 4
+Closed: 22
+Open: 2
 Monitor: 1
 
 OLDEST UNADDRESSED (Top 3):
-NCS1-1 aligner belt failed | Age: 5 days | 2025-11-01 00:00:00 | Incomplete
-) There is a catchpoint of bent metal that is sticking out from the tail assembly on PS10-1 where it transitions to PS11-1. This is catching polys during operation. Jesse is going to look into making proper modifications to eliminate this. | Age: 2 days | 2025-11-04 00:00:00 | Incomplete
-2) When product from PS10-1 is flowing towards PS11-1, there is no snowplow and instead the slide just dead ends with a corner of sidepan. Ive asked Jesse to look into fabricating a UHMW piece that could bridge this corner to push products down onto the belt. | Age: 2 days | 2025-11-04 00:00:00 | Incomplete
+NCS1-1 aligner belt failed | Age: 7 days | 2025-11-01 00:00:00 | Incomplete
+3) The black UHMW strip under the belt which transitions the belt from slider bed to tail roller is too sharp and is shaving the bottom side of the belt. Jesse and his team are going to look into pulling this uhmw strip out, properly chamfering it and then re-installing. | Age: 4 days | 2025-11-04 00:00:00 | Incomplete
 
 VERY HIGH PRIORITY (4 items):
 Flow turn Belt Replacement | Complete | 10/10/25
@@ -329,8 +370,8 @@ HIGH PRIORITY (17 items):
 Non Con Chute/Maint access. Need Latch upgrade | Complete | 2025-10-10 00:00:00
 Motor falling on HSQ gappers.. 2x (3:1 merge) | Complete |
 PRS4-2 Motor Replacement | Complete | 2025-10-26 00:00:00
-) There is a catchpoint of bent metal that is sticking out from the tail assembly on PS10-1 where it transitions to PS11-1. This is catching polys during operation. Jesse is going to look into making proper modifications to eliminate this. | Incomplete | 2025-11-04 00:00:00
-2) When product from PS10-1 is flowing towards PS11-1, there is no snowplow and instead the slide just dead ends with a corner of sidepan. Ive asked Jesse to look into fabricating a UHMW piece that could bridge this corner to push products down onto the belt. | Incomplete | 2025-11-04 00:00:00
+) There is a catchpoint of bent metal that is sticking out from the tail assembly on PS10-1 where it transitions to PS11-1. This is catching polys during operation. Jesse is going to look into making proper modifications to eliminate this. | Complete | 2025-11-04 00:00:00
+2) When product from PS10-1 is flowing towards PS11-1, there is no snowplow and instead the slide just dead ends with a corner of sidepan. Ive asked Jesse to look into fabricating a UHMW piece that could bridge this corner to push products down onto the belt. | Complete | 2025-11-04 00:00:00
 3) The black UHMW strip under the belt which transitions the belt from slider bed to tail roller is too sharp and is shaving the bottom side of the belt. Jesse and his team are going to look into pulling this uhmw strip out, properly chamfering it and then re-installing. | Incomplete | 2025-11-04 00:00:00
 
 ALL ITEMS:
@@ -356,8 +397,8 @@ ALL ITEMS:
 Motor falling on HSQ gappers.. 2x (3:1 merge) | Vendor: DCS | Priority: (2) High (high) | Status: Complete (CLOSED) | Date: | Description: upgrade the bolts...
 PRS4-2 Motor Replacement | Vendor: DCS | Priority: High (high) | Status: Complete (CLOSED) | Date: 2025-10-26 00:00:00 | Description: Motor Oreded will update when I have an ETA ( Tryi...
 NCS1-1 aligner belt failed | Vendor: DCS | Priority: (1) Very High (very_high) | Status: Incomplete (OPEN) | Date: 2025-11-01 00:00:00 | Description: Belt failed prior to flow splitter. Replaced wi...
-) There is a catchpoint of bent metal that is sticking out from the tail assembly on PS10-1 where it transitions to PS11-1. This is catching polys during operation. Jesse is going to look into making proper modifications to eliminate this. | Vendor: DCS | Priority: (2) High (high) | Status: Incomplete (OPEN) | Date: 2025-11-04 00:00:00 | Description: N/A...
-2) When product from PS10-1 is flowing towards PS11-1, there is no snowplow and instead the slide just dead ends with a corner of sidepan. Ive asked Jesse to look into fabricating a UHMW piece that could bridge this corner to push products down onto the belt. | Vendor: DCS | Priority: (2) High (high) | Status: Incomplete (OPEN) | Date: 2025-11-04 00:00:00 | Description: N/A...
+) There is a catchpoint of bent metal that is sticking out from the tail assembly on PS10-1 where it transitions to PS11-1. This is catching polys during operation. Jesse is going to look into making proper modifications to eliminate this. | Vendor: DCS | Priority: (2) High (high) | Status: Complete (CLOSED) | Date: 2025-11-04 00:00:00 | Description: Catached Point in two area's Both have been comple...
+2) When product from PS10-1 is flowing towards PS11-1, there is no snowplow and instead the slide just dead ends with a corner of sidepan. Ive asked Jesse to look into fabricating a UHMW piece that could bridge this corner to push products down onto the belt. | Vendor: DCS | Priority: (2) High (high) | Status: Complete (CLOSED) | Date: 2025-11-04 00:00:00 | Description: Completed...
 3) The black UHMW strip under the belt which transitions the belt from slider bed to tail roller is too sharp and is shaving the bottom side of the belt. Jesse and his team are going to look into pulling this uhmw strip out, properly chamfering it and then re-installing. | Vendor: DCS | Priority: (2) High (high) | Status: Incomplete (OPEN) | Date: 2025-11-04 00:00:00 | Description: N/A...
 ================================================================================
@@ -409,7 +450,7 @@ Total Items: 5
 Monitor: 1
 
 OLDEST UNADDRESSED (Top 3):
-Add DHL label to Scan tunnel valid message | Age: 10 days | 2025-10-27 00:00:00 | Incomplete
+Add DHL label to Scan tunnel valid message | Age: 12 days | 2025-10-27 00:00:00 | Incomplete
 
 VERY HIGH PRIORITY (1 items):
 Add DHL label to Scan tunnel valid message | Incomplete | 2025-10-27 00:00:00

File diff suppressed because it is too large (23 files)

View File

@@ -3,7 +3,7 @@ pandas>=2.0.0
 openpyxl>=3.0.0
 pydantic>=2.0.0
 
-# Optional: SharePoint integration
+# Optional: SharePoint integration (Office365-REST-Python-Client)
 Office365-REST-Python-Client>=2.3.0
 
 # Optional: Scheduling
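Editor's note: since the SharePoint dependency stays optional, a quick availability check mirroring the import guard in the downloader module below; the print messages are illustrative.

try:
    from office365.sharepoint.client_context import ClientContext  # noqa: F401
    print("Office365-REST-Python-Client is available")
except ImportError:
    print("Not installed - run: pip install Office365-REST-Python-Client")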

View File

@@ -66,11 +66,10 @@ class ReportScheduler:
                 folder_path=self.sharepoint_config.get('folder_path'),
                 file_path=self.sharepoint_config.get('file_path'),
                 local_dir=self.sharepoint_config.get('local_dir', 'reports'),
-                username=self.sharepoint_config.get('username'),
-                password=self.sharepoint_config.get('password'),
+                tenant_id=self.sharepoint_config.get('tenant_id'),
                 client_id=self.sharepoint_config.get('client_id'),
                 client_secret=self.sharepoint_config.get('client_secret'),
-                use_app_authentication=self.sharepoint_config.get('use_app_authentication', False),
+                use_app_authentication=self.sharepoint_config.get('use_app_authentication', True),
                 file_pattern=self.sharepoint_config.get('file_pattern'),
                 overwrite=self.sharepoint_config.get('overwrite', True)
             )

View File

@@ -1,9 +1,9 @@
 #!/usr/bin/env python3
 """
-SharePoint File Downloader
+SharePoint File Downloader using Office365-REST-Python-Client
 
 Downloads Excel files from SharePoint to the local reports directory.
-Supports both scheduled and on-demand downloads.
+Uses Office365-REST-Python-Client library for SharePoint REST API access.
 """
 
 import os
@ -15,89 +15,181 @@ import logging
try: try:
from office365.sharepoint.client_context import ClientContext from office365.sharepoint.client_context import ClientContext
from office365.runtime.auth.authentication_context import AuthenticationContext from office365.runtime.auth.authentication_context import AuthenticationContext
from office365.runtime.auth.user_credential import UserCredential from office365.sharepoint.files.file import File
from office365.runtime.auth.client_credential import ClientCredential OFFICE365_AVAILABLE = True
SHAREPOINT_AVAILABLE = True
except ImportError: except ImportError:
SHAREPOINT_AVAILABLE = False OFFICE365_AVAILABLE = False
logging.warning("office365-rest-python-client not installed. SharePoint features disabled.") logging.warning("office365-rest-python-client not installed. SharePoint features disabled.")
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
class SharePointDownloader: class SharePointDownloader:
"""Downloads files from SharePoint.""" """Downloads files from SharePoint using Office365-REST-Python-Client."""
def __init__( def __init__(
self, self,
site_url: str, site_url: str,
username: Optional[str] = None, tenant_id: Optional[str] = None,
password: Optional[str] = None,
client_id: Optional[str] = None, client_id: Optional[str] = None,
client_secret: Optional[str] = None, client_secret: Optional[str] = None,
use_app_authentication: bool = False use_app_authentication: bool = True
): ):
""" """
Initialize SharePoint downloader. Initialize SharePoint downloader using Office365-REST-Python-Client.
Args: Args:
site_url: SharePoint site URL (e.g., "https://yourcompany.sharepoint.com/sites/YourSite") site_url: SharePoint site URL (e.g., "https://yourcompany.sharepoint.com/sites/YourSite")
username: Username for user authentication (if not using app authentication) tenant_id: Azure AD tenant ID (required for app authentication)
password: Password for user authentication (if not using app authentication) client_id: Azure AD app client ID (required for app authentication)
client_id: Azure AD app client ID (for app authentication) client_secret: Azure AD app client secret (required for app authentication)
client_secret: Azure AD app client secret (for app authentication) use_app_authentication: Whether to use app authentication (default: True)
use_app_authentication: Whether to use app authentication (recommended for automation)
""" """
if not SHAREPOINT_AVAILABLE: if not OFFICE365_AVAILABLE:
raise ImportError( raise ImportError(
"office365-rest-python-client is required for SharePoint integration. " "office365-rest-python-client is required for SharePoint integration. "
"Install it with: pip install Office365-REST-Python-Client" "Install it with: pip install Office365-REST-Python-Client"
) )
self.site_url = site_url self.site_url = site_url.rstrip('/')
self.username = username self.tenant_id = tenant_id
self.password = password
self.client_id = client_id self.client_id = client_id
self.client_secret = client_secret self.client_secret = client_secret
self.use_app_authentication = use_app_authentication self.use_app_authentication = use_app_authentication
self.ctx = None self.ctx = None
def authenticate(self) -> bool: if not self.client_id or not self.client_secret:
"""Authenticate with SharePoint.""" logger.error("Client ID and Client Secret are required for SharePoint authentication.")
raise ValueError("Missing Azure AD credentials for SharePoint.")
def connect(self) -> bool:
"""Connect to SharePoint site."""
if self.ctx:
return True
try: try:
if self.use_app_authentication and self.client_id and self.client_secret: if self.use_app_authentication:
# App authentication (recommended for automation) # App-only authentication using Office365-REST-Python-Client
logger.info(f"Attempting app authentication with client_id: {self.client_id[:8]}...") from office365.runtime.auth.client_credential import ClientCredential
logger.info(f"Connecting to SharePoint site: {self.site_url}")
logger.info(f"Using Client ID: {self.client_id[:8]}... (truncated for security)")
credentials = ClientCredential(self.client_id, self.client_secret) credentials = ClientCredential(self.client_id, self.client_secret)
self.ctx = ClientContext(self.site_url).with_credentials(credentials) self.ctx = ClientContext(self.site_url).with_credentials(credentials)
logger.info("Created SharePoint context with app credentials")
elif self.username and self.password:
# User authentication
credentials = UserCredential(self.username, self.password)
self.ctx = ClientContext(self.site_url).with_credentials(credentials)
logger.info("Authenticated with SharePoint using user credentials")
else:
logger.error("No authentication credentials provided")
return False
# Test connection # Test connection by getting web
logger.info("Testing SharePoint connection...") # This will fail if RSC is not granted or credentials are wrong
web = self.ctx.web web = self.ctx.web
self.ctx.load(web) self.ctx.load(web)
self.ctx.execute_query() self.ctx.execute_query()
logger.info(f"Successfully connected to SharePoint site: {web.properties['Title']}") logger.info(f"Successfully connected to SharePoint site: {web.properties['Title']}")
return True return True
else:
logger.error("Only app-only authentication is supported")
return False
except Exception as e: except Exception as e:
logger.error(f"SharePoint authentication failed: {e}", exc_info=True) error_msg = str(e)
logger.error(f"Site URL: {self.site_url}") logger.error(f"Failed to connect to SharePoint: {error_msg}", exc_info=True)
logger.error(f"Client ID: {self.client_id[:8] if self.client_id else 'None'}...")
logger.error(f"Using app auth: {self.use_app_authentication}") # Provide helpful error messages
if "Unsupported app only token" in error_msg or "401" in error_msg:
logger.error("This error usually means:")
logger.error("1. Resource-Specific Consent (RSC) is not granted for this site")
logger.error("2. Go to: {}/_layouts/15/appinv.aspx".format(self.site_url))
logger.error("3. Enter App ID: {}".format(self.client_id))
logger.error("4. Grant permission with XML: <AppPermissionRequests AllowAppOnlyPolicy=\"true\"><AppPermissionRequest Scope=\"http://sharepoint/content/sitecollection\" Right=\"Read\" /></AppPermissionRequests>")
elif "403" in error_msg or "Forbidden" in error_msg:
logger.error("403 Forbidden - App does not have access to this site")
logger.error("RSC must be granted via appinv.aspx")
elif "Invalid client secret" in error_msg or "invalid_client" in error_msg:
logger.error("Invalid client credentials - check CLIENT_ID and CLIENT_SECRET")
return False return False
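
As a quick sanity check, here is a minimal usage sketch of the new app-only flow. The site URL and Azure AD values are placeholders, and the sketch assumes the app registration has already been granted site access via appinv.aspx as the error hints above describe:

    downloader = SharePointDownloader(
        site_url="https://contoso.sharepoint.com/sites/Reports",   # placeholder
        client_id="00000000-0000-0000-0000-000000000000",          # placeholder
        client_secret="<client-secret>",                           # placeholder
    )
    if not downloader.connect():
        raise SystemExit("SharePoint connection failed; check the RSC hints in the log")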
+    def list_files_in_folder(
+        self,
+        folder_path: str,
+        file_pattern: Optional[str] = None
+    ) -> List[dict]:
+        """
+        List files in a SharePoint folder.
+
+        Args:
+            folder_path: Folder path relative to site root (e.g., "Shared Documents/General/Amazon Punchlist [EXTERNAL]")
+            file_pattern: Optional file pattern filter (e.g., "*.xlsx")
+
+        Returns:
+            List of file metadata dictionaries
+        """
+        if not self.ctx:
+            if not self.connect():
+                return []
+
+        try:
+            # Normalize folder path
+            # User provides: /Shared Documents/General/Amazon Punchlist [EXTERNAL]
+            # SharePoint needs: /sites/SiteName/Shared Documents/General/Amazon Punchlist [EXTERNAL]
+            folder_path = folder_path.strip('/')
+
+            # Extract site path from site_url
+            from urllib.parse import urlparse
+            site_path = urlparse(self.site_url).path.strip('/')
+
+            # Construct full server-relative URL
+            # If folder_path already starts with site path, use as-is
+            # Otherwise, prepend site path
+            if folder_path.startswith(site_path + '/'):
+                server_relative_url = f"/{folder_path}"
+            elif site_path:
+                server_relative_url = f"/{site_path}/{folder_path}"
+            else:
+                server_relative_url = f"/{folder_path}"
+
+            logger.info(f"Listing files in folder: {server_relative_url}")
+            logger.info(f"Site URL: {self.site_url}, Site path: {site_path}, Folder path: {folder_path}")
+
+            # Get folder
+            folder = self.ctx.web.get_folder_by_server_relative_url(server_relative_url)
+            files = folder.files
+            self.ctx.load(files)
+            self.ctx.execute_query()
+
+            excel_files = []
+            for file in files:
+                file_name = file.properties["Name"]
+                # Only consider Excel files
+                if file_name and (file_name.endswith('.xlsx') or file_name.endswith('.xls')):
+                    # Apply file pattern filter if provided
+                    if file_pattern:
+                        pattern = file_pattern.replace('*', '')
+                        if not file_name.endswith(pattern):
+                            continue
+                    excel_files.append({
+                        "name": file_name,
+                        "server_relative_url": file.properties.get("ServerRelativeUrl", ""),
+                        "size": file.properties.get("Length", 0),
+                        "time_last_modified": file.properties.get("TimeLastModified", "")
+                    })
+
+            logger.info(f"Found {len(excel_files)} Excel file(s) in folder")
+            for file_info in excel_files:
+                logger.info(f"  - {file_info['name']} ({file_info['size']} bytes)")
+
+            return excel_files
+        except Exception as e:
+            logger.error(f"Error listing files: {e}", exc_info=True)
+            return []
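
For clarity, a worked example of the path normalization performed above, with a hypothetical site and folder:

    from urllib.parse import urlparse

    site_url = "https://contoso.sharepoint.com/sites/Reports"   # placeholder
    folder_path = "Shared Documents/General"                    # placeholder
    site_path = urlparse(site_url).path.strip('/')              # "sites/Reports"
    server_relative_url = f"/{site_path}/{folder_path}"
    # -> "/sites/Reports/Shared Documents/General"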
     def download_file(
         self,
-        file_path: str,
+        server_relative_url: str,
+        file_name: str,
         local_path: str,
         overwrite: bool = True
     ) -> bool:
@@ -105,7 +197,8 @@ class SharePointDownloader:
         """
         Download a single file from SharePoint.

         Args:
-            file_path: Path to file in SharePoint (e.g., "/Shared Documents/Reports/file.xlsx")
+            server_relative_url: Server-relative URL of the file
+            file_name: The original name of the file (for logging)
             local_path: Local path where file should be saved
             overwrite: Whether to overwrite existing file

@@ -113,29 +206,49 @@ class SharePointDownloader:
         Returns:
             True if successful, False otherwise
         """
         if not self.ctx:
-            if not self.authenticate():
+            if not self.connect():
                 return False

+        local_file = None
         try:
             local_file_path = Path(local_path)
             local_file_path.parent.mkdir(parents=True, exist_ok=True)

+            # Check if file exists and overwrite flag
             if local_file_path.exists() and not overwrite:
                 logger.info(f"File already exists, skipping: {local_path}")
                 return True

-            # Download file
-            with open(local_file_path, "wb") as local_file:
-                file = self.ctx.web.get_file_by_server_relative_url(file_path)
-                file.download(local_file)
-                self.ctx.execute_query()
-
-            logger.info(f"Downloaded: {file_path} -> {local_path}")
+            logger.info(f"Downloading file: {file_name} from {server_relative_url} to {local_path}")
+
+            # Get file
+            file = self.ctx.web.get_file_by_server_relative_url(server_relative_url)
+            self.ctx.load(file)
+            self.ctx.execute_query()
+
+            # Open file and keep it open during download
+            # The Office365 library writes to the file during execute_query()
+            local_file = open(local_file_path, "wb")
+
+            # Download file content - this sets up the download callback
+            file.download(local_file)
+
+            # Execute the query - this actually performs the download and writes to the file
+            self.ctx.execute_query()
+
+            # Close the file after download completes
+            local_file.close()
+            local_file = None
+
+            logger.info(f"Successfully downloaded: {file_name} -> {local_path}")
             return True
         except Exception as e:
-            logger.error(f"Failed to download {file_path}: {e}")
+            logger.error(f"Error downloading file {file_name}: {e}", exc_info=True)
+            if local_file:
+                try:
+                    local_file.close()
+                except:
+                    pass
             return False
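
The open/download/execute_query ordering above matters: the library only streams bytes into the handle when execute_query() runs, so the handle must stay open across both calls. A hypothetical invocation, assuming connect() succeeded and the URL came from list_files_in_folder():

    ok = downloader.download_file(
        server_relative_url="/sites/Reports/Shared Documents/General/punchlist.xlsx",  # placeholder
        file_name="punchlist.xlsx",
        local_path="reports/punchlist.xlsx",
    )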
     def download_files_from_folder(
@@ -143,121 +256,174 @@ class SharePointDownloader:
         self,
         folder_path: str,
         local_dir: str,
         file_pattern: Optional[str] = None,
-        overwrite: bool = True
+        overwrite: bool = True,
+        clear_existing: bool = True
     ) -> List[str]:
         """
-        Download all files from a SharePoint folder.
+        Download Excel files from a SharePoint folder.
+        By default, downloads only the newest file and clears old files.

         Args:
-            folder_path: Path to folder in SharePoint (e.g., "/Shared Documents/Reports")
-            local_dir: Local directory where files should be saved
-            file_pattern: Optional pattern to filter files (e.g., "*.xlsx")
+            folder_path: Folder path relative to site root
+            local_dir: Local directory to save files
+            file_pattern: Optional file pattern filter (e.g., "*.xlsx")
             overwrite: Whether to overwrite existing files
+            clear_existing: If True, clear all existing Excel files before downloading (default: True)

         Returns:
-            List of successfully downloaded file paths
+            List of downloaded file paths (typically 1 file - the newest)
         """
-        if not self.ctx:
-            if not self.authenticate():
-                return []
-
-        downloaded_files = []
-        try:
-            folder = self.ctx.web.get_folder_by_server_relative_url(folder_path)
-            files = folder.files
-            self.ctx.load(files)
-            self.ctx.execute_query()
-
-            local_dir_path = Path(local_dir)
-            local_dir_path.mkdir(parents=True, exist_ok=True)
-
-            for file in files:
-                file_name = file.properties["Name"]
-
-                # Filter by pattern if provided
-                if file_pattern:
-                    if not file_name.endswith(file_pattern.replace("*", "")):
-                        continue
-
-                # Only download Excel files
-                if not (file_name.endswith('.xlsx') or file_name.endswith('.xls')):
-                    continue
-
-                local_file_path = local_dir_path / file_name
-                if self.download_file(
-                    file.properties["ServerRelativeUrl"],
-                    str(local_file_path),
-                    overwrite=overwrite
-                ):
-                    downloaded_files.append(str(local_file_path))
-
-            logger.info(f"Downloaded {len(downloaded_files)} files from {folder_path}")
-            return downloaded_files
-        except Exception as e:
-            logger.error(f"Failed to download files from folder {folder_path}: {e}")
-            return downloaded_files
+        # Connect to SharePoint
+        if not self.connect():
+            logger.error("Failed to connect to SharePoint")
+            return []
+
+        # Prepare local directory
+        local_dir_path = Path(local_dir)
+        local_dir_path.mkdir(parents=True, exist_ok=True)
+
+        # ALWAYS clear ALL existing Excel files before downloading (to ensure only new files are used)
+        # This is critical to prevent combining multiple files
+        existing_files = list(local_dir_path.glob('*.xlsx')) + list(local_dir_path.glob('*.xls'))
+        cleared_count = 0
+        failed_to_clear = []
+
+        for old_file in existing_files:
+            try:
+                # On Windows, files might be locked - try multiple times
+                max_retries = 3
+                retry_count = 0
+                while retry_count < max_retries:
+                    try:
+                        old_file.unlink()
+                        cleared_count += 1
+                        logger.info(f"Cleared existing file before download: {old_file.name}")
+                        break
+                    except PermissionError:
+                        retry_count += 1
+                        if retry_count < max_retries:
+                            import time
+                            time.sleep(0.5)  # Wait 500ms before retry
+                        else:
+                            raise
+            except Exception as e:
+                failed_to_clear.append(old_file.name)
+                logger.error(f"Failed to clear existing file {old_file.name}: {e}")
+
+        if failed_to_clear:
+            logger.error(f"CRITICAL: Failed to clear {len(failed_to_clear)} file(s) before download: {failed_to_clear}")
+            logger.error("This will cause data mixing! Files may be locked by another process.")
+            # Don't fail here - let the download proceed, but log the warning
+
+        if cleared_count > 0:
+            logger.info(f"Cleared {cleared_count} existing Excel file(s) before downloading from SharePoint")
+        else:
+            logger.info("No existing Excel files found to clear (reports directory was empty)")
+
+        # List files in folder
+        files = self.list_files_in_folder(folder_path, file_pattern)
+
+        if not files:
+            logger.warning(f"No Excel files found in folder: {folder_path}")
+            return []
+
+        # Sort files by last modified date (newest first) and download only the newest one
+        def parse_time(time_str):
+            try:
+                if time_str:
+                    # Office365 library returns datetime objects or ISO strings
+                    if isinstance(time_str, datetime):
+                        return time_str
+                    # Try parsing ISO format
+                    return datetime.fromisoformat(str(time_str).replace('Z', '+00:00'))
+                return datetime.min
+            except:
+                return datetime.min
+
+        files_sorted = sorted(files, key=lambda f: parse_time(f.get("time_last_modified", "")), reverse=True)
+
+        if len(files_sorted) > 1:
+            logger.info(f"Found {len(files_sorted)} Excel file(s) in SharePoint folder. Using only the newest file.")
+            logger.info(f"Newest file: {files_sorted[0]['name']} (modified: {files_sorted[0].get('time_last_modified', 'Unknown')})")
+            logger.info(f"Skipping {len(files_sorted) - 1} older file(s) to avoid combining data")
+
+        # Download only the newest file
+        downloaded_files = []
+        newest_file = files_sorted[0]
+        file_name = newest_file["name"]
+        server_relative_url = newest_file["server_relative_url"]
+        local_file_path = local_dir_path / file_name
+
+        if self.download_file(server_relative_url, file_name, str(local_file_path), overwrite=overwrite):
+            downloaded_files.append(str(local_file_path))
+            logger.info(f"Successfully downloaded newest file: {file_name}")
+        else:
+            logger.error(f"Failed to download file: {file_name}")
+
+        logger.info(f"Downloaded {len(downloaded_files)} file(s) from {folder_path} (using only newest file)")
+        return downloaded_files
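
To illustrate the newest-file selection, here is the same sort key applied to two hypothetical metadata entries (stdlib only; the 'Z' handling mirrors parse_time above):

    from datetime import datetime

    files = [
        {"name": "old.xlsx", "time_last_modified": "2025-10-01T09:00:00Z"},
        {"name": "new.xlsx", "time_last_modified": "2025-11-08T11:30:00Z"},
    ]
    newest = max(files, key=lambda f: datetime.fromisoformat(
        f["time_last_modified"].replace('Z', '+00:00')))
    assert newest["name"] == "new.xlsx"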
 def download_from_sharepoint(
     site_url: str,
-    file_path: Optional[str] = None,
     folder_path: Optional[str] = None,
+    file_path: Optional[str] = None,
     local_dir: str = "reports",
-    username: Optional[str] = None,
-    password: Optional[str] = None,
+    tenant_id: Optional[str] = None,
     client_id: Optional[str] = None,
     client_secret: Optional[str] = None,
-    use_app_authentication: bool = False,
+    use_app_authentication: bool = True,
     file_pattern: Optional[str] = None,
-    overwrite: bool = True
+    overwrite: bool = True,
+    clear_existing: bool = True
 ) -> List[str]:
     """
-    Convenience function to download files from SharePoint.
+    Convenience function to download files from SharePoint using Office365-REST-Python-Client.

     Args:
         site_url: SharePoint site URL
-        file_path: Path to specific file (if downloading single file)
         folder_path: Path to folder (if downloading all files from folder)
+        file_path: Path to specific file (if downloading single file) - NOT YET IMPLEMENTED
         local_dir: Local directory to save files
-        username: Username for authentication
-        password: Password for authentication
-        client_id: Azure AD app client ID
-        client_secret: Azure AD app client secret
-        use_app_authentication: Use app authentication
+        tenant_id: Azure AD tenant ID (not used by Office365 library, but kept for compatibility)
+        client_id: Azure AD app client ID (required for app authentication)
+        client_secret: Azure AD app client secret (required for app authentication)
+        use_app_authentication: Use app authentication (default: True)
         file_pattern: Pattern to filter files (e.g., "*.xlsx")
         overwrite: Whether to overwrite existing files
+        clear_existing: If True, clear all existing Excel files before downloading (default: True)

     Returns:
-        List of downloaded file paths
+        List of downloaded file paths (typically 1 file - the newest)
     """
+    if not folder_path and not file_path:
+        logger.error("Either folder_path or file_path must be provided")
+        return []
+
+    if file_path:
+        logger.warning("Single file download not yet implemented")
+        return []
+
     downloader = SharePointDownloader(
         site_url=site_url,
-        username=username,
-        password=password,
+        tenant_id=tenant_id,
         client_id=client_id,
         client_secret=client_secret,
         use_app_authentication=use_app_authentication
     )

-    if file_path:
-        # Download single file
-        local_file_path = Path(local_dir) / Path(file_path).name
-        if downloader.download_file(file_path, str(local_file_path), overwrite=overwrite):
-            return [str(local_file_path)]
-        return []
-    elif folder_path:
-        # Download all files from folder
+    if folder_path:
+        # Download only the newest file from folder (clears existing files first)
         return downloader.download_files_from_folder(
             folder_path=folder_path,
             local_dir=local_dir,
             file_pattern=file_pattern,
-            overwrite=overwrite
+            overwrite=overwrite,
+            clear_existing=clear_existing
         )
     else:
-        logger.error("Either file_path or folder_path must be provided")
+        logger.error("file_path download not yet implemented")
         return []
@@ -265,33 +431,31 @@ if __name__ == "__main__":
     import sys
     from config import load_config

-    logging.basicConfig(level=logging.INFO)
+    logging.basicConfig(
+        level=logging.INFO,
+        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+    )

-    # Load configuration
     config = load_config()

     if not config.get('sharepoint'):
-        print("SharePoint configuration not found in config.yaml")
+        logger.error("SharePoint configuration not found")
         sys.exit(1)

     sp_config = config['sharepoint']

-    # Download files
     downloaded = download_from_sharepoint(
         site_url=sp_config['site_url'],
         folder_path=sp_config.get('folder_path'),
         file_path=sp_config.get('file_path'),
         local_dir=sp_config.get('local_dir', 'reports'),
-        username=sp_config.get('username'),
-        password=sp_config.get('password'),
+        tenant_id=sp_config.get('tenant_id'),
         client_id=sp_config.get('client_id'),
         client_secret=sp_config.get('client_secret'),
-        use_app_authentication=sp_config.get('use_app_authentication', False),
+        use_app_authentication=sp_config.get('use_app_authentication', True),
         file_pattern=sp_config.get('file_pattern'),
-        overwrite=True
+        overwrite=sp_config.get('overwrite', True)
     )

-    print(f"Downloaded {len(downloaded)} file(s):")
-    for file in downloaded:
-        print(f"  - {file}")
+    print(f"Downloaded {len(downloaded)} file(s)")
+    for file_path in downloaded:
+        print(f"  - {file_path}")
View File
@@ -607,11 +607,10 @@ def create_app(config_path: Optional[str] = None):
                 folder_path=sp_config.get('folder_path'),
                 file_path=sp_config.get('file_path'),
                 local_dir=sp_config.get('local_dir', 'reports'),
-                username=sp_config.get('username'),
-                password=sp_config.get('password'),
+                tenant_id=sp_config.get('tenant_id'),
                 client_id=sp_config.get('client_id'),
                 client_secret=sp_config.get('client_secret'),
-                use_app_authentication=sp_config.get('use_app_authentication', False),
+                use_app_authentication=sp_config.get('use_app_authentication', True),
                 file_pattern=sp_config.get('file_pattern'),
                 overwrite=sp_config.get('overwrite', True)
             )