#!/usr/bin/env python3
"""
Web API Server for On-Demand Report Generation

Provides REST API endpoints to trigger report generation on demand.
"""
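
# How this module is typically used (illustrative; the filename and config
# format depend on your deployment and on config.load_config()):
#
#   python api_server.py config.yaml
#
# load_config() is expected to return a dict shaped roughly like the sketch
# below; the keys are inferred from how this module reads the config:
#
#   {
#       "api": {"enabled": True, "host": "0.0.0.0", "port": 8080, "api_key": "..."},
#       "sharepoint": {"enabled": True, "site_url": "...", "tenant_id": "...",
#                      "client_id": "...", "client_secret": "..."},
#       "report": {"reports_dir": "reports", "output_dir": "output"},
#       "scheduler": {"enabled": False},
#   }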

import json
import logging
import re
import threading
import time
from datetime import datetime
from pathlib import Path
from typing import Optional

try:
    from flask import Flask, jsonify, request, send_from_directory
    from flask_cors import CORS
    from werkzeug.utils import secure_filename
    FLASK_AVAILABLE = True
except ImportError:
    FLASK_AVAILABLE = False
    logging.warning("Flask not installed. API server features disabled.")

from config import load_config
from report_generator import generate_report
from sharepoint_downloader import download_from_sharepoint

logger = logging.getLogger(__name__)

app = None
config = None
scheduler_thread = None


def cleanup_old_reports(output_dir: Path, reports_dir: Path, max_reports: int = 10):
    """
    Clean up old reports and Excel files, keeping only the newest max_reports.

    Args:
        output_dir: Directory containing report HTML/JSON files
        reports_dir: Directory containing Excel files
        max_reports: Maximum number of reports to keep
    """
    try:
        # All report HTML files, newest first by modification time
        html_files = sorted(output_dir.glob('report-*.html'),
                            key=lambda p: p.stat().st_mtime, reverse=True)

        if len(html_files) <= max_reports:
            return  # No cleanup needed

        # The oldest reports fall off the end of the sorted list
        reports_to_delete = html_files[max_reports:]

        deleted_count = 0
        for html_file in reports_to_delete:
            report_id = html_file.stem

            # Delete HTML file
            try:
                html_file.unlink()
                logger.info(f"Deleted old report HTML: {html_file.name}")
                deleted_count += 1
            except Exception as e:
                logger.warning(f"Failed to delete {html_file.name}: {e}")

            # Delete corresponding JSON file
            json_file = output_dir / f"{report_id}.json"
            if json_file.exists():
                try:
                    json_file.unlink()
                    logger.info(f"Deleted old report JSON: {json_file.name}")
                except Exception as e:
                    logger.warning(f"Failed to delete {json_file.name}: {e}")

        # Trim Excel files the same way, keeping only the newest max_reports
        if reports_dir.exists():
            excel_files = list(reports_dir.glob('*.xlsx')) + list(reports_dir.glob('*.xls'))

            if len(excel_files) > max_reports:
                # Sort by modification time and delete the oldest
                excel_files_sorted = sorted(excel_files,
                                            key=lambda p: p.stat().st_mtime, reverse=True)
                excel_to_delete = excel_files_sorted[max_reports:]

                for excel_file in excel_to_delete:
                    try:
                        excel_file.unlink()
                        logger.info(f"Deleted old Excel file: {excel_file.name}")
                    except Exception as e:
                        logger.warning(f"Failed to delete {excel_file.name}: {e}")

        logger.info(f"Cleanup completed: deleted {deleted_count} old report(s)")

    except Exception as e:
        logger.error(f"Error during cleanup: {e}", exc_info=True)
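
# Minimal usage sketch (paths are illustrative; the API endpoints below call
# this automatically after each successful generation):
#
#   cleanup_old_reports(Path("output"), Path("reports"), max_reports=10)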


def create_app(config_path: Optional[str] = None):
    """Create and configure Flask app."""
    global app, config

    if not FLASK_AVAILABLE:
        raise ImportError(
            "Flask is required for API server. "
            "Install it with: pip install flask flask-cors"
        )

    app = Flask(__name__)
    CORS(app)  # Enable CORS for all routes

    config = load_config(config_path)
    api_config = config.get('api', {})
    sharepoint_config = config.get('sharepoint', {})
    report_config = config.get('report', {})

    # Resolve paths relative to the script location, not the current working directory
    script_dir = Path(__file__).parent.absolute()

    # Convert relative paths to absolute paths under the script directory
    if 'output_dir' in report_config and report_config['output_dir']:
        output_dir = Path(report_config['output_dir'])
        if not output_dir.is_absolute():
            report_config['output_dir'] = str(script_dir / output_dir)

    if 'reports_dir' in report_config and report_config['reports_dir']:
        reports_dir = Path(report_config['reports_dir'])
        if not reports_dir.is_absolute():
            report_config['reports_dir'] = str(script_dir / reports_dir)

    # Store config in app context
    app.config['API_KEY'] = api_config.get('api_key')
    app.config['SHAREPOINT_CONFIG'] = sharepoint_config
    app.config['REPORT_CONFIG'] = report_config

    @app.route('/health', methods=['GET'])
    def health():
        """Health check endpoint."""
        return jsonify({
            'status': 'healthy',
            'service': 'vendor-report-generator'
        })

    @app.route('/api/generate', methods=['POST'])
    def generate_report_endpoint():
        """
        Generate report on demand.

        Request body (optional):
        {
            "download_from_sharepoint": true,
            "reports_dir": "reports"
        }
        """
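        # Illustrative client call (server address and key are deployment-specific):
        #
        #   import requests
        #   resp = requests.post(
        #       "http://localhost:8080/api/generate",
        #       headers={"X-API-Key": "<key, if configured>"},
        #       json={"download_from_sharepoint": True},
        #       timeout=600,
        #   )
        #   resp.raise_for_status()
        #   print(resp.json()["report_id"])
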
        # Check API key if configured; accept it from the X-API-Key header or
        # from the JSON body's "api_key" field
        api_key = app.config.get('API_KEY')
        if api_key:
            body = request.get_json(silent=True) or {}
            provided_key = request.headers.get('X-API-Key') or body.get('api_key')
            if provided_key != api_key:
                return jsonify({'error': 'Invalid API key'}), 401

        try:
            request_data = request.get_json(silent=True) or {}
            # Default to True for backward compatibility
            download_from_sp = request_data.get('download_from_sharepoint', True)
            downloaded_files = []  # Initialized here so it is in scope for all branches

            # Report config is needed early for error handling
            report_config = app.config['REPORT_CONFIG']

            # Download from SharePoint if requested AND no manual upload happened.
            # If download_from_sharepoint is False, a manual upload was used.
            if download_from_sp:
                sp_config = app.config['SHAREPOINT_CONFIG']
                if not sp_config.get('enabled'):
                    return jsonify({
                        'error': 'SharePoint is not enabled in configuration'
                    }), 400

                logger.info("Downloading files from SharePoint...")
                try:
                    downloaded = download_from_sharepoint(
                        site_url=sp_config['site_url'],
                        folder_path=sp_config.get('folder_path'),
                        file_path=sp_config.get('file_path'),
                        local_dir=sp_config.get('local_dir', 'reports'),
                        tenant_id=sp_config.get('tenant_id'),
                        client_id=sp_config.get('client_id'),
                        client_secret=sp_config.get('client_secret'),
                        use_app_authentication=sp_config.get('use_app_authentication', True),
                        file_pattern=sp_config.get('file_pattern'),
                        overwrite=sp_config.get('overwrite', True)
                    )
                    downloaded_files = downloaded if downloaded else []
                    logger.info(f"Downloaded {len(downloaded_files)} file(s) from SharePoint: {downloaded_files}")

                    # If SharePoint download failed (no files downloaded), check for existing files
                    if len(downloaded_files) == 0:
                        logger.warning("SharePoint download returned 0 files. This could mean:")
                        logger.warning("1. SharePoint permissions issue (401/403 error)")
                        logger.warning("2. No files found in the specified folder")
                        logger.warning("3. Site access not granted (Resource-Specific Consent needed)")
                        logger.warning("Checking if existing files are available in reports directory...")

                        reports_dir_path = Path(report_config.get('reports_dir', 'reports'))
                        if not reports_dir_path.is_absolute():
                            script_dir = Path(__file__).parent.absolute()
                            reports_dir_path = script_dir / reports_dir_path

                        if reports_dir_path.exists():
                            existing_files = list(reports_dir_path.glob('*.xlsx')) + list(reports_dir_path.glob('*.xls'))
                            if existing_files:
                                logger.warning(f"Found {len(existing_files)} existing file(s) in reports directory. Will use these instead.")
                                logger.warning("NOTE: These may be old files. Consider using manual upload for fresh data.")
                                # Treat the existing files as the working set so the
                                # selection logic below accepts them; otherwise the
                                # no-data-source guard would refuse the request
                                downloaded_files = [str(f) for f in existing_files]
                            else:
                                logger.error("No files available - neither from SharePoint nor existing files.")
                                return jsonify({
                                    'error': 'SharePoint download failed and no existing files found',
                                    'details': 'SharePoint access may require Resource-Specific Consent (RSC). Please use manual file upload or fix SharePoint permissions.',
                                    'sharepoint_error': True
                                }), 500
                except Exception as e:
                    error_msg = str(e)
                    logger.error(f"Failed to download from SharePoint: {error_msg}", exc_info=True)

                    # Check whether this is a locked-file error
                    is_locked_file_error = (
                        'locked' in error_msg.lower()
                        or 'cannot access the file' in error_msg.lower()
                        or 'being used by another process' in error_msg.lower()
                    )

                    if is_locked_file_error:
                        # Extract the filename from the error message if possible
                        locked_file_match = None
                        match = re.search(r"['\"]([^'\"]*\.xlsx?)['\"]", error_msg)
                        if match:
                            locked_file_match = match.group(1)

                        locked_file_info = f" ({locked_file_match})" if locked_file_match else ""
                        return jsonify({
                            'error': f'Cannot download from SharePoint: File is locked{locked_file_info}',
                            'details': f'A file in the reports directory is being used by another program (likely Excel). Please close Excel and any other programs that might have this file open, then try again. Error: {error_msg}',
                            'instructions': [
                                '1. Close Microsoft Excel completely',
                                '2. Close any file explorer windows showing the reports folder',
                                '3. Wait a few seconds',
                                '4. Try generating the report again',
                                '',
                                'Alternatively, use manual file upload instead of SharePoint download.'
                            ],
                            'sharepoint_error': True,
                            'locked_file_error': True
                        }), 500

                    # Check for existing files as a fallback (only for non-locked errors)
                    reports_dir_path = Path(report_config.get('reports_dir', 'reports'))
                    if not reports_dir_path.is_absolute():
                        script_dir = Path(__file__).parent.absolute()
                        reports_dir_path = script_dir / reports_dir_path

                    if reports_dir_path.exists():
                        existing_files = list(reports_dir_path.glob('*.xlsx')) + list(reports_dir_path.glob('*.xls'))
                        if existing_files:
                            logger.warning(f"SharePoint download failed, but found {len(existing_files)} existing file(s). Will use these.")
                            # Continue with the existing files as the working set
                            downloaded_files = [str(f) for f in existing_files]
                        else:
                            return jsonify({
                                'error': f'SharePoint download failed: {error_msg}',
                                'details': 'No existing files found. Please use manual file upload or fix SharePoint permissions.',
                                'sharepoint_error': True
                            }), 500
                    else:
                        return jsonify({
                            'error': f'SharePoint download failed: {error_msg}',
                            'details': 'Reports directory does not exist. Please use manual file upload or fix SharePoint permissions.',
                            'sharepoint_error': True
                        }), 500
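
            # Naming scheme: each run writes report-<YYYY-MM-DD-HH-MM-SS>.json and a
            # matching .html into output_dir; that timestamped stem doubles as the
            # report_id returned to clients and used by the retrieval endpoints.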
            # Generate report with timestamp
            reports_dir = request_data.get('reports_dir', report_config.get('reports_dir', 'reports'))
            output_dir_str = report_config.get('output_dir', 'output')
            output_dir = Path(output_dir_str)
            if not output_dir.is_absolute():
                script_dir = Path(__file__).parent.absolute()
                output_dir = script_dir / output_dir

            # Create timestamped filename
            timestamp = datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
            report_id = f"report-{timestamp}"
            output_file = str(output_dir / f"{report_id}.json")

            # Log which files will be used for generation
            reports_dir_path = Path(reports_dir)
            if not reports_dir_path.is_absolute():
                script_dir = Path(__file__).parent.absolute()
                reports_dir_path = script_dir / reports_dir_path

            logger.info(f"Generating report from {reports_dir_path.absolute()}...")
            logger.info(f"Reports directory exists: {reports_dir_path.exists()}")

            # Determine which files to use for generation.
            # CRITICAL: only use files that were just downloaded/uploaded, not old ones.
            if downloaded_files:
                # Files came from SharePoint (or its existing-file fallback) - use only those
                logger.info(f"Using {len(downloaded_files)} file(s) downloaded from SharePoint")
                # Verify that reports_dir contains only those files (no stray old files)
                all_files = list(reports_dir_path.glob('*.xlsx')) + list(reports_dir_path.glob('*.xls'))
                downloaded_file_names = [Path(f).name for f in downloaded_files]
                if len(all_files) != len(downloaded_files):
                    logger.warning(f"WARNING: Found {len(all_files)} file(s) in reports_dir but only {len(downloaded_files)} were downloaded!")
                    logger.warning("This might indicate old files weren't cleared. Clearing now...")
                    for file in all_files:
                        if file.name not in downloaded_file_names:
                            try:
                                file.unlink()
                                logger.info(f"Cleared unexpected file: {file.name}")
                            except Exception as e:
                                logger.error(f"Failed to clear unexpected file {file.name}: {e}")
            elif not download_from_sp:
                # Manual upload was used (download_from_sharepoint=False). The upload
                # endpoint should have cleared old files before saving new ones, so
                # every file in the directory should come from the recent upload.
                if reports_dir_path.exists():
                    excel_files = list(reports_dir_path.glob('*.xlsx')) + list(reports_dir_path.glob('*.xls'))
                    current_time = datetime.now().timestamp()
                    recent_files = []

                    logger.info(f"Manual upload generation: Found {len(excel_files)} file(s) in reports directory")

                    # Only use files modified in the last 2 minutes (very recent = just
                    # uploaded); this avoids accidentally using SharePoint-downloaded files
                    for excel_file in excel_files:
                        mtime = excel_file.stat().st_mtime
                        age_seconds = current_time - mtime
                        if age_seconds < 120:  # 2 minutes
                            recent_files.append(excel_file)
                            mtime_str = datetime.fromtimestamp(mtime).strftime('%Y-%m-%d %H:%M:%S')
                            logger.info(f"  - {excel_file.name} (modified: {mtime_str}, age: {age_seconds:.1f}s) - will be used for manual upload generation")
                        else:
                            logger.warning(f"  - {excel_file.name} (modified: {datetime.fromtimestamp(mtime).strftime('%Y-%m-%d %H:%M:%S')}, age: {age_seconds:.1f}s) - skipping (too old, might be from SharePoint download)")

                    # Clear any files that are too old (likely from SharePoint)
                    if len(recent_files) < len(excel_files):
                        logger.warning(f"Found {len(excel_files)} total file(s), but only {len(recent_files)} are recent (< 2 min old). Clearing old files...")
                        for excel_file in excel_files:
                            if excel_file not in recent_files:
                                try:
                                    excel_file.unlink()
                                    logger.info(f"Cleared old file (likely from SharePoint): {excel_file.name}")
                                except Exception as e:
                                    logger.warning(f"Failed to clear old file {excel_file.name}: {e}")

                    if len(recent_files) == 0:
                        logger.error("Manual upload was used but no recent files (< 2 min old) found in reports directory!")
                        logger.error("This might mean:")
                        logger.error("1. Files were not uploaded successfully")
                        logger.error("2. Upload happened more than 2 minutes ago")
                        logger.error("3. File modification times are incorrect")
                        logger.error("4. SharePoint download happened after upload")
                        return jsonify({
                            'error': 'No recent files found for manual upload generation',
                            'details': 'Files were uploaded but not found or are too old. Please try uploading again and generating immediately.',
                            'manual_upload_error': True
                        }), 400

                    logger.info(f"Will generate report from {len(recent_files)} recently uploaded file(s)")
                else:
                    logger.error("Manual upload was used but reports directory does not exist!")
                    return jsonify({
                        'error': 'Reports directory does not exist',
                        'details': 'Cannot generate report from manual upload - reports directory is missing.',
                        'manual_upload_error': True
                    }), 500
            else:
                # SharePoint download was requested, nothing was downloaded, and no
                # existing files could serve as a fallback. Refuse rather than risk
                # generating from stale data.
                logger.error("SharePoint download produced no usable files, and no manual upload was used!")
                return jsonify({
                    'error': 'SharePoint download failed and no manual upload provided',
                    'details': 'Cannot generate report - no data source available. Please try again or use manual upload.',
                    'sharepoint_error': True
                }), 400

            # FINAL VERIFICATION: before generation, ensure only the expected file exists
            final_files = list(reports_dir_path.glob('*.xlsx')) + list(reports_dir_path.glob('*.xls'))
            if len(final_files) > 1:
                logger.error(f"CRITICAL: Found {len(final_files)} Excel file(s) before generation!")
                logger.error("This will cause data mixing. Files found:")
                for f in final_files:
                    mtime = f.stat().st_mtime
                    mtime_str = datetime.fromtimestamp(mtime).strftime('%Y-%m-%d %H:%M:%S')
                    logger.error(f"  - {f.name} (modified: {mtime_str})")
                logger.error("Attempting to keep only the most recent file...")

                # Keep only the newest file
                final_files_sorted = sorted(final_files, key=lambda f: f.stat().st_mtime, reverse=True)
                newest_file = final_files_sorted[0]
                for old_file in final_files_sorted[1:]:
                    try:
                        old_file.unlink()
                        logger.info(f"Removed older file before generation: {old_file.name}")
                    except Exception as e:
                        logger.error(f"Failed to remove {old_file.name}: {e}")
                        return jsonify({
                            'error': 'Multiple Excel files found and cannot remove old ones',
                            'details': f'Found {len(final_files)} files. Please ensure only one file exists. Files may be locked.',
                            'files_found': [f.name for f in final_files]
                        }), 400

                logger.warning(f"Proceeding with only the newest file: {newest_file.name}")

            report_data = generate_report(
                reports_dir=str(reports_dir_path),
                output_file=output_file,
                verbose=False  # Don't print to console in API mode
            )

            if report_data:
                # Generate HTML with the same timestamp
                html_file = output_dir / f"{report_id}.html"
                from html_generator import generate_html_report
                generate_html_report(output_file, str(html_file))

                # Clean up old reports (keep only the last 10);
                # ensure reports_dir is a Path object
                reports_dir_for_cleanup = Path(reports_dir) if isinstance(reports_dir, str) else reports_dir
                cleanup_old_reports(output_dir, reports_dir_for_cleanup, max_reports=10)

                return jsonify({
                    'status': 'success',
                    'message': 'Report generated successfully',
                    'report_id': report_id,
                    'report_date': timestamp,
                    'output_file': output_file,
                    'summary': report_data.get('summary', {}),
                    'vendors_count': len(report_data.get('vendors', [])),
                    'downloaded_files': len(downloaded_files) if download_from_sp else 0,
                    'downloaded_file_names': [Path(f).name for f in downloaded_files] if download_from_sp else []
                })
            else:
                return jsonify({
                    'error': 'Report generation failed'
                }), 500

        except Exception as e:
            logger.error(f"Error generating report: {e}", exc_info=True)
            return jsonify({
                'error': f'Report generation failed: {str(e)}'
            }), 500
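
    # Example success payload from /api/generate (illustrative values; keys match
    # the jsonify() call above):
    #
    #   {
    #       "status": "success",
    #       "report_id": "report-2025-11-08-11-25-46",
    #       "report_date": "2025-11-08-11-25-46",
    #       "vendors_count": 42,
    #       "downloaded_files": 1,
    #       "downloaded_file_names": ["vendor_data.xlsx"],
    #       ...
    #   }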

    @app.route('/api/upload', methods=['POST'])
    def upload_files():
        """Upload Excel files manually. Clears old files before uploading new ones."""
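        # Illustrative client call (multipart form upload; the field name "files"
        # and the .xlsx filter match the handler below, the rest is an example):
        #
        #   import requests
        #   with open("vendor_data.xlsx", "rb") as fh:
        #       resp = requests.post(
        #           "http://localhost:8080/api/upload",
        #           files=[("files", ("vendor_data.xlsx", fh))],
        #       )
        #   print(resp.json()["uploaded_files"])
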
logger.info("=== MANUAL UPLOAD REQUEST RECEIVED ===")
|
|
try:
|
|
if 'files' not in request.files:
|
|
logger.error("Upload request missing 'files' field")
|
|
return jsonify({'error': 'No files provided'}), 400
|
|
|
|
files = request.files.getlist('files')
|
|
logger.info(f"Received {len(files)} file(s) in upload request")
|
|
if not files or all(f.filename == '' for f in files):
|
|
logger.error("No valid files in upload request")
|
|
return jsonify({'error': 'No files selected'}), 400
|
|
|
|
report_config = app.config['REPORT_CONFIG']
|
|
reports_dir_str = report_config.get('reports_dir', 'reports')
|
|
reports_dir = Path(reports_dir_str)
|
|
if not reports_dir.is_absolute():
|
|
script_dir = Path(__file__).parent.absolute()
|
|
reports_dir = script_dir / reports_dir
|
|
|
|
# Ensure reports directory exists
|
|
reports_dir.mkdir(parents=True, exist_ok=True)
|
|
|
|
# ALWAYS clear ALL old Excel files from reports directory before uploading new ones
|
|
# CRITICAL: This prevents combining multiple files in report generation
|
|
old_excel_files = list(reports_dir.glob('*.xlsx')) + list(reports_dir.glob('*.xls'))
|
|
cleared_count = 0
|
|
failed_to_clear = []
|
|
|
|
for old_file in old_excel_files:
|
|
try:
|
|
# On Windows, files might be locked - try multiple times with increasing delays
|
|
max_retries = 5
|
|
retry_count = 0
|
|
cleared_this_file = False
|
|
|
|
while retry_count < max_retries and not cleared_this_file:
|
|
try:
|
|
old_file.unlink()
|
|
cleared_count += 1
|
|
cleared_this_file = True
|
|
logger.info(f"Cleared old file before upload: {old_file.name}")
|
|
break
|
|
except PermissionError as pe:
|
|
retry_count += 1
|
|
if retry_count < max_retries:
|
|
# Increasing delay: 0.5s, 1s, 2s, 3s
|
|
import time
|
|
delay = min(0.5 * (2 ** retry_count), 3.0)
|
|
logger.warning(f"File {old_file.name} is locked (attempt {retry_count}/{max_retries}), waiting {delay}s...")
|
|
time.sleep(delay)
|
|
else:
|
|
# Last attempt failed - try renaming instead of deleting
|
|
logger.warning(f"Cannot delete {old_file.name}, trying to rename instead...")
|
|
try:
|
|
import time
|
|
timestamp = int(time.time())
|
|
backup_name = f"{old_file.stem}_backup_{timestamp}{old_file.suffix}"
|
|
backup_path = old_file.parent / backup_name
|
|
old_file.rename(backup_path)
|
|
cleared_count += 1
|
|
cleared_this_file = True
|
|
logger.info(f"Renamed locked file to backup: {old_file.name} -> {backup_name}")
|
|
except Exception as rename_error:
|
|
logger.error(f"Could not rename file either: {rename_error}")
|
|
raise pe # Raise original PermissionError
|
|
except Exception as e:
|
|
if retry_count >= max_retries - 1:
|
|
raise
|
|
retry_count += 1
|
|
import time
|
|
time.sleep(1)
|
|
|
|
if not cleared_this_file:
|
|
failed_to_clear.append(old_file.name)
|
|
logger.error(f"Failed to clear old file {old_file.name} after {max_retries} attempts")
|
|
except Exception as e:
|
|
if old_file.name not in failed_to_clear:
|
|
failed_to_clear.append(old_file.name)
|
|
logger.error(f"Failed to clear old file {old_file.name}: {e}")
|
|
|
|
# If any files failed to clear, fail the upload to prevent mixing old and new data
|
|
if failed_to_clear:
|
|
logger.error(f"CRITICAL: Failed to clear {len(failed_to_clear)} file(s) before upload: {failed_to_clear}")
|
|
locked_files_list = ', '.join(failed_to_clear)
|
|
return jsonify({
|
|
'error': f'Cannot upload: {len(failed_to_clear)} file(s) are locked',
|
|
'failed_files': failed_to_clear,
|
|
'details': f'File(s) {locked_files_list} are being used by another program (likely Excel). Please close Excel and any other programs that might have these files open, then try again.',
|
|
'instructions': [
|
|
'1. Close Microsoft Excel completely',
|
|
'2. Close any file explorer windows showing these files',
|
|
'3. Wait a few seconds',
|
|
'4. Try uploading again'
|
|
]
|
|
}), 500
|
|
|
|
if cleared_count > 0:
|
|
logger.info(f"Cleared {cleared_count} old Excel file(s) before upload")
|
|
else:
|
|
logger.info("No old Excel files found to clear (reports directory was empty)")
|
|
|
|
# VERIFY: Double-check that all Excel files are actually gone
|
|
remaining_files = list(reports_dir.glob('*.xlsx')) + list(reports_dir.glob('*.xls'))
|
|
if remaining_files:
|
|
logger.error(f"CRITICAL: After clearing, {len(remaining_files)} file(s) still exist: {[f.name for f in remaining_files]}")
|
|
logger.error("These files are likely locked. Attempting force removal...")
|
|
force_failed = []
|
|
for remaining_file in remaining_files:
|
|
try:
|
|
remaining_file.unlink()
|
|
logger.info(f"Force-removed locked file: {remaining_file.name}")
|
|
except Exception as e:
|
|
force_failed.append(remaining_file.name)
|
|
logger.error(f"CRITICAL: Cannot remove locked file {remaining_file.name}: {e}")
|
|
|
|
if force_failed:
|
|
logger.error(f"CRITICAL: {len(force_failed)} file(s) still locked after force removal: {force_failed}")
|
|
return jsonify({
|
|
'error': f'Cannot upload: {len(force_failed)} file(s) are locked and cannot be deleted',
|
|
'failed_files': force_failed,
|
|
'details': 'Please close Excel or any other program using these files, then try again.'
|
|
}), 500
|
|
|
|
logger.info("✓ Verified: All old Excel files cleared successfully before upload")
|
|
|
|
uploaded_count = 0
|
|
uploaded_files = []
|
|
|
|
for file in files:
|
|
if file.filename == '':
|
|
continue
|
|
|
|
# Check if it's an Excel file
|
|
filename = secure_filename(file.filename)
|
|
if not (filename.endswith('.xlsx') or filename.endswith('.xls')):
|
|
logger.warning(f"Skipping non-Excel file: {filename}")
|
|
continue
|
|
|
|
# Save file to reports directory
|
|
file_path = reports_dir / filename
|
|
logger.info(f"Saving uploaded file: {filename} -> {file_path}")
|
|
file.save(str(file_path))
|
|
|
|
# Verify file was saved and get its modification time
|
|
if file_path.exists():
|
|
mtime = file_path.stat().st_mtime
|
|
mtime_str = datetime.fromtimestamp(mtime).strftime('%Y-%m-%d %H:%M:%S')
|
|
file_size = file_path.stat().st_size
|
|
logger.info(f"Successfully saved file: {filename} (size: {file_size} bytes, modified: {mtime_str})")
|
|
uploaded_count += 1
|
|
uploaded_files.append(filename)
|
|
else:
|
|
logger.error(f"CRITICAL: File was not saved! {file_path} does not exist after save()")
|
|
raise Exception(f"Failed to save file {filename}")
|
|
|
|
if uploaded_count == 0:
|
|
return jsonify({'error': 'No valid Excel files uploaded'}), 400
|
|
|
|
# Warn if multiple files uploaded - reports should be generated from ONE file
|
|
if uploaded_count > 1:
|
|
logger.warning(f"WARNING: {uploaded_count} files uploaded. Reports should be generated from a single file. Only the newest file will be used.")
|
|
|
|
return jsonify({
|
|
'status': 'success',
|
|
'message': f'Successfully uploaded {uploaded_count} file(s)',
|
|
'uploaded_count': uploaded_count,
|
|
'uploaded_files': uploaded_files,
|
|
'cleared_old_files': cleared_count,
|
|
'warning': f'{uploaded_count} file(s) uploaded - only the newest will be used for report generation' if uploaded_count > 1 else None
|
|
})
|
|
except Exception as e:
|
|
logger.error(f"Error uploading files: {e}", exc_info=True)
|
|
return jsonify({'error': f'Failed to upload files: {str(e)}'}), 500
|
|
|
|

    @app.route('/api/status', methods=['GET'])
    def status():
        """Get service status and configuration."""
        return jsonify({
            'status': 'running',
            'sharepoint_enabled': app.config['SHAREPOINT_CONFIG'].get('enabled', False),
            'reports_dir': app.config['REPORT_CONFIG'].get('reports_dir', 'reports'),
            'output_dir': app.config['REPORT_CONFIG'].get('output_dir', 'output')
        })

    @app.route('/api/report/json', methods=['GET'])
    def get_report_json():
        """Get the latest report JSON file."""
        try:
            report_config = app.config['REPORT_CONFIG']
            output_dir_str = report_config.get('output_dir', 'output')
            output_dir = Path(output_dir_str)
            if not output_dir.is_absolute():
                script_dir = Path(__file__).parent.absolute()
                output_dir = script_dir / output_dir

            # Pick the newest JSON report, covering both the timestamped files that
            # generation writes (report-<timestamp>.json) and the legacy fixed name
            json_files = list(output_dir.glob('report-*.json'))
            legacy_file = output_dir / 'report.json'
            if legacy_file.exists():
                json_files.append(legacy_file)

            if not json_files:
                return jsonify({'error': 'Report not found. Generate a report first.'}), 404

            report_file = sorted(json_files, key=lambda p: p.stat().st_mtime, reverse=True)[0]

            with open(report_file, 'r', encoding='utf-8') as f:
                report_data = json.load(f)

            return jsonify(report_data)
        except Exception as e:
            logger.error(f"Error reading report JSON: {e}", exc_info=True)
            return jsonify({'error': f'Failed to read report: {str(e)}'}), 500

    @app.route('/api/report/html', methods=['GET'])
    def get_report_html():
        """Get a report HTML file by report_id (or the latest if not specified)."""
        try:
            report_config = app.config['REPORT_CONFIG']
            output_dir_str = report_config.get('output_dir', 'output')
            output_dir = Path(output_dir_str)
            if not output_dir.is_absolute():
                script_dir = Path(__file__).parent.absolute()
                output_dir = script_dir / output_dir

            # Get report_id from the query string, defaulting to the latest report
            report_id = request.args.get('report_id')

            if report_id:
                # Check whether it is a timestamped report or a legacy report
                html_file = output_dir / f"{report_id}.html"

                # If not found and it starts with "report-", it might be a legacy
                # report with a generated ID
                if not html_file.exists() and report_id.startswith('report-'):
                    # Try the legacy report.html
                    legacy_file = output_dir / 'report.html'
                    if legacy_file.exists():
                        html_file = legacy_file
                    else:
                        return jsonify({'error': f'Report {report_id} not found.'}), 404
                elif not html_file.exists():
                    return jsonify({'error': f'Report {report_id} not found.'}), 404
            else:
                # Get the latest report (check both timestamped and legacy)
                timestamped_files = list(output_dir.glob('report-*.html'))
                legacy_file = output_dir / 'report.html'

                html_files = []
                if legacy_file.exists():
                    html_files.append(legacy_file)
                html_files.extend(timestamped_files)

                if not html_files:
                    return jsonify({'error': 'No reports found. Generate a report first.'}), 404

                html_file = sorted(html_files, key=lambda p: p.stat().st_mtime, reverse=True)[0]

            return send_from_directory(str(output_dir), html_file.name, mimetype='text/html')
        except Exception as e:
            logger.error(f"Error reading report HTML: {e}", exc_info=True)
            return jsonify({'error': f'Failed to read report HTML: {str(e)}'}), 500

    @app.route('/api/reports/list', methods=['GET'])
    def list_reports():
        """List all available reports (last 10)."""
        try:
            report_config = app.config['REPORT_CONFIG']
            output_dir_str = report_config.get('output_dir', 'output')
            output_dir = Path(output_dir_str)

            # Ensure absolute path
            if not output_dir.is_absolute():
                script_dir = Path(__file__).parent.absolute()
                output_dir = script_dir / output_dir

            # Log for debugging
            logger.info(f"Looking for reports in: {output_dir.absolute()}")
            logger.info(f"Output directory exists: {output_dir.exists()}")
            if output_dir.exists():
                logger.info(f"Files in output directory: {list(output_dir.glob('*'))}")

            # Find all report HTML files (both timestamped and non-timestamped)
            timestamped_files = list(output_dir.glob('report-*.html'))
            legacy_file = output_dir / 'report.html'

            logger.info(f"Found {len(timestamped_files)} timestamped report files")
            logger.info(f"Legacy report.html exists: {legacy_file.exists()}")
            if legacy_file.exists():
                logger.info(f"Legacy report.html path: {legacy_file.absolute()}")

            html_files = []

            # Add legacy report.html if it exists
            if legacy_file.exists():
                html_files.append(legacy_file)
                logger.info("Added legacy report.html to list")

            # Add timestamped files
            html_files.extend(timestamped_files)
            logger.info(f"Total HTML files found: {len(html_files)}")

            reports = []
            for html_file in sorted(html_files, key=lambda p: p.stat().st_mtime, reverse=True)[:10]:
                report_id = html_file.stem  # e.g. "report-2025-11-08-11-25-46" or "report"

                # Handle legacy report.html
                if report_id == 'report':
                    # Use the file modification time as the timestamp
                    mtime = html_file.stat().st_mtime
                    dt = datetime.fromtimestamp(mtime)
                    timestamp_str = dt.strftime('%Y-%m-%d-%H-%M-%S')
                    date_str = dt.strftime('%Y-%m-%d %H:%M:%S')
                    report_id = f"report-{timestamp_str}"
                else:
                    # Timestamped report
                    timestamp_str = report_id.replace('report-', '')
                    try:
                        # Parse the timestamp to build a readable date
                        dt = datetime.strptime(timestamp_str, '%Y-%m-%d-%H-%M-%S')
                        date_str = dt.strftime('%Y-%m-%d %H:%M:%S')
                    except ValueError:
                        date_str = timestamp_str

                # Get file size
                file_size = html_file.stat().st_size

                reports.append({
                    'report_id': report_id,
                    'date': date_str,
                    'timestamp': timestamp_str,
                    'file_size': file_size
                })

            return jsonify({
                'reports': reports,
                'count': len(reports)
            })
        except Exception as e:
            logger.error(f"Error listing reports: {e}", exc_info=True)
            return jsonify({'error': f'Failed to list reports: {str(e)}'}), 500
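
    # Illustrative retrieval flow (server address is an example): fetch the list,
    # then request a specific report's HTML by its report_id:
    #
    #   import requests
    #   listing = requests.get("http://localhost:8080/api/reports/list").json()
    #   newest_id = listing["reports"][0]["report_id"]   # list is newest-first
    #   html = requests.get("http://localhost:8080/api/report/html",
    #                       params={"report_id": newest_id}).text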

    return app


def start_scheduler(config_path: Optional[str] = None):
    """Start the scheduler in a background thread."""
    global scheduler_thread

    scheduler_config = config.get('scheduler', {})
    if not scheduler_config.get('enabled'):
        logger.info("Scheduler is disabled in configuration")
        return

    try:
        from scheduler import ReportScheduler

        def run_scheduler():
            try:
                scheduler = ReportScheduler(config_path=config_path)
                scheduler.start()
            except Exception as e:
                logger.error(f"Scheduler error: {e}", exc_info=True)

        scheduler_thread = threading.Thread(target=run_scheduler, daemon=True)
        scheduler_thread.start()
        logger.info("Scheduler started in background thread")
    except ImportError:
        logger.warning("Scheduler module not available. Install apscheduler to enable scheduling.")
    except Exception as e:
        logger.error(f"Failed to start scheduler: {e}", exc_info=True)


def run_server(config_path: Optional[str] = None, host: Optional[str] = None, port: Optional[int] = None):
    """Run the API server."""
    global app, config

    app = create_app(config_path)

    # Start scheduler if enabled
    start_scheduler(config_path)

    api_config = config.get('api', {})
    server_host = host or api_config.get('host', '0.0.0.0')
    server_port = port or api_config.get('port', 8080)

    logger.info(f"Starting API server on {server_host}:{server_port}")
    app.run(host=server_host, port=server_port, debug=False)


if __name__ == "__main__":
    import sys

    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    )

    config_path = sys.argv[1] if len(sys.argv) > 1 else None

    # Check if API is enabled
    config = load_config(config_path)
    if not config.get('api', {}).get('enabled', False):
        logger.warning("API is disabled in configuration. Set api.enabled=true to enable.")
        logger.info("Starting API server anyway (for testing)...")

    run_server(config_path=config_path)