Add local changes: scheduler integration, file locking fixes, config updates (preserving coworker's html_generator.py changes)
parent 51f618b654
commit dedd02274e

api_server.py (242 lines changed)
@@ -30,6 +30,7 @@ logger = logging.getLogger(__name__)
 
 app = None
 config = None
+scheduler_thread = None
 
 
 def cleanup_old_reports(output_dir: Path, reports_dir: Path, max_reports: int = 10):
@@ -219,8 +220,38 @@ def create_app(config_path: Optional[str] = None):
                         'sharepoint_error': True
                     }), 500
             except Exception as e:
-                logger.error(f"Failed to download from SharePoint: {e}", exc_info=True)
-                # Check if we have existing files as fallback
+                error_msg = str(e)
+                logger.error(f"Failed to download from SharePoint: {error_msg}", exc_info=True)
+
+                # Check if this is a locked file error
+                is_locked_file_error = 'locked' in error_msg.lower() or 'cannot access the file' in error_msg.lower() or 'being used by another process' in error_msg.lower()
+
+                if is_locked_file_error:
+                    # Extract filename from error if possible
+                    locked_file_match = None
+                    import re
+                    # Try to find filename in error message
+                    match = re.search(r"['\"]([^'\"]*\.xlsx?)['\"]", error_msg)
+                    if match:
+                        locked_file_match = match.group(1)
+
+                    locked_file_info = f" ({locked_file_match})" if locked_file_match else ""
+                    return jsonify({
+                        'error': f'Cannot download from SharePoint: File is locked{locked_file_info}',
+                        'details': f'A file in the reports directory is being used by another program (likely Excel). Please close Excel and any other programs that might have this file open, then try again. Error: {error_msg}',
+                        'instructions': [
+                            '1. Close Microsoft Excel completely',
+                            '2. Close any file explorer windows showing the reports folder',
+                            '3. Wait a few seconds',
+                            '4. Try generating the report again',
+                            '',
+                            'Alternatively, use manual file upload instead of SharePoint download.'
+                        ],
+                        'sharepoint_error': True,
+                        'locked_file_error': True
+                    }), 500
+
+                # Check if we have existing files as fallback (only for non-locked errors)
                 reports_dir_path = Path(report_config.get('reports_dir', 'reports'))
                 if not reports_dir_path.is_absolute():
                     script_dir = Path(__file__).parent.absolute()
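
As a quick sanity check of the filename-extraction pattern introduced above — a standalone sketch, with made-up error strings of the shape the handler is matching against:

```python
import re

# Hypothetical error messages; only the quoted-filename shape matters here.
samples = [
    "The process cannot access the file 'reports\\inventory.xlsx' because it is being used by another process",
    'File "weekly_report.xls" is locked for editing',
    "Connection timed out",  # no filename: regex yields None, generic branch applies
]

for msg in samples:
    match = re.search(r"['\"]([^'\"]*\.xlsx?)['\"]", msg)
    print(match.group(1) if match else None)
# prints: reports\inventory.xlsx, weekly_report.xls, None
```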
@@ -233,13 +264,13 @@ def create_app(config_path: Optional[str] = None):
                         downloaded_files = []  # Continue with existing files
                     else:
                         return jsonify({
-                            'error': f'SharePoint download failed: {str(e)}',
+                            'error': f'SharePoint download failed: {error_msg}',
                             'details': 'No existing files found. Please use manual file upload or fix SharePoint permissions.',
                             'sharepoint_error': True
                         }), 500
                 else:
                     return jsonify({
-                        'error': f'SharePoint download failed: {str(e)}',
+                        'error': f'SharePoint download failed: {error_msg}',
                         'details': 'Reports directory does not exist. Please use manual file upload or fix SharePoint permissions.',
                         'sharepoint_error': True
                     }), 500
@@ -286,59 +317,53 @@ def create_app(config_path: Optional[str] = None):
                         logger.error(f"Failed to clear unexpected file {file.name}: {e}")
         elif not download_from_sp:
             # Manual upload was used (download_from_sharepoint=False)
-            # Upload endpoint should have cleared old files, but double-check
-            # Only use files uploaded in the last 10 minutes to avoid combining with old files
+            # Upload endpoint should have cleared old files before saving new ones
+            # Files here should all be from the recent upload; verify their age below
             if reports_dir_path.exists():
                 excel_files = list(reports_dir_path.glob('*.xlsx')) + list(reports_dir_path.glob('*.xls'))
                 current_time = datetime.now().timestamp()
                 recent_files = []
 
+                logger.info(f"Manual upload generation: Found {len(excel_files)} file(s) in reports directory")
+
+                # Only use files modified in the last 2 minutes (very recent = just uploaded)
+                # This ensures we don't accidentally use SharePoint-downloaded files
                 for excel_file in excel_files:
                     mtime = excel_file.stat().st_mtime
-                    # Only use files modified in the last 10 minutes (should be the uploaded ones)
-                    # Increased from 5 to 10 minutes to account for upload + generation delay
-                    if current_time - mtime < 600:  # 10 minutes
+                    age_seconds = current_time - mtime
+                    # Only use files uploaded in the last 2 minutes (120 seconds)
+                    # This is tight enough to catch only the most recent upload
+                    if age_seconds < 120:  # 2 minutes
                         recent_files.append(excel_file)
                         mtime_str = datetime.fromtimestamp(mtime).strftime('%Y-%m-%d %H:%M:%S')
-                        logger.info(f"  - {excel_file.name} (modified: {mtime_str}) - will be used for manual upload generation")
+                        logger.info(f"  - {excel_file.name} (modified: {mtime_str}, age: {age_seconds:.1f}s) - will be used for manual upload generation")
                     else:
-                        logger.warning(f"  - {excel_file.name} (modified: {datetime.fromtimestamp(mtime).strftime('%Y-%m-%d %H:%M:%S')}) - skipping (too old, might be from previous run)")
+                        logger.warning(f"  - {excel_file.name} (modified: {datetime.fromtimestamp(mtime).strftime('%Y-%m-%d %H:%M:%S')}, age: {age_seconds:.1f}s) - skipping (too old, might be from SharePoint download)")
 
+                # Clear any files that are too old (likely from SharePoint)
                 if len(recent_files) < len(excel_files):
-                    logger.warning(f"Found {len(excel_files)} total file(s), but only {len(recent_files)} are recent. Clearing old files to avoid combining...")
-                    # Clear old files to ensure we only use the manually uploaded ones
+                    logger.warning(f"Found {len(excel_files)} total file(s), but only {len(recent_files)} are recent (< 2 min old). Clearing old files...")
                     for excel_file in excel_files:
                         if excel_file not in recent_files:
                             try:
                                 excel_file.unlink()
-                                logger.info(f"Cleared old file: {excel_file.name}")
+                                logger.info(f"Cleared old file (likely from SharePoint): {excel_file.name}")
                             except Exception as e:
                                 logger.warning(f"Failed to clear old file {excel_file.name}: {e}")
 
                 if len(recent_files) == 0:
-                    logger.error("Manual upload was used but no recent files found in reports directory!")
+                    logger.error("Manual upload was used but no recent files (< 2 min old) found in reports directory!")
                     logger.error("This might mean:")
                     logger.error("1. Files were not uploaded successfully")
-                    logger.error("2. Files were uploaded but cleared before generation")
+                    logger.error("2. Upload happened more than 2 minutes ago")
                     logger.error("3. File modification times are incorrect")
+                    logger.error("4. SharePoint download happened after upload")
                     return jsonify({
-                        'error': 'No files found for manual upload generation',
-                        'details': 'Files were uploaded but not found in reports directory. Please try uploading again.',
+                        'error': 'No recent files found for manual upload generation',
+                        'details': 'Files were uploaded but not found or are too old. Please try uploading again and generating immediately.',
                         'manual_upload_error': True
                     }), 400
 
-                # Verify we only have the recently uploaded files
-                all_files = list(reports_dir_path.glob('*.xlsx')) + list(reports_dir_path.glob('*.xls'))
-                if len(all_files) != len(recent_files):
-                    logger.warning(f"WARNING: Found {len(all_files)} file(s) but only {len(recent_files)} are recent!")
-                    logger.warning("Clearing old files to ensure only uploaded files are used...")
-                    for file in all_files:
-                        if file not in recent_files:
-                            try:
-                                file.unlink()
-                                logger.info(f"Cleared unexpected old file: {file.name}")
-                            except Exception as e:
-                                logger.error(f"Failed to clear unexpected file {file.name}: {e}")
-
                 logger.info(f"Will generate report from {len(recent_files)} recently uploaded file(s)")
             else:
                 logger.error("Manual upload was used but reports directory does not exist!")
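
The 2-minute window that drives this branch is a plain mtime comparison; a minimal standalone sketch of the same filter (directory path and threshold are illustrative):

```python
from datetime import datetime
from pathlib import Path

def recent_excel_files(reports_dir: Path, max_age_seconds: float = 120.0) -> list:
    """Return Excel files in reports_dir modified within the last max_age_seconds."""
    now = datetime.now().timestamp()
    candidates = list(reports_dir.glob('*.xlsx')) + list(reports_dir.glob('*.xls'))
    # st_mtime and timestamp() are both seconds since the epoch on the same clock
    return [f for f in candidates if now - f.stat().st_mtime < max_age_seconds]

# e.g. fresh = recent_excel_files(Path('reports'))
```

Note the window assumes generation is triggered promptly after upload; a pause longer than two minutes will hit the "no recent files" branch by design.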
@@ -358,6 +383,34 @@ def create_app(config_path: Optional[str] = None):
                 'sharepoint_error': True
             }), 400
 
+        # FINAL VERIFICATION: Before generation, ensure only expected files exist
+        final_files = list(reports_dir_path.glob('*.xlsx')) + list(reports_dir_path.glob('*.xls'))
+        if len(final_files) > 1:
+            logger.error(f"CRITICAL: Found {len(final_files)} Excel file(s) before generation!")
+            logger.error("This will cause data mixing. Files found:")
+            for f in final_files:
+                mtime = f.stat().st_mtime
+                mtime_str = datetime.fromtimestamp(mtime).strftime('%Y-%m-%d %H:%M:%S')
+                logger.error(f"  - {f.name} (modified: {mtime_str})")
+            logger.error("Attempting to keep only the most recent file...")
+
+            # Keep only the newest file
+            final_files_sorted = sorted(final_files, key=lambda f: f.stat().st_mtime, reverse=True)
+            newest_file = final_files_sorted[0]
+            for old_file in final_files_sorted[1:]:
+                try:
+                    old_file.unlink()
+                    logger.info(f"Removed older file before generation: {old_file.name}")
+                except Exception as e:
+                    logger.error(f"Failed to remove {old_file.name}: {e}")
+                    return jsonify({
+                        'error': f'Multiple Excel files found and cannot remove old ones',
+                        'details': f'Found {len(final_files)} files. Please ensure only one file exists. Files may be locked.',
+                        'files_found': [f.name for f in final_files]
+                    }), 400
+
+            logger.warning(f"Proceeding with only the newest file: {newest_file.name}")
+
         report_data = generate_report(
             reports_dir=str(reports_dir_path),
             output_file=output_file,
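
The keep-newest step above reduces to a single ordering by st_mtime; equivalently (a sketch with an illustrative path):

```python
from pathlib import Path

final_files = list(Path('reports').glob('*.xlsx'))  # illustrative directory
if final_files:
    newest_file = max(final_files, key=lambda f: f.stat().st_mtime)
```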
@@ -400,12 +453,16 @@ def create_app(config_path: Optional[str] = None):
     @app.route('/api/upload', methods=['POST'])
     def upload_files():
         """Upload Excel files manually. Clears old files before uploading new ones."""
+        logger.info("=== MANUAL UPLOAD REQUEST RECEIVED ===")
         try:
             if 'files' not in request.files:
+                logger.error("Upload request missing 'files' field")
                 return jsonify({'error': 'No files provided'}), 400
 
             files = request.files.getlist('files')
+            logger.info(f"Received {len(files)} file(s) in upload request")
             if not files or all(f.filename == '' for f in files):
+                logger.error("No valid files in upload request")
                 return jsonify({'error': 'No files selected'}), 400
 
             report_config = app.config['REPORT_CONFIG']
@@ -426,33 +483,70 @@ def create_app(config_path: Optional[str] = None):
 
         for old_file in old_excel_files:
             try:
-                # On Windows, files might be locked - try multiple times
-                max_retries = 3
+                # On Windows, files might be locked - try multiple times with increasing delays
+                max_retries = 5
                 retry_count = 0
-                while retry_count < max_retries:
+                cleared_this_file = False
+
+                while retry_count < max_retries and not cleared_this_file:
                     try:
                         old_file.unlink()
                         cleared_count += 1
+                        cleared_this_file = True
                         logger.info(f"Cleared old file before upload: {old_file.name}")
                         break
-                    except PermissionError:
+                    except PermissionError as pe:
                         retry_count += 1
                         if retry_count < max_retries:
+                            # Increasing delay: 0.5s, 1s, 2s, 3s
                             import time
-                            time.sleep(0.5)  # Wait 500ms before retry
+                            delay = min(0.5 * (2 ** retry_count), 3.0)
+                            logger.warning(f"File {old_file.name} is locked (attempt {retry_count}/{max_retries}), waiting {delay}s...")
+                            time.sleep(delay)
                         else:
-                            raise
+                            # Last attempt failed - try renaming instead of deleting
+                            logger.warning(f"Cannot delete {old_file.name}, trying to rename instead...")
+                            try:
+                                import time
+                                timestamp = int(time.time())
+                                backup_name = f"{old_file.stem}_backup_{timestamp}{old_file.suffix}"
+                                backup_path = old_file.parent / backup_name
+                                old_file.rename(backup_path)
+                                cleared_count += 1
+                                cleared_this_file = True
+                                logger.info(f"Renamed locked file to backup: {old_file.name} -> {backup_name}")
+                            except Exception as rename_error:
+                                logger.error(f"Could not rename file either: {rename_error}")
+                                raise pe  # Raise original PermissionError
+                    except Exception as e:
+                        if retry_count >= max_retries - 1:
+                            raise
+                        retry_count += 1
+                        import time
+                        time.sleep(1)
+
+                if not cleared_this_file:
+                    failed_to_clear.append(old_file.name)
+                    logger.error(f"Failed to clear old file {old_file.name} after {max_retries} attempts")
             except Exception as e:
-                failed_to_clear.append(old_file.name)
+                if old_file.name not in failed_to_clear:
+                    failed_to_clear.append(old_file.name)
                 logger.error(f"Failed to clear old file {old_file.name}: {e}")
 
         # If any files failed to clear, fail the upload to prevent mixing old and new data
         if failed_to_clear:
             logger.error(f"CRITICAL: Failed to clear {len(failed_to_clear)} file(s) before upload: {failed_to_clear}")
+            locked_files_list = ', '.join(failed_to_clear)
             return jsonify({
-                'error': f'Failed to clear {len(failed_to_clear)} old file(s) before upload. Please ensure files are not locked or in use.',
+                'error': f'Cannot upload: {len(failed_to_clear)} file(s) are locked',
                 'failed_files': failed_to_clear,
-                'details': 'Old files must be cleared before upload to ensure report generation uses only the new file(s). Files may be locked by Excel or another process.'
+                'details': f'File(s) {locked_files_list} are being used by another program (likely Excel). Please close Excel and any other programs that might have these files open, then try again.',
+                'instructions': [
+                    '1. Close Microsoft Excel completely',
+                    '2. Close any file explorer windows showing these files',
+                    '3. Wait a few seconds',
+                    '4. Try uploading again'
+                ]
             }), 500
 
         if cleared_count > 0:
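
The delete-with-backoff-then-rename sequence above now exists in two near-identical copies (here and in the SharePoint downloader at the end of this diff). A sketch of how it could be factored into one helper — the function name is hypothetical; the delays and backup naming mirror the diff:

```python
import time
from pathlib import Path

def clear_locked_file(path: Path, max_retries: int = 5) -> bool:
    """Try to delete path, backing off on PermissionError; rename as a last resort.

    Returns True once the file is deleted or renamed out of the way.
    """
    for attempt in range(1, max_retries + 1):
        try:
            path.unlink()
            return True
        except PermissionError:
            if attempt < max_retries:
                # Capped exponential backoff: 0.5s, 1s, 2s, 3s, 3s, ...
                time.sleep(min(0.5 * (2 ** (attempt - 1)), 3.0))
            else:
                # Last resort: move the locked file aside instead of deleting it
                try:
                    backup = path.with_name(f"{path.stem}_backup_{int(time.time())}{path.suffix}")
                    path.rename(backup)
                    return True
                except OSError:
                    return False
    return False
```

On Windows a file held open by Excel often resists rename as well as delete, so the boolean return (rather than an exception) lets callers collect failures into failed_to_clear and report them all at once.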
@@ -460,6 +554,30 @@ def create_app(config_path: Optional[str] = None):
         else:
             logger.info("No old Excel files found to clear (reports directory was empty)")
 
+        # VERIFY: Double-check that all Excel files are actually gone
+        remaining_files = list(reports_dir.glob('*.xlsx')) + list(reports_dir.glob('*.xls'))
+        if remaining_files:
+            logger.error(f"CRITICAL: After clearing, {len(remaining_files)} file(s) still exist: {[f.name for f in remaining_files]}")
+            logger.error("These files are likely locked. Attempting force removal...")
+            force_failed = []
+            for remaining_file in remaining_files:
+                try:
+                    remaining_file.unlink()
+                    logger.info(f"Force-removed locked file: {remaining_file.name}")
+                except Exception as e:
+                    force_failed.append(remaining_file.name)
+                    logger.error(f"CRITICAL: Cannot remove locked file {remaining_file.name}: {e}")
+
+            if force_failed:
+                logger.error(f"CRITICAL: {len(force_failed)} file(s) still locked after force removal: {force_failed}")
+                return jsonify({
+                    'error': f'Cannot upload: {len(force_failed)} file(s) are locked and cannot be deleted',
+                    'failed_files': force_failed,
+                    'details': 'Please close Excel or any other program using these files, then try again.'
+                }), 500
+
+        logger.info("✓ Verified: All old Excel files cleared successfully before upload")
+
         uploaded_count = 0
         uploaded_files = []
 
@@ -475,10 +593,20 @@ def create_app(config_path: Optional[str] = None):
 
             # Save file to reports directory
             file_path = reports_dir / filename
+            logger.info(f"Saving uploaded file: {filename} -> {file_path}")
             file.save(str(file_path))
-            uploaded_count += 1
-            uploaded_files.append(filename)
-            logger.info(f"Uploaded file: {filename} -> {file_path}")
+
+            # Verify file was saved and get its modification time
+            if file_path.exists():
+                mtime = file_path.stat().st_mtime
+                mtime_str = datetime.fromtimestamp(mtime).strftime('%Y-%m-%d %H:%M:%S')
+                file_size = file_path.stat().st_size
+                logger.info(f"Successfully saved file: {filename} (size: {file_size} bytes, modified: {mtime_str})")
+                uploaded_count += 1
+                uploaded_files.append(filename)
+            else:
+                logger.error(f"CRITICAL: File was not saved! {file_path} does not exist after save()")
+                raise Exception(f"Failed to save file {filename}")
 
         if uploaded_count == 0:
             return jsonify({'error': 'No valid Excel files uploaded'}), 400
@@ -664,10 +792,44 @@ def create_app(config_path: Optional[str] = None):
     return app
 
 
+def start_scheduler(config_path: Optional[str] = None):
+    """Start the scheduler in a background thread."""
+    global scheduler_thread
+
+    scheduler_config = config.get('scheduler', {})
+    if not scheduler_config.get('enabled'):
+        logger.info("Scheduler is disabled in configuration")
+        return
+
+    try:
+        from scheduler import ReportScheduler
+        import threading
+
+        def run_scheduler():
+            try:
+                scheduler = ReportScheduler(config_path=config_path)
+                scheduler.start()
+            except Exception as e:
+                logger.error(f"Scheduler error: {e}", exc_info=True)
+
+        scheduler_thread = threading.Thread(target=run_scheduler, daemon=True)
+        scheduler_thread.start()
+        logger.info("Scheduler started in background thread")
+    except ImportError:
+        logger.warning("Scheduler module not available. Install apscheduler to enable scheduling.")
+    except Exception as e:
+        logger.error(f"Failed to start scheduler: {e}", exc_info=True)
+
+
 def run_server(config_path: Optional[str] = None, host: Optional[str] = None, port: Optional[int] = None):
     """Run the API server."""
+    global app, config
+
     app = create_app(config_path)
+
+    # Start scheduler if enabled
+    start_scheduler(config_path)
+
     api_config = config.get('api', {})
     server_host = host or api_config.get('host', '0.0.0.0')
     server_port = port or api_config.get('port', 8080)
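
start_scheduler runs ReportScheduler on a daemon thread, so the scheduler dies with the API process instead of keeping it alive at shutdown. The pattern in isolation (the worker body is a stand-in for ReportScheduler(...).start()):

```python
import threading
import time

def run_scheduler():
    # Stand-in for ReportScheduler(config_path=...).start()
    while True:
        time.sleep(60)

# daemon=True: Python will not wait for this thread at interpreter exit,
# so stopping the API server also stops the scheduler.
scheduler_thread = threading.Thread(target=run_scheduler, daemon=True)
scheduler_thread.start()
```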

config.py (10 lines changed)
@@ -34,10 +34,10 @@ DEFAULT_CONFIG = {
     },
     'scheduler': {
         'enabled': False,
-        'schedule_type': 'interval',  # 'interval', 'cron', or 'once'
+        'schedule_type': 'cron',  # 'interval', 'cron', or 'once'
         'interval_hours': 24,  # For interval type
-        'cron_expression': '0 8 * * *',  # For cron type (8 AM daily)
-        'timezone': 'America/New_York'
+        'cron_expression': '0 10 * * *',  # For cron type (10 AM EST/EDT daily)
+        'timezone': 'America/New_York'  # EST/EDT timezone
     },
     'api': {
         'enabled': False,
@@ -186,10 +186,14 @@ def _load_from_env(config: Dict) -> Dict:
     # Scheduler settings
     if os.getenv('SCHEDULER_ENABLED'):
         config['scheduler']['enabled'] = os.getenv('SCHEDULER_ENABLED').lower() == 'true'
+    if os.getenv('SCHEDULER_SCHEDULE_TYPE'):
+        config['scheduler']['schedule_type'] = os.getenv('SCHEDULER_SCHEDULE_TYPE')
     if os.getenv('SCHEDULER_INTERVAL_HOURS'):
        config['scheduler']['interval_hours'] = int(os.getenv('SCHEDULER_INTERVAL_HOURS'))
     if os.getenv('SCHEDULER_CRON'):
         config['scheduler']['cron_expression'] = os.getenv('SCHEDULER_CRON')
+    if os.getenv('SCHEDULER_TIMEZONE'):
+        config['scheduler']['timezone'] = os.getenv('SCHEDULER_TIMEZONE')
 
     # API settings
     if os.getenv('API_ENABLED'):
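
The new defaults mean 10:00 America/New_York daily, which tracks EST/EDT across DST changes. A quick, hedged check of how APScheduler (assumed 3.x, per the imports in scheduler.py below) parses the expression:

```python
from apscheduler.triggers.cron import CronTrigger

trigger = CronTrigger.from_crontab('0 10 * * *', timezone='America/New_York')
print(trigger)  # roughly: cron[month='*', day='*', day_of_week='*', hour='10', minute='0']
```

The new SCHEDULER_SCHEDULE_TYPE and SCHEDULER_TIMEZONE environment variables override these defaults the same way the existing SCHEDULER_* variables do, e.g. SCHEDULER_TIMEZONE=UTC.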

@@ -679,6 +679,22 @@ def preprocess_excel_files(reports_dir: str = "reports", current_date: Optional[
     if not excel_files:
         return f"No Excel files found in '{reports_dir}' directory.", {}
 
+    # Log which files will be processed
+    import logging
+    logger = logging.getLogger(__name__)
+    logger.info(f"Processing {len(excel_files)} Excel file(s) from {reports_dir}:")
+    for excel_file in excel_files:
+        file_size = excel_file.stat().st_size
+        mtime = excel_file.stat().st_mtime
+        mtime_str = datetime.fromtimestamp(mtime).strftime('%Y-%m-%d %H:%M:%S')
+        logger.info(f"  - {excel_file.name} ({file_size} bytes, modified: {mtime_str})")
+
+    # WARNING: If multiple files found, this will combine data from all files
+    if len(excel_files) > 1:
+        logger.warning(f"WARNING: Found {len(excel_files)} Excel file(s). Report will combine data from ALL files!")
+        logger.warning("This may cause incorrect results. Only ONE file should exist in the reports directory.")
+        logger.warning(f"Files found: {[f.name for f in excel_files]}")
+
     # First pass: collect all items with raw vendor names
     all_raw_items = []
     for excel_file in excel_files:

scheduler.py (80 lines changed)
@@ -12,6 +12,7 @@ from pathlib import Path
 
 try:
     from apscheduler.schedulers.blocking import BlockingScheduler
+    from apscheduler.schedulers.background import BackgroundScheduler
     from apscheduler.triggers.interval import IntervalTrigger
     from apscheduler.triggers.cron import CronTrigger
     from apscheduler.triggers.date import DateTrigger
@@ -26,6 +27,52 @@ from sharepoint_downloader import download_from_sharepoint
 
 logger = logging.getLogger(__name__)
 
+
+# Cleanup function (duplicated from api_server to avoid circular import)
+def cleanup_old_reports(output_dir: Path, reports_dir: Path, max_reports: int = 10):
+    """
+    Cleanup old reports and Excel files, keeping only the last max_reports.
+
+    Args:
+        output_dir: Directory containing report HTML/JSON files
+        reports_dir: Directory containing Excel files
+        max_reports: Maximum number of reports to keep
+    """
+    try:
+        # Get all report HTML files sorted by modification time (newest first)
+        html_files = sorted(output_dir.glob('report-*.html'), key=lambda p: p.stat().st_mtime, reverse=True)
+
+        if len(html_files) <= max_reports:
+            return  # No cleanup needed
+
+        # Get reports to delete (oldest ones)
+        reports_to_delete = html_files[max_reports:]
+
+        deleted_count = 0
+        for html_file in reports_to_delete:
+            report_id = html_file.stem
+
+            # Delete HTML file
+            try:
+                html_file.unlink()
+                logger.info(f"Deleted old report HTML: {html_file.name}")
+                deleted_count += 1
+            except Exception as e:
+                logger.warning(f"Failed to delete {html_file.name}: {e}")
+
+            # Delete corresponding JSON file
+            json_file = output_dir / f"{report_id}.json"
+            if json_file.exists():
+                try:
+                    json_file.unlink()
+                    logger.info(f"Deleted old report JSON: {json_file.name}")
+                except Exception as e:
+                    logger.warning(f"Failed to delete {json_file.name}: {e}")
+
+        if deleted_count > 0:
+            logger.info(f"Cleanup completed: deleted {deleted_count} old report(s)")
+    except Exception as e:
+        logger.error(f"Error during cleanup: {e}", exc_info=True)
+
+
 class ReportScheduler:
     """Manages scheduled report generation."""
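
Usage matches the API-side helper; a hedged example with illustrative paths (note that, as written, only report-*.html files and their .json twins in output_dir are deleted — the reports_dir argument is accepted but not touched by the body above):

```python
from pathlib import Path

# Keep the 10 newest report-<timestamp>.html files plus matching JSON in output/
cleanup_old_reports(Path('output'), Path('reports'), max_reports=10)
```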
@@ -44,7 +91,14 @@ class ReportScheduler:
         )
 
         self.config = load_config(config_path)
-        self.scheduler = BlockingScheduler(timezone=self.config['scheduler']['timezone'])
+        scheduler_timezone = self.config['scheduler'].get('timezone', 'America/New_York')
+        # Use BackgroundScheduler for thread compatibility (when run from API server)
+        # Use BlockingScheduler when run standalone
+        self.use_background = True  # Set to False if running standalone
+        if self.use_background:
+            self.scheduler = BackgroundScheduler(timezone=scheduler_timezone)
+        else:
+            self.scheduler = BlockingScheduler(timezone=scheduler_timezone)
         self.scheduler_config = self.config['scheduler']
         self.sharepoint_config = self.config.get('sharepoint', {})
         self.report_config = self.config.get('report', {})
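
The switch matters because BlockingScheduler.start() never returns, which would wedge the API thread that launches the scheduler, while BackgroundScheduler.start() returns immediately and runs jobs on its own thread pool. A minimal contrast (assuming APScheduler 3.x):

```python
from apscheduler.schedulers.background import BackgroundScheduler

bg = BackgroundScheduler(timezone='America/New_York')
bg.add_job(lambda: print('tick'), 'interval', seconds=30)
bg.start()      # returns immediately; jobs run on worker threads
# ... application continues; call bg.shutdown() when exiting

# By contrast, BlockingScheduler.start() blocks the calling thread
# until shutdown() or KeyboardInterrupt, which suits standalone runs.
```

Flipping use_background after construction (as the __main__ block further down does) works, but it builds a throwaway BackgroundScheduler first; a constructor parameter would express the same intent more directly.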
@@ -78,10 +132,15 @@ class ReportScheduler:
             logger.error(f"Failed to download from SharePoint: {e}")
             # Continue with report generation even if download fails
 
-        # Generate report
+        # Generate report with timestamp
         logger.info("Generating report...")
         reports_dir = self.report_config.get('reports_dir', 'reports')
-        output_file = Path(self.report_config.get('output_dir', 'output')) / 'report.json'
+        output_dir = Path(self.report_config.get('output_dir', 'output'))
+
+        # Create timestamped filename (same format as API server)
+        timestamp = datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
+        report_id = f"report-{timestamp}"
+        output_file = output_dir / f"{report_id}.json"
+
         report_data = generate_report(
             reports_dir=reports_dir,
@@ -91,6 +150,12 @@ class ReportScheduler:
 
         if report_data:
             logger.info("✓ Scheduled report generation completed successfully")
+
+            # Cleanup old reports (keep last 10)
+            try:
+                cleanup_old_reports(output_dir, Path(reports_dir), max_reports=10)
+            except Exception as e:
+                logger.warning(f"Failed to cleanup old reports: {e}")
         else:
             logger.error("✗ Scheduled report generation failed")
 
@@ -150,11 +215,18 @@ class ReportScheduler:
             replace_existing=True
         )
 
-        logger.info("Scheduler started. Press Ctrl+C to stop.")
-        try:
-            self.scheduler.start()
-        except KeyboardInterrupt:
-            logger.info("Scheduler stopped by user")
+        if self.use_background:
+            # BackgroundScheduler - just start it, don't block
+            self.scheduler.start()
+            logger.info("Scheduler started in background mode")
+        else:
+            # BlockingScheduler - block until interrupted
+            logger.info("Scheduler started. Press Ctrl+C to stop.")
+            try:
+                self.scheduler.start()
+            except KeyboardInterrupt:
+                logger.info("Scheduler stopped by user")
+                self.scheduler.shutdown()
 
 
 if __name__ == "__main__":
@@ -168,5 +240,7 @@ if __name__ == "__main__":
     config_path = sys.argv[1] if len(sys.argv) > 1 else None
 
     scheduler = ReportScheduler(config_path=config_path)
+    scheduler.use_background = False  # Use BlockingScheduler for standalone mode
+    scheduler.scheduler = BlockingScheduler(timezone=scheduler.config['scheduler'].get('timezone', 'America/New_York'))
     scheduler.start()

sharepoint_downloader.py

@@ -284,42 +284,92 @@ class SharePointDownloader:
 
         # ALWAYS clear ALL existing Excel files before downloading (to ensure only new files are used)
         # This is critical to prevent combining multiple files
+        # Wait a moment first to allow any previous file operations to complete
+        import time
+        time.sleep(1.0)  # Give file handles time to close
+
         existing_files = list(local_dir_path.glob('*.xlsx')) + list(local_dir_path.glob('*.xls'))
         cleared_count = 0
         failed_to_clear = []
 
         for old_file in existing_files:
             try:
-                # On Windows, files might be locked - try multiple times
-                max_retries = 3
+                # On Windows, files might be locked - try multiple times with increasing delays
+                max_retries = 5
                 retry_count = 0
-                while retry_count < max_retries:
+                cleared_this_file = False
+
+                while retry_count < max_retries and not cleared_this_file:
                     try:
                         old_file.unlink()
                         cleared_count += 1
+                        cleared_this_file = True
                         logger.info(f"Cleared existing file before download: {old_file.name}")
                         break
-                    except PermissionError:
+                    except PermissionError as pe:
                         retry_count += 1
                         if retry_count < max_retries:
+                            # Increasing delay: 0.5s, 1s, 2s, 3s
                             import time
-                            time.sleep(0.5)  # Wait 500ms before retry
+                            delay = min(0.5 * (2 ** retry_count), 3.0)
+                            logger.warning(f"File {old_file.name} is locked (attempt {retry_count}/{max_retries}), waiting {delay}s...")
+                            time.sleep(delay)
                         else:
-                            raise
+                            # Last attempt failed - try renaming instead of deleting
+                            logger.warning(f"Cannot delete {old_file.name}, trying to rename instead...")
+                            try:
+                                import time
+                                timestamp = int(time.time())
+                                backup_name = f"{old_file.stem}_backup_{timestamp}{old_file.suffix}"
+                                backup_path = old_file.parent / backup_name
+                                old_file.rename(backup_path)
+                                cleared_count += 1
+                                cleared_this_file = True
+                                logger.info(f"Renamed locked file to backup: {old_file.name} -> {backup_name}")
+                            except Exception as rename_error:
+                                logger.error(f"Could not rename file either: {rename_error}")
+                                raise pe  # Raise original PermissionError
+                    except Exception as e:
+                        if retry_count >= max_retries - 1:
+                            raise
+                        retry_count += 1
+                        import time
+                        time.sleep(1)
+
+                if not cleared_this_file:
+                    failed_to_clear.append(old_file.name)
+                    logger.error(f"Failed to clear existing file {old_file.name} after {max_retries} attempts")
             except Exception as e:
-                failed_to_clear.append(old_file.name)
+                if old_file.name not in failed_to_clear:
+                    failed_to_clear.append(old_file.name)
                 logger.error(f"Failed to clear existing file {old_file.name}: {e}")
 
         if failed_to_clear:
             logger.error(f"CRITICAL: Failed to clear {len(failed_to_clear)} file(s) before download: {failed_to_clear}")
             logger.error("This will cause data mixing! Files may be locked by another process.")
-            # Don't fail here - let the download proceed, but log the warning
+            logger.error("ABORTING download to prevent combining multiple files.")
+            raise Exception(f"Cannot download from SharePoint: {len(failed_to_clear)} file(s) could not be cleared. Please close any programs that might have these files open: {failed_to_clear}")
 
         if cleared_count > 0:
             logger.info(f"Cleared {cleared_count} existing Excel file(s) before downloading from SharePoint")
         else:
             logger.info("No existing Excel files found to clear (reports directory was empty)")
+
+        # VERIFY: Double-check that all Excel files are actually gone
+        remaining_files = list(local_dir_path.glob('*.xlsx')) + list(local_dir_path.glob('*.xls'))
+        if remaining_files:
+            logger.error(f"CRITICAL: After clearing, {len(remaining_files)} file(s) still exist: {[f.name for f in remaining_files]}")
+            logger.error("These files are likely locked. Attempting force removal...")
+            for remaining_file in remaining_files:
+                try:
+                    remaining_file.unlink()
+                    logger.info(f"Force-removed locked file: {remaining_file.name}")
+                except Exception as e:
+                    logger.error(f"CRITICAL: Cannot remove locked file {remaining_file.name}: {e}")
+                    raise Exception(f"Cannot proceed: File {remaining_file.name} is locked and cannot be deleted. Please close Excel or any other program using this file.")
+
+        logger.info("✓ Verified: All old Excel files cleared successfully")
+
         # List files in folder
         files = self.list_files_in_folder(folder_path, file_pattern)
 
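
A small self-contained exercise of the clear-then-verify contract this block enforces — pure tempfile, no SharePoint involved:

```python
import tempfile
from pathlib import Path

with tempfile.TemporaryDirectory() as tmp:
    local_dir_path = Path(tmp)
    (local_dir_path / 'stale.xlsx').write_bytes(b'old')
    (local_dir_path / 'stale2.xls').write_bytes(b'old')

    # Clear pass (no retries needed here since nothing holds the files open)
    for old_file in list(local_dir_path.glob('*.xlsx')) + list(local_dir_path.glob('*.xls')):
        old_file.unlink()

    # Verify pass: nothing with an Excel extension may remain
    remaining = list(local_dir_path.glob('*.xlsx')) + list(local_dir_path.glob('*.xls'))
    assert not remaining, f"still present: {[f.name for f in remaining]}"
```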