Compare commits

..

No commits in common. "65b06aaf92f4db39c2043c7683ce2d4dbd542ed8" and "51f618b6547af34ae94847377aa63a3c68da3a0d" have entirely different histories.

23 changed files with 30448 additions and 42733 deletions

View File

@ -30,7 +30,6 @@ logger = logging.getLogger(__name__)
app = None
config = None
scheduler_thread = None
def cleanup_old_reports(output_dir: Path, reports_dir: Path, max_reports: int = 10):
@ -220,38 +219,8 @@ def create_app(config_path: Optional[str] = None):
'sharepoint_error': True
}), 500
except Exception as e:
error_msg = str(e)
logger.error(f"Failed to download from SharePoint: {error_msg}", exc_info=True)
# Check if this is a locked file error
is_locked_file_error = 'locked' in error_msg.lower() or 'cannot access the file' in error_msg.lower() or 'being used by another process' in error_msg.lower()
if is_locked_file_error:
# Extract filename from error if possible
locked_file_match = None
import re
# Try to find filename in error message
match = re.search(r"['\"]([^'\"]*\.xlsx?)['\"]", error_msg)
if match:
locked_file_match = match.group(1)
locked_file_info = f" ({locked_file_match})" if locked_file_match else ""
return jsonify({
'error': f'Cannot download from SharePoint: File is locked{locked_file_info}',
'details': f'A file in the reports directory is being used by another program (likely Excel). Please close Excel and any other programs that might have this file open, then try again. Error: {error_msg}',
'instructions': [
'1. Close Microsoft Excel completely',
'2. Close any file explorer windows showing the reports folder',
'3. Wait a few seconds',
'4. Try generating the report again',
'',
'Alternatively, use manual file upload instead of SharePoint download.'
],
'sharepoint_error': True,
'locked_file_error': True
}), 500
# Check if we have existing files as fallback (only for non-locked errors)
logger.error(f"Failed to download from SharePoint: {e}", exc_info=True)
# Check if we have existing files as fallback
reports_dir_path = Path(report_config.get('reports_dir', 'reports'))
if not reports_dir_path.is_absolute():
script_dir = Path(__file__).parent.absolute()
@ -264,13 +233,13 @@ def create_app(config_path: Optional[str] = None):
downloaded_files = [] # Continue with existing files
else:
return jsonify({
'error': f'SharePoint download failed: {error_msg}',
'error': f'SharePoint download failed: {str(e)}',
'details': 'No existing files found. Please use manual file upload or fix SharePoint permissions.',
'sharepoint_error': True
}), 500
else:
return jsonify({
'error': f'SharePoint download failed: {error_msg}',
'error': f'SharePoint download failed: {str(e)}',
'details': 'Reports directory does not exist. Please use manual file upload or fix SharePoint permissions.',
'sharepoint_error': True
}), 500
@ -317,53 +286,59 @@ def create_app(config_path: Optional[str] = None):
logger.error(f"Failed to clear unexpected file {file.name}: {e}")
elif not download_from_sp:
# Manual upload was used (download_from_sharepoint=False)
# Upload endpoint should have cleared old files before saving new ones
# Use ALL files in the directory (they should all be from the recent upload)
# Upload endpoint should have cleared old files, but double-check
# Only use files uploaded in the last 10 minutes to avoid combining with old files
if reports_dir_path.exists():
excel_files = list(reports_dir_path.glob('*.xlsx')) + list(reports_dir_path.glob('*.xls'))
current_time = datetime.now().timestamp()
recent_files = []
logger.info(f"Manual upload generation: Found {len(excel_files)} file(s) in reports directory")
# Only use files modified in the last 2 minutes (very recent = just uploaded)
# This ensures we don't accidentally use SharePoint-downloaded files
for excel_file in excel_files:
mtime = excel_file.stat().st_mtime
age_seconds = current_time - mtime
# Only use files uploaded in the last 2 minutes (120 seconds)
# This is tight enough to catch only the most recent upload
if age_seconds < 120: # 2 minutes
# Only use files modified in the last 10 minutes (should be the uploaded ones)
# Increased from 5 to 10 minutes to account for upload + generation delay
if current_time - mtime < 600: # 10 minutes
recent_files.append(excel_file)
mtime_str = datetime.fromtimestamp(mtime).strftime('%Y-%m-%d %H:%M:%S')
logger.info(f" - {excel_file.name} (modified: {mtime_str}, age: {age_seconds:.1f}s) - will be used for manual upload generation")
logger.info(f" - {excel_file.name} (modified: {mtime_str}) - will be used for manual upload generation")
else:
logger.warning(f" - {excel_file.name} (modified: {datetime.fromtimestamp(mtime).strftime('%Y-%m-%d %H:%M:%S')}, age: {age_seconds:.1f}s) - skipping (too old, might be from SharePoint download)")
logger.warning(f" - {excel_file.name} (modified: {datetime.fromtimestamp(mtime).strftime('%Y-%m-%d %H:%M:%S')}) - skipping (too old, might be from previous run)")
# Clear any files that are too old (likely from SharePoint)
if len(recent_files) < len(excel_files):
logger.warning(f"Found {len(excel_files)} total file(s), but only {len(recent_files)} are recent (< 2 min old). Clearing old files...")
logger.warning(f"Found {len(excel_files)} total file(s), but only {len(recent_files)} are recent. Clearing old files to avoid combining...")
# Clear old files to ensure we only use the manually uploaded ones
for excel_file in excel_files:
if excel_file not in recent_files:
try:
excel_file.unlink()
logger.info(f"Cleared old file (likely from SharePoint): {excel_file.name}")
logger.info(f"Cleared old file: {excel_file.name}")
except Exception as e:
logger.warning(f"Failed to clear old file {excel_file.name}: {e}")
if len(recent_files) == 0:
logger.error("Manual upload was used but no recent files (< 2 min old) found in reports directory!")
logger.error("Manual upload was used but no recent files found in reports directory!")
logger.error("This might mean:")
logger.error("1. Files were not uploaded successfully")
logger.error("2. Upload happened more than 2 minutes ago")
logger.error("2. Files were uploaded but cleared before generation")
logger.error("3. File modification times are incorrect")
logger.error("4. SharePoint download happened after upload")
return jsonify({
'error': 'No recent files found for manual upload generation',
'details': 'Files were uploaded but not found or are too old. Please try uploading again and generating immediately.',
'error': 'No files found for manual upload generation',
'details': 'Files were uploaded but not found in reports directory. Please try uploading again.',
'manual_upload_error': True
}), 400
# Verify we only have the recently uploaded files
all_files = list(reports_dir_path.glob('*.xlsx')) + list(reports_dir_path.glob('*.xls'))
if len(all_files) != len(recent_files):
logger.warning(f"WARNING: Found {len(all_files)} file(s) but only {len(recent_files)} are recent!")
logger.warning("Clearing old files to ensure only uploaded files are used...")
for file in all_files:
if file not in recent_files:
try:
file.unlink()
logger.info(f"Cleared unexpected old file: {file.name}")
except Exception as e:
logger.error(f"Failed to clear unexpected file {file.name}: {e}")
logger.info(f"Will generate report from {len(recent_files)} recently uploaded file(s)")
else:
logger.error("Manual upload was used but reports directory does not exist!")
@ -383,34 +358,6 @@ def create_app(config_path: Optional[str] = None):
'sharepoint_error': True
}), 400
# FINAL VERIFICATION: Before generation, ensure only expected files exist
final_files = list(reports_dir_path.glob('*.xlsx')) + list(reports_dir_path.glob('*.xls'))
if len(final_files) > 1:
logger.error(f"CRITICAL: Found {len(final_files)} Excel file(s) before generation!")
logger.error("This will cause data mixing. Files found:")
for f in final_files:
mtime = f.stat().st_mtime
mtime_str = datetime.fromtimestamp(mtime).strftime('%Y-%m-%d %H:%M:%S')
logger.error(f" - {f.name} (modified: {mtime_str})")
logger.error("Attempting to keep only the most recent file...")
# Keep only the newest file
final_files_sorted = sorted(final_files, key=lambda f: f.stat().st_mtime, reverse=True)
newest_file = final_files_sorted[0]
for old_file in final_files_sorted[1:]:
try:
old_file.unlink()
logger.info(f"Removed older file before generation: {old_file.name}")
except Exception as e:
logger.error(f"Failed to remove {old_file.name}: {e}")
return jsonify({
'error': f'Multiple Excel files found and cannot remove old ones',
'details': f'Found {len(final_files)} files. Please ensure only one file exists. Files may be locked.',
'files_found': [f.name for f in final_files]
}), 400
logger.warning(f"Proceeding with only the newest file: {newest_file.name}")
report_data = generate_report(
reports_dir=str(reports_dir_path),
output_file=output_file,
@ -453,16 +400,12 @@ def create_app(config_path: Optional[str] = None):
@app.route('/api/upload', methods=['POST'])
def upload_files():
"""Upload Excel files manually. Clears old files before uploading new ones."""
logger.info("=== MANUAL UPLOAD REQUEST RECEIVED ===")
try:
if 'files' not in request.files:
logger.error("Upload request missing 'files' field")
return jsonify({'error': 'No files provided'}), 400
files = request.files.getlist('files')
logger.info(f"Received {len(files)} file(s) in upload request")
if not files or all(f.filename == '' for f in files):
logger.error("No valid files in upload request")
return jsonify({'error': 'No files selected'}), 400
report_config = app.config['REPORT_CONFIG']
@ -483,70 +426,33 @@ def create_app(config_path: Optional[str] = None):
for old_file in old_excel_files:
try:
# On Windows, files might be locked - try multiple times with increasing delays
max_retries = 5
# On Windows, files might be locked - try multiple times
max_retries = 3
retry_count = 0
cleared_this_file = False
while retry_count < max_retries and not cleared_this_file:
while retry_count < max_retries:
try:
old_file.unlink()
cleared_count += 1
cleared_this_file = True
logger.info(f"Cleared old file before upload: {old_file.name}")
break
except PermissionError as pe:
except PermissionError:
retry_count += 1
if retry_count < max_retries:
# Increasing delay: 0.5s, 1s, 2s, 3s
import time
delay = min(0.5 * (2 ** retry_count), 3.0)
logger.warning(f"File {old_file.name} is locked (attempt {retry_count}/{max_retries}), waiting {delay}s...")
time.sleep(delay)
time.sleep(0.5) # Wait 500ms before retry
else:
# Last attempt failed - try renaming instead of deleting
logger.warning(f"Cannot delete {old_file.name}, trying to rename instead...")
try:
import time
timestamp = int(time.time())
backup_name = f"{old_file.stem}_backup_{timestamp}{old_file.suffix}"
backup_path = old_file.parent / backup_name
old_file.rename(backup_path)
cleared_count += 1
cleared_this_file = True
logger.info(f"Renamed locked file to backup: {old_file.name} -> {backup_name}")
except Exception as rename_error:
logger.error(f"Could not rename file either: {rename_error}")
raise pe # Raise original PermissionError
except Exception as e:
if retry_count >= max_retries - 1:
raise
retry_count += 1
import time
time.sleep(1)
if not cleared_this_file:
failed_to_clear.append(old_file.name)
logger.error(f"Failed to clear old file {old_file.name} after {max_retries} attempts")
except Exception as e:
if old_file.name not in failed_to_clear:
failed_to_clear.append(old_file.name)
failed_to_clear.append(old_file.name)
logger.error(f"Failed to clear old file {old_file.name}: {e}")
# If any files failed to clear, fail the upload to prevent mixing old and new data
if failed_to_clear:
logger.error(f"CRITICAL: Failed to clear {len(failed_to_clear)} file(s) before upload: {failed_to_clear}")
locked_files_list = ', '.join(failed_to_clear)
return jsonify({
'error': f'Cannot upload: {len(failed_to_clear)} file(s) are locked',
'error': f'Failed to clear {len(failed_to_clear)} old file(s) before upload. Please ensure files are not locked or in use.',
'failed_files': failed_to_clear,
'details': f'File(s) {locked_files_list} are being used by another program (likely Excel). Please close Excel and any other programs that might have these files open, then try again.',
'instructions': [
'1. Close Microsoft Excel completely',
'2. Close any file explorer windows showing these files',
'3. Wait a few seconds',
'4. Try uploading again'
]
'details': 'Old files must be cleared before upload to ensure report generation uses only the new file(s). Files may be locked by Excel or another process.'
}), 500
if cleared_count > 0:
@ -554,30 +460,6 @@ def create_app(config_path: Optional[str] = None):
else:
logger.info("No old Excel files found to clear (reports directory was empty)")
# VERIFY: Double-check that all Excel files are actually gone
remaining_files = list(reports_dir.glob('*.xlsx')) + list(reports_dir.glob('*.xls'))
if remaining_files:
logger.error(f"CRITICAL: After clearing, {len(remaining_files)} file(s) still exist: {[f.name for f in remaining_files]}")
logger.error("These files are likely locked. Attempting force removal...")
force_failed = []
for remaining_file in remaining_files:
try:
remaining_file.unlink()
logger.info(f"Force-removed locked file: {remaining_file.name}")
except Exception as e:
force_failed.append(remaining_file.name)
logger.error(f"CRITICAL: Cannot remove locked file {remaining_file.name}: {e}")
if force_failed:
logger.error(f"CRITICAL: {len(force_failed)} file(s) still locked after force removal: {force_failed}")
return jsonify({
'error': f'Cannot upload: {len(force_failed)} file(s) are locked and cannot be deleted',
'failed_files': force_failed,
'details': 'Please close Excel or any other program using these files, then try again.'
}), 500
logger.info("✓ Verified: All old Excel files cleared successfully before upload")
uploaded_count = 0
uploaded_files = []
@ -593,20 +475,10 @@ def create_app(config_path: Optional[str] = None):
# Save file to reports directory
file_path = reports_dir / filename
logger.info(f"Saving uploaded file: {filename} -> {file_path}")
file.save(str(file_path))
# Verify file was saved and get its modification time
if file_path.exists():
mtime = file_path.stat().st_mtime
mtime_str = datetime.fromtimestamp(mtime).strftime('%Y-%m-%d %H:%M:%S')
file_size = file_path.stat().st_size
logger.info(f"Successfully saved file: {filename} (size: {file_size} bytes, modified: {mtime_str})")
uploaded_count += 1
uploaded_files.append(filename)
else:
logger.error(f"CRITICAL: File was not saved! {file_path} does not exist after save()")
raise Exception(f"Failed to save file {filename}")
uploaded_count += 1
uploaded_files.append(filename)
logger.info(f"Uploaded file: {filename} -> {file_path}")
if uploaded_count == 0:
return jsonify({'error': 'No valid Excel files uploaded'}), 400
@ -792,44 +664,10 @@ def create_app(config_path: Optional[str] = None):
return app
def start_scheduler(config_path: Optional[str] = None):
    """Start the report scheduler in a background daemon thread.

    Reads the ``scheduler`` section of the module-level ``config`` and, when
    its ``enabled`` flag is truthy, launches a ``ReportScheduler`` on a
    daemon thread whose handle is stored in the module-level
    ``scheduler_thread``.

    Args:
        config_path: Forwarded unchanged to ``ReportScheduler(config_path=...)``.

    Returns:
        None. Every failure (configuration not loaded, scheduler module
        missing, start-up error) is logged rather than raised.
    """
    global scheduler_thread

    # The module-level ``config`` starts out as None; calling ``.get`` on it
    # would raise AttributeError, so bail out with a log entry instead.
    if config is None:
        logger.warning("Scheduler not started: configuration has not been loaded")
        return

    # ``or {}`` also covers a config whose 'scheduler' entry is explicitly None.
    scheduler_config = config.get('scheduler') or {}
    if not scheduler_config.get('enabled'):
        logger.info("Scheduler is disabled in configuration")
        return

    try:
        # Imported lazily so a missing scheduler module only disables
        # scheduling (handled by the ImportError branch below).
        from scheduler import ReportScheduler
        import threading

        def run_scheduler():
            # Runs on the background thread; log errors here because an
            # exception raised in the thread would not reach the caller.
            try:
                scheduler = ReportScheduler(config_path=config_path)
                scheduler.start()
            except Exception as e:
                logger.error(f"Scheduler error: {e}", exc_info=True)

        # Daemon thread: it does not keep the process alive on shutdown.
        scheduler_thread = threading.Thread(target=run_scheduler, daemon=True)
        scheduler_thread.start()
        logger.info("Scheduler started in background thread")
    except ImportError:
        logger.warning("Scheduler module not available. Install apscheduler to enable scheduling.")
    except Exception as e:
        logger.error(f"Failed to start scheduler: {e}", exc_info=True)
def run_server(config_path: Optional[str] = None, host: Optional[str] = None, port: Optional[int] = None):
"""Run the API server."""
global app, config
app = create_app(config_path)
# Start scheduler if enabled
start_scheduler(config_path)
api_config = config.get('api', {})
server_host = host or api_config.get('host', '0.0.0.0')
server_port = port or api_config.get('port', 8080)

View File

@ -34,10 +34,10 @@ DEFAULT_CONFIG = {
},
'scheduler': {
'enabled': False,
'schedule_type': 'cron', # 'interval', 'cron', or 'once'
'schedule_type': 'interval', # 'interval', 'cron', or 'once'
'interval_hours': 24, # For interval type
'cron_expression': '0 10 * * *', # For cron type (10 AM EST/EDT daily)
'timezone': 'America/New_York' # EST/EDT timezone
'cron_expression': '0 8 * * *', # For cron type (8 AM daily)
'timezone': 'America/New_York'
},
'api': {
'enabled': False,
@ -186,14 +186,10 @@ def _load_from_env(config: Dict) -> Dict:
# Scheduler settings
if os.getenv('SCHEDULER_ENABLED'):
config['scheduler']['enabled'] = os.getenv('SCHEDULER_ENABLED').lower() == 'true'
if os.getenv('SCHEDULER_SCHEDULE_TYPE'):
config['scheduler']['schedule_type'] = os.getenv('SCHEDULER_SCHEDULE_TYPE')
if os.getenv('SCHEDULER_INTERVAL_HOURS'):
config['scheduler']['interval_hours'] = int(os.getenv('SCHEDULER_INTERVAL_HOURS'))
if os.getenv('SCHEDULER_CRON'):
config['scheduler']['cron_expression'] = os.getenv('SCHEDULER_CRON')
if os.getenv('SCHEDULER_TIMEZONE'):
config['scheduler']['timezone'] = os.getenv('SCHEDULER_TIMEZONE')
# API settings
if os.getenv('API_ENABLED'):

View File

@ -679,22 +679,6 @@ def preprocess_excel_files(reports_dir: str = "reports", current_date: Optional[
if not excel_files:
return f"No Excel files found in '{reports_dir}' directory.", {}
# Log which files will be processed
import logging
logger = logging.getLogger(__name__)
logger.info(f"Processing {len(excel_files)} Excel file(s) from {reports_dir}:")
for excel_file in excel_files:
file_size = excel_file.stat().st_size
mtime = excel_file.stat().st_mtime
mtime_str = datetime.fromtimestamp(mtime).strftime('%Y-%m-%d %H:%M:%S')
logger.info(f" - {excel_file.name} ({file_size} bytes, modified: {mtime_str})")
# WARNING: If multiple files found, this will combine data from all files
if len(excel_files) > 1:
logger.warning(f"WARNING: Found {len(excel_files)} Excel file(s). Report will combine data from ALL files!")
logger.warning("This may cause incorrect results. Only ONE file should exist in the reports directory.")
logger.warning(f"Files found: {[f.name for f in excel_files]}")
# First pass: collect all items with raw vendor names
all_raw_items = []
for excel_file in excel_files:

View File

@ -1,7 +1,7 @@
PREPROCESSED EXCEL DATA
================================================================================
Current Date (Baltimore/Eastern): 2025-11-08 08:41:02 EST
Total Items: 180
Current Date (Baltimore/Eastern): 2025-11-07 12:05:51 EST
Total Items: 167
VENDOR: Amazon
--------------------------------------------------------------------------------

View File

@ -587,9 +587,9 @@
<header>
<h1>Vendor Punchlist Report</h1>
<div class="meta">
Generated: 2025-11-08 17:29:31 |
Generated: 2025-11-08 15:16:56 |
Total Vendors: 16 |
Total Items: 180
Total Items: 181
</div>
</header>
@ -649,7 +649,7 @@
<p>Vendors</p>
</div>
<div class="summary-card">
<h3>180</h3>
<h3>181</h3>
<p>Total Items</p>
</div>
<div class="summary-card success">
@ -661,11 +661,11 @@
<p>Monitor</p>
</div>
<div class="summary-card danger">
<h3>7</h3>
<h3>8</h3>
<p>Open</p>
</div>
<div class="summary-card danger">
<h3>7</h3>
<h3>8</h3>
<p>Incomplete</p>
</div>
</div>
@ -8849,7 +8849,7 @@ https://t.corp.amazon.com/V1969041198</div>
<div class="vendor-name">MISC</div>
<div class="vendor-stats">
<div class="stat-item">
<div class="stat-value">7</div>
<div class="stat-value">8</div>
<div class="stat-label">Total</div>
</div>
<div class="stat-item">
@ -8861,11 +8861,11 @@ https://t.corp.amazon.com/V1969041198</div>
<div class="stat-label">Monitor</div>
</div>
<div class="stat-item">
<div class="stat-value" style="color: #ef4444;">0</div>
<div class="stat-value" style="color: #ef4444;">1</div>
<div class="stat-label">Open</div>
</div>
<div class="stat-item">
<div class="stat-value" style="color: #dc2626;">0</div>
<div class="stat-value" style="color: #dc2626;">1</div>
<div class="stat-label">Incomplete</div>
</div>
</div>
@ -8873,13 +8873,13 @@ https://t.corp.amazon.com/V1969041198</div>
<div class="vendor-content">
<div class="status-tabs">
<button class="status-tab active" onclick="switchStatusTab(this, '" + escape_js_string(vendor_name) + "')" data-status="all">All (7)</button>
<button class="status-tab active" onclick="switchStatusTab(this, '" + escape_js_string(vendor_name) + "')" data-status="all">All (8)</button>
<button class="status-tab" onclick="switchStatusTab(this, '" + escape_js_string(vendor_name) + "')" data-status="updates_24h">Yesterday's Updates (0)</button>
<button class="status-tab" onclick="switchStatusTab(this, '" + escape_js_string(vendor_name) + "')" data-status="oldest_unaddressed">Oldest Unaddressed (0)</button>
<button class="status-tab" onclick="switchStatusTab(this, '" + escape_js_string(vendor_name) + "')" data-status="oldest_unaddressed">Oldest Unaddressed (1)</button>
<button class="status-tab" onclick="switchStatusTab(this, '" + escape_js_string(vendor_name) + "')" data-status="closed">Closed (7)</button>
<button class="status-tab" onclick="switchStatusTab(this, '" + escape_js_string(vendor_name) + "')" data-status="monitor">Monitor (0)</button>
<button class="status-tab" onclick="switchStatusTab(this, '" + escape_js_string(vendor_name) + "')" data-status="open">Open (0)</button>
<button class="status-tab" onclick="switchStatusTab(this, '" + escape_js_string(vendor_name) + "')" data-status="incomplete">Incomplete (0)</button>
<button class="status-tab" onclick="switchStatusTab(this, '" + escape_js_string(vendor_name) + "')" data-status="open">Open (1)</button>
<button class="status-tab" onclick="switchStatusTab(this, '" + escape_js_string(vendor_name) + "')" data-status="incomplete">Incomplete (1)</button>
</div>
<div class="status-tab-content active" data-status="all" data-vendor="MISC">
@ -8933,7 +8933,7 @@ https://t.corp.amazon.com/V1969041198</div>
<div class="section">
<div class="section-title">No Priority (5 items)</div>
<div class="section-title">No Priority (6 items)</div>
<ul class="item-list">
<li>
@ -9023,6 +9023,24 @@ https://t.corp.amazon.com/V1969041198</div>
</div>
</li>
<li>
<div class="item-header">
<div class="item-name">Testing Vendor report tool</div>
<div class="badges">
<span class="badge badge-success">Incomplete</span>
</div>
</div>
<div class="item-details">
</div>
</li>
@ -9046,12 +9064,30 @@ https://t.corp.amazon.com/V1969041198</div>
<div class="status-tab-content" data-status="oldest_unaddressed" data-vendor="MISC">
<div class="section">
<div class="section-title">Oldest 3 Unaddressed Items</div>
<div class="section-title">Oldest 3 Unaddressed Items (1)</div>
<ul class="item-list">
<li class="empty">No unaddressed items</li>
<li>
<div class="item-header">
<div class="item-name">Testing Vendor report tool</div>
<div class="badges">
<span class="badge badge-success">Incomplete</span>
</div>
</div>
<div class="item-details">
</div>
</li>
</ul>
</div>
</div>
<div class="status-tab-content" data-status="closed" data-vendor="MISC">
@ -9200,18 +9236,54 @@ https://t.corp.amazon.com/V1969041198</div>
<div class="status-tab-content" data-status="open" data-vendor="MISC">
<div class="section">
<div class="section-title">Open Items (0)</div>
<div class="section-title">Open Items (1)</div>
<ul class="item-list">
<li class="empty">No open items</li>
<li>
<div class="item-header">
<div class="item-name">Testing Vendor report tool</div>
<div class="badges">
<span class="badge badge-success">Incomplete</span>
</div>
</div>
<div class="item-details">
</div>
</li>
</ul>
</div>
</div>
<div class="status-tab-content" data-status="incomplete" data-vendor="MISC">
<div class="section">
<div class="section-title">Incomplete Items (0)</div>
<div class="section-title">Incomplete Items (1)</div>
<ul class="item-list">
<li class="empty">No incomplete items</li>
<li>
<div class="item-header">
<div class="item-name">Testing Vendor report tool</div>
<div class="badges">
<span class="badge badge-success">Incomplete</span>
</div>
</div>
<div class="item-details">
</div>
</li>
</ul>
</div>
</div>

View File

@ -1,5 +1,5 @@
{
"report_generated_at": "2025-11-08T17:29:31.912420",
"report_generated_at": "2025-11-08T15:16:56.515245",
"vendors": [
{
"vendor_name": "Amazon",
@ -3761,16 +3761,28 @@
},
{
"vendor_name": "MISC",
"total_items": 7,
"total_items": 8,
"closed_count": 7,
"open_count": 0,
"open_count": 1,
"monitor_count": 0,
"updates_24h": {
"added": [],
"closed": [],
"changed_to_monitor": []
},
"oldest_unaddressed": [],
"oldest_unaddressed": [
{
"punchlist_name": "Testing Vendor report tool",
"description": null,
"priority": null,
"date_identified": null,
"date_completed": null,
"status": "Incomplete",
"status_updates": null,
"issue_image": null,
"age_days": null
}
],
"very_high_priority_items": [],
"high_priority_items": [
{
@ -3876,9 +3888,33 @@
}
],
"monitor_items": [],
"open_items": [],
"incomplete_items": [],
"incomplete_count": 0
"open_items": [
{
"punchlist_name": "Testing Vendor report tool",
"description": null,
"priority": null,
"date_identified": null,
"date_completed": null,
"status": "Incomplete",
"status_updates": null,
"issue_image": null,
"age_days": null
}
],
"incomplete_items": [
{
"punchlist_name": "Testing Vendor report tool",
"description": null,
"priority": null,
"date_identified": null,
"date_completed": null,
"status": "Incomplete",
"status_updates": null,
"issue_image": null,
"age_days": null
}
],
"incomplete_count": 1
},
{
"vendor_name": "Startup (Amazon)",
@ -3927,10 +3963,10 @@
],
"summary": {
"total_vendors": 16,
"total_items": 180,
"total_items": 181,
"total_closed": 156,
"total_open": 7,
"total_open": 8,
"total_monitor": 17,
"total_incomplete": 7
"total_incomplete": 8
}
}

View File

@ -587,9 +587,9 @@
<header>
<h1>Vendor Punchlist Report</h1>
<div class="meta">
Generated: 2025-11-08 17:41:02 |
Generated: 2025-11-08 15:18:53 |
Total Vendors: 16 |
Total Items: 180
Total Items: 181
</div>
</header>
@ -649,7 +649,7 @@
<p>Vendors</p>
</div>
<div class="summary-card">
<h3>180</h3>
<h3>181</h3>
<p>Total Items</p>
</div>
<div class="summary-card success">
@ -661,11 +661,11 @@
<p>Monitor</p>
</div>
<div class="summary-card danger">
<h3>7</h3>
<h3>8</h3>
<p>Open</p>
</div>
<div class="summary-card danger">
<h3>7</h3>
<h3>8</h3>
<p>Incomplete</p>
</div>
</div>
@ -8849,7 +8849,7 @@ https://t.corp.amazon.com/V1969041198</div>
<div class="vendor-name">MISC</div>
<div class="vendor-stats">
<div class="stat-item">
<div class="stat-value">7</div>
<div class="stat-value">8</div>
<div class="stat-label">Total</div>
</div>
<div class="stat-item">
@ -8861,11 +8861,11 @@ https://t.corp.amazon.com/V1969041198</div>
<div class="stat-label">Monitor</div>
</div>
<div class="stat-item">
<div class="stat-value" style="color: #ef4444;">0</div>
<div class="stat-value" style="color: #ef4444;">1</div>
<div class="stat-label">Open</div>
</div>
<div class="stat-item">
<div class="stat-value" style="color: #dc2626;">0</div>
<div class="stat-value" style="color: #dc2626;">1</div>
<div class="stat-label">Incomplete</div>
</div>
</div>
@ -8873,13 +8873,13 @@ https://t.corp.amazon.com/V1969041198</div>
<div class="vendor-content">
<div class="status-tabs">
<button class="status-tab active" onclick="switchStatusTab(this, '" + escape_js_string(vendor_name) + "')" data-status="all">All (7)</button>
<button class="status-tab active" onclick="switchStatusTab(this, '" + escape_js_string(vendor_name) + "')" data-status="all">All (8)</button>
<button class="status-tab" onclick="switchStatusTab(this, '" + escape_js_string(vendor_name) + "')" data-status="updates_24h">Yesterday's Updates (0)</button>
<button class="status-tab" onclick="switchStatusTab(this, '" + escape_js_string(vendor_name) + "')" data-status="oldest_unaddressed">Oldest Unaddressed (0)</button>
<button class="status-tab" onclick="switchStatusTab(this, '" + escape_js_string(vendor_name) + "')" data-status="oldest_unaddressed">Oldest Unaddressed (1)</button>
<button class="status-tab" onclick="switchStatusTab(this, '" + escape_js_string(vendor_name) + "')" data-status="closed">Closed (7)</button>
<button class="status-tab" onclick="switchStatusTab(this, '" + escape_js_string(vendor_name) + "')" data-status="monitor">Monitor (0)</button>
<button class="status-tab" onclick="switchStatusTab(this, '" + escape_js_string(vendor_name) + "')" data-status="open">Open (0)</button>
<button class="status-tab" onclick="switchStatusTab(this, '" + escape_js_string(vendor_name) + "')" data-status="incomplete">Incomplete (0)</button>
<button class="status-tab" onclick="switchStatusTab(this, '" + escape_js_string(vendor_name) + "')" data-status="open">Open (1)</button>
<button class="status-tab" onclick="switchStatusTab(this, '" + escape_js_string(vendor_name) + "')" data-status="incomplete">Incomplete (1)</button>
</div>
<div class="status-tab-content active" data-status="all" data-vendor="MISC">
@ -8933,7 +8933,7 @@ https://t.corp.amazon.com/V1969041198</div>
<div class="section">
<div class="section-title">No Priority (5 items)</div>
<div class="section-title">No Priority (6 items)</div>
<ul class="item-list">
<li>
@ -9023,6 +9023,24 @@ https://t.corp.amazon.com/V1969041198</div>
</div>
</li>
<li>
<div class="item-header">
<div class="item-name">Testing Vendor report tool</div>
<div class="badges">
<span class="badge badge-success">Incomplete</span>
</div>
</div>
<div class="item-details">
</div>
</li>
@ -9046,12 +9064,30 @@ https://t.corp.amazon.com/V1969041198</div>
<div class="status-tab-content" data-status="oldest_unaddressed" data-vendor="MISC">
<div class="section">
<div class="section-title">Oldest 3 Unaddressed Items</div>
<div class="section-title">Oldest 3 Unaddressed Items (1)</div>
<ul class="item-list">
<li class="empty">No unaddressed items</li>
<li>
<div class="item-header">
<div class="item-name">Testing Vendor report tool</div>
<div class="badges">
<span class="badge badge-success">Incomplete</span>
</div>
</div>
<div class="item-details">
</div>
</li>
</ul>
</div>
</div>
<div class="status-tab-content" data-status="closed" data-vendor="MISC">
@ -9200,18 +9236,54 @@ https://t.corp.amazon.com/V1969041198</div>
<div class="status-tab-content" data-status="open" data-vendor="MISC">
<div class="section">
<div class="section-title">Open Items (0)</div>
<div class="section-title">Open Items (1)</div>
<ul class="item-list">
<li class="empty">No open items</li>
<li>
<div class="item-header">
<div class="item-name">Testing Vendor report tool</div>
<div class="badges">
<span class="badge badge-success">Incomplete</span>
</div>
</div>
<div class="item-details">
</div>
</li>
</ul>
</div>
</div>
<div class="status-tab-content" data-status="incomplete" data-vendor="MISC">
<div class="section">
<div class="section-title">Incomplete Items (0)</div>
<div class="section-title">Incomplete Items (1)</div>
<ul class="item-list">
<li class="empty">No incomplete items</li>
<li>
<div class="item-header">
<div class="item-name">Testing Vendor report tool</div>
<div class="badges">
<span class="badge badge-success">Incomplete</span>
</div>
</div>
<div class="item-details">
</div>
</li>
</ul>
</div>
</div>

View File

@ -1,5 +1,5 @@
{
"report_generated_at": "2025-11-08T17:41:02.563427",
"report_generated_at": "2025-11-08T15:18:53.184680",
"vendors": [
{
"vendor_name": "Amazon",
@ -3761,16 +3761,28 @@
},
{
"vendor_name": "MISC",
"total_items": 7,
"total_items": 8,
"closed_count": 7,
"open_count": 0,
"open_count": 1,
"monitor_count": 0,
"updates_24h": {
"added": [],
"closed": [],
"changed_to_monitor": []
},
"oldest_unaddressed": [],
"oldest_unaddressed": [
{
"punchlist_name": "Testing Vendor report tool",
"description": null,
"priority": null,
"date_identified": null,
"date_completed": null,
"status": "Incomplete",
"status_updates": null,
"issue_image": null,
"age_days": null
}
],
"very_high_priority_items": [],
"high_priority_items": [
{
@ -3876,9 +3888,33 @@
}
],
"monitor_items": [],
"open_items": [],
"incomplete_items": [],
"incomplete_count": 0
"open_items": [
{
"punchlist_name": "Testing Vendor report tool",
"description": null,
"priority": null,
"date_identified": null,
"date_completed": null,
"status": "Incomplete",
"status_updates": null,
"issue_image": null,
"age_days": null
}
],
"incomplete_items": [
{
"punchlist_name": "Testing Vendor report tool",
"description": null,
"priority": null,
"date_identified": null,
"date_completed": null,
"status": "Incomplete",
"status_updates": null,
"issue_image": null,
"age_days": null
}
],
"incomplete_count": 1
},
{
"vendor_name": "Startup (Amazon)",
@ -3927,10 +3963,10 @@
],
"summary": {
"total_vendors": 16,
"total_items": 180,
"total_items": 181,
"total_closed": 156,
"total_open": 7,
"total_open": 8,
"total_monitor": 17,
"total_incomplete": 7
"total_incomplete": 8
}
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -587,7 +587,7 @@
<header>
<h1>Vendor Punchlist Report</h1>
<div class="meta">
Generated: 2025-11-08 16:57:00 |
Generated: 2025-11-08 15:24:16 |
Total Vendors: 16 |
Total Items: 174
</div>

View File

@ -1,5 +1,5 @@
{
"report_generated_at": "2025-11-08T16:57:00.566153",
"report_generated_at": "2025-11-08T15:24:16.444403",
"vendors": [
{
"vendor_name": "Amazon",

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -12,7 +12,6 @@ from pathlib import Path
try:
from apscheduler.schedulers.blocking import BlockingScheduler
from apscheduler.schedulers.background import BackgroundScheduler
from apscheduler.triggers.interval import IntervalTrigger
from apscheduler.triggers.cron import CronTrigger
from apscheduler.triggers.date import DateTrigger
@ -27,52 +26,6 @@ from sharepoint_downloader import download_from_sharepoint
logger = logging.getLogger(__name__)
# Cleanup function (duplicated from api_server to avoid circular import)
def cleanup_old_reports(output_dir: Path, reports_dir: Path, max_reports: int = 10):
"""
Cleanup old reports and Excel files, keeping only the last max_reports.
Args:
output_dir: Directory containing report HTML/JSON files
reports_dir: Directory containing Excel files
max_reports: Maximum number of reports to keep
"""
try:
# Get all report HTML files sorted by modification time (newest first)
html_files = sorted(output_dir.glob('report-*.html'), key=lambda p: p.stat().st_mtime, reverse=True)
if len(html_files) <= max_reports:
return # No cleanup needed
# Get reports to delete (oldest ones)
reports_to_delete = html_files[max_reports:]
deleted_count = 0
for html_file in reports_to_delete:
report_id = html_file.stem
# Delete HTML file
try:
html_file.unlink()
logger.info(f"Deleted old report HTML: {html_file.name}")
deleted_count += 1
except Exception as e:
logger.warning(f"Failed to delete {html_file.name}: {e}")
# Delete corresponding JSON file
json_file = output_dir / f"{report_id}.json"
if json_file.exists():
try:
json_file.unlink()
logger.info(f"Deleted old report JSON: {json_file.name}")
except Exception as e:
logger.warning(f"Failed to delete {json_file.name}: {e}")
if deleted_count > 0:
logger.info(f"Cleanup completed: deleted {deleted_count} old report(s)")
except Exception as e:
logger.error(f"Error during cleanup: {e}", exc_info=True)
class ReportScheduler:
"""Manages scheduled report generation."""
@ -91,14 +44,7 @@ class ReportScheduler:
)
self.config = load_config(config_path)
scheduler_timezone = self.config['scheduler'].get('timezone', 'America/New_York')
# Use BackgroundScheduler for thread compatibility (when run from API server)
# Use BlockingScheduler when run standalone
self.use_background = True # Set to False if running standalone
if self.use_background:
self.scheduler = BackgroundScheduler(timezone=scheduler_timezone)
else:
self.scheduler = BlockingScheduler(timezone=scheduler_timezone)
self.scheduler = BlockingScheduler(timezone=self.config['scheduler']['timezone'])
self.scheduler_config = self.config['scheduler']
self.sharepoint_config = self.config.get('sharepoint', {})
self.report_config = self.config.get('report', {})
@ -132,15 +78,10 @@ class ReportScheduler:
logger.error(f"Failed to download from SharePoint: {e}")
# Continue with report generation even if download fails
# Generate report with timestamp
# Generate report
logger.info("Generating report...")
reports_dir = self.report_config.get('reports_dir', 'reports')
output_dir = Path(self.report_config.get('output_dir', 'output'))
# Create timestamped filename (same format as API server)
timestamp = datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
report_id = f"report-{timestamp}"
output_file = output_dir / f"{report_id}.json"
output_file = Path(self.report_config.get('output_dir', 'output')) / 'report.json'
report_data = generate_report(
reports_dir=reports_dir,
@ -150,12 +91,6 @@ class ReportScheduler:
if report_data:
logger.info("✓ Scheduled report generation completed successfully")
# Cleanup old reports (keep last 10)
try:
cleanup_old_reports(output_dir, Path(reports_dir), max_reports=10)
except Exception as e:
logger.warning(f"Failed to cleanup old reports: {e}")
else:
logger.error("✗ Scheduled report generation failed")
@ -215,18 +150,11 @@ class ReportScheduler:
replace_existing=True
)
if self.use_background:
# BackgroundScheduler - just start it, don't block
logger.info("Scheduler started. Press Ctrl+C to stop.")
try:
self.scheduler.start()
logger.info("Scheduler started in background mode")
else:
# BlockingScheduler - block until interrupted
logger.info("Scheduler started. Press Ctrl+C to stop.")
try:
self.scheduler.start()
except KeyboardInterrupt:
logger.info("Scheduler stopped by user")
self.scheduler.shutdown()
except KeyboardInterrupt:
logger.info("Scheduler stopped by user")
if __name__ == "__main__":
@ -240,7 +168,5 @@ if __name__ == "__main__":
config_path = sys.argv[1] if len(sys.argv) > 1 else None
scheduler = ReportScheduler(config_path=config_path)
scheduler.use_background = False # Use BlockingScheduler for standalone mode
scheduler.scheduler = BlockingScheduler(timezone=scheduler.config['scheduler'].get('timezone', 'America/New_York'))
scheduler.start()

View File

@ -284,92 +284,42 @@ class SharePointDownloader:
# ALWAYS clear ALL existing Excel files before downloading (to ensure only new files are used)
# This is critical to prevent combining multiple files
# Wait a moment first to allow any previous file operations to complete
import time
time.sleep(1.0) # Give file handles time to close
existing_files = list(local_dir_path.glob('*.xlsx')) + list(local_dir_path.glob('*.xls'))
cleared_count = 0
failed_to_clear = []
for old_file in existing_files:
try:
# On Windows, files might be locked - try multiple times with increasing delays
max_retries = 5
# On Windows, files might be locked - try multiple times
max_retries = 3
retry_count = 0
cleared_this_file = False
while retry_count < max_retries and not cleared_this_file:
while retry_count < max_retries:
try:
old_file.unlink()
cleared_count += 1
cleared_this_file = True
logger.info(f"Cleared existing file before download: {old_file.name}")
break
except PermissionError as pe:
except PermissionError:
retry_count += 1
if retry_count < max_retries:
# Increasing delay: 0.5s, 1s, 2s, 3s
import time
delay = min(0.5 * (2 ** retry_count), 3.0)
logger.warning(f"File {old_file.name} is locked (attempt {retry_count}/{max_retries}), waiting {delay}s...")
time.sleep(delay)
time.sleep(0.5) # Wait 500ms before retry
else:
# Last attempt failed - try renaming instead of deleting
logger.warning(f"Cannot delete {old_file.name}, trying to rename instead...")
try:
import time
timestamp = int(time.time())
backup_name = f"{old_file.stem}_backup_{timestamp}{old_file.suffix}"
backup_path = old_file.parent / backup_name
old_file.rename(backup_path)
cleared_count += 1
cleared_this_file = True
logger.info(f"Renamed locked file to backup: {old_file.name} -> {backup_name}")
except Exception as rename_error:
logger.error(f"Could not rename file either: {rename_error}")
raise pe # Raise original PermissionError
except Exception as e:
if retry_count >= max_retries - 1:
raise
retry_count += 1
import time
time.sleep(1)
if not cleared_this_file:
failed_to_clear.append(old_file.name)
logger.error(f"Failed to clear existing file {old_file.name} after {max_retries} attempts")
except Exception as e:
if old_file.name not in failed_to_clear:
failed_to_clear.append(old_file.name)
failed_to_clear.append(old_file.name)
logger.error(f"Failed to clear existing file {old_file.name}: {e}")
if failed_to_clear:
logger.error(f"CRITICAL: Failed to clear {len(failed_to_clear)} file(s) before download: {failed_to_clear}")
logger.error("This will cause data mixing! Files may be locked by another process.")
logger.error("ABORTING download to prevent combining multiple files.")
raise Exception(f"Cannot download from SharePoint: {len(failed_to_clear)} file(s) could not be cleared. Please close any programs that might have these files open: {failed_to_clear}")
# Don't fail here - let the download proceed, but log the warning
if cleared_count > 0:
logger.info(f"Cleared {cleared_count} existing Excel file(s) before downloading from SharePoint")
else:
logger.info("No existing Excel files found to clear (reports directory was empty)")
# VERIFY: Double-check that all Excel files are actually gone
remaining_files = list(local_dir_path.glob('*.xlsx')) + list(local_dir_path.glob('*.xls'))
if remaining_files:
logger.error(f"CRITICAL: After clearing, {len(remaining_files)} file(s) still exist: {[f.name for f in remaining_files]}")
logger.error("These files are likely locked. Attempting force removal...")
for remaining_file in remaining_files:
try:
remaining_file.unlink()
logger.info(f"Force-removed locked file: {remaining_file.name}")
except Exception as e:
logger.error(f"CRITICAL: Cannot remove locked file {remaining_file.name}: {e}")
raise Exception(f"Cannot proceed: File {remaining_file.name} is locked and cannot be deleted. Please close Excel or any other program using this file.")
logger.info("✓ Verified: All old Excel files cleared successfully")
# List files in folder
files = self.list_files_in_folder(folder_path, file_pattern)