update

parent 1d82024a58
commit 460f2b2592
508  api_server.py
@@ -7,12 +7,16 @@ Provides REST API endpoints to trigger report generation on demand.

import logging
from pathlib import Path
from typing import Optional
from typing import Optional, List, Dict
import json
from datetime import datetime
import shutil
import os

try:
    from flask import Flask, jsonify, request
    from flask import Flask, jsonify, request, send_from_directory
    from flask_cors import CORS
    from werkzeug.utils import secure_filename
    FLASK_AVAILABLE = True
except ImportError:
    FLASK_AVAILABLE = False
@@ -28,6 +32,68 @@ app = None
config = None


def cleanup_old_reports(output_dir: Path, reports_dir: Path, max_reports: int = 10):
    """
    Cleanup old reports and Excel files, keeping only the last max_reports.

    Args:
        output_dir: Directory containing report HTML/JSON files
        reports_dir: Directory containing Excel files
        max_reports: Maximum number of reports to keep
    """
    try:
        # Get all report HTML files sorted by modification time (newest first)
        html_files = sorted(output_dir.glob('report-*.html'), key=lambda p: p.stat().st_mtime, reverse=True)

        if len(html_files) <= max_reports:
            return  # No cleanup needed

        # Get reports to delete (oldest ones)
        reports_to_delete = html_files[max_reports:]

        deleted_count = 0
        for html_file in reports_to_delete:
            report_id = html_file.stem

            # Delete HTML file
            try:
                html_file.unlink()
                logger.info(f"Deleted old report HTML: {html_file.name}")
                deleted_count += 1
            except Exception as e:
                logger.warning(f"Failed to delete {html_file.name}: {e}")

            # Delete corresponding JSON file
            json_file = output_dir / f"{report_id}.json"
            if json_file.exists():
                try:
                    json_file.unlink()
                    logger.info(f"Deleted old report JSON: {json_file.name}")
                except Exception as e:
                    logger.warning(f"Failed to delete {json_file.name}: {e}")

        # Cleanup Excel files - keep only files associated with remaining reports
        if reports_dir.exists():
            excel_files = list(reports_dir.glob('*.xlsx')) + list(reports_dir.glob('*.xls'))

            if len(excel_files) > max_reports:
                # Sort by modification time and delete oldest
                excel_files_sorted = sorted(excel_files, key=lambda p: p.stat().st_mtime, reverse=True)
                excel_to_delete = excel_files_sorted[max_reports:]

                for excel_file in excel_to_delete:
                    try:
                        excel_file.unlink()
                        logger.info(f"Deleted old Excel file: {excel_file.name}")
                    except Exception as e:
                        logger.warning(f"Failed to delete {excel_file.name}: {e}")

        logger.info(f"Cleanup completed: deleted {deleted_count} old report(s)")

    except Exception as e:
        logger.error(f"Error during cleanup: {e}", exc_info=True)


def create_app(config_path: Optional[str] = None):
    """Create and configure Flask app."""
    global app, config
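For reference, the new cleanup helper can be exercised on its own. A minimal smoke-test sketch, assuming api_server is importable as in this repo; the temporary directory layout and file names below are illustrative, not part of the commit:

# Hypothetical smoke test for cleanup_old_reports (not part of this commit).
import tempfile
import time
from pathlib import Path

from api_server import cleanup_old_reports  # module name assumed from this repo

with tempfile.TemporaryDirectory() as tmp:
    output_dir = Path(tmp) / 'output'
    reports_dir = Path(tmp) / 'reports'
    output_dir.mkdir()
    reports_dir.mkdir()
    for i in range(12):  # two more than max_reports
        (output_dir / f'report-2025-11-08-15-00-{i:02d}.html').write_text('x')
        (output_dir / f'report-2025-11-08-15-00-{i:02d}.json').write_text('{}')
        time.sleep(0.01)  # distinct mtimes so the newest-first sort is stable
    cleanup_old_reports(output_dir, reports_dir, max_reports=10)
    assert len(list(output_dir.glob('report-*.html'))) == 10  # oldest two pruned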
@@ -46,6 +112,20 @@ def create_app(config_path: Optional[str] = None):
    sharepoint_config = config.get('sharepoint', {})
    report_config = config.get('report', {})

    # Resolve paths relative to script location, not current working directory
    script_dir = Path(__file__).parent.absolute()

    # Convert relative paths to absolute paths relative to script directory
    if 'output_dir' in report_config and report_config['output_dir']:
        output_dir = Path(report_config['output_dir'])
        if not output_dir.is_absolute():
            report_config['output_dir'] = str(script_dir / output_dir)

    if 'reports_dir' in report_config and report_config['reports_dir']:
        reports_dir = Path(report_config['reports_dir'])
        if not reports_dir.is_absolute():
            report_config['reports_dir'] = str(script_dir / reports_dir)

    # Store config in app context
    app.config['API_KEY'] = api_config.get('api_key')
    app.config['SHAREPOINT_CONFIG'] = sharepoint_config
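The resolve-relative-to-script-directory pattern above reappears in nearly every endpoint further down this diff. A small helper, hypothetical and not part of the commit, would capture it in one place:

from pathlib import Path

def resolve_to_script_dir(p: str) -> Path:
    """Interpret a relative path against this file's directory instead of the CWD."""
    path = Path(p)
    if path.is_absolute():
        return path
    return Path(__file__).parent.absolute() / path

Each output_dir/reports_dir lookup below could then collapse to a single call.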
@@ -80,10 +160,14 @@ def create_app(config_path: Optional[str] = None):

        try:
            request_data = request.json or {}
            download_from_sp = request_data.get('download_from_sharepoint', False)
            download_from_sp = request_data.get('download_from_sharepoint', True)  # Default to True for backward compatibility
            downloaded_files = []  # Initialize here for scope

            # Download from SharePoint if requested
            # Get report config early - needed for error handling
            report_config = app.config['REPORT_CONFIG']

            # Download from SharePoint if requested AND no manual upload happened
            # If download_from_sharepoint is False, it means manual upload was used
            if download_from_sp:
                sp_config = app.config['SHAREPOINT_CONFIG']
                if not sp_config.get('enabled'):
@@ -98,39 +182,204 @@ def create_app(config_path: Optional[str] = None):
                        folder_path=sp_config.get('folder_path'),
                        file_path=sp_config.get('file_path'),
                        local_dir=sp_config.get('local_dir', 'reports'),
                        username=sp_config.get('username'),
                        password=sp_config.get('password'),
                        tenant_id=sp_config.get('tenant_id'),
                        client_id=sp_config.get('client_id'),
                        client_secret=sp_config.get('client_secret'),
                        use_app_authentication=sp_config.get('use_app_authentication', False),
                        use_app_authentication=sp_config.get('use_app_authentication', True),
                        file_pattern=sp_config.get('file_pattern'),
                        overwrite=sp_config.get('overwrite', True)
                    )
                    downloaded_files = downloaded if downloaded else []
                    logger.info(f"Downloaded {len(downloaded_files)} file(s) from SharePoint: {downloaded_files}")

                    # If SharePoint download failed (no files downloaded), check if we have existing files
                    if len(downloaded_files) == 0:
                        logger.warning("SharePoint download returned 0 files. This could mean:")
                        logger.warning("1. SharePoint permissions issue (401/403 error)")
                        logger.warning("2. No files found in the specified folder")
                        logger.warning("3. Site access not granted (Resource-Specific Consent needed)")
                        logger.warning("Checking if existing files are available in reports directory...")

                        # Check if there are existing files we can use
                        reports_dir_path = Path(report_config.get('reports_dir', 'reports'))
                        if not reports_dir_path.is_absolute():
                            script_dir = Path(__file__).parent.absolute()
                            reports_dir_path = script_dir / reports_dir_path

                        if reports_dir_path.exists():
                            existing_files = list(reports_dir_path.glob('*.xlsx')) + list(reports_dir_path.glob('*.xls'))
                            if existing_files:
                                logger.warning(f"Found {len(existing_files)} existing file(s) in reports directory. Will use these instead.")
                                logger.warning("NOTE: These may be old files. Consider using manual upload for fresh data.")
                            else:
                                logger.error("No files available - neither from SharePoint nor existing files.")
                                return jsonify({
                                    'error': 'SharePoint download failed and no existing files found',
                                    'details': 'SharePoint access may require Resource-Specific Consent (RSC). Please use manual file upload or fix SharePoint permissions.',
                                    'sharepoint_error': True
                                }), 500
                except Exception as e:
                    logger.error(f"Failed to download from SharePoint: {e}", exc_info=True)
                    # Check if we have existing files as fallback
                    reports_dir_path = Path(report_config.get('reports_dir', 'reports'))
                    if not reports_dir_path.is_absolute():
                        script_dir = Path(__file__).parent.absolute()
                        reports_dir_path = script_dir / reports_dir_path

                    if reports_dir_path.exists():
                        existing_files = list(reports_dir_path.glob('*.xlsx')) + list(reports_dir_path.glob('*.xls'))
                        if existing_files:
                            logger.warning(f"SharePoint download failed, but found {len(existing_files)} existing file(s). Will use these.")
                            downloaded_files = []  # Continue with existing files
                        else:
                            return jsonify({
                                'error': f'SharePoint download failed: {str(e)}'
                                'error': f'SharePoint download failed: {str(e)}',
                                'details': 'No existing files found. Please use manual file upload or fix SharePoint permissions.',
                                'sharepoint_error': True
                            }), 500
                    else:
                        return jsonify({
                            'error': f'SharePoint download failed: {str(e)}',
                            'details': 'Reports directory does not exist. Please use manual file upload or fix SharePoint permissions.',
                            'sharepoint_error': True
                        }), 500

            # Generate report
            report_config = app.config['REPORT_CONFIG']
            # Generate report with timestamp
            reports_dir = request_data.get('reports_dir', report_config.get('reports_dir', 'reports'))
            output_file = request_data.get('output_file',
                                           str(Path(report_config.get('output_dir', 'output')) / 'report.json'))
            output_dir_str = report_config.get('output_dir', 'output')
            output_dir = Path(output_dir_str)
            if not output_dir.is_absolute():
                script_dir = Path(__file__).parent.absolute()
                output_dir = script_dir / output_dir

            # Create timestamped filename
            timestamp = datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
            report_id = f"report-{timestamp}"
            output_file = str(output_dir / f"{report_id}.json")

            # Log which files will be used for generation
            reports_dir_path = Path(reports_dir)
            if not reports_dir_path.is_absolute():
                script_dir = Path(__file__).parent.absolute()
                reports_dir_path = script_dir / reports_dir_path

            logger.info(f"Generating report from {reports_dir_path.absolute()}...")
            logger.info(f"Reports directory exists: {reports_dir_path.exists()}")

            # Determine which files to use for generation
            # CRITICAL: Only use files that were just downloaded/uploaded, not old ones
            if downloaded_files:
                # Files were downloaded from SharePoint - use only those
                logger.info(f"Using {len(downloaded_files)} file(s) downloaded from SharePoint")
                # Verify that reports_dir only contains the downloaded files (should be empty of old files)
                all_files = list(reports_dir_path.glob('*.xlsx')) + list(reports_dir_path.glob('*.xls'))
                downloaded_file_paths = [Path(f).name for f in downloaded_files]  # Get just filenames
                if len(all_files) != len(downloaded_files):
                    logger.warning(f"WARNING: Found {len(all_files)} file(s) in reports_dir but only {len(downloaded_files)} were downloaded!")
                    logger.warning("This might indicate old files weren't cleared. Clearing now...")
                    for file in all_files:
                        if file.name not in downloaded_file_paths:
                            try:
                                file.unlink()
                                logger.info(f"Cleared unexpected file: {file.name}")
                            except Exception as e:
                                logger.error(f"Failed to clear unexpected file {file.name}: {e}")
            elif not download_from_sp:
                # Manual upload was used (download_from_sharepoint=False)
                # Upload endpoint should have cleared old files, but double-check
                # Only use files uploaded in the last 10 minutes to avoid combining with old files
                if reports_dir_path.exists():
                    excel_files = list(reports_dir_path.glob('*.xlsx')) + list(reports_dir_path.glob('*.xls'))
                    current_time = datetime.now().timestamp()
                    recent_files = []
                    for excel_file in excel_files:
                        mtime = excel_file.stat().st_mtime
                        # Only use files modified in the last 10 minutes (should be the uploaded ones)
                        # Increased from 5 to 10 minutes to account for upload + generation delay
                        if current_time - mtime < 600:  # 10 minutes
                            recent_files.append(excel_file)
                            mtime_str = datetime.fromtimestamp(mtime).strftime('%Y-%m-%d %H:%M:%S')
                            logger.info(f" - {excel_file.name} (modified: {mtime_str}) - will be used for manual upload generation")
                        else:
                            logger.warning(f" - {excel_file.name} (modified: {datetime.fromtimestamp(mtime).strftime('%Y-%m-%d %H:%M:%S')}) - skipping (too old, might be from previous run)")

                    if len(recent_files) < len(excel_files):
                        logger.warning(f"Found {len(excel_files)} total file(s), but only {len(recent_files)} are recent. Clearing old files to avoid combining...")
                        # Clear old files to ensure we only use the manually uploaded ones
                        for excel_file in excel_files:
                            if excel_file not in recent_files:
                                try:
                                    excel_file.unlink()
                                    logger.info(f"Cleared old file: {excel_file.name}")
                                except Exception as e:
                                    logger.warning(f"Failed to clear old file {excel_file.name}: {e}")

                    if len(recent_files) == 0:
                        logger.error("Manual upload was used but no recent files found in reports directory!")
                        logger.error("This might mean:")
                        logger.error("1. Files were not uploaded successfully")
                        logger.error("2. Files were uploaded but cleared before generation")
                        logger.error("3. File modification times are incorrect")
                        return jsonify({
                            'error': 'No files found for manual upload generation',
                            'details': 'Files were uploaded but not found in reports directory. Please try uploading again.',
                            'manual_upload_error': True
                        }), 400

                    # Verify we only have the recently uploaded files
                    all_files = list(reports_dir_path.glob('*.xlsx')) + list(reports_dir_path.glob('*.xls'))
                    if len(all_files) != len(recent_files):
                        logger.warning(f"WARNING: Found {len(all_files)} file(s) but only {len(recent_files)} are recent!")
                        logger.warning("Clearing old files to ensure only uploaded files are used...")
                        for file in all_files:
                            if file not in recent_files:
                                try:
                                    file.unlink()
                                    logger.info(f"Cleared unexpected old file: {file.name}")
                                except Exception as e:
                                    logger.error(f"Failed to clear unexpected file {file.name}: {e}")

                    logger.info(f"Will generate report from {len(recent_files)} recently uploaded file(s)")
                else:
                    logger.error("Manual upload was used but reports directory does not exist!")
                    return jsonify({
                        'error': 'Reports directory does not exist',
                        'details': 'Cannot generate report from manual upload - reports directory is missing.',
                        'manual_upload_error': True
                    }), 500
            else:
                # SharePoint download was attempted but failed - this shouldn't happen if download_from_sp=True
                # But if it does, we should NOT use existing files as they might be old
                logger.error("SharePoint download was requested but failed, and no manual upload was used!")
                logger.error("This should not happen - refusing to use potentially old files")
                return jsonify({
                    'error': 'SharePoint download failed and no manual upload provided',
                    'details': 'Cannot generate report - no data source available. Please try again or use manual upload.',
                    'sharepoint_error': True
                }), 400

            logger.info(f"Generating report from {reports_dir}...")
            report_data = generate_report(
                reports_dir=reports_dir,
                reports_dir=str(reports_dir_path),
                output_file=output_file,
                verbose=False  # Don't print to console in API mode
            )

            if report_data:
                # Generate HTML with same timestamp
                html_file = output_dir / f"{report_id}.html"
                from html_generator import generate_html_report
                generate_html_report(output_file, str(html_file))

                # Cleanup old reports (keep only last 10)
                # Ensure reports_dir is a Path object
                reports_dir_for_cleanup = Path(reports_dir) if isinstance(reports_dir, str) else reports_dir
                cleanup_old_reports(output_dir, reports_dir_for_cleanup, max_reports=10)

                return jsonify({
                    'status': 'success',
                    'message': 'Report generated successfully',
                    'report_id': report_id,
                    'report_date': timestamp,
                    'output_file': output_file,
                    'summary': report_data.get('summary', {}),
                    'vendors_count': len(report_data.get('vendors', [])),
@@ -148,6 +397,108 @@ def create_app(config_path: Optional[str] = None):
                'error': f'Report generation failed: {str(e)}'
            }), 500

    @app.route('/api/upload', methods=['POST'])
    def upload_files():
        """Upload Excel files manually. Clears old files before uploading new ones."""
        try:
            if 'files' not in request.files:
                return jsonify({'error': 'No files provided'}), 400

            files = request.files.getlist('files')
            if not files or all(f.filename == '' for f in files):
                return jsonify({'error': 'No files selected'}), 400

            report_config = app.config['REPORT_CONFIG']
            reports_dir_str = report_config.get('reports_dir', 'reports')
            reports_dir = Path(reports_dir_str)
            if not reports_dir.is_absolute():
                script_dir = Path(__file__).parent.absolute()
                reports_dir = script_dir / reports_dir

            # Ensure reports directory exists
            reports_dir.mkdir(parents=True, exist_ok=True)

            # ALWAYS clear ALL old Excel files from reports directory before uploading new ones
            # CRITICAL: This prevents combining multiple files in report generation
            old_excel_files = list(reports_dir.glob('*.xlsx')) + list(reports_dir.glob('*.xls'))
            cleared_count = 0
            failed_to_clear = []

            for old_file in old_excel_files:
                try:
                    # On Windows, files might be locked - try multiple times
                    max_retries = 3
                    retry_count = 0
                    while retry_count < max_retries:
                        try:
                            old_file.unlink()
                            cleared_count += 1
                            logger.info(f"Cleared old file before upload: {old_file.name}")
                            break
                        except PermissionError:
                            retry_count += 1
                            if retry_count < max_retries:
                                import time
                                time.sleep(0.5)  # Wait 500ms before retry
                            else:
                                raise
                except Exception as e:
                    failed_to_clear.append(old_file.name)
                    logger.error(f"Failed to clear old file {old_file.name}: {e}")

            # If any files failed to clear, fail the upload to prevent mixing old and new data
            if failed_to_clear:
                logger.error(f"CRITICAL: Failed to clear {len(failed_to_clear)} file(s) before upload: {failed_to_clear}")
                return jsonify({
                    'error': f'Failed to clear {len(failed_to_clear)} old file(s) before upload. Please ensure files are not locked or in use.',
                    'failed_files': failed_to_clear,
                    'details': 'Old files must be cleared before upload to ensure report generation uses only the new file(s). Files may be locked by Excel or another process.'
                }), 500

            if cleared_count > 0:
                logger.info(f"Cleared {cleared_count} old Excel file(s) before upload")
            else:
                logger.info("No old Excel files found to clear (reports directory was empty)")

            uploaded_count = 0
            uploaded_files = []

            for file in files:
                if file.filename == '':
                    continue

                # Check if it's an Excel file
                filename = secure_filename(file.filename)
                if not (filename.endswith('.xlsx') or filename.endswith('.xls')):
                    logger.warning(f"Skipping non-Excel file: {filename}")
                    continue

                # Save file to reports directory
                file_path = reports_dir / filename
                file.save(str(file_path))
                uploaded_count += 1
                uploaded_files.append(filename)
                logger.info(f"Uploaded file: {filename} -> {file_path}")

            if uploaded_count == 0:
                return jsonify({'error': 'No valid Excel files uploaded'}), 400

            # Warn if multiple files uploaded - reports should be generated from ONE file
            if uploaded_count > 1:
                logger.warning(f"WARNING: {uploaded_count} files uploaded. Reports should be generated from a single file. Only the newest file will be used.")

            return jsonify({
                'status': 'success',
                'message': f'Successfully uploaded {uploaded_count} file(s)',
                'uploaded_count': uploaded_count,
                'uploaded_files': uploaded_files,
                'cleared_old_files': cleared_count,
                'warning': f'{uploaded_count} file(s) uploaded - only the newest will be used for report generation' if uploaded_count > 1 else None
            })
        except Exception as e:
            logger.error(f"Error uploading files: {e}", exc_info=True)
            return jsonify({'error': f'Failed to upload files: {str(e)}'}), 500

    @app.route('/api/status', methods=['GET'])
    def status():
        """Get service status and configuration."""
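The upload handler's inline retry loop deletes files that Windows may briefly lock. The same idea as a standalone helper; a sketch, not code from this commit:

import time
from pathlib import Path

def unlink_with_retry(path: Path, retries: int = 3, delay: float = 0.5) -> None:
    """Delete a file, retrying briefly in case Excel or another process holds a lock."""
    for attempt in range(retries):
        try:
            path.unlink()
            return
        except PermissionError:
            if attempt == retries - 1:
                raise  # still locked after all retries
            time.sleep(delay)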
@@ -163,7 +514,11 @@ def create_app(config_path: Optional[str] = None):
        """Get latest report JSON file."""
        try:
            report_config = app.config['REPORT_CONFIG']
            output_dir = Path(report_config.get('output_dir', 'output'))
            output_dir_str = report_config.get('output_dir', 'output')
            output_dir = Path(output_dir_str)
            if not output_dir.is_absolute():
                script_dir = Path(__file__).parent.absolute()
                output_dir = script_dir / output_dir
            report_file = output_dir / 'report.json'

            if not report_file.exists():
@@ -179,22 +534,133 @@ def create_app(config_path: Optional[str] = None):

    @app.route('/api/report/html', methods=['GET'])
    def get_report_html():
        """Get latest report HTML file."""
        """Get report HTML file by report_id (or latest if not specified)."""
        try:
            from flask import send_from_directory

            report_config = app.config['REPORT_CONFIG']
            output_dir = Path(report_config.get('output_dir', 'output'))
            html_file = output_dir / 'report.html'
            output_dir_str = report_config.get('output_dir', 'output')
            output_dir = Path(output_dir_str)
            if not output_dir.is_absolute():
                script_dir = Path(__file__).parent.absolute()
                output_dir = script_dir / output_dir

            if not html_file.exists():
                return jsonify({'error': 'Report HTML not found. Generate a report first.'}), 404
            # Get report_id from query parameter, default to latest
            report_id = request.args.get('report_id')

            return send_from_directory(str(output_dir), 'report.html', mimetype='text/html')
            if report_id:
                # Check if it's a timestamped report or legacy report
                html_file = output_dir / f"{report_id}.html"

                # If not found and it starts with "report-", might be a legacy report with generated ID
                if not html_file.exists() and report_id.startswith('report-'):
                    # Try legacy report.html
                    legacy_file = output_dir / 'report.html'
                    if legacy_file.exists():
                        html_file = legacy_file
                    else:
                        return jsonify({'error': f'Report {report_id} not found.'}), 404
                elif not html_file.exists():
                    return jsonify({'error': f'Report {report_id} not found.'}), 404
            else:
                # Get latest report (check both timestamped and legacy)
                timestamped_files = list(output_dir.glob('report-*.html'))
                legacy_file = output_dir / 'report.html'

                html_files = []
                if legacy_file.exists():
                    html_files.append(legacy_file)
                html_files.extend(timestamped_files)

                if not html_files:
                    return jsonify({'error': 'No reports found. Generate a report first.'}), 404

                html_file = sorted(html_files, key=lambda p: p.stat().st_mtime, reverse=True)[0]

            return send_from_directory(str(output_dir), html_file.name, mimetype='text/html')
        except Exception as e:
            logger.error(f"Error reading report HTML: {e}", exc_info=True)
            return jsonify({'error': f'Failed to read report HTML: {str(e)}'}), 500

    @app.route('/api/reports/list', methods=['GET'])
    def list_reports():
        """List all available reports (last 10)."""
        try:
            report_config = app.config['REPORT_CONFIG']
            output_dir_str = report_config.get('output_dir', 'output')
            output_dir = Path(output_dir_str)

            # Ensure absolute path
            if not output_dir.is_absolute():
                script_dir = Path(__file__).parent.absolute()
                output_dir = script_dir / output_dir

            # Log for debugging
            logger.info(f"Looking for reports in: {output_dir.absolute()}")
            logger.info(f"Output directory exists: {output_dir.exists()}")
            if output_dir.exists():
                logger.info(f"Files in output directory: {list(output_dir.glob('*'))}")

            # Find all report HTML files (both timestamped and non-timestamped)
            timestamped_files = list(output_dir.glob('report-*.html'))
            legacy_file = output_dir / 'report.html'

            logger.info(f"Found {len(timestamped_files)} timestamped report files")
            logger.info(f"Legacy report.html exists: {legacy_file.exists()}")
            if legacy_file.exists():
                logger.info(f"Legacy report.html path: {legacy_file.absolute()}")

            html_files = []

            # Add legacy report.html if it exists
            if legacy_file.exists():
                html_files.append(legacy_file)
                logger.info("Added legacy report.html to list")

            # Add timestamped files
            html_files.extend(timestamped_files)
            logger.info(f"Total HTML files found: {len(html_files)}")

            reports = []
            for html_file in sorted(html_files, key=lambda p: p.stat().st_mtime, reverse=True)[:10]:
                report_id = html_file.stem  # e.g., "report-2025-11-08-11-25-46" or "report"

                # Handle legacy report.html
                if report_id == 'report':
                    # Use file modification time as timestamp
                    mtime = html_file.stat().st_mtime
                    dt = datetime.fromtimestamp(mtime)
                    timestamp_str = dt.strftime('%Y-%m-%d-%H-%M-%S')
                    date_str = dt.strftime('%Y-%m-%d %H:%M:%S')
                    report_id = f"report-{timestamp_str}"
                else:
                    # Timestamped report
                    timestamp_str = report_id.replace('report-', '')
                    try:
                        # Parse timestamp to create readable date
                        dt = datetime.strptime(timestamp_str, '%Y-%m-%d-%H-%M-%S')
                        date_str = dt.strftime('%Y-%m-%d %H:%M:%S')
                    except:
                        date_str = timestamp_str

                # Get file size
                file_size = html_file.stat().st_size

                reports.append({
                    'report_id': report_id,
                    'date': date_str,
                    'timestamp': timestamp_str,
                    'file_size': file_size
                })

            return jsonify({
                'reports': reports,
                'count': len(reports)
            })
        except Exception as e:
            logger.error(f"Error listing reports: {e}", exc_info=True)
            return jsonify({'error': f'Failed to list reports: {str(e)}'}), 500

    return app
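Taken together, the endpoints added in this file support a manual-upload flow like the sketch below. The host/port and file name are placeholders, and the generation route is defined above the first hunk shown here, so its path (/api/generate) is an assumption; API-key headers, if enforced, are omitted:

import requests

BASE = 'http://localhost:5000'  # placeholder host/port

# 1. Upload a fresh Excel file; the server clears old ones first.
with open('vendor_items.xlsx', 'rb') as f:  # placeholder file name
    requests.post(f'{BASE}/api/upload', files={'files': f}).raise_for_status()

# 2. Generate from the upload, skipping the SharePoint download.
resp = requests.post(f'{BASE}/api/generate',  # route name assumed, not visible in this diff
                     json={'download_from_sharepoint': False})
report_id = resp.json()['report_id']

# 3. Fetch the rendered HTML for that specific report.
html = requests.get(f'{BASE}/api/report/html', params={'report_id': report_id}).text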
47  config.py
@@ -71,9 +71,46 @@ def load_config(config_path: Optional[str] = None) -> Dict[str, Any]:
        parent_env = Path(__file__).parent.parent / "taskboard" / ".env"
        if parent_env.exists():
            env_file = parent_env
            logging.info(f"Found .env file in taskboard directory: {env_file}")
        else:
            logging.warning(f".env file not found in vendor_report or taskboard directory")
            logging.warning(f"Checked: {Path(__file__).parent / '.env'}")
            logging.warning(f"Checked: {parent_env}")
    else:
        logging.info(f"Found .env file in vendor_report directory: {env_file}")

    if env_file.exists():
        load_dotenv(env_file)
        logging.info(f"Loaded environment variables from {env_file}")
        load_dotenv(env_file, override=True)  # override=True ensures env vars take precedence
        logging.info(f"Loaded environment variables from {env_file.absolute()}")

        # Log which SharePoint env vars were found (checking both SHAREPOINT_* and AZURE_AD_* fallbacks)
        sp_vars = ['SHAREPOINT_ENABLED', 'SHAREPOINT_SITE_URL', 'SHAREPOINT_FOLDER_PATH']
        found_vars = [var for var in sp_vars if os.getenv(var)]

        # Check credentials (with fallback)
        client_id = os.getenv('SHAREPOINT_CLIENT_ID') or os.getenv('AZURE_AD_CLIENT_ID')
        tenant_id = os.getenv('SHAREPOINT_TENANT_ID') or os.getenv('AZURE_AD_TENANT_ID')
        client_secret = os.getenv('SHAREPOINT_CLIENT_SECRET') or os.getenv('AZURE_AD_CLIENT_SECRET')

        if client_id:
            found_vars.append('CLIENT_ID (from SHAREPOINT_CLIENT_ID or AZURE_AD_CLIENT_ID)')
        if tenant_id:
            found_vars.append('TENANT_ID (from SHAREPOINT_TENANT_ID or AZURE_AD_TENANT_ID)')
        if client_secret:
            found_vars.append('CLIENT_SECRET (from SHAREPOINT_CLIENT_SECRET or AZURE_AD_CLIENT_SECRET)')

        logging.info(f"Found SharePoint environment variables: {', '.join(found_vars)}")

        missing_vars = []
        if not client_id:
            missing_vars.append('CLIENT_ID (SHAREPOINT_CLIENT_ID or AZURE_AD_CLIENT_ID)')
        if not tenant_id:
            missing_vars.append('TENANT_ID (SHAREPOINT_TENANT_ID or AZURE_AD_TENANT_ID)')
        if not client_secret:
            missing_vars.append('CLIENT_SECRET (SHAREPOINT_CLIENT_SECRET or AZURE_AD_CLIENT_SECRET)')

        if missing_vars:
            logging.warning(f"Missing SharePoint credentials: {', '.join(missing_vars)}")

    if config_path is None:
        config_path = Path(__file__).parent / "config.yaml"

@@ -134,6 +171,12 @@ def _load_from_env(config: Dict) -> Dict:
    elif os.getenv('AZURE_AD_CLIENT_SECRET'):
        config['sharepoint']['client_secret'] = os.getenv('AZURE_AD_CLIENT_SECRET')

    # Tenant ID (required for Microsoft Graph API)
    if os.getenv('SHAREPOINT_TENANT_ID'):
        config['sharepoint']['tenant_id'] = os.getenv('SHAREPOINT_TENANT_ID')
    elif os.getenv('AZURE_AD_TENANT_ID'):
        config['sharepoint']['tenant_id'] = os.getenv('AZURE_AD_TENANT_ID')

    if os.getenv('SHAREPOINT_USE_APP_AUTH'):
        config['sharepoint']['use_app_authentication'] = os.getenv('SHAREPOINT_USE_APP_AUTH').lower() == 'true'
    elif os.getenv('SHAREPOINT_USE_APP_AUTH') is None and os.getenv('AZURE_AD_CLIENT_ID'):
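The credential lookups above all follow the same fallback rule: prefer a SHAREPOINT_* variable, else the matching AZURE_AD_* one. The pattern in miniature (a sketch, not code from config.py):

import os
from typing import Optional

def sp_credential(name: str) -> Optional[str]:
    """Prefer SHAREPOINT_<NAME>, falling back to AZURE_AD_<NAME>."""
    return os.getenv(f'SHAREPOINT_{name}') or os.getenv(f'AZURE_AD_{name}')

client_id = sp_credential('CLIENT_ID')
tenant_id = sp_credential('TENANT_ID')
client_secret = sp_credential('CLIENT_SECRET')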
@@ -1,7 +1,7 @@
PREPROCESSED EXCEL DATA
================================================================================
Current Date (Baltimore/Eastern): 2025-11-06 16:50:13 EST
Total Items: 162
Current Date (Baltimore/Eastern): 2025-11-08 06:42:53 EST
Total Items: 180

VENDOR: Amazon
--------------------------------------------------------------------------------
@@ -20,25 +20,31 @@ ALL ITEMS:

VENDOR: Autstand
--------------------------------------------------------------------------------
Total Items: 74
Closed: 67
Total Items: 91
Closed: 78
Open: 3
Monitor: 4
Monitor: 10

RECENT UPDATES (Yesterday's Date):
ADDED: NCP1-6-TPE1 did not alarm correctly on SCADA when jammed | 2025-11-07 00:00:00 | Incomplete
CLOSED: SCADA: We need some type of refrence in SCADA. Suggest adding dock doors that correlate with their belt | 2025-11-07 00:00:00 | Complete

OLDEST UNADDRESSED (Top 3):
Estops are getting damaged on the UL lane | Age: 2 days | 2025-11-04 00:00:00 | Incomplete
Raise the fill height ob the DTC's approx 2 " | Age: 2 days | 2025-11-04 00:00:00 | Incomplete
NCP1-6-TPE1 did not alarm correctly on SCADA when jammed | Age: 1 days | 2025-11-07 00:00:00 | Incomplete
3:1 merge code update | Age: None days | | Incomplete
Estops are getting damaged on the UL lane | Age: None days | 45965 | Incomplete

VERY HIGH PRIORITY (6 items):
VERY HIGH PRIORITY (8 items):
Semi-Auto Exception Arm Logic | Complete | 10/14/25
PS Conveyor chute clearing Issues | Complete | 10/14/25
Replicate logic timers from semi VS-D to the rest of the semis | Monitor | 10/14/25
Tipper timer | Complete | 10/14/25
SCADA Accurate Status Reads | Complete | 10/14/25
NC boxes are diverting to xbelt causing jams particullary at bypass curves | Complete | 10/17
NCP1-6-TPE1 did not alarm correctly on SCADA when jammed | Incomplete | 2025-11-07 00:00:00
Replicate logic timers from semi VS-D to the rest of the semis | Monitor | 10/14/25

HIGH PRIORITY (53 items):
HIGH PRIORITY (67 items):
Problem Solve dead rollers | Complete | 10/14/25
Jam Reset Button needed at end of NC Jackpots | Complete | 10/15/25
Jam Reset buttons on Bulk divert platforms to be relocated | Complete | 10/15/25
@@ -91,9 +97,25 @@ The PLC is not reporting S04 message divert fails properly (no reads to jackpot,
Encoder failure (4x) + 2 x | Complete | 2025-10-10 00:00:00
SCADA performance issue | Complete |
7:1 merge code update | Complete |
3:1 merge code update | Monitor |
Estops are getting damaged on the UL lane | Complete | 2025-11-04 00:00:00
Raise the fill height ob the DTC's approx 2 " | Complete | 2025-11-04 00:00:00
Fix tippers faulting mid-dump sequence | Complete | 2025-11-06 00:00:00
PS8-5 Jammed but not showing on SCADA | Complete | 2025-11-06 00:00:00
Bypass improvements | Complete | 2025-11-03 00:00:00
Sorter statistics on SCADA | Monitor | 2025-11-03 00:00:00
Update SCADA to show inch and store status of conveyors with such functionality | Complete | 2025-11-07 00:00:00
Logic for Semi induct D is off very low throughput see video | Monitor | 10/16/2025
SCADA: We need some type of refrence in SCADA. Suggest adding dock doors that correlate with their belt | Complete | 10/16/2025
PS11-11CH6NC Intralox Sorter (S02) | Complete | 10/17/2025
One major issue and one minor issue with the non-con system:
No-reads are really frequent
The PLC is not reporting S04 message divert fails properly (no reads to jackpot, lost container, failed to divert, wrong buildings) | Monitor | 10/17/2025
Encoder failure (4x) + 2 x | Complete | 45940
7:1 merge code update | Complete |
3:1 merge code update | Incomplete |
Estops are getting damaged on the UL lane | Incomplete | 2025-11-04 00:00:00
Raise the fill height ob the DTC's approx 2 " | Incomplete | 2025-11-04 00:00:00
Estops are getting damaged on the UL lane | Incomplete | 45965
Raise the fill height ob the DTC's approx 2 " | Complete | 45965

ALL ITEMS:
Semi-Auto Exception Arm Logic | Vendor: Autstand | Priority: (1) Very High (very_high) | Status: Complete (CLOSED) | Date: 10/14/25 | Description: Exception chute arm disengaged prior to all carton...
@@ -168,24 +190,44 @@ The PLC is not reporting S04 message divert fails properly (no reads to jackpot,
Encoder failure (4x) + 2 x | Vendor: Autstand | Priority: (2) High (high) | Status: Complete (CLOSED) | Date: 2025-10-10 00:00:00 | Description: UL8-7 UL11-7 Problem with port on APF...
SCADA performance issue | Vendor: Autstand | Priority: (2) High (high) | Status: Complete (CLOSED) | Date: | Description: report export crashed system...
7:1 merge code update | Vendor: Autstand | Priority: (2) High (high) | Status: Complete (CLOSED) | Date: | Description: N/A...
3:1 merge code update | Vendor: Autstand | Priority: (2) High (high) | Status: Incomplete (OPEN) | Date: | Description: mcm02 by monday 11/4. mcm01 ul 1-3 done. ...
3:1 merge code update | Vendor: Autstand | Priority: (2) High (high) | Status: Monitor (OPEN) | Date: | Description: mcm02 by monday 11/4. mcm01 ul 1-3 done. ...
gap control at non con sorter. | Vendor: Autstand | Priority: (None) | Status: Complete (CLOSED) | Date: 2025-10-30 00:00:00 | Description: code change/ help with box tracking....
Estops are getting damaged on the UL lane | Vendor: Autstand | Priority: (2) High (high) | Status: Incomplete (OPEN) | Date: 2025-11-04 00:00:00 | Description: UL16-1, UL15-3, UL10-2 (both sides) UL8-1 , UL7-3...
Raise the fill height ob the DTC's approx 2 " | Vendor: Autstand | Priority: (2) High (high) | Status: Incomplete (OPEN) | Date: 2025-11-04 00:00:00 | Description: N/A...
Estops are getting damaged on the UL lane | Vendor: Autstand | Priority: (2) High (high) | Status: Complete (CLOSED) | Date: 2025-11-04 00:00:00 | Description: UL16-1, UL15-3, UL10-2 (both sides) UL8-1 , UL7-3...
Raise the fill height ob the DTC's approx 2 " | Vendor: Autstand | Priority: (2) High (high) | Status: Complete (CLOSED) | Date: 2025-11-04 00:00:00 | Description: N/A...
Fix tippers faulting mid-dump sequence | Vendor: Autstand | Priority: (2) High (high) | Status: Complete (CLOSED) | Date: 2025-11-06 00:00:00 | Description: Tippers that are flow-stopped during the dumping s...
PS8-5 Jammed but not showing on SCADA | Vendor: Autstand | Priority: (2) High (high) | Status: Complete (CLOSED) | Date: 2025-11-06 00:00:00 | Description: KK 11/6 - Per Chris' Twi sort 11.6 report - PS8-5 ...
Bypass improvements | Vendor: Autstand | Priority: (2) High (high) | Status: Complete (CLOSED) | Date: 2025-11-03 00:00:00 | Description: Bypass showing lane unavailable when not running, ...
Sorter statistics on SCADA | Vendor: Autstand | Priority: (2) High (high) | Status: Monitor (OPEN) | Date: 2025-11-03 00:00:00 | Description: Add crossbelt sorter statistics to ignition SCADA ...
Update SCADA to show inch and store status of conveyors with such functionality | Vendor: Autstand | Priority: (2) High (high) | Status: Complete (CLOSED) | Date: 2025-11-07 00:00:00 | Description: Update SCADA to show inch and store conveyors as Y...
NCP1-6-TPE1 did not alarm correctly on SCADA when jammed | Vendor: Autstand | Priority: (1) Very High (very_high) | Status: Incomplete (OPEN) | Date: 2025-11-07 00:00:00 | Description: Jiffy got to noncon and stuck in a T-plate causing...
Replicate logic timers from semi VS-D to the rest of the semis | Vendor: Autstand | Priority: (1) Very High (very_high) | Status: Monitor (OPEN) | Date: 10/14/25 | Description: Logic timers from semi-auto at all virtual sorters...
Logic for Semi induct D is off very low throughput see video | Vendor: Autstand | Priority: (2) High (high) | Status: Monitor (OPEN) | Date: 10/16/2025 | Description: Semi Auto D induct...
SCADA: We need some type of refrence in SCADA. Suggest adding dock doors that correlate with their belt | Vendor: Autstand | Priority: (2) High (high) | Status: Complete (CLOSED) | Date: 10/16/2025 | Description: Flow Desk...
PS11-11CH6NC Intralox Sorter (S02) | Vendor: Autstand | Priority: (2) High (high) | Status: Complete (CLOSED) | Date: 10/17/2025 | Description: PS11-11CH6NC Intralox Sorter (S02)...
One major issue and one minor issue with the non-con system:
No-reads are really frequent
The PLC is not reporting S04 message divert fails properly (no reads to jackpot, lost container, failed to divert, wrong buildings) | Vendor: Autstand | Priority: (2) High (high) | Status: Monitor (OPEN) | Date: 10/17/2025 | Description: NON con sorter 1 and 2 not diverting...
DTC chutes on VS-B is randomly disabling | Vendor: Autstand | Priority: (None) | Status: Monitor (OPEN) | Date: 45952 | Description: N/A...
Encoder failure (4x) + 2 x | Vendor: Autstand | Priority: (2) High (high) | Status: Complete (CLOSED) | Date: 45940 | Description: UL8-7 UL11-7 Problem with port on APF...
7:1 merge code update | Vendor: Autstand | Priority: (2) High (high) | Status: Complete (CLOSED) | Date: | Description: N/A...
3:1 merge code update | Vendor: Autstand | Priority: (2) High (high) | Status: Incomplete (OPEN) | Date: | Description: mcm02 by monday 11/4. mcm01 ul 1-3 done. ...
Estops are getting damaged on the UL lane | Vendor: Autstand | Priority: (2) High (high) | Status: Incomplete (OPEN) | Date: 45965 | Description: UL16-1, UL15-3, UL10-2 (both sides) UL8-1 , UL7-3...
Raise the fill height ob the DTC's approx 2 " | Vendor: Autstand | Priority: (2) High (high) | Status: Complete (CLOSED) | Date: 45965 | Description: N/A...

================================================================================

VENDOR: Autstand/Beumer
--------------------------------------------------------------------------------
Total Items: 3
Closed: 2
Total Items: 4
Closed: 4
Open: 0
Monitor: 1
Monitor: 0

ALL ITEMS:
Semi induct D - light not allumintating green | Vendor: Autstand/Beumer | Priority: (None) | Status: Complete (CLOSED) | Date: 10/15/2025 | Description: N/A...
Bypasses are showing "lane unavailble" at a high rate. should always be available... is it in energy saving mode? or other reason | Vendor: Autstand/Beumer | Priority: (None) | Status: Monitor (OPEN) | Date: 2025-10-22 00:00:00 | Description: N/A...
Bypasses are showing "lane unavailble" at a high rate. should always be available... is it in energy saving mode? or other reason | Vendor: Autstand/Beumer | Priority: (None) | Status: Complete (CLOSED) | Date: 2025-10-22 00:00:00 | Description: N/A...
pe missing prob solve ak chute | Vendor: Autstand/Beumer | Priority: (3) Medium (medium) | Status: Complete (CLOSED) | Date: 2025-10-29 00:00:00 | Description: N/A...
Bypasses are showing "lane unavailble" at a high rate. should always be available... is it in energy saving mode? or other reason | Vendor: Autstand/Beumer | Priority: (None) | Status: Complete (CLOSED) | Date: 45952 | Description: N/A...

================================================================================

@@ -299,14 +341,13 @@ https://t.corp.amazon.com/V1969041198 | Vendor: Caljan | Priority: (2) High (hig
VENDOR: DCS
--------------------------------------------------------------------------------
Total Items: 25
Closed: 20
Open: 4
Closed: 22
Open: 2
Monitor: 1

OLDEST UNADDRESSED (Top 3):
NCS1-1 aligner belt failed | Age: 5 days | 2025-11-01 00:00:00 | Incomplete
) There is a catchpoint of bent metal that is sticking out from the tail assembly on PS10-1 where it transitions to PS11-1. This is catching polys during operation. Jesse is going to look into making proper modifications to eliminate this. | Age: 2 days | 2025-11-04 00:00:00 | Incomplete
2) When product from PS10-1 is flowing towards PS11-1, there is no snowplow and instead the slide just dead ends with a corner of sidepan. I’ve asked Jesse to look into fabricating a UHMW piece that could bridge this corner to push products down onto the belt. | Age: 2 days | 2025-11-04 00:00:00 | Incomplete
NCS1-1 aligner belt failed | Age: 7 days | 2025-11-01 00:00:00 | Incomplete
3) The black UHMW strip under the belt which transitions the belt from slider bed to tail roller is too sharp and is shaving the bottom side of the belt. Jesse and his team are going to look into pulling this uhmw strip out, properly chamfering it and then re-installing. | Age: 4 days | 2025-11-04 00:00:00 | Incomplete

VERY HIGH PRIORITY (4 items):
Flow turn Belt Replacement | Complete | 10/10/25
@@ -329,8 +370,8 @@ HIGH PRIORITY (17 items):
Non Con Chute/Maint access. Need Latch upgrade | Complete | 2025-10-10 00:00:00
Motor falling on HSQ gappers.. 2x (3:1 merge) | Complete |
PRS4-2 Motor Replacement | Complete | 2025-10-26 00:00:00
) There is a catchpoint of bent metal that is sticking out from the tail assembly on PS10-1 where it transitions to PS11-1. This is catching polys during operation. Jesse is going to look into making proper modifications to eliminate this. | Incomplete | 2025-11-04 00:00:00
2) When product from PS10-1 is flowing towards PS11-1, there is no snowplow and instead the slide just dead ends with a corner of sidepan. I’ve asked Jesse to look into fabricating a UHMW piece that could bridge this corner to push products down onto the belt. | Incomplete | 2025-11-04 00:00:00
) There is a catchpoint of bent metal that is sticking out from the tail assembly on PS10-1 where it transitions to PS11-1. This is catching polys during operation. Jesse is going to look into making proper modifications to eliminate this. | Complete | 2025-11-04 00:00:00
2) When product from PS10-1 is flowing towards PS11-1, there is no snowplow and instead the slide just dead ends with a corner of sidepan. I’ve asked Jesse to look into fabricating a UHMW piece that could bridge this corner to push products down onto the belt. | Complete | 2025-11-04 00:00:00
3) The black UHMW strip under the belt which transitions the belt from slider bed to tail roller is too sharp and is shaving the bottom side of the belt. Jesse and his team are going to look into pulling this uhmw strip out, properly chamfering it and then re-installing. | Incomplete | 2025-11-04 00:00:00

ALL ITEMS:
@@ -356,8 +397,8 @@ ALL ITEMS:
Motor falling on HSQ gappers.. 2x (3:1 merge) | Vendor: DCS | Priority: (2) High (high) | Status: Complete (CLOSED) | Date: | Description: upgrade the bolts...
PRS4-2 Motor Replacement | Vendor: DCS | Priority: High (high) | Status: Complete (CLOSED) | Date: 2025-10-26 00:00:00 | Description: Motor Oreded will update when I have an ETA ( Tryi...
NCS1-1 aligner belt failed | Vendor: DCS | Priority: (1) Very High (very_high) | Status: Incomplete (OPEN) | Date: 2025-11-01 00:00:00 | Description: Belt failed prior to flow splitter. Replaced wi...
) There is a catchpoint of bent metal that is sticking out from the tail assembly on PS10-1 where it transitions to PS11-1. This is catching polys during operation. Jesse is going to look into making proper modifications to eliminate this. | Vendor: DCS | Priority: (2) High (high) | Status: Incomplete (OPEN) | Date: 2025-11-04 00:00:00 | Description: N/A...
2) When product from PS10-1 is flowing towards PS11-1, there is no snowplow and instead the slide just dead ends with a corner of sidepan. I’ve asked Jesse to look into fabricating a UHMW piece that could bridge this corner to push products down onto the belt. | Vendor: DCS | Priority: (2) High (high) | Status: Incomplete (OPEN) | Date: 2025-11-04 00:00:00 | Description: N/A...
) There is a catchpoint of bent metal that is sticking out from the tail assembly on PS10-1 where it transitions to PS11-1. This is catching polys during operation. Jesse is going to look into making proper modifications to eliminate this. | Vendor: DCS | Priority: (2) High (high) | Status: Complete (CLOSED) | Date: 2025-11-04 00:00:00 | Description: Catached Point in two area's Both have been comple...
2) When product from PS10-1 is flowing towards PS11-1, there is no snowplow and instead the slide just dead ends with a corner of sidepan. I’ve asked Jesse to look into fabricating a UHMW piece that could bridge this corner to push products down onto the belt. | Vendor: DCS | Priority: (2) High (high) | Status: Complete (CLOSED) | Date: 2025-11-04 00:00:00 | Description: Completed...
3) The black UHMW strip under the belt which transitions the belt from slider bed to tail roller is too sharp and is shaving the bottom side of the belt. Jesse and his team are going to look into pulling this uhmw strip out, properly chamfering it and then re-installing. | Vendor: DCS | Priority: (2) High (high) | Status: Incomplete (OPEN) | Date: 2025-11-04 00:00:00 | Description: N/A...

================================================================================
@@ -409,7 +450,7 @@ Total Items: 5
Monitor: 1

OLDEST UNADDRESSED (Top 3):
Add DHL label to Scan tunnel valid message | Age: 10 days | 2025-10-27 00:00:00 | Incomplete
Add DHL label to Scan tunnel valid message | Age: 12 days | 2025-10-27 00:00:00 | Incomplete

VERY HIGH PRIORITY (1 items):
Add DHL label to Scan tunnel valid message | Incomplete | 2025-10-27 00:00:00
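Note that several dates in the regenerated dump surface as raw Excel serial numbers (45940, 45952, 45965) instead of ISO timestamps, which also breaks the age calculation ("Age: None days"). Excel serials count days from an epoch of 1899-12-30, so they decode as in the sketch below; 45965 resolves to 2025-11-04 and 45940 to 2025-10-10, matching the ISO dates the same items carried in the previous run:

from datetime import date, timedelta

def excel_serial_to_date(serial: int) -> date:
    # Excel's day numbering effectively starts at 1899-12-30
    # (the offset absorbs the fictitious 1900-02-29).
    return date(1899, 12, 30) + timedelta(days=serial)

print(excel_serial_to_date(45965))  # 2025-11-04
print(excel_serial_to_date(45940))  # 2025-10-10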
9795   output/report-2025-11-08-15-16-56.html   Normal file (diff suppressed because it is too large)
3972   output/report-2025-11-08-15-16-56.json   Normal file (diff suppressed because it is too large)
9795   output/report-2025-11-08-15-18-53.html   Normal file (diff suppressed because it is too large)
3972   output/report-2025-11-08-15-18-53.json   Normal file (diff suppressed because it is too large)
9795   output/report-2025-11-08-15-21-12.html   Normal file (diff suppressed because it is too large)
3972   output/report-2025-11-08-15-21-12.json   Normal file (diff suppressed because it is too large)
9916   output/report-2025-11-08-15-24-16.html   Normal file (diff suppressed because it is too large)
4080   output/report-2025-11-08-15-24-16.json   Normal file (diff suppressed because it is too large)
9916   output/report-2025-11-08-15-24-30.html   Normal file (diff suppressed because it is too large)
4080   output/report-2025-11-08-15-24-30.json   Normal file (diff suppressed because it is too large)
9916   output/report-2025-11-08-15-24-35.html   Normal file (diff suppressed because it is too large)
4080   output/report-2025-11-08-15-24-35.json   Normal file (diff suppressed because it is too large)
13534  output/report-2025-11-08-15-24-57.html   Normal file (diff suppressed because it is too large)
7569   output/report-2025-11-08-15-24-57.json   Normal file (diff suppressed because it is too large)
9723   output/report-2025-11-08-15-33-57.html   Normal file (diff suppressed because it is too large)
3936   output/report-2025-11-08-15-33-57.json   Normal file (diff suppressed because it is too large)
9723   output/report-2025-11-08-15-42-46.html   Normal file (diff suppressed because it is too large)
3936   output/report-2025-11-08-15-42-46.json   Normal file (diff suppressed because it is too large)
9723   output/report-2025-11-08-15-42-53.html   Normal file (diff suppressed because it is too large)
3936   output/report-2025-11-08-15-42-53.json   Normal file (diff suppressed because it is too large)
1211   output/report.html   (diff suppressed because it is too large)
1140   output/report.json   (diff suppressed because it is too large)
File diff suppressed because it is too large
Binary file not shown.
@@ -3,7 +3,7 @@ pandas>=2.0.0
openpyxl>=3.0.0
pydantic>=2.0.0

# Optional: SharePoint integration
# Optional: SharePoint integration (Office365-REST-Python-Client)
Office365-REST-Python-Client>=2.3.0

# Optional: Scheduling
@@ -66,11 +66,10 @@ class ReportScheduler:
                folder_path=self.sharepoint_config.get('folder_path'),
                file_path=self.sharepoint_config.get('file_path'),
                local_dir=self.sharepoint_config.get('local_dir', 'reports'),
                username=self.sharepoint_config.get('username'),
                password=self.sharepoint_config.get('password'),
                tenant_id=self.sharepoint_config.get('tenant_id'),
                client_id=self.sharepoint_config.get('client_id'),
                client_secret=self.sharepoint_config.get('client_secret'),
                use_app_authentication=self.sharepoint_config.get('use_app_authentication', False),
                use_app_authentication=self.sharepoint_config.get('use_app_authentication', True),
                file_pattern=self.sharepoint_config.get('file_pattern'),
                overwrite=self.sharepoint_config.get('overwrite', True)
            )
@@ -1,9 +1,9 @@
#!/usr/bin/env python3
"""
SharePoint File Downloader
SharePoint File Downloader using Office365-REST-Python-Client

Downloads Excel files from SharePoint to the local reports directory.
Supports both scheduled and on-demand downloads.
Uses Office365-REST-Python-Client library for SharePoint REST API access.
"""

import os
@@ -15,89 +15,181 @@ import logging
try:
    from office365.sharepoint.client_context import ClientContext
    from office365.runtime.auth.authentication_context import AuthenticationContext
    from office365.runtime.auth.user_credential import UserCredential
    from office365.runtime.auth.client_credential import ClientCredential
    SHAREPOINT_AVAILABLE = True
    from office365.sharepoint.files.file import File
    OFFICE365_AVAILABLE = True
except ImportError:
    SHAREPOINT_AVAILABLE = False
    OFFICE365_AVAILABLE = False
    logging.warning("office365-rest-python-client not installed. SharePoint features disabled.")


logger = logging.getLogger(__name__)


class SharePointDownloader:
    """Downloads files from SharePoint."""
    """Downloads files from SharePoint using Office365-REST-Python-Client."""

    def __init__(
        self,
        site_url: str,
        username: Optional[str] = None,
        password: Optional[str] = None,
        tenant_id: Optional[str] = None,
        client_id: Optional[str] = None,
        client_secret: Optional[str] = None,
        use_app_authentication: bool = False
        use_app_authentication: bool = True
    ):
        """
        Initialize SharePoint downloader.
        Initialize SharePoint downloader using Office365-REST-Python-Client.

        Args:
            site_url: SharePoint site URL (e.g., "https://yourcompany.sharepoint.com/sites/YourSite")
            username: Username for user authentication (if not using app authentication)
            password: Password for user authentication (if not using app authentication)
            client_id: Azure AD app client ID (for app authentication)
            client_secret: Azure AD app client secret (for app authentication)
            use_app_authentication: Whether to use app authentication (recommended for automation)
            tenant_id: Azure AD tenant ID (required for app authentication)
            client_id: Azure AD app client ID (required for app authentication)
            client_secret: Azure AD app client secret (required for app authentication)
            use_app_authentication: Whether to use app authentication (default: True)
        """
        if not SHAREPOINT_AVAILABLE:
        if not OFFICE365_AVAILABLE:
            raise ImportError(
                "office365-rest-python-client is required for SharePoint integration. "
                "Install it with: pip install Office365-REST-Python-Client"
            )

        self.site_url = site_url
        self.username = username
        self.password = password
        self.site_url = site_url.rstrip('/')
        self.tenant_id = tenant_id
        self.client_id = client_id
        self.client_secret = client_secret
        self.use_app_authentication = use_app_authentication
        self.ctx = None
def authenticate(self) -> bool:
"""Authenticate with SharePoint."""
if not self.client_id or not self.client_secret:
logger.error("Client ID and Client Secret are required for SharePoint authentication.")
raise ValueError("Missing Azure AD credentials for SharePoint.")

def connect(self) -> bool:
"""Connect to SharePoint site."""
if self.ctx:
return True

try:
if self.use_app_authentication and self.client_id and self.client_secret:
# App authentication (recommended for automation)
logger.info(f"Attempting app authentication with client_id: {self.client_id[:8]}...")
if self.use_app_authentication:
# App-only authentication using Office365-REST-Python-Client
from office365.runtime.auth.client_credential import ClientCredential

logger.info(f"Connecting to SharePoint site: {self.site_url}")
logger.info(f"Using Client ID: {self.client_id[:8]}... (truncated for security)")

credentials = ClientCredential(self.client_id, self.client_secret)
self.ctx = ClientContext(self.site_url).with_credentials(credentials)
logger.info("Created SharePoint context with app credentials")
elif self.username and self.password:
# User authentication
credentials = UserCredential(self.username, self.password)
self.ctx = ClientContext(self.site_url).with_credentials(credentials)
logger.info("Authenticated with SharePoint using user credentials")
else:
logger.error("No authentication credentials provided")
return False

# Test connection
logger.info("Testing SharePoint connection...")
# Test connection by getting web
# This will fail if RSC is not granted or credentials are wrong
web = self.ctx.web
self.ctx.load(web)
self.ctx.execute_query()

logger.info(f"Successfully connected to SharePoint site: {web.properties['Title']}")
return True
else:
logger.error("Only app-only authentication is supported")
return False

except Exception as e:
logger.error(f"SharePoint authentication failed: {e}", exc_info=True)
logger.error(f"Site URL: {self.site_url}")
logger.error(f"Client ID: {self.client_id[:8] if self.client_id else 'None'}...")
logger.error(f"Using app auth: {self.use_app_authentication}")
error_msg = str(e)
logger.error(f"Failed to connect to SharePoint: {error_msg}", exc_info=True)

# Provide helpful error messages
if "Unsupported app only token" in error_msg or "401" in error_msg:
logger.error("This error usually means:")
logger.error("1. Resource-Specific Consent (RSC) is not granted for this site")
logger.error("2. Go to: {}/_layouts/15/appinv.aspx".format(self.site_url))
logger.error("3. Enter App ID: {}".format(self.client_id))
logger.error("4. Grant permission with XML: <AppPermissionRequests AllowAppOnlyPolicy=\"true\"><AppPermissionRequest Scope=\"http://sharepoint/content/sitecollection\" Right=\"Read\" /></AppPermissionRequests>")
elif "403" in error_msg or "Forbidden" in error_msg:
logger.error("403 Forbidden - App does not have access to this site")
logger.error("RSC must be granted via appinv.aspx")
elif "Invalid client secret" in error_msg or "invalid_client" in error_msg:
logger.error("Invalid client credentials - check CLIENT_ID and CLIENT_SECRET")

return False

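For reference, a sketch collecting the RSC grant steps from the log messages above into one place; the site URL and app ID are hypothetical placeholders:

# Sketch of the RSC grant steps logged above; site URL and app ID are hypothetical.
site_url = "https://contoso.sharepoint.com/sites/YourSite"
client_id = "00000000-0000-0000-0000-000000000000"

# Open this page and enter the app ID to look up the registration:
grant_page = f"{site_url}/_layouts/15/appinv.aspx"

# Permission XML granting app-only Read on the site collection (same XML as the log line):
permission_xml = (
    '<AppPermissionRequests AllowAppOnlyPolicy="true">'
    '<AppPermissionRequest Scope="http://sharepoint/content/sitecollection" Right="Read" />'
    '</AppPermissionRequests>'
)
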
def list_files_in_folder(
self,
folder_path: str,
file_pattern: Optional[str] = None
) -> List[dict]:
"""
List files in a SharePoint folder.

Args:
folder_path: Folder path relative to site root (e.g., "Shared Documents/General/Amazon Punchlist [EXTERNAL]")
file_pattern: Optional file pattern filter (e.g., "*.xlsx")

Returns:
List of file metadata dictionaries
"""
if not self.ctx:
if not self.connect():
return []

try:
# Normalize folder path
# User provides: /Shared Documents/General/Amazon Punchlist [EXTERNAL]
# SharePoint needs: /sites/SiteName/Shared Documents/General/Amazon Punchlist [EXTERNAL]
folder_path = folder_path.strip('/')

# Extract site path from site_url
from urllib.parse import urlparse
site_path = urlparse(self.site_url).path.strip('/')

# Construct full server-relative URL
# If folder_path already starts with site path, use as-is
# Otherwise, prepend site path
if folder_path.startswith(site_path + '/'):
server_relative_url = f"/{folder_path}"
elif site_path:
server_relative_url = f"/{site_path}/{folder_path}"
else:
server_relative_url = f"/{folder_path}"

logger.info(f"Listing files in folder: {server_relative_url}")
logger.info(f"Site URL: {self.site_url}, Site path: {site_path}, Folder path: {folder_path}")

# Get folder
folder = self.ctx.web.get_folder_by_server_relative_url(server_relative_url)
files = folder.files
self.ctx.load(files)
self.ctx.execute_query()

excel_files = []
for file in files:
file_name = file.properties["Name"]
# Only consider Excel files
if file_name and (file_name.endswith('.xlsx') or file_name.endswith('.xls')):
# Apply file pattern filter if provided
if file_pattern:
pattern = file_pattern.replace('*', '')
if not file_name.endswith(pattern):
continue

excel_files.append({
"name": file_name,
"server_relative_url": file.properties.get("ServerRelativeUrl", ""),
"size": file.properties.get("Length", 0),
"time_last_modified": file.properties.get("TimeLastModified", "")
})

logger.info(f"Found {len(excel_files)} Excel file(s) in folder")
for file_info in excel_files:
logger.info(f"  - {file_info['name']} ({file_info['size']} bytes)")

return excel_files

except Exception as e:
logger.error(f"Error listing files: {e}", exc_info=True)
return []

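A standalone sketch of the server-relative URL normalization above, using hypothetical example values:

# Standalone sketch of the URL normalization in list_files_in_folder; example values are hypothetical.
from urllib.parse import urlparse

site_url = "https://contoso.sharepoint.com/sites/YourSite"
folder_path = "Shared Documents/Reports".strip('/')
site_path = urlparse(site_url).path.strip('/')  # -> "sites/YourSite"

if folder_path.startswith(site_path + '/'):
    server_relative_url = f"/{folder_path}"
elif site_path:
    server_relative_url = f"/{site_path}/{folder_path}"
else:
    server_relative_url = f"/{folder_path}"

print(server_relative_url)  # -> "/sites/YourSite/Shared Documents/Reports"
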
def download_file(
self,
file_path: str,
server_relative_url: str,
file_name: str,
local_path: str,
overwrite: bool = True
) -> bool:
@ -105,7 +197,8 @@ class SharePointDownloader:
Download a single file from SharePoint.

Args:
file_path: Path to file in SharePoint (e.g., "/Shared Documents/Reports/file.xlsx")
server_relative_url: Server-relative URL of the file
file_name: The original name of the file (for logging)
local_path: Local path where file should be saved
overwrite: Whether to overwrite existing file

@ -113,29 +206,49 @@ class SharePointDownloader:
True if successful, False otherwise
"""
if not self.ctx:
if not self.authenticate():
if not self.connect():
return False

local_file = None
try:
local_file_path = Path(local_path)
local_file_path.parent.mkdir(parents=True, exist_ok=True)

# Check if file exists and overwrite flag
if local_file_path.exists() and not overwrite:
logger.info(f"File already exists, skipping: {local_path}")
return True

# Download file
with open(local_file_path, "wb") as local_file:
file = self.ctx.web.get_file_by_server_relative_url(file_path)
file.download(local_file)
logger.info(f"Downloading file: {file_name} from {server_relative_url} to {local_path}")

# Get file
file = self.ctx.web.get_file_by_server_relative_url(server_relative_url)
self.ctx.load(file)
self.ctx.execute_query()

logger.info(f"Downloaded: {file_path} -> {local_path}")
# Open file and keep it open during download
# The Office365 library writes to the file during execute_query()
local_file = open(local_file_path, "wb")

# Download file content - this sets up the download callback
file.download(local_file)

# Execute the query - this actually performs the download and writes to the file
self.ctx.execute_query()

# Close the file after download completes
local_file.close()
local_file = None

logger.info(f"Successfully downloaded: {file_name} -> {local_path}")
return True

except Exception as e:
logger.error(f"Failed to download {file_path}: {e}")
logger.error(f"Error downloading file {file_name}: {e}", exc_info=True)
if local_file:
try:
local_file.close()
except Exception:
pass
return False

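A minimal sketch of the same download sequence outside the class, assuming hypothetical site, file, and credential values; a with block keeps the handle open through execute_query(), matching the comments above:

# Minimal download sketch; all values here are hypothetical placeholders.
from office365.sharepoint.client_context import ClientContext
from office365.runtime.auth.client_credential import ClientCredential

ctx = ClientContext("https://contoso.sharepoint.com/sites/YourSite").with_credentials(
    ClientCredential("your-client-id", "your-client-secret")
)
target = ctx.web.get_file_by_server_relative_url("/sites/YourSite/Shared Documents/Reports/report.xlsx")

with open("reports/report.xlsx", "wb") as fh:
    target.download(fh)   # registers the download into the open file handle
    ctx.execute_query()   # performs the request; content is written during this call
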
def download_files_from_folder(
@ -143,121 +256,174 @@ class SharePointDownloader:
folder_path: str,
local_dir: str,
file_pattern: Optional[str] = None,
overwrite: bool = True
overwrite: bool = True,
clear_existing: bool = True
) -> List[str]:
"""
Download all files from a SharePoint folder.
Download Excel files from a SharePoint folder.
By default, downloads only the newest file and clears old files.

Args:
folder_path: Path to folder in SharePoint (e.g., "/Shared Documents/Reports")
local_dir: Local directory where files should be saved
file_pattern: Optional pattern to filter files (e.g., "*.xlsx")
folder_path: Folder path relative to site root
local_dir: Local directory to save files
file_pattern: Optional file pattern filter (e.g., "*.xlsx")
overwrite: Whether to overwrite existing files
clear_existing: If True, clear all existing Excel files before downloading (default: True)

Returns:
List of successfully downloaded file paths
List of downloaded file paths (typically 1 file - the newest)
"""
if not self.ctx:
if not self.authenticate():
# Connect to SharePoint
if not self.connect():
logger.error("Failed to connect to SharePoint")
return []

downloaded_files = []

try:
folder = self.ctx.web.get_folder_by_server_relative_url(folder_path)
files = folder.files
self.ctx.load(files)
self.ctx.execute_query()

# Prepare local directory
local_dir_path = Path(local_dir)
local_dir_path.mkdir(parents=True, exist_ok=True)

for file in files:
file_name = file.properties["Name"]
# ALWAYS clear ALL existing Excel files before downloading (to ensure only new files are used)
# This is critical to prevent combining multiple files
existing_files = list(local_dir_path.glob('*.xlsx')) + list(local_dir_path.glob('*.xls'))
cleared_count = 0
failed_to_clear = []

# Filter by pattern if provided
if file_pattern:
if not file_name.endswith(file_pattern.replace("*", "")):
continue
for old_file in existing_files:
try:
# On Windows, files might be locked - try multiple times
max_retries = 3
retry_count = 0
while retry_count < max_retries:
try:
old_file.unlink()
cleared_count += 1
logger.info(f"Cleared existing file before download: {old_file.name}")
break
except PermissionError:
retry_count += 1
if retry_count < max_retries:
import time
time.sleep(0.5)  # Wait 500ms before retry
else:
raise
except Exception as e:
failed_to_clear.append(old_file.name)
logger.error(f"Failed to clear existing file {old_file.name}: {e}")

# Only download Excel files
if not (file_name.endswith('.xlsx') or file_name.endswith('.xls')):
continue
if failed_to_clear:
logger.error(f"CRITICAL: Failed to clear {len(failed_to_clear)} file(s) before download: {failed_to_clear}")
logger.error("This will cause data mixing! Files may be locked by another process.")
# Don't fail here - let the download proceed, but log the warning

if cleared_count > 0:
logger.info(f"Cleared {cleared_count} existing Excel file(s) before downloading from SharePoint")
else:
logger.info("No existing Excel files found to clear (reports directory was empty)")

# List files in folder
files = self.list_files_in_folder(folder_path, file_pattern)

if not files:
logger.warning(f"No Excel files found in folder: {folder_path}")
return []

# Sort files by last modified date (newest first) and download only the newest one
def parse_time(time_str):
try:
if time_str:
# Office365 library returns datetime objects or ISO strings
if isinstance(time_str, datetime):
return time_str
# Try parsing ISO format
return datetime.fromisoformat(str(time_str).replace('Z', '+00:00'))
return datetime.min
except Exception:
return datetime.min

files_sorted = sorted(files, key=lambda f: parse_time(f.get("time_last_modified", "")), reverse=True)

if len(files_sorted) > 1:
logger.info(f"Found {len(files_sorted)} Excel file(s) in SharePoint folder. Using only the newest file.")
logger.info(f"Newest file: {files_sorted[0]['name']} (modified: {files_sorted[0].get('time_last_modified', 'Unknown')})")
if len(files_sorted) > 1:
logger.info(f"Skipping {len(files_sorted) - 1} older file(s) to avoid combining data")

# Download only the newest file
downloaded_files = []
newest_file = files_sorted[0]
file_name = newest_file["name"]
server_relative_url = newest_file["server_relative_url"]
local_file_path = local_dir_path / file_name

if self.download_file(
file.properties["ServerRelativeUrl"],
str(local_file_path),
overwrite=overwrite
):
if self.download_file(server_relative_url, file_name, str(local_file_path), overwrite=overwrite):
downloaded_files.append(str(local_file_path))
logger.info(f"Successfully downloaded newest file: {file_name}")
else:
logger.error(f"Failed to download file: {file_name}")

logger.info(f"Downloaded {len(downloaded_files)} files from {folder_path}")
return downloaded_files

except Exception as e:
logger.error(f"Failed to download files from folder {folder_path}: {e}")
logger.info(f"Downloaded {len(downloaded_files)} file(s) from {folder_path} (using only newest file)")
return downloaded_files


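The newest-first selection above, sketched standalone with hypothetical file metadata:

# Sketch of the newest-first selection; the file metadata below is hypothetical.
from datetime import datetime

files = [
    {"name": "punchlist_old.xlsx", "time_last_modified": "2024-01-02T10:00:00Z"},
    {"name": "punchlist_new.xlsx", "time_last_modified": "2024-03-05T09:30:00Z"},
]

def parse_time(value):
    try:
        if isinstance(value, datetime):
            return value
        return datetime.fromisoformat(str(value).replace('Z', '+00:00'))
    except (TypeError, ValueError):
        return datetime.min

files_sorted = sorted(files, key=lambda f: parse_time(f.get("time_last_modified", "")), reverse=True)
print(files_sorted[0]["name"])  # -> "punchlist_new.xlsx"
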
def download_from_sharepoint(
site_url: str,
file_path: Optional[str] = None,
folder_path: Optional[str] = None,
file_path: Optional[str] = None,
local_dir: str = "reports",
username: Optional[str] = None,
password: Optional[str] = None,
tenant_id: Optional[str] = None,
client_id: Optional[str] = None,
client_secret: Optional[str] = None,
use_app_authentication: bool = False,
use_app_authentication: bool = True,
file_pattern: Optional[str] = None,
overwrite: bool = True
overwrite: bool = True,
clear_existing: bool = True
) -> List[str]:
"""
Convenience function to download files from SharePoint.
Convenience function to download files from SharePoint using Office365-REST-Python-Client.

Args:
site_url: SharePoint site URL
file_path: Path to specific file (if downloading single file)
folder_path: Path to folder (if downloading all files from folder)
file_path: Path to specific file (if downloading single file) - NOT YET IMPLEMENTED
local_dir: Local directory to save files
username: Username for authentication
password: Password for authentication
client_id: Azure AD app client ID
client_secret: Azure AD app client secret
use_app_authentication: Use app authentication
tenant_id: Azure AD tenant ID (not used by Office365 library, but kept for compatibility)
client_id: Azure AD app client ID (required for app authentication)
client_secret: Azure AD app client secret (required for app authentication)
use_app_authentication: Use app authentication (default: True)
file_pattern: Pattern to filter files (e.g., "*.xlsx")
overwrite: Whether to overwrite existing files
clear_existing: If True, clear all existing Excel files before downloading (default: True)

Returns:
List of downloaded file paths
List of downloaded file paths (typically 1 file - the newest)
"""
if not folder_path and not file_path:
logger.error("Either folder_path or file_path must be provided")
return []

if file_path:
logger.warning("Single file download not yet implemented")
return []

downloader = SharePointDownloader(
site_url=site_url,
username=username,
password=password,
tenant_id=tenant_id,
client_id=client_id,
client_secret=client_secret,
use_app_authentication=use_app_authentication
)

if file_path:
# Download single file
local_file_path = Path(local_dir) / Path(file_path).name
if downloader.download_file(file_path, str(local_file_path), overwrite=overwrite):
return [str(local_file_path)]
return []
elif folder_path:
# Download all files from folder
if folder_path:
# Download only the newest file from folder (clears existing files first)
return downloader.download_files_from_folder(
folder_path=folder_path,
local_dir=local_dir,
file_pattern=file_pattern,
overwrite=overwrite
overwrite=overwrite,
clear_existing=clear_existing
)
else:
logger.error("Either file_path or folder_path must be provided")
logger.error("file_path download not yet implemented")
return []


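An example call under the new defaults; the site, folder, and credentials are hypothetical:

# Example call with app-auth defaults; site, folder, and credentials are hypothetical.
downloaded = download_from_sharepoint(
    site_url="https://contoso.sharepoint.com/sites/YourSite",
    folder_path="Shared Documents/Reports",
    local_dir="reports",
    client_id="your-client-id",
    client_secret="your-client-secret",
    file_pattern="*.xlsx",
)
# Returns at most one path: the newest Excel file in the folder.
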
@ -265,33 +431,31 @@ if __name__ == "__main__":
import sys
from config import load_config

logging.basicConfig(level=logging.INFO)
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)

# Load configuration
config = load_config()

if not config.get('sharepoint'):
print("SharePoint configuration not found in config.yaml")
logger.error("SharePoint configuration not found")
sys.exit(1)

sp_config = config['sharepoint']

# Download files
downloaded = download_from_sharepoint(
site_url=sp_config['site_url'],
folder_path=sp_config.get('folder_path'),
file_path=sp_config.get('file_path'),
local_dir=sp_config.get('local_dir', 'reports'),
username=sp_config.get('username'),
password=sp_config.get('password'),
tenant_id=sp_config.get('tenant_id'),
client_id=sp_config.get('client_id'),
client_secret=sp_config.get('client_secret'),
use_app_authentication=sp_config.get('use_app_authentication', False),
use_app_authentication=sp_config.get('use_app_authentication', True),
file_pattern=sp_config.get('file_pattern'),
overwrite=True
overwrite=sp_config.get('overwrite', True)
)

print(f"Downloaded {len(downloaded)} file(s):")
for file in downloaded:
print(f"  - {file}")

print(f"Downloaded {len(downloaded)} file(s)")
for file_path in downloaded:
print(f"  - {file_path}")

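For reference, the keys this block reads from config['sharepoint'], sketched as a dict with hypothetical values; username and password apply only to user authentication:

# The 'sharepoint' keys read above, sketched as a dict; all values are hypothetical.
sp_config = {
    "site_url": "https://contoso.sharepoint.com/sites/YourSite",
    "folder_path": "Shared Documents/Reports",
    "local_dir": "reports",
    "tenant_id": "your-tenant-id",
    "client_id": "your-client-id",
    "client_secret": "your-client-secret",
    "use_app_authentication": True,
    "file_pattern": "*.xlsx",
    "overwrite": True,
}
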
@ -607,11 +607,10 @@ def create_app(config_path: Optional[str] = None):
folder_path=sp_config.get('folder_path'),
file_path=sp_config.get('file_path'),
local_dir=sp_config.get('local_dir', 'reports'),
username=sp_config.get('username'),
password=sp_config.get('password'),
tenant_id=sp_config.get('tenant_id'),
client_id=sp_config.get('client_id'),
client_secret=sp_config.get('client_secret'),
use_app_authentication=sp_config.get('use_app_authentication', False),
use_app_authentication=sp_config.get('use_app_authentication', True),
file_pattern=sp_config.get('file_pattern'),
overwrite=sp_config.get('overwrite', True)
)
