Merge remote changes and resolve conflicts

- Keep our version of html_generator.py (removed Open tab, kept only actual statuses)
- Keep our generated output files
- Keep our Excel file version
This commit is contained in:
ilia gu 2025-11-08 18:20:30 +04:00
commit 51f618b654
40 changed files with 149074 additions and 1627 deletions

54
.drone.yml Normal file
View File

@ -0,0 +1,54 @@
kind: pipeline
type: docker
name: vendor-report-cicd
trigger:
branch:
- main
- deployment-ready
event:
- push
steps:
- name: build-image
image: docker:24-cli
volumes:
- name: dockersock
path: /var/run/docker.sock
environment:
DOCKER_HOST: unix:///var/run/docker.sock
DOCKER_BUILDKIT: 1
commands:
- echo "Building vendor-report Docker image..."
- docker build -t registry.lci.ge/taskboard/vendor-report-api:${DRONE_COMMIT_SHA:0:8} .
- echo "Tagging image as latest..."
- docker tag registry.lci.ge/taskboard/vendor-report-api:${DRONE_COMMIT_SHA:0:8} registry.lci.ge/taskboard/vendor-report-api:latest
- echo "Vendor-report Docker image built and tagged successfully"
when:
event:
- push
- name: push-image
image: docker:24-cli
volumes:
- name: dockersock
path: /var/run/docker.sock
environment:
DOCKER_HOST: unix:///var/run/docker.sock
commands:
- echo "Pushing vendor-report image to registry..."
- docker push registry.lci.ge/taskboard/vendor-report-api:${DRONE_COMMIT_SHA:0:8}
- docker push registry.lci.ge/taskboard/vendor-report-api:latest
- echo "Vendor-report image pushed to registry"
- echo "Cleaning up local images to save space..."
- docker rmi registry.lci.ge/taskboard/vendor-report-api:${DRONE_COMMIT_SHA:0:8} || true
- docker rmi registry.lci.ge/taskboard/vendor-report-api:latest || true
when:
event:
- push
volumes:
- name: dockersock
host:
path: /var/run/docker.sock

44
Dockerfile Normal file
View File

@ -0,0 +1,44 @@
# Python API Server for Vendor Report Generator
FROM python:3.11-slim
WORKDIR /app
# Install system dependencies (cached unless this changes)
RUN apt-get update && apt-get install -y \
curl \
gcc \
g++ \
&& rm -rf /var/lib/apt/lists/*
# Upgrade pip and install build tools first (cached)
RUN pip install --upgrade pip setuptools wheel
# Copy requirements first for better caching
COPY requirements.txt .
# Install Python dependencies
# Remove --no-cache-dir to use pip's cache (much faster rebuilds)
# This layer will be cached unless requirements.txt changes
RUN pip install --upgrade pip && \
pip install -r requirements.txt
# Copy application files
# Docker automatically detects file changes via content hash
# If any .py file changes, only this layer and the layers after it are rebuilt (apt-get & pip stay cached!)
COPY *.py ./
COPY *.yaml* ./
COPY *.md ./
# Create directories for reports and output
RUN mkdir -p /app/reports /app/output
# Expose port (internal only, not exposed in docker-compose)
EXPOSE 8080
# Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=10s --retries=3 \
CMD curl -f http://localhost:8080/health || exit 1
# Run API server (uses environment variables for configuration)
CMD ["python", "api_server.py"]

163
QUICK_START.md Normal file
View File

@ -0,0 +1,163 @@
# Quick Start Guide: SharePoint Integration & Scheduling
This guide will help you quickly set up SharePoint integration and automated report generation.
## Quick Setup (5 minutes)
### 1. Install Dependencies
```bash
pip install -r requirements.txt
```
### 2. Create Configuration
```bash
cp config.yaml.template config.yaml
```
### 3. Configure SharePoint
Edit `config.yaml`:
```yaml
sharepoint:
enabled: true
site_url: "https://yourcompany.sharepoint.com/sites/YourSite"
folder_path: "/Shared Documents/Reports" # Path to your Excel files
use_app_authentication: true
client_id: "your-azure-ad-client-id"
client_secret: "your-azure-ad-client-secret"
```
**To get Azure AD credentials:**
1. Go to Azure Portal → App registrations
2. Create new registration or use existing
3. Create a client secret
4. Grant SharePoint API permissions: `Sites.Read.All`
5. Copy Client ID and Client Secret to config
### 4. Choose Your Deployment Method
#### Option A: Scheduled Reports (Recommended)
Edit `config.yaml`:
```yaml
scheduler:
enabled: true
schedule_type: "cron"
cron_expression: "0 8 * * *" # 8 AM daily
timezone: "America/New_York"
```
Start scheduler:
```bash
python scheduler.py
```
#### Option B: On-Demand via API
Edit `config.yaml`:
```yaml
api:
enabled: true
port: 8080
api_key: "your-secret-key" # Optional but recommended
```
Start API server:
```bash
python api_server.py
```
Generate report:
```bash
curl -X POST http://localhost:8080/api/generate \
-H "X-API-Key: your-secret-key" \
-H "Content-Type: application/json" \
-d '{"download_from_sharepoint": true}'
```
## How It Works
1. **SharePoint Download**: Downloads latest Excel files from SharePoint folder
2. **Report Generation**: Processes Excel files and generates reports
3. **Output**: Creates `output/report.json` and `output/report.html`
## Testing
### Test SharePoint Connection
```bash
python sharepoint_downloader.py
```
This will download files from SharePoint to the `reports/` directory.
### Test Report Generation
```bash
python report_generator.py
```
This will generate reports from files in the `reports/` directory.
## Deployment Options
### As a Service (Linux)
```bash
# Create systemd service
sudo nano /etc/systemd/system/vendor-report.service
# Add:
[Unit]
Description=Vendor Report Scheduler
After=network.target
[Service]
Type=simple
User=your-user
WorkingDirectory=/path/to/vendor_report
ExecStart=/usr/bin/python3 /path/to/vendor_report/scheduler.py
Restart=always
[Install]
WantedBy=multi-user.target
# Enable and start
sudo systemctl enable vendor-report
sudo systemctl start vendor-report
```
### Docker
The application can be containerized for easy deployment; see the provided `Dockerfile` and the `.drone.yml` CI pipeline.
## Troubleshooting
### SharePoint Authentication Fails
- Verify Azure AD app has correct permissions
- Check client ID and secret are correct
- Ensure SharePoint site URL is correct (include `/sites/SiteName`)
### Files Not Downloading
- Check folder path is correct (use SharePoint's "Copy path" feature)
- Verify app has read permissions
- Check file pattern matches your Excel files
### Scheduler Not Running
- Check timezone is correct
- Verify cron expression format
- Check logs for errors
## Next Steps
- Set up monitoring/alerting for failed reports
- Configure webhook notifications
- Set up automated email delivery of reports
- Integrate with other systems via API

273
README.md
View File

@ -2,6 +2,8 @@
A Python tool that generates comprehensive vendor punchlist reports from Excel files. The tool processes Excel data, normalizes vendor information, calculates metrics, and generates both JSON and interactive HTML reports.
> **📘 For Taskboard Integration**: See [TASKBOARD_INTEGRATION_CONTEXT.md](./TASKBOARD_INTEGRATION_CONTEXT.md) for detailed context and integration possibilities.
## Features
- **Direct Excel Processing**: Reads Excel files directly using pandas
@ -11,6 +13,9 @@ A Python tool that generates comprehensive vendor punchlist reports from Excel f
- **Oldest Unaddressed Items**: Identifies and highlights the oldest 3 unaddressed items per vendor
- **Interactive HTML Reports**: Generates searchable, filterable HTML reports with tabs and filters
- **JSON Export**: Exports structured JSON data for further processing
- **SharePoint Integration**: Automatically download Excel files from SharePoint
- **Scheduled Generation**: Automatically generate reports on a schedule (interval or cron)
- **Web API**: REST API for on-demand report generation
## Requirements
@ -221,6 +226,12 @@ vendor_report/
├── html_generator.py # HTML report generation
├── models.py # Pydantic data models
├── excel_to_text.py # Utility for Excel to text conversion
├── sharepoint_downloader.py # SharePoint file downloader
├── scheduler.py # Scheduled report generation
├── api_server.py # REST API for on-demand reports
├── web_ui.py # Web UI for easy access
├── config.py # Configuration management
├── config.yaml.template # Configuration template
├── requirements.txt # Python dependencies
├── reports/ # Directory for input Excel files
├── output/ # Directory for generated reports
@ -256,6 +267,268 @@ pip install -r requirements.txt
The tool uses **Baltimore/Eastern timezone (America/New_York)** for all date calculations. This ensures consistent 24-hour window calculations regardless of where the script is run. All dates are stored as timezone-aware datetime objects.
## SharePoint Integration
The application can automatically download Excel files from SharePoint before generating reports. This is useful when your source data is stored in SharePoint.
### Setup SharePoint Integration
1. **Create a configuration file**:
```bash
cp config.yaml.template config.yaml
```
2. **Edit `config.yaml`** and configure SharePoint settings:
```yaml
sharepoint:
enabled: true
site_url: "https://yourcompany.sharepoint.com/sites/YourSite"
folder_path: "/Shared Documents/Reports"
local_dir: "reports"
use_app_authentication: true # Recommended for automation
client_id: "your-azure-ad-client-id"
client_secret: "your-azure-ad-client-secret"
```
3. **Authentication Options**:
**Option A: App Authentication (Recommended)**
- Register an app in Azure AD
- Grant SharePoint permissions (Sites.Read.All or Sites.ReadWrite.All)
- Use `client_id` and `client_secret` in config
- Set `use_app_authentication: true`
**Option B: User Authentication**
- Use your SharePoint username and password
- Set `username` and `password` in config
- Set `use_app_authentication: false`
4. **Test SharePoint download**:
```bash
python sharepoint_downloader.py
```
### Manual SharePoint Download
Download files from SharePoint without generating a report:
```bash
python sharepoint_downloader.py
```
## Scheduled Report Generation
The application can automatically generate reports on a schedule, optionally downloading from SharePoint first.
### Setup Scheduling
1. **Edit `config.yaml`**:
```yaml
scheduler:
enabled: true
schedule_type: "interval" # or "cron"
interval_hours: 24 # Generate every 24 hours
# OR use cron expression:
# cron_expression: "0 8 * * *" # 8 AM daily
timezone: "America/New_York"
```
2. **Start the scheduler**:
```bash
python scheduler.py
```
The scheduler will run continuously and generate reports according to your schedule.
3. **Schedule Types**:
- **interval**: Generate report every N hours
- **cron**: Use cron expression for precise scheduling (e.g., "0 8 * * *" for 8 AM daily)
- **once**: Run once immediately (for testing)
### Running Scheduler as a Service
**Linux (systemd)**:
```bash
# Create service file: /etc/systemd/system/vendor-report-scheduler.service
[Unit]
Description=Vendor Report Scheduler
After=network.target
[Service]
Type=simple
User=your-user
WorkingDirectory=/path/to/vendor_report
ExecStart=/usr/bin/python3 /path/to/vendor_report/scheduler.py
Restart=always
[Install]
WantedBy=multi-user.target
# Enable and start
sudo systemctl enable vendor-report-scheduler
sudo systemctl start vendor-report-scheduler
```
**Windows (Task Scheduler)**:
- Create a scheduled task that runs `python scheduler.py` at startup or on a schedule
## Web UI & On-Demand Report Generation
The application includes both a **Web UI** and a **REST API** for generating reports on demand.
### Web UI (Recommended for Easy Access)
A simple, user-friendly web interface for generating reports without using the terminal.
1. **Start the Web UI server**:
```bash
python web_ui.py
```
2. **Open in browser**:
```
http://localhost:8080
```
3. **Features**:
- One-click report generation
- Download from SharePoint & generate (single button)
- View generated reports
- View service status
- View configuration
- No terminal knowledge required!
### REST API
The application also includes a REST API for integration with other systems or manual triggers.
### Setup API Server
1. **Edit `config.yaml`**:
```yaml
api:
enabled: true
host: "0.0.0.0"
port: 8080
api_key: "your-secret-api-key" # Optional, for authentication
```
2. **Start the Web UI** (recommended):
```bash
python web_ui.py
```
Then open `http://localhost:8080` in your browser.
**OR start the API server** (for programmatic access):
```bash
python api_server.py
```
3. **Generate report via API**:
```bash
# Without authentication
curl -X POST http://localhost:8080/api/generate \
-H "Content-Type: application/json" \
-d '{"download_from_sharepoint": true}'
# With API key authentication
curl -X POST http://localhost:8080/api/generate \
-H "Content-Type: application/json" \
-H "X-API-Key: your-secret-api-key" \
-d '{"download_from_sharepoint": true}'
```
### API Endpoints
- **POST `/api/generate`**: Generate report on demand
- Request body (optional):
```json
{
"download_from_sharepoint": true,
"reports_dir": "reports",
"output_file": "output/report.json"
}
```
- **GET `/api/status`**: Get service status and configuration
- **GET `/health`**: Health check endpoint
### Example: Integration with Webhook
You can trigger report generation from SharePoint webhooks, Power Automate, or any HTTP client:
```python
import requests
response = requests.post(
'http://your-server:8080/api/generate',
json={'download_from_sharepoint': True},
headers={'X-API-Key': 'your-api-key'}
)
print(response.json())
```
## Configuration
The application uses a YAML configuration file (`config.yaml`) for all settings. You can also use environment variables:
### Environment Variables
```bash
# SharePoint
export SHAREPOINT_ENABLED=true
export SHAREPOINT_SITE_URL="https://yourcompany.sharepoint.com/sites/YourSite"
export SHAREPOINT_FOLDER_PATH="/Shared Documents/Reports"
export SHAREPOINT_CLIENT_ID="your-client-id"
export SHAREPOINT_CLIENT_SECRET="your-client-secret"
export SHAREPOINT_USE_APP_AUTH=true
# Scheduler
export SCHEDULER_ENABLED=true
export SCHEDULER_INTERVAL_HOURS=24
# API
export API_ENABLED=true
export API_PORT=8080
export API_KEY="your-api-key"
```
## Complete Workflow Example
Here's a complete example setup for automated SharePoint → Report generation:
1. **Setup configuration** (`config.yaml`):
```yaml
sharepoint:
enabled: true
site_url: "https://company.sharepoint.com/sites/Reports"
folder_path: "/Shared Documents/Vendor Reports"
use_app_authentication: true
client_id: "your-client-id"
client_secret: "your-client-secret"
scheduler:
enabled: true
schedule_type: "cron"
cron_expression: "0 8 * * *" # 8 AM daily
timezone: "America/New_York"
report:
output_dir: "output"
reports_dir: "reports"
```
2. **Start scheduler**:
```bash
python scheduler.py
```
3. **The scheduler will**:
- Download latest Excel files from SharePoint at 8 AM daily
- Generate reports automatically
- Save to `output/report.json` and `output/report.html`
## License
[Add your license information here]

127
SHAREPOINT_SETUP.md Normal file
View File

@ -0,0 +1,127 @@
# SharePoint Configuration Guide
This guide will help you get the configuration values needed to connect to SharePoint.
## Quick Answer: Where to Get Configuration Values
### 1. SharePoint Site URL
- Go to your SharePoint site in a browser
- Copy the URL from the address bar
- Example: `https://yourcompany.sharepoint.com/sites/YourSiteName`
- **Important**: Include `/sites/SiteName` if it's a subsite
### 2. Folder Path
- Navigate to the folder containing your Excel files in SharePoint
- Right-click the folder → "Copy path" or "Details"
- Example: `/Shared Documents/Reports` or `/sites/YourSite/Shared Documents/Vendor Reports`
- **Tip**: In SharePoint, go to the folder, click "..." menu → "Copy link" and extract the path
### 3. Azure AD App Credentials (Recommended Method)
#### Step 1: Register App in Azure AD
1. Go to [Azure Portal](https://portal.azure.com)
2. Navigate to **Azure Active Directory** → **App registrations**
3. Click **New registration**
4. Name it (e.g., "Vendor Report Generator")
5. Select **Accounts in this organizational directory only**
6. Click **Register**
#### Step 2: Create Client Secret
1. In your app, go to **Certificates & secrets**
2. Click **New client secret**
3. Add description (e.g., "Vendor Report Secret")
4. Choose expiration (recommend 24 months)
5. Click **Add**
6. **IMPORTANT**: Copy the **Value** immediately (you won't see it again!)
- This is your `client_secret`
#### Step 3: Get Client ID
1. In your app, go to **Overview**
2. Copy the **Application (client) ID**
- This is your `client_id`
#### Step 4: Grant SharePoint Permissions
1. In your app, go to **API permissions**
2. Click **Add a permission**
3. Select **SharePoint**
4. Choose **Application permissions** (not Delegated)
5. Select **Sites.Read.All** (or Sites.ReadWrite.All if you need write access)
6. Click **Add permissions**
7. Click **Grant admin consent** (important!)
8. Wait for status to show "Granted for [Your Organization]"
### 4. Alternative: User Credentials (Less Secure)
If you can't use app authentication:
- `username`: Your SharePoint/Office 365 email
- `password`: Your password (not recommended for automation)
## Complete Configuration Example
Once you have all values, add them to `config.yaml`:
```yaml
sharepoint:
enabled: true
site_url: "https://yourcompany.sharepoint.com/sites/YourSite"
folder_path: "/Shared Documents/Reports" # Path to your Excel files folder
local_dir: "reports" # Where to save downloaded files
use_app_authentication: true # Use app auth (recommended)
client_id: "12345678-1234-1234-1234-123456789abc" # From Azure AD
client_secret: "your-secret-value-here" # From Azure AD (the Value, not Secret ID!)
file_pattern: "*.xlsx" # Only download Excel files
overwrite: true # Overwrite existing files
```
## Testing Your Configuration
1. **Test SharePoint connection**:
```bash
python sharepoint_downloader.py
```
2. **Or use the Web UI**:
- Start: `python web_ui.py`
- Open: `http://localhost:8080`
- Click "Update Data from SharePoint"
- Check for errors
## Common Issues
### "SharePoint authentication failed"
- **Check**: Client ID and secret are correct
- **Check**: App has been granted admin consent
- **Check**: Permissions are "Application permissions" (not Delegated)
### "Folder not found"
- **Check**: Folder path is correct (case-sensitive)
- **Tip**: Use SharePoint's "Copy path" feature
- **Check**: Path starts with `/` (e.g., `/Shared Documents/...`)
### "No files downloaded"
- **Check**: Folder contains Excel files (`.xlsx` or `.xls`)
- **Check**: File pattern matches your files
- **Check**: You have read permissions to the folder
### "Access denied"
- **Check**: App has `Sites.Read.All` permission
- **Check**: Admin consent has been granted
- **Check**: App is registered in the same tenant as SharePoint
## Security Best Practices
1. **Use App Authentication** (not user credentials)
2. **Store secrets securely**:
- Use environment variables in production
- Never commit `config.yaml` with secrets to git
- Use a secrets manager for production
3. **Limit permissions**: Only grant `Sites.Read.All` (not write access unless needed)
4. **Rotate secrets**: Update client secrets regularly
## Getting Help
If you're stuck:
1. Check the terminal/console for detailed error messages
2. Verify each configuration value step by step
3. Test with a simple folder first (one Excel file)
4. Check Azure AD app status in Azure Portal

TASKBOARD_INTEGRATION_CONTEXT.md Normal file
View File

@ -0,0 +1,470 @@
# Vendor Report Generator - Taskboard Integration Context
## 🎯 Goal & Purpose
The **Vendor Report Generator** is a Python-based tool designed to automate the generation of comprehensive vendor punchlist reports from Excel files stored in SharePoint. The goal is to:
1. **Automate Report Generation**: Eliminate manual Excel processing and report creation
2. **Centralize Data**: Pull vendor punchlist data directly from SharePoint
3. **Provide Insights**: Generate actionable reports with metrics, priorities, and status tracking
4. **Enable Integration**: Make reports accessible within Taskboard for team collaboration
### Business Value
- **Time Savings**: Automates hours of manual report generation
- **Accuracy**: Consistent data normalization and calculation
- **Visibility**: Real-time vendor status tracking and metrics
- **Accessibility**: Web-based interface for non-technical users
- **Integration Ready**: Can be embedded as a tool/widget in Taskboard
---
## 📋 Application Overview
### What It Does
The application processes Excel files containing vendor punchlist items and generates:
- **Interactive HTML Reports**: Searchable, filterable web reports with vendor tabs, status filters, and priority grouping
- **JSON Data**: Structured data for further processing or API integration
- **Metrics**: Per-vendor statistics (total items, closed/open counts, 24-hour updates, oldest unaddressed items)
### Key Features
1. **Excel Processing**: Direct pandas-based reading (no manual conversion needed)
2. **Data Normalization**: Automatically handles vendor name variations, status inconsistencies, priority classifications
3. **24-Hour Tracking**: Identifies items added, closed, or changed to monitor status in the last 24 hours (Baltimore/Eastern timezone)
4. **Priority Classification**: Groups items by Very High, High, Medium, Low priorities
5. **Oldest Items**: Highlights the oldest 3 unaddressed items per vendor
6. **SharePoint Integration**: Automatically downloads Excel files from SharePoint
7. **Scheduled Generation**: Can run automatically on a schedule
8. **Web UI**: User-friendly interface for generating reports
9. **REST API**: Programmatic access for integration
---
## 🏗️ Architecture & Components
### Core Components
```
vendor_report/
├── report_generator.py # Main entry point - orchestrates report generation
├── data_preprocessor.py # Excel parsing, normalization, data cleaning
├── html_generator.py # Generates interactive HTML reports
├── models.py # Pydantic data models for validation
├── sharepoint_downloader.py # SharePoint file downloader
├── scheduler.py # Scheduled report generation
├── api_server.py # REST API server
├── web_ui.py # Web UI server (Flask-based)
├── config.py # Configuration management
└── config.yaml # Configuration file
```
### Data Flow
```
SharePoint Excel Files
[SharePoint Downloader] → Local reports/ directory
[Data Preprocessor] → Normalize vendors, statuses, priorities, parse dates
[Report Generator] → Calculate metrics, group by vendor, identify updates
[HTML Generator] → Generate interactive report.html
[Output] → output/report.json + output/report.html
```
### Processing Pipeline
1. **Input**: Excel files with columns:
- Punchlist Name, Vendor, Priority, Description, Date Identified, Status Updates, Issue Image, Status, Date Completed
2. **Preprocessing**:
- Parse Excel files using pandas
- Normalize vendor names (handle case variations, combined vendors)
- Normalize statuses (Complete, Monitor, Incomplete)
- Classify priorities (Very High, High, Medium, Low)
- Parse dates (multiple formats supported)
- Calculate 24-hour windows (Baltimore/Eastern timezone)
- Calculate item age (days since identified)
3. **Report Generation**:
- Group items by vendor
- Calculate metrics per vendor (total, closed, open, monitor counts)
- Identify 24-hour updates (added, closed, changed to monitor)
- Find oldest 3 unaddressed items per vendor
- Group by priority levels
- Generate JSON structure
- Generate HTML report
4. **Output**:
- `output/report.json`: Structured JSON data
- `output/report.html`: Interactive HTML report
- `output/preprocessed_data.txt`: Debug/preview data
---
## 🔧 Technical Details
### Dependencies
```python
# Core
pandas>=2.0.0 # Excel processing
openpyxl>=3.0.0 # Excel file reading
pydantic>=2.0.0 # Data validation
# Optional: SharePoint
Office365-REST-Python-Client>=2.3.0 # SharePoint API
# Optional: Scheduling
apscheduler>=3.10.0 # Task scheduling
# Optional: Web UI/API
flask>=2.3.0 # Web framework
flask-cors>=4.0.0 # CORS support
# Configuration
pyyaml>=6.0 # YAML config parsing
```
### Configuration
Configuration is managed via `config.yaml`:
```yaml
sharepoint:
enabled: true/false
site_url: "https://company.sharepoint.com/sites/SiteName"
folder_path: "/Shared Documents/Reports"
use_app_authentication: true # Azure AD app auth (recommended)
client_id: "azure-ad-client-id"
client_secret: "azure-ad-client-secret"
scheduler:
enabled: true/false
schedule_type: "interval" | "cron" | "once"
interval_hours: 24
cron_expression: "0 8 * * *" # 8 AM daily
api:
enabled: true/false
port: 8080
api_key: "optional-api-key"
report:
output_dir: "output"
reports_dir: "reports"
```
### API Endpoints
**Web UI Server** (`web_ui.py`):
- `GET /` - Web UI interface
- `POST /api/generate` - Generate report
- `POST /api/update-sharepoint` - Download files from SharePoint
- `GET /api/status` - Service status
- `GET /api/reports` - List generated reports
- `GET /api/config` - Configuration (safe, no secrets)
- `GET /reports/<filename>` - Serve report files
**API Server** (`api_server.py`):
- `POST /api/generate` - Generate report (programmatic)
- `GET /api/status` - Service status
- `GET /health` - Health check
### Data Models
**PunchlistItem**:
- punchlist_name, description, priority, date_identified, date_completed
- status, status_updates, issue_image, age_days
**VendorMetrics**:
- vendor_name, total_items, closed_count, open_count, monitor_count
- updates_24h (added, closed, changed_to_monitor)
- oldest_unaddressed (top 3)
- very_high_priority_items, high_priority_items
**FullReport**:
- report_generated_at, vendors[], summary{}
---
## 🔗 Taskboard Integration Possibilities
### Option 1: Embedded Widget/Page
Create a new page in Taskboard (`/vendor-reports`) that:
- Uses Taskboard's authentication (already authenticated users)
- Embeds the generated HTML report in an iframe or renders it directly
- Provides a button to trigger report generation
- Shows report history/list
**Implementation**:
```typescript
// taskboard/src/app/(dashboard)/vendor-reports/page.tsx
// - Call Python API server to generate reports
// - Display generated HTML reports
// - Use Taskboard's UI components for consistency
```
### Option 2: API Integration
Create Taskboard API routes that proxy to the Python API:
- `POST /api/vendor-reports/generate` → Calls Python `POST /api/generate`
- `GET /api/vendor-reports/list` → Calls Python `GET /api/reports`
- `GET /api/vendor-reports/status` → Calls Python `GET /api/status`
**Benefits**:
- Single authentication system (Taskboard)
- Consistent API patterns
- Can add Taskboard-specific features (notifications, task linking)
### Option 3: Background Service
Run the Python scheduler as a background service that:
- Generates reports on schedule
- Saves reports to a shared location
- Taskboard displays the latest report
- Can trigger notifications when reports are updated
### Option 4: Task Integration
Link reports to Taskboard tasks:
- Create tasks for vendors with unaddressed items
- Link report generation to project/task completion
- Use report metrics in task dashboards
---
## 🚀 Usage Examples
### Command Line
```bash
# Generate report from local files
python report_generator.py
# Generate with custom directories
python report_generator.py --reports-dir /path/to/excel --output /path/to/output.json
```
### Web UI
```bash
# Start web UI server
python web_ui.py
# Open browser: http://localhost:8080
# Click "Update Data from SharePoint" → "Generate Report"
```
### API
```bash
# Generate report via API
curl -X POST http://localhost:8080/api/generate \
-H "Content-Type: application/json" \
-d '{"download_from_sharepoint": false}'
# Update from SharePoint
curl -X POST http://localhost:8080/api/update-sharepoint
```
### Scheduled
```bash
# Start scheduler (runs continuously)
python scheduler.py
# Configured via config.yaml:
# scheduler:
# enabled: true
# schedule_type: "cron"
# cron_expression: "0 8 * * *" # 8 AM daily
```
### Programmatic (Python)
```python
from report_generator import generate_report
# Generate report
report_data = generate_report(
reports_dir="reports",
output_file="output/report.json",
verbose=True
)
# Access data
vendors = report_data['vendors']
summary = report_data['summary']
```
---
## 📊 Report Structure
### JSON Report Format
```json
{
"report_generated_at": "2025-11-06T16:00:00",
"vendors": [
{
"vendor_name": "VendorName",
"total_items": 10,
"closed_count": 5,
"open_count": 3,
"monitor_count": 2,
"updates_24h": {
"added": [...],
"closed": [...],
"changed_to_monitor": [...]
},
"oldest_unaddressed": [...],
"very_high_priority_items": [...],
"high_priority_items": [...],
"closed_items": [...],
"monitor_items": [...],
"open_items": [...]
}
],
"summary": {
"total_vendors": 5,
"total_items": 50,
"total_closed": 25,
"total_open": 15,
"total_monitor": 10
}
}
```
### HTML Report Features
- **Summary Cards**: Overview statistics
- **Vendor Tabs**: Quick navigation between vendors
- **Status Tabs**: Filter by All, Yesterday's Updates, Oldest Unaddressed, Closed, Monitor, Open
- **Search & Filters**: Search by name/description, filter by vendor/status/priority
- **Quick Filters**: Show only vendors with updates or oldest items
- **Responsive Design**: Works on desktop and mobile
---
## 🔐 Authentication & Security
### Current State
- **Web UI**: Optional API key authentication
- **SharePoint**: Azure AD app authentication (recommended) or user credentials
- **No User Management**: Standalone application
### Taskboard Integration Benefits
- **Leverage Existing Auth**: Use Taskboard's Authentik/Microsoft Entra ID authentication
- **Role-Based Access**: Control who can generate/view reports
- **Audit Trail**: Track who generated reports (via Taskboard user system)
- **Secure Storage**: Use Taskboard's file storage for reports
---
## 📝 Integration Checklist
### Phase 1: Basic Integration
- [ ] Set up Python API server as background service
- [ ] Create Taskboard API route that proxies to Python API
- [ ] Create Taskboard page to display reports
- [ ] Add "Generate Report" button in Taskboard UI
### Phase 2: Enhanced Integration
- [ ] Use Taskboard authentication for report access
- [ ] Store report metadata in Taskboard database
- [ ] Add report history/versioning
- [ ] Link reports to projects/tasks
### Phase 3: Advanced Features
- [ ] Scheduled report generation via Taskboard
- [ ] Notifications when reports are generated
- [ ] Dashboard widgets showing report metrics
- [ ] Export reports to Taskboard tasks/boards
---
## 🛠️ Development Notes
### Running Locally
```bash
# Setup
cd vendor_report
python -m venv venv
source venv/bin/activate # Windows: venv\Scripts\activate
pip install -r requirements.txt
# Configure
cp config.yaml.template config.yaml
# Edit config.yaml with SharePoint credentials
# Run Web UI
python web_ui.py
# Open http://localhost:8080
```
### Deployment Considerations
- **Python Environment**: Requires Python 3.8+
- **Dependencies**: Install via pip
- **Configuration**: Store secrets securely (environment variables or vault)
- **Port**: Default 8080 (configurable)
- **File Storage**: Reports saved to `output/` directory
- **SharePoint**: Requires Azure AD app registration
### Error Handling
- Graceful handling of missing Excel files
- SharePoint connection errors logged
- Invalid data formats handled
- User-friendly error messages in Web UI
---
## 📚 Additional Resources
- **SharePoint Setup**: See `SHAREPOINT_SETUP.md` for detailed Azure AD configuration
- **Quick Start**: See `QUICK_START.md` for 5-minute setup guide
- **Full Documentation**: See `README.md` for complete usage guide
---
## 💡 Integration Ideas for Taskboard
1. **Vendor Dashboard**: Show vendor metrics as cards/widgets
2. **Report History**: Track when reports were generated, by whom
3. **Task Creation**: Auto-create tasks for vendors with oldest unaddressed items
4. **Notifications**: Alert project managers when reports are generated
5. **Export to Tasks**: Convert report items to Taskboard tasks
6. **Project Linking**: Associate reports with Taskboard projects
7. **Scheduled Reports**: Use Taskboard's scheduling to trigger reports
8. **Role-Based Views**: Different report views for different user roles
---
## 🔄 Current Status
- ✅ Core functionality complete
- ✅ SharePoint integration working
- ✅ Web UI functional
- ✅ API endpoints available
- ✅ Scheduled generation supported
- ⏳ Taskboard integration pending
- ⏳ Authentication integration pending
- ⏳ Database storage pending
---
**Last Updated**: November 6, 2025
**Version**: 1.0
**Status**: Production Ready (Standalone), Integration Ready (Taskboard)

696
api_server.py Normal file
View File

@ -0,0 +1,696 @@
#!/usr/bin/env python3
"""
Web API Server for On-Demand Report Generation
Provides REST API endpoints to trigger report generation on demand.
"""
import logging
from pathlib import Path
from typing import Optional, List, Dict
import json
from datetime import datetime
import shutil
import os
try:
from flask import Flask, jsonify, request, send_from_directory
from flask_cors import CORS
from werkzeug.utils import secure_filename
FLASK_AVAILABLE = True
except ImportError:
FLASK_AVAILABLE = False
logging.warning("Flask not installed. API server features disabled.")
from config import load_config
from report_generator import generate_report
from sharepoint_downloader import download_from_sharepoint
# Module-level logger for the API server.
logger = logging.getLogger(__name__)
# Flask application instance and loaded configuration.
# Both are populated by create_app(); they stay None until it runs.
app = None
config = None
def cleanup_old_reports(output_dir: Path, reports_dir: Path, max_reports: int = 10):
    """
    Prune generated artifacts, retaining only the most recent reports.

    Deletes the oldest report HTML files (and their matching JSON files) from
    *output_dir* beyond *max_reports*, then trims the Excel workbooks in
    *reports_dir* down to the same count. Cleanup is best-effort: every
    failure is logged and swallowed.

    Args:
        output_dir: Directory containing report HTML/JSON files.
        reports_dir: Directory containing downloaded/uploaded Excel files.
        max_reports: Maximum number of reports to keep.
    """
    try:
        # Newest first, by filesystem modification time.
        by_newest = sorted(
            output_dir.glob('report-*.html'),
            key=lambda p: p.stat().st_mtime,
            reverse=True,
        )
        if len(by_newest) <= max_reports:
            return  # Under the limit - nothing to prune.
        removed = 0
        for stale_html in by_newest[max_reports:]:
            stale_id = stale_html.stem
            try:
                stale_html.unlink()
                logger.info(f"Deleted old report HTML: {stale_html.name}")
                removed += 1
            except Exception as e:
                logger.warning(f"Failed to delete {stale_html.name}: {e}")
            # Each HTML report has a sibling JSON payload with the same stem.
            stale_json = output_dir / f"{stale_id}.json"
            if stale_json.exists():
                try:
                    stale_json.unlink()
                    logger.info(f"Deleted old report JSON: {stale_json.name}")
                except Exception as e:
                    logger.warning(f"Failed to delete {stale_json.name}: {e}")
        # Trim the Excel inputs to the same retention count.
        if reports_dir.exists():
            workbooks = list(reports_dir.glob('*.xlsx')) + list(reports_dir.glob('*.xls'))
            if len(workbooks) > max_reports:
                ordered = sorted(workbooks, key=lambda p: p.stat().st_mtime, reverse=True)
                for stale_wb in ordered[max_reports:]:
                    try:
                        stale_wb.unlink()
                        logger.info(f"Deleted old Excel file: {stale_wb.name}")
                    except Exception as e:
                        logger.warning(f"Failed to delete {stale_wb.name}: {e}")
        logger.info(f"Cleanup completed: deleted {removed} old report(s)")
    except Exception as e:
        logger.error(f"Error during cleanup: {e}", exc_info=True)
def create_app(config_path: Optional[str] = None):
    """
    Create and configure the Flask application.

    Loads configuration, resolves the report/output directories to absolute
    paths (relative to this file, not the current working directory), stores
    the relevant config sections in ``app.config`` and registers all routes.

    Args:
        config_path: Optional path to config.yaml (resolution handled by
            load_config).

    Returns:
        The configured Flask app.

    Raises:
        ImportError: If Flask / flask-cors are not installed.
    """
    global app, config
    if not FLASK_AVAILABLE:
        raise ImportError(
            "Flask is required for API server. "
            "Install it with: pip install flask flask-cors"
        )
    app = Flask(__name__)
    CORS(app)  # Enable CORS for all routes
    config = load_config(config_path)
    api_config = config.get('api', {})
    sharepoint_config = config.get('sharepoint', {})
    report_config = config.get('report', {})
    # Resolve paths relative to script location, not current working directory
    script_dir = Path(__file__).parent.absolute()
    if 'output_dir' in report_config and report_config['output_dir']:
        output_dir = Path(report_config['output_dir'])
        if not output_dir.is_absolute():
            report_config['output_dir'] = str(script_dir / output_dir)
    if 'reports_dir' in report_config and report_config['reports_dir']:
        reports_dir = Path(report_config['reports_dir'])
        if not reports_dir.is_absolute():
            report_config['reports_dir'] = str(script_dir / reports_dir)
    # Store config in app context
    app.config['API_KEY'] = api_config.get('api_key')
    app.config['SHAREPOINT_CONFIG'] = sharepoint_config
    app.config['REPORT_CONFIG'] = report_config

    @app.route('/health', methods=['GET'])
    def health():
        """Health check endpoint."""
        return jsonify({
            'status': 'healthy',
            'service': 'vendor-report-generator'
        })

    @app.route('/api/generate', methods=['POST'])
    def generate_report_endpoint():
        """
        Generate a report on demand.

        Optional JSON body:
            {
                "download_from_sharepoint": true,
                "reports_dir": "reports",
                "api_key": "..."   # alternative to the X-API-Key header
            }

        Returns 200 with report metadata on success; 4xx/5xx with an 'error'
        field otherwise.
        """
        # Check API key if configured. The key may arrive in the X-API-Key
        # header or in the JSON body's "api_key" field.
        api_key = app.config.get('API_KEY')
        if api_key:
            # BUG FIX: the previous one-liner parsed as
            # `(header or body_key) if request.json else None`, so a valid
            # header key was rejected whenever no JSON body was sent.
            # get_json(silent=True) also avoids a 415 on non-JSON bodies.
            body = request.get_json(silent=True) or {}
            provided_key = request.headers.get('X-API-Key') or body.get('api_key')
            if provided_key != api_key:
                return jsonify({'error': 'Invalid API key'}), 401
        try:
            request_data = request.get_json(silent=True) or {}
            download_from_sp = request_data.get('download_from_sharepoint', True)  # Default True for backward compatibility
            downloaded_files = []  # Initialize here for scope
            # Get report config early - needed for error handling
            report_config = app.config['REPORT_CONFIG']
            # Download from SharePoint if requested AND no manual upload happened.
            # download_from_sharepoint=False signals a manual upload was used.
            if download_from_sp:
                sp_config = app.config['SHAREPOINT_CONFIG']
                if not sp_config.get('enabled'):
                    return jsonify({
                        'error': 'SharePoint is not enabled in configuration'
                    }), 400
                logger.info("Downloading files from SharePoint...")
                try:
                    downloaded = download_from_sharepoint(
                        site_url=sp_config['site_url'],
                        folder_path=sp_config.get('folder_path'),
                        file_path=sp_config.get('file_path'),
                        local_dir=sp_config.get('local_dir', 'reports'),
                        tenant_id=sp_config.get('tenant_id'),
                        client_id=sp_config.get('client_id'),
                        client_secret=sp_config.get('client_secret'),
                        use_app_authentication=sp_config.get('use_app_authentication', True),
                        file_pattern=sp_config.get('file_pattern'),
                        overwrite=sp_config.get('overwrite', True)
                    )
                    downloaded_files = downloaded if downloaded else []
                    logger.info(f"Downloaded {len(downloaded_files)} file(s) from SharePoint: {downloaded_files}")
                    # Zero downloads may mean a permissions problem rather than
                    # an empty folder; inspect the local directory for fallbacks.
                    if len(downloaded_files) == 0:
                        logger.warning("SharePoint download returned 0 files. This could mean:")
                        logger.warning("1. SharePoint permissions issue (401/403 error)")
                        logger.warning("2. No files found in the specified folder")
                        logger.warning("3. Site access not granted (Resource-Specific Consent needed)")
                        logger.warning("Checking if existing files are available in reports directory...")
                        reports_dir_path = Path(report_config.get('reports_dir', 'reports'))
                        if not reports_dir_path.is_absolute():
                            script_dir = Path(__file__).parent.absolute()
                            reports_dir_path = script_dir / reports_dir_path
                        if reports_dir_path.exists():
                            existing_files = list(reports_dir_path.glob('*.xlsx')) + list(reports_dir_path.glob('*.xls'))
                            if existing_files:
                                logger.warning(f"Found {len(existing_files)} existing file(s) in reports directory. Will use these instead.")
                                logger.warning("NOTE: These may be old files. Consider using manual upload for fresh data.")
                                # NOTE(review): with downloaded_files == [] the
                                # dispatch below still returns 400, so these
                                # existing files are never actually used -
                                # confirm whether that refusal is intended.
                            else:
                                logger.error("No files available - neither from SharePoint nor existing files.")
                                return jsonify({
                                    'error': 'SharePoint download failed and no existing files found',
                                    'details': 'SharePoint access may require Resource-Specific Consent (RSC). Please use manual file upload or fix SharePoint permissions.',
                                    'sharepoint_error': True
                                }), 500
                except Exception as e:
                    logger.error(f"Failed to download from SharePoint: {e}", exc_info=True)
                    # Check if we have existing files as fallback
                    reports_dir_path = Path(report_config.get('reports_dir', 'reports'))
                    if not reports_dir_path.is_absolute():
                        script_dir = Path(__file__).parent.absolute()
                        reports_dir_path = script_dir / reports_dir_path
                    if reports_dir_path.exists():
                        existing_files = list(reports_dir_path.glob('*.xlsx')) + list(reports_dir_path.glob('*.xls'))
                        if existing_files:
                            logger.warning(f"SharePoint download failed, but found {len(existing_files)} existing file(s). Will use these.")
                            downloaded_files = []  # Continue with existing files
                        else:
                            return jsonify({
                                'error': f'SharePoint download failed: {str(e)}',
                                'details': 'No existing files found. Please use manual file upload or fix SharePoint permissions.',
                                'sharepoint_error': True
                            }), 500
                    else:
                        return jsonify({
                            'error': f'SharePoint download failed: {str(e)}',
                            'details': 'Reports directory does not exist. Please use manual file upload or fix SharePoint permissions.',
                            'sharepoint_error': True
                        }), 500
            # Generate report with a timestamped identity so history is kept.
            reports_dir = request_data.get('reports_dir', report_config.get('reports_dir', 'reports'))
            output_dir_str = report_config.get('output_dir', 'output')
            output_dir = Path(output_dir_str)
            if not output_dir.is_absolute():
                script_dir = Path(__file__).parent.absolute()
                output_dir = script_dir / output_dir
            timestamp = datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
            report_id = f"report-{timestamp}"
            output_file = str(output_dir / f"{report_id}.json")
            # Log which files will be used for generation
            reports_dir_path = Path(reports_dir)
            if not reports_dir_path.is_absolute():
                script_dir = Path(__file__).parent.absolute()
                reports_dir_path = script_dir / reports_dir_path
            logger.info(f"Generating report from {reports_dir_path.absolute()}...")
            logger.info(f"Reports directory exists: {reports_dir_path.exists()}")
            # Determine which files to use for generation.
            # CRITICAL: only files that were just downloaded/uploaded may be
            # used; stale workbooks must never be mixed into a report.
            if downloaded_files:
                # Files came from SharePoint - ensure the directory holds
                # exactly those and nothing else.
                logger.info(f"Using {len(downloaded_files)} file(s) downloaded from SharePoint")
                all_files = list(reports_dir_path.glob('*.xlsx')) + list(reports_dir_path.glob('*.xls'))
                downloaded_file_paths = [Path(f).name for f in downloaded_files]  # Just filenames
                if len(all_files) != len(downloaded_files):
                    logger.warning(f"WARNING: Found {len(all_files)} file(s) in reports_dir but only {len(downloaded_files)} were downloaded!")
                    logger.warning("This might indicate old files weren't cleared. Clearing now...")
                    for file in all_files:
                        if file.name not in downloaded_file_paths:
                            try:
                                file.unlink()
                                logger.info(f"Cleared unexpected file: {file.name}")
                            except Exception as e:
                                logger.error(f"Failed to clear unexpected file {file.name}: {e}")
            elif not download_from_sp:
                # Manual upload path. The upload endpoint clears old files,
                # but double-check: only use files modified within the last
                # 10 minutes (upload + generation delay window).
                if reports_dir_path.exists():
                    excel_files = list(reports_dir_path.glob('*.xlsx')) + list(reports_dir_path.glob('*.xls'))
                    current_time = datetime.now().timestamp()
                    recent_files = []
                    for excel_file in excel_files:
                        mtime = excel_file.stat().st_mtime
                        if current_time - mtime < 600:  # 10 minutes
                            recent_files.append(excel_file)
                            mtime_str = datetime.fromtimestamp(mtime).strftime('%Y-%m-%d %H:%M:%S')
                            logger.info(f" - {excel_file.name} (modified: {mtime_str}) - will be used for manual upload generation")
                        else:
                            logger.warning(f" - {excel_file.name} (modified: {datetime.fromtimestamp(mtime).strftime('%Y-%m-%d %H:%M:%S')}) - skipping (too old, might be from previous run)")
                    if len(recent_files) < len(excel_files):
                        logger.warning(f"Found {len(excel_files)} total file(s), but only {len(recent_files)} are recent. Clearing old files to avoid combining...")
                        for excel_file in excel_files:
                            if excel_file not in recent_files:
                                try:
                                    excel_file.unlink()
                                    logger.info(f"Cleared old file: {excel_file.name}")
                                except Exception as e:
                                    logger.warning(f"Failed to clear old file {excel_file.name}: {e}")
                    if len(recent_files) == 0:
                        logger.error("Manual upload was used but no recent files found in reports directory!")
                        logger.error("This might mean:")
                        logger.error("1. Files were not uploaded successfully")
                        logger.error("2. Files were uploaded but cleared before generation")
                        logger.error("3. File modification times are incorrect")
                        return jsonify({
                            'error': 'No files found for manual upload generation',
                            'details': 'Files were uploaded but not found in reports directory. Please try uploading again.',
                            'manual_upload_error': True
                        }), 400
                    # Re-verify nothing but the recent uploads remains.
                    all_files = list(reports_dir_path.glob('*.xlsx')) + list(reports_dir_path.glob('*.xls'))
                    if len(all_files) != len(recent_files):
                        logger.warning(f"WARNING: Found {len(all_files)} file(s) but only {len(recent_files)} are recent!")
                        logger.warning("Clearing old files to ensure only uploaded files are used...")
                        for file in all_files:
                            if file not in recent_files:
                                try:
                                    file.unlink()
                                    logger.info(f"Cleared unexpected old file: {file.name}")
                                except Exception as e:
                                    logger.error(f"Failed to clear unexpected file {file.name}: {e}")
                    logger.info(f"Will generate report from {len(recent_files)} recently uploaded file(s)")
                else:
                    logger.error("Manual upload was used but reports directory does not exist!")
                    return jsonify({
                        'error': 'Reports directory does not exist',
                        'details': 'Cannot generate report from manual upload - reports directory is missing.',
                        'manual_upload_error': True
                    }), 500
            else:
                # SharePoint was requested but produced nothing, and there was
                # no manual upload: refuse to fall back on potentially stale files.
                logger.error("SharePoint download was requested but failed, and no manual upload was used!")
                logger.error("This should not happen - refusing to use potentially old files")
                return jsonify({
                    'error': 'SharePoint download failed and no manual upload provided',
                    'details': 'Cannot generate report - no data source available. Please try again or use manual upload.',
                    'sharepoint_error': True
                }), 400
            report_data = generate_report(
                reports_dir=str(reports_dir_path),
                output_file=output_file,
                verbose=False  # Don't print to console in API mode
            )
            if report_data:
                # Generate the HTML twin with the same timestamped id.
                html_file = output_dir / f"{report_id}.html"
                from html_generator import generate_html_report
                generate_html_report(output_file, str(html_file))
                # Cleanup old reports (keep only last 10)
                reports_dir_for_cleanup = Path(reports_dir) if isinstance(reports_dir, str) else reports_dir
                cleanup_old_reports(output_dir, reports_dir_for_cleanup, max_reports=10)
                return jsonify({
                    'status': 'success',
                    'message': 'Report generated successfully',
                    'report_id': report_id,
                    'report_date': timestamp,
                    'output_file': output_file,
                    'summary': report_data.get('summary', {}),
                    'vendors_count': len(report_data.get('vendors', [])),
                    'downloaded_files': len(downloaded_files) if download_from_sp else 0,
                    'downloaded_file_names': [Path(f).name for f in downloaded_files] if download_from_sp else []
                })
            else:
                return jsonify({
                    'error': 'Report generation failed'
                }), 500
        except Exception as e:
            logger.error(f"Error generating report: {e}", exc_info=True)
            return jsonify({
                'error': f'Report generation failed: {str(e)}'
            }), 500

    @app.route('/api/upload', methods=['POST'])
    def upload_files():
        """Upload Excel files manually. Clears old files before uploading new ones."""
        try:
            if 'files' not in request.files:
                return jsonify({'error': 'No files provided'}), 400
            files = request.files.getlist('files')
            if not files or all(f.filename == '' for f in files):
                return jsonify({'error': 'No files selected'}), 400
            report_config = app.config['REPORT_CONFIG']
            reports_dir_str = report_config.get('reports_dir', 'reports')
            reports_dir = Path(reports_dir_str)
            if not reports_dir.is_absolute():
                script_dir = Path(__file__).parent.absolute()
                reports_dir = script_dir / reports_dir
            reports_dir.mkdir(parents=True, exist_ok=True)
            # ALWAYS clear ALL old Excel files before saving the new ones.
            # CRITICAL: this prevents combining multiple files in generation.
            old_excel_files = list(reports_dir.glob('*.xlsx')) + list(reports_dir.glob('*.xls'))
            cleared_count = 0
            failed_to_clear = []
            for old_file in old_excel_files:
                try:
                    # On Windows, files might be locked - retry a few times.
                    max_retries = 3
                    retry_count = 0
                    while retry_count < max_retries:
                        try:
                            old_file.unlink()
                            cleared_count += 1
                            logger.info(f"Cleared old file before upload: {old_file.name}")
                            break
                        except PermissionError:
                            retry_count += 1
                            if retry_count < max_retries:
                                import time
                                time.sleep(0.5)  # Wait 500ms before retry
                            else:
                                raise
                except Exception as e:
                    failed_to_clear.append(old_file.name)
                    logger.error(f"Failed to clear old file {old_file.name}: {e}")
            # Fail the upload if anything could not be cleared, so old and new
            # data are never mixed.
            if failed_to_clear:
                logger.error(f"CRITICAL: Failed to clear {len(failed_to_clear)} file(s) before upload: {failed_to_clear}")
                return jsonify({
                    'error': f'Failed to clear {len(failed_to_clear)} old file(s) before upload. Please ensure files are not locked or in use.',
                    'failed_files': failed_to_clear,
                    'details': 'Old files must be cleared before upload to ensure report generation uses only the new file(s). Files may be locked by Excel or another process.'
                }), 500
            if cleared_count > 0:
                logger.info(f"Cleared {cleared_count} old Excel file(s) before upload")
            else:
                logger.info("No old Excel files found to clear (reports directory was empty)")
            uploaded_count = 0
            uploaded_files = []
            for file in files:
                if file.filename == '':
                    continue
                # Sanitize the client-supplied name; skip if nothing remains
                # (secure_filename can return '' for hostile/odd names).
                filename = secure_filename(file.filename)
                if not filename:
                    logger.warning(f"Skipping file with unusable name: {file.filename!r}")
                    continue
                # Only accept Excel files.
                if not filename.endswith(('.xlsx', '.xls')):
                    logger.warning(f"Skipping non-Excel file: {filename}")
                    continue
                # Save file to reports directory
                file_path = reports_dir / filename
                file.save(str(file_path))
                uploaded_count += 1
                uploaded_files.append(filename)
                logger.info(f"Uploaded file: {filename} -> {file_path}")
            if uploaded_count == 0:
                return jsonify({'error': 'No valid Excel files uploaded'}), 400
            # Warn if multiple files uploaded - reports should come from ONE file.
            if uploaded_count > 1:
                logger.warning(f"WARNING: {uploaded_count} files uploaded. Reports should be generated from a single file. Only the newest file will be used.")
            return jsonify({
                'status': 'success',
                'message': f'Successfully uploaded {uploaded_count} file(s)',
                'uploaded_count': uploaded_count,
                'uploaded_files': uploaded_files,
                'cleared_old_files': cleared_count,
                'warning': f'{uploaded_count} file(s) uploaded - only the newest will be used for report generation' if uploaded_count > 1 else None
            })
        except Exception as e:
            logger.error(f"Error uploading files: {e}", exc_info=True)
            return jsonify({'error': f'Failed to upload files: {str(e)}'}), 500

    @app.route('/api/status', methods=['GET'])
    def status():
        """Get service status and configuration."""
        return jsonify({
            'status': 'running',
            'sharepoint_enabled': app.config['SHAREPOINT_CONFIG'].get('enabled', False),
            'reports_dir': app.config['REPORT_CONFIG'].get('reports_dir', 'reports'),
            'output_dir': app.config['REPORT_CONFIG'].get('output_dir', 'output')
        })

    @app.route('/api/report/json', methods=['GET'])
    def get_report_json():
        """Get the latest report JSON file."""
        try:
            report_config = app.config['REPORT_CONFIG']
            output_dir_str = report_config.get('output_dir', 'output')
            output_dir = Path(output_dir_str)
            if not output_dir.is_absolute():
                script_dir = Path(__file__).parent.absolute()
                output_dir = script_dir / output_dir
            # FIX: generation writes timestamped report-<ts>.json files, so a
            # fixed 'report.json' lookup 404'd on every fresh report. Consider
            # both the legacy name and timestamped files, newest first.
            candidates = list(output_dir.glob('report-*.json'))
            legacy_json = output_dir / 'report.json'
            if legacy_json.exists():
                candidates.append(legacy_json)
            if not candidates:
                return jsonify({'error': 'Report not found. Generate a report first.'}), 404
            report_file = sorted(candidates, key=lambda p: p.stat().st_mtime, reverse=True)[0]
            with open(report_file, 'r', encoding='utf-8') as f:
                report_data = json.load(f)
            return jsonify(report_data)
        except Exception as e:
            logger.error(f"Error reading report JSON: {e}", exc_info=True)
            return jsonify({'error': f'Failed to read report: {str(e)}'}), 500

    @app.route('/api/report/html', methods=['GET'])
    def get_report_html():
        """Get report HTML file by report_id (or latest if not specified)."""
        try:
            report_config = app.config['REPORT_CONFIG']
            output_dir_str = report_config.get('output_dir', 'output')
            output_dir = Path(output_dir_str)
            if not output_dir.is_absolute():
                script_dir = Path(__file__).parent.absolute()
                output_dir = script_dir / output_dir
            # Get report_id from query parameter, default to latest
            report_id = request.args.get('report_id')
            if report_id:
                html_file = output_dir / f"{report_id}.html"
                # A timestamped id may actually be the legacy report.html
                # (ids are synthesized for it in /api/reports/list).
                if not html_file.exists() and report_id.startswith('report-'):
                    legacy_file = output_dir / 'report.html'
                    if legacy_file.exists():
                        html_file = legacy_file
                    else:
                        return jsonify({'error': f'Report {report_id} not found.'}), 404
                elif not html_file.exists():
                    return jsonify({'error': f'Report {report_id} not found.'}), 404
            else:
                # Latest report, considering timestamped and legacy names.
                timestamped_files = list(output_dir.glob('report-*.html'))
                legacy_file = output_dir / 'report.html'
                html_files = []
                if legacy_file.exists():
                    html_files.append(legacy_file)
                html_files.extend(timestamped_files)
                if not html_files:
                    return jsonify({'error': 'No reports found. Generate a report first.'}), 404
                html_file = sorted(html_files, key=lambda p: p.stat().st_mtime, reverse=True)[0]
            return send_from_directory(str(output_dir), html_file.name, mimetype='text/html')
        except Exception as e:
            logger.error(f"Error reading report HTML: {e}", exc_info=True)
            return jsonify({'error': f'Failed to read report HTML: {str(e)}'}), 500

    @app.route('/api/reports/list', methods=['GET'])
    def list_reports():
        """List all available reports (last 10)."""
        try:
            report_config = app.config['REPORT_CONFIG']
            output_dir_str = report_config.get('output_dir', 'output')
            output_dir = Path(output_dir_str)
            if not output_dir.is_absolute():
                script_dir = Path(__file__).parent.absolute()
                output_dir = script_dir / output_dir
            # Verbose diagnostics: path resolution issues were a recurring
            # problem here, so log exactly where we look.
            logger.info(f"Looking for reports in: {output_dir.absolute()}")
            logger.info(f"Output directory exists: {output_dir.exists()}")
            if output_dir.exists():
                logger.info(f"Files in output directory: {list(output_dir.glob('*'))}")
            # Find all report HTML files (both timestamped and non-timestamped)
            timestamped_files = list(output_dir.glob('report-*.html'))
            legacy_file = output_dir / 'report.html'
            logger.info(f"Found {len(timestamped_files)} timestamped report files")
            logger.info(f"Legacy report.html exists: {legacy_file.exists()}")
            if legacy_file.exists():
                logger.info(f"Legacy report.html path: {legacy_file.absolute()}")
            html_files = []
            if legacy_file.exists():
                html_files.append(legacy_file)
                logger.info("Added legacy report.html to list")
            html_files.extend(timestamped_files)
            logger.info(f"Total HTML files found: {len(html_files)}")
            reports = []
            for html_file in sorted(html_files, key=lambda p: p.stat().st_mtime, reverse=True)[:10]:
                report_id = html_file.stem  # e.g. "report-2025-11-08-11-25-46" or "report"
                if report_id == 'report':
                    # Legacy report.html: synthesize a timestamped id from mtime.
                    mtime = html_file.stat().st_mtime
                    dt = datetime.fromtimestamp(mtime)
                    timestamp_str = dt.strftime('%Y-%m-%d-%H-%M-%S')
                    date_str = dt.strftime('%Y-%m-%d %H:%M:%S')
                    report_id = f"report-{timestamp_str}"
                else:
                    # Timestamped report: parse the timestamp for display.
                    timestamp_str = report_id.replace('report-', '')
                    try:
                        dt = datetime.strptime(timestamp_str, '%Y-%m-%d-%H-%M-%S')
                        date_str = dt.strftime('%Y-%m-%d %H:%M:%S')
                    except ValueError:
                        date_str = timestamp_str
                file_size = html_file.stat().st_size
                reports.append({
                    'report_id': report_id,
                    'date': date_str,
                    'timestamp': timestamp_str,
                    'file_size': file_size
                })
            return jsonify({
                'reports': reports,
                'count': len(reports)
            })
        except Exception as e:
            logger.error(f"Error listing reports: {e}", exc_info=True)
            return jsonify({'error': f'Failed to list reports: {str(e)}'}), 500

    return app
def run_server(config_path: Optional[str] = None, host: Optional[str] = None, port: Optional[int] = None):
    """
    Build the Flask app and serve it.

    Args:
        config_path: Optional path to config.yaml, forwarded to create_app.
        host: Bind address; falls back to the 'api.host' config value, then '0.0.0.0'.
        port: Bind port; falls back to the 'api.port' config value, then 8080.
    """
    flask_app = create_app(config_path)
    # create_app() populates the module-level `config` global read here.
    api_settings = config.get('api', {})
    server_host = host or api_settings.get('host', '0.0.0.0')
    server_port = port or api_settings.get('port', 8080)
    logger.info(f"Starting API server on {server_host}:{server_port}")
    flask_app.run(host=server_host, port=server_port, debug=False)
if __name__ == "__main__":
    import sys
    # Standalone entry point: configure console logging before anything else.
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    )
    # Optional first CLI argument: path to a config.yaml file.
    config_path = sys.argv[1] if len(sys.argv) > 1 else None
    # Check if API is enabled
    config = load_config(config_path)
    if not config.get('api', {}).get('enabled', False):
        # Deliberately start anyway so the server can be exercised locally
        # even when api.enabled is false in the config.
        logger.warning("API is disabled in configuration. Set api.enabled=true to enable.")
        logger.info("Starting API server anyway (for testing)...")
    run_server(config_path=config_path)

262
config.py Normal file
View File

@ -0,0 +1,262 @@
#!/usr/bin/env python3
"""
Configuration Management
Loads configuration from YAML file or environment variables.
"""
import os
import yaml
import logging
from pathlib import Path
from typing import Dict, Optional, Any
try:
from dotenv import load_dotenv
DOTENV_AVAILABLE = True
except ImportError:
DOTENV_AVAILABLE = False
# Baseline configuration. config.yaml values and environment variables are
# layered on top of these defaults by load_config().
DEFAULT_CONFIG = {
    'sharepoint': {
        'enabled': False,
        'site_url': '',
        'folder_path': '/Shared Documents/Reports',
        'file_path': None,  # Use folder_path for multiple files, file_path for single file
        'local_dir': 'reports',
        'username': None,
        'password': None,
        'client_id': None,
        'client_secret': None,
        'use_app_authentication': False,
        'file_pattern': '*.xlsx',
        'overwrite': True
    },
    'scheduler': {
        'enabled': False,
        'schedule_type': 'interval',  # 'interval', 'cron', or 'once'
        'interval_hours': 24,  # For interval type
        'cron_expression': '0 8 * * *',  # For cron type (8 AM daily)
        'timezone': 'America/New_York'
    },
    'api': {
        'enabled': False,
        'host': '0.0.0.0',
        'port': 8080,
        'api_key': None  # Optional API key for authentication
    },
    'report': {
        'output_dir': 'output',
        'reports_dir': 'reports'
    }
}
def load_config(config_path: Optional[str] = None) -> Dict[str, Any]:
    """
    Load configuration from defaults, a YAML file, and environment variables.

    Precedence (lowest to highest): DEFAULT_CONFIG, config.yaml, environment
    variables (see _load_from_env). A .env file is loaded first, if present,
    so its values are visible as environment variables.

    Args:
        config_path: Path to config.yaml (default: config.yaml next to this file).

    Returns:
        Configuration dictionary.
    """
    # Load .env if available (from this directory or the sibling taskboard dir).
    if DOTENV_AVAILABLE:
        env_file = Path(__file__).parent / ".env"
        if not env_file.exists():
            parent_env = Path(__file__).parent.parent / "taskboard" / ".env"
            if parent_env.exists():
                env_file = parent_env
                logging.info(f"Found .env file in taskboard directory: {env_file}")
            else:
                logging.warning(f".env file not found in vendor_report or taskboard directory")
                logging.warning(f"Checked: {Path(__file__).parent / '.env'}")
                logging.warning(f"Checked: {parent_env}")
        else:
            logging.info(f"Found .env file in vendor_report directory: {env_file}")
        if env_file.exists():
            load_dotenv(env_file, override=True)  # override=True ensures env vars take precedence
            logging.info(f"Loaded environment variables from {env_file.absolute()}")
            # Log which SharePoint env vars were found (SHAREPOINT_* with AZURE_AD_* fallbacks).
            sp_vars = ['SHAREPOINT_ENABLED', 'SHAREPOINT_SITE_URL', 'SHAREPOINT_FOLDER_PATH']
            found_vars = [var for var in sp_vars if os.getenv(var)]
            client_id = os.getenv('SHAREPOINT_CLIENT_ID') or os.getenv('AZURE_AD_CLIENT_ID')
            tenant_id = os.getenv('SHAREPOINT_TENANT_ID') or os.getenv('AZURE_AD_TENANT_ID')
            client_secret = os.getenv('SHAREPOINT_CLIENT_SECRET') or os.getenv('AZURE_AD_CLIENT_SECRET')
            if client_id:
                found_vars.append('CLIENT_ID (from SHAREPOINT_CLIENT_ID or AZURE_AD_CLIENT_ID)')
            if tenant_id:
                found_vars.append('TENANT_ID (from SHAREPOINT_TENANT_ID or AZURE_AD_TENANT_ID)')
            if client_secret:
                found_vars.append('CLIENT_SECRET (from SHAREPOINT_CLIENT_SECRET or AZURE_AD_CLIENT_SECRET)')
            logging.info(f"Found SharePoint environment variables: {', '.join(found_vars)}")
            missing_vars = []
            if not client_id:
                missing_vars.append('CLIENT_ID (SHAREPOINT_CLIENT_ID or AZURE_AD_CLIENT_ID)')
            if not tenant_id:
                missing_vars.append('TENANT_ID (SHAREPOINT_TENANT_ID or AZURE_AD_TENANT_ID)')
            if not client_secret:
                missing_vars.append('CLIENT_SECRET (SHAREPOINT_CLIENT_SECRET or AZURE_AD_CLIENT_SECRET)')
            if missing_vars:
                logging.warning(f"Missing SharePoint credentials: {', '.join(missing_vars)}")
    if config_path is None:
        config_path = Path(__file__).parent / "config.yaml"
    else:
        config_path = Path(config_path)
    # BUG FIX: DEFAULT_CONFIG.copy() is shallow, so the nested section dicts
    # were shared with the module-level defaults and _load_from_env mutated
    # them across calls. Copy each (flat) section dict explicitly instead.
    config = {section: dict(values) for section, values in DEFAULT_CONFIG.items()}
    # Load from YAML file if it exists.
    if config_path.exists():
        try:
            with open(config_path, 'r') as f:
                file_config = yaml.safe_load(f) or {}
            # Deep merge file values over the defaults.
            config = _deep_merge(config, file_config)
        except Exception as e:
            # Log via logging (not print) for consistency with the module.
            logging.warning(f"Failed to load config from {config_path}: {e}")
    # Environment variables take highest precedence.
    config = _load_from_env(config)
    return config
def _deep_merge(base: Dict, override: Dict) -> Dict:
"""Deep merge two dictionaries."""
result = base.copy()
for key, value in override.items():
if key in result and isinstance(result[key], dict) and isinstance(value, dict):
result[key] = _deep_merge(result[key], value)
else:
result[key] = value
return result
def _load_from_env(config: Dict) -> Dict:
"""Load configuration from environment variables."""
# SharePoint settings
if os.getenv('SHAREPOINT_ENABLED'):
config['sharepoint']['enabled'] = os.getenv('SHAREPOINT_ENABLED').lower() == 'true'
if os.getenv('SHAREPOINT_SITE_URL'):
config['sharepoint']['site_url'] = os.getenv('SHAREPOINT_SITE_URL')
if os.getenv('SHAREPOINT_FOLDER_PATH'):
config['sharepoint']['folder_path'] = os.getenv('SHAREPOINT_FOLDER_PATH')
if os.getenv('SHAREPOINT_USERNAME'):
config['sharepoint']['username'] = os.getenv('SHAREPOINT_USERNAME')
if os.getenv('SHAREPOINT_PASSWORD'):
config['sharepoint']['password'] = os.getenv('SHAREPOINT_PASSWORD')
# Check for SHAREPOINT_CLIENT_ID first, fallback to AZURE_AD_CLIENT_ID
if os.getenv('SHAREPOINT_CLIENT_ID'):
config['sharepoint']['client_id'] = os.getenv('SHAREPOINT_CLIENT_ID')
elif os.getenv('AZURE_AD_CLIENT_ID'):
config['sharepoint']['client_id'] = os.getenv('AZURE_AD_CLIENT_ID')
# Check for SHAREPOINT_CLIENT_SECRET first, fallback to AZURE_AD_CLIENT_SECRET
if os.getenv('SHAREPOINT_CLIENT_SECRET'):
config['sharepoint']['client_secret'] = os.getenv('SHAREPOINT_CLIENT_SECRET')
elif os.getenv('AZURE_AD_CLIENT_SECRET'):
config['sharepoint']['client_secret'] = os.getenv('AZURE_AD_CLIENT_SECRET')
# Tenant ID (required for Microsoft Graph API)
if os.getenv('SHAREPOINT_TENANT_ID'):
config['sharepoint']['tenant_id'] = os.getenv('SHAREPOINT_TENANT_ID')
elif os.getenv('AZURE_AD_TENANT_ID'):
config['sharepoint']['tenant_id'] = os.getenv('AZURE_AD_TENANT_ID')
if os.getenv('SHAREPOINT_USE_APP_AUTH'):
config['sharepoint']['use_app_authentication'] = os.getenv('SHAREPOINT_USE_APP_AUTH').lower() == 'true'
elif os.getenv('SHAREPOINT_USE_APP_AUTH') is None and os.getenv('AZURE_AD_CLIENT_ID'):
# If Azure AD credentials are present, default to app auth
config['sharepoint']['use_app_authentication'] = True
# Scheduler settings
if os.getenv('SCHEDULER_ENABLED'):
config['scheduler']['enabled'] = os.getenv('SCHEDULER_ENABLED').lower() == 'true'
if os.getenv('SCHEDULER_INTERVAL_HOURS'):
config['scheduler']['interval_hours'] = int(os.getenv('SCHEDULER_INTERVAL_HOURS'))
if os.getenv('SCHEDULER_CRON'):
config['scheduler']['cron_expression'] = os.getenv('SCHEDULER_CRON')
# API settings
if os.getenv('API_ENABLED'):
config['api']['enabled'] = os.getenv('API_ENABLED').lower() == 'true'
if os.getenv('API_PORT'):
config['api']['port'] = int(os.getenv('API_PORT'))
if os.getenv('API_HOST'):
config['api']['host'] = os.getenv('API_HOST')
if os.getenv('API_KEY'):
config['api']['api_key'] = os.getenv('API_KEY')
# Report settings
if os.getenv('REPORT_OUTPUT_DIR'):
config['report']['output_dir'] = os.getenv('REPORT_OUTPUT_DIR')
if os.getenv('REPORT_REPORTS_DIR'):
config['report']['reports_dir'] = os.getenv('REPORT_REPORTS_DIR')
return config
def save_config_template(config_path: Optional[str] = None) -> None:
    """Write a starter YAML configuration file.

    Args:
        config_path: Destination path for the template. When None, the file
            is written next to this module as ``config.yaml.template``.
    """
    # Resolve the destination up front so the confirmation message is exact.
    if config_path is None:
        target = Path(__file__).parent / "config.yaml.template"
    else:
        target = Path(config_path)
    template = """# Vendor Report Generator Configuration
# SharePoint Integration
sharepoint:
enabled: false # Set to true to enable SharePoint downloads
site_url: "https://yourcompany.sharepoint.com/sites/YourSite"
folder_path: "/Shared Documents/Reports" # Path to folder containing Excel files
# file_path: "/Shared Documents/Reports/file.xlsx" # Alternative: single file path
local_dir: "reports" # Local directory to save downloaded files
username: null # Username for user authentication (leave null if using app auth)
password: null # Password for user authentication (leave null if using app auth)
client_id: null # Azure AD app client ID (for app authentication)
client_secret: null # Azure AD app client secret (for app authentication)
use_app_authentication: false # Set to true to use app authentication (recommended)
file_pattern: "*.xlsx" # Pattern to filter files
overwrite: true # Whether to overwrite existing files
# Scheduler Configuration
scheduler:
enabled: false # Set to true to enable scheduled report generation
schedule_type: "interval" # Options: "interval", "cron", or "once"
interval_hours: 24 # For interval type: generate report every N hours
cron_expression: "0 8 * * *" # For cron type: generate at 8 AM daily (cron format)
timezone: "America/New_York" # Timezone for scheduling
# API Configuration (for on-demand report generation)
api:
enabled: false # Set to true to enable web API
host: "0.0.0.0" # Host to bind API server
port: 8080 # Port for API server
api_key: null # Optional API key for authentication (set to enable auth)
# Report Settings
report:
output_dir: "output" # Directory for generated reports
reports_dir: "reports" # Directory containing Excel files
"""
    target.write_text(template)
    print(f"Configuration template saved to: {target}")

37
config.yaml.template Normal file
View File

@ -0,0 +1,37 @@
# Vendor Report Generator Configuration
# SharePoint Integration
sharepoint:
enabled: false # Set to true to enable SharePoint downloads
site_url: "https://yourcompany.sharepoint.com/sites/YourSite"
folder_path: "/Shared Documents/Reports" # Path to folder containing Excel files
# file_path: "/Shared Documents/Reports/file.xlsx" # Alternative: single file path
local_dir: "reports" # Local directory to save downloaded files
username: null # Username for user authentication (leave null if using app auth)
password: null # Password for user authentication (leave null if using app auth)
client_id: null # Azure AD app client ID (for app authentication)
client_secret: null # Azure AD app client secret (for app authentication)
use_app_authentication: false # Set to true to use app authentication (recommended)
file_pattern: "*.xlsx" # Pattern to filter files
overwrite: true # Whether to overwrite existing files
# Scheduler Configuration
scheduler:
enabled: false # Set to true to enable scheduled report generation
schedule_type: "interval" # Options: "interval", "cron", or "once"
interval_hours: 24 # For interval type: generate report every N hours
cron_expression: "0 8 * * *" # For cron type: generate at 8 AM daily (cron format)
timezone: "America/New_York" # Timezone for scheduling
# API Configuration (for on-demand report generation)
api:
enabled: false # Set to true to enable web API
host: "0.0.0.0" # Host to bind API server
port: 8080 # Port for API server
api_key: null # Optional API key for authentication (set to enable auth)
# Report Settings
report:
output_dir: "output" # Directory for generated reports
reports_dir: "reports" # Directory containing Excel files

View File

@ -63,6 +63,11 @@ def get_priority_badge_class(priority: Optional[str]) -> str:
return "badge-secondary"
def escape_js_string(s: str) -> str:
    """Escape a string for safe embedding in a JavaScript string literal.

    Backslashes are escaped first so later replacements are not
    double-escaped. Both quote characters are handled because callers embed
    the result in single-quoted contexts (e.g. switchVendorTab('…')) as well
    as double-quoted ones; \\' is a valid escape in either kind of JS string.
    Newline/carriage-return are escaped so they cannot terminate the literal.
    """
    return (
        s.replace('\\', '\\\\')
        .replace('"', '\\"')
        .replace("'", "\\'")
        .replace('\n', '\\n')
        .replace('\r', '\\r')
    )
def generate_html_report(json_path: str, output_path: Optional[str] = None) -> str:
"""
Generate HTML report from JSON report file.
@ -784,7 +789,7 @@ def generate_html_content(report_data: Dict) -> str:
<div class="tabs-container">
<div class="tabs" id="vendor-tabs">
<button class="tab active" onclick="switchVendorTab('all')" data-vendor="all">All Vendors</button>
{''.join([f'<button class="tab" onclick="switchVendorTab(\'{vn}\')" data-vendor="{vn}">{vn}</button>' for vn in vendor_names])}
{''.join(['<button class="tab" onclick="switchVendorTab(' + "'" + escape_js_string(vn) + "'" + ')" data-vendor="' + vn + '">' + vn + '</button>' for vn in vendor_names])}
</div>
</div>

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -2,3 +2,17 @@
pandas>=2.0.0
openpyxl>=3.0.0
pydantic>=2.0.0
# Optional: SharePoint integration (Office365-REST-Python-Client)
Office365-REST-Python-Client>=2.3.0
# Optional: Scheduling
apscheduler>=3.10.0
# Optional: Web API
flask>=2.3.0
flask-cors>=4.0.0
# Configuration
pyyaml>=6.0
python-dotenv>=1.0.0

28
run-local.ps1 Normal file
View File

@ -0,0 +1,28 @@
# PowerShell script to run vendor-report API locally with environment variables
#
# !! SECURITY WARNING (review): a real-looking Azure AD client secret is
# committed in plain text below. Anything committed to version control must be
# treated as compromised - rotate this credential and load it from an
# untracked .env file or a secret store instead of hard-coding it here.
# Set SharePoint Configuration
$env:SHAREPOINT_ENABLED = "true"
$env:SHAREPOINT_SITE_URL = "https://automationstandard.sharepoint.com/sites/2429ODF_AMZ_MTN6_25K"
$env:SHAREPOINT_FOLDER_PATH = "/Documents/General/Amazon Punchlist [EXTERNAL]"
$env:SHAREPOINT_CLIENT_ID = "5e00db88-ff96-4070-8270-e6c9ea9282f0"
$env:SHAREPOINT_CLIENT_SECRET = "tYY8Q~e6hrzNA5EsTcUtDfZ4q3vT-c134r7nkaM8"
$env:SHAREPOINT_USE_APP_AUTH = "true"
# Set API Configuration
$env:API_ENABLED = "true"
$env:API_PORT = "8080"
$env:API_HOST = "0.0.0.0"
# Set Report Configuration
$env:REPORT_OUTPUT_DIR = "output"
$env:REPORT_REPORTS_DIR = "reports"
Write-Host "Starting vendor-report API with SharePoint configuration..." -ForegroundColor Green
Write-Host "SharePoint Site: $env:SHAREPOINT_SITE_URL" -ForegroundColor Cyan
Write-Host "Folder Path: $env:SHAREPOINT_FOLDER_PATH" -ForegroundColor Cyan
Write-Host "API will run on: http://localhost:8080" -ForegroundColor Cyan
Write-Host ""
# Run the API
python api_server.py

172
scheduler.py Normal file
View File

@ -0,0 +1,172 @@
#!/usr/bin/env python3
"""
Report Scheduler
Schedules automatic report generation with optional SharePoint downloads.
"""
import logging
from datetime import datetime
from typing import Optional
from pathlib import Path
try:
from apscheduler.schedulers.blocking import BlockingScheduler
from apscheduler.triggers.interval import IntervalTrigger
from apscheduler.triggers.cron import CronTrigger
from apscheduler.triggers.date import DateTrigger
SCHEDULER_AVAILABLE = True
except ImportError:
SCHEDULER_AVAILABLE = False
logging.warning("APScheduler not installed. Scheduling features disabled.")
from config import load_config
from report_generator import generate_report
from sharepoint_downloader import download_from_sharepoint
logger = logging.getLogger(__name__)
class ReportScheduler:
    """Manages scheduled report generation.

    Wraps an APScheduler BlockingScheduler around the report pipeline: an
    optional SharePoint download followed by report generation, driven by the
    'scheduler' / 'sharepoint' / 'report' sections of the loaded config.
    """
    def __init__(self, config_path: Optional[str] = None) -> None:
        """
        Initialize scheduler.

        Args:
            config_path: Path to configuration file (None uses load_config's
                default lookup behavior).

        Raises:
            ImportError: if APScheduler is not installed.
        """
        if not SCHEDULER_AVAILABLE:
            raise ImportError(
                "APScheduler is required for scheduling. "
                "Install it with: pip install apscheduler"
            )
        self.config = load_config(config_path)
        # NOTE(review): assumes the config always contains scheduler.timezone;
        # a missing key raises KeyError here - confirm load_config defaults it.
        self.scheduler = BlockingScheduler(timezone=self.config['scheduler']['timezone'])
        self.scheduler_config = self.config['scheduler']
        self.sharepoint_config = self.config.get('sharepoint', {})
        self.report_config = self.config.get('report', {})
    def generate_report_job(self) -> None:
        """Job function to generate report.

        Optionally refreshes source files from SharePoint first; a failed
        download is logged but does NOT abort report generation.
        """
        logger.info("=" * 70)
        logger.info("SCHEDULED REPORT GENERATION")
        logger.info("=" * 70)
        logger.info(f"Started at: {datetime.now()}")
        try:
            # Download from SharePoint if enabled
            if self.sharepoint_config.get('enabled'):
                logger.info("Downloading files from SharePoint...")
                try:
                    downloaded = download_from_sharepoint(
                        site_url=self.sharepoint_config['site_url'],
                        folder_path=self.sharepoint_config.get('folder_path'),
                        file_path=self.sharepoint_config.get('file_path'),
                        local_dir=self.sharepoint_config.get('local_dir', 'reports'),
                        tenant_id=self.sharepoint_config.get('tenant_id'),
                        client_id=self.sharepoint_config.get('client_id'),
                        client_secret=self.sharepoint_config.get('client_secret'),
                        use_app_authentication=self.sharepoint_config.get('use_app_authentication', True),
                        file_pattern=self.sharepoint_config.get('file_pattern'),
                        overwrite=self.sharepoint_config.get('overwrite', True)
                    )
                    logger.info(f"Downloaded {len(downloaded)} file(s) from SharePoint")
                except Exception as e:
                    logger.error(f"Failed to download from SharePoint: {e}")
                    # Continue with report generation even if download fails
            # Generate report
            logger.info("Generating report...")
            reports_dir = self.report_config.get('reports_dir', 'reports')
            output_file = Path(self.report_config.get('output_dir', 'output')) / 'report.json'
            report_data = generate_report(
                reports_dir=reports_dir,
                output_file=str(output_file),
                verbose=True
            )
            # generate_report's return value doubles as the success flag here.
            if report_data:
                logger.info("✓ Scheduled report generation completed successfully")
            else:
                logger.error("✗ Scheduled report generation failed")
        except Exception as e:
            logger.error(f"Error in scheduled report generation: {e}", exc_info=True)
    def start(self) -> None:
        """Start the scheduler (blocks until interrupted).

        Supported schedule types: 'interval' (every N hours), 'cron'
        (5-field cron expression), and 'once' (runs the job immediately and
        returns without starting the scheduler loop).
        """
        if not self.scheduler_config.get('enabled'):
            logger.warning("Scheduler is disabled in configuration")
            return
        schedule_type = self.scheduler_config.get('schedule_type', 'interval')
        if schedule_type == 'interval':
            # Schedule at regular intervals
            interval_hours = self.scheduler_config.get('interval_hours', 24)
            trigger = IntervalTrigger(hours=interval_hours)
            logger.info(f"Scheduling reports every {interval_hours} hours")
        elif schedule_type == 'cron':
            # Schedule using cron expression
            cron_expression = self.scheduler_config.get('cron_expression', '0 8 * * *')
            # Parse cron expression (format: "minute hour day month day_of_week")
            parts = cron_expression.split()
            if len(parts) == 5:
                trigger = CronTrigger(
                    minute=parts[0],
                    hour=parts[1],
                    day=parts[2],
                    month=parts[3],
                    day_of_week=parts[4]
                )
            else:
                logger.error(f"Invalid cron expression: {cron_expression}")
                return
            logger.info(f"Scheduling reports with cron: {cron_expression}")
        elif schedule_type == 'once':
            # Run once at a specific time
            # For "once", you'd typically use DateTrigger, but for simplicity,
            # we'll just run it immediately
            logger.info("Running report generation once (immediately)")
            self.generate_report_job()
            return
        else:
            logger.error(f"Unknown schedule type: {schedule_type}")
            return
        # Add job to scheduler; replace_existing makes restarts idempotent.
        self.scheduler.add_job(
            self.generate_report_job,
            trigger=trigger,
            id='generate_report',
            name='Generate Vendor Report',
            replace_existing=True
        )
        logger.info("Scheduler started. Press Ctrl+C to stop.")
        try:
            self.scheduler.start()  # BlockingScheduler: blocks this thread
        except KeyboardInterrupt:
            logger.info("Scheduler stopped by user")
if __name__ == "__main__":
    # CLI entry point: an optional first argument names the config file.
    import sys

    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    )
    cfg_path = None
    if len(sys.argv) > 1:
        cfg_path = sys.argv[1]
    ReportScheduler(config_path=cfg_path).start()

461
sharepoint_downloader.py Normal file
View File

@ -0,0 +1,461 @@
#!/usr/bin/env python3
"""
SharePoint File Downloader using Office365-REST-Python-Client
Downloads Excel files from SharePoint to the local reports directory.
Uses Office365-REST-Python-Client library for SharePoint REST API access.
"""
import os
from pathlib import Path
from typing import Optional, List
from datetime import datetime
import logging
try:
from office365.sharepoint.client_context import ClientContext
from office365.runtime.auth.authentication_context import AuthenticationContext
from office365.sharepoint.files.file import File
OFFICE365_AVAILABLE = True
except ImportError:
OFFICE365_AVAILABLE = False
logging.warning("office365-rest-python-client not installed. SharePoint features disabled.")
logger = logging.getLogger(__name__)
class SharePointDownloader:
    """Downloads files from SharePoint using Office365-REST-Python-Client.

    The client context is created lazily: public methods call ``connect()``
    first and reuse the cached ``ClientContext``. Only app-only (client
    credential) authentication is supported.
    """
    def __init__(
        self,
        site_url: str,
        tenant_id: Optional[str] = None,
        client_id: Optional[str] = None,
        client_secret: Optional[str] = None,
        use_app_authentication: bool = True
    ):
        """
        Initialize SharePoint downloader using Office365-REST-Python-Client.

        Args:
            site_url: SharePoint site URL (e.g., "https://yourcompany.sharepoint.com/sites/YourSite")
            tenant_id: Azure AD tenant ID (kept for interface compatibility;
                not used by this library's client-credential flow)
            client_id: Azure AD app client ID (required)
            client_secret: Azure AD app client secret (required)
            use_app_authentication: Whether to use app authentication (default: True)

        Raises:
            ImportError: if office365-rest-python-client is not installed.
            ValueError: if client_id or client_secret is missing.
        """
        if not OFFICE365_AVAILABLE:
            raise ImportError(
                "office365-rest-python-client is required for SharePoint integration. "
                "Install it with: pip install Office365-REST-Python-Client"
            )
        self.site_url = site_url.rstrip('/')  # normalize: drop trailing slash
        self.tenant_id = tenant_id
        self.client_id = client_id
        self.client_secret = client_secret
        self.use_app_authentication = use_app_authentication
        self.ctx = None  # lazily created ClientContext (see connect())
        if not self.client_id or not self.client_secret:
            logger.error("Client ID and Client Secret are required for SharePoint authentication.")
            raise ValueError("Missing Azure AD credentials for SharePoint.")
    def connect(self) -> bool:
        """Connect to SharePoint site.

        Returns:
            True when a usable ClientContext exists, False otherwise.
            Failures are logged with remediation hints for the common
            401/403 app-permission problems.
        """
        if self.ctx:
            return True  # already connected
        try:
            if self.use_app_authentication:
                # App-only authentication using Office365-REST-Python-Client
                from office365.runtime.auth.client_credential import ClientCredential
                logger.info(f"Connecting to SharePoint site: {self.site_url}")
                logger.info(f"Using Client ID: {self.client_id[:8]}... (truncated for security)")
                credentials = ClientCredential(self.client_id, self.client_secret)
                self.ctx = ClientContext(self.site_url).with_credentials(credentials)
                # Test the connection by loading the web object; this fails
                # fast if RSC is not granted or credentials are wrong.
                web = self.ctx.web
                self.ctx.load(web)
                self.ctx.execute_query()
                logger.info(f"Successfully connected to SharePoint site: {web.properties['Title']}")
                return True
            else:
                logger.error("Only app-only authentication is supported")
                return False
        except Exception as e:
            error_msg = str(e)
            logger.error(f"Failed to connect to SharePoint: {error_msg}", exc_info=True)
            # Provide helpful error messages for common failure modes
            if "Unsupported app only token" in error_msg or "401" in error_msg:
                logger.error("This error usually means:")
                logger.error("1. Resource-Specific Consent (RSC) is not granted for this site")
                logger.error("2. Go to: {}/_layouts/15/appinv.aspx".format(self.site_url))
                logger.error("3. Enter App ID: {}".format(self.client_id))
                logger.error("4. Grant permission with XML: <AppPermissionRequests AllowAppOnlyPolicy=\"true\"><AppPermissionRequest Scope=\"http://sharepoint/content/sitecollection\" Right=\"Read\" /></AppPermissionRequests>")
            elif "403" in error_msg or "Forbidden" in error_msg:
                logger.error("403 Forbidden - App does not have access to this site")
                logger.error("RSC must be granted via appinv.aspx")
            elif "Invalid client secret" in error_msg or "invalid_client" in error_msg:
                logger.error("Invalid client credentials - check CLIENT_ID and CLIENT_SECRET")
            return False
    def list_files_in_folder(
        self,
        folder_path: str,
        file_pattern: Optional[str] = None
    ) -> List[dict]:
        """
        List Excel files in a SharePoint folder.

        Args:
            folder_path: Folder path relative to site root (e.g., "Shared Documents/General/Amazon Punchlist [EXTERNAL]")
            file_pattern: Optional glob-style filter (e.g., "*.xlsx"),
                matched against the file name with fnmatch.

        Returns:
            List of file metadata dictionaries (name, server_relative_url,
            size, time_last_modified); empty list on error.
        """
        from fnmatch import fnmatch  # stdlib; local import leaves module imports untouched
        if not self.ctx:
            if not self.connect():
                return []
        try:
            # Normalize folder path:
            # User provides: /Shared Documents/General/Amazon Punchlist [EXTERNAL]
            # SharePoint needs: /sites/SiteName/Shared Documents/General/Amazon Punchlist [EXTERNAL]
            folder_path = folder_path.strip('/')
            # Extract site path from site_url
            from urllib.parse import urlparse
            site_path = urlparse(self.site_url).path.strip('/')
            # If folder_path already starts with the site path, use it as-is;
            # otherwise prepend the site path.
            if folder_path.startswith(site_path + '/'):
                server_relative_url = f"/{folder_path}"
            elif site_path:
                server_relative_url = f"/{site_path}/{folder_path}"
            else:
                server_relative_url = f"/{folder_path}"
            logger.info(f"Listing files in folder: {server_relative_url}")
            logger.info(f"Site URL: {self.site_url}, Site path: {site_path}, Folder path: {folder_path}")
            # Get folder contents
            folder = self.ctx.web.get_folder_by_server_relative_url(server_relative_url)
            files = folder.files
            self.ctx.load(files)
            self.ctx.execute_query()
            excel_files = []
            for file in files:
                file_name = file.properties["Name"]
                # Only consider Excel files
                if file_name and (file_name.endswith('.xlsx') or file_name.endswith('.xls')):
                    # FIX: real glob matching. The previous implementation
                    # stripped '*' and used endswith(), which broke for any
                    # pattern like "report*.xlsx"; fnmatch handles "*.xlsx"
                    # identically and generalizes correctly.
                    if file_pattern and not fnmatch(file_name, file_pattern):
                        continue
                    excel_files.append({
                        "name": file_name,
                        "server_relative_url": file.properties.get("ServerRelativeUrl", ""),
                        "size": file.properties.get("Length", 0),
                        "time_last_modified": file.properties.get("TimeLastModified", "")
                    })
            logger.info(f"Found {len(excel_files)} Excel file(s) in folder")
            for file_info in excel_files:
                logger.info(f" - {file_info['name']} ({file_info['size']} bytes)")
            return excel_files
        except Exception as e:
            logger.error(f"Error listing files: {e}", exc_info=True)
            return []
    def download_file(
        self,
        server_relative_url: str,
        file_name: str,
        local_path: str,
        overwrite: bool = True
    ) -> bool:
        """
        Download a single file from SharePoint.

        Args:
            server_relative_url: Server-relative URL of the file
            file_name: The original name of the file (for logging)
            local_path: Local path where file should be saved
            overwrite: Whether to overwrite existing file

        Returns:
            True if successful, False otherwise
        """
        if not self.ctx:
            if not self.connect():
                return False
        try:
            local_file_path = Path(local_path)
            local_file_path.parent.mkdir(parents=True, exist_ok=True)
            if local_file_path.exists() and not overwrite:
                logger.info(f"File already exists, skipping: {local_path}")
                return True
            logger.info(f"Downloading file: {file_name} from {server_relative_url} to {local_path}")
            # Get file metadata first
            file = self.ctx.web.get_file_by_server_relative_url(server_relative_url)
            self.ctx.load(file)
            self.ctx.execute_query()
            # The Office365 library writes the bytes during execute_query(),
            # so the handle must stay open across download() + execute_query().
            # FIX: a with-block now guarantees the handle closes on every path
            # (the old code closed it manually and used a bare `except:` in
            # the error-path cleanup).
            with open(local_file_path, "wb") as local_file:
                file.download(local_file)
                self.ctx.execute_query()
            logger.info(f"Successfully downloaded: {file_name} -> {local_path}")
            return True
        except Exception as e:
            logger.error(f"Error downloading file {file_name}: {e}", exc_info=True)
            return False
    def download_files_from_folder(
        self,
        folder_path: str,
        local_dir: str,
        file_pattern: Optional[str] = None,
        overwrite: bool = True,
        clear_existing: bool = True
    ) -> List[str]:
        """
        Download Excel files from a SharePoint folder.

        Only the newest Excel file is downloaded, and all pre-existing local
        Excel files are removed first so stale data cannot be mixed in.

        Args:
            folder_path: Folder path relative to site root
            local_dir: Local directory to save files
            file_pattern: Optional glob filter (e.g., "*.xlsx")
            overwrite: Whether to overwrite existing files
            clear_existing: Accepted for API compatibility.
                NOTE(review): currently ignored - the directory is ALWAYS
                cleared (see below); confirm no caller relies on False.

        Returns:
            List of downloaded file paths (typically 1 file - the newest)
        """
        import time  # FIX: hoisted out of the retry loop below
        # Connect to SharePoint
        if not self.connect():
            logger.error("Failed to connect to SharePoint")
            return []
        # Prepare local directory
        local_dir_path = Path(local_dir)
        local_dir_path.mkdir(parents=True, exist_ok=True)
        # ALWAYS clear ALL existing Excel files before downloading so only the
        # freshly downloaded file is ever processed (prevents combining data
        # from multiple files).
        existing_files = list(local_dir_path.glob('*.xlsx')) + list(local_dir_path.glob('*.xls'))
        cleared_count = 0
        failed_to_clear = []
        for old_file in existing_files:
            try:
                # On Windows, files might be locked - try multiple times
                max_retries = 3
                retry_count = 0
                while retry_count < max_retries:
                    try:
                        old_file.unlink()
                        cleared_count += 1
                        logger.info(f"Cleared existing file before download: {old_file.name}")
                        break
                    except PermissionError:
                        retry_count += 1
                        if retry_count < max_retries:
                            time.sleep(0.5)  # Wait 500ms before retry
                        else:
                            raise
            except Exception as e:
                failed_to_clear.append(old_file.name)
                logger.error(f"Failed to clear existing file {old_file.name}: {e}")
        if failed_to_clear:
            logger.error(f"CRITICAL: Failed to clear {len(failed_to_clear)} file(s) before download: {failed_to_clear}")
            logger.error("This will cause data mixing! Files may be locked by another process.")
            # Don't fail here - let the download proceed, but log the warning
        if cleared_count > 0:
            logger.info(f"Cleared {cleared_count} existing Excel file(s) before downloading from SharePoint")
        else:
            logger.info("No existing Excel files found to clear (reports directory was empty)")
        # List files in folder
        files = self.list_files_in_folder(folder_path, file_pattern)
        if not files:
            logger.warning(f"No Excel files found in folder: {folder_path}")
            return []
        # Sort files by last modified date (newest first); only the newest is fetched.
        def parse_time(time_str):
            # Best-effort parse of SharePoint's last-modified value; anything
            # unparseable sorts as oldest (datetime.min).
            try:
                if time_str:
                    # Office365 library returns datetime objects or ISO strings
                    if isinstance(time_str, datetime):
                        return time_str
                    return datetime.fromisoformat(str(time_str).replace('Z', '+00:00'))
                return datetime.min
            except (ValueError, TypeError):  # FIX: was a bare except
                return datetime.min
        files_sorted = sorted(files, key=lambda f: parse_time(f.get("time_last_modified", "")), reverse=True)
        if len(files_sorted) > 1:
            logger.info(f"Found {len(files_sorted)} Excel file(s) in SharePoint folder. Using only the newest file.")
        logger.info(f"Newest file: {files_sorted[0]['name']} (modified: {files_sorted[0].get('time_last_modified', 'Unknown')})")
        if len(files_sorted) > 1:
            logger.info(f"Skipping {len(files_sorted) - 1} older file(s) to avoid combining data")
        # Download only the newest file
        downloaded_files = []
        newest_file = files_sorted[0]
        file_name = newest_file["name"]
        server_relative_url = newest_file["server_relative_url"]
        local_file_path = local_dir_path / file_name
        if self.download_file(server_relative_url, file_name, str(local_file_path), overwrite=overwrite):
            downloaded_files.append(str(local_file_path))
            logger.info(f"Successfully downloaded newest file: {file_name}")
        else:
            logger.error(f"Failed to download file: {file_name}")
        logger.info(f"Downloaded {len(downloaded_files)} file(s) from {folder_path} (using only newest file)")
        return downloaded_files
def download_from_sharepoint(
    site_url: str,
    folder_path: Optional[str] = None,
    file_path: Optional[str] = None,
    local_dir: str = "reports",
    tenant_id: Optional[str] = None,
    client_id: Optional[str] = None,
    client_secret: Optional[str] = None,
    use_app_authentication: bool = True,
    file_pattern: Optional[str] = None,
    overwrite: bool = True,
    clear_existing: bool = True
) -> List[str]:
    """
    Convenience function to download files from SharePoint using Office365-REST-Python-Client.

    Args:
        site_url: SharePoint site URL
        folder_path: Path to folder (if downloading all files from folder)
        file_path: Path to specific file (if downloading single file) - NOT YET IMPLEMENTED
        local_dir: Local directory to save files
        tenant_id: Azure AD tenant ID (not used by Office365 library, but kept for compatibility)
        client_id: Azure AD app client ID (required for app authentication)
        client_secret: Azure AD app client secret (required for app authentication)
        use_app_authentication: Use app authentication (default: True)
        file_pattern: Pattern to filter files (e.g., "*.xlsx")
        overwrite: Whether to overwrite existing files
        clear_existing: If True, clear all existing Excel files before downloading (default: True)

    Returns:
        List of downloaded file paths (typically 1 file - the newest); an
        empty list when arguments are invalid or the download fails.
    """
    if not folder_path and not file_path:
        logger.error("Either folder_path or file_path must be provided")
        return []
    if file_path:
        # Single-file mode takes precedence when both are supplied, but it is
        # not implemented yet, so report and bail out before authenticating.
        logger.warning("Single file download not yet implemented")
        return []
    downloader = SharePointDownloader(
        site_url=site_url,
        tenant_id=tenant_id,
        client_id=client_id,
        client_secret=client_secret,
        use_app_authentication=use_app_authentication
    )
    # folder_path is guaranteed truthy here (the guards above returned
    # otherwise), so the previously unreachable trailing else was removed.
    # Download only the newest file from the folder (clears existing files first).
    return downloader.download_files_from_folder(
        folder_path=folder_path,
        local_dir=local_dir,
        file_pattern=file_pattern,
        overwrite=overwrite,
        clear_existing=clear_existing
    )
if __name__ == "__main__":
    # Manual smoke test: download using the `sharepoint` section of the
    # project configuration and print what was fetched.
    import sys
    from config import load_config

    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    )
    cfg = load_config()
    sharepoint_cfg = cfg.get('sharepoint')
    if not sharepoint_cfg:
        logger.error("SharePoint configuration not found")
        sys.exit(1)
    results = download_from_sharepoint(
        site_url=sharepoint_cfg['site_url'],
        folder_path=sharepoint_cfg.get('folder_path'),
        file_path=sharepoint_cfg.get('file_path'),
        local_dir=sharepoint_cfg.get('local_dir', 'reports'),
        tenant_id=sharepoint_cfg.get('tenant_id'),
        client_id=sharepoint_cfg.get('client_id'),
        client_secret=sharepoint_cfg.get('client_secret'),
        use_app_authentication=sharepoint_cfg.get('use_app_authentication', True),
        file_pattern=sharepoint_cfg.get('file_pattern'),
        overwrite=sharepoint_cfg.get('overwrite', True)
    )
    print(f"Downloaded {len(results)} file(s)")
    for downloaded_path in results:
        print(f" - {downloaded_path}")

26
test_docker.ps1 Normal file
View File

@ -0,0 +1,26 @@
# Simple Docker test script
# Compares html_generator.py between the local checkout and the built Docker
# image (lines 794 and 1284 are the spots checked elsewhere for stray
# backslash-escapes) and verifies the module imports in both environments.
Write-Host "=== Testing Docker Image ===" -ForegroundColor Cyan
Write-Host "`n1. Testing local file..." -ForegroundColor Yellow
python test_syntax.py
Write-Host "`n2. Building Docker image..." -ForegroundColor Yellow
# --no-cache ensures the COPY step picks up the current working tree.
docker build --no-cache -t vendor-report-api-test . 2>&1 | Select-String "Step|COPY|ERROR" | Select-Object -Last 5
Write-Host "`n3. Checking line 794 in LOCAL file:" -ForegroundColor Yellow
# Select-Object -Index is zero-based, so 793 prints line 794.
Get-Content html_generator.py | Select-Object -Index 793
Write-Host "`n4. Checking line 794 in DOCKER image:" -ForegroundColor Yellow
docker run --rm vendor-report-api-test sed -n '794p' /app/html_generator.py 2>&1
Write-Host "`n5. Checking line 1284 in LOCAL file:" -ForegroundColor Yellow
Get-Content html_generator.py | Select-Object -Index 1283
Write-Host "`n6. Checking line 1284 in DOCKER image:" -ForegroundColor Yellow
docker run --rm vendor-report-api-test sed -n '1284p' /app/html_generator.py 2>&1
Write-Host "`n7. Testing Python import in Docker:" -ForegroundColor Yellow
docker run --rm vendor-report-api-test python -c "import html_generator; print('SUCCESS')" 2>&1
Write-Host "`n=== Done ===" -ForegroundColor Cyan

20
test_syntax.py Normal file
View File

@ -0,0 +1,20 @@
#!/usr/bin/env python3
"""Quick syntax test for html_generator.py"""
import sys

try:
    import html_generator
    # Compute the attribute probe before printing so output order is fixed.
    has_escape_helper = hasattr(html_generator, 'escape_js_string')
    print("SUCCESS: html_generator.py imports without syntax errors!")
    print(f"escape_js_string function exists: {has_escape_helper}")
    sys.exit(0)
except SyntaxError as err:
    # A SyntaxError carries location details worth surfacing individually.
    print(f"SYNTAX ERROR: {err}")
    print(f" File: {err.filename}")
    print(f" Line: {err.lineno}")
    print(f" Text: {err.text}")
    sys.exit(1)
except Exception as err:
    print(f"ERROR: {err}")
    sys.exit(1)

55
verify_build.ps1 Normal file
View File

@ -0,0 +1,55 @@
# Comprehensive build verification script
# Validates the local html_generator.py, rebuilds the Docker image without
# cache, then confirms the image contains the corrected file and that the
# module imports inside the container.
Write-Host "=== Vendor Report API Build Verification ===" -ForegroundColor Cyan
# 1. Verify local file syntax
Write-Host "`n1. Checking local file syntax..." -ForegroundColor Yellow
python -c "import html_generator; print('Local file syntax OK')" 2>&1
if ($LASTEXITCODE -ne 0) {
    Write-Host "Local file has syntax errors!" -ForegroundColor Red
    exit 1
} else {
    Write-Host "Local file syntax OK" -ForegroundColor Green
}
# 2. Check specific lines
# Get-Content array indexing is zero-based: [793] is line 794, [1283] is 1284.
Write-Host "`n2. Checking fixed lines..." -ForegroundColor Yellow
$line794 = (Get-Content html_generator.py)[793]
$line1284 = (Get-Content html_generator.py)[1283]
Write-Host "Line 794: $($line794.Substring(0, [Math]::Min(80, $line794.Length)))..."
# A literal backslash-quote on these lines indicates the old broken escaping.
if ($line794 -match '\\"') {
    Write-Host "Line 794 still has backslash!" -ForegroundColor Red
} else {
    Write-Host "Line 794 looks correct (no backslash)" -ForegroundColor Green
}
Write-Host "Line 1284: $($line1284.Substring(0, [Math]::Min(80, $line1284.Length)))..."
if ($line1284 -match '\\"') {
    Write-Host "Line 1284 still has backslash!" -ForegroundColor Red
} else {
    Write-Host "Line 1284 looks correct (no backslash)" -ForegroundColor Green
}
# 3. Build image with --no-cache (so the COPY layer cannot serve stale files)
Write-Host "`n3. Building Docker image with --no-cache..." -ForegroundColor Yellow
docker build --no-cache -t vendor-report-api-test . 2>&1 | Select-String "Step|COPY|ERROR" | Select-Object -Last 10
# 4. Verify what's in the image
Write-Host "`n4. Verifying Docker image contents..." -ForegroundColor Yellow
Write-Host "Line 794 in image:"
docker run --rm vendor-report-api-test sed -n '794p' /app/html_generator.py 2>&1
Write-Host "`nLine 1284 in image:"
docker run --rm vendor-report-api-test sed -n '1284p' /app/html_generator.py 2>&1
# 5. Test import in Docker
Write-Host "`n5. Testing Python import in Docker container..." -ForegroundColor Yellow
docker run --rm vendor-report-api-test python -c "import html_generator; print('SUCCESS')" 2>&1
if ($LASTEXITCODE -eq 0) {
    Write-Host "✓ Docker image works!" -ForegroundColor Green
} else {
    Write-Host "✗ Docker image still has errors!" -ForegroundColor Red
}
Write-Host "`n=== Verification Complete ===" -ForegroundColor Cyan

32
verify_docker_image.ps1 Normal file
View File

@ -0,0 +1,32 @@
# Verify what's actually in the Docker image
# Builds the given image tag, prints the two previously-problematic lines of
# html_generator.py (794 and 1284) from inside the container, tests the
# Python import there, and shows the same lines from the local file so they
# can be compared by eye.
param(
    [string]$ImageName = "vendor-report-api-test"
)
Write-Host "=== Verifying Docker Image Contents ===" -ForegroundColor Cyan
# Build the image
Write-Host "`n1. Building image: $ImageName" -ForegroundColor Yellow
docker build -t $ImageName . 2>&1 | Select-String "COPY|Step"
# Check line 794 in the image
Write-Host "`n2. Checking line 794 in Docker image:" -ForegroundColor Yellow
docker run --rm $ImageName sed -n '794p' /app/html_generator.py
# Check line 1284 in the image
Write-Host "`n3. Checking line 1284 in Docker image:" -ForegroundColor Yellow
docker run --rm $ImageName sed -n '1284p' /app/html_generator.py
# Try to import the module
Write-Host "`n4. Testing Python import in Docker:" -ForegroundColor Yellow
docker run --rm $ImageName python -c "import html_generator; print('SUCCESS')" 2>&1
# Compare with local file (Select-Object -Index is zero-based)
Write-Host "`n5. Local file line 794:" -ForegroundColor Yellow
Get-Content html_generator.py | Select-Object -Index 793
Write-Host "`n6. Local file line 1284:" -ForegroundColor Yellow
Get-Content html_generator.py | Select-Object -Index 1283
Write-Host "`n=== Done ===" -ForegroundColor Cyan

765
web_ui.py Normal file
View File

@ -0,0 +1,765 @@
#!/usr/bin/env python3
"""
Web UI for Vendor Report Generator
Provides a simple web interface for generating reports, viewing status, and managing configuration.
"""
import logging
import json
from pathlib import Path
from typing import Optional
from datetime import datetime
# Flask is an optional dependency: the rest of the project can run without
# the Web UI, so degrade gracefully when it is not installed instead of
# failing at import time.
try:
    from flask import Flask, render_template_string, jsonify, request, send_from_directory, redirect, url_for
    from flask_cors import CORS
    FLASK_AVAILABLE = True
except ImportError:
    FLASK_AVAILABLE = False
    logging.warning("Flask not installed. Web UI features disabled.")
from config import load_config
from report_generator import generate_report
from sharepoint_downloader import download_from_sharepoint
logger = logging.getLogger(__name__)
# Module-level singletons populated by create_app(); kept global so the
# route closures and run_server() can share them.
app = None
config = None
# HTML template for the single-page Web UI.
# Rendered via render_template_string(); it is static markup whose client-side
# JavaScript talks to the /api/* endpoints registered in create_app().
# FIX: loadReports() was missing the closing brace of its if/else before the
# `catch`, a JavaScript SyntaxError that disabled the whole <script> block.
UI_TEMPLATE = """
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Vendor Report Generator</title>
    <style>
        * {
            margin: 0;
            padding: 0;
            box-sizing: border-box;
        }
        body {
            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
            min-height: 100vh;
            padding: 20px;
        }
        .container {
            max-width: 1200px;
            margin: 0 auto;
            background: white;
            border-radius: 12px;
            box-shadow: 0 10px 40px rgba(0,0,0,0.2);
            overflow: hidden;
        }
        header {
            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
            color: white;
            padding: 30px;
            text-align: center;
        }
        header h1 {
            font-size: 2.5em;
            margin-bottom: 10px;
        }
        header p {
            opacity: 0.9;
            font-size: 1.1em;
        }
        .content {
            padding: 40px;
        }
        .section {
            margin-bottom: 40px;
            padding: 30px;
            background: #f9fafb;
            border-radius: 8px;
            border: 1px solid #e5e7eb;
        }
        .section h2 {
            color: #1e40af;
            margin-bottom: 20px;
            font-size: 1.5em;
        }
        .button-group {
            display: flex;
            gap: 15px;
            flex-wrap: wrap;
            margin-top: 20px;
        }
        .btn {
            padding: 12px 24px;
            border: none;
            border-radius: 6px;
            font-size: 1em;
            font-weight: 600;
            cursor: pointer;
            transition: all 0.2s;
            text-decoration: none;
            display: inline-block;
        }
        .btn-primary {
            background: #2563eb;
            color: white;
        }
        .btn-primary:hover {
            background: #1d4ed8;
            transform: translateY(-2px);
            box-shadow: 0 4px 12px rgba(37, 99, 235, 0.4);
        }
        .btn-success {
            background: #10b981;
            color: white;
        }
        .btn-success:hover {
            background: #059669;
        }
        .btn-secondary {
            background: #6b7280;
            color: white;
        }
        .btn-secondary:hover {
            background: #4b5563;
        }
        .btn:disabled {
            opacity: 0.5;
            cursor: not-allowed;
            transform: none;
        }
        .status-card {
            background: white;
            padding: 20px;
            border-radius: 8px;
            border-left: 4px solid #2563eb;
            margin-bottom: 15px;
        }
        .status-card h3 {
            color: #374151;
            margin-bottom: 10px;
        }
        .status-card p {
            color: #6b7280;
            margin: 5px 0;
        }
        .status-indicator {
            display: inline-block;
            width: 12px;
            height: 12px;
            border-radius: 50%;
            margin-right: 8px;
        }
        .status-indicator.active {
            background: #10b981;
        }
        .status-indicator.inactive {
            background: #ef4444;
        }
        .loading {
            display: none;
            text-align: center;
            padding: 20px;
        }
        .loading.active {
            display: block;
        }
        .spinner {
            border: 4px solid #f3f4f6;
            border-top: 4px solid #2563eb;
            border-radius: 50%;
            width: 40px;
            height: 40px;
            animation: spin 1s linear infinite;
            margin: 0 auto 15px;
        }
        @keyframes spin {
            0% { transform: rotate(0deg); }
            100% { transform: rotate(360deg); }
        }
        .alert {
            padding: 15px;
            border-radius: 6px;
            margin-bottom: 20px;
            display: none;
        }
        .alert.active {
            display: block;
        }
        .alert-success {
            background: #d1fae5;
            color: #065f46;
            border: 1px solid #10b981;
        }
        .alert-error {
            background: #fee2e2;
            color: #991b1b;
            border: 1px solid #ef4444;
        }
        .alert-info {
            background: #dbeafe;
            color: #1e40af;
            border: 1px solid #2563eb;
        }
        .report-list {
            list-style: none;
        }
        .report-item {
            background: white;
            padding: 15px;
            border-radius: 6px;
            margin-bottom: 10px;
            border: 1px solid #e5e7eb;
            display: flex;
            justify-content: space-between;
            align-items: center;
        }
        .report-item:hover {
            box-shadow: 0 2px 8px rgba(0,0,0,0.1);
        }
        .report-info {
            flex: 1;
        }
        .report-info strong {
            color: #1e40af;
            display: block;
            margin-bottom: 5px;
        }
        .report-info small {
            color: #6b7280;
        }
        .config-item {
            margin-bottom: 15px;
            padding: 15px;
            background: white;
            border-radius: 6px;
            border: 1px solid #e5e7eb;
        }
        .config-item label {
            display: block;
            font-weight: 600;
            color: #374151;
            margin-bottom: 5px;
        }
        .config-item .value {
            color: #6b7280;
            font-family: monospace;
        }
        .badge {
            display: inline-block;
            padding: 4px 12px;
            border-radius: 12px;
            font-size: 0.85em;
            font-weight: 600;
        }
        .badge-enabled {
            background: #d1fae5;
            color: #065f46;
        }
        .badge-disabled {
            background: #fee2e2;
            color: #991b1b;
        }
    </style>
</head>
<body>
    <div class="container">
        <header>
            <h1>📊 Vendor Report Generator</h1>
            <p>Generate comprehensive vendor punchlist reports from Excel files</p>
        </header>
        <div class="content">
            <div id="alert-container"></div>
            <!-- Update Data Section -->
            <div class="section">
                <h2>Update Data</h2>
                <p>Download the latest Excel files from SharePoint to update your local data.</p>
                <div class="button-group">
                    <button class="btn btn-success" onclick="updateFromSharePoint()">
                        Update Data from SharePoint
                    </button>
                </div>
                <div class="loading" id="loading-update">
                    <div class="spinner"></div>
                    <p>Downloading files from SharePoint... This may take a moment.</p>
                </div>
            </div>
            <!-- Generate Report Section -->
            <div class="section">
                <h2>Generate Report</h2>
                <p>Generate a new report from Excel files in the local reports directory.</p>
                <div class="button-group">
                    <button class="btn btn-primary" onclick="generateReport()">
                        Generate Report
                    </button>
                </div>
                <div class="loading" id="loading">
                    <div class="spinner"></div>
                    <p>Generating report... This may take a moment.</p>
                </div>
            </div>
            <!-- Status Section -->
            <div class="section">
                <h2>Service Status</h2>
                <div id="status-container">
                    <div class="status-card">
                        <h3>Loading status...</h3>
                    </div>
                </div>
            </div>
            <!-- Reports Section -->
            <div class="section">
                <h2>Generated Reports</h2>
                <div id="reports-container">
                    <p>Loading reports...</p>
                </div>
            </div>
            <!-- Configuration Section -->
            <div class="section">
                <h2>Configuration</h2>
                <div id="config-container">
                    <p>Loading configuration...</p>
                </div>
            </div>
        </div>
    </div>
    <script>
        // Update data from SharePoint
        async function updateFromSharePoint() {
            const loading = document.getElementById('loading-update');
            const alertContainer = document.getElementById('alert-container');
            loading.classList.add('active');
            alertContainer.innerHTML = '';
            try {
                const response = await fetch('/api/update-sharepoint', {
                    method: 'POST',
                    headers: {
                        'Content-Type': 'application/json'
                    }
                });
                const data = await response.json();
                if (response.ok) {
                    showAlert('success', `Successfully downloaded ${data.downloaded_count} file(s) from SharePoint!`);
                    loadStatus();
                } else {
                    showAlert('error', `Error: ${data.error || 'Failed to download from SharePoint'}`);
                }
            } catch (error) {
                showAlert('error', `Error: ${error.message}`);
            } finally {
                loading.classList.remove('active');
            }
        }
        // Generate report
        async function generateReport() {
            const loading = document.getElementById('loading');
            const alertContainer = document.getElementById('alert-container');
            loading.classList.add('active');
            alertContainer.innerHTML = '';
            try {
                const response = await fetch('/api/generate', {
                    method: 'POST',
                    headers: {
                        'Content-Type': 'application/json'
                    },
                    body: JSON.stringify({
                        download_from_sharepoint: false
                    })
                });
                const data = await response.json();
                if (response.ok) {
                    showAlert('success', `Report generated successfully! Processed ${data.vendors_count || 0} vendors.`);
                    loadReports();
                    loadStatus();
                } else {
                    showAlert('error', `Error: ${data.error || 'Failed to generate report'}`);
                }
            } catch (error) {
                showAlert('error', `Error: ${error.message}`);
                console.error('Generate report error:', error);
            } finally {
                loading.classList.remove('active');
            }
        }
        // Load status
        async function loadStatus() {
            try {
                const response = await fetch('/api/status');
                const data = await response.json();
                const container = document.getElementById('status-container');
                container.innerHTML = `
                    <div class="status-card">
                        <h3>
                            <span class="status-indicator ${data.status === 'running' ? 'active' : 'inactive'}"></span>
                            Service Status: ${data.status}
                        </h3>
                        <p><strong>SharePoint:</strong> <span class="badge ${data.sharepoint_enabled ? 'badge-enabled' : 'badge-disabled'}">${data.sharepoint_enabled ? 'Enabled' : 'Disabled'}</span></p>
                        <p><strong>Reports Directory:</strong> ${data.reports_dir}</p>
                        <p><strong>Output Directory:</strong> ${data.output_dir}</p>
                    </div>
                `;
            } catch (error) {
                console.error('Failed to load status:', error);
            }
        }
        // Load reports
        async function loadReports() {
            try {
                const response = await fetch('/api/reports');
                const data = await response.json();
                const container = document.getElementById('reports-container');
                if (data.reports && data.reports.length > 0) {
                    const reportsList = data.reports.map(report => `
                        <div class="report-item">
                            <div class="report-info">
                                <strong>${report.name}</strong>
                                <small>Generated: ${report.generated_at} | Size: ${report.size}</small>
                            </div>
                            <div>
                                <a href="/reports/${report.name}" class="btn btn-primary" target="_blank">View HTML</a>
                                ${report.json_exists ? `<a href="/reports/${report.json_name}" class="btn btn-secondary" download>Download JSON</a>` : ''}
                            </div>
                        </div>
                    `).join('');
                    container.innerHTML = `<ul class="report-list">${reportsList}</ul>`;
                } else {
                    container.innerHTML = '<p>No reports generated yet.</p>';
                }
            } catch (error) {
                console.error('Failed to load reports:', error);
                document.getElementById('reports-container').innerHTML = '<p>Error loading reports.</p>';
            }
        }
        // Load configuration
        async function loadConfig() {
            try {
                const response = await fetch('/api/config');
                const config = await response.json();
                const container = document.getElementById('config-container');
                const configItems = Object.entries(config).map(([key, value]) => {
                    const displayValue = typeof value === 'boolean'
                        ? `<span class="badge ${value ? 'badge-enabled' : 'badge-disabled'}">${value ? 'Enabled' : 'Disabled'}</span>`
                        : String(value || 'Not configured');
                    return `
                        <div class="config-item">
                            <label>${key.replace(/_/g, ' ').replace(/\\b\\w/g, l => l.toUpperCase())}</label>
                            <div class="value">${displayValue}</div>
                        </div>
                    `;
                }).join('');
                container.innerHTML = configItems;
            } catch (error) {
                console.error('Failed to load config:', error);
                document.getElementById('config-container').innerHTML = '<p>Error loading configuration.</p>';
            }
        }
        // Show alert
        function showAlert(type, message) {
            const container = document.getElementById('alert-container');
            const alert = document.createElement('div');
            alert.className = `alert alert-${type} active`;
            alert.textContent = message;
            container.appendChild(alert);
            setTimeout(() => {
                alert.remove();
            }, 5000);
        }
        // Load data on page load
        window.addEventListener('DOMContentLoaded', () => {
            loadStatus();
            loadReports();
            loadConfig();
            // Refresh every 30 seconds
            setInterval(() => {
                loadStatus();
                loadReports();
            }, 30000);
        });
    </script>
</body>
</html>
"""
def create_app(config_path: Optional[str] = None):
    """Create and configure the Flask app that serves the Web UI and JSON API.

    Registers the UI page, the /api/* endpoints that the page's JavaScript
    calls, a static route for generated report files, and a health check.
    Also populates the module-level ``app`` and ``config`` globals as a side
    effect so ``run_server`` can reuse them.

    Args:
        config_path: Optional path to a configuration file, forwarded to
            ``load_config``.

    Returns:
        The configured ``flask.Flask`` application.

    Raises:
        ImportError: If Flask (or flask-cors) is not installed.
    """
    global app, config
    if not FLASK_AVAILABLE:
        raise ImportError(
            "Flask is required for Web UI. "
            "Install it with: pip install flask flask-cors"
        )
    app = Flask(__name__)
    CORS(app)
    config = load_config(config_path)
    api_config = config.get('api', {})
    sharepoint_config = config.get('sharepoint', {})
    report_config = config.get('report', {})
    app.config['API_KEY'] = api_config.get('api_key')
    app.config['SHAREPOINT_CONFIG'] = sharepoint_config
    app.config['REPORT_CONFIG'] = report_config

    def _require_api_key():
        """Return a (response, status) error pair if the API key check fails, else None.

        Uses request.get_json(silent=True): the UI sends POSTs with a JSON
        content type but no body, and accessing request.json in that case
        makes Flask raise 400 before the key is even checked.
        """
        api_key = app.config.get('API_KEY')
        if not api_key:
            # No key configured -> endpoint is open.
            return None
        body = request.get_json(silent=True) or {}
        provided_key = request.headers.get('X-API-Key') or body.get('api_key')
        if provided_key != api_key:
            return jsonify({'error': 'Invalid API key'}), 401
        return None

    @app.route('/')
    def index():
        """Main web UI page."""
        return render_template_string(UI_TEMPLATE)

    @app.route('/api/update-sharepoint', methods=['POST'])
    def update_sharepoint_endpoint():
        """Download files from SharePoint into the local reports directory."""
        auth_error = _require_api_key()
        if auth_error:
            return auth_error
        try:
            sp_config = app.config['SHAREPOINT_CONFIG']
            if not sp_config.get('enabled'):
                return jsonify({'error': 'SharePoint is not enabled in configuration'}), 400
            logger.info("Downloading files from SharePoint...")
            try:
                downloaded = download_from_sharepoint(
                    site_url=sp_config['site_url'],
                    folder_path=sp_config.get('folder_path'),
                    file_path=sp_config.get('file_path'),
                    local_dir=sp_config.get('local_dir', 'reports'),
                    tenant_id=sp_config.get('tenant_id'),
                    client_id=sp_config.get('client_id'),
                    client_secret=sp_config.get('client_secret'),
                    use_app_authentication=sp_config.get('use_app_authentication', True),
                    file_pattern=sp_config.get('file_pattern'),
                    overwrite=sp_config.get('overwrite', True)
                )
                logger.info(f"Downloaded {len(downloaded)} file(s) from SharePoint")
                return jsonify({
                    'status': 'success',
                    'message': f'Successfully downloaded {len(downloaded)} file(s) from SharePoint',
                    'downloaded_count': len(downloaded),
                    'files': downloaded
                })
            except Exception as e:
                logger.error(f"Failed to download from SharePoint: {e}", exc_info=True)
                return jsonify({'error': f'SharePoint download failed: {str(e)}'}), 500
        except Exception as e:
            logger.error(f"Error updating from SharePoint: {e}", exc_info=True)
            return jsonify({'error': f'Update failed: {str(e)}'}), 500

    @app.route('/api/generate', methods=['POST'])
    def generate_report_endpoint():
        """Generate a report on demand from Excel files in the reports dir."""
        auth_error = _require_api_key()
        if auth_error:
            return auth_error
        try:
            # silent=True tolerates a missing/invalid JSON body (treated as {}).
            request_data = request.get_json(silent=True) or {}
            report_config = app.config['REPORT_CONFIG']
            reports_dir = request_data.get('reports_dir', report_config.get('reports_dir', 'reports'))
            output_file = request_data.get('output_file',
                                           str(Path(report_config.get('output_dir', 'output')) / 'report.json'))
            # Fail early with actionable messages if there is nothing to process.
            reports_path = Path(reports_dir)
            if not reports_path.exists():
                return jsonify({'error': f'Reports directory not found: {reports_dir}'}), 400
            excel_files = list(reports_path.glob('*.xlsx')) + list(reports_path.glob('*.xls'))
            if not excel_files:
                return jsonify({'error': f'No Excel files found in {reports_dir}. Please update data from SharePoint first.'}), 400
            logger.info(f"Generating report from {reports_dir} ({len(excel_files)} Excel file(s))...")
            report_data = generate_report(
                reports_dir=reports_dir,
                output_file=output_file,
                verbose=False
            )
            if report_data and report_data.get('vendors'):
                return jsonify({
                    'status': 'success',
                    'message': 'Report generated successfully',
                    'output_file': output_file,
                    'summary': report_data.get('summary', {}),
                    'vendors_count': len(report_data.get('vendors', []))
                })
            else:
                return jsonify({'error': 'Report generation failed - no data processed'}), 500
        except Exception as e:
            logger.error(f"Error generating report: {e}", exc_info=True)
            return jsonify({'error': f'Report generation failed: {str(e)}'}), 500

    @app.route('/api/status', methods=['GET'])
    def status():
        """Get service status."""
        return jsonify({
            'status': 'running',
            'sharepoint_enabled': app.config['SHAREPOINT_CONFIG'].get('enabled', False),
            'reports_dir': app.config['REPORT_CONFIG'].get('reports_dir', 'reports'),
            'output_dir': app.config['REPORT_CONFIG'].get('output_dir', 'output')
        })

    @app.route('/api/reports', methods=['GET'])
    def list_reports():
        """List generated reports (HTML files plus any matching JSON sibling)."""
        output_dir = Path(app.config['REPORT_CONFIG'].get('output_dir', 'output'))
        reports = []
        if output_dir.exists():
            html_files = list(output_dir.glob('*.html'))
            for html_file in html_files:
                json_file = html_file.with_suffix('.json')
                reports.append({
                    'name': html_file.name,
                    'json_name': json_file.name if json_file.exists() else None,
                    'json_exists': json_file.exists(),
                    'generated_at': datetime.fromtimestamp(html_file.stat().st_mtime).strftime('%Y-%m-%d %H:%M:%S'),
                    'size': f"{html_file.stat().st_size / 1024:.1f} KB"
                })
        # Sort by modification time (newest first); the zero-padded timestamp
        # format sorts correctly as a string.
        reports.sort(key=lambda x: x['generated_at'], reverse=True)
        return jsonify({'reports': reports})

    @app.route('/api/config', methods=['GET'])
    def get_config():
        """Get configuration (safe subset, no secrets)."""
        return jsonify({
            'sharepoint_enabled': app.config['SHAREPOINT_CONFIG'].get('enabled', False),
            'sharepoint_site_url': app.config['SHAREPOINT_CONFIG'].get('site_url', 'Not configured'),
            'sharepoint_folder_path': app.config['SHAREPOINT_CONFIG'].get('folder_path', 'Not configured'),
            'reports_dir': app.config['REPORT_CONFIG'].get('reports_dir', 'reports'),
            'output_dir': app.config['REPORT_CONFIG'].get('output_dir', 'output')
        })

    @app.route('/reports/<filename>')
    def serve_report(filename):
        """Serve generated report files.

        send_from_directory rejects paths that escape output_dir, so this is
        safe against path traversal in *filename*.
        """
        output_dir = Path(app.config['REPORT_CONFIG'].get('output_dir', 'output'))
        return send_from_directory(str(output_dir), filename)

    @app.route('/health', methods=['GET'])
    def health():
        """Health check."""
        return jsonify({'status': 'healthy', 'service': 'vendor-report-generator-ui'})

    return app
def run_server(config_path: Optional[str] = None, host: Optional[str] = None, port: Optional[int] = None):
    """Create the Flask app and serve the Web UI until interrupted.

    Explicit *host*/*port* arguments take precedence over the ``api``
    section of the configuration; the fallbacks are 0.0.0.0:8080.
    """
    flask_app = create_app(config_path)
    # create_app() populates the module-level `config` global as a side effect.
    api_settings = config.get('api', {})
    bind_host = host if host else api_settings.get('host', '0.0.0.0')
    bind_port = port if port else api_settings.get('port', 8080)
    logger.info(f"Starting Web UI server on http://{bind_host}:{bind_port}")
    flask_app.run(host=bind_host, port=bind_port, debug=False)
if __name__ == "__main__":
    import sys

    # Configure root logging before anything else emits records.
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    )
    # Optional first CLI argument is the path to a configuration file.
    config_path = None
    if len(sys.argv) > 1:
        config_path = sys.argv[1]
    # Warn (but proceed) when the config says the API should be off.
    config = load_config(config_path)
    api_enabled = config.get('api', {}).get('enabled', False)
    if not api_enabled:
        logger.warning("API is disabled in configuration, but starting Web UI anyway...")
    run_server(config_path=config_path)