Merge remote changes and resolve conflicts
- Keep our version of html_generator.py (removed Open tab, kept only actual statuses) - Keep our generated output files - Keep our Excel file version
This commit is contained in:
commit
51f618b654
54
.drone.yml
Normal file
54
.drone.yml
Normal file
@ -0,0 +1,54 @@
|
||||
kind: pipeline
|
||||
type: docker
|
||||
name: vendor-report-cicd
|
||||
|
||||
trigger:
|
||||
branch:
|
||||
- main
|
||||
- deployment-ready
|
||||
event:
|
||||
- push
|
||||
|
||||
steps:
|
||||
- name: build-image
|
||||
image: docker:24-cli
|
||||
volumes:
|
||||
- name: dockersock
|
||||
path: /var/run/docker.sock
|
||||
environment:
|
||||
DOCKER_HOST: unix:///var/run/docker.sock
|
||||
DOCKER_BUILDKIT: 1
|
||||
commands:
|
||||
- echo "Building vendor-report Docker image..."
|
||||
- docker build -t registry.lci.ge/taskboard/vendor-report-api:${DRONE_COMMIT_SHA:0:8} .
|
||||
- echo "Tagging image as latest..."
|
||||
- docker tag registry.lci.ge/taskboard/vendor-report-api:${DRONE_COMMIT_SHA:0:8} registry.lci.ge/taskboard/vendor-report-api:latest
|
||||
- echo "Vendor-report Docker image built and tagged successfully"
|
||||
when:
|
||||
event:
|
||||
- push
|
||||
|
||||
- name: push-image
|
||||
image: docker:24-cli
|
||||
volumes:
|
||||
- name: dockersock
|
||||
path: /var/run/docker.sock
|
||||
environment:
|
||||
DOCKER_HOST: unix:///var/run/docker.sock
|
||||
commands:
|
||||
- echo "Pushing vendor-report image to registry..."
|
||||
- docker push registry.lci.ge/taskboard/vendor-report-api:${DRONE_COMMIT_SHA:0:8}
|
||||
- docker push registry.lci.ge/taskboard/vendor-report-api:latest
|
||||
- echo "Vendor-report image pushed to registry"
|
||||
- echo "Cleaning up local images to save space..."
|
||||
- docker rmi registry.lci.ge/taskboard/vendor-report-api:${DRONE_COMMIT_SHA:0:8} || true
|
||||
- docker rmi registry.lci.ge/taskboard/vendor-report-api:latest || true
|
||||
when:
|
||||
event:
|
||||
- push
|
||||
|
||||
volumes:
|
||||
- name: dockersock
|
||||
host:
|
||||
path: /var/run/docker.sock
|
||||
|
||||
44
Dockerfile
Normal file
44
Dockerfile
Normal file
@ -0,0 +1,44 @@
|
||||
# Python API Server for Vendor Report Generator
|
||||
FROM python:3.11-slim
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Install system dependencies (cached unless this changes)
|
||||
RUN apt-get update && apt-get install -y \
|
||||
curl \
|
||||
gcc \
|
||||
g++ \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Upgrade pip and install build tools first (cached)
|
||||
RUN pip install --upgrade pip setuptools wheel
|
||||
|
||||
# Copy requirements first for better caching
|
||||
COPY requirements.txt .
|
||||
|
||||
# Install Python dependencies
|
||||
# --no-cache-dir is intentionally omitted; note that pip's cache only speeds up rebuilds when a BuildKit cache mount is used
|
||||
# This layer will be cached unless requirements.txt changes
|
||||
RUN pip install --upgrade pip && \
|
||||
pip install -r requirements.txt
|
||||
|
||||
# Copy application files
|
||||
# Docker automatically detects file changes via content hash
|
||||
# If any .py file changes, only this layer and the layers after it are rebuilt (the apt-get & pip layers stay cached)
|
||||
COPY *.py ./
|
||||
COPY *.yaml* ./
|
||||
COPY *.md ./
|
||||
|
||||
# Create directories for reports and output
|
||||
RUN mkdir -p /app/reports /app/output
|
||||
|
||||
# Expose port (internal only, not exposed in docker-compose)
|
||||
EXPOSE 8080
|
||||
|
||||
# Health check
|
||||
HEALTHCHECK --interval=30s --timeout=10s --start-period=10s --retries=3 \
|
||||
CMD curl -f http://localhost:8080/health || exit 1
|
||||
|
||||
# Run API server (uses environment variables for configuration)
|
||||
CMD ["python", "api_server.py"]
|
||||
|
||||
163
QUICK_START.md
Normal file
163
QUICK_START.md
Normal file
@ -0,0 +1,163 @@
|
||||
# Quick Start Guide: SharePoint Integration & Scheduling
|
||||
|
||||
This guide will help you quickly set up SharePoint integration and automated report generation.
|
||||
|
||||
## Quick Setup (5 minutes)
|
||||
|
||||
### 1. Install Dependencies
|
||||
|
||||
```bash
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
### 2. Create Configuration
|
||||
|
||||
```bash
|
||||
cp config.yaml.template config.yaml
|
||||
```
|
||||
|
||||
### 3. Configure SharePoint
|
||||
|
||||
Edit `config.yaml`:
|
||||
|
||||
```yaml
|
||||
sharepoint:
|
||||
enabled: true
|
||||
site_url: "https://yourcompany.sharepoint.com/sites/YourSite"
|
||||
folder_path: "/Shared Documents/Reports" # Path to your Excel files
|
||||
use_app_authentication: true
|
||||
client_id: "your-azure-ad-client-id"
|
||||
client_secret: "your-azure-ad-client-secret"
|
||||
```
|
||||
|
||||
**To get Azure AD credentials:**
|
||||
1. Go to Azure Portal → App registrations
|
||||
2. Create new registration or use existing
|
||||
3. Create a client secret
|
||||
4. Grant SharePoint API permissions: `Sites.Read.All`
|
||||
5. Copy Client ID and Client Secret to config
|
||||
|
||||
### 4. Choose Your Deployment Method
|
||||
|
||||
#### Option A: Scheduled Reports (Recommended)
|
||||
|
||||
Edit `config.yaml`:
|
||||
```yaml
|
||||
scheduler:
|
||||
enabled: true
|
||||
schedule_type: "cron"
|
||||
cron_expression: "0 8 * * *" # 8 AM daily
|
||||
timezone: "America/New_York"
|
||||
```
|
||||
|
||||
Start scheduler:
|
||||
```bash
|
||||
python scheduler.py
|
||||
```
|
||||
|
||||
#### Option B: On-Demand via API
|
||||
|
||||
Edit `config.yaml`:
|
||||
```yaml
|
||||
api:
|
||||
enabled: true
|
||||
port: 8080
|
||||
api_key: "your-secret-key" # Optional but recommended
|
||||
```
|
||||
|
||||
Start API server:
|
||||
```bash
|
||||
python api_server.py
|
||||
```
|
||||
|
||||
Generate report:
|
||||
```bash
|
||||
curl -X POST http://localhost:8080/api/generate \
|
||||
-H "X-API-Key: your-secret-key" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"download_from_sharepoint": true}'
|
||||
```
|
||||
|
||||
## How It Works
|
||||
|
||||
1. **SharePoint Download**: Downloads latest Excel files from SharePoint folder
|
||||
2. **Report Generation**: Processes Excel files and generates reports
|
||||
3. **Output**: Creates `output/report.json` and `output/report.html`
|
||||
|
||||
## Testing
|
||||
|
||||
### Test SharePoint Connection
|
||||
|
||||
```bash
|
||||
python sharepoint_downloader.py
|
||||
```
|
||||
|
||||
This will download files from SharePoint to the `reports/` directory.
|
||||
|
||||
### Test Report Generation
|
||||
|
||||
```bash
|
||||
python report_generator.py
|
||||
```
|
||||
|
||||
This will generate reports from files in the `reports/` directory.
|
||||
|
||||
## Deployment Options
|
||||
|
||||
### As a Service (Linux)
|
||||
|
||||
```bash
|
||||
# Create systemd service
|
||||
sudo nano /etc/systemd/system/vendor-report.service
|
||||
|
||||
# Add:
|
||||
[Unit]
|
||||
Description=Vendor Report Scheduler
|
||||
After=network.target
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
User=your-user
|
||||
WorkingDirectory=/path/to/vendor_report
|
||||
ExecStart=/usr/bin/python3 /path/to/vendor_report/scheduler.py
|
||||
Restart=always
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
|
||||
# Enable and start
|
||||
sudo systemctl enable vendor-report
|
||||
sudo systemctl start vendor-report
|
||||
```
|
||||
|
||||
### Docker (Coming Soon)
|
||||
|
||||
The application can be containerized for easy deployment.
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### SharePoint Authentication Fails
|
||||
|
||||
- Verify Azure AD app has correct permissions
|
||||
- Check client ID and secret are correct
|
||||
- Ensure SharePoint site URL is correct (include `/sites/SiteName`)
|
||||
|
||||
### Files Not Downloading
|
||||
|
||||
- Check folder path is correct (use SharePoint's "Copy path" feature)
|
||||
- Verify app has read permissions
|
||||
- Check file pattern matches your Excel files
|
||||
|
||||
### Scheduler Not Running
|
||||
|
||||
- Check timezone is correct
|
||||
- Verify cron expression format
|
||||
- Check logs for errors
|
||||
|
||||
## Next Steps
|
||||
|
||||
- Set up monitoring/alerting for failed reports
|
||||
- Configure webhook notifications
|
||||
- Set up automated email delivery of reports
|
||||
- Integrate with other systems via API
|
||||
|
||||
273
README.md
273
README.md
@ -2,6 +2,8 @@
|
||||
|
||||
A Python tool that generates comprehensive vendor punchlist reports from Excel files. The tool processes Excel data, normalizes vendor information, calculates metrics, and generates both JSON and interactive HTML reports.
|
||||
|
||||
> **📘 For Taskboard Integration**: See [TASKBOARD_INTEGRATION_CONTEXT.md](./TASKBOARD_INTEGRATION_CONTEXT.md) for detailed context and integration possibilities.
|
||||
|
||||
## Features
|
||||
|
||||
- **Direct Excel Processing**: Reads Excel files directly using pandas
|
||||
@ -11,6 +13,9 @@ A Python tool that generates comprehensive vendor punchlist reports from Excel f
|
||||
- **Oldest Unaddressed Items**: Identifies and highlights the oldest 3 unaddressed items per vendor
|
||||
- **Interactive HTML Reports**: Generates searchable, filterable HTML reports with tabs and filters
|
||||
- **JSON Export**: Exports structured JSON data for further processing
|
||||
- **SharePoint Integration**: Automatically download Excel files from SharePoint
|
||||
- **Scheduled Generation**: Automatically generate reports on a schedule (interval or cron)
|
||||
- **Web API**: REST API for on-demand report generation
|
||||
|
||||
## Requirements
|
||||
|
||||
@ -221,6 +226,12 @@ vendor_report/
|
||||
├── html_generator.py # HTML report generation
|
||||
├── models.py # Pydantic data models
|
||||
├── excel_to_text.py # Utility for Excel to text conversion
|
||||
├── sharepoint_downloader.py # SharePoint file downloader
|
||||
├── scheduler.py # Scheduled report generation
|
||||
├── api_server.py # REST API for on-demand reports
|
||||
├── web_ui.py # Web UI for easy access
|
||||
├── config.py # Configuration management
|
||||
├── config.yaml.template # Configuration template
|
||||
├── requirements.txt # Python dependencies
|
||||
├── reports/ # Directory for input Excel files
|
||||
├── output/ # Directory for generated reports
|
||||
@ -256,6 +267,268 @@ pip install -r requirements.txt
|
||||
|
||||
The tool uses **Baltimore/Eastern timezone (America/New_York)** for all date calculations. This ensures consistent 24-hour window calculations regardless of where the script is run. All dates are stored as timezone-aware datetime objects.
|
||||
|
||||
## SharePoint Integration
|
||||
|
||||
The application can automatically download Excel files from SharePoint before generating reports. This is useful when your source data is stored in SharePoint.
|
||||
|
||||
### Setup SharePoint Integration
|
||||
|
||||
1. **Create a configuration file**:
|
||||
```bash
|
||||
cp config.yaml.template config.yaml
|
||||
```
|
||||
|
||||
2. **Edit `config.yaml`** and configure SharePoint settings:
|
||||
```yaml
|
||||
sharepoint:
|
||||
enabled: true
|
||||
site_url: "https://yourcompany.sharepoint.com/sites/YourSite"
|
||||
folder_path: "/Shared Documents/Reports"
|
||||
local_dir: "reports"
|
||||
use_app_authentication: true # Recommended for automation
|
||||
client_id: "your-azure-ad-client-id"
|
||||
client_secret: "your-azure-ad-client-secret"
|
||||
```
|
||||
|
||||
3. **Authentication Options**:
|
||||
|
||||
**Option A: App Authentication (Recommended)**
|
||||
- Register an app in Azure AD
|
||||
- Grant SharePoint permissions (Sites.Read.All or Sites.ReadWrite.All)
|
||||
- Use `client_id` and `client_secret` in config
|
||||
- Set `use_app_authentication: true`
|
||||
|
||||
**Option B: User Authentication**
|
||||
- Use your SharePoint username and password
|
||||
- Set `username` and `password` in config
|
||||
- Set `use_app_authentication: false`
|
||||
|
||||
4. **Test SharePoint download**:
|
||||
```bash
|
||||
python sharepoint_downloader.py
|
||||
```
|
||||
|
||||
### Manual SharePoint Download
|
||||
|
||||
Download files from SharePoint without generating a report:
|
||||
```bash
|
||||
python sharepoint_downloader.py
|
||||
```
|
||||
|
||||
## Scheduled Report Generation
|
||||
|
||||
The application can automatically generate reports on a schedule, optionally downloading from SharePoint first.
|
||||
|
||||
### Setup Scheduling
|
||||
|
||||
1. **Edit `config.yaml`**:
|
||||
```yaml
|
||||
scheduler:
|
||||
enabled: true
|
||||
schedule_type: "interval" # or "cron"
|
||||
interval_hours: 24 # Generate every 24 hours
|
||||
# OR use cron expression:
|
||||
# cron_expression: "0 8 * * *" # 8 AM daily
|
||||
timezone: "America/New_York"
|
||||
```
|
||||
|
||||
2. **Start the scheduler**:
|
||||
```bash
|
||||
python scheduler.py
|
||||
```
|
||||
|
||||
The scheduler will run continuously and generate reports according to your schedule.
|
||||
|
||||
3. **Schedule Types**:
|
||||
- **interval**: Generate report every N hours
|
||||
- **cron**: Use cron expression for precise scheduling (e.g., "0 8 * * *" for 8 AM daily)
|
||||
- **once**: Run once immediately (for testing)
|
||||
|
||||
### Running Scheduler as a Service
|
||||
|
||||
**Linux (systemd)**:
|
||||
```bash
|
||||
# Create service file: /etc/systemd/system/vendor-report-scheduler.service
|
||||
[Unit]
|
||||
Description=Vendor Report Scheduler
|
||||
After=network.target
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
User=your-user
|
||||
WorkingDirectory=/path/to/vendor_report
|
||||
ExecStart=/usr/bin/python3 /path/to/vendor_report/scheduler.py
|
||||
Restart=always
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
|
||||
# Enable and start
|
||||
sudo systemctl enable vendor-report-scheduler
|
||||
sudo systemctl start vendor-report-scheduler
|
||||
```
|
||||
|
||||
**Windows (Task Scheduler)**:
|
||||
- Create a scheduled task that runs `python scheduler.py` at startup or on a schedule
|
||||
|
||||
## Web UI & On-Demand Report Generation
|
||||
|
||||
The application includes both a **Web UI** and a **REST API** for generating reports on demand.
|
||||
|
||||
### Web UI (Recommended for Easy Access)
|
||||
|
||||
A simple, user-friendly web interface for generating reports without using the terminal.
|
||||
|
||||
1. **Start the Web UI server**:
|
||||
```bash
|
||||
python web_ui.py
|
||||
```
|
||||
|
||||
2. **Open in browser**:
|
||||
```
|
||||
http://localhost:8080
|
||||
```
|
||||
|
||||
3. **Features**:
|
||||
- One-click report generation
|
||||
- Download from SharePoint & generate (single button)
|
||||
- View generated reports
|
||||
- View service status
|
||||
- View configuration
|
||||
- No terminal knowledge required!
|
||||
|
||||
### REST API
|
||||
|
||||
The application also includes a REST API for integration with other systems or manual triggers.
|
||||
|
||||
### Setup API Server
|
||||
|
||||
1. **Edit `config.yaml`**:
|
||||
```yaml
|
||||
api:
|
||||
enabled: true
|
||||
host: "0.0.0.0"
|
||||
port: 8080
|
||||
api_key: "your-secret-api-key" # Optional, for authentication
|
||||
```
|
||||
|
||||
2. **Start the Web UI** (recommended):
|
||||
```bash
|
||||
python web_ui.py
|
||||
```
|
||||
|
||||
Then open `http://localhost:8080` in your browser.
|
||||
|
||||
**OR start the API server** (for programmatic access):
|
||||
```bash
|
||||
python api_server.py
|
||||
```
|
||||
|
||||
3. **Generate report via API**:
|
||||
```bash
|
||||
# Without authentication
|
||||
curl -X POST http://localhost:8080/api/generate \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"download_from_sharepoint": true}'
|
||||
|
||||
# With API key authentication
|
||||
curl -X POST http://localhost:8080/api/generate \
|
||||
-H "Content-Type: application/json" \
|
||||
-H "X-API-Key: your-secret-api-key" \
|
||||
-d '{"download_from_sharepoint": true}'
|
||||
```
|
||||
|
||||
### API Endpoints
|
||||
|
||||
- **POST `/api/generate`**: Generate report on demand
|
||||
- Request body (optional):
|
||||
```json
|
||||
{
|
||||
"download_from_sharepoint": true,
|
||||
"reports_dir": "reports",
|
||||
"output_file": "output/report.json"
|
||||
}
|
||||
```
|
||||
|
||||
- **GET `/api/status`**: Get service status and configuration
|
||||
|
||||
- **GET `/health`**: Health check endpoint
|
||||
|
||||
### Example: Integration with Webhook
|
||||
|
||||
You can trigger report generation from SharePoint webhooks, Power Automate, or any HTTP client:
|
||||
|
||||
```python
|
||||
import requests
|
||||
|
||||
response = requests.post(
|
||||
'http://your-server:8080/api/generate',
|
||||
json={'download_from_sharepoint': True},
|
||||
headers={'X-API-Key': 'your-api-key'}
|
||||
)
|
||||
print(response.json())
|
||||
```
|
||||
|
||||
## Configuration
|
||||
|
||||
The application uses a YAML configuration file (`config.yaml`) for all settings. You can also use environment variables:
|
||||
|
||||
### Environment Variables
|
||||
|
||||
```bash
|
||||
# SharePoint
|
||||
export SHAREPOINT_ENABLED=true
|
||||
export SHAREPOINT_SITE_URL="https://yourcompany.sharepoint.com/sites/YourSite"
|
||||
export SHAREPOINT_FOLDER_PATH="/Shared Documents/Reports"
|
||||
export SHAREPOINT_CLIENT_ID="your-client-id"
|
||||
export SHAREPOINT_CLIENT_SECRET="your-client-secret"
|
||||
export SHAREPOINT_USE_APP_AUTH=true
|
||||
|
||||
# Scheduler
|
||||
export SCHEDULER_ENABLED=true
|
||||
export SCHEDULER_INTERVAL_HOURS=24
|
||||
|
||||
# API
|
||||
export API_ENABLED=true
|
||||
export API_PORT=8080
|
||||
export API_KEY="your-api-key"
|
||||
```
|
||||
|
||||
## Complete Workflow Example
|
||||
|
||||
Here's a complete example setup for automated SharePoint → Report generation:
|
||||
|
||||
1. **Set up configuration** (`config.yaml`):
|
||||
```yaml
|
||||
sharepoint:
|
||||
enabled: true
|
||||
site_url: "https://company.sharepoint.com/sites/Reports"
|
||||
folder_path: "/Shared Documents/Vendor Reports"
|
||||
use_app_authentication: true
|
||||
client_id: "your-client-id"
|
||||
client_secret: "your-client-secret"
|
||||
|
||||
scheduler:
|
||||
enabled: true
|
||||
schedule_type: "cron"
|
||||
cron_expression: "0 8 * * *" # 8 AM daily
|
||||
timezone: "America/New_York"
|
||||
|
||||
report:
|
||||
output_dir: "output"
|
||||
reports_dir: "reports"
|
||||
```
|
||||
|
||||
2. **Start scheduler**:
|
||||
```bash
|
||||
python scheduler.py
|
||||
```
|
||||
|
||||
3. **The scheduler will**:
|
||||
- Download latest Excel files from SharePoint at 8 AM daily
|
||||
- Generate reports automatically
|
||||
- Save to `output/report.json` and `output/report.html`
|
||||
|
||||
## License
|
||||
|
||||
[Add your license information here]
|
||||
|
||||
127
SHAREPOINT_SETUP.md
Normal file
127
SHAREPOINT_SETUP.md
Normal file
@ -0,0 +1,127 @@
|
||||
# SharePoint Configuration Guide
|
||||
|
||||
This guide will help you get the configuration values needed to connect to SharePoint.
|
||||
|
||||
## Quick Answer: Where to Get Configuration Values
|
||||
|
||||
### 1. SharePoint Site URL
|
||||
- Go to your SharePoint site in a browser
|
||||
- Copy the URL from the address bar
|
||||
- Example: `https://yourcompany.sharepoint.com/sites/YourSiteName`
|
||||
- **Important**: Include the `/sites/SiteName` portion of the URL (not just the tenant root) when your files live in a specific site
|
||||
|
||||
### 2. Folder Path
|
||||
- Navigate to the folder containing your Excel files in SharePoint
|
||||
- Right-click the folder → "Copy path" or "Details"
|
||||
- Example: `/Shared Documents/Reports` or `/sites/YourSite/Shared Documents/Vendor Reports`
|
||||
- **Tip**: In SharePoint, go to the folder, click "..." menu → "Copy link" and extract the path
|
||||
|
||||
### 3. Azure AD App Credentials (Recommended Method)
|
||||
|
||||
#### Step 1: Register App in Azure AD
|
||||
1. Go to [Azure Portal](https://portal.azure.com)
|
||||
2. Navigate to **Azure Active Directory** (now named **Microsoft Entra ID** in the portal) → **App registrations**
|
||||
3. Click **New registration**
|
||||
4. Name it (e.g., "Vendor Report Generator")
|
||||
5. Select **Accounts in this organizational directory only**
|
||||
6. Click **Register**
|
||||
|
||||
#### Step 2: Create Client Secret
|
||||
1. In your app, go to **Certificates & secrets**
|
||||
2. Click **New client secret**
|
||||
3. Add description (e.g., "Vendor Report Secret")
|
||||
4. Choose expiration (recommend 24 months)
|
||||
5. Click **Add**
|
||||
6. **IMPORTANT**: Copy the **Value** immediately (you won't see it again!)
|
||||
- This is your `client_secret`
|
||||
|
||||
#### Step 3: Get Client ID
|
||||
1. In your app, go to **Overview**
|
||||
2. Copy the **Application (client) ID**
|
||||
- This is your `client_id`
|
||||
|
||||
#### Step 4: Grant SharePoint Permissions
|
||||
1. In your app, go to **API permissions**
|
||||
2. Click **Add a permission**
|
||||
3. Select **SharePoint**
|
||||
4. Choose **Application permissions** (not Delegated)
|
||||
5. Select **Sites.Read.All** (or Sites.ReadWrite.All if you need write access)
|
||||
6. Click **Add permissions**
|
||||
7. Click **Grant admin consent** (important!)
|
||||
8. Wait for status to show "Granted for [Your Organization]"
|
||||
|
||||
### 4. Alternative: User Credentials (Less Secure)
|
||||
If you can't use app authentication:
|
||||
- `username`: Your SharePoint/Office 365 email
|
||||
- `password`: Your password (not recommended for automation)
|
||||
|
||||
## Complete Configuration Example
|
||||
|
||||
Once you have all values, add them to `config.yaml`:
|
||||
|
||||
```yaml
|
||||
sharepoint:
|
||||
enabled: true
|
||||
site_url: "https://yourcompany.sharepoint.com/sites/YourSite"
|
||||
folder_path: "/Shared Documents/Reports" # Path to your Excel files folder
|
||||
local_dir: "reports" # Where to save downloaded files
|
||||
use_app_authentication: true # Use app auth (recommended)
|
||||
client_id: "12345678-1234-1234-1234-123456789abc" # From Azure AD
|
||||
client_secret: "your-secret-value-here" # From Azure AD (the Value, not Secret ID!)
|
||||
file_pattern: "*.xlsx" # Only download Excel files
|
||||
overwrite: true # Overwrite existing files
|
||||
```
|
||||
|
||||
## Testing Your Configuration
|
||||
|
||||
1. **Test SharePoint connection**:
|
||||
```bash
|
||||
python sharepoint_downloader.py
|
||||
```
|
||||
|
||||
2. **Or use the Web UI**:
|
||||
- Start: `python web_ui.py`
|
||||
- Open: `http://localhost:8080`
|
||||
- Click "Update Data from SharePoint"
|
||||
- Check for errors
|
||||
|
||||
## Common Issues
|
||||
|
||||
### "SharePoint authentication failed"
|
||||
- **Check**: Client ID and secret are correct
|
||||
- **Check**: App has been granted admin consent
|
||||
- **Check**: Permissions are "Application permissions" (not Delegated)
|
||||
|
||||
### "Folder not found"
|
||||
- **Check**: Folder path is correct (case-sensitive)
|
||||
- **Tip**: Use SharePoint's "Copy path" feature
|
||||
- **Check**: Path starts with `/` (e.g., `/Shared Documents/...`)
|
||||
|
||||
### "No files downloaded"
|
||||
- **Check**: Folder contains Excel files (`.xlsx` or `.xls`)
|
||||
- **Check**: File pattern matches your files
|
||||
- **Check**: You have read permissions to the folder
|
||||
|
||||
### "Access denied"
|
||||
- **Check**: App has `Sites.Read.All` permission
|
||||
- **Check**: Admin consent has been granted
|
||||
- **Check**: App is registered in the same tenant as SharePoint
|
||||
|
||||
## Security Best Practices
|
||||
|
||||
1. **Use App Authentication** (not user credentials)
|
||||
2. **Store secrets securely**:
|
||||
- Use environment variables in production
|
||||
- Never commit `config.yaml` with secrets to git
|
||||
- Use a secrets manager for production
|
||||
3. **Limit permissions**: Only grant `Sites.Read.All` (not write access unless needed)
|
||||
4. **Rotate secrets**: Update client secrets regularly
|
||||
|
||||
## Getting Help
|
||||
|
||||
If you're stuck:
|
||||
1. Check the terminal/console for detailed error messages
|
||||
2. Verify each configuration value step by step
|
||||
3. Test with a simple folder first (one Excel file)
|
||||
4. Check Azure AD app status in Azure Portal
|
||||
|
||||
470
TASKBOARD_INTEGRATION_CONTEXT.md
Normal file
470
TASKBOARD_INTEGRATION_CONTEXT.md
Normal file
@ -0,0 +1,470 @@
|
||||
# Vendor Report Generator - Taskboard Integration Context
|
||||
|
||||
## 🎯 Goal & Purpose
|
||||
|
||||
The **Vendor Report Generator** is a Python-based tool designed to automate the generation of comprehensive vendor punchlist reports from Excel files stored in SharePoint. The goal is to:
|
||||
|
||||
1. **Automate Report Generation**: Eliminate manual Excel processing and report creation
|
||||
2. **Centralize Data**: Pull vendor punchlist data directly from SharePoint
|
||||
3. **Provide Insights**: Generate actionable reports with metrics, priorities, and status tracking
|
||||
4. **Enable Integration**: Make reports accessible within Taskboard for team collaboration
|
||||
|
||||
### Business Value
|
||||
|
||||
- **Time Savings**: Automates hours of manual report generation
|
||||
- **Accuracy**: Consistent data normalization and calculation
|
||||
- **Visibility**: Real-time vendor status tracking and metrics
|
||||
- **Accessibility**: Web-based interface for non-technical users
|
||||
- **Integration Ready**: Can be embedded as a tool/widget in Taskboard
|
||||
|
||||
---
|
||||
|
||||
## 📋 Application Overview
|
||||
|
||||
### What It Does
|
||||
|
||||
The application processes Excel files containing vendor punchlist items and generates:
|
||||
- **Interactive HTML Reports**: Searchable, filterable web reports with vendor tabs, status filters, and priority grouping
|
||||
- **JSON Data**: Structured data for further processing or API integration
|
||||
- **Metrics**: Per-vendor statistics (total items, closed/open counts, 24-hour updates, oldest unaddressed items)
|
||||
|
||||
### Key Features
|
||||
|
||||
1. **Excel Processing**: Direct pandas-based reading (no manual conversion needed)
|
||||
2. **Data Normalization**: Automatically handles vendor name variations, status inconsistencies, priority classifications
|
||||
3. **24-Hour Tracking**: Identifies items added, closed, or changed to monitor status in the last 24 hours (Baltimore/Eastern timezone)
|
||||
4. **Priority Classification**: Groups items by Very High, High, Medium, Low priorities
|
||||
5. **Oldest Items**: Highlights the oldest 3 unaddressed items per vendor
|
||||
6. **SharePoint Integration**: Automatically downloads Excel files from SharePoint
|
||||
7. **Scheduled Generation**: Can run automatically on a schedule
|
||||
8. **Web UI**: User-friendly interface for generating reports
|
||||
9. **REST API**: Programmatic access for integration
|
||||
|
||||
---
|
||||
|
||||
## 🏗️ Architecture & Components
|
||||
|
||||
### Core Components
|
||||
|
||||
```
|
||||
vendor_report/
|
||||
├── report_generator.py # Main entry point - orchestrates report generation
|
||||
├── data_preprocessor.py # Excel parsing, normalization, data cleaning
|
||||
├── html_generator.py # Generates interactive HTML reports
|
||||
├── models.py # Pydantic data models for validation
|
||||
├── sharepoint_downloader.py # SharePoint file downloader
|
||||
├── scheduler.py # Scheduled report generation
|
||||
├── api_server.py # REST API server
|
||||
├── web_ui.py # Web UI server (Flask-based)
|
||||
├── config.py # Configuration management
|
||||
└── config.yaml # Configuration file
|
||||
```
|
||||
|
||||
### Data Flow
|
||||
|
||||
```
|
||||
SharePoint Excel Files
|
||||
↓
|
||||
[SharePoint Downloader] → Local reports/ directory
|
||||
↓
|
||||
[Data Preprocessor] → Normalize vendors, statuses, priorities, parse dates
|
||||
↓
|
||||
[Report Generator] → Calculate metrics, group by vendor, identify updates
|
||||
↓
|
||||
[HTML Generator] → Generate interactive report.html
|
||||
↓
|
||||
[Output] → output/report.json + output/report.html
|
||||
```
|
||||
|
||||
### Processing Pipeline
|
||||
|
||||
1. **Input**: Excel files with columns:
|
||||
- Punchlist Name, Vendor, Priority, Description, Date Identified, Status Updates, Issue Image, Status, Date Completed
|
||||
|
||||
2. **Preprocessing**:
|
||||
- Parse Excel files using pandas
|
||||
- Normalize vendor names (handle case variations, combined vendors)
|
||||
- Normalize statuses (Complete, Monitor, Incomplete)
|
||||
- Classify priorities (Very High, High, Medium, Low)
|
||||
- Parse dates (multiple formats supported)
|
||||
- Calculate 24-hour windows (Baltimore/Eastern timezone)
|
||||
- Calculate item age (days since identified)
|
||||
|
||||
3. **Report Generation**:
|
||||
- Group items by vendor
|
||||
- Calculate metrics per vendor (total, closed, open, monitor counts)
|
||||
- Identify 24-hour updates (added, closed, changed to monitor)
|
||||
- Find oldest 3 unaddressed items per vendor
|
||||
- Group by priority levels
|
||||
- Generate JSON structure
|
||||
- Generate HTML report
|
||||
|
||||
4. **Output**:
|
||||
- `output/report.json`: Structured JSON data
|
||||
- `output/report.html`: Interactive HTML report
|
||||
- `output/preprocessed_data.txt`: Debug/preview data
|
||||
|
||||
---
|
||||
|
||||
## 🔧 Technical Details
|
||||
|
||||
### Dependencies
|
||||
|
||||
```text
|
||||
# Core
|
||||
pandas>=2.0.0 # Excel processing
|
||||
openpyxl>=3.0.0 # Excel file reading
|
||||
pydantic>=2.0.0 # Data validation
|
||||
|
||||
# Optional: SharePoint
|
||||
Office365-REST-Python-Client>=2.3.0 # SharePoint API
|
||||
|
||||
# Optional: Scheduling
|
||||
apscheduler>=3.10.0 # Task scheduling
|
||||
|
||||
# Optional: Web UI/API
|
||||
flask>=2.3.0 # Web framework
|
||||
flask-cors>=4.0.0 # CORS support
|
||||
|
||||
# Configuration
|
||||
pyyaml>=6.0 # YAML config parsing
|
||||
```
|
||||
|
||||
### Configuration
|
||||
|
||||
Configuration is managed via `config.yaml`:
|
||||
|
||||
```yaml
|
||||
sharepoint:
|
||||
enabled: true/false
|
||||
site_url: "https://company.sharepoint.com/sites/SiteName"
|
||||
folder_path: "/Shared Documents/Reports"
|
||||
use_app_authentication: true # Azure AD app auth (recommended)
|
||||
client_id: "azure-ad-client-id"
|
||||
client_secret: "azure-ad-client-secret"
|
||||
|
||||
scheduler:
|
||||
enabled: true/false
|
||||
schedule_type: "interval" | "cron" | "once"
|
||||
interval_hours: 24
|
||||
cron_expression: "0 8 * * *" # 8 AM daily
|
||||
|
||||
api:
|
||||
enabled: true/false
|
||||
port: 8080
|
||||
api_key: "optional-api-key"
|
||||
|
||||
report:
|
||||
output_dir: "output"
|
||||
reports_dir: "reports"
|
||||
```
|
||||
|
||||
### API Endpoints
|
||||
|
||||
**Web UI Server** (`web_ui.py`):
|
||||
- `GET /` - Web UI interface
|
||||
- `POST /api/generate` - Generate report
|
||||
- `POST /api/update-sharepoint` - Download files from SharePoint
|
||||
- `GET /api/status` - Service status
|
||||
- `GET /api/reports` - List generated reports
|
||||
- `GET /api/config` - Configuration (safe, no secrets)
|
||||
- `GET /reports/<filename>` - Serve report files
|
||||
|
||||
**API Server** (`api_server.py`):
|
||||
- `POST /api/generate` - Generate report (programmatic)
|
||||
- `GET /api/status` - Service status
|
||||
- `GET /health` - Health check
|
||||
|
||||
### Data Models
|
||||
|
||||
**PunchlistItem**:
|
||||
- punchlist_name, description, priority, date_identified, date_completed
|
||||
- status, status_updates, issue_image, age_days
|
||||
|
||||
**VendorMetrics**:
|
||||
- vendor_name, total_items, closed_count, open_count, monitor_count
|
||||
- updates_24h (added, closed, changed_to_monitor)
|
||||
- oldest_unaddressed (top 3)
|
||||
- very_high_priority_items, high_priority_items
|
||||
|
||||
**FullReport**:
|
||||
- report_generated_at, vendors[], summary{}
|
||||
|
||||
---
|
||||
|
||||
## 🔗 Taskboard Integration Possibilities
|
||||
|
||||
### Option 1: Embedded Widget/Page
|
||||
|
||||
Create a new page in Taskboard (`/vendor-reports`) that:
|
||||
- Uses Taskboard's authentication (already authenticated users)
|
||||
- Embeds the generated HTML report in an iframe or renders it directly
|
||||
- Provides a button to trigger report generation
|
||||
- Shows report history/list
|
||||
|
||||
**Implementation**:
|
||||
```typescript
|
||||
// taskboard/src/app/(dashboard)/vendor-reports/page.tsx
|
||||
// - Call Python API server to generate reports
|
||||
// - Display generated HTML reports
|
||||
// - Use Taskboard's UI components for consistency
|
||||
```
|
||||
|
||||
### Option 2: API Integration
|
||||
|
||||
Create Taskboard API routes that proxy to the Python API:
|
||||
- `POST /api/vendor-reports/generate` → Calls Python `POST /api/generate`
|
||||
- `GET /api/vendor-reports/list` → Calls Python `GET /api/reports`
|
||||
- `GET /api/vendor-reports/status` → Calls Python `GET /api/status`
|
||||
|
||||
**Benefits**:
|
||||
- Single authentication system (Taskboard)
|
||||
- Consistent API patterns
|
||||
- Can add Taskboard-specific features (notifications, task linking)
|
||||
|
||||
### Option 3: Background Service
|
||||
|
||||
Run the Python scheduler as a background service that:
|
||||
- Generates reports on schedule
|
||||
- Saves reports to a shared location
|
||||
- Lets Taskboard display the latest report
|
||||
- Can trigger notifications when reports are updated
|
||||
|
||||
### Option 4: Task Integration
|
||||
|
||||
Link reports to Taskboard tasks:
|
||||
- Create tasks for vendors with unaddressed items
|
||||
- Link report generation to project/task completion
|
||||
- Use report metrics in task dashboards
|
||||
|
||||
---
|
||||
|
||||
## 🚀 Usage Examples
|
||||
|
||||
### Command Line
|
||||
|
||||
```bash
|
||||
# Generate report from local files
|
||||
python report_generator.py
|
||||
|
||||
# Generate with custom directories
|
||||
python report_generator.py --reports-dir /path/to/excel --output /path/to/output.json
|
||||
```
|
||||
|
||||
### Web UI
|
||||
|
||||
```bash
|
||||
# Start web UI server
|
||||
python web_ui.py
|
||||
|
||||
# Open browser: http://localhost:8080
|
||||
# Click "Update Data from SharePoint" → "Generate Report"
|
||||
```
|
||||
|
||||
### API
|
||||
|
||||
```bash
|
||||
# Generate report via API
|
||||
curl -X POST http://localhost:8080/api/generate \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"download_from_sharepoint": false}'
|
||||
|
||||
# Update from SharePoint
|
||||
curl -X POST http://localhost:8080/api/update-sharepoint
|
||||
```
|
||||
|
||||
### Scheduled
|
||||
|
||||
```bash
|
||||
# Start scheduler (runs continuously)
|
||||
python scheduler.py
|
||||
|
||||
# Configured via config.yaml:
|
||||
# scheduler:
|
||||
# enabled: true
|
||||
# schedule_type: "cron"
|
||||
# cron_expression: "0 8 * * *" # 8 AM daily
|
||||
```
|
||||
|
||||
### Programmatic (Python)
|
||||
|
||||
```python
|
||||
from report_generator import generate_report
|
||||
|
||||
# Generate report
|
||||
report_data = generate_report(
|
||||
reports_dir="reports",
|
||||
output_file="output/report.json",
|
||||
verbose=True
|
||||
)
|
||||
|
||||
# Access data
|
||||
vendors = report_data['vendors']
|
||||
summary = report_data['summary']
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 📊 Report Structure
|
||||
|
||||
### JSON Report Format
|
||||
|
||||
```json
|
||||
{
|
||||
"report_generated_at": "2025-11-06T16:00:00",
|
||||
"vendors": [
|
||||
{
|
||||
"vendor_name": "VendorName",
|
||||
"total_items": 10,
|
||||
"closed_count": 5,
|
||||
"open_count": 3,
|
||||
"monitor_count": 2,
|
||||
"updates_24h": {
|
||||
"added": [...],
|
||||
"closed": [...],
|
||||
"changed_to_monitor": [...]
|
||||
},
|
||||
"oldest_unaddressed": [...],
|
||||
"very_high_priority_items": [...],
|
||||
"high_priority_items": [...],
|
||||
"closed_items": [...],
|
||||
"monitor_items": [...],
|
||||
"open_items": [...]
|
||||
}
|
||||
],
|
||||
"summary": {
|
||||
"total_vendors": 5,
|
||||
"total_items": 50,
|
||||
"total_closed": 25,
|
||||
"total_open": 15,
|
||||
"total_monitor": 10
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### HTML Report Features
|
||||
|
||||
- **Summary Cards**: Overview statistics
|
||||
- **Vendor Tabs**: Quick navigation between vendors
|
||||
- **Status Tabs**: Filter by All, Yesterday's Updates, Oldest Unaddressed, Closed, Monitor, Open
|
||||
- **Search & Filters**: Search by name/description, filter by vendor/status/priority
|
||||
- **Quick Filters**: Show only vendors with updates or oldest items
|
||||
- **Responsive Design**: Works on desktop and mobile
|
||||
|
||||
---
|
||||
|
||||
## 🔐 Authentication & Security
|
||||
|
||||
### Current State
|
||||
|
||||
- **Web UI**: Optional API key authentication
|
||||
- **SharePoint**: Azure AD app authentication (recommended) or user credentials
|
||||
- **No User Management**: Standalone application
|
||||
|
||||
### Taskboard Integration Benefits
|
||||
|
||||
- **Leverage Existing Auth**: Use Taskboard's Authentik/Microsoft Entra ID authentication
|
||||
- **Role-Based Access**: Control who can generate/view reports
|
||||
- **Audit Trail**: Track who generated reports (via Taskboard user system)
|
||||
- **Secure Storage**: Use Taskboard's file storage for reports
|
||||
|
||||
---
|
||||
|
||||
## 📝 Integration Checklist
|
||||
|
||||
### Phase 1: Basic Integration
|
||||
- [ ] Set up Python API server as background service
|
||||
- [ ] Create Taskboard API route that proxies to Python API
|
||||
- [ ] Create Taskboard page to display reports
|
||||
- [ ] Add "Generate Report" button in Taskboard UI
|
||||
|
||||
### Phase 2: Enhanced Integration
|
||||
- [ ] Use Taskboard authentication for report access
|
||||
- [ ] Store report metadata in Taskboard database
|
||||
- [ ] Add report history/versioning
|
||||
- [ ] Link reports to projects/tasks
|
||||
|
||||
### Phase 3: Advanced Features
|
||||
- [ ] Scheduled report generation via Taskboard
|
||||
- [ ] Notifications when reports are generated
|
||||
- [ ] Dashboard widgets showing report metrics
|
||||
- [ ] Export reports to Taskboard tasks/boards
|
||||
|
||||
---
|
||||
|
||||
## 🛠️ Development Notes
|
||||
|
||||
### Running Locally
|
||||
|
||||
```bash
|
||||
# Setup
|
||||
cd vendor_report
|
||||
python -m venv venv
|
||||
source venv/bin/activate # Windows: venv\Scripts\activate
|
||||
pip install -r requirements.txt
|
||||
|
||||
# Configure
|
||||
cp config.yaml.template config.yaml
|
||||
# Edit config.yaml with SharePoint credentials
|
||||
|
||||
# Run Web UI
|
||||
python web_ui.py
|
||||
# Open http://localhost:8080
|
||||
```
|
||||
|
||||
### Deployment Considerations
|
||||
|
||||
- **Python Environment**: Requires Python 3.8+
|
||||
- **Dependencies**: Install via pip
|
||||
- **Configuration**: Store secrets securely (environment variables or vault)
|
||||
- **Port**: Default 8080 (configurable)
|
||||
- **File Storage**: Reports saved to `output/` directory
|
||||
- **SharePoint**: Requires Azure AD app registration
|
||||
|
||||
### Error Handling
|
||||
|
||||
- Graceful handling of missing Excel files
|
||||
- SharePoint connection errors logged
|
||||
- Invalid data formats handled
|
||||
- User-friendly error messages in Web UI
|
||||
|
||||
---
|
||||
|
||||
## 📚 Additional Resources
|
||||
|
||||
- **SharePoint Setup**: See `SHAREPOINT_SETUP.md` for detailed Azure AD configuration
|
||||
- **Quick Start**: See `QUICK_START.md` for 5-minute setup guide
|
||||
- **Full Documentation**: See `README.md` for complete usage guide
|
||||
|
||||
---
|
||||
|
||||
## 💡 Integration Ideas for Taskboard
|
||||
|
||||
1. **Vendor Dashboard**: Show vendor metrics as cards/widgets
|
||||
2. **Report History**: Track when reports were generated and by whom
|
||||
3. **Task Creation**: Auto-create tasks for vendors with oldest unaddressed items
|
||||
4. **Notifications**: Alert project managers when reports are generated
|
||||
5. **Export to Tasks**: Convert report items to Taskboard tasks
|
||||
6. **Project Linking**: Associate reports with Taskboard projects
|
||||
7. **Scheduled Reports**: Use Taskboard's scheduling to trigger reports
|
||||
8. **Role-Based Views**: Different report views for different user roles
|
||||
|
||||
---
|
||||
|
||||
## 🔄 Current Status
|
||||
|
||||
- ✅ Core functionality complete
|
||||
- ✅ SharePoint integration working
|
||||
- ✅ Web UI functional
|
||||
- ✅ API endpoints available
|
||||
- ✅ Scheduled generation supported
|
||||
- ⏳ Taskboard integration pending
|
||||
- ⏳ Authentication integration pending
|
||||
- ⏳ Database storage pending
|
||||
|
||||
---
|
||||
|
||||
**Last Updated**: November 6, 2025
|
||||
**Version**: 1.0
|
||||
**Status**: Production Ready (Standalone), Integration Ready (Taskboard)
|
||||
|
||||
696
api_server.py
Normal file
696
api_server.py
Normal file
@ -0,0 +1,696 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Web API Server for On-Demand Report Generation
|
||||
|
||||
Provides REST API endpoints to trigger report generation on demand.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Optional, List, Dict
|
||||
import json
|
||||
from datetime import datetime
|
||||
import shutil
|
||||
import os
|
||||
|
||||
try:
|
||||
from flask import Flask, jsonify, request, send_from_directory
|
||||
from flask_cors import CORS
|
||||
from werkzeug.utils import secure_filename
|
||||
FLASK_AVAILABLE = True
|
||||
except ImportError:
|
||||
FLASK_AVAILABLE = False
|
||||
logging.warning("Flask not installed. API server features disabled.")
|
||||
|
||||
from config import load_config
|
||||
from report_generator import generate_report
|
||||
from sharepoint_downloader import download_from_sharepoint
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
app = None
|
||||
config = None
|
||||
|
||||
|
||||
def cleanup_old_reports(output_dir: Path, reports_dir: Path, max_reports: int = 10):
    """
    Cleanup old reports and Excel files, keeping only the last max_reports.

    A "report" is a ``report-*.html`` file in ``output_dir``; it is removed
    together with the ``.json`` file that shares its stem. "Newest" is decided
    by file modification time. Excel files (``*.xlsx`` / ``*.xls``) in
    ``reports_dir`` are only pruned on runs where the number of HTML reports
    exceeds ``max_reports``. Every failure is logged; nothing is raised.

    Args:
        output_dir: Directory containing report HTML/JSON files
        reports_dir: Directory containing Excel files
        max_reports: Maximum number of reports to keep. Must be >= 0; a
            negative value is rejected and nothing is deleted.
    """
    def _newest_first(paths):
        # Sort by modification time, most recently modified first.
        return sorted(paths, key=lambda p: p.stat().st_mtime, reverse=True)

    try:
        if max_reports < 0:
            # A negative slice start counts from the END of the newest-first
            # list, so files[max_reports:] would pick an arbitrary tail
            # instead of "everything beyond the limit". Refuse to delete.
            logger.warning(f"Skipping cleanup: max_reports must be >= 0, got {max_reports}")
            return

        # Get all report HTML files sorted by modification time (newest first)
        html_files = _newest_first(output_dir.glob('report-*.html'))

        if len(html_files) <= max_reports:
            return  # No cleanup needed (Excel files are left alone as well)

        deleted_count = 0  # counts HTML reports only
        for html_file in html_files[max_reports:]:
            report_id = html_file.stem

            # Delete HTML file
            try:
                html_file.unlink()
                logger.info(f"Deleted old report HTML: {html_file.name}")
                deleted_count += 1
            except Exception as e:
                logger.warning(f"Failed to delete {html_file.name}: {e}")

            # Delete corresponding JSON file
            json_file = output_dir / f"{report_id}.json"
            if json_file.exists():
                try:
                    json_file.unlink()
                    logger.info(f"Deleted old report JSON: {json_file.name}")
                except Exception as e:
                    logger.warning(f"Failed to delete {json_file.name}: {e}")

        # Cleanup Excel files - keep only the max_reports most recently modified
        if reports_dir.exists():
            excel_files = list(reports_dir.glob('*.xlsx')) + list(reports_dir.glob('*.xls'))

            if len(excel_files) > max_reports:
                for excel_file in _newest_first(excel_files)[max_reports:]:
                    try:
                        excel_file.unlink()
                        logger.info(f"Deleted old Excel file: {excel_file.name}")
                    except Exception as e:
                        logger.warning(f"Failed to delete {excel_file.name}: {e}")

        logger.info(f"Cleanup completed: deleted {deleted_count} old report(s)")

    except Exception as e:
        logger.error(f"Error during cleanup: {e}", exc_info=True)
|
||||
|
||||
|
||||
def create_app(config_path: Optional[str] = None):
|
||||
"""Create and configure Flask app."""
|
||||
global app, config
|
||||
|
||||
if not FLASK_AVAILABLE:
|
||||
raise ImportError(
|
||||
"Flask is required for API server. "
|
||||
"Install it with: pip install flask flask-cors"
|
||||
)
|
||||
|
||||
app = Flask(__name__)
|
||||
CORS(app) # Enable CORS for all routes
|
||||
|
||||
config = load_config(config_path)
|
||||
api_config = config.get('api', {})
|
||||
sharepoint_config = config.get('sharepoint', {})
|
||||
report_config = config.get('report', {})
|
||||
|
||||
# Resolve paths relative to script location, not current working directory
|
||||
script_dir = Path(__file__).parent.absolute()
|
||||
|
||||
# Convert relative paths to absolute paths relative to script directory
|
||||
if 'output_dir' in report_config and report_config['output_dir']:
|
||||
output_dir = Path(report_config['output_dir'])
|
||||
if not output_dir.is_absolute():
|
||||
report_config['output_dir'] = str(script_dir / output_dir)
|
||||
|
||||
if 'reports_dir' in report_config and report_config['reports_dir']:
|
||||
reports_dir = Path(report_config['reports_dir'])
|
||||
if not reports_dir.is_absolute():
|
||||
report_config['reports_dir'] = str(script_dir / reports_dir)
|
||||
|
||||
# Store config in app context
|
||||
app.config['API_KEY'] = api_config.get('api_key')
|
||||
app.config['SHAREPOINT_CONFIG'] = sharepoint_config
|
||||
app.config['REPORT_CONFIG'] = report_config
|
||||
|
||||
@app.route('/health', methods=['GET'])
def health():
    """Health check endpoint.

    Always answers with a fixed JSON payload naming the service and
    reporting it as healthy; no configuration or disk state is consulted.
    """
    payload = {
        'status': 'healthy',
        'service': 'vendor-report-generator',
    }
    return jsonify(payload)
|
||||
|
||||
@app.route('/api/generate', methods=['POST'])
def generate_report_endpoint():
    """
    Generate report on demand.

    Request body (optional JSON object):
    {
        "download_from_sharepoint": true,
        "reports_dir": "reports",
        "api_key": "..."
    }

    ``download_from_sharepoint`` defaults to true. ``false`` means the Excel
    files were already placed in the reports directory (manual upload); only
    files modified within the last 10 minutes are used and older Excel files
    in that directory are deleted. Other body keys (e.g. ``output_file``) are
    not read: output always goes to ``report-<timestamp>.json`` / ``.html``
    in the configured output directory.

    When an API key is configured it must match either the ``X-API-Key``
    header or the ``api_key`` body field.

    Returns:
        JSON describing the generated report on success, otherwise a JSON
        object with an ``error`` key and status 400, 401 or 500.
    """
    def _resolve(raw_path):
        # Relative paths are anchored at this script's directory, not the CWD.
        path = Path(raw_path)
        if path.is_absolute():
            return path
        return Path(__file__).parent.absolute() / path

    def _excel_files(directory):
        # All Excel workbooks directly inside `directory`.
        return list(directory.glob('*.xlsx')) + list(directory.glob('*.xls'))

    # silent=True: a missing, empty or non-JSON body yields None instead of
    # raising, which keeps the documented "optional body" contract.
    request_data = request.get_json(silent=True)
    if not isinstance(request_data, dict):
        request_data = {}

    # Check API key if configured. The header is honoured even when the
    # request carries no JSON body.
    api_key = app.config.get('API_KEY')
    if api_key:
        provided_key = request.headers.get('X-API-Key') or request_data.get('api_key')
        if provided_key != api_key:
            return jsonify({'error': 'Invalid API key'}), 401

    try:
        download_from_sp = request_data.get('download_from_sharepoint', True)  # Default to True for backward compatibility
        downloaded_files = []  # Initialize here for scope

        # Get report config early - needed for error handling
        report_config = app.config['REPORT_CONFIG']

        # Download from SharePoint if requested AND no manual upload happened
        # If download_from_sharepoint is False, it means manual upload was used
        if download_from_sp:
            sp_config = app.config['SHAREPOINT_CONFIG']
            if not sp_config.get('enabled'):
                return jsonify({
                    'error': 'SharePoint is not enabled in configuration'
                }), 400

            logger.info("Downloading files from SharePoint...")
            try:
                downloaded = download_from_sharepoint(
                    site_url=sp_config['site_url'],
                    folder_path=sp_config.get('folder_path'),
                    file_path=sp_config.get('file_path'),
                    local_dir=sp_config.get('local_dir', 'reports'),
                    tenant_id=sp_config.get('tenant_id'),
                    client_id=sp_config.get('client_id'),
                    client_secret=sp_config.get('client_secret'),
                    use_app_authentication=sp_config.get('use_app_authentication', True),
                    file_pattern=sp_config.get('file_pattern'),
                    overwrite=sp_config.get('overwrite', True)
                )
                downloaded_files = downloaded if downloaded else []
                logger.info(f"Downloaded {len(downloaded_files)} file(s) from SharePoint: {downloaded_files}")

                # If SharePoint download failed (no files downloaded), check if we have existing files
                if len(downloaded_files) == 0:
                    logger.warning("SharePoint download returned 0 files. This could mean:")
                    logger.warning("1. SharePoint permissions issue (401/403 error)")
                    logger.warning("2. No files found in the specified folder")
                    logger.warning("3. Site access not granted (Resource-Specific Consent needed)")
                    logger.warning("Checking if existing files are available in reports directory...")

                    # Check if there are existing files we can use
                    reports_dir_path = _resolve(report_config.get('reports_dir', 'reports'))
                    if reports_dir_path.exists():
                        existing_files = _excel_files(reports_dir_path)
                        if existing_files:
                            logger.warning(f"Found {len(existing_files)} existing file(s) in reports directory. Will use these instead.")
                            logger.warning("NOTE: These may be old files. Consider using manual upload for fresh data.")
                        else:
                            logger.error("No files available - neither from SharePoint nor existing files.")
                            return jsonify({
                                'error': 'SharePoint download failed and no existing files found',
                                'details': 'SharePoint access may require Resource-Specific Consent (RSC). Please use manual file upload or fix SharePoint permissions.',
                                'sharepoint_error': True
                            }), 500
            except Exception as e:
                logger.error(f"Failed to download from SharePoint: {e}", exc_info=True)
                # Check if we have existing files as fallback
                reports_dir_path = _resolve(report_config.get('reports_dir', 'reports'))
                if reports_dir_path.exists():
                    existing_files = _excel_files(reports_dir_path)
                    if existing_files:
                        logger.warning(f"SharePoint download failed, but found {len(existing_files)} existing file(s). Will use these.")
                        downloaded_files = []  # Continue with existing files
                    else:
                        return jsonify({
                            'error': f'SharePoint download failed: {str(e)}',
                            'details': 'No existing files found. Please use manual file upload or fix SharePoint permissions.',
                            'sharepoint_error': True
                        }), 500
                else:
                    return jsonify({
                        'error': f'SharePoint download failed: {str(e)}',
                        'details': 'Reports directory does not exist. Please use manual file upload or fix SharePoint permissions.',
                        'sharepoint_error': True
                    }), 500

        # NOTE(review): reports_dir can come from the request body and files
        # inside it may be deleted below - consider restricting it to the
        # configured directory.
        reports_dir = request_data.get('reports_dir', report_config.get('reports_dir', 'reports'))
        output_dir = _resolve(report_config.get('output_dir', 'output'))

        # Create timestamped filename
        timestamp = datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
        report_id = f"report-{timestamp}"
        output_file = str(output_dir / f"{report_id}.json")

        # Log which files will be used for generation
        reports_dir_path = _resolve(reports_dir)
        logger.info(f"Generating report from {reports_dir_path.absolute()}...")
        logger.info(f"Reports directory exists: {reports_dir_path.exists()}")

        # Determine which files to use for generation
        # CRITICAL: Only use files that were just downloaded/uploaded, not old ones
        if downloaded_files:
            # Files were downloaded from SharePoint - use only those
            logger.info(f"Using {len(downloaded_files)} file(s) downloaded from SharePoint")
            # Verify that reports_dir only contains the downloaded files (should be empty of old files)
            all_files = _excel_files(reports_dir_path)
            downloaded_file_names = [Path(f).name for f in downloaded_files]  # Get just filenames
            if len(all_files) != len(downloaded_files):
                logger.warning(f"WARNING: Found {len(all_files)} file(s) in reports_dir but only {len(downloaded_files)} were downloaded!")
                logger.warning("This might indicate old files weren't cleared. Clearing now...")
                for file in all_files:
                    if file.name not in downloaded_file_names:
                        try:
                            file.unlink()
                            logger.info(f"Cleared unexpected file: {file.name}")
                        except Exception as e:
                            logger.error(f"Failed to clear unexpected file {file.name}: {e}")
        elif not download_from_sp:
            # Manual upload was used (download_from_sharepoint=False)
            # Upload endpoint should have cleared old files, but double-check
            # Only use files uploaded in the last 10 minutes to avoid combining with old files
            if not reports_dir_path.exists():
                logger.error("Manual upload was used but reports directory does not exist!")
                return jsonify({
                    'error': 'Reports directory does not exist',
                    'details': 'Cannot generate report from manual upload - reports directory is missing.',
                    'manual_upload_error': True
                }), 500

            excel_files = _excel_files(reports_dir_path)
            current_time = datetime.now().timestamp()
            recent_files = []
            for excel_file in excel_files:
                mtime = excel_file.stat().st_mtime
                # Only use files modified in the last 10 minutes (should be the uploaded ones)
                # Increased from 5 to 10 minutes to account for upload + generation delay
                if current_time - mtime < 600:  # 10 minutes
                    recent_files.append(excel_file)
                    mtime_str = datetime.fromtimestamp(mtime).strftime('%Y-%m-%d %H:%M:%S')
                    logger.info(f" - {excel_file.name} (modified: {mtime_str}) - will be used for manual upload generation")
                else:
                    logger.warning(f" - {excel_file.name} (modified: {datetime.fromtimestamp(mtime).strftime('%Y-%m-%d %H:%M:%S')}) - skipping (too old, might be from previous run)")

            if len(recent_files) < len(excel_files):
                logger.warning(f"Found {len(excel_files)} total file(s), but only {len(recent_files)} are recent. Clearing old files to avoid combining...")
                # Clear old files to ensure we only use the manually uploaded ones
                for excel_file in excel_files:
                    if excel_file not in recent_files:
                        try:
                            excel_file.unlink()
                            logger.info(f"Cleared old file: {excel_file.name}")
                        except Exception as e:
                            logger.warning(f"Failed to clear old file {excel_file.name}: {e}")

            if len(recent_files) == 0:
                logger.error("Manual upload was used but no recent files found in reports directory!")
                logger.error("This might mean:")
                logger.error("1. Files were not uploaded successfully")
                logger.error("2. Files were uploaded but cleared before generation")
                logger.error("3. File modification times are incorrect")
                return jsonify({
                    'error': 'No files found for manual upload generation',
                    'details': 'Files were uploaded but not found in reports directory. Please try uploading again.',
                    'manual_upload_error': True
                }), 400

            # Verify we only have the recently uploaded files
            all_files = _excel_files(reports_dir_path)
            if len(all_files) != len(recent_files):
                logger.warning(f"WARNING: Found {len(all_files)} file(s) but only {len(recent_files)} are recent!")
                logger.warning("Clearing old files to ensure only uploaded files are used...")
                for file in all_files:
                    if file not in recent_files:
                        try:
                            file.unlink()
                            logger.info(f"Cleared unexpected old file: {file.name}")
                        except Exception as e:
                            logger.error(f"Failed to clear unexpected file {file.name}: {e}")

            logger.info(f"Will generate report from {len(recent_files)} recently uploaded file(s)")
        else:
            # SharePoint was requested but yielded no files. Existing files are
            # deliberately NOT used here because they might be old.
            # NOTE(review): this makes the "Will use these" fallbacks logged
            # above ineffective - confirm which behaviour is intended.
            logger.error("SharePoint download was requested but failed, and no manual upload was used!")
            logger.error("This should not happen - refusing to use potentially old files")
            return jsonify({
                'error': 'SharePoint download failed and no manual upload provided',
                'details': 'Cannot generate report - no data source available. Please try again or use manual upload.',
                'sharepoint_error': True
            }), 400

        report_data = generate_report(
            reports_dir=str(reports_dir_path),
            output_file=output_file,
            verbose=False  # Don't print to console in API mode
        )

        if not report_data:
            return jsonify({
                'error': 'Report generation failed'
            }), 500

        # Generate HTML with same timestamp
        html_file = output_dir / f"{report_id}.html"
        from html_generator import generate_html_report
        generate_html_report(output_file, str(html_file))

        # Cleanup old reports (keep only last 10). Use the resolved directory
        # so cleanup targets the same folder the report was generated from.
        cleanup_old_reports(output_dir, reports_dir_path, max_reports=10)

        return jsonify({
            'status': 'success',
            'message': 'Report generated successfully',
            'report_id': report_id,
            'report_date': timestamp,
            'output_file': output_file,
            'summary': report_data.get('summary', {}),
            'vendors_count': len(report_data.get('vendors', [])),
            'downloaded_files': len(downloaded_files) if download_from_sp else 0,
            'downloaded_file_names': [Path(f).name for f in downloaded_files] if download_from_sp else []
        })

    except Exception as e:
        logger.error(f"Error generating report: {e}", exc_info=True)
        return jsonify({
            'error': f'Report generation failed: {str(e)}'
        }), 500
|
||||
|
||||
@app.route('/api/upload', methods=['POST'])
def upload_files():
    """Upload Excel files manually. Clears old files before uploading new ones.

    Expects multipart form data with one or more parts named ``files``.
    Only names ending in ``.xlsx`` or ``.xls`` (after ``secure_filename``)
    are accepted. The upload is validated BEFORE any existing Excel file is
    deleted, so a request without a usable file leaves the reports directory
    untouched.

    Returns:
        JSON with upload counts and names on success; a JSON ``error`` with
        status 400 (nothing usable uploaded) or 500 (old files could not be
        cleared, or an unexpected failure).
    """
    import time  # local import: only needed for the unlink retry delay

    try:
        if 'files' not in request.files:
            return jsonify({'error': 'No files provided'}), 400

        files = request.files.getlist('files')
        if not files or all(f.filename == '' for f in files):
            return jsonify({'error': 'No files selected'}), 400

        # Validate first: decide which parts will be saved before deleting
        # anything already on disk.
        accepted = []
        for file in files:
            if file.filename == '':
                continue

            # Check if it's an Excel file
            filename = secure_filename(file.filename)
            if not filename.endswith(('.xlsx', '.xls')):
                logger.warning(f"Skipping non-Excel file: {filename}")
                continue
            accepted.append((filename, file))

        if not accepted:
            return jsonify({'error': 'No valid Excel files uploaded'}), 400

        report_config = app.config['REPORT_CONFIG']
        reports_dir = Path(report_config.get('reports_dir', 'reports'))
        if not reports_dir.is_absolute():
            reports_dir = Path(__file__).parent.absolute() / reports_dir

        # Ensure reports directory exists
        reports_dir.mkdir(parents=True, exist_ok=True)

        # ALWAYS clear ALL old Excel files from reports directory before uploading new ones
        # CRITICAL: This prevents combining multiple files in report generation
        old_excel_files = list(reports_dir.glob('*.xlsx')) + list(reports_dir.glob('*.xls'))
        cleared_count = 0
        failed_to_clear = []
        max_retries = 3

        for old_file in old_excel_files:
            try:
                # On Windows, files might be locked - try multiple times
                for attempt in range(1, max_retries + 1):
                    try:
                        old_file.unlink()
                        break
                    except PermissionError:
                        if attempt == max_retries:
                            raise
                        time.sleep(0.5)  # Wait 500ms before retry
                cleared_count += 1
                logger.info(f"Cleared old file before upload: {old_file.name}")
            except Exception as e:
                failed_to_clear.append(old_file.name)
                logger.error(f"Failed to clear old file {old_file.name}: {e}")

        # If any files failed to clear, fail the upload to prevent mixing old and new data
        if failed_to_clear:
            logger.error(f"CRITICAL: Failed to clear {len(failed_to_clear)} file(s) before upload: {failed_to_clear}")
            return jsonify({
                'error': f'Failed to clear {len(failed_to_clear)} old file(s) before upload. Please ensure files are not locked or in use.',
                'failed_files': failed_to_clear,
                'details': 'Old files must be cleared before upload to ensure report generation uses only the new file(s). Files may be locked by Excel or another process.'
            }), 500

        if cleared_count > 0:
            logger.info(f"Cleared {cleared_count} old Excel file(s) before upload")
        else:
            logger.info("No old Excel files found to clear (reports directory was empty)")

        uploaded_count = 0
        uploaded_files = []
        for filename, file in accepted:
            # Save file to reports directory
            file_path = reports_dir / filename
            file.save(str(file_path))
            uploaded_count += 1
            uploaded_files.append(filename)
            logger.info(f"Uploaded file: {filename} -> {file_path}")

        # Warn if multiple files uploaded - reports should be generated from ONE file
        if uploaded_count > 1:
            logger.warning(f"WARNING: {uploaded_count} files uploaded. Reports should be generated from a single file. Only the newest file will be used.")

        return jsonify({
            'status': 'success',
            'message': f'Successfully uploaded {uploaded_count} file(s)',
            'uploaded_count': uploaded_count,
            'uploaded_files': uploaded_files,
            'cleared_old_files': cleared_count,
            'warning': f'{uploaded_count} file(s) uploaded - only the newest will be used for report generation' if uploaded_count > 1 else None
        })
    except Exception as e:
        logger.error(f"Error uploading files: {e}", exc_info=True)
        return jsonify({'error': f'Failed to upload files: {str(e)}'}), 500
|
||||
|
||||
@app.route('/api/status', methods=['GET'])
def status():
    """Get service status and configuration.

    Reports a fixed ``running`` status, whether SharePoint is enabled, and
    the reports/output directories taken from the app configuration.
    """
    sharepoint_cfg = app.config['SHAREPOINT_CONFIG']
    report_cfg = app.config['REPORT_CONFIG']

    payload = {
        'status': 'running',
        'sharepoint_enabled': sharepoint_cfg.get('enabled', False),
        'reports_dir': report_cfg.get('reports_dir', 'reports'),
        'output_dir': report_cfg.get('output_dir', 'output'),
    }
    return jsonify(payload)
|
||||
|
||||
@app.route('/api/report/json', methods=['GET'])
def get_report_json():
    """Get latest report JSON file.

    Candidates are the timestamped ``report-*.json`` files written by report
    generation plus the legacy ``report.json``; the one with the most recent
    modification time is returned.

    Returns:
        The report content as JSON, 404 if no report file exists, or 500 if
        the file cannot be read or parsed.
    """
    try:
        report_config = app.config['REPORT_CONFIG']
        output_dir = Path(report_config.get('output_dir', 'output'))
        if not output_dir.is_absolute():
            # Anchor relative paths at this script's directory, not the CWD.
            output_dir = Path(__file__).parent.absolute() / output_dir

        # Check both timestamped and legacy report files.
        candidates = list(output_dir.glob('report-*.json'))
        legacy_file = output_dir / 'report.json'
        if legacy_file.exists():
            candidates.append(legacy_file)

        if not candidates:
            return jsonify({'error': 'Report not found. Generate a report first.'}), 404

        report_file = max(candidates, key=lambda p: p.stat().st_mtime)
        with open(report_file, 'r', encoding='utf-8') as f:
            report_data = json.load(f)

        return jsonify(report_data)
    except Exception as e:
        logger.error(f"Error reading report JSON: {e}", exc_info=True)
        return jsonify({'error': f'Failed to read report: {str(e)}'}), 500
|
||||
|
||||
@app.route('/api/report/html', methods=['GET'])
def get_report_html():
    """Serve a generated HTML report.

    Query parameters:
        report_id: Optional report identifier, i.e. the file stem such as
            ``report-2025-11-08-11-25-46``. When omitted, the most recently
            modified report in the output directory is served.

    Returns:
        The HTML file on success; a JSON error with HTTP 404 when no matching
        report exists, or HTTP 500 on unexpected failures.
    """
    try:
        from flask import send_from_directory

        report_config = app.config['REPORT_CONFIG']
        output_dir = Path(report_config.get('output_dir', 'output'))
        if not output_dir.is_absolute():
            # Relative output directories are anchored at this script's folder.
            output_dir = Path(__file__).parent.absolute() / output_dir

        # Get report_id from query parameter, default to latest
        report_id = request.args.get('report_id')

        if report_id:
            # report_id is untrusted input that becomes part of a filesystem path:
            # accept only a bare file stem, never something containing path separators.
            if '/' in report_id or '\\' in report_id:
                return jsonify({'error': f'Report {report_id} not found.'}), 404

            html_file = output_dir / f"{report_id}.html"
            if not html_file.exists():
                # IDs starting with "report-" may have been generated for the legacy
                # report.html (which has no timestamp in its name), so fall back to it.
                legacy_file = output_dir / 'report.html'
                if report_id.startswith('report-') and legacy_file.exists():
                    html_file = legacy_file
                else:
                    return jsonify({'error': f'Report {report_id} not found.'}), 404
        else:
            # Get latest report (check both timestamped and legacy). The legacy file is
            # listed first so it wins when modification times are equal.
            legacy_file = output_dir / 'report.html'
            html_files = [legacy_file] if legacy_file.exists() else []
            html_files.extend(output_dir.glob('report-*.html'))

            if not html_files:
                return jsonify({'error': 'No reports found. Generate a report first.'}), 404

            html_file = max(html_files, key=lambda p: p.stat().st_mtime)

        return send_from_directory(str(output_dir), html_file.name, mimetype='text/html')
    except Exception as e:
        logger.error(f"Error reading report HTML: {e}", exc_info=True)
        return jsonify({'error': f'Failed to read report HTML: {str(e)}'}), 500
|
||||
|
||||
@app.route('/api/reports/list', methods=['GET'])
def list_reports():
    """List the most recently modified HTML reports (at most 10, newest first).

    Returns:
        JSON with ``reports`` (each entry has ``report_id``, ``date``,
        ``timestamp`` and ``file_size``) and ``count``; HTTP 500 with an
        ``error`` message on failure.
    """
    try:
        report_config = app.config['REPORT_CONFIG']
        output_dir = Path(report_config.get('output_dir', 'output'))

        # Ensure absolute path
        if not output_dir.is_absolute():
            output_dir = Path(__file__).parent.absolute() / output_dir

        # Log for debugging
        logger.info(f"Looking for reports in: {output_dir.absolute()}")
        logger.info(f"Output directory exists: {output_dir.exists()}")
        if output_dir.exists():
            logger.info(f"Files in output directory: {list(output_dir.glob('*'))}")

        # Find all report HTML files (both timestamped and non-timestamped)
        timestamped_files = list(output_dir.glob('report-*.html'))
        legacy_file = output_dir / 'report.html'

        logger.info(f"Found {len(timestamped_files)} timestamped report files")
        logger.info(f"Legacy report.html exists: {legacy_file.exists()}")
        if legacy_file.exists():
            logger.info(f"Legacy report.html path: {legacy_file.absolute()}")

        html_files = []

        # Add legacy report.html if it exists
        if legacy_file.exists():
            html_files.append(legacy_file)
            logger.info("Added legacy report.html to list")

        # Add timestamped files
        html_files.extend(timestamped_files)
        logger.info(f"Total HTML files found: {len(html_files)}")

        reports = []
        newest_first = sorted(html_files, key=lambda p: p.stat().st_mtime, reverse=True)
        for html_file in newest_first[:10]:
            file_stat = html_file.stat()
            report_id = html_file.stem  # e.g., "report-2025-11-08-11-25-46" or "report"

            if report_id == 'report':
                # Legacy report.html has no timestamp in its name: derive one from the
                # file modification time and synthesize a timestamped report ID.
                dt = datetime.fromtimestamp(file_stat.st_mtime)
                timestamp_str = dt.strftime('%Y-%m-%d-%H-%M-%S')
                date_str = dt.strftime('%Y-%m-%d %H:%M:%S')
                report_id = f"report-{timestamp_str}"
            else:
                # Timestamped report
                timestamp_str = report_id.replace('report-', '')
                try:
                    # Parse timestamp to create readable date
                    dt = datetime.strptime(timestamp_str, '%Y-%m-%d-%H-%M-%S')
                    date_str = dt.strftime('%Y-%m-%d %H:%M:%S')
                except ValueError:
                    # Name does not follow the timestamp scheme; show it verbatim.
                    date_str = timestamp_str

            reports.append({
                'report_id': report_id,
                'date': date_str,
                'timestamp': timestamp_str,
                'file_size': file_stat.st_size
            })

        return jsonify({
            'reports': reports,
            'count': len(reports)
        })
    except Exception as e:
        logger.error(f"Error listing reports: {e}", exc_info=True)
        return jsonify({'error': f'Failed to list reports: {str(e)}'}), 500
|
||||
|
||||
return app
|
||||
|
||||
|
||||
def run_server(config_path: Optional[str] = None, host: Optional[str] = None, port: Optional[int] = None):
    """Run the API server.

    Args:
        config_path: Path to the configuration file, passed to ``create_app``
            and ``load_config``.
        host: Interface to bind; overrides ``api.host`` from the configuration.
        port: Port to listen on; overrides ``api.port`` from the configuration.
    """
    app = create_app(config_path)

    # Load the configuration here rather than reading a module-level ``config``
    # name, which is only assigned when this file is executed as a script.
    api_config = load_config(config_path).get('api', {})
    server_host = host or api_config.get('host', '0.0.0.0')
    server_port = port or api_config.get('port', 8080)

    logger.info(f"Starting API server on {server_host}:{server_port}")
    app.run(host=server_host, port=server_port, debug=False)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    import sys

    logging.basicConfig(
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        level=logging.INFO,
    )

    # Optional first command-line argument: path to the configuration file.
    cli_args = sys.argv[1:]
    config_path = cli_args[0] if cli_args else None

    # The server is started even when the API is disabled in the configuration;
    # the check only produces a warning.
    config = load_config(config_path)
    api_enabled = config.get('api', {}).get('enabled', False)
    if not api_enabled:
        logger.warning("API is disabled in configuration. Set api.enabled=true to enable.")
        logger.info("Starting API server anyway (for testing)...")

    run_server(config_path=config_path)
|
||||
|
||||
262
config.py
Normal file
262
config.py
Normal file
@ -0,0 +1,262 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Configuration Management
|
||||
|
||||
Loads configuration from YAML file or environment variables.
|
||||
"""
|
||||
|
||||
import os
|
||||
import yaml
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Dict, Optional, Any
|
||||
|
||||
try:
|
||||
from dotenv import load_dotenv
|
||||
DOTENV_AVAILABLE = True
|
||||
except ImportError:
|
||||
DOTENV_AVAILABLE = False
|
||||
|
||||
# Built-in defaults. Values from the YAML file and from environment variables
# are layered on top of these by load_config().
DEFAULT_CONFIG = {
    'sharepoint': {
        'enabled': False,
        'site_url': '',
        'folder_path': '/Shared Documents/Reports',
        'file_path': None,  # Use folder_path for multiple files, file_path for single file
        'local_dir': 'reports',
        'username': None,
        'password': None,
        'tenant_id': None,  # Filled from SHAREPOINT_TENANT_ID / AZURE_AD_TENANT_ID when set
        'client_id': None,
        'client_secret': None,
        'use_app_authentication': False,
        'file_pattern': '*.xlsx',
        'overwrite': True
    },
    'scheduler': {
        'enabled': False,
        'schedule_type': 'interval',  # 'interval', 'cron', or 'once'
        'interval_hours': 24,  # For interval type
        'cron_expression': '0 8 * * *',  # For cron type (8 AM daily)
        'timezone': 'America/New_York'
    },
    'api': {
        'enabled': False,
        'host': '0.0.0.0',
        'port': 8080,
        'api_key': None  # Optional API key for authentication
    },
    'report': {
        'output_dir': 'output',
        'reports_dir': 'reports'
    }
}
|
||||
|
||||
|
||||
def load_config(config_path: Optional[str] = None) -> Dict[str, Any]:
    """
    Load configuration from defaults, an optional YAML file, and environment variables.

    Layers are applied in this order, later ones winning: ``DEFAULT_CONFIG``,
    the YAML file (if it exists and can be read), then environment variables.
    When python-dotenv is installed, a ``.env`` file next to this module (or in
    a sibling ``taskboard`` directory) is loaded into the environment first.

    Args:
        config_path: Path to config.yaml file (default: config.yaml next to this module)

    Returns:
        Configuration dictionary. It is an independent copy; modifying it does
        not affect ``DEFAULT_CONFIG``.
    """
    import copy  # local import: only needed to keep DEFAULT_CONFIG from being mutated

    # Load .env file if available (from this directory or the sibling taskboard directory)
    if DOTENV_AVAILABLE:
        module_dir = Path(__file__).parent

        # Try loading from vendor_report/.env first
        env_file = module_dir / ".env"
        if not env_file.exists():
            # Try loading from parent taskboard/.env
            parent_env = module_dir.parent / "taskboard" / ".env"
            if parent_env.exists():
                env_file = parent_env
                logging.info(f"Found .env file in taskboard directory: {env_file}")
            else:
                logging.warning(".env file not found in vendor_report or taskboard directory")
                logging.warning(f"Checked: {module_dir / '.env'}")
                logging.warning(f"Checked: {parent_env}")
        else:
            logging.info(f"Found .env file in vendor_report directory: {env_file}")

        if env_file.exists():
            # override=True: values from the .env file replace variables that are
            # already set in the process environment.
            load_dotenv(env_file, override=True)
            logging.info(f"Loaded environment variables from {env_file.absolute()}")

            # Log which SharePoint env vars were found (checking both SHAREPOINT_* and AZURE_AD_* fallbacks)
            sp_vars = ['SHAREPOINT_ENABLED', 'SHAREPOINT_SITE_URL', 'SHAREPOINT_FOLDER_PATH']
            found_vars = [var for var in sp_vars if os.getenv(var)]

            # Check credentials (with fallback)
            client_id = os.getenv('SHAREPOINT_CLIENT_ID') or os.getenv('AZURE_AD_CLIENT_ID')
            tenant_id = os.getenv('SHAREPOINT_TENANT_ID') or os.getenv('AZURE_AD_TENANT_ID')
            client_secret = os.getenv('SHAREPOINT_CLIENT_SECRET') or os.getenv('AZURE_AD_CLIENT_SECRET')

            if client_id:
                found_vars.append('CLIENT_ID (from SHAREPOINT_CLIENT_ID or AZURE_AD_CLIENT_ID)')
            if tenant_id:
                found_vars.append('TENANT_ID (from SHAREPOINT_TENANT_ID or AZURE_AD_TENANT_ID)')
            if client_secret:
                found_vars.append('CLIENT_SECRET (from SHAREPOINT_CLIENT_SECRET or AZURE_AD_CLIENT_SECRET)')

            logging.info(f"Found SharePoint environment variables: {', '.join(found_vars)}")

            # Only the names of missing credentials are logged, never their values.
            missing_vars = []
            if not client_id:
                missing_vars.append('CLIENT_ID (SHAREPOINT_CLIENT_ID or AZURE_AD_CLIENT_ID)')
            if not tenant_id:
                missing_vars.append('TENANT_ID (SHAREPOINT_TENANT_ID or AZURE_AD_TENANT_ID)')
            if not client_secret:
                missing_vars.append('CLIENT_SECRET (SHAREPOINT_CLIENT_SECRET or AZURE_AD_CLIENT_SECRET)')

            if missing_vars:
                logging.warning(f"Missing SharePoint credentials: {', '.join(missing_vars)}")

    if config_path is None:
        config_path = Path(__file__).parent / "config.yaml"
    else:
        config_path = Path(config_path)

    # Deep copy: the environment overlay below writes into nested sections, and a
    # shallow copy would let those writes leak into the module-level defaults.
    config = copy.deepcopy(DEFAULT_CONFIG)

    # Load from YAML file if exists
    if config_path.exists():
        try:
            with open(config_path, 'r', encoding='utf-8') as f:
                file_config = yaml.safe_load(f) or {}
            # Deep merge with defaults
            config = _deep_merge(config, file_config)
        except Exception as e:
            # A broken config file is not fatal: continue with defaults + environment.
            logging.warning(f"Failed to load config from {config_path}: {e}")

    # Override with environment variables
    config = _load_from_env(config)

    return config
|
||||
|
||||
|
||||
def _deep_merge(base: Dict, override: Dict) -> Dict:
|
||||
"""Deep merge two dictionaries."""
|
||||
result = base.copy()
|
||||
for key, value in override.items():
|
||||
if key in result and isinstance(result[key], dict) and isinstance(value, dict):
|
||||
result[key] = _deep_merge(result[key], value)
|
||||
else:
|
||||
result[key] = value
|
||||
return result
|
||||
|
||||
|
||||
def _load_from_env(config: Dict) -> Dict:
|
||||
"""Load configuration from environment variables."""
|
||||
# SharePoint settings
|
||||
if os.getenv('SHAREPOINT_ENABLED'):
|
||||
config['sharepoint']['enabled'] = os.getenv('SHAREPOINT_ENABLED').lower() == 'true'
|
||||
if os.getenv('SHAREPOINT_SITE_URL'):
|
||||
config['sharepoint']['site_url'] = os.getenv('SHAREPOINT_SITE_URL')
|
||||
if os.getenv('SHAREPOINT_FOLDER_PATH'):
|
||||
config['sharepoint']['folder_path'] = os.getenv('SHAREPOINT_FOLDER_PATH')
|
||||
if os.getenv('SHAREPOINT_USERNAME'):
|
||||
config['sharepoint']['username'] = os.getenv('SHAREPOINT_USERNAME')
|
||||
if os.getenv('SHAREPOINT_PASSWORD'):
|
||||
config['sharepoint']['password'] = os.getenv('SHAREPOINT_PASSWORD')
|
||||
# Check for SHAREPOINT_CLIENT_ID first, fallback to AZURE_AD_CLIENT_ID
|
||||
if os.getenv('SHAREPOINT_CLIENT_ID'):
|
||||
config['sharepoint']['client_id'] = os.getenv('SHAREPOINT_CLIENT_ID')
|
||||
elif os.getenv('AZURE_AD_CLIENT_ID'):
|
||||
config['sharepoint']['client_id'] = os.getenv('AZURE_AD_CLIENT_ID')
|
||||
|
||||
# Check for SHAREPOINT_CLIENT_SECRET first, fallback to AZURE_AD_CLIENT_SECRET
|
||||
if os.getenv('SHAREPOINT_CLIENT_SECRET'):
|
||||
config['sharepoint']['client_secret'] = os.getenv('SHAREPOINT_CLIENT_SECRET')
|
||||
elif os.getenv('AZURE_AD_CLIENT_SECRET'):
|
||||
config['sharepoint']['client_secret'] = os.getenv('AZURE_AD_CLIENT_SECRET')
|
||||
|
||||
# Tenant ID (required for Microsoft Graph API)
|
||||
if os.getenv('SHAREPOINT_TENANT_ID'):
|
||||
config['sharepoint']['tenant_id'] = os.getenv('SHAREPOINT_TENANT_ID')
|
||||
elif os.getenv('AZURE_AD_TENANT_ID'):
|
||||
config['sharepoint']['tenant_id'] = os.getenv('AZURE_AD_TENANT_ID')
|
||||
|
||||
if os.getenv('SHAREPOINT_USE_APP_AUTH'):
|
||||
config['sharepoint']['use_app_authentication'] = os.getenv('SHAREPOINT_USE_APP_AUTH').lower() == 'true'
|
||||
elif os.getenv('SHAREPOINT_USE_APP_AUTH') is None and os.getenv('AZURE_AD_CLIENT_ID'):
|
||||
# If Azure AD credentials are present, default to app auth
|
||||
config['sharepoint']['use_app_authentication'] = True
|
||||
|
||||
# Scheduler settings
|
||||
if os.getenv('SCHEDULER_ENABLED'):
|
||||
config['scheduler']['enabled'] = os.getenv('SCHEDULER_ENABLED').lower() == 'true'
|
||||
if os.getenv('SCHEDULER_INTERVAL_HOURS'):
|
||||
config['scheduler']['interval_hours'] = int(os.getenv('SCHEDULER_INTERVAL_HOURS'))
|
||||
if os.getenv('SCHEDULER_CRON'):
|
||||
config['scheduler']['cron_expression'] = os.getenv('SCHEDULER_CRON')
|
||||
|
||||
# API settings
|
||||
if os.getenv('API_ENABLED'):
|
||||
config['api']['enabled'] = os.getenv('API_ENABLED').lower() == 'true'
|
||||
if os.getenv('API_PORT'):
|
||||
config['api']['port'] = int(os.getenv('API_PORT'))
|
||||
if os.getenv('API_HOST'):
|
||||
config['api']['host'] = os.getenv('API_HOST')
|
||||
if os.getenv('API_KEY'):
|
||||
config['api']['api_key'] = os.getenv('API_KEY')
|
||||
|
||||
# Report settings
|
||||
if os.getenv('REPORT_OUTPUT_DIR'):
|
||||
config['report']['output_dir'] = os.getenv('REPORT_OUTPUT_DIR')
|
||||
if os.getenv('REPORT_REPORTS_DIR'):
|
||||
config['report']['reports_dir'] = os.getenv('REPORT_REPORTS_DIR')
|
||||
|
||||
return config
|
||||
|
||||
|
||||
def save_config_template(config_path: Optional[str] = None) -> None:
    """Write a commented starter configuration file.

    Args:
        config_path: Destination path (default: ``config.yaml.template`` next
            to this module). An existing file is overwritten.
    """
    destination = (
        Path(__file__).parent / "config.yaml.template"
        if config_path is None
        else Path(config_path)
    )

    template = """# Vendor Report Generator Configuration

# SharePoint Integration
sharepoint:
  enabled: false # Set to true to enable SharePoint downloads
  site_url: "https://yourcompany.sharepoint.com/sites/YourSite"
  folder_path: "/Shared Documents/Reports" # Path to folder containing Excel files
  # file_path: "/Shared Documents/Reports/file.xlsx" # Alternative: single file path
  local_dir: "reports" # Local directory to save downloaded files
  username: null # Username for user authentication (leave null if using app auth)
  password: null # Password for user authentication (leave null if using app auth)
  client_id: null # Azure AD app client ID (for app authentication)
  client_secret: null # Azure AD app client secret (for app authentication)
  use_app_authentication: false # Set to true to use app authentication (recommended)
  file_pattern: "*.xlsx" # Pattern to filter files
  overwrite: true # Whether to overwrite existing files

# Scheduler Configuration
scheduler:
  enabled: false # Set to true to enable scheduled report generation
  schedule_type: "interval" # Options: "interval", "cron", or "once"
  interval_hours: 24 # For interval type: generate report every N hours
  cron_expression: "0 8 * * *" # For cron type: generate at 8 AM daily (cron format)
  timezone: "America/New_York" # Timezone for scheduling

# API Configuration (for on-demand report generation)
api:
  enabled: false # Set to true to enable web API
  host: "0.0.0.0" # Host to bind API server
  port: 8080 # Port for API server
  api_key: null # Optional API key for authentication (set to enable auth)

# Report Settings
report:
  output_dir: "output" # Directory for generated reports
  reports_dir: "reports" # Directory containing Excel files
"""

    destination.write_text(template)
    print(f"Configuration template saved to: {destination}")
|
||||
|
||||
37
config.yaml.template
Normal file
37
config.yaml.template
Normal file
@ -0,0 +1,37 @@
|
||||
# Vendor Report Generator Configuration
|
||||
|
||||
# SharePoint Integration
|
||||
sharepoint:
|
||||
enabled: false # Set to true to enable SharePoint downloads
|
||||
site_url: "https://yourcompany.sharepoint.com/sites/YourSite"
|
||||
folder_path: "/Shared Documents/Reports" # Path to folder containing Excel files
|
||||
# file_path: "/Shared Documents/Reports/file.xlsx" # Alternative: single file path
|
||||
local_dir: "reports" # Local directory to save downloaded files
|
||||
username: null # Username for user authentication (leave null if using app auth)
|
||||
password: null # Password for user authentication (leave null if using app auth)
|
||||
client_id: null # Azure AD app client ID (for app authentication)
|
||||
client_secret: null # Azure AD app client secret (for app authentication)
|
||||
use_app_authentication: false # Set to true to use app authentication (recommended)
|
||||
file_pattern: "*.xlsx" # Pattern to filter files
|
||||
overwrite: true # Whether to overwrite existing files
|
||||
|
||||
# Scheduler Configuration
|
||||
scheduler:
|
||||
enabled: false # Set to true to enable scheduled report generation
|
||||
schedule_type: "interval" # Options: "interval", "cron", or "once"
|
||||
interval_hours: 24 # For interval type: generate report every N hours
|
||||
cron_expression: "0 8 * * *" # For cron type: generate at 8 AM daily (cron format)
|
||||
timezone: "America/New_York" # Timezone for scheduling
|
||||
|
||||
# API Configuration (for on-demand report generation)
|
||||
api:
|
||||
enabled: false # Set to true to enable web API
|
||||
host: "0.0.0.0" # Host to bind API server
|
||||
port: 8080 # Port for API server
|
||||
api_key: null # Optional API key for authentication (set to enable auth)
|
||||
|
||||
# Report Settings
|
||||
report:
|
||||
output_dir: "output" # Directory for generated reports
|
||||
reports_dir: "reports" # Directory containing Excel files
|
||||
|
||||
@ -63,6 +63,11 @@ def get_priority_badge_class(priority: Optional[str]) -> str:
|
||||
return "badge-secondary"
|
||||
|
||||
|
||||
def escape_js_string(s: str) -> str:
    """Escape a string for use inside a quoted JavaScript string literal.

    Backslashes, double quotes, single quotes, newlines and carriage returns
    are backslash-escaped, so the result is safe in both single- and
    double-quoted JavaScript strings.

    This function does not HTML-escape. When the JavaScript is placed inside an
    HTML attribute (such as ``onclick``), the caller must additionally
    HTML-escape the result.

    Args:
        s: Raw text to embed.

    Returns:
        The escaped text, without surrounding quotes.
    """
    # A single translate() pass avoids re-escaping backslashes that were
    # introduced by an earlier replacement.
    replacements = {
        '\\': '\\\\',
        '"': '\\"',
        "'": "\\'",
        '\n': '\\n',
        '\r': '\\r',
    }
    return s.translate(str.maketrans(replacements))
|
||||
|
||||
|
||||
def generate_html_report(json_path: str, output_path: Optional[str] = None) -> str:
|
||||
"""
|
||||
Generate HTML report from JSON report file.
|
||||
@ -784,7 +789,7 @@ def generate_html_content(report_data: Dict) -> str:
|
||||
<div class="tabs-container">
|
||||
<div class="tabs" id="vendor-tabs">
|
||||
<button class="tab active" onclick="switchVendorTab('all')" data-vendor="all">All Vendors</button>
|
||||
{''.join([f'<button class="tab" onclick="switchVendorTab(\'{vn}\')" data-vendor="{vn}">{vn}</button>' for vn in vendor_names])}
|
||||
{''.join(['<button class="tab" onclick="switchVendorTab(' + "'" + escape_js_string(vn) + "'" + ')" data-vendor="' + vn + '">' + vn + '</button>' for vn in vendor_names])}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
9795
output/report-2025-11-08-15-16-56.html
Normal file
9795
output/report-2025-11-08-15-16-56.html
Normal file
File diff suppressed because it is too large
Load Diff
3972
output/report-2025-11-08-15-16-56.json
Normal file
3972
output/report-2025-11-08-15-16-56.json
Normal file
File diff suppressed because it is too large
Load Diff
9795
output/report-2025-11-08-15-18-53.html
Normal file
9795
output/report-2025-11-08-15-18-53.html
Normal file
File diff suppressed because it is too large
Load Diff
3972
output/report-2025-11-08-15-18-53.json
Normal file
3972
output/report-2025-11-08-15-18-53.json
Normal file
File diff suppressed because it is too large
Load Diff
9795
output/report-2025-11-08-15-21-12.html
Normal file
9795
output/report-2025-11-08-15-21-12.html
Normal file
File diff suppressed because it is too large
Load Diff
3972
output/report-2025-11-08-15-21-12.json
Normal file
3972
output/report-2025-11-08-15-21-12.json
Normal file
File diff suppressed because it is too large
Load Diff
9916
output/report-2025-11-08-15-24-16.html
Normal file
9916
output/report-2025-11-08-15-24-16.html
Normal file
File diff suppressed because it is too large
Load Diff
4080
output/report-2025-11-08-15-24-16.json
Normal file
4080
output/report-2025-11-08-15-24-16.json
Normal file
File diff suppressed because it is too large
Load Diff
9916
output/report-2025-11-08-15-24-30.html
Normal file
9916
output/report-2025-11-08-15-24-30.html
Normal file
File diff suppressed because it is too large
Load Diff
4080
output/report-2025-11-08-15-24-30.json
Normal file
4080
output/report-2025-11-08-15-24-30.json
Normal file
File diff suppressed because it is too large
Load Diff
9916
output/report-2025-11-08-15-24-35.html
Normal file
9916
output/report-2025-11-08-15-24-35.html
Normal file
File diff suppressed because it is too large
Load Diff
4080
output/report-2025-11-08-15-24-35.json
Normal file
4080
output/report-2025-11-08-15-24-35.json
Normal file
File diff suppressed because it is too large
Load Diff
13534
output/report-2025-11-08-15-24-57.html
Normal file
13534
output/report-2025-11-08-15-24-57.html
Normal file
File diff suppressed because it is too large
Load Diff
7569
output/report-2025-11-08-15-24-57.json
Normal file
7569
output/report-2025-11-08-15-24-57.json
Normal file
File diff suppressed because it is too large
Load Diff
9723
output/report-2025-11-08-15-33-57.html
Normal file
9723
output/report-2025-11-08-15-33-57.html
Normal file
File diff suppressed because it is too large
Load Diff
3936
output/report-2025-11-08-15-33-57.json
Normal file
3936
output/report-2025-11-08-15-33-57.json
Normal file
File diff suppressed because it is too large
Load Diff
9723
output/report-2025-11-08-15-42-46.html
Normal file
9723
output/report-2025-11-08-15-42-46.html
Normal file
File diff suppressed because it is too large
Load Diff
3936
output/report-2025-11-08-15-42-46.json
Normal file
3936
output/report-2025-11-08-15-42-46.json
Normal file
File diff suppressed because it is too large
Load Diff
9723
output/report-2025-11-08-15-42-53.html
Normal file
9723
output/report-2025-11-08-15-42-53.html
Normal file
File diff suppressed because it is too large
Load Diff
3936
output/report-2025-11-08-15-42-53.json
Normal file
3936
output/report-2025-11-08-15-42-53.json
Normal file
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -2,3 +2,17 @@
|
||||
pandas>=2.0.0
|
||||
openpyxl>=3.0.0
|
||||
pydantic>=2.0.0
|
||||
|
||||
# Optional: SharePoint integration (Office365-REST-Python-Client)
|
||||
Office365-REST-Python-Client>=2.3.0
|
||||
|
||||
# Optional: Scheduling
|
||||
apscheduler>=3.10.0
|
||||
|
||||
# Optional: Web API
|
||||
flask>=2.3.0
|
||||
flask-cors>=4.0.0
|
||||
|
||||
# Configuration
|
||||
pyyaml>=6.0
|
||||
python-dotenv>=1.0.0
|
||||
28
run-local.ps1
Normal file
28
run-local.ps1
Normal file
@ -0,0 +1,28 @@
|
||||
# PowerShell script to run vendor-report API locally with environment variables
#
# Secrets are intentionally NOT stored in this file. Provide the SharePoint
# client secret through your shell session before running, e.g.
#   $env:SHAREPOINT_CLIENT_SECRET = "<secret>"
# or through a local, untracked .env file (config.py loads one when python-dotenv
# is installed).
# NOTE(review): a client secret was previously hard-coded here and committed;
# treat it as compromised and rotate it.

# Set SharePoint Configuration
$env:SHAREPOINT_ENABLED = "true"
$env:SHAREPOINT_SITE_URL = "https://automationstandard.sharepoint.com/sites/2429ODF_AMZ_MTN6_25K"
$env:SHAREPOINT_FOLDER_PATH = "/Documents/General/Amazon Punchlist [EXTERNAL]"
$env:SHAREPOINT_CLIENT_ID = "5e00db88-ff96-4070-8270-e6c9ea9282f0"
$env:SHAREPOINT_USE_APP_AUTH = "true"

# Warn (do not abort) when no secret is present in this session: it may still be
# supplied by a .env file that the API loads at startup.
if ((-not $env:SHAREPOINT_CLIENT_SECRET) -and (-not $env:AZURE_AD_CLIENT_SECRET)) {
    Write-Warning "SHAREPOINT_CLIENT_SECRET is not set in this session. Set it (or AZURE_AD_CLIENT_SECRET) in your environment or .env file, otherwise SharePoint downloads will fail."
}

# Set API Configuration
$env:API_ENABLED = "true"
$env:API_PORT = "8080"
$env:API_HOST = "0.0.0.0"

# Set Report Configuration
$env:REPORT_OUTPUT_DIR = "output"
$env:REPORT_REPORTS_DIR = "reports"

Write-Host "Starting vendor-report API with SharePoint configuration..." -ForegroundColor Green
Write-Host "SharePoint Site: $env:SHAREPOINT_SITE_URL" -ForegroundColor Cyan
Write-Host "Folder Path: $env:SHAREPOINT_FOLDER_PATH" -ForegroundColor Cyan
Write-Host "API will run on: http://localhost:8080" -ForegroundColor Cyan
Write-Host ""

# Run the API
python api_server.py
|
||||
|
||||
172
scheduler.py
Normal file
172
scheduler.py
Normal file
@ -0,0 +1,172 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Report Scheduler
|
||||
|
||||
Schedules automatic report generation with optional SharePoint downloads.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from typing import Optional
|
||||
from pathlib import Path
|
||||
|
||||
try:
|
||||
from apscheduler.schedulers.blocking import BlockingScheduler
|
||||
from apscheduler.triggers.interval import IntervalTrigger
|
||||
from apscheduler.triggers.cron import CronTrigger
|
||||
from apscheduler.triggers.date import DateTrigger
|
||||
SCHEDULER_AVAILABLE = True
|
||||
except ImportError:
|
||||
SCHEDULER_AVAILABLE = False
|
||||
logging.warning("APScheduler not installed. Scheduling features disabled.")
|
||||
|
||||
from config import load_config
|
||||
from report_generator import generate_report
|
||||
from sharepoint_downloader import download_from_sharepoint
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ReportScheduler:
    """Manages scheduled report generation."""

    def __init__(self, config_path: Optional[str] = None):
        """
        Initialize scheduler.

        Args:
            config_path: Path to configuration file

        Raises:
            ImportError: If APScheduler is not installed.
        """
        if not SCHEDULER_AVAILABLE:
            raise ImportError(
                "APScheduler is required for scheduling. "
                "Install it with: pip install apscheduler"
            )

        self.config = load_config(config_path)
        self.scheduler = BlockingScheduler(timezone=self.config['scheduler']['timezone'])
        self.scheduler_config = self.config['scheduler']
        self.sharepoint_config = self.config.get('sharepoint', {})
        self.report_config = self.config.get('report', {})

    def generate_report_job(self):
        """Job function to generate report.

        Optionally downloads the source files from SharePoint first, then
        generates the report. Every failure is logged rather than raised, so
        an error in one run does not propagate to the scheduler.
        """
        logger.info("=" * 70)
        logger.info("SCHEDULED REPORT GENERATION")
        logger.info("=" * 70)
        logger.info(f"Started at: {datetime.now()}")

        try:
            # Download from SharePoint if enabled
            if self.sharepoint_config.get('enabled'):
                logger.info("Downloading files from SharePoint...")
                try:
                    downloaded = download_from_sharepoint(
                        site_url=self.sharepoint_config['site_url'],
                        folder_path=self.sharepoint_config.get('folder_path'),
                        file_path=self.sharepoint_config.get('file_path'),
                        local_dir=self.sharepoint_config.get('local_dir', 'reports'),
                        tenant_id=self.sharepoint_config.get('tenant_id'),
                        client_id=self.sharepoint_config.get('client_id'),
                        client_secret=self.sharepoint_config.get('client_secret'),
                        use_app_authentication=self.sharepoint_config.get('use_app_authentication', True),
                        file_pattern=self.sharepoint_config.get('file_pattern'),
                        overwrite=self.sharepoint_config.get('overwrite', True)
                    )
                    logger.info(f"Downloaded {len(downloaded)} file(s) from SharePoint")
                except Exception as e:
                    # Continue with report generation even if download fails; keep
                    # the traceback so the cause can be diagnosed from the log.
                    logger.error(f"Failed to download from SharePoint: {e}", exc_info=True)

            # Generate report
            logger.info("Generating report...")
            reports_dir = self.report_config.get('reports_dir', 'reports')
            output_file = Path(self.report_config.get('output_dir', 'output')) / 'report.json'

            report_data = generate_report(
                reports_dir=reports_dir,
                output_file=str(output_file),
                verbose=True
            )

            if report_data:
                logger.info("✓ Scheduled report generation completed successfully")
            else:
                logger.error("✗ Scheduled report generation failed")

        except Exception as e:
            logger.error(f"Error in scheduled report generation: {e}", exc_info=True)

    def start(self):
        """Start the scheduler.

        Does nothing (beyond logging) when the scheduler is disabled or the
        schedule settings are invalid. For ``schedule_type == 'once'`` the job
        runs immediately and the method returns. Otherwise this call blocks
        until the scheduler is interrupted with Ctrl+C.
        """
        if not self.scheduler_config.get('enabled'):
            logger.warning("Scheduler is disabled in configuration")
            return

        schedule_type = self.scheduler_config.get('schedule_type', 'interval')

        if schedule_type == 'interval':
            # Schedule at regular intervals
            interval_hours = self.scheduler_config.get('interval_hours', 24)
            trigger = IntervalTrigger(hours=interval_hours)
            logger.info(f"Scheduling reports every {interval_hours} hours")

        elif schedule_type == 'cron':
            # Schedule using cron expression
            cron_expression = self.scheduler_config.get('cron_expression', '0 8 * * *')
            # Parse cron expression (format: "minute hour day month day_of_week")
            parts = cron_expression.split()
            if len(parts) != 5:
                logger.error(f"Invalid cron expression: {cron_expression}")
                return

            # A trigger instance does not inherit the scheduler's timezone, so pass
            # the configured one explicitly; otherwise the cron fields would be
            # evaluated in the machine's local timezone.
            trigger = CronTrigger(
                minute=parts[0],
                hour=parts[1],
                day=parts[2],
                month=parts[3],
                day_of_week=parts[4],
                timezone=self.scheduler_config.get('timezone')
            )
            logger.info(f"Scheduling reports with cron: {cron_expression}")

        elif schedule_type == 'once':
            # "once" is implemented as an immediate run rather than a DateTrigger.
            logger.info("Running report generation once (immediately)")
            self.generate_report_job()
            return

        else:
            logger.error(f"Unknown schedule type: {schedule_type}")
            return

        # Add job to scheduler
        self.scheduler.add_job(
            self.generate_report_job,
            trigger=trigger,
            id='generate_report',
            name='Generate Vendor Report',
            replace_existing=True
        )

        logger.info("Scheduler started. Press Ctrl+C to stop.")
        try:
            self.scheduler.start()
        except KeyboardInterrupt:
            logger.info("Scheduler stopped by user")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    import sys

    logging.basicConfig(
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        level=logging.INFO,
    )

    # Optional first command-line argument: path to the configuration file.
    cli_args = sys.argv[1:]
    config_path = cli_args[0] if cli_args else None

    scheduler = ReportScheduler(config_path=config_path)
    scheduler.start()
|
||||
|
||||
461
sharepoint_downloader.py
Normal file
461
sharepoint_downloader.py
Normal file
@ -0,0 +1,461 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
SharePoint File Downloader using Office365-REST-Python-Client
|
||||
|
||||
Downloads Excel files from SharePoint to the local reports directory.
|
||||
Uses Office365-REST-Python-Client library for SharePoint REST API access.
|
||||
"""
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Optional, List
|
||||
from datetime import datetime
|
||||
import logging
|
||||
|
||||
try:
|
||||
from office365.sharepoint.client_context import ClientContext
|
||||
from office365.runtime.auth.authentication_context import AuthenticationContext
|
||||
from office365.sharepoint.files.file import File
|
||||
OFFICE365_AVAILABLE = True
|
||||
except ImportError:
|
||||
OFFICE365_AVAILABLE = False
|
||||
logging.warning("office365-rest-python-client not installed. SharePoint features disabled.")
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class SharePointDownloader:
    """Downloads files from SharePoint using Office365-REST-Python-Client."""

    # Extensions treated as Excel workbooks (case-sensitive match).
    _EXCEL_SUFFIXES = ('.xlsx', '.xls')

    def __init__(
        self,
        site_url: str,
        tenant_id: Optional[str] = None,
        client_id: Optional[str] = None,
        client_secret: Optional[str] = None,
        use_app_authentication: bool = True
    ):
        """
        Initialize SharePoint downloader using Office365-REST-Python-Client.

        Args:
            site_url: SharePoint site URL (e.g., "https://yourcompany.sharepoint.com/sites/YourSite")
            tenant_id: Azure AD tenant ID (stored, but not used by this class)
            client_id: Azure AD app client ID (required for app authentication)
            client_secret: Azure AD app client secret (required for app authentication)
            use_app_authentication: Whether to use app authentication (default: True)

        Raises:
            ImportError: If office365-rest-python-client is not installed.
            ValueError: If client_id or client_secret is missing.
        """
        if not OFFICE365_AVAILABLE:
            raise ImportError(
                "office365-rest-python-client is required for SharePoint integration. "
                "Install it with: pip install Office365-REST-Python-Client"
            )

        self.site_url = site_url.rstrip('/')
        self.tenant_id = tenant_id
        self.client_id = client_id
        self.client_secret = client_secret
        self.use_app_authentication = use_app_authentication
        # Set only once a connection has been verified (see connect()).
        self.ctx = None

        if not self.client_id or not self.client_secret:
            logger.error("Client ID and Client Secret are required for SharePoint authentication.")
            raise ValueError("Missing Azure AD credentials for SharePoint.")

    def connect(self) -> bool:
        """
        Connect to the SharePoint site using app-only credentials.

        The client context is stored on ``self.ctx`` only after a test query
        against the site succeeds, so a failed attempt never leaves a
        half-initialised context behind that later calls would mistake for a
        working connection.

        Returns:
            True if a verified connection exists, False otherwise.
        """
        if self.ctx:
            return True

        if not self.use_app_authentication:
            logger.error("Only app-only authentication is supported")
            return False

        try:
            # App-only authentication using Office365-REST-Python-Client
            from office365.runtime.auth.client_credential import ClientCredential

            logger.info(f"Connecting to SharePoint site: {self.site_url}")
            logger.info(f"Using Client ID: {self.client_id[:8]}... (truncated for security)")

            credentials = ClientCredential(self.client_id, self.client_secret)
            ctx = ClientContext(self.site_url).with_credentials(credentials)

            # Test connection by getting web.
            # This will fail if RSC is not granted or credentials are wrong.
            web = ctx.web
            ctx.load(web)
            ctx.execute_query()

            # Only now is the context known to be usable.
            self.ctx = ctx
            logger.info(f"Successfully connected to SharePoint site: {web.properties.get('Title', self.site_url)}")
            return True

        except Exception as e:
            # Make sure a later connect() retries instead of returning True.
            self.ctx = None
            error_msg = str(e)
            logger.error(f"Failed to connect to SharePoint: {error_msg}", exc_info=True)

            # Provide helpful error messages
            if "Unsupported app only token" in error_msg or "401" in error_msg:
                logger.error("This error usually means:")
                logger.error("1. Resource-Specific Consent (RSC) is not granted for this site")
                logger.error("2. Go to: {}/_layouts/15/appinv.aspx".format(self.site_url))
                logger.error("3. Enter App ID: {}".format(self.client_id))
                logger.error("4. Grant permission with XML: <AppPermissionRequests AllowAppOnlyPolicy=\"true\"><AppPermissionRequest Scope=\"http://sharepoint/content/sitecollection\" Right=\"Read\" /></AppPermissionRequests>")
            elif "403" in error_msg or "Forbidden" in error_msg:
                logger.error("403 Forbidden - App does not have access to this site")
                logger.error("RSC must be granted via appinv.aspx")
            elif "Invalid client secret" in error_msg or "invalid_client" in error_msg:
                logger.error("Invalid client credentials - check CLIENT_ID and CLIENT_SECRET")

            return False

    @staticmethod
    def _matches_pattern(file_name: str, file_pattern: Optional[str]) -> bool:
        """
        Return True if file_name is an Excel file accepted by file_pattern.

        Only ``*`` and ``?`` act as wildcards; ``[`` is matched literally
        because SharePoint names in this project contain brackets. A pattern
        without wildcards is treated as a required suffix, which is what the
        previous implementation did for every pattern.
        """
        from fnmatch import fnmatchcase

        if not file_name or not file_name.endswith(SharePointDownloader._EXCEL_SUFFIXES):
            return False
        if not file_pattern:
            return True
        if '*' in file_pattern or '?' in file_pattern:
            return fnmatchcase(file_name, file_pattern.replace('[', '[[]'))
        return file_name.endswith(file_pattern)

    @staticmethod
    def _parse_modified(value) -> datetime:
        """
        Convert a last-modified value into a timezone-aware datetime for sorting.

        Accepts a datetime or an ISO-8601 string (a trailing ``Z`` is allowed).
        Missing or unparsable values map to the earliest representable UTC
        time. Naive datetimes are assumed to be UTC so that every result is
        aware and mutually comparable; mixing naive and aware values would
        make ``sorted`` raise TypeError.
        """
        from datetime import timezone

        oldest = datetime.min.replace(tzinfo=timezone.utc)
        if not value:
            return oldest
        if isinstance(value, datetime):
            parsed = value
        else:
            try:
                parsed = datetime.fromisoformat(str(value).replace('Z', '+00:00'))
            except ValueError:
                return oldest
        if parsed.tzinfo is None:
            parsed = parsed.replace(tzinfo=timezone.utc)
        return parsed

    @staticmethod
    def _unlink_with_retry(path: Path, max_retries: int = 3, delay: float = 0.5) -> None:
        """Delete path, retrying on PermissionError (locked files on Windows)."""
        import time

        for attempt in range(1, max_retries + 1):
            try:
                path.unlink()
                return
            except PermissionError:
                if attempt == max_retries:
                    raise
                time.sleep(delay)  # Wait before retry

    def list_files_in_folder(
        self,
        folder_path: str,
        file_pattern: Optional[str] = None
    ) -> List[dict]:
        """
        List Excel files in a SharePoint folder.

        Args:
            folder_path: Folder path relative to site root (e.g., "Shared Documents/General/Amazon Punchlist [EXTERNAL]")
            file_pattern: Optional file pattern filter (e.g., "*.xlsx")

        Returns:
            List of file metadata dictionaries with the keys "name",
            "server_relative_url", "size" and "time_last_modified".
            An empty list is returned on connection or listing errors.
        """
        if not self.ctx:
            if not self.connect():
                return []

        try:
            from urllib.parse import urlparse

            # Normalize folder path
            # User provides: /Shared Documents/General/Amazon Punchlist [EXTERNAL]
            # SharePoint needs: /sites/SiteName/Shared Documents/General/Amazon Punchlist [EXTERNAL]
            folder_path = folder_path.strip('/')

            # Extract site path from site_url
            site_path = urlparse(self.site_url).path.strip('/')

            # Construct full server-relative URL: prepend the site path unless
            # the folder path already starts with it (or there is no site path).
            if site_path and not folder_path.startswith(site_path + '/'):
                server_relative_url = f"/{site_path}/{folder_path}"
            else:
                server_relative_url = f"/{folder_path}"

            logger.info(f"Listing files in folder: {server_relative_url}")
            logger.info(f"Site URL: {self.site_url}, Site path: {site_path}, Folder path: {folder_path}")

            # Get folder
            folder = self.ctx.web.get_folder_by_server_relative_url(server_relative_url)
            files = folder.files
            self.ctx.load(files)
            self.ctx.execute_query()

            excel_files = []
            for file in files:
                file_name = file.properties["Name"]
                # Only consider Excel files that pass the optional pattern
                if not self._matches_pattern(file_name, file_pattern):
                    continue
                excel_files.append({
                    "name": file_name,
                    "server_relative_url": file.properties.get("ServerRelativeUrl", ""),
                    "size": file.properties.get("Length", 0),
                    "time_last_modified": file.properties.get("TimeLastModified", "")
                })

            logger.info(f"Found {len(excel_files)} Excel file(s) in folder")
            for file_info in excel_files:
                logger.info(f" - {file_info['name']} ({file_info['size']} bytes)")

            return excel_files

        except Exception as e:
            logger.error(f"Error listing files: {e}", exc_info=True)
            return []

    def download_file(
        self,
        server_relative_url: str,
        file_name: str,
        local_path: str,
        overwrite: bool = True
    ) -> bool:
        """
        Download a single file from SharePoint.

        The content is written to a temporary ``<name>.part`` file next to
        the target and moved into place only after the download completes,
        so a failed download never leaves a truncated file at local_path.

        Args:
            server_relative_url: Server-relative URL of the file
            file_name: The original name of the file (for logging)
            local_path: Local path where file should be saved
            overwrite: Whether to overwrite existing file

        Returns:
            True if successful (or skipped because the file exists and
            overwrite is False), False otherwise
        """
        if not self.ctx:
            if not self.connect():
                return False

        partial_path = None
        try:
            local_file_path = Path(local_path)
            local_file_path.parent.mkdir(parents=True, exist_ok=True)

            if local_file_path.exists() and not overwrite:
                logger.info(f"File already exists, skipping: {local_path}")
                return True

            logger.info(f"Downloading file: {file_name} from {server_relative_url} to {local_path}")

            # Get file
            file = self.ctx.web.get_file_by_server_relative_url(server_relative_url)
            self.ctx.load(file)
            self.ctx.execute_query()

            partial_path = local_file_path.with_name(local_file_path.name + ".part")

            # The handle must stay open across execute_query(): download()
            # only queues the request, the query performs the actual write.
            with open(partial_path, "wb") as local_file:
                file.download(local_file)
                self.ctx.execute_query()

            # Publish the finished download under its real name.
            partial_path.replace(local_file_path)

            logger.info(f"Successfully downloaded: {file_name} -> {local_path}")
            return True

        except Exception as e:
            logger.error(f"Error downloading file {file_name}: {e}", exc_info=True)
            if partial_path is not None:
                try:
                    partial_path.unlink()
                except OSError:
                    # Nothing was written, or the leftover cannot be removed;
                    # either way the failure has already been reported.
                    pass
            return False

    def download_files_from_folder(
        self,
        folder_path: str,
        local_dir: str,
        file_pattern: Optional[str] = None,
        overwrite: bool = True,
        clear_existing: bool = True
    ) -> List[str]:
        """
        Download the newest Excel file from a SharePoint folder.

        All existing ``*.xlsx`` / ``*.xls`` files in local_dir are removed
        first, then only the most recently modified remote file is fetched.

        Args:
            folder_path: Folder path relative to site root
            local_dir: Local directory to save files
            file_pattern: Optional file pattern filter (e.g., "*.xlsx")
            overwrite: Whether to overwrite existing files
            clear_existing: Accepted for interface compatibility. NOTE: the
                value is currently ignored - existing Excel files are always
                cleared so that old and new data are never combined.

        Returns:
            List of downloaded file paths (at most 1 file - the newest)
        """
        # Connect to SharePoint
        if not self.connect():
            logger.error("Failed to connect to SharePoint")
            return []

        # Prepare local directory
        local_dir_path = Path(local_dir)
        local_dir_path.mkdir(parents=True, exist_ok=True)

        # ALWAYS clear ALL existing Excel files before downloading (to ensure only new files are used)
        # This is critical to prevent combining multiple files
        # NOTE(review): clearing happens before the remote listing, so a
        # failed listing leaves the directory empty - confirm this is intended.
        existing_files = list(local_dir_path.glob('*.xlsx')) + list(local_dir_path.glob('*.xls'))
        cleared_count = 0
        failed_to_clear = []

        for old_file in existing_files:
            try:
                self._unlink_with_retry(old_file)
                cleared_count += 1
                logger.info(f"Cleared existing file before download: {old_file.name}")
            except Exception as e:
                failed_to_clear.append(old_file.name)
                logger.error(f"Failed to clear existing file {old_file.name}: {e}")

        if failed_to_clear:
            logger.error(f"CRITICAL: Failed to clear {len(failed_to_clear)} file(s) before download: {failed_to_clear}")
            logger.error("This will cause data mixing! Files may be locked by another process.")
            # Don't fail here - let the download proceed, but log the warning

        if cleared_count > 0:
            logger.info(f"Cleared {cleared_count} existing Excel file(s) before downloading from SharePoint")
        else:
            logger.info("No existing Excel files found to clear (reports directory was empty)")

        # List files in folder
        files = self.list_files_in_folder(folder_path, file_pattern)

        if not files:
            logger.warning(f"No Excel files found in folder: {folder_path}")
            return []

        # Sort files by last modified date (newest first) and download only the newest one
        files_sorted = sorted(
            files,
            key=lambda f: self._parse_modified(f.get("time_last_modified", "")),
            reverse=True
        )
        newest_file = files_sorted[0]

        if len(files_sorted) > 1:
            logger.info(f"Found {len(files_sorted)} Excel file(s) in SharePoint folder. Using only the newest file.")
            logger.info(f"Newest file: {files_sorted[0]['name']} (modified: {files_sorted[0].get('time_last_modified', 'Unknown')})")
            logger.info(f"Skipping {len(files_sorted) - 1} older file(s) to avoid combining data")

        # Download only the newest file
        downloaded_files = []
        file_name = newest_file["name"]
        server_relative_url = newest_file["server_relative_url"]
        local_file_path = local_dir_path / file_name

        if self.download_file(server_relative_url, file_name, str(local_file_path), overwrite=overwrite):
            downloaded_files.append(str(local_file_path))
            logger.info(f"Successfully downloaded newest file: {file_name}")
        else:
            logger.error(f"Failed to download file: {file_name}")

        logger.info(f"Downloaded {len(downloaded_files)} file(s) from {folder_path} (using only newest file)")
        return downloaded_files
|
||||
|
||||
|
||||
def download_from_sharepoint(
    site_url: str,
    folder_path: Optional[str] = None,
    file_path: Optional[str] = None,
    local_dir: str = "reports",
    tenant_id: Optional[str] = None,
    client_id: Optional[str] = None,
    client_secret: Optional[str] = None,
    use_app_authentication: bool = True,
    file_pattern: Optional[str] = None,
    overwrite: bool = True,
    clear_existing: bool = True
) -> List[str]:
    """
    Convenience function to download files from SharePoint using Office365-REST-Python-Client.

    Builds a SharePointDownloader and delegates to its
    download_files_from_folder method. Supplying file_path always results in
    an empty list (even when folder_path is also given), because single-file
    download is not implemented.

    Args:
        site_url: SharePoint site URL
        folder_path: Path to folder (the newest Excel file in it is downloaded)
        file_path: Path to specific file (if downloading single file) - NOT YET IMPLEMENTED
        local_dir: Local directory to save files
        tenant_id: Azure AD tenant ID (not used by Office365 library, but kept for compatibility)
        client_id: Azure AD app client ID (required for app authentication)
        client_secret: Azure AD app client secret (required for app authentication)
        use_app_authentication: Use app authentication (default: True)
        file_pattern: Pattern to filter files (e.g., "*.xlsx")
        overwrite: Whether to overwrite existing files
        clear_existing: Forwarded to download_files_from_folder (default: True)

    Returns:
        List of downloaded file paths (typically 1 file - the newest)
    """
    if not folder_path and not file_path:
        logger.error("Either folder_path or file_path must be provided")
        return []

    if file_path:
        logger.warning("Single file download not yet implemented")
        return []

    # From here on folder_path is guaranteed to be set, so no further
    # branching is needed (the former trailing "else" could never run).
    downloader = SharePointDownloader(
        site_url=site_url,
        tenant_id=tenant_id,
        client_id=client_id,
        client_secret=client_secret,
        use_app_authentication=use_app_authentication
    )

    # Download only the newest file from folder (clears existing files first)
    return downloader.download_files_from_folder(
        folder_path=folder_path,
        local_dir=local_dir,
        file_pattern=file_pattern,
        overwrite=overwrite,
        clear_existing=clear_existing
    )
|
||||
|
||||
|
||||
if __name__ == "__main__":
    import sys
    from config import load_config

    # Timestamped log lines on the root logger at INFO level.
    logging.basicConfig(
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        level=logging.INFO,
    )

    config = load_config()

    # Without a "sharepoint" section there is nothing to download.
    if not config.get('sharepoint'):
        logger.error("SharePoint configuration not found")
        sys.exit(1)

    sp_config = config['sharepoint']

    # Only site_url is mandatory; every other setting falls back to a default.
    downloaded = download_from_sharepoint(
        sp_config['site_url'],
        folder_path=sp_config.get('folder_path'),
        file_path=sp_config.get('file_path'),
        local_dir=sp_config.get('local_dir', 'reports'),
        tenant_id=sp_config.get('tenant_id'),
        client_id=sp_config.get('client_id'),
        client_secret=sp_config.get('client_secret'),
        use_app_authentication=sp_config.get('use_app_authentication', True),
        file_pattern=sp_config.get('file_pattern'),
        overwrite=sp_config.get('overwrite', True),
    )

    # Summary on stdout: the count first, then one line per saved file.
    print(f"Downloaded {len(downloaded)} file(s)")
    for file_path in downloaded:
        print(f" - {file_path}")
|
||||
26
test_docker.ps1
Normal file
26
test_docker.ps1
Normal file
@ -0,0 +1,26 @@
|
||||
# Simple Docker test script
# Runs the local syntax test, rebuilds the image without cache, then prints
# lines 794 and 1284 of html_generator.py from both the working tree and the
# image so they can be compared by eye, and finally imports the module inside
# a container.
$imageTag = 'vendor-report-api-test'

Write-Host "=== Testing Docker Image ===" -ForegroundColor Cyan

Write-Host "`n1. Testing local file..." -ForegroundColor Yellow
python test_syntax.py

Write-Host "`n2. Building Docker image..." -ForegroundColor Yellow
# Keep only the last few interesting lines of the build output.
docker build --no-cache -t $imageTag . 2>&1 |
    Select-String "Step|COPY|ERROR" |
    Select-Object -Last 5

Write-Host "`n3. Checking line 794 in LOCAL file:" -ForegroundColor Yellow
# Select-Object -Index is zero-based, hence 793 for line 794.
Get-Content html_generator.py | Select-Object -Index 793

Write-Host "`n4. Checking line 794 in DOCKER image:" -ForegroundColor Yellow
docker run --rm $imageTag sed -n '794p' /app/html_generator.py 2>&1

Write-Host "`n5. Checking line 1284 in LOCAL file:" -ForegroundColor Yellow
Get-Content html_generator.py | Select-Object -Index 1283

Write-Host "`n6. Checking line 1284 in DOCKER image:" -ForegroundColor Yellow
docker run --rm $imageTag sed -n '1284p' /app/html_generator.py 2>&1

Write-Host "`n7. Testing Python import in Docker:" -ForegroundColor Yellow
docker run --rm $imageTag python -c "import html_generator; print('SUCCESS')" 2>&1

Write-Host "`n=== Done ===" -ForegroundColor Cyan
|
||||
|
||||
20
test_syntax.py
Normal file
20
test_syntax.py
Normal file
@ -0,0 +1,20 @@
|
||||
#!/usr/bin/env python3
"""Quick syntax test for html_generator.py"""

import sys

# Exit status reported to the caller: 0 only when the import succeeds.
status = 1

try:
    import html_generator

    print("SUCCESS: html_generator.py imports without syntax errors!")
    has_escape_helper = hasattr(html_generator, 'escape_js_string')
    print(f"escape_js_string function exists: {has_escape_helper}")
    status = 0
except SyntaxError as err:
    # Report where the parser gave up so the offending line is easy to find.
    print(f"SYNTAX ERROR: {err}")
    print(f" File: {err.filename}")
    print(f" Line: {err.lineno}")
    print(f" Text: {err.text}")
except Exception as err:
    # Any other import-time failure (missing dependency, runtime error, ...).
    print(f"ERROR: {err}")

sys.exit(status)
|
||||
|
||||
55
verify_build.ps1
Normal file
55
verify_build.ps1
Normal file
@ -0,0 +1,55 @@
|
||||
# Comprehensive build verification script
# Checks that html_generator.py imports locally, inspects two specific lines
# for a leftover backslash-quote sequence, rebuilds the Docker image without
# cache and repeats the line/import checks inside the image.
$imageTag = 'vendor-report-api-test'

Write-Host "=== Vendor Report API Build Verification ===" -ForegroundColor Cyan

# 1. Verify local file syntax
Write-Host "`n1. Checking local file syntax..." -ForegroundColor Yellow
python -c "import html_generator; print('Local file syntax OK')" 2>&1
if ($LASTEXITCODE -eq 0) {
    Write-Host "Local file syntax OK" -ForegroundColor Green
} else {
    # No point building an image from a file that does not import.
    Write-Host "Local file has syntax errors!" -ForegroundColor Red
    exit 1
}

# 2. Check specific lines
Write-Host "`n2. Checking fixed lines..." -ForegroundColor Yellow
$sourceLines = Get-Content html_generator.py
foreach ($lineNumber in 794, 1284) {
    # Array indexes are zero-based, line numbers are one-based.
    $lineText = $sourceLines[$lineNumber - 1]

    Write-Host "Line ${lineNumber}: $($lineText.Substring(0, [Math]::Min(80, $lineText.Length)))..."
    # The pattern matches a literal backslash followed by a double quote.
    if ($lineText -match '\\"') {
        Write-Host "Line $lineNumber still has backslash!" -ForegroundColor Red
    } else {
        Write-Host "Line $lineNumber looks correct (no backslash)" -ForegroundColor Green
    }
}

# 3. Build image with --no-cache
Write-Host "`n3. Building Docker image with --no-cache..." -ForegroundColor Yellow
docker build --no-cache -t $imageTag . 2>&1 |
    Select-String "Step|COPY|ERROR" |
    Select-Object -Last 10

# 4. Verify what's in the image
Write-Host "`n4. Verifying Docker image contents..." -ForegroundColor Yellow
Write-Host "Line 794 in image:"
docker run --rm $imageTag sed -n '794p' /app/html_generator.py 2>&1

Write-Host "`nLine 1284 in image:"
docker run --rm $imageTag sed -n '1284p' /app/html_generator.py 2>&1

# 5. Test import in Docker
Write-Host "`n5. Testing Python import in Docker container..." -ForegroundColor Yellow
docker run --rm $imageTag python -c "import html_generator; print('SUCCESS')" 2>&1
if ($LASTEXITCODE -ne 0) {
    Write-Host "✗ Docker image still has errors!" -ForegroundColor Red
} else {
    Write-Host "✓ Docker image works!" -ForegroundColor Green
}

Write-Host "`n=== Verification Complete ===" -ForegroundColor Cyan
|
||||
|
||||
32
verify_docker_image.ps1
Normal file
32
verify_docker_image.ps1
Normal file
@ -0,0 +1,32 @@
|
||||
# Verify what's actually in the Docker image
# Builds the image named by -ImageName, prints lines 794 and 1284 of
# html_generator.py from inside it, tries to import the module in a
# container, and prints the same two lines from the local file for comparison.
param(
    [string]$ImageName = "vendor-report-api-test"
)

Write-Host "=== Verifying Docker Image Contents ===" -ForegroundColor Cyan

# Build the image (only COPY / Step lines of the output are shown)
Write-Host "`n1. Building image: $ImageName" -ForegroundColor Yellow
docker build -t $ImageName . 2>&1 |
    Select-String "COPY|Step"

# Check line 794 in the image
Write-Host "`n2. Checking line 794 in Docker image:" -ForegroundColor Yellow
docker run --rm $ImageName sed -n '794p' /app/html_generator.py

# Check line 1284 in the image
Write-Host "`n3. Checking line 1284 in Docker image:" -ForegroundColor Yellow
docker run --rm $ImageName sed -n '1284p' /app/html_generator.py

# Try to import the module
Write-Host "`n4. Testing Python import in Docker:" -ForegroundColor Yellow
docker run --rm $ImageName python -c "import html_generator; print('SUCCESS')" 2>&1

# Compare with local file (Select-Object -Index is zero-based)
Write-Host "`n5. Local file line 794:" -ForegroundColor Yellow
Get-Content html_generator.py | Select-Object -Index 793

Write-Host "`n6. Local file line 1284:" -ForegroundColor Yellow
Get-Content html_generator.py | Select-Object -Index 1283

Write-Host "`n=== Done ===" -ForegroundColor Cyan
|
||||
|
||||
765
web_ui.py
Normal file
765
web_ui.py
Normal file
@ -0,0 +1,765 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Web UI for Vendor Report Generator
|
||||
|
||||
Provides a simple web interface for generating reports, viewing status, and managing configuration.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
from datetime import datetime
|
||||
|
||||
try:
|
||||
from flask import Flask, render_template_string, jsonify, request, send_from_directory, redirect, url_for
|
||||
from flask_cors import CORS
|
||||
FLASK_AVAILABLE = True
|
||||
except ImportError:
|
||||
FLASK_AVAILABLE = False
|
||||
logging.warning("Flask not installed. Web UI features disabled.")
|
||||
|
||||
from config import load_config
|
||||
from report_generator import generate_report
|
||||
from sharepoint_downloader import download_from_sharepoint
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
app = None
|
||||
config = None
|
||||
|
||||
# HTML Template for the Web UI
|
||||
UI_TEMPLATE = """
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>Vendor Report Generator</title>
|
||||
<style>
|
||||
* {
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
box-sizing: border-box;
|
||||
}
|
||||
|
||||
body {
|
||||
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
|
||||
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
|
||||
min-height: 100vh;
|
||||
padding: 20px;
|
||||
}
|
||||
|
||||
.container {
|
||||
max-width: 1200px;
|
||||
margin: 0 auto;
|
||||
background: white;
|
||||
border-radius: 12px;
|
||||
box-shadow: 0 10px 40px rgba(0,0,0,0.2);
|
||||
overflow: hidden;
|
||||
}
|
||||
|
||||
header {
|
||||
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
|
||||
color: white;
|
||||
padding: 30px;
|
||||
text-align: center;
|
||||
}
|
||||
|
||||
header h1 {
|
||||
font-size: 2.5em;
|
||||
margin-bottom: 10px;
|
||||
}
|
||||
|
||||
header p {
|
||||
opacity: 0.9;
|
||||
font-size: 1.1em;
|
||||
}
|
||||
|
||||
.content {
|
||||
padding: 40px;
|
||||
}
|
||||
|
||||
.section {
|
||||
margin-bottom: 40px;
|
||||
padding: 30px;
|
||||
background: #f9fafb;
|
||||
border-radius: 8px;
|
||||
border: 1px solid #e5e7eb;
|
||||
}
|
||||
|
||||
.section h2 {
|
||||
color: #1e40af;
|
||||
margin-bottom: 20px;
|
||||
font-size: 1.5em;
|
||||
}
|
||||
|
||||
.button-group {
|
||||
display: flex;
|
||||
gap: 15px;
|
||||
flex-wrap: wrap;
|
||||
margin-top: 20px;
|
||||
}
|
||||
|
||||
.btn {
|
||||
padding: 12px 24px;
|
||||
border: none;
|
||||
border-radius: 6px;
|
||||
font-size: 1em;
|
||||
font-weight: 600;
|
||||
cursor: pointer;
|
||||
transition: all 0.2s;
|
||||
text-decoration: none;
|
||||
display: inline-block;
|
||||
}
|
||||
|
||||
.btn-primary {
|
||||
background: #2563eb;
|
||||
color: white;
|
||||
}
|
||||
|
||||
.btn-primary:hover {
|
||||
background: #1d4ed8;
|
||||
transform: translateY(-2px);
|
||||
box-shadow: 0 4px 12px rgba(37, 99, 235, 0.4);
|
||||
}
|
||||
|
||||
.btn-success {
|
||||
background: #10b981;
|
||||
color: white;
|
||||
}
|
||||
|
||||
.btn-success:hover {
|
||||
background: #059669;
|
||||
}
|
||||
|
||||
.btn-secondary {
|
||||
background: #6b7280;
|
||||
color: white;
|
||||
}
|
||||
|
||||
.btn-secondary:hover {
|
||||
background: #4b5563;
|
||||
}
|
||||
|
||||
.btn:disabled {
|
||||
opacity: 0.5;
|
||||
cursor: not-allowed;
|
||||
transform: none;
|
||||
}
|
||||
|
||||
.status-card {
|
||||
background: white;
|
||||
padding: 20px;
|
||||
border-radius: 8px;
|
||||
border-left: 4px solid #2563eb;
|
||||
margin-bottom: 15px;
|
||||
}
|
||||
|
||||
.status-card h3 {
|
||||
color: #374151;
|
||||
margin-bottom: 10px;
|
||||
}
|
||||
|
||||
.status-card p {
|
||||
color: #6b7280;
|
||||
margin: 5px 0;
|
||||
}
|
||||
|
||||
.status-indicator {
|
||||
display: inline-block;
|
||||
width: 12px;
|
||||
height: 12px;
|
||||
border-radius: 50%;
|
||||
margin-right: 8px;
|
||||
}
|
||||
|
||||
.status-indicator.active {
|
||||
background: #10b981;
|
||||
}
|
||||
|
||||
.status-indicator.inactive {
|
||||
background: #ef4444;
|
||||
}
|
||||
|
||||
.loading {
|
||||
display: none;
|
||||
text-align: center;
|
||||
padding: 20px;
|
||||
}
|
||||
|
||||
.loading.active {
|
||||
display: block;
|
||||
}
|
||||
|
||||
.spinner {
|
||||
border: 4px solid #f3f4f6;
|
||||
border-top: 4px solid #2563eb;
|
||||
border-radius: 50%;
|
||||
width: 40px;
|
||||
height: 40px;
|
||||
animation: spin 1s linear infinite;
|
||||
margin: 0 auto 15px;
|
||||
}
|
||||
|
||||
@keyframes spin {
|
||||
0% { transform: rotate(0deg); }
|
||||
100% { transform: rotate(360deg); }
|
||||
}
|
||||
|
||||
.alert {
|
||||
padding: 15px;
|
||||
border-radius: 6px;
|
||||
margin-bottom: 20px;
|
||||
display: none;
|
||||
}
|
||||
|
||||
.alert.active {
|
||||
display: block;
|
||||
}
|
||||
|
||||
.alert-success {
|
||||
background: #d1fae5;
|
||||
color: #065f46;
|
||||
border: 1px solid #10b981;
|
||||
}
|
||||
|
||||
.alert-error {
|
||||
background: #fee2e2;
|
||||
color: #991b1b;
|
||||
border: 1px solid #ef4444;
|
||||
}
|
||||
|
||||
.alert-info {
|
||||
background: #dbeafe;
|
||||
color: #1e40af;
|
||||
border: 1px solid #2563eb;
|
||||
}
|
||||
|
||||
.report-list {
|
||||
list-style: none;
|
||||
}
|
||||
|
||||
.report-item {
|
||||
background: white;
|
||||
padding: 15px;
|
||||
border-radius: 6px;
|
||||
margin-bottom: 10px;
|
||||
border: 1px solid #e5e7eb;
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
align-items: center;
|
||||
}
|
||||
|
||||
.report-item:hover {
|
||||
box-shadow: 0 2px 8px rgba(0,0,0,0.1);
|
||||
}
|
||||
|
||||
.report-info {
|
||||
flex: 1;
|
||||
}
|
||||
|
||||
.report-info strong {
|
||||
color: #1e40af;
|
||||
display: block;
|
||||
margin-bottom: 5px;
|
||||
}
|
||||
|
||||
.report-info small {
|
||||
color: #6b7280;
|
||||
}
|
||||
|
||||
.config-item {
|
||||
margin-bottom: 15px;
|
||||
padding: 15px;
|
||||
background: white;
|
||||
border-radius: 6px;
|
||||
border: 1px solid #e5e7eb;
|
||||
}
|
||||
|
||||
.config-item label {
|
||||
display: block;
|
||||
font-weight: 600;
|
||||
color: #374151;
|
||||
margin-bottom: 5px;
|
||||
}
|
||||
|
||||
.config-item .value {
|
||||
color: #6b7280;
|
||||
font-family: monospace;
|
||||
}
|
||||
|
||||
.badge {
|
||||
display: inline-block;
|
||||
padding: 4px 12px;
|
||||
border-radius: 12px;
|
||||
font-size: 0.85em;
|
||||
font-weight: 600;
|
||||
}
|
||||
|
||||
.badge-enabled {
|
||||
background: #d1fae5;
|
||||
color: #065f46;
|
||||
}
|
||||
|
||||
.badge-disabled {
|
||||
background: #fee2e2;
|
||||
color: #991b1b;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<div class="container">
|
||||
<header>
|
||||
<h1>📊 Vendor Report Generator</h1>
|
||||
<p>Generate comprehensive vendor punchlist reports from Excel files</p>
|
||||
</header>
|
||||
|
||||
<div class="content">
|
||||
<div id="alert-container"></div>
|
||||
|
||||
<!-- Update Data Section -->
|
||||
<div class="section">
|
||||
<h2>Update Data</h2>
|
||||
<p>Download the latest Excel files from SharePoint to update your local data.</p>
|
||||
|
||||
<div class="button-group">
|
||||
<button class="btn btn-success" onclick="updateFromSharePoint()">
|
||||
Update Data from SharePoint
|
||||
</button>
|
||||
</div>
|
||||
|
||||
<div class="loading" id="loading-update">
|
||||
<div class="spinner"></div>
|
||||
<p>Downloading files from SharePoint... This may take a moment.</p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Generate Report Section -->
|
||||
<div class="section">
|
||||
<h2>Generate Report</h2>
|
||||
<p>Generate a new report from Excel files in the local reports directory.</p>
|
||||
|
||||
<div class="button-group">
|
||||
<button class="btn btn-primary" onclick="generateReport()">
|
||||
Generate Report
|
||||
</button>
|
||||
</div>
|
||||
|
||||
<div class="loading" id="loading">
|
||||
<div class="spinner"></div>
|
||||
<p>Generating report... This may take a moment.</p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Status Section -->
|
||||
<div class="section">
|
||||
<h2>Service Status</h2>
|
||||
<div id="status-container">
|
||||
<div class="status-card">
|
||||
<h3>Loading status...</h3>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Reports Section -->
|
||||
<div class="section">
|
||||
<h2>Generated Reports</h2>
|
||||
<div id="reports-container">
|
||||
<p>Loading reports...</p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Configuration Section -->
|
||||
<div class="section">
|
||||
<h2>Configuration</h2>
|
||||
<div id="config-container">
|
||||
<p>Loading configuration...</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<script>
|
||||
// Update data from SharePoint
|
||||
async function updateFromSharePoint() {
    // Ask the backend to pull the latest Excel files from SharePoint.
    // The spinner stays visible for the whole round trip and any alert left
    // over from a previous action is cleared first.
    const spinner = document.getElementById('loading-update');
    const alerts = document.getElementById('alert-container');

    spinner.classList.add('active');
    alerts.innerHTML = '';

    const requestOptions = {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' }
    };

    try {
        const response = await fetch('/api/update-sharepoint', requestOptions);
        const data = await response.json();

        if (!response.ok) {
            showAlert('error', `Error: ${data.error || 'Failed to download from SharePoint'}`);
            return;
        }

        showAlert('success', `Successfully downloaded ${data.downloaded_count} file(s) from SharePoint!`);
        // The status card is refreshed after a successful download.
        loadStatus();
    } catch (error) {
        // Network failure or a response body that is not valid JSON.
        showAlert('error', `Error: ${error.message}`);
    } finally {
        // Runs on every path, including the early return above.
        spinner.classList.remove('active');
    }
}
|
||||
|
||||
// Generate report
|
||||
async function generateReport() {
    // Trigger report generation on the server from the Excel files that are
    // already present locally (no SharePoint download is requested here).
    const spinner = document.getElementById('loading');
    const alerts = document.getElementById('alert-container');

    spinner.classList.add('active');
    alerts.innerHTML = '';

    const payload = JSON.stringify({ download_from_sharepoint: false });

    try {
        const response = await fetch('/api/generate', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: payload
        });
        const data = await response.json();

        if (!response.ok) {
            showAlert('error', `Error: ${data.error || 'Failed to generate report'}`);
            return;
        }

        showAlert('success', `Report generated successfully! Processed ${data.vendors_count || 0} vendors.`);
        // Refresh both the report list and the status card after a successful run.
        loadReports();
        loadStatus();
    } catch (error) {
        // Network failure or a response body that is not valid JSON.
        showAlert('error', `Error: ${error.message}`);
        console.error('Generate report error:', error);
    } finally {
        // Runs on every path, including the early return above.
        spinner.classList.remove('active');
    }
}
|
||||
|
||||
// Load status
|
||||
async function loadStatus() {
    // Fetch /api/status and re-render the "Service Status" card.
    // Failures are only logged; the card keeps whatever it showed before.
    try {
        const response = await fetch('/api/status');
        const data = await response.json();

        // Derive the presentation bits up front so the markup below stays flat.
        const indicatorClass = data.status === 'running' ? 'active' : 'inactive';
        const badgeClass = data.sharepoint_enabled ? 'badge-enabled' : 'badge-disabled';
        const badgeLabel = data.sharepoint_enabled ? 'Enabled' : 'Disabled';

        const container = document.getElementById('status-container');
        container.innerHTML = `
            <div class="status-card">
                <h3>
                    <span class="status-indicator ${indicatorClass}"></span>
                    Service Status: ${data.status}
                </h3>
                <p><strong>SharePoint:</strong> <span class="badge ${badgeClass}">${badgeLabel}</span></p>
                <p><strong>Reports Directory:</strong> ${data.reports_dir}</p>
                <p><strong>Output Directory:</strong> ${data.output_dir}</p>
            </div>
        `;
    } catch (error) {
        console.error('Failed to load status:', error);
    }
}
|
||||
|
||||
// Load reports
|
||||
async function loadReports() {
    // Fetch /api/reports and render the list of generated reports.
    //
    // Fixes relative to the previous version:
    //   * the else-branch was never closed before "catch", which is a syntax
    //     error that prevents the whole script from being parsed;
    //   * file names are HTML-escaped when shown as text and URL-encoded when
    //     used in links, so names containing &, <, #, ? or spaces cannot
    //     break the markup or the link target.

    // Escape a value for use as HTML text content (not for attribute values).
    const escapeHtml = (value) => {
        const probe = document.createElement('div');
        probe.textContent = String(value);
        return probe.innerHTML;
    };

    try {
        const response = await fetch('/api/reports');
        const data = await response.json();

        const container = document.getElementById('reports-container');

        if (data.reports && data.reports.length > 0) {
            const reportsList = data.reports.map(report => `
                <div class="report-item">
                    <div class="report-info">
                        <strong>${escapeHtml(report.name)}</strong>
                        <small>Generated: ${escapeHtml(report.generated_at)} | Size: ${escapeHtml(report.size)}</small>
                    </div>
                    <div>
                        <a href="/reports/${encodeURIComponent(report.name)}" class="btn btn-primary" target="_blank">View HTML</a>
                        ${report.json_exists ? `<a href="/reports/${encodeURIComponent(report.json_name)}" class="btn btn-secondary" download>Download JSON</a>` : ''}
                    </div>
                </div>
            `).join('');

            container.innerHTML = `<ul class="report-list">${reportsList}</ul>`;
        } else {
            container.innerHTML = '<p>No reports generated yet.</p>';
        }
    } catch (error) {
        console.error('Failed to load reports:', error);
        document.getElementById('reports-container').innerHTML = '<p>Error loading reports.</p>';
    }
}
|
||||
|
||||
// Load configuration
|
||||
async function loadConfig() {
    // Fetch /api/config and render one labelled row per configuration key.

    // Turn "snake_case_key" into "Snake Case Key" for display.
    const toLabel = (key) =>
        key.replace(/_/g, ' ').replace(/\\b\\w/g, l => l.toUpperCase());

    // Booleans become an Enabled/Disabled badge; anything else is shown as
    // text, with falsy values replaced by a placeholder.
    const toDisplayValue = (value) => {
        if (typeof value === 'boolean') {
            return `<span class="badge ${value ? 'badge-enabled' : 'badge-disabled'}">${value ? 'Enabled' : 'Disabled'}</span>`;
        }
        return String(value || 'Not configured');
    };

    try {
        const response = await fetch('/api/config');
        const config = await response.json();

        const container = document.getElementById('config-container');
        const rows = [];
        for (const [key, value] of Object.entries(config)) {
            const displayValue = toDisplayValue(value);
            rows.push(`
                <div class="config-item">
                    <label>${toLabel(key)}</label>
                    <div class="value">${displayValue}</div>
                </div>
            `);
        }

        container.innerHTML = rows.join('');
    } catch (error) {
        console.error('Failed to load config:', error);
        document.getElementById('config-container').innerHTML = '<p>Error loading configuration.</p>';
    }
}
|
||||
|
||||
// Show alert
|
||||
function showAlert(type, message) {
    // Append a banner to the alert area and remove it again after 5 seconds.
    // "type" selects the CSS variant (the callers in this script pass
    // 'success' or 'error'); the message is set via textContent, so it is
    // shown as plain text.
    const host = document.getElementById('alert-container');

    const banner = document.createElement('div');
    banner.className = `alert alert-${type} active`;
    banner.textContent = message;
    host.appendChild(banner);

    const dismiss = () => {
        banner.remove();
    };
    setTimeout(dismiss, 5000);
}
|
||||
|
||||
// Load data on page load
|
||||
window.addEventListener('DOMContentLoaded', () => {
    // Populate every dynamic section once the DOM is ready.
    loadStatus();
    loadReports();
    loadConfig();

    // Status and report list are polled every 30 seconds; the configuration
    // is loaded only once.
    const refreshDashboard = () => {
        loadStatus();
        loadReports();
    };
    setInterval(refreshDashboard, 30000);
});
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
"""
|
||||
|
||||
|
||||
def create_app(config_path: Optional[str] = None):
    """Create and configure the Flask app that serves the Web UI and its JSON API.

    The configuration returned by ``load_config(config_path)`` is stored in the
    module-level ``config``; its ``api``, ``sharepoint`` and ``report`` sections
    are read here and the latter two are kept on ``app.config``. The created
    app is also stored in the module-level ``app``.

    Routes registered:
        ``/``                       the single-page UI (``UI_TEMPLATE``)
        ``/api/update-sharepoint``  POST, download files from SharePoint
        ``/api/generate``           POST, generate a report
        ``/api/status``             GET, service status
        ``/api/reports``            GET, list generated HTML reports
        ``/api/config``             GET, non-secret configuration values
        ``/reports/<filename>``     serve a file from the output directory
        ``/health``                 GET, health check

    Args:
        config_path: Passed through unchanged to ``load_config``.

    Returns:
        The configured Flask application.

    Raises:
        ImportError: If ``FLASK_AVAILABLE`` is false.
    """
    global app, config

    # Local import: only the API-key comparison below needs it.
    import hmac

    if not FLASK_AVAILABLE:
        raise ImportError(
            "Flask is required for Web UI. "
            "Install it with: pip install flask flask-cors"
        )

    app = Flask(__name__)
    CORS(app)

    config = load_config(config_path)
    api_config = config.get('api', {})
    sharepoint_config = config.get('sharepoint', {})
    report_config = config.get('report', {})

    app.config['API_KEY'] = api_config.get('api_key')
    app.config['SHAREPOINT_CONFIG'] = sharepoint_config
    app.config['REPORT_CONFIG'] = report_config

    def _json_body() -> dict:
        """Return the request's JSON object, or ``{}`` if absent or invalid.

        ``silent=True`` keeps an empty body or a non-JSON content type from
        raising; the UI's SharePoint button posts without a body.
        """
        payload = request.get_json(silent=True)
        return payload if isinstance(payload, dict) else {}

    def _auth_error():
        """Return a 401 response tuple when the API key check fails, else ``None``.

        No check is performed when no API key is configured. The key may be
        supplied in the ``X-API-Key`` header or as ``api_key`` in the JSON body.
        """
        expected = app.config.get('API_KEY')
        if not expected:
            return None

        provided = request.headers.get('X-API-Key') or _json_body().get('api_key')
        # Compare as bytes: compare_digest rejects non-ASCII str arguments.
        if provided is None or not hmac.compare_digest(
            str(provided).encode('utf-8'), str(expected).encode('utf-8')
        ):
            return jsonify({'error': 'Invalid API key'}), 401
        return None

    @app.route('/')
    def index():
        """Main web UI page."""
        return render_template_string(UI_TEMPLATE)

    @app.route('/api/update-sharepoint', methods=['POST'])
    def update_sharepoint_endpoint():
        """Download files from SharePoint."""
        denied = _auth_error()
        if denied is not None:
            return denied

        try:
            sp_config = app.config['SHAREPOINT_CONFIG']
            if not sp_config.get('enabled'):
                return jsonify({'error': 'SharePoint is not enabled in configuration'}), 400

            logger.info("Downloading files from SharePoint...")
            try:
                downloaded = download_from_sharepoint(
                    site_url=sp_config['site_url'],
                    folder_path=sp_config.get('folder_path'),
                    file_path=sp_config.get('file_path'),
                    local_dir=sp_config.get('local_dir', 'reports'),
                    tenant_id=sp_config.get('tenant_id'),
                    client_id=sp_config.get('client_id'),
                    client_secret=sp_config.get('client_secret'),
                    use_app_authentication=sp_config.get('use_app_authentication', True),
                    file_pattern=sp_config.get('file_pattern'),
                    overwrite=sp_config.get('overwrite', True),
                )
            except Exception as e:
                # Failures of the download itself get their own message.
                logger.exception("Failed to download from SharePoint: %s", e)
                return jsonify({'error': f'SharePoint download failed: {str(e)}'}), 500

            logger.info(f"Downloaded {len(downloaded)} file(s) from SharePoint")
            return jsonify({
                'status': 'success',
                'message': f'Successfully downloaded {len(downloaded)} file(s) from SharePoint',
                'downloaded_count': len(downloaded),
                'files': downloaded,
            })

        except Exception as e:
            # Anything outside the download call (bad config shape, response
            # serialization) is reported as a generic update failure.
            logger.exception("Error updating from SharePoint: %s", e)
            return jsonify({'error': f'Update failed: {str(e)}'}), 500

    @app.route('/api/generate', methods=['POST'])
    def generate_report_endpoint():
        """Generate report on demand."""
        denied = _auth_error()
        if denied is not None:
            return denied

        try:
            request_data = _json_body()

            report_config = app.config['REPORT_CONFIG']
            # NOTE(review): both paths below can be overridden by the request
            # body, i.e. the client chooses which directory is read and which
            # file is written. Consider restricting this if the endpoint is
            # reachable by untrusted clients.
            reports_dir = request_data.get('reports_dir', report_config.get('reports_dir', 'reports'))
            output_file = request_data.get(
                'output_file',
                str(Path(report_config.get('output_dir', 'output')) / 'report.json'),
            )

            # Check if reports directory exists and has files
            reports_path = Path(reports_dir)
            if not reports_path.exists():
                return jsonify({'error': f'Reports directory not found: {reports_dir}'}), 400

            excel_files = list(reports_path.glob('*.xlsx')) + list(reports_path.glob('*.xls'))
            if not excel_files:
                return jsonify({'error': f'No Excel files found in {reports_dir}. Please update data from SharePoint first.'}), 400

            logger.info(f"Generating report from {reports_dir} ({len(excel_files)} Excel file(s))...")
            report_data = generate_report(
                reports_dir=reports_dir,
                output_file=output_file,
                verbose=False,
            )

            if not (report_data and report_data.get('vendors')):
                return jsonify({'error': 'Report generation failed - no data processed'}), 500

            return jsonify({
                'status': 'success',
                'message': 'Report generated successfully',
                'output_file': output_file,
                'summary': report_data.get('summary', {}),
                'vendors_count': len(report_data.get('vendors', [])),
            })

        except Exception as e:
            logger.exception("Error generating report: %s", e)
            return jsonify({'error': f'Report generation failed: {str(e)}'}), 500

    @app.route('/api/status', methods=['GET'])
    def status():
        """Get service status."""
        return jsonify({
            'status': 'running',
            'sharepoint_enabled': app.config['SHAREPOINT_CONFIG'].get('enabled', False),
            'reports_dir': app.config['REPORT_CONFIG'].get('reports_dir', 'reports'),
            'output_dir': app.config['REPORT_CONFIG'].get('output_dir', 'output'),
        })

    @app.route('/api/reports', methods=['GET'])
    def list_reports():
        """List generated reports, newest first."""
        output_dir = Path(app.config['REPORT_CONFIG'].get('output_dir', 'output'))
        dated_reports = []

        if output_dir.exists():
            for html_file in output_dir.glob('*.html'):
                try:
                    info = html_file.stat()
                except OSError:
                    # File disappeared between listing and stat; skip it.
                    continue

                json_file = html_file.with_suffix('.json')
                has_json = json_file.exists()
                dated_reports.append((info.st_mtime, {
                    'name': html_file.name,
                    'json_name': json_file.name if has_json else None,
                    'json_exists': has_json,
                    'generated_at': datetime.fromtimestamp(info.st_mtime).strftime('%Y-%m-%d %H:%M:%S'),
                    'size': f"{info.st_size / 1024:.1f} KB",
                }))

        # Sort on the raw modification time (newest first) rather than on the
        # formatted string, which only has one-second resolution.
        dated_reports.sort(key=lambda item: item[0], reverse=True)

        return jsonify({'reports': [report for _, report in dated_reports]})

    @app.route('/api/config', methods=['GET'])
    def get_config():
        """Get configuration (safe, no secrets)."""
        return jsonify({
            'sharepoint_enabled': app.config['SHAREPOINT_CONFIG'].get('enabled', False),
            'sharepoint_site_url': app.config['SHAREPOINT_CONFIG'].get('site_url', 'Not configured'),
            'sharepoint_folder_path': app.config['SHAREPOINT_CONFIG'].get('folder_path', 'Not configured'),
            'reports_dir': app.config['REPORT_CONFIG'].get('reports_dir', 'reports'),
            'output_dir': app.config['REPORT_CONFIG'].get('output_dir', 'output'),
        })

    @app.route('/reports/<filename>')
    def serve_report(filename):
        """Serve report files."""
        output_dir = Path(app.config['REPORT_CONFIG'].get('output_dir', 'output'))
        # send_from_directory resolves a relative directory against the app's
        # root path, while list_reports resolves it against the working
        # directory. Make it absolute so both look at the same folder.
        return send_from_directory(str(output_dir.resolve()), filename)

    @app.route('/health', methods=['GET'])
    def health():
        """Health check."""
        return jsonify({'status': 'healthy', 'service': 'vendor-report-generator-ui'})

    return app
|
||||
|
||||
|
||||
def run_server(config_path: Optional[str] = None, host: Optional[str] = None, port: Optional[int] = None):
    """Build the Web UI app and serve it with Flask's built-in server.

    Args:
        config_path: Passed through to ``create_app``.
        host: Interface to bind to; when falsy, the ``api.host`` config value
            is used, defaulting to ``'0.0.0.0'``.
        port: Port to listen on; when falsy, the ``api.port`` config value is
            used, defaulting to ``8080``.
    """
    web_app = create_app(config_path)

    # create_app() has just populated the module-level ``config``.
    api_settings = config.get('api', {})
    server_host = host if host else api_settings.get('host', '0.0.0.0')
    server_port = port if port else api_settings.get('port', 8080)

    logger.info(f"Starting Web UI server on http://{server_host}:{server_port}")
    web_app.run(host=server_host, port=server_port, debug=False)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    import sys

    logging.basicConfig(
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        level=logging.INFO,
    )

    # The optional first command-line argument is the configuration path.
    cli_args = sys.argv[1:]
    config_path = cli_args[0] if cli_args else None

    # The config is loaded here only to warn about a disabled API section;
    # run_server() loads it again through create_app().
    config = load_config(config_path)
    api_enabled = config.get('api', {}).get('enabled', False)
    if not api_enabled:
        logger.warning("API is disabled in configuration, but starting Web UI anyway...")

    run_server(config_path=config_path)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user