vendor_report/sharepoint_downloader.py
2025-11-06 20:50:19 +04:00

293 lines
10 KiB
Python

#!/usr/bin/env python3
"""
SharePoint File Downloader
Downloads Excel files from SharePoint to the local reports directory.
Supports both scheduled and on-demand downloads.
"""
import os
from pathlib import Path
from typing import Optional, List
from datetime import datetime
import logging
# Module-level logger. It is created before the optional import below so the
# "library missing" warning goes through this module's logger rather than the
# root logger (calling ``logging.warning`` directly would implicitly run
# ``logging.basicConfig()`` as an import side effect).
logger = logging.getLogger(__name__)

# The SharePoint client library is an optional dependency: record whether it
# could be imported so that SharePointDownloader can raise a clear error at
# construction time instead of the whole module failing to import.
try:
    from office365.sharepoint.client_context import ClientContext
    from office365.runtime.auth.authentication_context import AuthenticationContext
    from office365.runtime.auth.user_credential import UserCredential
    from office365.runtime.auth.client_credential import ClientCredential
    SHAREPOINT_AVAILABLE = True
except ImportError:
    SHAREPOINT_AVAILABLE = False
    logger.warning("office365-rest-python-client not installed. SharePoint features disabled.")
class SharePointDownloader:
    """Download files from a SharePoint site to the local filesystem.

    The client context is created lazily: ``download_file`` and
    ``download_files_from_folder`` call ``authenticate`` themselves when no
    authenticated context is stored on the instance yet.
    """

    # Suffix appended to the destination name while a download is in progress.
    _PARTIAL_SUFFIX = ".part"

    def __init__(
        self,
        site_url: str,
        username: Optional[str] = None,
        password: Optional[str] = None,
        client_id: Optional[str] = None,
        client_secret: Optional[str] = None,
        use_app_authentication: bool = False
    ):
        """
        Initialize SharePoint downloader.

        Args:
            site_url: SharePoint site URL (e.g., "https://yourcompany.sharepoint.com/sites/YourSite")
            username: Username for user authentication (if not using app authentication)
            password: Password for user authentication (if not using app authentication)
            client_id: Azure AD app client ID (for app authentication)
            client_secret: Azure AD app client secret (for app authentication)
            use_app_authentication: Whether to use app authentication (recommended for automation)

        Raises:
            ImportError: If the office365 client library could not be imported.
        """
        if not SHAREPOINT_AVAILABLE:
            raise ImportError(
                "office365-rest-python-client is required for SharePoint integration. "
                "Install it with: pip install Office365-REST-Python-Client"
            )
        self.site_url = site_url
        self.username = username
        self.password = password
        self.client_id = client_id
        self.client_secret = client_secret
        self.use_app_authentication = use_app_authentication
        # Set by authenticate() once a test query against the site succeeded.
        self.ctx = None

    @staticmethod
    def _matches_pattern(file_name: str, file_pattern: Optional[str]) -> bool:
        """Return True when *file_name* passes the optional *file_pattern* filter.

        An empty or missing pattern accepts every name. A pattern containing
        glob wildcards (``*``, ``?``, ``[``) is matched case-sensitively
        against the whole name, so ``"report_*.xlsx"`` works as expected. A
        pattern without wildcards keeps the historical meaning of a plain
        suffix match (e.g. ``".xlsx"``).
        """
        import fnmatch

        if not file_pattern:
            return True
        if any(wildcard in file_pattern for wildcard in "*?["):
            return fnmatch.fnmatchcase(file_name, file_pattern)
        return file_name.endswith(file_pattern)

    def authenticate(self) -> bool:
        """Authenticate with SharePoint and verify the connection.

        App credentials are used when ``use_app_authentication`` is set and
        both ``client_id`` and ``client_secret`` are present; otherwise user
        credentials are used when both ``username`` and ``password`` are
        present.

        Returns:
            True if a test query against the site succeeded, False otherwise.
            ``self.ctx`` is only set on success, so a failed attempt is
            retried by the next download call instead of reusing a context
            that never worked.
        """
        try:
            if self.use_app_authentication and self.client_id and self.client_secret:
                # App authentication (recommended for automation)
                credentials = ClientCredential(self.client_id, self.client_secret)
                ctx = ClientContext(self.site_url).with_credentials(credentials)
                logger.info("Authenticated with SharePoint using app credentials")
            elif self.username and self.password:
                # User authentication
                credentials = UserCredential(self.username, self.password)
                ctx = ClientContext(self.site_url).with_credentials(credentials)
                logger.info("Authenticated with SharePoint using user credentials")
            else:
                logger.error("No authentication credentials provided")
                return False

            # Test connection: loading the web object forces a real request.
            web = ctx.web
            ctx.load(web)
            ctx.execute_query()

            # .get() so that a missing Title cannot turn a successful
            # connection into a reported authentication failure.
            site_title = web.properties.get('Title', self.site_url)
            logger.info(f"Successfully connected to SharePoint site: {site_title}")
            self.ctx = ctx
            return True
        except Exception as e:
            # Drop any context so callers checking ``self.ctx`` re-authenticate.
            self.ctx = None
            logger.error(f"SharePoint authentication failed: {e}")
            return False

    def download_file(
        self,
        file_path: str,
        local_path: str,
        overwrite: bool = True
    ) -> bool:
        """
        Download a single file from SharePoint.

        The content is first written to a staging file next to the
        destination and moved into place only after the transfer succeeded,
        so a failed download never truncates or replaces an existing file.

        Args:
            file_path: Path to file in SharePoint (e.g., "/Shared Documents/Reports/file.xlsx")
            local_path: Local path where file should be saved
            overwrite: Whether to overwrite existing file

        Returns:
            True if successful (or if the file already exists and
            ``overwrite`` is False), False otherwise
        """
        if not self.ctx and not self.authenticate():
            return False

        partial_path = None
        try:
            local_file_path = Path(local_path)
            local_file_path.parent.mkdir(parents=True, exist_ok=True)

            # Check if file exists and overwrite flag
            if local_file_path.exists() and not overwrite:
                logger.info(f"File already exists, skipping: {local_path}")
                return True

            # Download into the staging file first.
            partial_path = local_file_path.with_name(
                local_file_path.name + self._PARTIAL_SUFFIX
            )
            with open(partial_path, "wb") as local_file:
                remote_file = self.ctx.web.get_file_by_server_relative_url(file_path)
                remote_file.download(local_file)
                self.ctx.execute_query()

            # Only now touch the real destination.
            partial_path.replace(local_file_path)
            logger.info(f"Downloaded: {file_path} -> {local_path}")
            return True
        except Exception as e:
            logger.error(f"Failed to download {file_path}: {e}")
            if partial_path is not None:
                # Best-effort cleanup of the incomplete staging file; it may
                # not exist if the failure happened before it was created.
                try:
                    partial_path.unlink()
                except OSError:
                    pass
            return False

    def download_files_from_folder(
        self,
        folder_path: str,
        local_dir: str,
        file_pattern: Optional[str] = None,
        overwrite: bool = True
    ) -> List[str]:
        """
        Download all Excel files from a SharePoint folder.

        Args:
            folder_path: Path to folder in SharePoint (e.g., "/Shared Documents/Reports")
            local_dir: Local directory where files should be saved
            file_pattern: Optional pattern to filter files (e.g., "*.xlsx" or
                "report_*.xlsx"); a pattern without wildcards is treated as a
                required file-name suffix
            overwrite: Whether to overwrite existing files

        Returns:
            List of successfully downloaded file paths. If listing or
            iterating the folder fails part-way, the files downloaded so far
            are returned.
        """
        if not self.ctx and not self.authenticate():
            return []

        downloaded_files: List[str] = []
        try:
            folder = self.ctx.web.get_folder_by_server_relative_url(folder_path)
            files = folder.files
            self.ctx.load(files)
            self.ctx.execute_query()

            local_dir_path = Path(local_dir)
            local_dir_path.mkdir(parents=True, exist_ok=True)

            for remote_file in files:
                file_name = remote_file.properties["Name"]

                # Filter by pattern if provided
                if not self._matches_pattern(file_name, file_pattern):
                    continue

                # Only download Excel files
                if not file_name.endswith(('.xlsx', '.xls')):
                    continue

                local_file_path = local_dir_path / file_name
                if self.download_file(
                    remote_file.properties["ServerRelativeUrl"],
                    str(local_file_path),
                    overwrite=overwrite
                ):
                    downloaded_files.append(str(local_file_path))

            logger.info(f"Downloaded {len(downloaded_files)} files from {folder_path}")
            return downloaded_files
        except Exception as e:
            logger.error(f"Failed to download files from folder {folder_path}: {e}")
            return downloaded_files
def download_from_sharepoint(
    site_url: str,
    file_path: Optional[str] = None,
    folder_path: Optional[str] = None,
    local_dir: str = "reports",
    username: Optional[str] = None,
    password: Optional[str] = None,
    client_id: Optional[str] = None,
    client_secret: Optional[str] = None,
    use_app_authentication: bool = False,
    file_pattern: Optional[str] = None,
    overwrite: bool = True
) -> List[str]:
    """
    One-call helper that builds a SharePointDownloader and fetches files.

    ``file_path`` takes precedence: when it is given, only that file is
    downloaded and ``folder_path`` is ignored. Otherwise every matching file
    in ``folder_path`` is downloaded.

    Args:
        site_url: SharePoint site URL
        file_path: Server path of a single file to download
        folder_path: Server path of a folder whose files should be downloaded
        local_dir: Local directory the files are saved into
        username: Username for user authentication
        password: Password for user authentication
        client_id: Azure AD app client ID
        client_secret: Azure AD app client secret
        use_app_authentication: Use app authentication
        file_pattern: Pattern to filter files in folder mode (e.g., "*.xlsx")
        overwrite: Whether to overwrite existing files

    Returns:
        List of local paths of the downloaded files; empty when nothing was
        downloaded or when neither ``file_path`` nor ``folder_path`` is given.
    """
    client = SharePointDownloader(
        site_url=site_url,
        username=username,
        password=password,
        client_id=client_id,
        client_secret=client_secret,
        use_app_authentication=use_app_authentication
    )

    # Single-file mode: save under local_dir using the remote file's name.
    if file_path:
        destination = str(Path(local_dir) / Path(file_path).name)
        fetched = client.download_file(file_path, destination, overwrite=overwrite)
        return [destination] if fetched else []

    # Folder mode: delegate filtering and iteration to the downloader.
    if folder_path:
        return client.download_files_from_folder(
            folder_path=folder_path,
            local_dir=local_dir,
            file_pattern=file_pattern,
            overwrite=overwrite
        )

    logger.error("Either file_path or folder_path must be provided")
    return []
if __name__ == "__main__":
    # Script entry point: download according to the "sharepoint" section of
    # the project configuration and print the resulting local paths.
    import sys
    from config import load_config

    logging.basicConfig(level=logging.INFO)

    # Load configuration; a missing or empty section aborts with exit code 1.
    settings = load_config().get('sharepoint')
    if not settings:
        print("SharePoint configuration not found in config.yaml")
        sys.exit(1)

    # site_url is mandatory (a missing key raises KeyError); every other
    # setting is optional and falls back to the function's default behaviour.
    saved_paths = download_from_sharepoint(
        site_url=settings['site_url'],
        file_path=settings.get('file_path'),
        folder_path=settings.get('folder_path'),
        local_dir=settings.get('local_dir', 'reports'),
        username=settings.get('username'),
        password=settings.get('password'),
        client_id=settings.get('client_id'),
        client_secret=settings.get('client_secret'),
        use_app_authentication=settings.get('use_app_authentication', False),
        file_pattern=settings.get('file_pattern'),
        overwrite=True
    )

    print(f"Downloaded {len(saved_paths)} file(s):")
    for saved_path in saved_paths:
        print(f" - {saved_path}")