293 lines
10 KiB
Python
293 lines
10 KiB
Python
#!/usr/bin/env python3
"""
SharePoint File Downloader

Downloads Excel files from SharePoint to the local reports directory.
Supports both scheduled and on-demand downloads.
"""
|
|
|
|
import os
|
|
from pathlib import Path
|
|
from typing import Optional, List
|
|
from datetime import datetime
|
|
import logging
|
|
|
|
# The module logger is created before the optional import below so that the
# fallback warning goes through it. Calling the root-level ``logging.warning``
# at import time would implicitly run ``logging.basicConfig()`` when the root
# logger has no handlers, which silently turns any later ``basicConfig(...)``
# call made by the application into a no-op.
logger = logging.getLogger(__name__)

# office365-rest-python-client is an optional dependency: the module stays
# importable without it and SHAREPOINT_AVAILABLE records whether it was found.
try:
    from office365.sharepoint.client_context import ClientContext
    from office365.runtime.auth.authentication_context import AuthenticationContext
    from office365.runtime.auth.user_credential import UserCredential
    from office365.runtime.auth.client_credential import ClientCredential
    SHAREPOINT_AVAILABLE = True
except ImportError:
    SHAREPOINT_AVAILABLE = False
    logger.warning("office365-rest-python-client not installed. SharePoint features disabled.")
|
|
|
|
|
|
class SharePointDownloader:
    """Downloads files from a SharePoint site to the local filesystem.

    Authentication is lazy: ``download_file`` and
    ``download_files_from_folder`` call ``authenticate`` themselves when no
    context has been established yet.
    """

    # Lower-case suffixes accepted by ``download_files_from_folder``.
    _EXCEL_SUFFIXES = (".xlsx", ".xls")

    def __init__(
        self,
        site_url: str,
        username: Optional[str] = None,
        password: Optional[str] = None,
        client_id: Optional[str] = None,
        client_secret: Optional[str] = None,
        use_app_authentication: bool = False
    ):
        """
        Initialize SharePoint downloader.

        Args:
            site_url: SharePoint site URL (e.g., "https://yourcompany.sharepoint.com/sites/YourSite")
            username: Username for user authentication (if not using app authentication)
            password: Password for user authentication (if not using app authentication)
            client_id: Azure AD app client ID (for app authentication)
            client_secret: Azure AD app client secret (for app authentication)
            use_app_authentication: Whether to use app authentication (recommended for automation)

        Raises:
            ImportError: If office365-rest-python-client could not be imported.
        """
        if not SHAREPOINT_AVAILABLE:
            raise ImportError(
                "office365-rest-python-client is required for SharePoint integration. "
                "Install it with: pip install Office365-REST-Python-Client"
            )

        self.site_url = site_url
        self.username = username
        self.password = password
        self.client_id = client_id
        self.client_secret = client_secret
        self.use_app_authentication = use_app_authentication
        # Set by authenticate() once the connection test has succeeded.
        self.ctx = None

    @staticmethod
    def _matches_pattern(file_name: str, file_pattern: str) -> bool:
        """Return True if ``file_name`` satisfies ``file_pattern``.

        Patterns containing ``*`` or ``?`` are matched as case-sensitive
        shell-style globs against the whole name. A pattern without
        wildcards is treated as a required suffix (e.g. ".xlsx").
        """
        import fnmatch

        if "*" not in file_pattern and "?" not in file_pattern:
            return file_name.endswith(file_pattern)
        return fnmatch.fnmatchcase(file_name, file_pattern)

    @classmethod
    def _is_excel_file(cls, file_name: str) -> bool:
        """Return True if ``file_name`` ends in an Excel suffix, ignoring case."""
        return file_name.lower().endswith(cls._EXCEL_SUFFIXES)

    def authenticate(self) -> bool:
        """Authenticate with SharePoint and verify the connection.

        App credentials are used when ``use_app_authentication`` is set and
        both ``client_id`` and ``client_secret`` are present; otherwise
        username/password are used when both are present.

        Returns:
            True if a context was created and the test query succeeded,
            False otherwise. On failure ``self.ctx`` is reset to None so the
            next download attempt authenticates again.
        """
        try:
            if self.use_app_authentication and self.client_id and self.client_secret:
                # App authentication (recommended for automation)
                credentials = ClientCredential(self.client_id, self.client_secret)
                ctx = ClientContext(self.site_url).with_credentials(credentials)
                logger.info("Authenticated with SharePoint using app credentials")
            elif self.username and self.password:
                # User authentication
                credentials = UserCredential(self.username, self.password)
                ctx = ClientContext(self.site_url).with_credentials(credentials)
                logger.info("Authenticated with SharePoint using user credentials")
            else:
                logger.error("No authentication credentials provided")
                return False

            # Test connection before publishing the context on the instance,
            # so a rejected login never leaves a truthy self.ctx behind.
            web = ctx.web
            ctx.load(web)
            ctx.execute_query()
            self.ctx = ctx
            logger.info(f"Successfully connected to SharePoint site: {web.properties['Title']}")
            return True

        except Exception as e:
            self.ctx = None
            logger.error(f"SharePoint authentication failed: {e}")
            return False

    def download_file(
        self,
        file_path: str,
        local_path: str,
        overwrite: bool = True
    ) -> bool:
        """
        Download a single file from SharePoint.

        The content is first written to a sibling ``<name>.part`` file and
        moved over ``local_path`` only after the download succeeded, so a
        failed download neither truncates an existing file nor leaves a
        partial one behind.

        Args:
            file_path: Path to file in SharePoint (e.g., "/Shared Documents/Reports/file.xlsx")
            local_path: Local path where file should be saved
            overwrite: Whether to overwrite existing file

        Returns:
            True if successful (or the file exists and ``overwrite`` is
            False), False otherwise
        """
        if not self.ctx and not self.authenticate():
            return False

        try:
            target = Path(local_path)
            target.parent.mkdir(parents=True, exist_ok=True)

            # Check if file exists and overwrite flag
            if target.exists() and not overwrite:
                logger.info(f"File already exists, skipping: {local_path}")
                return True

            # Download into a temporary sibling, then swap it into place.
            partial = target.with_name(target.name + ".part")
            try:
                with open(partial, "wb") as local_file:
                    remote_file = self.ctx.web.get_file_by_server_relative_url(file_path)
                    remote_file.download(local_file)
                    self.ctx.execute_query()
                partial.replace(target)
            finally:
                # Still present only when the download or the move failed.
                if partial.exists():
                    partial.unlink()

            logger.info(f"Downloaded: {file_path} -> {local_path}")
            return True

        except Exception as e:
            logger.error(f"Failed to download {file_path}: {e}")
            return False

    def download_files_from_folder(
        self,
        folder_path: str,
        local_dir: str,
        file_pattern: Optional[str] = None,
        overwrite: bool = True
    ) -> List[str]:
        """
        Download all Excel files from a SharePoint folder.

        Args:
            folder_path: Path to folder in SharePoint (e.g., "/Shared Documents/Reports")
            local_dir: Local directory where files should be saved
            file_pattern: Optional pattern to filter files (e.g., "*.xlsx"
                or "sales_*.xlsx"); a pattern without wildcards is treated
                as a required suffix
            overwrite: Whether to overwrite existing files

        Returns:
            List of successfully downloaded file paths. If listing the
            folder fails part-way, the files downloaded so far are returned.
        """
        if not self.ctx and not self.authenticate():
            return []

        downloaded_files: List[str] = []

        try:
            folder = self.ctx.web.get_folder_by_server_relative_url(folder_path)
            files = folder.files
            self.ctx.load(files)
            self.ctx.execute_query()

            local_dir_path = Path(local_dir)
            local_dir_path.mkdir(parents=True, exist_ok=True)

            for remote_file in files:
                file_name = remote_file.properties["Name"]

                # Filter by pattern if provided
                if file_pattern and not self._matches_pattern(file_name, file_pattern):
                    continue

                # Only download Excel files
                if not self._is_excel_file(file_name):
                    continue

                local_file_path = local_dir_path / file_name

                if self.download_file(
                    remote_file.properties["ServerRelativeUrl"],
                    str(local_file_path),
                    overwrite=overwrite
                ):
                    downloaded_files.append(str(local_file_path))

            logger.info(f"Downloaded {len(downloaded_files)} files from {folder_path}")
            return downloaded_files

        except Exception as e:
            logger.error(f"Failed to download files from folder {folder_path}: {e}")
            return downloaded_files
|
|
|
|
|
|
def download_from_sharepoint(
    site_url: str,
    file_path: Optional[str] = None,
    folder_path: Optional[str] = None,
    local_dir: str = "reports",
    username: Optional[str] = None,
    password: Optional[str] = None,
    client_id: Optional[str] = None,
    client_secret: Optional[str] = None,
    use_app_authentication: bool = False,
    file_pattern: Optional[str] = None,
    overwrite: bool = True
) -> List[str]:
    """
    One-call helper: build a SharePointDownloader and fetch a file or folder.

    When ``file_path`` is given it takes precedence and only that file is
    downloaded into ``local_dir`` under its own name; otherwise the contents
    of ``folder_path`` are downloaded.

    Args:
        site_url: SharePoint site URL
        file_path: Path to specific file (if downloading single file)
        folder_path: Path to folder (if downloading all files from folder)
        local_dir: Local directory to save files
        username: Username for authentication
        password: Password for authentication
        client_id: Azure AD app client ID
        client_secret: Azure AD app client secret
        use_app_authentication: Use app authentication
        file_pattern: Pattern to filter files (e.g., "*.xlsx")
        overwrite: Whether to overwrite existing files

    Returns:
        List of downloaded file paths (empty when nothing was downloaded or
        neither ``file_path`` nor ``folder_path`` was supplied)
    """
    client = SharePointDownloader(
        site_url=site_url,
        username=username,
        password=password,
        client_id=client_id,
        client_secret=client_secret,
        use_app_authentication=use_app_authentication
    )

    # Nothing to fetch: report it and bail out early.
    if not file_path and not folder_path:
        logger.error("Either file_path or folder_path must be provided")
        return []

    # Single-file mode.
    if file_path:
        destination = str(Path(local_dir) / Path(file_path).name)
        succeeded = client.download_file(file_path, destination, overwrite=overwrite)
        return [destination] if succeeded else []

    # Folder mode.
    return client.download_files_from_folder(
        folder_path=folder_path,
        local_dir=local_dir,
        file_pattern=file_pattern,
        overwrite=overwrite
    )
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: read the SharePoint section of the project config
    # and download whatever it points at, then list the saved files.
    import sys
    from config import load_config

    logging.basicConfig(level=logging.INFO)

    settings = load_config()

    # Abort with a non-zero status when there is no SharePoint section.
    if not settings.get('sharepoint'):
        print("SharePoint configuration not found in config.yaml")
        sys.exit(1)

    sharepoint_settings = settings['sharepoint']

    # site_url is mandatory; every other key falls back to None / a default.
    call_args = {'site_url': sharepoint_settings['site_url']}
    for optional_key in (
        'folder_path',
        'file_path',
        'username',
        'password',
        'client_id',
        'client_secret',
        'file_pattern',
    ):
        call_args[optional_key] = sharepoint_settings.get(optional_key)
    call_args['local_dir'] = sharepoint_settings.get('local_dir', 'reports')
    call_args['use_app_authentication'] = sharepoint_settings.get('use_app_authentication', False)
    call_args['overwrite'] = True

    saved_paths = download_from_sharepoint(**call_args)

    print(f"Downloaded {len(saved_paths)} file(s):")
    for saved_path in saved_paths:
        print(f" - {saved_path}")
|
|
|