#!/usr/bin/env python3
"""
SharePoint File Downloader

Downloads Excel files from SharePoint to the local reports directory.
Supports both scheduled and on-demand downloads.
"""

import fnmatch
import os
import sys
from pathlib import Path
from typing import Optional, List
from datetime import datetime
import logging

# Created before the optional import so the fallback warning goes through the
# module logger instead of the root logger (a root-level logging.warning()
# call implicitly configures logging for every application importing this).
logger = logging.getLogger(__name__)

try:
    from office365.sharepoint.client_context import ClientContext
    from office365.runtime.auth.authentication_context import AuthenticationContext
    from office365.runtime.auth.user_credential import UserCredential
    from office365.runtime.auth.client_credential import ClientCredential
    SHAREPOINT_AVAILABLE = True
except ImportError:
    SHAREPOINT_AVAILABLE = False
    logger.warning("office365-rest-python-client not installed. SharePoint features disabled.")

# Extensions treated as Excel workbooks (compared case-insensitively).
_EXCEL_SUFFIXES = (".xlsx", ".xls")

# Characters that make a file pattern a glob rather than a plain suffix.
_GLOB_CHARS = "*?["


def _is_excel_file(file_name: str) -> bool:
    """Return True when *file_name* ends with an Excel extension, ignoring case."""
    return file_name.lower().endswith(_EXCEL_SUFFIXES)


def _matches_pattern(file_name: str, file_pattern: Optional[str]) -> bool:
    """Return True when *file_name* satisfies the optional *file_pattern*.

    An empty/None pattern matches everything. A pattern without glob
    characters is treated as a required suffix (so ".xlsx" keeps working);
    anything else is matched as a shell-style glob. Matching ignores case.
    """
    if not file_pattern:
        return True
    name = file_name.lower()
    pattern = file_pattern.lower()
    if not any(ch in pattern for ch in _GLOB_CHARS):
        return name.endswith(pattern)
    return fnmatch.fnmatchcase(name, pattern)


def _discard_partial(partial_path: Optional[Path]) -> None:
    """Best-effort removal of a leftover partial download."""
    if partial_path is None:
        return
    try:
        partial_path.unlink()
    except OSError:
        # Nothing to clean up (never created) or not removable; the failure
        # that brought us here has already been reported by the caller.
        pass


class SharePointDownloader:
    """Downloads files from SharePoint."""

    def __init__(
        self,
        site_url: str,
        username: Optional[str] = None,
        password: Optional[str] = None,
        client_id: Optional[str] = None,
        client_secret: Optional[str] = None,
        use_app_authentication: bool = False
    ):
        """
        Initialize SharePoint downloader.

        Args:
            site_url: SharePoint site URL (e.g., "https://yourcompany.sharepoint.com/sites/YourSite")
            username: Username for user authentication (if not using app authentication)
            password: Password for user authentication (if not using app authentication)
            client_id: Azure AD app client ID (for app authentication)
            client_secret: Azure AD app client secret (for app authentication)
            use_app_authentication: Whether to use app authentication (recommended for automation)

        Raises:
            ImportError: If the office365 client library could not be imported.
        """
        if not SHAREPOINT_AVAILABLE:
            raise ImportError(
                "office365-rest-python-client is required for SharePoint integration. "
                "Install it with: pip install Office365-REST-Python-Client"
            )

        self.site_url = site_url
        self.username = username
        self.password = password
        self.client_id = client_id
        self.client_secret = client_secret
        self.use_app_authentication = use_app_authentication
        # Set by authenticate() once a test query has succeeded.
        self.ctx = None

    def authenticate(self) -> bool:
        """
        Authenticate with SharePoint and verify the connection.

        App credentials are used when ``use_app_authentication`` is set and
        both ``client_id`` and ``client_secret`` are present; otherwise the
        username/password pair is used. The context is stored on ``self.ctx``
        only after a test query against the site succeeds.

        Returns:
            True if a verified context is available, False otherwise
        """
        try:
            if self.use_app_authentication and self.client_id and self.client_secret:
                # App authentication (recommended for automation)
                credentials = ClientCredential(self.client_id, self.client_secret)
                ctx = ClientContext(self.site_url).with_credentials(credentials)
                logger.info("Authenticated with SharePoint using app credentials")
            elif self.username and self.password:
                # User authentication
                credentials = UserCredential(self.username, self.password)
                ctx = ClientContext(self.site_url).with_credentials(credentials)
                logger.info("Authenticated with SharePoint using user credentials")
            else:
                logger.error("No authentication credentials provided")
                return False

            # Test connection before publishing the context.
            web = ctx.web
            ctx.load(web)
            ctx.execute_query()
            logger.info(f"Successfully connected to SharePoint site: {web.properties['Title']}")
        except Exception as e:
            # Never keep a context that failed verification: a truthy
            # self.ctx would make later downloads skip authentication.
            self.ctx = None
            logger.error(f"SharePoint authentication failed: {e}")
            return False

        self.ctx = ctx
        return True

    def download_file(
        self,
        file_path: str,
        local_path: str,
        overwrite: bool = True
    ) -> bool:
        """
        Download a single file from SharePoint.

        The content is written to a sibling "<name>.part" file and moved over
        the destination only after the transfer succeeds, so a failed download
        neither truncates an existing file nor leaves an empty one behind.

        Args:
            file_path: Path to file in SharePoint (e.g., "/Shared Documents/Reports/file.xlsx")
            local_path: Local path where file should be saved
            overwrite: Whether to overwrite existing file

        Returns:
            True if successful (or skipped because the file exists and
            overwrite is False), False otherwise
        """
        if not self.ctx and not self.authenticate():
            return False

        partial_path = None
        try:
            local_file_path = Path(local_path)
            local_file_path.parent.mkdir(parents=True, exist_ok=True)

            # Check if file exists and overwrite flag
            if local_file_path.exists() and not overwrite:
                logger.info(f"File already exists, skipping: {local_path}")
                return True

            partial_path = local_file_path.with_name(local_file_path.name + ".part")

            # Download into the partial file first
            with open(partial_path, "wb") as local_file:
                remote_file = self.ctx.web.get_file_by_server_relative_url(file_path)
                remote_file.download(local_file)
                self.ctx.execute_query()

            # Only now replace the destination with the complete download.
            os.replace(partial_path, local_file_path)
        except Exception as e:
            logger.error(f"Failed to download {file_path}: {e}")
            _discard_partial(partial_path)
            return False

        logger.info(f"Downloaded: {file_path} -> {local_path}")
        return True

    def download_files_from_folder(
        self,
        folder_path: str,
        local_dir: str,
        file_pattern: Optional[str] = None,
        overwrite: bool = True
    ) -> List[str]:
        """
        Download all Excel files from a SharePoint folder.

        Args:
            folder_path: Path to folder in SharePoint (e.g., "/Shared Documents/Reports")
            local_dir: Local directory where files should be saved
            file_pattern: Optional pattern to filter files (e.g., "*.xlsx").
                Glob patterns are matched against the file name; a pattern
                without wildcards is treated as a required suffix.
            overwrite: Whether to overwrite existing files

        Returns:
            List of successfully downloaded file paths (possibly partial if
            listing or a later step failed)
        """
        if not self.ctx and not self.authenticate():
            return []

        downloaded_files: List[str] = []

        try:
            folder = self.ctx.web.get_folder_by_server_relative_url(folder_path)
            files = folder.files
            self.ctx.load(files)
            self.ctx.execute_query()

            local_dir_path = Path(local_dir)
            local_dir_path.mkdir(parents=True, exist_ok=True)

            for remote_file in files:
                file_name = remote_file.properties["Name"]

                # Filter by pattern if provided
                if not _matches_pattern(file_name, file_pattern):
                    continue

                # Only download Excel files
                if not _is_excel_file(file_name):
                    continue

                local_file_path = local_dir_path / file_name
                if self.download_file(
                    remote_file.properties["ServerRelativeUrl"],
                    str(local_file_path),
                    overwrite=overwrite
                ):
                    downloaded_files.append(str(local_file_path))

            logger.info(f"Downloaded {len(downloaded_files)} files from {folder_path}")
            return downloaded_files

        except Exception as e:
            logger.error(f"Failed to download files from folder {folder_path}: {e}")
            return downloaded_files


def download_from_sharepoint(
    site_url: str,
    file_path: Optional[str] = None,
    folder_path: Optional[str] = None,
    local_dir: str = "reports",
    username: Optional[str] = None,
    password: Optional[str] = None,
    client_id: Optional[str] = None,
    client_secret: Optional[str] = None,
    use_app_authentication: bool = False,
    file_pattern: Optional[str] = None,
    overwrite: bool = True
) -> List[str]:
    """
    Convenience function to download files from SharePoint.

    When both ``file_path`` and ``folder_path`` are given, ``file_path`` wins
    and only that single file is downloaded.

    Args:
        site_url: SharePoint site URL
        file_path: Path to specific file (if downloading single file)
        folder_path: Path to folder (if downloading all files from folder)
        local_dir: Local directory to save files
        username: Username for authentication
        password: Password for authentication
        client_id: Azure AD app client ID
        client_secret: Azure AD app client secret
        use_app_authentication: Use app authentication
        file_pattern: Pattern to filter files (e.g., "*.xlsx")
        overwrite: Whether to overwrite existing files

    Returns:
        List of downloaded file paths
    """
    downloader = SharePointDownloader(
        site_url=site_url,
        username=username,
        password=password,
        client_id=client_id,
        client_secret=client_secret,
        use_app_authentication=use_app_authentication
    )

    if file_path:
        # Download single file
        local_file_path = Path(local_dir) / Path(file_path).name
        if downloader.download_file(file_path, str(local_file_path), overwrite=overwrite):
            return [str(local_file_path)]
        return []

    if folder_path:
        # Download all files from folder
        return downloader.download_files_from_folder(
            folder_path=folder_path,
            local_dir=local_dir,
            file_pattern=file_pattern,
            overwrite=overwrite
        )

    logger.error("Either file_path or folder_path must be provided")
    return []


def main() -> int:
    """Run a download using the 'sharepoint' section of the project config.

    Returns:
        Process exit code: 1 when the configuration section is missing,
        0 otherwise.
    """
    from config import load_config

    logging.basicConfig(level=logging.INFO)

    # Load configuration
    config = load_config()

    if not config.get('sharepoint'):
        print("SharePoint configuration not found in config.yaml")
        return 1

    sp_config = config['sharepoint']

    # Download files
    downloaded = download_from_sharepoint(
        site_url=sp_config['site_url'],
        folder_path=sp_config.get('folder_path'),
        file_path=sp_config.get('file_path'),
        local_dir=sp_config.get('local_dir', 'reports'),
        username=sp_config.get('username'),
        password=sp_config.get('password'),
        client_id=sp_config.get('client_id'),
        client_secret=sp_config.get('client_secret'),
        use_app_authentication=sp_config.get('use_app_authentication', False),
        file_pattern=sp_config.get('file_pattern'),
        overwrite=True
    )

    print(f"Downloaded {len(downloaded)} file(s):")
    for downloaded_path in downloaded:
        print(f"  - {downloaded_path}")
    return 0


if __name__ == "__main__":
    sys.exit(main())