""" Database integration service. Combines functionality from both Zulip and ChromaDB services. """ from datetime import datetime from app.db.zulip_service import ZulipDatabaseService from app.db.chroma_service import ChromaDBService from app.utils.contextual_retrieval.hybrid_search import HybridSearchService class DatabaseIntegrationService: """ Service for integrating between Zulip DB and ChromaDB. Handles the synchronization of messages from Zulip to ChromaDB. """ @staticmethod def sync_messages_to_chromadb(days_ago=30, limit=1000): """ Sync recent messages from Zulip to ChromaDB. Args: days_ago (int): Number of days to look back limit (int): Maximum number of messages to sync Returns: dict: Statistics about the sync operation """ # Get messages from Zulip messages = ZulipDatabaseService.get_messages_from_it_channels(days_ago=days_ago, limit=limit) stats = { "total_messages": len(messages), "new_messages": 0, "already_existing": 0, "failed": 0 } # Process each message for message in messages: # Check if message already exists in ChromaDB if ChromaDBService.message_exists(message.id): stats["already_existing"] += 1 continue # Get channel name for the message channel_name = ZulipDatabaseService.get_channel_name_for_message(message) # Get sender name (we don't have that information readily available from the query) # In a real implementation, we would join with the UserProfile table sender_name = f"User ID: {message.sender_id}" # Add message to ChromaDB success = ChromaDBService.add_message( message_id=message.id, content=message.content, channel_name=channel_name, subject=message.subject, sender_name=sender_name, date_sent=message.date_sent ) if success: stats["new_messages"] += 1 else: stats["failed"] += 1 return stats @staticmethod def search_knowledge_base(query_text, n_results=5, filter_channel=None, use_hybrid=True, use_reranking=True): """ Search for messages in the knowledge base using hybrid search. Args: query_text (str): Text to search for n_results (int): Number of results to return filter_channel (str): Optional channel name to filter results use_hybrid (bool): Whether to use hybrid search or just vector search use_reranking (bool): Whether to apply reranking to the results Returns: list: List of search results """ # Prepare filter criteria filter_criteria = None if filter_channel: filter_criteria = {"channel": filter_channel} # Decide which search method to use if use_hybrid: # Use the hybrid search service results = HybridSearchService.hybrid_search( query=query_text, n_results=n_results, filter_criteria=filter_criteria, rerank=use_reranking ) return results else: # Use the standard ChromaDB search results = ChromaDBService.search_similar( query_text=query_text, n_results=n_results, filter_criteria=filter_criteria, use_hybrid=False ) # Format results formatted_results = [] if results and results['ids'] and len(results['ids'][0]) > 0: for i in range(len(results['ids'][0])): formatted_results.append({ 'id': results['ids'][0][i], 'content': results['documents'][0][i], 'metadata': results['metadatas'][0][i], 'score': results['distances'][0][i] if 'distances' in results else None }) return formatted_results