"""
|
|
Context Service for generating rich contextual descriptions for messages.
|
|
|
|
This service uses LLMs to generate contextual descriptions for messages,
|
|
which improves retrieval by providing more context to the embedding process.
|
|
"""
|
|
import os
|
|
import time
|
|
from typing import Dict, List, Optional, Union
|
|
from openai import OpenAI
|
|
from app.config import Config
|
|
|
|
class ContextService:
|
|
"""Service for generating rich contextual descriptions for messages."""
|
|
|
|
# Initialize OpenAI client
|
|
client = OpenAI(api_key=Config.OPENAI_API_KEY)
|
|
|
|
# Cache for context generation to reduce API calls
|
|
_context_cache = {}
|
|
|
|
@staticmethod
|
|
def generate_context(content: str, metadata: Dict) -> str:
|
|
"""
|
|
Generate a rich contextual description for a message.
|
|
|
|
Args:
|
|
content (str): The original message content
|
|
metadata (Dict): Metadata about the message (channel, subject, sender, timestamp)
|
|
|
|
Returns:
|
|
str: A rich contextual description
|
|
"""
|
|
# Create a cache key from content and metadata
|
|
cache_key = f"{content[:100]}_{metadata.get('channel')}_{metadata.get('subject')}"
|
|
|
|
# Check if we have this context cached
|
|
if cache_key in ContextService._context_cache:
|
|
return ContextService._context_cache[cache_key]
|
|
|
|
try:
|
|
# Create messages for context generation
|
|
messages = [
|
|
{
|
|
"role": "system",
|
|
"content": "You are a context generation assistant. Generate a short, succinct context description for the given message. The context should situate this message within its domain and highlight key information that would be helpful for retrieval. Keep the context under 100 words."
|
|
},
|
|
{
|
|
"role": "user",
|
|
"content": f"""
|
|
Message details:
|
|
- Channel: {metadata.get('channel', 'Unknown')}
|
|
- Subject: {metadata.get('subject', 'Unknown')}
|
|
- Sender: {metadata.get('sender', 'Unknown')}
|
|
- Timestamp: {metadata.get('timestamp', 'Unknown')}
|
|
|
|
Message content:
|
|
{content}
|
|
"""
|
|
}
|
|
]
|
|
|
|
# Generate the context using OpenAI
|
|
response = ContextService.client.chat.completions.create(
|
|
model="gpt-4o",
|
|
messages=messages,
|
|
max_tokens=150,
|
|
temperature=0.3
|
|
)
|
|
|
|
# Extract the response text
|
|
context = response.choices[0].message.content.strip()
|
|
|
|
# If the context is too long, truncate it
|
|
if len(context) > 500:
|
|
context = context[:497] + "..."
|
|
|
|
# Cache the result
|
|
ContextService._context_cache[cache_key] = context
|
|
|
|
return context
|
|
except Exception as e:
|
|
print(f"Error generating context: {e}")
|
|
|
|
# Fallback to a simple context based on metadata
|
|
channel = metadata.get('channel', 'Unknown')
|
|
subject = metadata.get('subject', 'Unknown')
|
|
|
|
fallback_context = f"This message is from the {channel} channel and discusses {subject}."
|
|
|
|
# Cache the fallback
|
|
ContextService._context_cache[cache_key] = fallback_context
|
|
|
|
return fallback_context
|
|
|
|
@staticmethod
|
|
def contextualize_content(content: str, metadata: Dict) -> str:
|
|
"""
|
|
Add rich contextual description to a message.
|
|
|
|
Args:
|
|
content (str): The original message content
|
|
metadata (Dict): Metadata about the message
|
|
|
|
Returns:
|
|
str: The content with context prepended
|
|
"""
|
|
# Generate the context
|
|
context = ContextService.generate_context(content, metadata)
|
|
|
|
# Add the context to the content
|
|
return f"CONTEXT: {context}\n\nCONTENT: {content}" |