"""
|
|
Context Service for generating rich contextual descriptions for messages.
|
|
|
|
This service uses LLMs to generate contextual descriptions for messages,
|
|
which improves retrieval by providing more context to the embedding process.
|
|
"""
|
|
import os
|
|
import time
|
|
from typing import Dict, List, Optional, Union
|
|
from openai import OpenAI
|
|
from app.config import Config
|
|
|
|
class ContextService:
|
|
"""Service for generating rich contextual descriptions for messages."""
|
|
|
|
# Initialize OpenAI client
|
|
client = OpenAI(api_key=Config.OPENAI_API_KEY)
|
|
|
|
# Cache for context generation to reduce API calls
|
|
_context_cache = {}
|
|
|
|
@staticmethod
|
|
def generate_context(content: str, metadata: Dict) -> str:
|
|
"""
|
|
Generate a rich contextual description for a message.
|
|
|
|
Args:
|
|
content (str): The original message content
|
|
metadata (Dict): Metadata about the message (channel, subject, sender, timestamp)
|
|
|
|
Returns:
|
|
str: A rich contextual description
|
|
"""
|
|
# Create a cache key from content and metadata
|
|
cache_key = f"{content[:100]}_{metadata.get('channel')}_{metadata.get('subject')}"
|
|
|
|
# Check if we have this context cached
|
|
if cache_key in ContextService._context_cache:
|
|
return ContextService._context_cache[cache_key]
|
|
|
|
try:
|
|
# Create messages for context generation
|
|
messages = [
|
|
{
|
|
"role": "system",
|
|
"content": "You are a context generation assistant. Generate a short, succinct context description for the given message. The context should situate this message within its domain and highlight key information that would be helpful for retrieval. Keep the context under 100 words."
|
|
},
|
|
{
|
|
"role": "user",
|
|
"content": f"""
|
|
Message details:
|
|
- Channel: {metadata.get('channel', 'Unknown')}
|
|
- Subject: {metadata.get('subject', 'Unknown')}
|
|
- Sender: {metadata.get('sender', 'Unknown')}
|
|
- Timestamp: {metadata.get('timestamp', 'Unknown')}
|
|
|
|
Message content:
|
|
{content}
|
|
"""
|
|
}
|
|
]
|
|
|
|
# Generate the context using OpenAI
|
|
response = ContextService.client.chat.completions.create(
|
|
model="gpt-4o",
|
|
messages=messages,
|
|
max_tokens=150,
|
|
temperature=0.3
|
|
)
|
|
|
|
# Extract the response text
|
|
context = response.choices[0].message.content.strip()
|
|
|
|
# If the context is too long, truncate it
|
|
if len(context) > 500:
|
|
context = context[:497] + "..."
|
|
|
|
# Cache the result
|
|
ContextService._context_cache[cache_key] = context
|
|
|
|
return context
|
|
except Exception as e:
|
|
print(f"Error generating context: {e}")
|
|
|
|
# Fallback to a simple context based on metadata
|
|
channel = metadata.get('channel', 'Unknown')
|
|
subject = metadata.get('subject', 'Unknown')
|
|
|
|
fallback_context = f"This message is from the {channel} channel and discusses {subject}."
|
|
|
|
# Cache the fallback
|
|
ContextService._context_cache[cache_key] = fallback_context
|
|
|
|
return fallback_context
|
|
|
|
@staticmethod
|
|
def contextualize_content(content: str, metadata: Dict) -> str:
|
|
"""
|
|
Add rich contextual description to a message.
|
|
|
|
Args:
|
|
content (str): The original message content
|
|
metadata (Dict): Metadata about the message
|
|
|
|
Returns:
|
|
str: The content with context prepended
|
|
"""
|
|
# Generate the context
|
|
context = ContextService.generate_context(content, metadata)
|
|
|
|
# Add the context to the content
|
|
return f"CONTEXT: {context}\n\nCONTENT: {content}" |