zulip_bot/reset_chromadb.py
2025-05-16 18:00:22 +04:00

87 lines
3.1 KiB
Python
Executable File

#!/usr/bin/env python3
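"""
Reset the application's ChromaDB vector store to a clean state.

The existing ``chromadb`` directory next to this script is backed up to
``chromadb_backup``, deleted, and recreated with an empty ``zulip_messages``
collection. Run this when the vector database produces
"Add of existing embedding ID" warnings.
"""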
import os
import shutil
import logging
import chromadb
from chromadb.utils import embedding_functions
from app.utils.embeddings import EmbeddingService
# Set up root logging for this script: INFO and above, with timestamp,
# logger name and level prefixed to every record.
_LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)

# Named logger used by everything in this script.
logger = logging.getLogger("reset_chromadb")
def main():
    """Reset the on-disk ChromaDB store used by the application.

    The store lives in a ``chromadb`` directory next to this script. If that
    directory does not exist yet, an empty one is created and nothing else is
    done. Otherwise the directory is copied to ``chromadb_backup`` (replacing
    any earlier backup), deleted, and recreated with a fresh
    ``zulip_messages`` collection that uses cosine distance and embeds text
    through ``EmbeddingService.get_ollama_embeddings``.

    Returns:
        int: 0 on success, 1 if any step failed. Failures are logged with a
        traceback instead of being propagated to the caller.
    """
    try:
        # Default ChromaDB path used in the application
        chromadb_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "chromadb")
        logger.info(f"Preparing to reset ChromaDB at {chromadb_path}")

        # Nothing to back up or wipe: just make sure the directory exists.
        if not os.path.exists(chromadb_path):
            logger.info("ChromaDB directory doesn't exist yet. Creating a fresh one.")
            os.makedirs(chromadb_path, exist_ok=True)
            logger.info("ChromaDB directory created successfully.")
            return 0

        # Backup the existing ChromaDB directory
        backup_path = f"{chromadb_path}_backup"
        logger.info(f"Creating backup of ChromaDB at {backup_path}")

        # Copy into a staging directory first so that the previous backup is
        # only discarded once the new copy is known to be complete. Deleting
        # the old backup up front would leave no backup at all if the copy
        # failed part-way.
        staging_path = f"{backup_path}.tmp"
        if os.path.exists(staging_path):
            # Leftover from an earlier interrupted run.
            shutil.rmtree(staging_path)
        shutil.copytree(chromadb_path, staging_path)

        # Remove old backup if it exists, then promote the staged copy
        if os.path.exists(backup_path):
            logger.info("Removing old backup")
            shutil.rmtree(backup_path)
        os.rename(staging_path, backup_path)
        logger.info("Backup created successfully")

        # Delete the ChromaDB directory
        logger.info("Removing existing ChromaDB directory")
        shutil.rmtree(chromadb_path)

        # Create fresh ChromaDB
        logger.info("Creating fresh ChromaDB")
        os.makedirs(chromadb_path, exist_ok=True)

        # Initialize a fresh ChromaDB client and create a new collection
        logger.info("Initializing fresh ChromaDB client")
        client = chromadb.PersistentClient(
            path=chromadb_path,
            settings=chromadb.Settings(
                allow_reset=True,
                anonymized_telemetry=False
            )
        )

        # Create a custom embedding function
        class CustomEmbeddingFunction(embedding_functions.EmbeddingFunction):
            """Embedding function that delegates to the project's EmbeddingService."""

            def __call__(self, texts):
                return EmbeddingService.get_ollama_embeddings(texts)

        # Create a fresh collection (the returned handle is not needed here)
        logger.info("Creating fresh collection")
        client.create_collection(
            name="zulip_messages",
            metadata={
                "hnsw:space": "cosine"
            },
            embedding_function=CustomEmbeddingFunction()
        )

        logger.info("ChromaDB reset completed successfully")
        logger.info(f"To restore the backup if needed, delete {chromadb_path} and rename {backup_path} to {chromadb_path}")
        return 0
    except Exception as e:
        # Top-level boundary of the script: log with traceback and report
        # failure through the return value rather than crashing.
        logger.exception(f"Error resetting ChromaDB: {e}")
        logger.error("ChromaDB reset failed. Please check the error and try again.")
        return 1


if __name__ == "__main__":
    # Propagate main()'s status as the process exit code so that callers
    # (shell scripts, CI) can detect a failed reset.
    raise SystemExit(main())