#!/usr/bin/env python3
"""
Script to reset the ChromaDB completely and properly.
This fixes issues with the vector database that cause "Add of existing embedding ID" warnings.
"""

import logging
import os
import shutil
import sys

import chromadb
from chromadb.utils import embedding_functions

from app.utils.embeddings import EmbeddingService

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger("reset_chromadb")


def main() -> int:
    """Reset the on-disk ChromaDB store and recreate the message collection.

    The store is the ``chromadb`` directory located next to this script.
    If that directory does not exist it is simply created and the function
    returns. Otherwise the directory is copied to ``<path>_backup``
    (replacing any previous backup), deleted, recreated empty, and a new
    ``zulip_messages`` collection using cosine distance is created in it.

    Returns:
        ``0`` when the reset (or the initial directory creation) succeeded,
        ``1`` when any step raised. The error is logged with its traceback
        rather than propagated.
    """
    try:
        # Default ChromaDB path used in the application
        chromadb_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "chromadb")

        logger.info(f"Preparing to reset ChromaDB at {chromadb_path}")

        # First check if the directory exists
        if not os.path.exists(chromadb_path):
            logger.info("ChromaDB directory doesn't exist yet. Creating a fresh one.")
            os.makedirs(chromadb_path, exist_ok=True)
            logger.info("ChromaDB directory created successfully.")
            # Nothing to back up or wipe; no collection is created on this path.
            return 0

        # Backup the existing ChromaDB directory
        backup_path = f"{chromadb_path}_backup"
        logger.info(f"Creating backup of ChromaDB at {backup_path}")

        # Remove old backup if it exists (copytree refuses to overwrite an
        # existing destination directory).
        if os.path.exists(backup_path):
            logger.info("Removing old backup")
            shutil.rmtree(backup_path)

        # Create backup
        shutil.copytree(chromadb_path, backup_path)
        logger.info("Backup created successfully")

        # Delete the ChromaDB directory
        logger.info("Removing existing ChromaDB directory")
        shutil.rmtree(chromadb_path)

        # Create fresh ChromaDB
        logger.info("Creating fresh ChromaDB")
        os.makedirs(chromadb_path, exist_ok=True)

        # Initialize a fresh ChromaDB client and create a new collection
        logger.info("Initializing fresh ChromaDB client")
        client = chromadb.PersistentClient(
            path=chromadb_path,
            settings=chromadb.Settings(
                allow_reset=True,
                anonymized_telemetry=False,
            ),
        )

        # Create a custom embedding function
        class CustomEmbeddingFunction(embedding_functions.EmbeddingFunction):
            """Embedding function that delegates to the project's EmbeddingService."""

            # NOTE(review): some chromadb releases validate that __call__ takes
            # a parameter named `input`; confirm against the pinned version.
            def __call__(self, texts):
                return EmbeddingService.get_ollama_embeddings(texts)

        # Create a fresh collection
        logger.info("Creating fresh collection")
        client.create_collection(
            name="zulip_messages",
            metadata={"hnsw:space": "cosine"},
            embedding_function=CustomEmbeddingFunction(),
        )

        logger.info("ChromaDB reset completed successfully")
        logger.info(f"To restore the backup if needed, delete {chromadb_path} and rename {backup_path} to {chromadb_path}")
        return 0

    except Exception as e:
        # Top-level boundary of the script: record the traceback so the failing
        # step can be identified, and report failure through the exit status.
        logger.exception(f"Error resetting ChromaDB: {e}")
        logger.error("ChromaDB reset failed. Please check the error and try again.")
        return 1


if __name__ == "__main__":
    sys.exit(main())