87 lines
3.1 KiB
Python
Executable File
87 lines
3.1 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
Script to reset the ChromaDB completely and properly.
|
|
This fixes issues with the vector database that cause "Add of existing embedding ID" warnings.
|
|
"""
|
|
|
|
import os
|
|
import shutil
|
|
import logging
|
|
import chromadb
|
|
from chromadb.utils import embedding_functions
|
|
from app.utils.embeddings import EmbeddingService
|
|
|
|
# Logging setup: timestamped INFO-level records on the root logger, plus a
# dedicated named logger for this script's own messages.
_LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)
logger = logging.getLogger("reset_chromadb")
|
|
|
|
def _backup_directory(source_path, backup_path):
    """Copy ``source_path`` to ``backup_path``, replacing any previous backup.

    The copy is first written to a staging directory next to ``backup_path``
    and only swapped in once it has completed, so a failed copy never
    destroys the previous backup.

    Args:
        source_path: Directory to back up.
        backup_path: Destination directory for the backup.

    Raises:
        OSError: If copying, removing or renaming a directory fails.
    """
    staging_path = f"{backup_path}.tmp"

    # A staging directory can only be left over from an interrupted run.
    if os.path.exists(staging_path):
        shutil.rmtree(staging_path)

    shutil.copytree(source_path, staging_path)

    # Only now that the new copy is complete is it safe to drop the old one.
    if os.path.exists(backup_path):
        logger.info("Removing old backup")
        shutil.rmtree(backup_path)

    os.replace(staging_path, backup_path)


def main():
    """Reset the ChromaDB directory that sits next to this script.

    If the directory does not exist yet it is simply created. Otherwise the
    existing directory is backed up to ``<path>_backup``, deleted, recreated,
    and a fresh ``zulip_messages`` collection (cosine space) is created in it.

    Returns:
        int: 0 on success, 1 if any step failed. Failures are logged with a
        traceback rather than propagated.
    """
    try:
        # Default ChromaDB path used in the application
        chromadb_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "chromadb")

        logger.info(f"Preparing to reset ChromaDB at {chromadb_path}")

        # Nothing to reset: create the empty directory and stop here.
        if not os.path.exists(chromadb_path):
            logger.info("ChromaDB directory doesn't exist yet. Creating a fresh one.")
            os.makedirs(chromadb_path, exist_ok=True)
            logger.info("ChromaDB directory created successfully.")
            return 0

        # Backup the existing ChromaDB directory
        backup_path = f"{chromadb_path}_backup"
        logger.info(f"Creating backup of ChromaDB at {backup_path}")
        _backup_directory(chromadb_path, backup_path)
        logger.info("Backup created successfully")

        # Delete the ChromaDB directory
        logger.info("Removing existing ChromaDB directory")
        shutil.rmtree(chromadb_path)

        # Create fresh ChromaDB
        logger.info("Creating fresh ChromaDB")
        os.makedirs(chromadb_path, exist_ok=True)

        # Initialize a fresh ChromaDB client and create a new collection
        logger.info("Initializing fresh ChromaDB client")
        client = chromadb.PersistentClient(
            path=chromadb_path,
            settings=chromadb.Settings(
                allow_reset=True,
                anonymized_telemetry=False,
            ),
        )

        class CustomEmbeddingFunction(embedding_functions.EmbeddingFunction):
            """Embedding function that delegates to the project's EmbeddingService."""

            # The parameter must be named ``input``: current ChromaDB releases
            # validate the ``__call__`` signature and pass documents by keyword.
            # Older releases pass them positionally, so this name works for both.
            def __call__(self, input):  # noqa: A002 - name required by ChromaDB
                return EmbeddingService.get_ollama_embeddings(input)

        # Create a fresh collection
        logger.info("Creating fresh collection")
        client.create_collection(
            name="zulip_messages",
            metadata={"hnsw:space": "cosine"},
            embedding_function=CustomEmbeddingFunction(),
        )

        logger.info("ChromaDB reset completed successfully")
        logger.info(f"To restore the backup if needed, delete {chromadb_path} and rename {backup_path} to {chromadb_path}")

    except Exception as e:
        # Top-level boundary of the script: log with traceback and report
        # failure through the return value instead of crashing.
        logger.exception(f"Error resetting ChromaDB: {e}")
        logger.error("ChromaDB reset failed. Please check the error and try again.")
        return 1

    return 0
|
|
|
|
if __name__ == "__main__":
|
|
main() |