#!/bin/bash
#
# Script to sync all messages from all channels (except sandbox) and verify
# they're in ChromaDB.
#
# Steps:
#   1. Run sync_all_channels.py in TOTAL_BATCHES batches.
#   2. Run fix_unknown_channels.py.
#   3. Run compare_messages.py over the last DAYS_TO_SYNC days.
#
# All paths are relative, so run this from the directory that contains the
# Python scripts. Console output of every step is appended to a timestamped
# file under logs/.
#
# Exit status: 0 when every step succeeded, 1 when a required script is
# missing, the log directory cannot be created, or any step failed.

# Without pipefail, "cmd | tee" always reports tee's status and a failing
# Python step would go unnoticed.
set -o pipefail

# Set up logging
LOG_FILE="logs/sync_and_verify_$(date +%Y%m%d_%H%M%S).log"

# Set parameters for the sync
readonly DAYS_TO_SYNC=365 # Used for verification only
readonly MAX_MESSAGES=250
readonly FORCE_SYNC=true
readonly INCLUDE_DIRECT_MESSAGES=true
readonly ALL_MESSAGES=true   # Sync all messages regardless of date
readonly TOTAL_BATCHES=1000  # Number of batches to run

# Number of steps/batches that exited non-zero; decides the final exit status.
failures=0
# Set to true only when this script itself sourced the venv activate script.
activated_venv=false

# Print a message to the console and append it to the log file.
log() {
  printf '%s\n' "$*" | tee -a "$LOG_FILE"
}

# Announce a command, run it, and copy its stdout AND stderr to the log.
# Returns the command's own exit status (thanks to pipefail).
run_logged() {
  log "Running: $*"
  log ""
  "$@" 2>&1 | tee -a "$LOG_FILE"
}

# Fail fast when a required script is missing instead of looping through
# every batch with a command that cannot start.
for required in sync_all_channels.py fix_unknown_channels.py compare_messages.py; do
  if [[ ! -f "$required" ]]; then
    printf 'ERROR: required script not found: %s\n' "$required" >&2
    exit 1
  fi
done

if ! mkdir -p logs; then
  printf 'ERROR: cannot create log directory: logs\n' >&2
  exit 1
fi

# Make sure scripts are executable. A failure here is not fatal because the
# scripts are always launched through the python interpreter below.
chmod +x sync_all_channels.py compare_messages.py fix_unknown_channels.py

echo "======================================================"
echo " ZULIP CHANNEL SYNC AND VERIFY PROCESS"
echo " $(date)"
echo " Logging to: $LOG_FILE"
echo "======================================================"
echo ""

log "====================================================="
log "SYNC AND VERIFY PROCESS - $(date)"
log "====================================================="

# Activate virtual environment if it exists
if [[ -f "venv/bin/activate" ]]; then
  log "Activating virtual environment..."
  # shellcheck disable=SC1091
  if source venv/bin/activate; then
    activated_venv=true
  else
    log "WARNING: failed to activate virtual environment"
  fi
fi

log "Configuration:"
log "- Maximum messages per batch: $MAX_MESSAGES"
log "- Force sync: $FORCE_SYNC"
log "- Include direct messages: $INCLUDE_DIRECT_MESSAGES"
log "- Sync all messages: $ALL_MESSAGES"
log "- Number of batches: $TOTAL_BATCHES"
log "- Days for verification: $DAYS_TO_SYNC"
log ""

# Step 1: Sync messages in multiple batches
log ""
log "Step 1: Syncing messages from all channels (except sandbox)..."
log "This will exclude messages from IT_Bot and ai_bot"
log "Running $TOTAL_BATCHES batches of $MAX_MESSAGES messages each"
log ""

# Build the base command as an array so each argument stays a single word.
sync_cmd=(python sync_all_channels.py --max-messages "$MAX_MESSAGES")
if [[ "$INCLUDE_DIRECT_MESSAGES" == true ]]; then
  sync_cmd+=(--include-direct-messages)
fi
if [[ "$ALL_MESSAGES" == true ]]; then
  sync_cmd+=(--all-messages)
fi

# Run multiple batches
for ((i = 1; i <= TOTAL_BATCHES; i++)); do
  log "Running batch $i of $TOTAL_BATCHES..."

  batch_cmd=("${sync_cmd[@]}")

  # If ALL_MESSAGES is true, use --force for all batches to ensure we get
  # historical data, provided that FORCE_SYNC is also enabled.
  # If ALL_MESSAGES is false, --force (if enabled by FORCE_SYNC) applies only
  # to the first batch.
  if [[ "$FORCE_SYNC" == true ]]; then
    if [[ "$ALL_MESSAGES" == true ]] || ((i == 1)); then
      batch_cmd+=(--force)
    fi
  fi

  # Keep going after a failed batch (best effort), but remember it so the
  # script does not report success at the end.
  if ! run_logged "${batch_cmd[@]}"; then
    log "WARNING: batch $i of $TOTAL_BATCHES failed"
    failures=$((failures + 1))
  fi

  # Pause between batches
  if ((i < TOTAL_BATCHES)); then
    log "Pausing for 5 seconds between batches..."
    sleep 5
  fi
done

# Step 2: Fix Unknown Channel entries
log ""
log "Step 2: Fixing 'Unknown Channel' entries..."
log ""

if ! run_logged python fix_unknown_channels.py; then
  log "WARNING: fix_unknown_channels.py failed"
  failures=$((failures + 1))
fi

# Step 3: Verify all messages are in ChromaDB
log ""
log "Step 3: Verifying all messages are in ChromaDB..."
log ""

# Run comparison with the specified number of days for verification
if ! run_logged python compare_messages.py --days "$DAYS_TO_SYNC"; then
  log "WARNING: compare_messages.py failed"
  failures=$((failures + 1))
fi

log ""
log "====================================================="
if ((failures > 0)); then
  log "WARNING: $failures step(s) failed during this run"
fi
log "Sync and verification process completed at $(date)"
log "See $LOG_FILE for complete log"
log "====================================================="

echo ""
echo "======================================================"
echo " SYNC AND VERIFICATION PROCESS COMPLETED"
echo " $(date)"
echo " Log file: $LOG_FILE"
echo "======================================================"

# Deactivate only a virtual environment that this script activated. A
# VIRTUAL_ENV inherited from the caller does not bring the deactivate shell
# function with it, so calling deactivate in that case would fail.
if [[ "$activated_venv" == true ]]; then
  deactivate
fi

if ((failures > 0)); then
  exit 1
fi
exit 0