132 lines
4.6 KiB
Bash
Executable File
132 lines
4.6 KiB
Bash
Executable File
#!/bin/bash
# Sync every message from every channel (sandbox excluded) and then verify
# that the messages are present in ChromaDB.

# Logging: one timestamped log file per run, kept under ./logs.
LOG_FILE="logs/sync_and_verify_$(date +%Y%m%d_%H%M%S).log"
mkdir -p logs

# Mark the helper scripts as executable.
for helper_script in sync_all_channels.py compare_messages.py fix_unknown_channels.py; do
  chmod +x "$helper_script"
done

# Console-only start banner (not written to the log file).
console_rule="======================================================"
printf '%s\n' \
  "$console_rule" \
  " ZULIP CHANNEL SYNC AND VERIFY PROCESS" \
  " $(date)" \
  " Logging to: $LOG_FILE" \
  "$console_rule" \
  ""

# Run header, shown on the console and appended to the log file.
log_rule="====================================================="
{
  printf '%s\n' "$log_rule"
  printf '%s\n' "SYNC AND VERIFY PROCESS - $(date)"
  printf '%s\n' "$log_rule"
} | tee -a "$LOG_FILE"
# Activate the project's virtual environment when one is present.
# The guard tests the activate script itself rather than just the venv
# directory, so a venv without bin/activate is skipped with a warning instead
# of making `source` fail with "No such file or directory".
if [ -f "venv/bin/activate" ]; then
  echo "Activating virtual environment..." | tee -a "$LOG_FILE"
  # shellcheck source=/dev/null
  if ! source venv/bin/activate; then
    # Keep going: the rest of the script then uses whatever python is on PATH.
    echo "Warning: failed to activate virtual environment" | tee -a "$LOG_FILE"
  fi
elif [ -d "venv" ]; then
  echo "Warning: venv/ exists but venv/bin/activate is missing; continuing without it" | tee -a "$LOG_FILE"
fi
# Set parameters for the sync.
# Every setting can be overridden from the environment, for example
# `TOTAL_BATCHES=10 MAX_MESSAGES=100 <this script>`; when a variable is unset
# or empty the default shown here is used.
DAYS_TO_SYNC="${DAYS_TO_SYNC:-365}"                         # Used for verification only
MAX_MESSAGES="${MAX_MESSAGES:-250}"                         # Messages per batch
FORCE_SYNC="${FORCE_SYNC:-true}"
INCLUDE_DIRECT_MESSAGES="${INCLUDE_DIRECT_MESSAGES:-true}"
ALL_MESSAGES="${ALL_MESSAGES:-true}"                        # Sync all messages regardless of date
TOTAL_BATCHES="${TOTAL_BATCHES:-1000}"                      # Number of batches to run

# The numeric settings are later used in arithmetic and as command-line
# arguments, so reject anything that is not a plain non-negative integer
# before any work starts.
for numeric_setting in DAYS_TO_SYNC MAX_MESSAGES TOTAL_BATCHES; do
  case "${!numeric_setting}" in
    '' | *[!0-9]*)
      echo "Error: $numeric_setting must be a non-negative integer (got '${!numeric_setting}')" >&2
      exit 2
      ;;
  esac
done

# Record the effective configuration on the console and in the log file.
{
  echo "Configuration:"
  echo "- Maximum messages per batch: $MAX_MESSAGES"
  echo "- Force sync: $FORCE_SYNC"
  echo "- Include direct messages: $INCLUDE_DIRECT_MESSAGES"
  echo "- Sync all messages: $ALL_MESSAGES"
  echo "- Number of batches: $TOTAL_BATCHES"
  echo "- Days for verification: $DAYS_TO_SYNC"
  echo ""
} | tee -a "$LOG_FILE"
# Step 1: Sync messages in multiple batches.
# Announce the step on the console and in the log file.
{
  printf '\n'
  printf '%s\n' "Step 1: Syncing messages from all channels (except sandbox)..."
  printf '%s\n' "This will exclude messages from IT_Bot and ai_bot"
  printf '%s\n' "Running $TOTAL_BATCHES batches of $MAX_MESSAGES messages each"
  printf '\n'
} | tee -a "$LOG_FILE"

# Assemble the base sync command line as a single string. Each optional flag
# is appended only when its setting is exactly "true".
SYNC_CMD="python sync_all_channels.py --max-messages $MAX_MESSAGES"
case "$INCLUDE_DIRECT_MESSAGES" in
  true) SYNC_CMD+=" --include-direct-messages" ;;
esac
case "$ALL_MESSAGES" in
  true) SYNC_CMD+=" --all-messages" ;;
esac
# Run the sync in TOTAL_BATCHES consecutive batches.
#
# Reads:  SYNC_CMD (base command line), TOTAL_BATCHES, FORCE_SYNC,
#         ALL_MESSAGES, LOG_FILE
# Writes: BATCH_CMD (command line of the most recent batch),
#         FAILED_BATCHES (number of batches whose command exited non-zero)
#
# A failing batch is logged and counted but does not stop the loop, so the
# remaining batches still run.
FAILED_BATCHES=0

for ((i = 1; i <= TOTAL_BATCHES; i++)); do
  echo "Running batch $i of $TOTAL_BATCHES..." | tee -a "$LOG_FILE"

  BATCH_CMD="$SYNC_CMD"
  # If ALL_MESSAGES is true, --force is used for every batch to ensure we get
  # historical data, provided that FORCE_SYNC is also enabled.
  # If ALL_MESSAGES is false, --force (if enabled by FORCE_SYNC) applies only
  # to the first batch.
  if [[ "$FORCE_SYNC" == true ]] && { [[ "$ALL_MESSAGES" == true ]] || ((i == 1)); }; then
    BATCH_CMD="$BATCH_CMD --force"
  fi

  echo "Running: $BATCH_CMD" | tee -a "$LOG_FILE"
  echo "" | tee -a "$LOG_FILE"

  # Split the command line into words without glob expansion, then run it.
  # stderr is merged into stdout so error output also reaches the log file.
  read -r -a batch_argv <<< "$BATCH_CMD"
  "${batch_argv[@]}" 2>&1 | tee -a "$LOG_FILE"
  # PIPESTATUS[0] is the sync command's own status; $? would be tee's.
  batch_status=${PIPESTATUS[0]}
  if ((batch_status != 0)); then
    FAILED_BATCHES=$((FAILED_BATCHES + 1))
    echo "Warning: batch $i failed with exit status $batch_status" | tee -a "$LOG_FILE"
  fi

  # Pause between batches (not after the last one).
  if ((i < TOTAL_BATCHES)); then
    echo "Pausing for 5 seconds between batches..." | tee -a "$LOG_FILE"
    sleep 5
  fi
done

if ((FAILED_BATCHES > 0)); then
  echo "Warning: $FAILED_BATCHES of $TOTAL_BATCHES batches failed" | tee -a "$LOG_FILE"
fi
# Step 2: Fix Unknown Channel entries.
# Reads LOG_FILE; writes FIX_CMD (command line, for logging) and FIX_STATUS
# (exit status of the fix script).
echo "" | tee -a "$LOG_FILE"
echo "Step 2: Fixing 'Unknown Channel' entries..." | tee -a "$LOG_FILE"
echo "" | tee -a "$LOG_FILE"

# Run the fix unknown channels script.
fix_argv=(python fix_unknown_channels.py)
FIX_CMD="${fix_argv[*]}"
echo "Running: $FIX_CMD" | tee -a "$LOG_FILE"
echo "" | tee -a "$LOG_FILE"

# stderr is merged into stdout so error output also reaches the log file.
"${fix_argv[@]}" 2>&1 | tee -a "$LOG_FILE"
# PIPESTATUS[0] is the script's own status; $? would be tee's.
FIX_STATUS=${PIPESTATUS[0]}
if ((FIX_STATUS != 0)); then
  echo "Warning: fix_unknown_channels.py failed with exit status $FIX_STATUS" | tee -a "$LOG_FILE"
fi
# Step 3: Verify all messages are in ChromaDB.
# Reads DAYS_TO_SYNC and LOG_FILE; writes COMPARE_CMD (command line, for
# logging) and VERIFY_STATUS (exit status of the comparison script).
echo "" | tee -a "$LOG_FILE"
echo "Step 3: Verifying all messages are in ChromaDB..." | tee -a "$LOG_FILE"
echo "" | tee -a "$LOG_FILE"

# Run comparison with the specified number of days for verification.
compare_argv=(python compare_messages.py --days "$DAYS_TO_SYNC")
COMPARE_CMD="${compare_argv[*]}"
echo "Running: $COMPARE_CMD" | tee -a "$LOG_FILE"
echo "" | tee -a "$LOG_FILE"

# stderr is merged into stdout so error output also reaches the log file.
"${compare_argv[@]}" 2>&1 | tee -a "$LOG_FILE"
# PIPESTATUS[0] is the comparison's own status; $? would be tee's, which
# would make a failed verification look like a success.
VERIFY_STATUS=${PIPESTATUS[0]}
if ((VERIFY_STATUS != 0)); then
  echo "Warning: compare_messages.py failed with exit status $VERIFY_STATUS" | tee -a "$LOG_FILE"
fi
# Closing summary, shown on the console and appended to the log file.
closing_rule="====================================================="
{
  printf '\n'
  printf '%s\n' "$closing_rule"
  printf '%s\n' "Sync and verification process completed at $(date)"
  printf '%s\n' "See $LOG_FILE for complete log"
  printf '%s\n' "$closing_rule"
} | tee -a "$LOG_FILE"

# Console-only completion banner (not written to the log file).
banner_rule="======================================================"
cat <<EOF

$banner_rule
 SYNC AND VERIFICATION PROCESS COMPLETED
 $(date)
 Log file: $LOG_FILE
$banner_rule
EOF
# If we activated a virtual environment, deactivate it
|
|
if [ -n "$VIRTUAL_ENV" ]; then
|
|
deactivate
|
|
fi |