Hot backup cassandra elasticsearch snapshots
#!/bin/bash
set -e
# Configuration
# Cassandra variables
CASSANDRA_KEYSPACE=thehive
CASSANDRA_CONNECTION="<ip_node_cassandra>"
CASSANDRA_GENERAL_ARCHIVE_PATH=/mnt/backup/cassandra
CASSANDRA_DATA_FOLDER=/var/lib/cassandra
CASSANDRA_SNAPSHOT_NAME="cassandra_$(date +%Y%m%d_%Hh%Mm%Ss)"
CASSANDRA_ARCHIVE_PATH="${CASSANDRA_GENERAL_ARCHIVE_PATH}/${CASSANDRA_SNAPSHOT_NAME}/${CASSANDRA_KEYSPACE}"
# Elasticsearch variables
ELASTICSEARCH_API_URL='http://127.0.0.1:9200'
ELASTICSEARCH_SNAPSHOT_REPOSITORY=thehive_repository
ELASTICSEARCH_GENERAL_ARCHIVE_PATH=/mnt/backup/elasticsearch
ELASTICSEARCH_SNAPSHOT_NAME="elasticsearch_$(date +%Y%m%d_%Hh%Mm%Ss)"
# Check if the snapshot repository is correctly registered
repository_config=$(curl -s -L "${ELASTICSEARCH_API_URL}/_snapshot")
repository_ok=$(jq 'has("'${ELASTICSEARCH_SNAPSHOT_REPOSITORY}'")' <<< ${repository_config})
if ! ${repository_ok}; then
echo "Abort, no snapshot repository registered in Elasticsearch"
echo "Set the repository folder 'path.repo'"
echo "in an environment variable"
echo "or in elasticsearch.yml"
exit 1
fi
# Make sure the snapshot folder exists and its subcontent permissions are correct
mkdir -p ${CASSANDRA_ARCHIVE_PATH}
chown -R cassandra:cassandra ${CASSANDRA_ARCHIVE_PATH}
echo "Snapshot of all ${CASSANDRA_KEYSPACE} tables will be stored inside ${CASSANDRA_ARCHIVE_PATH}"
# Run both backups in parallel
{
set -e
# Creating snapshot name information file
touch ${ELASTICSEARCH_GENERAL_ARCHIVE_PATH}/${ELASTICSEARCH_SNAPSHOT_NAME}.info
echo "[ES] Starting the Elasticsearch snapshot..."
RESPONSE=$(curl -s -L -X PUT "${ELASTICSEARCH_API_URL}/_snapshot/${ELASTICSEARCH_SNAPSHOT_REPOSITORY}/${ELASTICSEARCH_SNAPSHOT_NAME}" \
-H 'Content-Type: application/json' \
-d '{"indices":"thehive_global", "ignore_unavailable":true, "include_global_state":false}')
if echo "$RESPONSE" | grep -q '"accepted":true'; then
echo "[ES] ✓ Elasticsearch snapshot started successfully"
exit 0
else
echo "[ES] ✗ Elasticsearch ERROR: $RESPONSE"
exit 1
fi
# Verify that the snapshot is finished
state="NONE"
while [ "${state}" != "\"SUCCESS\"" ]; do
echo "Snapshot in progress, waiting 5 seconds before checking status again..."
sleep 5
snapshot_list=$(curl -s -L "${ELASTICSEARCH_API_URL}/_snapshot/${ELASTICSEARCH_SNAPSHOT_REPOSITORY}/*?verbose=false")
state=$(jq '.snapshots[] | select(.snapshot == "'${ELASTICSEARCH_SNAPSHOT_NAME}'").state' <<< ${snapshot_list})
done
echo "Snapshot finished"
} &
PID_ES=$!
{
set -e
echo "[CASS] Starting snapshot ${CASSANDRA_SNAPSHOT_NAME} for keyspace ${CASSANDRA_KEYSPACE}"
if nodetool snapshot -t "${CASSANDRA_SNAPSHOT_NAME}" "${CASSANDRA_KEYSPACE}"; then
echo "[CASS] ✓ Snapshot Cassandra created successfully"
# Save the cql schema of the keyspace
cqlsh ${CASSANDRA_CONNECTION} -e "DESCRIBE KEYSPACE ${CASSANDRA_KEYSPACE}" | grep -v "^WARNING" > "${CASSANDRA_GENERAL_ARCHIVE_PATH}/${CASSANDRA_SNAPSHOT_NAME}/create_keyspace_${CASSANDRA_KEYSPACE}.cql"
echo "The keyspace cql definition for ${CASSANDRA_KEYSPACE} is stored in this file: ${CASSANDRA_GENERAL_ARCHIVE_PATH}/${CASSANDRA_SNAPSHOT_NAME}/create_keyspace_${CASSANDRA_KEYSPACE}.cql"
# For each table folder in the keyspace folder of the snapshot
for TABLE in $(ls ${CASSANDRA_DATA_FOLDER}/data/${CASSANDRA_KEYSPACE}); do
# Folder where the snapshot files are stored
TABLE_SNAPSHOT_FOLDER=${CASSANDRA_DATA_FOLDER}/data/${CASSANDRA_KEYSPACE}/${TABLE}/snapshots/${CASSANDRA_SNAPSHOT_NAME}
if [ -d ${TABLE_SNAPSHOT_FOLDER} ]; then
# Create a folder for each table
mkdir "${CASSANDRA_ARCHIVE_PATH}/${TABLE}"
chown -R cassandra:cassandra ${CASSANDRA_ARCHIVE_PATH}/${TABLE}
# Copy the snapshot files to the proper table folder
# Snapshots files are hardlinks,
# so we use --remove-destination to make sure the files are actually copied and not just linked
cp -p --remove-destination ${TABLE_SNAPSHOT_FOLDER}/* ${CASSANDRA_ARCHIVE_PATH}/${TABLE}
fi
done
# Delete Cassandra snapshot once it's backed up
nodetool clearsnapshot -t ${CASSANDRA_SNAPSHOT_NAME} > /dev/null
# Create a .tar archive with the folder containing the backed up Cassandra data
tar cf ${CASSANDRA_GENERAL_ARCHIVE_PATH}/${CASSANDRA_SNAPSHOT_NAME}.tar -C "${CASSANDRA_GENERAL_ARCHIVE_PATH}" ${CASSANDRA_SNAPSHOT_NAME}
# Remove the folder once the archive is created
rm -rf ${CASSANDRA_GENERAL_ARCHIVE_PATH}/${CASSANDRA_SNAPSHOT_NAME}
exit 0
else
echo "[CASS] ✗ Cassandra ERROR"
exit 1
fi
} &
PID_CASS=$!
ES_EXIT=0
CASS_EXIT=0
# Wait for the two snapshots to finish
wait $PID_ES || ES_EXIT=$?
wait $PID_CASS || CASS_EXIT=$?
# Final check
if [ $ES_EXIT -eq 0 ] && [ $CASS_EXIT -eq 0 ]; then
echo "=== ✓ Full backup successful ==="
# Display the location of the Elasticsearch archive
echo "Elasticsearch backup done!"
# Display the location of the Cassandra archive
echo "Cassandra backup done! Keep the following backup archive safe:"
echo "${CASSANDRA_GENERAL_ARCHIVE_PATH}/${CASSANDRA_SNAPSHOT_NAME}.tar"
exit 0
else
echo "=== ✗ ERROR - ES: exit $ES_EXIT, Cassandra: exit $CASS_EXIT ==="
exit 1
fi