#!/bin/bash

# Human-readable title of this test case.
# NOTE(review): presumably read by the test harness that sources this file - confirm.
cvmfs_test_name="Interleaving Stratum1 Snapshot Behaviour"
# NOTE(review): looks like a switch telling the harness not to bring up autofs
# before the test starts - confirm against the harness.
cvmfs_test_autofs_on_startup=false

# Tells whether the update lock file of a repository currently exists.
#
# Arguments: $1 - repository name, passed on to load_repo_config
# Globals:   CVMFS_SPOOL_DIR (read; presumably (re)set by load_repo_config,
#            which is defined outside this file - verify)
# Returns:   0 if ${CVMFS_SPOOL_DIR}/is_updating.lock is a regular file,
#            non-zero otherwise
check_lock() {
  local name=$1
  load_repo_config "$name"
  # Quoted so that a spool path with whitespace is tested as one file name
  # instead of breaking the `[` expression.
  [ -f "${CVMFS_SPOOL_DIR}/is_updating.lock" ]
}

# Starts `cvmfs_server <op> [params...] <name>` via run_background_service and
# polls once per second until the process is gone or the time budget is spent.
# The exit status of the started command itself is not inspected.
#
# Arguments:
#   $1   - cvmfs_server sub-command (e.g. snapshot, gc)
#   $2   - repository name, appended as the last argument of the command
#   $3   - log file handed to run_background_service
#   $4   - maximum number of seconds to wait for the process to terminate
#   $5.. - extra options, placed between the sub-command and the name
# Outputs: progress messages on stdout
# Returns: 0 if the process disappeared within the budget,
#          1 if it was still alive afterwards (it is then sent SIGKILL),
#          the status of run_background_service if starting it failed
run_background_op() {
  local op=$1
  local name=$2
  local logfile=$3
  local max_waiting_time=$4
  shift 4
  local op_params="$*"

  # run_background_service takes the command as a single string
  local op_command="cvmfs_server $op $op_params $name"
  local pid=0

  echo "running \`${op_command}\`"
  pid=$(run_background_service "$logfile" "$op_command") || return $?

  echo -n "waiting for $op (PID: $pid) for $max_waiting_time seconds... "
  local timeout=$max_waiting_time
  # `kill -0` only probes for the existence of the process
  while kill -0 "$pid" > /dev/null 2>&1 && [ "$timeout" -gt 0 ]; do
    sleep 1
    timeout=$(( timeout - 1 ))
  done
  echo "done (waited $(( max_waiting_time - timeout )) seconds)"

  if ! kill -0 "$pid" > /dev/null 2>&1; then
    echo "$op has terminated"
    return 0
  fi

  echo -n "$op is still running - aborting it... "
  if kill -9 "$pid"; then
    echo "done"
  else
    echo "fail"
  fi
  return 1
}

# Deliberate no-op: counting fetched catalogs from a replication log stopped
# working as of 2.3.4, so this function prints nothing and succeeds.
#
# Arguments: $1 - replication log file (ignored)
# Returns:   always 0
#
# Former implementation, kept for reference:
#   local replication_log_file="$1"
#
#   # locate the point where replication starts to process repository tags;
#   # that part of the log must not be counted
#   local cutmark=$(cat $replication_log_file | grep -n "repository tag" | head -n 1 | cut -f1 -d:)
#
#   # count how often the pull decided to fetch chunks
#   cat $replication_log_file | head -n $cutmark | grep 'Processing chunks' | wc -l
count_fetched_catalogs() {
  return 0
}


# Name of the Stratum1 replica; set by cvmfs_run_test() before the replica is
# created. cleanup() removes the replica when this is non-empty.
CVMFS_TEST_584_REPLICA_NAME=""
# PID that cleanup() kills when non-empty. cvmfs_run_test() assigns it right
# after placing a fake lock and empties it again once the lock is removed.
CVMFS_TEST584_KILLPID=
# Trap handler that tears down whatever the test left behind.
#
# Globals:
#   CVMFS_TEST_584_REPLICA_NAME (read) - replica to remove, if non-empty
#   CVMFS_TEST584_KILLPID       (read) - process to kill, if non-empty
# Outputs: a one-line notice on stdout
cleanup() {
  echo "running cleanup()"
  # -n with a quoted operand is well-defined for empty values and for values
  # containing whitespace; the unquoted `! -z` form is not.
  if [ -n "$CVMFS_TEST_584_REPLICA_NAME" ]; then
    sudo cvmfs_server rmfs -f "$CVMFS_TEST_584_REPLICA_NAME"
  fi
  if [ -n "$CVMFS_TEST584_KILLPID" ]; then
    kill "$CVMFS_TEST584_KILLPID"
  fi
}

# Test entry point: checks how `cvmfs_server snapshot` and `cvmfs_server gc`
# on a Stratum1 replica behave while the replica's is_updating.lock exists.
#
# Phases (they match the {N} tags printed to stdout):
#   {0} create the repository and a Stratum1 replica of it
#   {1} fake lock before the initial snapshot: snapshot with and without -t
#       must terminate with an "in progress ... abort" message
#   {2} initial snapshot without a lock; keep a copy of the replica data
#   {3} publish several bulky revisions in the Stratum0
#   {4} fake lock: plain snapshot must keep waiting, `-t` snapshot must abort
#   {5} slow real snapshot in the background: a concurrent `-t` snapshot
#       aborts, a concurrent plain snapshot waits and then snapshots
#   {6} lock file created with `touch` (treated as stale): snapshot works
#   {7} snapshot and gc wait for each other
#
# Arguments: $1, $2 - stored as logfile / script_location, not used here
# Globals:   CVMFS_TEST_REPO, CVMFS_TEST_USER (read)
#            CVMFS_STRATUM0, CVMFS_SPOOL_DIR (read; presumably provided by
#            load_repo_config - verify)
#            CVMFS_TEST_584_REPLICA_NAME, CVMFS_TEST584_KILLPID (written)
# Returns:   0 on success; otherwise a non-zero code identifying the failed
#            step (or the status of the failed helper where `return $?` is used)
cvmfs_run_test() {
  local logfile=$1
  local script_location=$2
  # not referenced below; kept in case sourced helpers rely on it
  local scratch_dir=$(pwd)
  local repo_dir=/cvmfs/$CVMFS_TEST_REPO

  echo "*** {0} create a fresh garbage-collectable repository named $CVMFS_TEST_REPO with user $CVMFS_TEST_USER"
  create_empty_repo "$CVMFS_TEST_REPO" "$CVMFS_TEST_USER" NO -z || return $?

  echo "*** {0} disable auto garbage collection in $CVMFS_TEST_REPO"
  sudo sed -i 's/CVMFS_AUTO_GC=.*/CVMFS_AUTO_GC=no/' "/etc/cvmfs/repositories.d/${CVMFS_TEST_REPO}/server.conf" || return $?

  echo "*** {0} install a desaster cleanup function"
  trap cleanup EXIT HUP INT TERM || return $?

  echo "*** {0} create Stratum1 repository on the same machine"
  local replica_name
  replica_name="$(get_stratum1_name "$CVMFS_TEST_REPO")"
  CVMFS_TEST_584_REPLICA_NAME="$replica_name"
  load_repo_config "$CVMFS_TEST_REPO"
  create_stratum1 "$replica_name"                          \
                  "$CVMFS_TEST_USER"                       \
                  "$CVMFS_STRATUM0"                        \
                  "/etc/cvmfs/keys/${CVMFS_TEST_REPO}.pub" || return 1

  # lock file that the fake-lock phases create and remove by hand
  local fake_lock="/var/spool/cvmfs/${replica_name}/is_updating.lock"

  # ============================================================================

  echo "*** {1} fake a running snapshot"
  lockfile "$fake_lock" || return 2
  # NOTE(review): lockfile is not started with `&` here, so `$!` only carries a
  # PID if lockfile (defined outside this file) launches a background job in
  # this shell - confirm that this is what is intended.
  CVMFS_TEST584_KILLPID=$!

  echo "*** {1} create an initial snapshot (should immediately fail)"
  local snapshot_log_1="snapshot_1.log"
  run_background_op snapshot "$replica_name" "$snapshot_log_1" 15 || return 2

  echo "*** {1} check if the lock is there"
  check_lock "$replica_name" || return 3

  echo "*** {1} check log file for proper error message"
  grep 'initial snapshot.*in progress.*abort' "$snapshot_log_1" || return 4

  echo "*** {1} create an initial snapshot without waiting (should immediately fail)"
  local snapshot_log_2="snapshot_2.log"
  run_background_op snapshot "$replica_name" "$snapshot_log_2" 15 -t || return 5

  echo "*** {1} check if the lock is there"
  check_lock "$replica_name" || return 6

  echo "*** {1} check log file for proper error message"
  grep 'another .*in progress.*abort' "$snapshot_log_2" || return 7

  echo "*** {1} remove faked lock"
  if [ -n "$CVMFS_TEST584_KILLPID" ]; then
    kill "$CVMFS_TEST584_KILLPID"
  fi
  CVMFS_TEST584_KILLPID=
  rm -f "$fake_lock"

  # ============================================================================

  echo "*** {2} create an initial snapshot (should work)"
  local snapshot_log_3="snapshot_3.log"
  run_background_op snapshot "$replica_name" "$snapshot_log_3" 120 || return 9

  echo "*** {2} check if the lock is gone"
  check_lock "$replica_name" && return 10

  echo "*** {2} check the log file for proper manifest upload"
  grep 'Uploading manifest' "$snapshot_log_3" || return 11

  echo "*** {2} save snapshot data for later"
  load_repo_config "$replica_name"
  cp "${CVMFS_SPOOL_DIR}/reflog.chksum" . || return $?
  cp -r "/srv/cvmfs/${replica_name}" . || return $?

  # ============================================================================

  echo "*** {3} create a couple of (bulky!) transactions in stratum 0"
  local publish_log_1="publish_1.log"
  local dir
  # one published revision per directory
  for dir in foo bar baz bam buz; do
    start_transaction "$CVMFS_TEST_REPO" || return $?
    mkdir "${repo_dir}/${dir}"
    cp_bin "${repo_dir}/${dir}"
    publish_repo "$CVMFS_TEST_REPO" >> "$publish_log_1" 2>&1 || return $?
  done

  start_transaction "$CVMFS_TEST_REPO" || return $?
  mkdir "${repo_dir}/big"
  local i
  # four files of 100 MiB random data; failures map to return codes 12..15
  for i in 1 2 3 4; do
    dd if=/dev/urandom of="${repo_dir}/big/${i}" count=100 bs=1MiB > /dev/null 2>&1 || return $(( 11 + i ))
  done
  publish_repo "$CVMFS_TEST_REPO" >> "$publish_log_1" 2>&1 || return $?

  # ============================================================================

  echo "*** {4} create a fake lock"
  lockfile "$fake_lock" || return 16
  # NOTE(review): same `$!` caveat as in phase {1}
  CVMFS_TEST584_KILLPID=$!

  echo "*** {4} try to snapshot (should hang forever)"
  local snapshot_log_4="snapshot_4.log"
  # success here means the snapshot did NOT wait for the lock -> failure
  run_background_op snapshot "$replica_name" "$snapshot_log_4" 30 && return 17

  echo "*** {4} check if the lock is there"
  check_lock "$replica_name" || return 18

  echo "*** {4} check error message"
  grep 'waiting for .* finish' "$snapshot_log_4" || return 19

  echo "*** {4} try to snapshot (without waiting)"
  local snapshot_log_5="snapshot_5.log"
  run_background_op snapshot "$replica_name" "$snapshot_log_5" 30 -t || return 20

  echo "*** {4} check if the lock is there"
  check_lock "$replica_name" || return 21

  echo "*** {4} check log file for proper error message"
  # must inspect the log of the snapshot just run (log 5), not the one from {1}
  grep 'another .*in progress.*abort' "$snapshot_log_5" || return 22

  echo "*** {4} remove the fake lock"
  if [ -n "$CVMFS_TEST584_KILLPID" ]; then
    kill "$CVMFS_TEST584_KILLPID"
  fi
  CVMFS_TEST584_KILLPID=
  rm -f "$fake_lock"

  # ============================================================================

  echo "*** {5} try to snapshot (in the background)"
  local snapshot_log_6="snapshot_6.log"
  # Need to wait while reading a lot of output. Needs more than 64kb of output
  # to work, which is the size of the pipe buffer.
  # The here-document delimiter is unquoted, so $replica_name is expanded while
  # the helper script is written.
  cat >do_snapshot_6.sh <<!EOF!
#!/bin/sh
export CVMFS_SERVER_DEBUG=3
sh -x /usr/bin/cvmfs_server snapshot $replica_name 2>&1|(sleep 20;cat)
!EOF!
  chmod +x do_snapshot_6.sh
  # declared separately so that `local` does not hide a start-up failure
  local pid6
  pid6=$(run_background_service "$snapshot_log_6" ./do_snapshot_6.sh) || return 24
  echo "running with PID: $pid6"

  echo "*** {5} sleep for one second"
  sleep 1

  echo "*** {5} check if the lock is there"
  check_lock "$replica_name" || return 25

  echo "*** {5} run a second snapshot concurrently (should abort immediately)"
  local snapshot_log_7="snapshot_7.log"
  run_background_op snapshot "$replica_name" "$snapshot_log_7" 15 -t || return 26

  echo "*** {5} check if the lock is there"
  check_lock "$replica_name" || return 27

  echo "*** {5} check if the first snapshot is still running (we missed the race otherwise)"
  kill -0 "$pid6" || return 28

  echo "*** {5} check if the lock is there"
  check_lock "$replica_name" || return 29

  echo "*** {5} run a second snapshot concurrently (should wait and later do the snapshot)"
  local snapshot_log_8="snapshot_8.log"
  run_background_op snapshot "$replica_name" "$snapshot_log_8" 800 || return 30

  echo "*** {5} check if the first snapshot is gone (kill it otherwise)"
  if kill -0 "$pid6" > /dev/null 2>&1; then
    echo -n "PID $pid6 is still around - killing it... "
    kill -9 "$pid6"
    echo "done"
    return 31
  fi

  echo "*** {5} check if the lock is gone"
  check_lock "$replica_name" && return 32

  echo "*** {5} check the log files"
  echo "        (6 should have snapshotted, 7 should have aborted, 8 should have waited and later snapshotted)"
  # The number of snapshotted revisions is not present in the log anymore since
  # 2.3.4
  # local revisions_snapshotted_by_6=$(count_fetched_catalogs $snapshot_log_6)
  # local revisions_snapshotted_by_7=$(count_fetched_catalogs $snapshot_log_7)
  # echo "--> Number 6 snapshotted $revisions_snapshotted_by_6 revisions"
  # echo "--> Number 7 snapshotted $revisions_snapshotted_by_7 revisions"
  # [ $revisions_snapshotted_by_6 -eq 6 ]                             || return 33
  # [ $revisions_snapshotted_by_7 -eq 0 ]                             || return 34
  grep 'Uploading manifest'           "$snapshot_log_6"             || return 35
  grep 'another .*in progress.*abort' "$snapshot_log_7"             || return 36
  grep 'waiting for .* finish'        "$snapshot_log_8"             || return 37
  grep 'Uploading manifest'           "$snapshot_log_8"             || return 38

  # ============================================================================

  echo "*** {6} place a fake lock that is stale"
  touch "$fake_lock" || return 39

  echo "*** {6} create a snapshot (should work - despite the stale lock)"
  local snapshot_log_9="snapshot_9.log"
  run_background_op snapshot "$replica_name" "$snapshot_log_9" 300 || return 40

  echo "*** {6} check if the lock is gone"
  check_lock "$replica_name" && return 41

  echo "*** {6} check the log file"
  grep 'Uploading manifest' "$snapshot_log_9" || return 42

  # ============================================================================

  echo "*** {7} restore early snapshot data so snapshot will take a while"
  # presumably CVMFS_SPOOL_DIR still refers to the replica here, because the
  # last load_repo_config call (via check_lock) was for the replica - verify
  cp reflog.chksum "${CVMFS_SPOOL_DIR}" || return 43
  rm -rf "/srv/cvmfs/${replica_name}/data" || return 44
  # the glob picks up the dot-files saved in phase {2}; it must stay unquoted
  cp -r "$replica_name"/.??* "$replica_name/data" "/srv/cvmfs/${replica_name}" || return 45

  echo "*** {7} try to snapshot (in the background)"
  local snapshot_log_10="snapshot_10.log"
  local pid10
  pid10=$(run_background_service "$snapshot_log_10" "cvmfs_server snapshot $replica_name") || return 46
  echo "running with PID: $pid10"

  echo "*** {7} sleep for one second"
  sleep 1

  echo "*** {7} check if the lock is there"
  check_lock "$replica_name" || return 47

  echo "*** {7} run a gc concurrently (should wait and later do the gc)"
  local gc_log_1="gc_1.log"
  run_background_op gc "$replica_name" "$gc_log_1" 800 -f || return 48

  echo "*** {7} check if the snapshot is gone (kill it otherwise)"
  if kill -0 "$pid10" > /dev/null 2>&1; then
    echo -n "PID $pid10 is still around - killing it... "
    kill -9 "$pid10"
    echo "done"
    return 49
  fi

  echo "*** {7} now run another gc (in the background, slowing down output)"
  # Need to wait while reading a lot of output, or the gc will finish too
  # fast, so add both cvmfs server and shell debugging.  It will begin right
  # away because of the pipe buffer.  Needs more than 64kb of output to work,
  # which is the size of the pipe buffer.
  # It's a hack, but I couldn't think of another way to get a gc to run
  # long enough to test overlap.
  local gc_log_2="gc_2.log"
  cat >do_gc.sh <<!EOF!
#!/bin/sh
export CVMFS_SERVER_DEBUG=3
sh -x /usr/bin/cvmfs_server gc -f -r 0 -l $replica_name 2>&1|(sleep 20;cat)
!EOF!
  chmod +x do_gc.sh
  local pid2
  pid2=$(run_background_service "$gc_log_2" ./do_gc.sh) || return 54
  echo "running with PID: $pid2"

  echo "*** {7} sleep for one second"
  sleep 1

  echo "*** {7} check if the lock is there"
  check_lock "$replica_name" || return 55

  echo "*** {7} run a snapshot concurrently (should wait and later do the snapshot)"
  local snapshot_log_11="snapshot_11.log"
  run_background_op snapshot "$replica_name" "$snapshot_log_11" 800 || return 56

  echo "*** {7} check if the gc is gone (kill it otherwise)"
  if kill -0 "$pid2" > /dev/null 2>&1; then
    echo -n "PID $pid2 is still around - killing it... "
    # don't use -9 because there may be children we want to kill
    kill "$pid2"
    echo "done"
    return 57
  fi

  echo "*** {7} check the log files"

  grep 'waiting for .* finish'      "$gc_log_1"                     || return 58
  grep 'Running Garbage Collection' "$gc_log_1"                     || return 59
  grep 'waiting for .* finish'      "$snapshot_log_11"              || return 60
  grep 'Uploading manifest'         "$snapshot_log_11"              || return 61

  return 0
}
