#!/bin/bash

# Please see this shell script rather as a template for how to call rqg_batch.pl, even though
# it might already be sufficient in its current state for doing a lot around RQG.
#

# Run everything with LANG=C.
export LANG=C

# Help texts. The "\n" sequences stay literal here and get expanded when the text is
# printed via 'echo -e'. CALL_LINE keeps the original call for error messages.
EXAMPLE="EXAMPLE: ${0} simp_1.cfg /Server_bin/bb-10.2-marko_asan_Og table_stress.yy"
USAGE="USAGE:   ${0} <Config file for the RQG test Simplifier> <Basedir == path to MariaDB binaries> [<YY grammar>]"
USAGE="\n${USAGE}\n\n${EXAMPLE}\n"
CALL_LINE="${0} ${*}"

# Config file for rqg_batch.pl containing various settings for the RQG+server+InnoDB etc.
# including settings for avoiding open bugs.
# The template is: simplify_rqg_template.cfg
#
# Every failing check prints the reason, the original call and the usage text and leaves
# with exit status 1 (a bare 'exit' would report the status 0 of the preceding echo).
CONFIG="$1"
if [ -z "$CONFIG" ]
then
   echo "You need to assign a config file for the RQG test Simplifier as first parameter."
   echo "The call was ->$CALL_LINE<-"
   echo -e "$USAGE"
   exit 1
fi
if [ ! -e "$CONFIG" ]
then
   echo "The config file for the RQG test Simplifier '$CONFIG' does not exist."
   echo "The call was ->$CALL_LINE<-"
   echo -e "$USAGE"
   exit 1
fi

# CASE0 == file name without directory, CASE == CASE0 without the extension '.cfg'.
# The expansions are quoted so that names containing blanks survive.
CASE0=$(basename -- "$CONFIG")
CASE=$(basename -- "$CASE0" .cfg)
# basename removes the suffix only if it is present. Equal names mean: no extension '.cfg'.
if [ "$CASE" = "$CASE0" ]
then
   echo "You need to assign a simplifier config file (extension .cfg)."
   echo "The call was ->$CALL_LINE<-"
   echo -e "$USAGE"
   exit 1
fi


# Path to MariaDB binaries
# A missing or invalid value is reported together with the original call and the usage
# text, and the script leaves with exit status 1.
BASEDIR1="$2"
if [ -z "$BASEDIR1" ]
then
   echo "You need to assign a basedir (path to MariaDB binaries) as second parameter."
   echo "The call was ->$CALL_LINE<-"
   echo -e "$USAGE"
   exit 1
fi
if [ ! -d "$BASEDIR1" ]
then
   echo "BASEDIR1 '$BASEDIR1' does not exist or is not a directory."
   echo "The call was ->$CALL_LINE<-"
   echo -e "$USAGE"
   exit 1
fi
# Last path component of the basedir.
BASEDIR1_NAME=$(basename -- "$BASEDIR1")
# The second basedir is the same as the first one.
BASEDIR2="$BASEDIR1"

# Optional YY grammar
# GRAMMAR_PART stays empty if no grammar was assigned. Otherwise it holds the complete
# command line option '--grammar=<file>'.
# A grammar which was assigned but is not a regular file leads to exit status 1.
GRAMMAR="$3"
GRAMMAR_PART=""
if [ -n "$GRAMMAR" ]
then
   if [ ! -f "$GRAMMAR" ]
   then
      echo "The RQG grammar '$GRAMMAR' does not exist."
      echo "The call was ->$CALL_LINE<-"
      echo -e "$USAGE"
      exit 1
   fi
   GRAMMAR_PART="--grammar=$GRAMMAR"
fi

# Protocol file name: rr-replay-<config name without '.cfg'>-<last component of basedir>.prt
PROT="rr-replay-${CASE}-${BASEDIR1_NAME}.prt"

# Go with heavy load in case the rqg_batch.pl ResourceControl allows it.
# The rqg_batch.pl ResourceControl should be capable to avoid trouble with resources.
# Per experience:
# More general load on the testing raises the likelihood to find or replay a
# concurrency bug.

# calc_parallel <number of CPUs> <number of guests>
# Prints the number of concurrent RQG runs to use.
# - guests > 0 : 80% of the CPUs. Colleagues are on the box and most probably running
#                rr replay. So do not raise the load too much.
# - guests == 0: three times the number of CPUs
# The result is limited to the range 1 .. 270.
# - If the value is > ~270 then we get trouble with some resources.
# - 8 * CPUs / 10 is 0 on a box with one CPU, so the lower limit 1 is enforced.
calc_parallel()
{
   local cpus="$1"
   local guests="$2"
   local workers
   if [ "$guests" -gt 0 ]
   then
      workers=$((8 * cpus / 10))
   else
      workers=$((cpus * 3))
   fi
   if [ "$workers" -gt 270 ]
   then
      workers=270
   fi
   if [ "$workers" -lt 1 ]
   then
      workers=1
   fi
   echo "$workers"
}

NPROC=$(nproc)
# Guests are the logged in users other than the current user and root.
# The user name column of 'who' gets compared exactly. A regular expression applied to the
# whole line would also drop users whose line just contains the name somewhere, and an
# empty $USER would make it match every line.
GUEST_ON_BOX=$(who | awk -v me="${USER:-$(id -un)}" \
   '$1 != me && $1 != "root" { n++ } END { print n + 0 }')
echo "Number of guests logged into the box: $GUEST_ON_BOX"
# GUEST_ON_BOX=0
PARALLEL=$(calc_parallel "$NPROC" "$GUEST_ON_BOX")

# Size of the testing campaign
# ----------------------------
# Two limits exist: TRIALS and MAX_RUNTIME. As soon as one of them gets exceeded rqg_batch.pl
# stops the ongoing RQG runs, makes a cleanup, gives a summary and exits.
#
# TRIALS counts the regular finished RQG runs (!= stopped by rqg_batch.pl for whatever reason).
# Which of the two limits is the better one depends on the situation:
# - (mostly) hitting no internal error in the RQG runner ('rqg.pl'), the RQG core (lib/*) and
#   the ingredients invoked (reporter, validator, grammar, ...)
#   --> MAX_RUNTIME is the better limiter.
# - (sometimes) hitting some internal error in one of the components listed above
#   --> TRIALS is the better limiter because the situation is roughly hopeless and
#       you get an earlier end with less resource consumption.
TRIALS=1600

# MAX_RUNTIME is a limit for defining the size of a simplification campaign.
# Elapsed runtime of the RQG batch run =
#      assigned max_runtime
#    + time for stopping the active RQG Workers + cleanup (usually less than 10 seconds)
MAX_RUNTIME=72000

# Only one temporary 'God' (rqg_batch.pl vs. concurrent MTR, single RQG or whatever) on testing box
# -------------------------------------------------------------------------------------------------
# in order to avoid "ill" runs where
# - current rqg_batch run ---- other ongoing rqg_batch run
# - current rqg_batch run ---- ongoing MTR run
# clash on the same resources (vardir, ports -> MTR_BUILD_THREAD, maybe even files) or
# suffer from tmpfs full etc.
# Testing tool | Programs            | Standard locations
# -------------+---------------------+---------------------------
# rqg_batch.pl | perl, mysqld,   rr  | /dev/shm/rqg/* /data/rqg/*
# MTR          | perl, mariadbd, rr  | /dev/shm/var*
#
# WARNING: Both commands are unconditional and destructive.
# - SIGKILL gets sent to the processes named perl, mysqld, mariadbd or rr, no matter if
#   they belong to some RQG or MTR run or not.
# - Everything matching the standard locations listed above gets removed recursively.
killall -9 perl mysqld mariadbd rr
rm -rf /dev/shm/rqg/* /dev/shm/var* /data/rqg/*

# Usually VARDIR should offer sufficient space for just a few fat core files caused by ASAN.
# The RQG runner already takes care that everything important inside his VARDIR gets saved
# in his WORKDIR and empties his VARDIR. rqg_batch.pl will empty the VARDIR of this RQG
# runner again. So the space consumption of a core is only temporary.
# The rqg_batch.pl ResourceControl will also take care to avoid VARDIR full.
# If it is not an ASAN build then this environment variable is harmless anyway.
ASAN_OPTIONS=abort_on_error=1,disable_coredump=0
export ASAN_OPTIONS
# The command substitution stays unquoted: its output gets word split before printing.
echo "Have set "$(env | grep ASAN)

# An assigned YY grammar gets offered for editing in vi before the run starts.
# Without a grammar nothing happens here.
if [ -n "$GRAMMAR" ]
then
   vi "$GRAMMAR"
fi

# Remove the protocol file of some previous run.
# Quoted and protected by '--' because the name is derived from the config file name and
# the basedir name, which might contain blanks, glob characters or a leading '-'.
rm -f -- "$PROT"

# From here on a pipeline fails if any of its commands fails.
set -o pipefail
# Options
# -------
# 0. Please take care that there must be a '\' at line end.
#
# 1. Remove the logs of RQG runs achieving STATUS_OK/verdict 'ignore_*'.
#    Their stuff grammar/datadir was not archived and is already thrown away.
#    So basically:
#    Do not assign '--discard_logs' in case you want to see logs of RQG runs which achieved
#    the verdict 'ignore_*' (blacklist match or STATUS_OK or stopped by rqg_batch.pl)
# --discard_logs                                                       \
#
# 2. Per default the data (data dir of server, core etc.) of any RQG run which replays or is at
#    least of interest gets archived.
#    In case you do not want that archiving then you can disable it.
#    But that is rather suitable for runs of the test simplifier only.
#    rr tracing enabled requires that archiving is not disabled.
# --noarchiving                                                        \
#
# 3. Do not abort if hitting Perl errors or STATUS_ENVIRONMENT_FAILURE. IMHO a rather
#    questionable option. I am unsure if that option gets handled correctly in rqg_batch.pl.
# --force                                                              \
#
# 4. Debugging of the rqg_batch.pl tool machinery and rqg.pl
#    Default: Minimal debug output.
#    Assigning '_all_' causes maximum debug output.
#    Warning: Significant more output of especially rqg_batch.pl and partially rqg.pl.
# --script_debug=_all_                                                 \
#
# 5. "--no-mask", "--mask", "--mask_level"
#    rqg_batch.pl
#    - does not support "--mask=...", "--mask_level=..." on command line
#    - accepts any "--no-mask" from the command line but passes it through to Combinator or Simplifier
#    The Simplifier
#    - ignores any "--no-mask", "--mask=...", "--mask_level=..." got from wherever
#    - always assigns "--no-mask" to any call of a RQG runner
#
# 6. rqg_batch.pl prints how it would start RQG Workers and any RQG Worker started "fakes" that
#    it has achieved the verdict assigned. == There are no "real" RQG runs at all.
#    Example:
#    --dryrun=replay --> All RQG Worker started "tell" that they have achieved some replay.
#    Use cases:
#    a) When using the Combinator see which combinations would get generated.
#    b) When using the Simplifier see how it would be tried to shrink the grammar.
#    c) --dryrun=ignore_unwanted  see a) or b) and how TRIALS would be the limiter.
# --dryrun=ignore_unwanted                                             \
# --dryrun=replay                                                      \
#
# 7. rqg_batch.pl stops all RQG runners immediately when reaching the assigned number of replays
#    Stop after the first replay
# --stop_on_replay                                                     \
#    Stop after the n'th replay
# --stop_on_replay=<n>                                                 \
#
# 8. Use "rr" (https://github.com/mozilla/rr/wiki/Usage) for tracing DB servers and other
#    programs.
#
#    "rr" tracing of all servers started (lib/DBServer/MySQL/MySQLd.pm    sub startServer)
#    This is the default.
# --rr                                                                 \
#    or better
# --rr=Server                                                          \
#
#    Preserve the 'rr' traces of the bootstrap, server starts and mariabackup calls.
#    This is the optimal setting for InnoDB QA.
# --rr=Extended                                                        \
#
#    Recommended settings (Info taken from rr help)
#    '--chaos' randomize scheduling decisions to try to reproduce bugs
#    '--wait'  Wait for all child processes to exit, not just the initial process.
# --rr_options='--chaos --wait'                                        \
#
#    "rr" checks which CPU is used in your box.
#    In case your version of "rr" is too old or your CPU is too new then the check might fail
#    and cause the call of 'rr' to fail.
#    Example:
#    Box having "Intel Skylake" CPU's, "rr" version 4 contains the string "Intel Skylake" but
#    claims to have met some unknown CPU.
#    Please be careful with the single and double quotes.
# --rr_options="\'--microarch='Intel Kabylake'\'"                     \
#
#    One rr option which seems to be recommended anywhere
# --rr_options="--chaos"                                              \
#

# In case you distrust the rqg_batch.pl mechanics or the config file etc. then going with some
# limited number of trials is often useful.
# TRIALS=3
# PARALLEL=2
# TRIALS=2
# PARALLEL=2
# TRIALS=1
# PARALLEL=1
#

# Start rqg_batch.pl in background via nohup. stdout and stderr go into the protocol file.
# - Values derived from the command line are quoted so that names containing blanks stay
#   one argument and the redirection does not become ambiguous.
# - ${GRAMMAR_PART:+...} expands to nothing if no grammar was assigned.
# - --max_runtime hands the MAX_RUNTIME limit defined above over to rqg_batch.pl.
#   Without that option the limit would have no effect.
nohup perl -w ./rqg_batch.pl                                           \
--parallel="$PARALLEL"                                                 \
--basedir1="$BASEDIR1"                                                 \
${GRAMMAR_PART:+"$GRAMMAR_PART"}                                       \
--config="$CONFIG"                                                     \
--max_runtime="$MAX_RUNTIME"                                           \
  --trials="$TRIALS"                                                   \
  --rr=Extended                                                        \
  --rr_options="--chaos --wait"                                        \
  --stop_on_replay                                                     \
  --sqltrace                                                           \
  --discard_logs                                                       \
--type=RQG_Simplifier                                                  \
--no-mask                                                              \
--script_debug=_nix_                                                   \
> "$PROT" 2>&1 &

# Avoid that "tail -f ..." starts before the file exists.
# Poll at most 21 times with a pause of 0.1s for the protocol file.
# STATE: 0 == protocol file found, 1 == gave up waiting
# "$PROT" is quoted: an unquoted empty value would turn the check into '[ -f ]' which is
# always true, and a name containing blanks would break the test.
STATE=1
NUM=0
while [ "$NUM" -le 20 ]
do
   sleep 0.1
   NUM=$((NUM + 1))
   if [ -f "$PROT" ]
   then
      STATE=0
      break
   fi
done

if [ "$STATE" -eq 1 ]
then
   echo "ERROR: Most probably in RQG mechanics or setup."
   echo "ERROR: The (expected) protocol file '$PROT' did not show up"
   exit 4
fi

# Show the last 40 lines of the protocol and follow it.
tail -n 40 -f -- "$PROT"

