Script of sith clean_ds

#!/usr/bin/bash

# ----- definition of functions starts ----------------------------------------
print_help() {
echo "
This code checks the aminoacids in the 'data runnig' directory and evaluates
the state of the running (running, completed, failed). In case to be completed,
it is moved to the dataset directory.

  -r  running directory. default ./
  -s  data set directory. default ../random3
  -u  user. defatult sucerqdl

  -v  verbose.
  -h  prints this message.
"
exit 0
}

# ----- definition of functions finishes --------------------------------------

# ----- set up starts ---------------------------------------------------------
# General variables
running_directory="./"
dataset="../random3"
user="sucerqdl"
verbose=''
while getopts 'r:s:u:vh' flag;
do
  case "${flag}" in
    r) running_directory=${OPTARG} ;;
    s) dataset=${OPTARG} ;;
    u) user=${OPTARG} ;;

    v) verbose='-v' ;;
    h) print_help ;;
    *) echo "for usage check: sith <function> -h" >&2 ; exit 1 ;;
  esac
done

source "$(sith basics -path)" CHECK_DS $verbose 

verbose -t "JOB information"
verbose -t "==============="
verbose -t " \* Date:"
verbose -t $(date)
verbose -t " \* Command:"
verbose -t "$0" "$@"

# ---- BODY -------------------------------------------------------------------

# requierements
if [ ! -d "$dataset" ]
then
  fail "The directory specified as data set does not exist. check
    'sith clean_ds -h' for more info"
fi

if [ ! -d "$running_directory" ]
then
  fail "The directory specified as running directory does not exist. check
    'sith clean_ds -h' for more info"
fi

mapfile -t job_ids < <( squeue -u "$user" | tail -n +2 |\
                        awk '{print $1}' ) || \
                        fail "You have to run this code in a cluster using
                          slurm and specify an existing user with -u option."

# ==== creating output
echo "# amino state (R:running, C: completed, E: error)" > 00-aminos.txt

# ---- collect running peptides
running_pep=( )
for id in "${job_ids[@]}"
do
  if  ! find . -maxdepth 1 -type f -name "*$id.o" > /dev/null 2>&1
  then
    fail "non peptide output found. This code is expeting to find a file
      with the id of each job in the running directory"
  else
    random="$(grep -A 1 Command "peptides_analysis-$id.o" | tail -n 1 | \
              grep -c "\-R")"
    if [ "$random" -eq 0 ]
    then
      com="$(grep -A 1 Command "peptides_analysis-$id.o" | tail -n 1 )"
      pep=$(echo "${com##*p }" | cut -d ' ' -f 1)
    else
      string=$(grep "WARNING The code created the random peptide " \
                    "peptides_analysis-$id.o" )
      pep=$(echo "${string#*peptide }" | cut -d ',' -f 1)
    fi

    if [ "${#pep}" -eq 0 ]
    then
      fail "$id  is running but the output (peptides_analysis-<id>.o)
        does not exist or does not show the peptide yet."
    fi
    running_pep+=( "$pep" )
    echo "$pep" "   R" >> 00-aminos.txt
    # move other files
    mapfile -t other_files < <(find . -maxdepth 1 -type f -name "*.o" \
                                      -exec grep -l "$pep" {} \;)
    for fil in "${other_files[@]}"
    do
      if [ "$fil" !=  "./peptides_analysis-$id.o" ]
      then
        verbose "moving $fil into $pep"
        mv "$fil" "$pep"
        mv "${fil%*.o}.e" "$pep"
      fi
    done
  fi
done

# add  peptides already in dataset
mapfile -t completed_pep < <(ls "$dataset")
for pep in "${completed_pep[@]}"
do
  echo "$pep" "   C" >> 00-aminos.txt
done

# ---- collect non-running peptides
verbose "Non running peptides"
mapfile -t peps_here < <( find . -maxdepth 1 -type d | tail -n +2)

for pep in "${peps_here[@]}"
do
  # shellcheck disable=SC2199
  if [[ ! "${running_pep[@]}" =~ ${pep:2} ]]
  then
    # move files into the proper directory
    mapfile -t out_files < <(find . -maxdepth 1 -type f -name "*.o" \
                                    -exec grep -l "${pep:2}" {} \;)
    for fil in "${out_files[@]}"
    do
      mv "$fil" "$pep"
      mv "${fil%*.o}.e" "$pep"
    done
    if [ -d "$pep/rupture" ]
    then
      warning "moving $pep to the dataset directory"
      mv "$pep" "$dataset" || fail "trying to move ${pep:2} to $dataset"
      echo "${pep:2}" "   C" >> 00-aminos.txt
    else
      warning "something happened with the stretching of peptide
        ${pep:2}"
      echo "${pep:2}" "   E" >> 00-aminos.txt
    fi
  fi
done

# Now separates the non-running by completed and error. Understanding error as
# anything that aviods to complete

verbose "summary"
echo "completed Jobs: $(grep -c "  C" 00-aminos.txt )"
echo "running Jobs: $(grep -c "  R" 00-aminos.txt )"
echo "failed Jobs: $(grep -c "  E" 00-aminos.txt )"

finish