Script of sith stretching

#!/bin/bash

#SBATCH -N 1                   # number of nodes
#SBATCH --cpus-per-task=1
#SBATCH -t 24:00:00
#SBATCH --output=%x-%j.o
#SBATCH --error=%x-%j.e

# ----- definition of functions starts ----------------------------------------
print_help() {
echo "
This tool obtains the stretched configurations of a molecule by increasing the
distance between two atoms, constraining and optimizing at every step.

  -b  <number_of_breakages=1> The simulation will run until get this number of
      ruptures.
  -c  Use this flag to run in a cluster. When -p is not defined, and you run in
      a slurm job manager system, the of processors is equal to the number of
      cores asked in the submission of the job.
  -e  <extend_method=0> index of stretching method. To see the options, use
      'sith change_distance -h' to see the order.
      carbons of the capping groups
  -i  <index1,index2> indexes of the atoms to use for increasing the distance.
      If this flag is not used and 'molecule' (flag -m) is a pdb, indexes 1 and
      2 will correspond to the CH3 atoms in ACE and NME residues defined in the
      pdb if they exist.
  -l  <xc,base="bmk,6-31+g"> evel of DFT theory.
  -m  <molecule> molecule name. In this directory, a file called
      <molecule>-stretched00.pdb must exist.
  -p  <processors=1> number of processors per gaussian job.
  -r  restart stretching. In this case, this code must be executed from
      the molecule's directory.
  -s  <size[A]=0.2> Size of the step that increases the distances at each step.

  -v  verbose
  -h  prints this message.
"
exit 0
}

# ----- definition of functions finishes --------------------------------------

# ----- set up starts ---------------------------------------------------------
# General variables
breakages=1
method=0
restart='false'
size=0.2
verbose=''
indexes=''
level="bmk,6-31+g"
cluster='false'
n_processors=''
retake='true'
while getopts 'b:ce:i:l:m:p:rs:vh' flag; do
  case "${flag}" in
    b) breakages=${OPTARG} ;;
    c) cluster='true' ;;
    e) extend_method=${OPTARG} ;;
    i) indexes=${OPTARG} ;;
    l) level=${OPTARG} ;;
    m) mol=${OPTARG} ;;
    p) n_processors=${OPTARG} ;;
    r) restart='true' ;;
    s) size=${OPTARG} ;;

    v) verbose='-v' ;;
    h) print_help ;;
    *) echo "for usage check: sith <function> -h" >&2 ; exit 1 ;;
  esac
done

source "$(sith basics -path)" STRETCHING $verbose

if $cluster
then
  load_modules
  if [[ -z "$n_processors" ]] 
  then
    if [[ ! -z "$SLURM_CPUS_ON_NODE" ]]
    then
      n_processors=$SLURM_CPUS_ON_NODE
    else
      n_processors=1
    fi
  fi
fi

# starting information
verbose -t "JOB information"
verbose -t "==============="
verbose -t " \* Date:"
verbose -t $(date)
verbose -t " \* Command:"
verbose -t "$0" "$@"

# stretching method
if [[ "$extend_method" -eq 0 ]]
then
  if [ "$breakages" -eq 1 ]
  then
    method='scale_distance'
  else
    method='increase_distance_with_constraints'
  fi
fi
verbose "The stretching method will be '$method'"

# level of theory
xc_functional=$(echo $level | cut -d ',' -f 1)
basis_set=$(echo $level | cut -d ',' -f 2)

# check dependencies
ase -h &> /dev/null || fail "This code needs ASE"

mol_file=$mol
mol=${mol_file%.*}
# ----- set up finishes -------------------------------------------------------

# ---- BODY -------------------------------------------------------------------

# ----- checking restart ------------------------------------------------------
if $restart
then
  [[ -f 'frozen_dofs.dat' ]] || fail "frozen_dofs.dat doesn't exist"
  indexes=$(head -n 1 frozen_dofs.dat | awk '{print $1","$2}')

  # extracting last i with xyz file already created
  mapfile -t previous < <( find . -maxdepth 1 -type f -name "$mol*.xyz" \
                                  -not -name "*bck*" | sort )

  [ ${#previous} -eq 0 ] && fail "Non previous xyz files were found"

  wext=${previous[-1]}
  last=${wext%.*}
  if [ "${last: -1}" == 'a' ]
  then
    last=${last::-2}
  fi
  i=$(( 10#${last:0-2} ))
  verbose "Restarting $mol, searching last optimization, $i is the last
           stretching step detected"

  # searching incomplete optimization trials
  nameiplusone=$(printf "%02d" "$(( i + 1 ))")

  # searching advances in i+1.
  # retake='true' means that the last configuration was not taken from an
  # incomplete job. In the next block, we search for advances in i+1 and if it
  # finds one, it restarts from there and sets retake='false' as a consecuence,
  # this variable is used later in the loop.
  sith log2xyz "$mol-stretched${nameiplusone}.log" \
    --indexes "[$indexes]" 2> /dev/null && \
    create_bck "$mol-stretched${nameiplusone}."* &&
    lastone=$( search_last_bck $mol-stretched${nameiplusone} ) &&
    if [ $(( lastone )) -gt 2 ]; then fail "this optimization was" \
      "restarted more than 3 times and didn't converged."; fi &&
    echo "coping $mol-stretched${nameiplusone}-bck_$lastone.xyz" &&
    sith change_distance "$mol-stretched${nameiplusone}-bck_$lastone.xyz" \
      "$mol-stretched${nameiplusone}" frozen_dofs.dat 0 0 "$method" \
      --xc "'$xc_functional'" --basis "'$basis_set'" && \
    retake='false' && \
    warning "The stretching of molecule $mol will be restarted
      from $(( i + 1 ))"

  # if i+1 trial doesn't exist
  $retake && \
      warning "The stretching of molecule $mol will be restarted from $i"
else
  if [[ -z "$indexes" ]] && [[ "${mol_file##*.}" == 'pdb' ]]
  then
    # reading indexes from pdb file C-CAP indexes in gaussian convention
    index1=$( grep ACE "$mol.pdb" | grep CH3 | awk '{print $2}' )
    index2=$( grep NME "$mol.pdb" | grep CH3 | awk '{print $2}' )
    index3=$( grep ATOM "$mol.pdb" | awk '{if ( $5 == 2 ) print $0}' | grep "CA" \
              | awk '{print $2}' )
    indexes="$index1,$index2,$index3"
  else
    # reading indexes from user input
    index1=$( echo "$indexes" | cut -d ',' -f 1 )
    index2=$( echo "$indexes" | cut -d ',' -f 2 )
    indexes="$index1,$index2"
  fi

  # check that the indexes were read properly:
  [[ "$index1" -eq 0 && "$index2" -eq 0 ]] && fail "Not recognized indexes (1)
    check -i flag"
  [[ "$index1" -eq 1 && "$index2" -eq 1 ]] && fail "Not recognized indexes (1)
    check -i flag"
  [[ "$index1" == "$index2" ]] && fail "Not recognized indexes (1) check -i
    flag"
  if ! [[ -f 'frozen_dofs.dat' ]]
  then
    echo "$index1 $index2 F" > frozen_dofs.dat
  fi

  verbose "This code will stretch the atoms with the indexes $index1 $index2
    (1-based indexing)."

  # in case of not restarting
  i=-1
fi
# ----- checking restart finishes ---------------------------------------------

# ----- stretching starts -----------------------------------------------------
verbose "Stretching of $mol starts and will run until getting $breakages
  ruptures"

while [[ "$( wc -l < "frozen_dofs.dat" )" -le "$breakages" ]]
do
  # names by index
  namei=$(printf "%02d" "$i")
  nameiplusone=$(printf "%02d" $(( i + 1)))
  nameiplustwo=$(printf "%02d" $(( i + 2)))

  verbose "Stretched ${nameiplusone} starts"
  # Creates gaussian input file
  if [ $(( i + 1)) -eq 0 ]
  then
    # initial gaussian optimization
    verbose "The first gaussian process is an optimization"
    sith change_distance "$mol_file" \
      "$mol-stretched00" frozen_dofs.dat 0 0 "$method" \
      --xc "'$xc_functional'" --basis "'$basis_set'" || \
      fail "Creating initial gaussian input"
    sed -i  '/^TV  /d' "$mol-stretched00.com"
    sed -i "/opt/d" "$mol-stretched00.com"
  else
    if "$retake"
    then
      # if the last configuration was not taken from an incomplete job
      sith change_distance \
        "$mol-stretched$namei.xyz" "$mol-stretched${nameiplusone}" \
        frozen_dofs.dat "$size" 0 "$method" \
        --xc "'$xc_functional'" --basis "'$basis_set'"\
        || fail "Creating gaussian input of stretching $nameiplusone"
    fi
    retake='true'
    sed -i '$d' "$mol-stretched${nameiplusone}.com"
    # add constrains
    cat frozen_dofs.dat >> \
    "$mol-stretched${nameiplusone}.com"  
  fi
  sed -i "1a %NProcShared=$n_processors" "$mol-stretched${nameiplusone}.com"
  sed -i "/#P/a opt(modredun,calcfc)" "$mol-stretched${nameiplusone}.com"

  # run gaussian
  verbose "Running optmization of stretching ${nameiplusone}"
  gaussian "$mol-stretched${nameiplusone}.com" \
      "$mol-stretched${nameiplusone}.log" || \
    { if [ "$(grep -c "Atoms too close." \
            "$mol-stretched${nameiplusone}.log")" \
            -eq 1 ]; then fail "Atoms too close for ${nameiplusone}" ; \
      fi ; }

  # check convergence from output
  output=$(grep -i optimized "$mol-stretched${nameiplusone}.log" | \
           grep -c -i Non )
  if [ "$output" -ne 0 ]
  then
    # If the code enters here is because, in a first optimization, it
    # didn't converge so it has to run again to get the optimized 
    # structure. As a second chance to converge.
    verbose "Optimization did not converge with distance $(( i + 1 )) *
      $size . Then, a new trial will start now"
    sith log2xyz "$mol-stretched${nameiplusone}.log" \
      --indexes "[$indexes]" || fail "
      Transforming log file to xyz in second trial of optimization"
    sith change_distance \
            "$mol-stretched${nameiplusone}.xyz" \
            "$mol-stretched${nameiplustwo}" frozen_dofs.dat 0 0 "$method" \
            --xc "'$xc_functional'" --basis "'$basis_set'" || fail "changing distance"
    # save the failed files in ...-stretched<number>a.*
    create_bck "$mol-stretched${nameiplusone}"*
    # then restart the optimization
    mv "$mol-stretched${nameiplustwo}.com" "$mol-stretched${nameiplusone}.com"
    sed -i "s/stretched${nameiplustwo}/stretched${nameiplusone}/g" \
      "$mol-stretched${nameiplusone}.com"
    sed -i "1a %NProcShared=$n_processors" "$mol-stretched${nameiplusone}.com"
    sed -i "/#P/a opt(modredun,calcfc)" "$mol-stretched${nameiplusone}.com"
    sed -i '$d' "$mol-stretched${nameiplusone}.com"
    cat frozen_dofs.dat >> \
      "$mol-stretched${nameiplusone}.com"
    # run optimization
    verbose "Re-running optimization"
    gaussian "$mol-stretched${nameiplusone}.com" \
        "$mol-stretched${nameiplusone}.log"
  fi

  # check the output again
  output=$(grep -i optimized "$mol-stretched${nameiplusone}.log" | \
           grep -c -i Non )
  [ "$output" -ne 0 ] && failed "Optimization when the stretched distance was
      $(( i + 1 ))*0.2 didn't converge. No more stretching will be applied"

  # creating fchk file
  formchk -3 "$mol-stretched${nameiplusone}.chk" || fail "Creating fchk file"

  # ==== Testing DOFs
  verbose "Testing dofs"
  sith log2xyz "$mol-stretched${nameiplusone}.log" \
    --indexes "[$indexes]" || fail "Transforming
    log file to xyz with indexes [$indexes]"
  
  if [ "$i" -eq -1 ]
  then
    extrad=".."
  else
    # check the bonds in 'i' that are not in 'i+1'. If any, they are added to
    # frozen_dofs.dat
    extrad=$( sith diff_bonds "$mol-stretched${namei}.xyz" \
              "$mol-stretched${nameiplusone}.xyz" )
  fi

  if [ ${#extrad} -ne 2 ]
  then
    # create bck in rupture directory in case it already exists
    verbose "rupture found in $extrad, this bond will be frozen"
    mkdir -p rupture ; cd rupture
    create_bck "$mol-stretched${nameiplusone}"
    cd .. 
    mv "$mol-stretched${nameiplusone}"* rupture/
  else
     verbose "Non-rupture detected in stretched ${nameiplusone}"
  fi

  verbose "Stretched ${nameiplusone} finished"
  # next i
  i=$(( i + 1 ))
done

# ----- stretching finishes ---------------------------------------------------

finish "$mol finished"