Skip to content

Commit

Permalink
version 1.2
Browse files Browse the repository at this point in the history
  • Loading branch information
Andreas Buerki committed Jan 3, 2021
1 parent 8c887a5 commit b0f143a
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 16 deletions.
Binary file added TUTORIAL.pdf
Binary file not shown.
65 changes: 51 additions & 14 deletions bin/substring-B.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# substring-B.sh
copyright="Copyright (c) 2016-18 Cardiff University, 2011-2014 Andreas Buerki"
# licensed under the EUPL V.1.1.
version="1.0.1"
version="1.2"
####
# DESCRIPTION: performs frequency consolidation among n-grams of different lengths
# for options see -h
Expand Down Expand Up @@ -48,6 +48,8 @@ version="1.0.1"
# (0.9.9.1)
# 24 Aug 2018 renamed script to substring-B.sh, to fit with new architecture of
# (1.0) the whole SubString package
# 03 Jan 2021 raised the limit of uncut lists from 12 to 29 and fixed the order
# (1.2) of consolidation for n-grams of n > 9
#############################################
# define help function
#############################################
Expand Down Expand Up @@ -194,7 +196,7 @@ done
#############################################
prep_stage ( ) {
# reading files into memory
if [ "$bash_v4" ]; then
if [ "$bash_v4orlater" ]; then
# make sure we're starting afresh
unset -v 'uncut_list' 'long_list'
short_list=
Expand Down Expand Up @@ -316,7 +318,7 @@ for line in $(cut -d '.' -f 1 <<< "$short_list"); do # line without freqs of fir
#fi

# step 4
if [ "$bash_v4" ]; then
if [ "$bash_v4orlater" ]; then
if [ -z "${long_list["$superstring"]}" ]; then
# if this superstring was not found in second cut list
# try to find it in the uncut list
Expand Down Expand Up @@ -345,7 +347,7 @@ if [ "$verbose" ]; then
echo ""
fi
# write to file and tidy up
if [ "$bash_v4" ]; then
if [ "$bash_v4orlater" ]; then
for i in "${!long_list[@]}"; do echo "$i.${long_list[$i]}"; done > $2
unset -v 'uncut_list' 'long_list'
short_list=
Expand Down Expand Up @@ -492,8 +494,42 @@ do
uncut11=$OPTARG
elif [ $number_of_uncut_lists -eq 12 ]; then
uncut12=$OPTARG
# lists 13 to 29: store each -u argument in its own uncutN variable, where N
# is the running count of uncut lists, so that a later list never overwrites
# an earlier one
elif [ $number_of_uncut_lists -eq 13 ]; then
uncut13=$OPTARG
elif [ $number_of_uncut_lists -eq 14 ]; then
uncut14=$OPTARG
elif [ $number_of_uncut_lists -eq 15 ]; then
uncut15=$OPTARG
elif [ $number_of_uncut_lists -eq 16 ]; then
uncut16=$OPTARG
elif [ $number_of_uncut_lists -eq 17 ]; then
uncut17=$OPTARG
elif [ $number_of_uncut_lists -eq 18 ]; then
uncut18=$OPTARG
elif [ $number_of_uncut_lists -eq 19 ]; then
uncut19=$OPTARG
elif [ $number_of_uncut_lists -eq 20 ]; then
uncut20=$OPTARG
elif [ $number_of_uncut_lists -eq 21 ]; then
uncut21=$OPTARG
elif [ $number_of_uncut_lists -eq 22 ]; then
uncut22=$OPTARG
elif [ $number_of_uncut_lists -eq 23 ]; then
uncut23=$OPTARG
elif [ $number_of_uncut_lists -eq 24 ]; then
uncut24=$OPTARG
elif [ $number_of_uncut_lists -eq 25 ]; then
uncut25=$OPTARG
elif [ $number_of_uncut_lists -eq 26 ]; then
uncut26=$OPTARG
elif [ $number_of_uncut_lists -eq 27 ]; then
uncut27=$OPTARG
elif [ $number_of_uncut_lists -eq 28 ]; then
uncut28=$OPTARG
elif [ $number_of_uncut_lists -eq 29 ]; then
uncut29=$OPTARG
else
echo "no more than 12 uncut lists allowed" >&2
echo "no more than 29 uncut lists allowed" >&2
exit 1
fi
;;
Expand Down Expand Up @@ -736,12 +772,12 @@ else
fi
# check version of bash in use
if [ -z "$force_bash3" ]; then
BASH_V="$(bash --version | egrep -o "version [45]" | cut -d ' ' -f 2)"
BASH_V="$(bash --version | egrep -o "version [456789]" | cut -d ' ' -f 2)"
if [ $BASH_V -gt 2 ]; then
#if [ "$(grep '^4' <<< $BASH_VERSION)" ] || [ "$(grep '^5' <<< $BASH_VERSION)" ] ; then
bash_v4=true
bash_v4orlater=true
else
echo "Warning: $(basename $0) is running under bash version $BASH_VERSION. If possible, upgrade bash on your system to version 4.3 or later." >&2
echo "WARNING: $(basename $0) is running under bash version $BASH_VERSION. If possible, upgrade bash on your system to version 4.3 or later. Support for bash $BASH_VERSION is not fully tested and might be discontinued in a future version of $(basename $0)." >&2
fi
elif [ "$verbose" ]; then
echo "forcing processing with bash 3"
Expand Down Expand Up @@ -803,7 +839,7 @@ if [ $number_of_lists -ne "$(ls $SCRATCHDIR/*.lst | wc -l)" ]; then
fi
# check if we have empty lists and reduce the number of lists by the number
# of empty lists found, making sure that the lists remain consecutive in
# n-size (any applied 1-gram list was already checked)
# n-size; any applied 1-gram list was already checked
if [ -e $SCRATCHDIR/1.lst ]; then
n=$number_of_lists
else
Expand All @@ -824,7 +860,8 @@ for number in $(eval echo {$n..$min});do
done
# name n-gram lists with the 'argN' variable
current=1 # create count variable for naming
for ii in $(ls $SCRATCHDIR/*.lst); do
for ii in $(ls $SCRATCHDIR/*.lst | sort -V); do # employing version sort to get numeric sort
#echo "CHECK: $ii"
if [ -s $ii ]; then # if they are non empty
eval arg$current=$ii # create variable with the name of the list
((current +=1))
Expand All @@ -837,13 +874,13 @@ if [ -z "$doc" ] && [ "$(head -1 $(eval echo \$arg$number_of_lists) | cut -d '.'
mv $(eval echo \$arg$number_of_lists).alt $(eval echo \$arg$number_of_lists)
fi
####### start consolidation #######
# report to user
if [ "$verbose" ]; then
echo "$number_of_lists lists to consolidate"
fi
# initialise indices
longlistindex="$number_of_lists"
longlistminusindex=$(( $longlistindex - 1 ))
# report to user
if [ "$verbose" ]; then
echo "$number_of_lists lists to consolidate, longest list is $(basename $(eval echo \$arg$longlistindex))."
fi
# start loops
until [ 1 -gt $longlistminusindex ]
do
Expand Down
4 changes: 2 additions & 2 deletions bin/substring.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ export PATH="$PATH:/usr/local/bin:/usr/bin:/bin:"$HOME/bin"" # needed for Cygwin
# substring.sh
copyright="Copyright (c) 2016-18 Cardiff University, 2011-2014 Andreas Buerki"
# licensed under the EUPL V.1.1.
version="1.1.2"
version="1.2"
####
# DESCRIPTION: this is an interactive wrapper script for the SubString package
# SYNOPSIS: substring.sh [OPTIONS]
Expand Down Expand Up @@ -553,7 +553,7 @@ if [ -e uncut/31.lst ]; then
echo "ERROR: SubString is not designed to consolidate n-grams with n > 30."
exit 0
fi
substring-B.sh -dv $(for list in 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30; do if [ -e uncut/$list.lst ]; then echo -n "-u uncut/$list.lst ";fi;done) $(for list in $(ls cut); do echo -n "cut/$list ";done) || exit 1
substring-B.sh -dv $(for list in 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30; do if [ -e uncut/$list.lst ]; then echo -n "-u uncut/$list.lst ";fi;done) $(for list in $(ls cut | sort -n); do echo -n "cut/$list ";done) || exit 1
mv neg_freq.lst "$indir/neg_freq.txt" 2> /dev/null
# ascertain filename of consolidated list
filename=$(ls *substrd)
Expand Down
7 changes: 7 additions & 0 deletions release_notes.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
release notes v. 1.2
********************

substring-B.sh
- fixed an issue introduced in version 1.1.2, where the order of consolidation was mixed up for n-grams of n > 9.
- extended limit of 12 uncut lists to 29 uncut lists.

release notes v. 1.1.2
**********************

Expand Down

0 comments on commit b0f143a

Please sign in to comment.