diff --git a/bio/bio-align/bio.sh b/bio/bio-align/bio.sh
deleted file mode 100755
index 7c615bcda..000000000
--- a/bio/bio-align/bio.sh
+++ /dev/null
@@ -1,33 +0,0 @@
-#!/bin/bash
-
-# https://www.biostars.org/p/43677/
-# https://github.com/h3abionet/h3agatk
-# https://docs.google.com/document/d/1siCZrequI4plggz3ho351NnX57CoyCJl9GWp3azlxfU/edit#
-bwa mem -M -p -t [num_threads] \
-    -R "@RG\tID:1\tPL:ILLUMINA\tPU:pu\tLB:group1\tSM:SAMPLEID" \
-    [reference_fasta] \
-    [input_fastq] > [output]
-
-bwa mem genome.fa reads.fastq | samtools sort -o output.bam -
-
-# https://www.biostars.org/p/43677/
-bwa aln -t 4 ./hg19.fasta ./s1_1.fastq > ./s1_1.sai
-bwa aln -t 4 ./hg19.fasta ./s1_2.fastq > ./s1_2.sai
-bwa sampe ./hg19.fasta ./s1_1.sai ./s1_2.sai ./s1_1.fastq ./s1_2.fastq |
-    samtools view -Shu - |
-    samtools sort - - |
-    samtools rmdup -s - - |
-    tee s1_sorted_nodup.bam |
-    bamToBed > s1_sorted_nodup.bed
-
-# 4 cores, -M is for Picard compatibility
-bwa mem -M -t 4 ./hg19.fasta ./s1_1.fastq ./s1_2.fastq > s1.sam
-
-samtools merge - *.bam |
-#    tee merged.bam |
-    samtools rmdup - - |
-#    tee rmdup.bam |
-    samtools mpileup -uf ./hg19.fasta - |
-    bcftools view -bvcg - | gzip > var.raw.bcf.gz
-
-bwa sampe ./hg19.fasta <(bwa aln -t 4 ./hg19.fasta ./s1_1.fastq) <(bwa aln -t 4 ./hg19.fasta ./s1_2.fastq) ./s1_1.fastq ./s1_2.fastq | samtools view -Shb /dev/stdin > s1.bam
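The bio.sh recipes above rely on legacy samtools/bcftools interfaces (samtools sort - -, samtools rmdup, bcftools view -bvcg) that current releases have dropped. A minimal sketch of the same align / sort / deduplicate step with a newer toolchain; this is not part of the original script, and it assumes samtools >= 1.9 plus the placeholder inputs ref.fa, s1_1.fastq, and s1_2.fastq:

# Hedged sketch, not from the original benchmark: paired-end alignment with
# duplicate removal via the fixmate/markdup route (assumes samtools >= 1.9).
bwa mem -M -t 4 ref.fa s1_1.fastq s1_2.fastq |
    samtools fixmate -m - - |              # add mate-score tags used by markdup
    samtools sort -@ 4 -o s1.sorted.bam -  # coordinate-sort the alignments
samtools markdup -r s1.sorted.bam s1.dedup.bam  # remove duplicates (replaces rmdup)
samtools index s1.dedup.bam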
diff --git a/bio/bio-align/genome-diff.sh b/bio/bio-align/genome-diff.sh
deleted file mode 100755
index a269f9e95..000000000
--- a/bio/bio-align/genome-diff.sh
+++ /dev/null
@@ -1,42 +0,0 @@
-#!/bin/bash
-
-# Find differences between two genome sequences: a paired set of Illumina
-# sequencing reads (FASTQ files) and an assembled reference genome from
-# GenBank (e.g., Pasteurella multocida). The reads are aligned to the
-# reference and sorted by coordinate. Instead of saving the BAM file, we pipe
-# it directly into a series of bcftools steps. Note the use of -l 0 and -Ou to
-# keep the piped data uncompressed, avoiding repeated compression and
-# decompression. --min-MQ 60 ensures only uniquely mapped reads are used. The
-# final filter step removes low-quality variant calls, heterozygous calls
-# (this is a haploid bacterium), and any sites with fewer than 10 supporting reads.
-
-# Requires: samtools, minimap2, bcftools
-# Data: http://ndr.md/data/bio/R1.fastq.gz http://ndr.md/data/bio/R2.fastq.gz http://ndr.md/data/bio/ref.fa
-
-# https://github.com/samtools/samtools/releases/latest
-# https://github.com/lh3/minimap2
-# http://thegenomefactory.blogspot.com/2018/10/a-unix-one-liner-to-call-bacterial.html
-
-CPUS=1
-REF=./input/ref.fa
-R1=./input/R1.fastq.gz
-R2=./input/R2.fastq.gz
-OUT=/dev/shm/out.txt
-
-BIO_TOOLS=~/biotools
-
-# These should be added to every script
-export PATH="$PATH:$BIO_TOOLS/bcftools-1.9"
-export PATH="$PATH:$BIO_TOOLS/samtools-1.9"
-export PATH="$PATH:$BIO_TOOLS/htslib-1.9"
-export PATH="$PATH:$BIO_TOOLS/minimap2-2.17_x64-linux"
-
-minimap2 -a -x sr -t "$CPUS" "$REF" "$R1" "$R2" |            # align reads to the reference
-    samtools sort -l 0 --threads "$CPUS" |                   # sort reads by coordinate
-    bcftools mpileup -Ou -B --min-MQ 60 -f "$REF" - |        # multi-way pileup producing genotype likelihoods
-    bcftools call -Ou -v -m - |                              # SNP/indel calling
-    bcftools norm -Ou -f "$REF" -d all - |                   # left-align and normalize indels
-    bcftools filter -Ov -e 'QUAL<40 || DP<10 || GT!="1/1"' | # drop low-quality, heterozygous, or low-depth calls
-    bcftools stats |                                         # produce VCF/BCF stats
-    grep '^SN' |                                             # keep only the summary (SN) lines
-    cut -f3- > $OUT                                          # write fields 3 onward (metric name and value)
diff --git a/bio/bio-align/genquality.sh b/bio/bio-align/genquality.sh
deleted file mode 100755
index 64c777fdd..000000000
--- a/bio/bio-align/genquality.sh
+++ /dev/null
@@ -1,75 +0,0 @@
-#!/bin/bash
-
-# Identify the top 10 reasons why genome assemblies don't make it into GenBank
-# -- NIH's genetic sequence database, an annotated collection of all publicly
-# available DNA sequences.
-# http://thegenomefactory.blogspot.com/2019/09/25-reasons-assemblies-dont-make-it-into.html
-
-# Requires: csvkit
-# Data: http://ndr.md/data/bio/genbank.txt
-
-IN=./input/genbank.txt
-OUT=./output/out.txt
-
-cat $IN |
-    csvcut -t -K 1 -c 'excluded_from_refseq' |
-    tail -n +2 | tr ";" "\n" |
-    sed -e 's/^ //' -e 's/ $//' |
-    grep -v '""' |
-    sort |
-    uniq -c |
-    sort -nr |
-    head -n 10 |
-    nl > $OUT
-
-# More bio pipelines:
-# # Strains with Complete Genome
-# cat assembly_summary.tsv \
-#     | csvtk grep -t -f assembly_level -i -p "Complete Genome" \
-#     | wc -l
-#
-# # Most sequenced species with Complete Genome
-# cat assembly_summary.tsv \
-#     | csvtk grep -t -f assembly_level -i -p "Complete Genome" \
-#     | csvtk cut -t -f organism_name \
-#     | cut -d ' ' -f 1,2 \
-#     | csvtk freq -t -n -r | head -n 20 | csvtk pretty -t
-#
-# # Number of species, by organism name
-#
-# # Filter by species (organism_name)
-# cat assembly_summary.tsv \
-#     | csvtk grep -t -f organism_name -i -r -p "Mycobacterium tuberculosis" \
-#     | csvtk grep -t -f assembly_level -i -p "Complete Genome" \
-#     > mt.tsv
-#
-# # Filter (complete genome) by species_taxid
-# cat assembly_summary.tsv \
-#     | csvtk grep -t -f species_taxid -p 239935,1280 \
-#     | csvtk grep -t -f assembly_level -i -p "Complete Genome" \
-#     > bytaxid.tsv
-#
-# # Download genome sequence and annotation files
-# cat mt.tsv | csvtk cut -t -f ftp_path | sed 1d \
-#     | rush -v prefix='{}/{%}' \
-#         ' \
-#             wget -c {prefix}_genomic.fna.gz; \
-#             wget -c {prefix}_genomic.gbff.gz; \
-#             wget -c {prefix}_genomic.gff.gz; \
-#             wget -c {prefix}_cds_from_genomic.fna.gz; \
-#             wget -c {prefix}_protein.faa.gz; \
-#         ' \
-#         -j 10 -c -C download.rush
-#
-# # Get the GenBank assembly summary file
-# wget ftp://ftp.ncbi.nlm.nih.gov/genomes/genbank/assembly_summary_genbank.txt
-#
-# # Get all lines that have "Mycobacter"; if the 12th field is "Complete Genome", print
-# # the 20th field (the URL). The actual filename ends in _genomic.fna.gz, so append that too.
-# grep Mycobacter assembly_summary_genbank.txt \
-#     | awk 'BEGIN{FS="\t"}{if($12=="Complete Genome"){print $20}}' \
-#     | awk 'BEGIN{OFS=FS="/"}{print $0,$NF"_genomic.fna.gz"}' \
-#     > urls.txt
-#
-# # Now go through the urls file
-# IFS=$'\n'; for NEXT in $(cat urls.txt); do wget "$NEXT"; done
diff --git a/bio/bio1/README b/bio/bio1/README
deleted file mode 100644
index ee1f1ea1d..000000000
--- a/bio/bio1/README
+++ /dev/null
@@ -1 +0,0 @@
-'' Novel DNA sequencing techniques, referred to as next-generation sequencing (NGS), provide high speed and throughput that can produce an enormous volume of sequences with many possible applications in research and diagnostic settings. In this article, we provide an overview of the many applications of NGS in diagnostic virology. NGS techniques have been used for high-throughput whole viral genome sequencing, such as sequencing of new influenza viruses, for detection of viral genome variability and evolution within the host, such as investigation of human immunodeficiency virus and human hepatitis C virus quasispecies, and monitoring of low-abundance antiviral drug-resistance mutations. NGS techniques have been applied to metagenomics-based strategies for the detection of unexpected disease-associated viruses and for the discovery of novel human viruses, including cancer-related viruses. Finally, the human virome in healthy and disease conditions has been described by NGS-based metagenomics. ''
diff --git a/bio/bio1/bam_to_sam.sh b/bio/bio1/bam_to_sam.sh
deleted file mode 100644
index fec09f66e..000000000
--- a/bio/bio1/bam_to_sam.sh
+++ /dev/null
@@ -1,4 +0,0 @@
-INPUT=${INPUT:-$PASH_TOP/evaluation/bio/input/bam}
-OUTPUT=${OUTPUT:-$PASH_TOP/evaluation/bio/output}
-cd ${INPUT}
-find . -name "*.bam" | xargs -I {} samtools view -h -o ${OUTPUT}/{}.sam {}
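bam_to_sam.sh above converts every BAM under the input directory with find and xargs. The same conversion can be sketched with GNU parallel to get cleaner per-file output names; this is not part of the original benchmark and assumes GNU parallel is installed and that INPUT/OUTPUT are set as in the script:

# Hedged sketch, not from the original benchmark: one SAM per input BAM.
# {} is the path produced by find; {/.} is its basename with the .bam suffix stripped.
find . -name "*.bam" | parallel -j 4 samtools view -h -o "${OUTPUT}"/{/.}.sam {}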
diff --git a/bio/bio1/bio2.sh b/bio/bio1/bio2.sh
deleted file mode 100644
index fb00a1b23..000000000
--- a/bio/bio1/bio2.sh
+++ /dev/null
@@ -1,73 +0,0 @@
-#### Ported ####
-# https://dfzljdn9uc3pi.cloudfront.net/2013/203/1/Supplement_S2.pdf
-set -e
-cd $PASH_TOP/evaluation/benchmarks/bio/bio1/input/
-ls *.R1.fq > namelist
-sed -i 's/.R1.fq//g' namelist
-NAMES=( `cat "namelist"` )
-mkdir -p assembly
-# Trim the raw files in two different ways.
-# The first pass removes any reads with substantial amounts of adapter but does
-# no quality trimming; these reads are used for assembly and must be of uniform
-# length. The second pass removes adapters and does quality trimming; these
-# reads are used for mapping.
-for i in "${NAMES[@]}"
-do
-    echo $i
-    Trim/trim_galore --paired -q 0 --length 90 -a GATCGGAAGAGCACACGTCTGAACTCCAGTCACNNNNNNATATCGTATGCCGTCTTCTGCTTG -a2 GATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCG --stringency 20 ${i}.R1.fq ${i}.R2.fq --output_dir ./assembly
-    Trim/trim_galore --paired -q 20 --length 20 -a GATCGGAAGAGCACACGTCTGAACTCCAGTCACNNNNNNATATCGTATGCCGTCTTCTGCTTG -a2 GATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCG --stringency 10 $i.R1.fq $i.R2.fq
-done
-
-# Rename the trimmed files to simpler names
-for i in "${NAMES[@]}"
-do
-    mv $i.R1_val_1.fq $i.1.fq
-    mv $i.R2_val_2.fq $i.2.fq
-done
-
-### Assembly ###
-# These parameters could be further optimized for particular taxa.
-# The first step concatenates the reads into one forward and one reverse FASTQ file.
-cat ./assembly/*.R1_val_1.fq > forward
-cat ./assembly/*.R2_val_2.fq > reverse
-# Rainbow now clusters and assembles
-rainbow/rainbow cluster -1 forward -2 reverse > cat.rbcluster.out 2> log
-# -f could be exposed as a script parameter (e.g., -f $1) if needed
-rainbow/rainbow div -i cat.rbcluster.out -o cat.rbdiv.out
-rainbow/rainbow merge -a -i cat.rbdiv.out -o cat.rbasm.out -N 1000
-perl rainbow/select_best_rbcontig.pl cat.rbasm.out > rainbowf
-# Rename contigs to sequential numbers for simplicity
-fastx_renamer -n COUNT -i rainbowf -o reference
-## Mapping
-# Use BWA to index the reference
-bwa-0.7.17/bwa index -a bwtsw reference
-# Use BWA to map reads to the reference.
-### These parameters could be further optimized for particular taxa
-for i in "${NAMES[@]}"
-do
-    bwa-0.7.17/bwa mem reference $i.1.fq $i.2.fq -t 32 -a -T 10 > $i.sam
-done
-# Convert SAM to BAM and remove low-quality, ambiguous mappings
-for i in "${NAMES[@]}"
-do
-    samtools view -bS -q15 $i.sam > $i.bam
-    samtools sort $i.bam -o $i
-done
-# Index the reference for SAMtools
-samtools faidx reference
-# Sample1.bam needs to be coordinate-sorted in this way
-# before samtools index can be called on it
-samtools sort -m 2G -@ 4 Sample1.bam -o lala
-mv lala Sample1.bam
-# Index the BAM file
-samtools index Sample1.bam
-samtools mpileup -D -f reference *.bam > mpileup
-# VarScan calls all sites with at least 5X coverage, a variant frequency above
-# 10%, and a 95% probability of being a SNP. Requires VarScan version 2.3.5.
-java -jar VarScan.jar mpileup2snp mpileup --output-vcf --min-coverage 5 --strand-filter 0 --min-var-freq 0.1 --p-value 0.05 > SNPS.vcf
-# VCFtools filters the raw SNPs into a filtered VCF file (Final.recode.vcf) that
-# keeps SNPs present in every individual and drops indels.
-# The --geno 0.99 flag also works, but requires vcftools 0.1.10.
-vcftools --vcf SNPS.vcf --out Final --recode --non-ref-af 0.001 --remove-indels
-# VCFtools again, to keep SNPs covered at an average depth of at least 10X
-vcftools --vcf Final.recode.vcf --out Final10X --recode --min-meanDP 10
diff --git a/bio/bio1/bio3.sh b/bio/bio1/bio3.sh
deleted file mode 100644
index 3f2c5a12c..000000000
--- a/bio/bio1/bio3.sh
+++ /dev/null
@@ -1,10 +0,0 @@
-# **Create the bowtie2 alignment database for the Arabidopsis genome**
-# https://bioinformaticsworkbook.org/Appendix/GNUparallel/GNU_parallel_examples.html#gsc.tab=0
-cd $PASH_TOP/evaluation/bio/input/bio3
-bowtie2-build TAIR10_chr_all.fas tair
-# theirs
-time parallel -j2 "bowtie2 --threads 4 -x tair -k1 -q -1 {1} -2 {2} -S {1/.}.sam >& {1/.}.log" ::: fastqfiles/*_1.fastq.gz :::+ fastqfiles/*_2.fastq.gz
-# ours
-paste <(find . -name "*_1.fastq.gz") <(find . -name "*_2.fastq.gz") |
-    xargs -n 2 sh -c 'bowtie2 --threads 4 -x tair -k1 -q -1 "$1" -2 "$2" -S fifth_R1.sam' argv0
-
diff --git a/bio/bio1/convert_to_fast.sh b/bio/bio1/convert_to_fast.sh
deleted file mode 100644
index e66a954fd..000000000
--- a/bio/bio1/convert_to_fast.sh
+++ /dev/null
@@ -1,6 +0,0 @@
-# Convert FASTQ to FASTA format.
-# cutadapt infers the output format from the .fasta.gz output extension and writes gzipped FASTA.
-INPUT=${INPUT:-$PASH_TOP/evaluation/bio/input/}
-OUTPUT=${OUTPUT:-$PASH_TOP/evaluation/bio/output}
-cd ${INPUT}
-find . -maxdepth 1 -name "*.fastq" | xargs -I {} cutadapt -o ${OUTPUT}/{}.fasta.gz {}
diff --git a/bio/bio1/generate_single_chrom.sh b/bio/bio1/generate_single_chrom.sh
deleted file mode 100644
index 364c28abe..000000000
--- a/bio/bio1/generate_single_chrom.sh
+++ /dev/null
@@ -1,19 +0,0 @@
-# Sample steps to generate a single paired read from hg19:
-# https://www.biostars.org/p/150010/
-INPUT=${INPUT:-$PASH_TOP/evaluation/bio/input/}
-OUTPUT=${OUTPUT:-$PASH_TOP/evaluation/bio/output}
-cd ${INPUT}
-# Filter out a single chromosome and index it, e.g.
-samtools faidx ${INPUT}/human_g1k_v37.fasta 20 > ${OUTPUT}/human_g1k_v37_chr20.fasta
-bowtie2-build ${OUTPUT}/human_g1k_v37_chr20.fasta ${OUTPUT}/homo_chr20
-# Simulate a read sample; here, a single (-N 1) paired read:
-${INPUT}/wgsim/wgsim -N 1 ${OUTPUT}/human_g1k_v37_chr20.fasta ${OUTPUT}/single.read1.fq ${OUTPUT}/single.read2.fq > ${OUTPUT}/wgsim.out
-# Generate the SAM, e.g.
-bowtie2 -x ${OUTPUT}/homo_chr20 -1 ${OUTPUT}/single.read1.fq -2 ${OUTPUT}/single.read2.fq -S ${OUTPUT}/single_pair.sam
-# Generate a BAM
-samtools view -b -S -o ${OUTPUT}/single_pair.bam ${OUTPUT}/single_pair.sam
-# Sort and index it
-samtools sort ${OUTPUT}/single_pair.bam -o ${OUTPUT}/single_pair.sorted.bam
-# Indexing does not modify the BAM itself, but the .bai index is needed for region queries
-samtools index ${OUTPUT}/single_pair.sorted.bam
-
diff --git a/bio/bio1/remove_adapter.sh b/bio/bio1/remove_adapter.sh
deleted file mode 100644
index 006de5d06..000000000
--- a/bio/bio1/remove_adapter.sh
+++ /dev/null
@@ -1,3 +0,0 @@
-INPUT=${INPUT:-$PASH_TOP/evaluation/bio/input/}
-# remove adapter
-find ${INPUT} -name "*.fastq" | sort | uniq | xargs -I {} cutadapt -a AGATCGGAAGAGCACAC {} > /dev/null
diff --git a/bio/bio1/setup.sh b/bio/bio1/setup.sh
deleted file mode 100644
index 40bdd47cb..000000000
--- a/bio/bio1/setup.sh
+++ /dev/null
@@ -1,77 +0,0 @@
-if [[ $1 == "-c" ]]; then
-    rm -rf input
-    rm -rf output
-    exit
-fi
-
-mkdir -p input
-mkdir -p output
-cd input
-if [[ ! -f R1.fastq ]]; then
-    wget ndr.md/data/bio/{R1.fastq.gz,R2.fastq.gz,ref.fa}
-
-    gunzip R1.fastq.gz
-    gunzip R2.fastq.gz
-fi
-if [[ ! -f human_g1k_v37.fasta ]]; then
-    wget http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/human_g1k_v37.fasta.fai
-    wget http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/human_g1k_v37.fasta.gz
-    gunzip human_g1k_v37.fasta.gz
-    cp R1.fastq Sample1.R1.fq
-    cp R2.fastq Sample1.R2.fq
-fi
-
-if [[ ! -d wgsim ]]; then
-    git clone https://github.com/lh3/wgsim
-    cd wgsim/ && gcc -g -O2 -Wall -o wgsim wgsim.c -lz -lm && cd -
-fi
-apt-get install samtools bowtie2 vcftools sra-toolkit cutadapt zlib1g-dev default-jre
-if [[ ! -d Trim ]]; then
-    wget https://github.com/FelixKrueger/TrimGalore/archive/0.6.6.zip
-    unzip 0.6.6.zip
-    mv TrimGalore-0.6.6 Trim/
-fi
-if [[ ! -d rainbow ]]; then
-    wget https://phoenixnap.dl.sourceforge.net/project/bio-rainbow/rainbow_2.0.4.tar.gz
-    tar xf rainbow_2.0.4.tar.gz
-    mv rainbow_2.0.4 rainbow
-    cd rainbow && make -j && cd -
-fi
-if [[ ! -d fastx_toolkit-0.0.14 ]]; then
-    wget https://github.com/agordon/fastx_toolkit/releases/download/0.0.14/fastx_toolkit-0.0.14.tar.bz2
-    tar xf fastx_toolkit-0.0.14.tar.bz2
-    sed -i 's/usage();/usage();break;/g' fastx_toolkit-0.0.14/src/fasta_formatter/fasta_formatter.cpp
-    cd fastx_toolkit-0.0.14 && ./configure && sudo make install && cd -
-fi
-if [[ ! -d libgtextutils-0.7 ]]; then
-    wget https://github.com/agordon/libgtextutils/releases/download/0.7/libgtextutils-0.7.tar.gz
-    tar xf libgtextutils-0.7.tar.gz
-    # patch a compile error under newer compilers (implicit stream-to-bool conversion)
-    sed -i 's/return input_stream/return (bool)input_stream/g' ./libgtextutils-0.7/src/gtextutils/text_line_reader.cpp
-    cd libgtextutils-0.7 && ./configure && sudo make install && cd -
-fi
-if [[ ! -d bwa-0.7.17 ]]; then
-    wget https://altushost-swe.dl.sourceforge.net/project/bio-bwa/bwa-0.7.17.tar.bz2
-    tar xf bwa-0.7.17.tar.bz2
-    cd bwa-0.7.17 && make -j && cd -
-fi
-
-if [[ ! -f VarScan.jar ]]; then
-    wget https://deac-fra.dl.sourceforge.net/project/varscan/VarScan.v2.3.5.jar -O VarScan.jar
-fi
-
-#### Download sam files ###
-if [[ ! -f toy.sam ]]; then
-    wget https://raw.githubusercontent.com/samtools/samtools/develop/examples/toy.sam
-    wget https://raw.githubusercontent.com/samtools/samtools/develop/examples/ex1.sam.gz
-    gunzip ex1.sam.gz
-    sam-dump --aligned-region 20:1-16444167 --output-file SRR1976040_chr20.sam SRR1976040
-    # too slow to download
-    sam-dump --aligned-region chr20 --output-file SRR1976036_chr20.sam SRR1976036
-fi
-
-## bio3.sh
-#wget www.bioinformaticsworkbook.org/Appendix/GNUparallel/fastqfiles.tar.gz
-#tar -zxvf fastqfiles.tar.gz
-#wget https://www.arabidopsis.org/download_files/Genes/TAIR10_genome_release/TAIR10_chromosome_files/TAIR10_chr_all.fas
-#cd -
diff --git a/bio/bio1/trim_primers.sh b/bio/bio1/trim_primers.sh
deleted file mode 100644
index 5254ae909..000000000
--- a/bio/bio1/trim_primers.sh
+++ /dev/null
@@ -1,6 +0,0 @@
-# trim primers
-INPUT=${INPUT:-$PASH_TOP/evaluation/bio/input/}
-OUTPUT=${OUTPUT:-$PASH_TOP/evaluation/bio/output}
-cd ${INPUT}
-find . -maxdepth 1 -name "*.fastq" | xargs -I {} cutadapt -a TCCTCCGCTTATTGATAGC -o ${OUTPUT}/{}_trimmed.fastq {};
-
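The cutadapt one-liners in convert_to_fast.sh and trim_primers.sh splice the raw find output (for example ./Sample1.fastq) straight into the output name, which yields paths like ${OUTPUT}/./Sample1.fastq_trimmed.fastq. A hedged alternative that derives cleaner names from the basename; it is not part of the original scripts and assumes the same INPUT/OUTPUT variables and single-end FASTQ input:

# Hedged sketch, not from the original benchmark: strip the leading path and the
# .fastq suffix before building the trimmed-output name.
find . -maxdepth 1 -name "*.fastq" |
    while read -r fq; do
        base=$(basename "$fq" .fastq)
        cutadapt -a TCCTCCGCTTATTGATAGC -o "${OUTPUT}/${base}_trimmed.fastq" "$fq"
    done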
diff --git a/bio/input/setup.sh b/bio/input/setup.sh
deleted file mode 100755
index 8232402d0..000000000
--- a/bio/input/setup.sh
+++ /dev/null
@@ -1,76 +0,0 @@
-# red color
-RED='\033[0;31m'
-# reset the color
-NC='\033[0m'
-
-IN=${BIO4:-$PASH_TOP/evaluation/benchmarks/bio}
-IN_NAME=${IN_N:-input_all.txt}
-if [[ $1 == "-c" ]]; then
-    rm -rf *.bam
-    rm -rf *.sam
-    rm -rf ../output
-    exit
-fi
-
-PW=${PASH_TOP}/benchmarks/bio/input
-echo $PW
-mkdir -p $PW
-mkdir -p ${PASH_TOP}/benchmarks/bio/output
-
-# install dependencies
-required_version="1.7"
-
-# Check if Samtools is already installed and matches the required version
-if command -v samtools &>/dev/null; then
-    installed_version=$(samtools --version | head -n 1 | awk '{print $2}')
-    if [[ "$installed_version" == "$required_version" ]]; then
-        echo "Samtools version $required_version is already installed."
-    else
-        echo "A different version of Samtools is installed: $installed_version."
-        echo "Proceeding to install the required version: $required_version."
-    fi
-else
-    echo "Samtools is not installed. Proceeding with the installation."
-    # Update and install prerequisites
-    echo "Installing prerequisites..."
-    sudo apt update
-    sudo apt install -y build-essential libncurses5-dev libncursesw5-dev libbz2-dev liblzma-dev libcurl4-openssl-dev libssl-dev wget zlib1g-dev
-
-    # Download Samtools version 1.7
-    echo "Downloading Samtools version $required_version..."
-    wget https://github.com/samtools/samtools/releases/download/$required_version/samtools-$required_version.tar.bz2
-
-    # Extract the downloaded file
-    echo "Extracting Samtools..."
-    tar -xvjf samtools-$required_version.tar.bz2
-    cd samtools-$required_version
-
-    # Compile and install
-    echo "Compiling and installing Samtools..."
-    ./configure
-    make
-    sudo make install
-
-    sudo ln -s /usr/local/bin/samtools /usr/bin/samtools
-
-    # Verify the installation
-    echo "Verifying the installation..."
-    installed_version=$(samtools --version | head -n 1 | awk '{print $2}')
-    if [[ "$installed_version" == "$required_version" ]]; then
-        echo "Samtools version $required_version has been successfully installed."
-    else
-        echo "Failed to install the correct version of Samtools."
-        exit 1
-    fi
-fi
-
-# cat ${IN}/${IN_NAME} | while read s_line;
-# do
-
-#     sample=$(echo $s_line | cut -d " " -f 2);
-#     if [[ ! -f $sample ]]; then
-#         pop=$(echo $s_line | cut -f 1 -d " ");
-#         link=$(echo $s_line | cut -f 3 -d " ");
-#         wget -O "$PW/$sample".bam "$link";  ## this part can be adjusted maybe
-#     fi
-# done;
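The setup script ends with a commented-out loop that appears to read a whitespace-separated manifest and fetch one BAM per sample; judging by the cut field numbers, the columns would be population, sample, and download link, but that layout is an assumption. A possible runnable form of that loop under those assumptions:

# Hedged sketch of what the commented-out block appears to intend; the
# "population sample link" column order is an assumption taken from the cut calls.
while read -r pop sample link; do
    if [[ ! -f "$PW/$sample".bam ]]; then
        wget -O "$PW/$sample".bam "$link"
    fi
done < "${IN}/${IN_NAME}"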