#!/bin/bash - 
#===============================================================================
#
#          FILE: bychrmpil.sh
# 
#         USAGE: ./bychrmpil.sh <reference.fasta>
# 
#   DESCRIPTION: Output commands for mpileup by chromosome in parallel
#               Adapted from a blog post here
#               http://www.research.janahang.com/efficient-way-to-generate-vcf-files-using-samtools/
#                And updated to run with samtools 1.2
#                Works with bam and cram files
#       OPTIONS: $1 fasta file
#  REQUIREMENTS: samtools, bcftools, htslib
#         NOTES: To run these commands you need samtools and bcftools
#        AUTHOR: Jillian Rowe, 
#  ORGANIZATION: WCMC-Q
#       CREATED: 21/01/15 14:33
#      REVISION:  ---
#===============================================================================

#Checked with samtools 1.2, htslib 1.2, and bcftools 1.2
#
# samtools mpileup -t DP,DPR,DV,DP4,INFO/DPR,SP -gus -f human_g1k_v37.fasta -r GL000237.1 HG00096.mapped.illumina.mosaik.GBR.exome.20110411.cram | bcftools call -mO z --output tmp.vcf.gz

# --- Setup and input discovery ------------------------------------------------
# Produces, in the current directory:
#   tmpbcf/      directory the emitted commands write per-contig output into
#   bamlist.txt  absolute paths of every BAM/CRAM found below the current dir
#   coord.txt    sorted, de-duplicated contig names taken from the @SQ header
#                lines of those files
# All diagnostics go to stderr so stdout stays a clean list of commands.

# Reference FASTA; it is passed to `samtools mpileup -f` in the emitted
# commands. Without it the emitted commands would read `-f  -r <contig>`.
REF=$1
if [[ -z "$REF" ]]; then
  printf 'Usage: %s <reference.fasta>\n' "${0##*/}" >&2
  exit 1
fi

# The contig list is built by reading headers with samtools, so fail early
# instead of silently producing an empty coord.txt.
if ! command -v samtools > /dev/null; then
  printf '%s: samtools not found in PATH\n' "${0##*/}" >&2
  exit 1
fi

mkdir -p tmpbcf || exit 1

# Match on the file-name suffix exactly as before ("...bam" / "...cram"), but
# skip directories that merely happen to end in one of those suffixes.
find "$(pwd)/" ! -type d \( -name '*bam' -o -name '*cram' \) > bamlist.txt

if [[ ! -s bamlist.txt ]]; then
  printf '%s: warning: no BAM/CRAM files found under %s\n' \
    "${0##*/}" "$(pwd)" >&2
fi

# Read each header directly (no `bash -c`, so paths with spaces or shell
# metacharacters are safe) and stream straight into coord.txt. No fixed-name
# scratch file is appended to, so a leftover file from an aborted run cannot
# leak stale contig names into the list.
while IFS= read -r aln_file; do
  # stdin is redirected so samtools can never swallow the rest of bamlist.txt
  samtools view -H "$aln_file" < /dev/null
done < bamlist.txt |
  awk -F '\t' '
    # Only genuine @SQ records; take the value of the SN: tag, wherever it is.
    $1 == "@SQ" {
      for (i = 2; i <= NF; i++) {
        if ($i ~ /^SN:/) {
          print substr($i, 4)
          break
        }
      }
    }' |
  sort -u > coord.txt

# Let's shuffle the input to distribute it better

# cat coord.txt | shuf | xargs -I {} -n 1 bash -c 'echo -e "samtools mpileup -DguS -f $1 -r {} -b bamlist.txt | bcftools view -bvcg - > tmpbcf/tmp.{}.bcf && bcftools view tmpbcf/tmp.{}.bcf > tmpbcf/tmp.{}.vcf\n"'

#Getting rid of having a bcftools view on each chromosome - save it for the end
#This one needs to be changed to update for samtools 1.2
# cat coord.txt | shuf | xargs -I {} -n 1 bash -c "echo 'samtools mpileup -DguS -f $REF -r {} -b `pwd`/bamlist.txt | bcftools view -bvcg - > `pwd`/tmpbcf/tmp.{}.bcf'"
# Emit one "mpileup | call" pipeline per contig listed in coord.txt, in random
# order (shuf) so long and short contigs are spread across workers.
#
# Inputs:  coord.txt (one contig name per line), $REF (reference FASTA taken
#          from $1 earlier in the script), bamlist.txt in the current directory.
# Output:  one command line per contig on stdout, followed by a "wait" line.
#
# The output file is passed to `bcftools call` with -o. Without it the path is
# parsed as the *input* file, the piped mpileup stream is ignored, and nothing
# is ever written to tmpbcf/.
# NOTE(review): -O z writes compressed VCF into a file named *.bcf; confirm
# nothing downstream relies on the extension matching the format.
#
# %q shell-quotes every interpolated value, so contig names or paths that
# contain shell metacharacters (e.g. '*' or spaces) survive being pasted into
# a shell. Plain alphanumeric names and paths are emitted unchanged.
workdir=$(pwd)
shuf coord.txt | while IFS= read -r contig; do
  # Skip blank lines.
  [[ -n "$contig" ]] || continue
  printf 'samtools mpileup -t DP,DPR,DV,DP4,INFO/DPR,SP -gus -f %q -r %q -b %q | bcftools call -mO z -o %q\n' \
    "$REF" "$contig" "$workdir/bamlist.txt" "$workdir/tmpbcf/tmp.$contig.bcf"
done

# Barrier for whatever executes the emitted commands: everything above must
# finish before the merge step. Emits "wait" followed by an empty line.
printf 'wait\n\n'

# Got to cat the bcf files back together
# find `pwd`/tmpbcf | grep -e "bcf$" > bcflist.txt
# Didn't work with the -f option not sure why
# echo "find `pwd`/tmpbcf/*bcf | xargs bcftools cat > `pwd`/merge.bcf && bcftools view `pwd`/merge.bcf > `pwd`/merge.vcf"
# Emit the final merge step as a single command line on stdout:
#   1. list the per-contig files in tmpbcf/ into bcflist.txt
#   2. concatenate them into merge.bcf
#   3. write merge.vcf.gz from merge.bcf
#
# Fixes relative to the previous version of this line:
#   * bcftools is taken from PATH, like every other invocation in this script;
#     the old command ran "<cwd>/bcftools", which only exists if the binary
#     happens to live in the working directory.
#   * bcflist.txt is written with the same absolute path it is read back from,
#     so the command no longer depends on the directory it is executed in.
#
# The glob (*bcf) is deliberately left unquoted in the emitted text so it is
# expanded when the command runs, after the per-contig jobs have finished.
# %q shell-quotes the directory; ordinary paths are emitted unchanged.
# NOTE(review): the last stage runs `bcftools call` on merge.bcf; if the
# per-contig files are already called, a plain format conversion may be what
# is wanted here - confirm.
workdir=$(pwd)
printf 'find %q/tmpbcf/*bcf > %q && bcftools concat -f %q > %q && bcftools call -mO z %q --output %q\n' \
  "$workdir" \
  "$workdir/bcflist.txt" \
  "$workdir/bcflist.txt" \
  "$workdir/merge.bcf" \
  "$workdir/merge.bcf" \
  "$workdir/merge.vcf.gz"

# coord.txt was only needed to generate the commands above. bamlist.txt is kept
# because the emitted mpileup commands read it at run time.
rm -f -- coord.txt
