-
Notifications
You must be signed in to change notification settings - Fork 5
Expand file tree
/
Copy pathSamtoolsFiltering.sh
More file actions
94 lines (69 loc) · 3.21 KB
/
SamtoolsFiltering.sh
File metadata and controls
94 lines (69 loc) · 3.21 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
#!/bin/bash
#
#SBATCH -c 16
#SBATCH --mem-per-cpu=4000
#SBATCH --job-name=SAMFilter
#SBATCH --output=SAMFilter.out
#SBATCH --time=12:00:00
#######################################################################################
#######################################################################################
#sort with samtools and filter
#######################################################################################
module load samtools/1.4
module load jre/1.8.0_121 #this loads Java 1.8 working environment
module load R/3.6.1 #picard needs this
mkdir -p output/sam/
for sample in `cat SRR_Acc_List.txt`
do
echo ${sample} "starting filtering"
#properly mapped and paired reads:
#pipe samtools: https://www.biostars.org/p/43677/
#flags:
# 0x1 template having multiple segments in sequencing
# 0x2 each segment properly aligned according to the aligner
# 0x4 segment unmapped
# 0x8 next segment in the template unmapped
# 0x10 SEQ being reverse complemented
# 0x20 SEQ of the next segment in the template being reversed
# 0x40 the first segment in the template
# 0x80 the last segment in the template
# 0x100 secondary alignment
# 0x200 not passing quality controls
#0x400 PCR or optical duplicate
echo ${sample} "converting sam to bam"
#convert sam to bam and name sort for fixmate
#-@ 16 sets to 16 threads
samtools view -bS aligned/${sample}.sam | samtools sort -n -@ 16 - -o aligned/${sample}_tmp.bam
echo ${sample} "removing unmapped reads"
#Remove unmapped reads and secondary alignments from name sorted input
#samtools view -bS test.sam | samtools sort -n -@ 16 -o test_tmp.bam
samtools fixmate -r aligned/${sample}_tmp.bam aligned/${sample}_tmp2.bam
echo ${sample} "removing unpaired reads"
#coordinate sort, then filter:
#remove PCR duplicates:https://www.biostars.org/p/318974/
# -F INT only include reads with none of the bits set in INT set in FLAG [0]
#so include none of 0x400
#keep only output propper pairs: https://broadinstitute.github.io/picard/explain-flags.html
# -f 0x2
#samtools sort -@ 16 test_tmp2.bam |samtools view -b -F 0x400 -f 0x2 -@ 16 - -o test_dedup.bam
samtools sort -@ 16 aligned/${sample}_tmp2.bam |samtools view -b -F 0x400 -f 0x2 -@ 16 - -o aligned/${sample}_dedup.bam
echo ${sample} "index"
#index
samtools index aligned/${sample}_dedup.bam
echo ${sample} "QC metrics"
#fix mate information
#java -jar $PICARD FixMateInformation I=aligned/${sample}_dedup.bam O=output/sam/${sample}_fixmate.txt
#picard alignment summary
java -jar $PICARD CollectAlignmentSummaryMetrics I=aligned/${sample}_dedup.bam O=output/sam/${sample}_dedup_alignsum.txt
#find insert size
java -jar $PICARD CollectInsertSizeMetrics I=aligned/${sample}_dedup.bam H=output/sam/${sample}_histogram.pdf O=output/sam/${sample}_insertmetric.txt
#flagstat
samtools flagstat aligned/${sample}_dedup.bam > output/sam/${sample}_dedupFlagstat.txt
rm aligned/${sample}_tmp.bam
rm aligned/${sample}_tmp2.bam
echo ${sample} "finished filtering"
done
#######################################################################################
#combine
#######################################################################################
multiqc output/sam --filename output/sam_multiqc_report.html --ignore-samples Undetermined* --interactive