Variant calling and annotation: Practical session
#!/bin/bash # Working directory cd ~/mda12/calling # Run program and see options ~/mda12/calling/software/GenomeAnalysisTK-1.4-15-gcd43f01/GenomeAnalysisTK.jar # UnifiedGenotyper ~/mda12/calling/software/GenomeAnalysisTK-1.4-15-gcd43f01/GenomeAnalysisTK.jar\ -T UnifiedGenotyper # Checking the reference head ~/mda12/resources/ref/human_g1k_v37.chr20.fasta head -3000 ~/mda12/resources/ref/human_g1k_v37.chr20.fasta | tail #Checking the bed file head ~/mda12/resources/ref/Exon_50mb_hg19_chr20.bed #SNV Calling of all sites ~/mda12/calling/software/GenomeAnalysisTK-1.4-15-gcd43f01/GenomeAnalysisTK.jar \ -T UnifiedGenotyper \ -R ~/mda12/resources/ref/human_g1k_v37.chr20.fasta \ -L ~/mda12/resources/ref/Exon_50mb_hg19_chr20.bed \ -I ~/mda12/resources/mapping/test_final.bam \ -glm SNP \ -out_mode EMIT_ALL_SITES \ -o all_sites.vcf # Checking file less all_sites.vcf #Counting lines du -hs all_sites.vcf wc -l all_sites.vcf # Executing SNVs calling of variants ~/mda12/calling/software/GenomeAnalysisTK-1.4-15-gcd43f01/GenomeAnalysisTK.jar \ -T UnifiedGenotyper \ -R ~/mda12/resources/ref/human_g1k_v37.chr20.fasta \ -L ~/mda12/resources/ref/Exon_50mb_hg19_chr20.bed \ -I ~/mda12/resources/mapping/test_final.bam \ -glm SNP \ -o snvs.vcf # Checking file less snvs.vcf # Counting lines du -hs snvs.vcf wc -l snvs.vcf # Executing indels calling of variants ~/mda12/calling/software/GenomeAnalysisTK-1.4-15-gcd43f01/GenomeAnalysisTK.jar \ -T UnifiedGenotyper \ -R ~/mda12/resources/ref/human_g1k_v37.chr20.fasta \ -L ~/mda12/resources/ref/Exon_50mb_hg19_chr20.bed \ -I ~/mda12/resources/mapping/test_final.bam \ -glm INDEL \ -o indels.vcf # Checking file less indels.vcf # Counting lines du -hs indels.vcf wc -l indels.vcf # Labeling VCF files # VariantFiltration ~/mda12/calling/software/GenomeAnalysisTK-1.4-15-gcd43f01/GenomeAnalysisTK.jar \ -T VariantFiltration # Labeling SNVs VCF file ~/mda12/calling/software/GenomeAnalysisTK-1.4-15-gcd43f01/GenomeAnalysisTK.jar \ -T VariantFiltration \ -filter "QD < 2.0 || MQ < 40.0 || FS > 60.0 || HaplotypeScore > 13.0 || MQRankSum < -12.5 || ReadPosRankSum < -8.0" \ -filterName "STD_FILTER" \ -R ~/mda12/resources/ref/human_g1k_v37.chr20.fasta \ -V snvs.vcf \ -o snvs_labeled.vcf # Checking files wc -l snvs_labeled.vcf wc -l snvs.vcf grep PASS snvs_labeled.vcf | wc -l # Labeling indels VCF file ~/mda12/calling/software/GenomeAnalysisTK-1.4-15-gcd43f01/GenomeAnalysisTK.jar \ -T VariantFiltration \ -filter "QD < 2.0 || ReadPosRankSum < -20.0 || FS > 200.0" \ -filterName "STD_FILTER" \ -R ~/mda12/resources/ref/human_g1k_v37.chr20.fasta \ -V indels.vcf \ -o indels_labeled.vcf # Checking files wc -l indels.vcf wc -l indels_labeled.vcf grep PASS indels_labeled.vcf | wc -l # Converting SNV VCF file to annovar format file ~/mda12/calling/software/annovar/convert2annovar.pl \ -format vcf4 \ -filter PASS snvs_labeled.vcf > \ snvs_labeled.vcf.annovar # Checking files wc -l snvs_labeled.vcf.annovar grep PASS snvs_labeled.vcf | wc -l head snvs_labeled.vcf.annovar # Annotating ~/mda12/calling/software/annovar/annotate_variation.pl \ --geneanno \ --buildver hg19 \ --dbtype gene \ snvs_labeled.vcf.annovar \ ~/mda12/calling/software/annovar/humandb/ # Output ls -latr head snvs_labeled.vcf.annovar.exonic_variant_function head snvs_labeled.vcf.annovar.variant_function head snvs_labeled.vcf.annovar.log # Converting indels VCF file to annovar format file ~/mda12/calling/software/annovar/convert2annovar.pl \ -format vcf4 \ -filter PASS indels_labeled.vcf > \ indels_labeled.vcf.annovar # Checking files wc -l indels_labeled.vcf.annovar grep PASS indels_labeled.vcf | wc -l head indels_labeled.vcf.annovar # Annotating ~/mda12/calling/software/annovar/annotate_variation.pl \ --geneanno \ --buildver hg19 \ --dbtype gene \ indels_labeled.vcf.annovar \ ~/mda12/calling/software/annovar/humandb/ # Output ls -latr head indels_labeled.vcf.annovar.exonic_variant_function head indels_labeled.vcf.annovar.variant_function head indels_labeled.vcf.annovar.log # IGV igv