Differences
This shows you the differences between two versions of the page.
calling_handson [2012/12/05 15:41] jjimeneza |
calling_handson [2017/05/24 14:23] (current) |
||
---|---|---|---|
Line 1: | Line 1: | ||
+ | ====== Variant calling and annotation: Practical session ====== | ||
+ | \\ | ||
+ | |||
+ | <code bash> | ||
+ | #!/bin/bash | ||
+ | |||
+ | # Working directory | ||
+ | |||
+ | cd ~/mda12/calling | ||
+ | |||
+ | |||
+ | # Run program and see options | ||
+ | ~/mda12/calling/software/GenomeAnalysisTK-1.4-15-gcd43f01/GenomeAnalysisTK.jar | ||
+ | |||
+ | # UnifiedGenotyper | ||
+ | |||
+ | ~/mda12/calling/software/GenomeAnalysisTK-1.4-15-gcd43f01/GenomeAnalysisTK.jar\ | ||
+ | -T UnifiedGenotyper | ||
+ | |||
+ | # Checking the reference | ||
+ | |||
+ | head ~/mda12/resources/ref/human_g1k_v37.chr20.fasta | ||
+ | |||
+ | head -3000 ~/mda12/resources/ref/human_g1k_v37.chr20.fasta | tail | ||
+ | |||
+ | #Checking the bed file | ||
+ | |||
+ | head ~/mda12/resources/ref/Exon_50mb_hg19_chr20.bed | ||
+ | |||
+ | #SNV Calling of all sites | ||
+ | ~/mda12/calling/software/GenomeAnalysisTK-1.4-15-gcd43f01/GenomeAnalysisTK.jar \ | ||
+ | -T UnifiedGenotyper \ | ||
+ | -R ~/mda12/resources/ref/human_g1k_v37.chr20.fasta \ | ||
+ | -L ~/mda12/resources/ref/Exon_50mb_hg19_chr20.bed \ | ||
+ | -I ~/mda12/resources/mapping/test_final.bam \ | ||
+ | -glm SNP \ | ||
+ | -out_mode EMIT_ALL_SITES \ | ||
+ | -o all_sites.vcf | ||
+ | |||
+ | |||
+ | # Checking file | ||
+ | less all_sites.vcf | ||
+ | |||
+ | #Counting lines | ||
+ | du -hs all_sites.vcf | ||
+ | wc -l all_sites.vcf | ||
+ | |||
+ | # Executing SNVs calling of variants | ||
+ | ~/mda12/calling/software/GenomeAnalysisTK-1.4-15-gcd43f01/GenomeAnalysisTK.jar \ | ||
+ | -T UnifiedGenotyper \ | ||
+ | -R ~/mda12/resources/ref/human_g1k_v37.chr20.fasta \ | ||
+ | -L ~/mda12/resources/ref/Exon_50mb_hg19_chr20.bed \ | ||
+ | -I ~/mda12/resources/mapping/test_final.bam \ | ||
+ | -glm SNP \ | ||
+ | -o snvs.vcf | ||
+ | |||
+ | # Checking file | ||
+ | less snvs.vcf | ||
+ | |||
+ | # Counting lines | ||
+ | du -hs snvs.vcf | ||
+ | wc -l snvs.vcf | ||
+ | |||
+ | # Executing indels calling of variants | ||
+ | ~/mda12/calling/software/GenomeAnalysisTK-1.4-15-gcd43f01/GenomeAnalysisTK.jar \ | ||
+ | -T UnifiedGenotyper \ | ||
+ | -R ~/mda12/resources/ref/human_g1k_v37.chr20.fasta \ | ||
+ | -L ~/mda12/resources/ref/Exon_50mb_hg19_chr20.bed \ | ||
+ | -I ~/mda12/resources/mapping/test_final.bam \ | ||
+ | -glm INDEL \ | ||
+ | -o indels.vcf | ||
+ | |||
+ | # Checking file | ||
+ | less indels.vcf | ||
+ | |||
+ | # Counting lines | ||
+ | du -hs indels.vcf | ||
+ | wc -l indels.vcf | ||
+ | |||
+ | # Labeling VCF files | ||
+ | # VariantFiltration | ||
+ | ~/mda12/calling/software/GenomeAnalysisTK-1.4-15-gcd43f01/GenomeAnalysisTK.jar \ | ||
+ | -T VariantFiltration | ||
+ | |||
+ | |||
+ | # Labeling SNVs VCF file | ||
+ | ~/mda12/calling/software/GenomeAnalysisTK-1.4-15-gcd43f01/GenomeAnalysisTK.jar \ | ||
+ | -T VariantFiltration \ | ||
+ | -filter "QD < 2.0 || MQ < 40.0 || FS > 60.0 || HaplotypeScore > 13.0 || MQRankSum < -12.5 || ReadPosRankSum < -8.0" \ | ||
+ | -filterName "STD_FILTER" \ | ||
+ | -R ~/mda12/resources/ref/human_g1k_v37.chr20.fasta \ | ||
+ | -V snvs.vcf \ | ||
+ | -o snvs_labeled.vcf | ||
+ | |||
+ | # Checking files | ||
+ | wc -l snvs_labeled.vcf | ||
+ | wc -l snvs.vcf | ||
+ | grep PASS snvs_labeled.vcf | wc -l | ||
+ | |||
+ | # Labeling indels VCF file | ||
+ | ~/mda12/calling/software/GenomeAnalysisTK-1.4-15-gcd43f01/GenomeAnalysisTK.jar \ | ||
+ | -T VariantFiltration \ | ||
+ | -filter "QD < 2.0 || ReadPosRankSum < -20.0 || FS > 200.0" \ | ||
+ | -filterName "STD_FILTER" \ | ||
+ | -R ~/mda12/resources/ref/human_g1k_v37.chr20.fasta \ | ||
+ | -V indels.vcf \ | ||
+ | -o indels_labeled.vcf | ||
+ | |||
+ | # Checking files | ||
+ | wc -l indels.vcf | ||
+ | wc -l indels_labeled.vcf | ||
+ | grep PASS indels_labeled.vcf | wc -l | ||
+ | |||
+ | # Converting SNV VCF file to annovar format file | ||
+ | ~/mda12/calling/software/annovar/convert2annovar.pl \ | ||
+ | -format vcf4 \ | ||
+ | -filter PASS snvs_labeled.vcf > \ | ||
+ | snvs_labeled.vcf.annovar | ||
+ | |||
+ | # Checking files | ||
+ | wc -l snvs_labeled.vcf.annovar | ||
+ | grep PASS snvs_labeled.vcf | wc -l | ||
+ | head snvs_labeled.vcf.annovar | ||
+ | |||
+ | # Annotating | ||
+ | ~/mda12/calling/software/annovar/annotate_variation.pl \ | ||
+ | --geneanno \ | ||
+ | --buildver hg19 \ | ||
+ | --dbtype gene \ | ||
+ | snvs_labeled.vcf.annovar \ | ||
+ | ~/mda12/calling/software/annovar/humandb/ | ||
+ | |||
+ | # Output | ||
+ | ls -latr | ||
+ | head snvs_labeled.vcf.annovar.exonic_variant_function | ||
+ | head snvs_labeled.vcf.annovar.variant_function | ||
+ | head snvs_labeled.vcf.annovar.log | ||
+ | |||
+ | # Converting indels VCF file to annovar format file | ||
+ | ~/mda12/calling/software/annovar/convert2annovar.pl \ | ||
+ | -format vcf4 \ | ||
+ | -filter PASS indels_labeled.vcf > \ | ||
+ | indels_labeled.vcf.annovar | ||
+ | |||
+ | # Checking files | ||
+ | wc -l indels_labeled.vcf.annovar | ||
+ | grep PASS indels_labeled.vcf | wc -l | ||
+ | head indels_labeled.vcf.annovar | ||
+ | |||
+ | # Annotating | ||
+ | ~/mda12/calling/software/annovar/annotate_variation.pl \ | ||
+ | --geneanno \ | ||
+ | --buildver hg19 \ | ||
+ | --dbtype gene \ | ||
+ | indels_labeled.vcf.annovar \ | ||
+ | ~/mda12/calling/software/annovar/humandb/ | ||
+ | |||
+ | # Output | ||
+ | ls -latr | ||
+ | head indels_labeled.vcf.annovar.exonic_variant_function | ||
+ | head indels_labeled.vcf.annovar.variant_function | ||
+ | head indels_labeled.vcf.annovar.log | ||
+ | |||
+ | |||
+ | # IGV | ||
+ | igv | ||
+ | </code> |