#!/usr/bin/env bash
##gene_set_ngs.sh
##2013-03-14 dmontaner@cipf.es
##GESBAP EXAMPLE
##This script shows the basics for functional profiling of genome wide studies.
##
##It is a step-by-step walkthrough: strict mode (set -e) is deliberately NOT
##enabled, because inspection commands such as grep exit non-zero when nothing
##matches and the walkthrough should keep going. Only the directory changes
##are guarded, since every later command uses paths relative to them.

##Go to the working directory
cd /home/biouser/gesbap || exit 1

##and see what is in it
ls

## gesbap.py: the main program to run the Gesbap Functional profiling, it will:
##     export the evidence from SNPs to genes
##     perform the segmentation analysis (FatiScan)
## snp-gene.db: a DATABASE containing the information that links SNPs with GENES
## data: directory for example data

## Explore the data directory
ls data
cd data || exit 1

## See the header of all files
## ("--" stops option parsing, so a file name starting with "-" is safe)
head -- *

## These are examples of gene ANNOTATION files
# human_gene_ontology_biomart_70.txt
# human_gene_ontology_biomart_BP_propagated.txt
# human_gene_ontology_biomart_BP_propagated_filtered.txt

head human_gene_ontology_biomart_BP_propagated_filtered.txt

##Observe how annotations are replicated (all of them should be)
grep GO:0000002 human_gene_ontology_biomart_BP_propagated_filtered.txt

##And also the genes (some of them)
grep ENSG00000005156 human_gene_ontology_biomart_BP_propagated_filtered.txt

##See the other annotations
head human_gene_ontology_biomart_70.txt
head human_gene_ontology_biomart_BP_propagated.txt

##How long are they?
wc -l human_gene_ontology_biomart_70.txt
wc -l human_gene_ontology_biomart_BP_propagated.txt
wc -l human_gene_ontology_biomart_BP_propagated_filtered.txt
##GO propagation expands the number of gene to GO relationships
##Filtering of small functional blocks reduces the number of rows

## significantSNPs-DOM-Inf-v2.txt: is a standard output file from an association study analysis
head significantSNPs-DOM-Inf-v2.txt

## significantSNPs-DOM-Inf-v2.txt AND significantSNPs.txt are SNP ranking files
head significantSNPs-DOM-Inf-v2.txt
head significantSNPs.txt

###Count the rows
wc -l significantSNPs-DOM-Inf-v2.txt
wc -l significantSNPs.txt
##Why is there one more line in significantSNPs-DOM-Inf-v2.txt?
## The SNP files compared here live in the data directory
## (the analysis below reads data/all_nsp_to_gesbap.txt from there).
cd /home/biouser/gesbap/data || exit 1

head significantSNPs.txt
head all_nsp_to_gesbap.txt

## NOTE(review): the original note named significantSNPs-DOM-Inf-v2.txt here,
## but the commands compare all_nsp_to_gesbap.txt; presumably that is the file
## that contains all the SNPs, not just the significant ones -- confirm.
wc -l significantSNPs.txt
wc -l all_nsp_to_gesbap.txt

################################################################################

### ANALYSIS ###

cd /home/biouser/gesbap || exit 1

##make a directory for the results
##(-p: do not complain if it is already there from a previous run)
mkdir -p res

###RUN the analysis (10 minutes)
./gesbap.py -i data/all_nsp_to_gesbap.txt -o res/NAME_RES

###EXPLORE THE OUTPUT FILES (use a spreadsheet)
cd res || exit 1
ls

## NAME_RES.gene_association.txt has the association evidence exported from SNP to gene
head NAME_RES.gene_association.txt

## NAME_RES.fatiscan_significant_terms.txt contains the significant GO terms.
## How many are they?
wc -l NAME_RES.fatiscan_significant_terms.txt
head NAME_RES.fatiscan_significant_terms.txt

## NAME_RES.fatiscan_significant_terms_simple.txt is a simplified version
## NOTE(review): file name taken from the note above; the original only ran
## head on the non-simplified file -- confirm gesbap.py writes this one.
head NAME_RES.fatiscan_significant_terms_simple.txt

## NAME_RES.fatiscan.txt has the exhaustive results from the analysis