Omics-analysis

Omics analysis of complex traits

Seeding comprehensive analysis in their named directories (e.g., BMI), the repository links to technical issues documented in physalia, Mixed-Models, software-notes and other sister repositories (SUMSTATS, FM-pipeline, PW-pipeline, hess-pipeline, TWAS-pipeline, EWAS-fusion) for fine-mapping, pathway analysis, TWAS, Mendelian randomisation, predictive analytics and other topics as highlighted in the wiki page.

Earlier or broader aspects have been reflected in the following repositories: Haplotype-Analysis, misc, R.

The figure below was generated with eQTL.R.

Resources

— Glossary of Genetics —

NHGRI Genetics glossary

— Annotation —

The Ensembl public MySQL Servers

The following script retrieves gene information from ENSEMBL, lists the available attributes (columns) and filters, and searches for the filters whose names contain "gene".

library(biomaRt)
# Show the BioMart databases offered by the default host.
listMarts()

# Browse the datasets of two marts in turn; `mart` is reused for both.
mart <- useMart(biomart = "ENSEMBL_MART_FUNCGEN")
listDatasets(mart = mart)
mart <- useMart(biomart = "ensembl")
listDatasets(mart = mart)

# Connect to the human gene dataset served from grch37.ensembl.org.
ensembl <- useMart(
  biomart = "ensembl",
  dataset = "hsapiens_gene_ensembl",
  host = "grch37.ensembl.org",
  path = "/biomart/martservice"
)

# All attributes (columns) that can be requested from this dataset.
attr <- listAttributes(mart = ensembl)

# Columns to retrieve for every gene.
attr_select <- c(
  "ensembl_gene_id",
  "chromosome_name",
  "start_position",
  "end_position",
  "description",
  "hgnc_symbol",
  "transcription_start_site"
)
gene <- getBM(attributes = attr_select, mart = ensembl)

# All filters of the dataset, then only those matching the pattern "gene".
filter <- listFilters(mart = ensembl)
searchFilters(mart = ensembl, pattern = "gene")

See also https://sites.google.com/site/jpopgen/wgsa for precompiled annotation. Alternatively,

# GENCODE v19
# Location of the gzipped GTF annotation for GENCODE release 19.
url <- "ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_19/gencode.v19.chr_patch_hapl_scaff.annotation.gtf.gz"
# Read the annotation directly from the URL with rtracklayer.
gtf <- rtracklayer::import(url)
# Convert the imported object into a data frame.
gencode <- as.data.frame(gtf)

— Linkage disequilibrium —

LDlink: https://ldlink.nci.nih.gov/?tab=home.

NyuWa Chinese Population Variant Database (NCVD): http://bigdata.ibp.ac.cn/NyuWa_variants/

— EFO —

https://www.ebi.ac.uk/efo/

Example code,

library(ontologyIndex)

# Pick out the terms of an ontology whose names mention "inflammatory" or
# "immune" (case-sensitive regular-expression match).
#
# ontology: a list-like object with parallel `id` and `name` elements.
# Returns a list with elements `id` and `name` for the matching terms;
# "inflammatory" matches come first, followed by the remaining "immune"
# matches.
id <- function(ontology) {
  term_names <- ontology$name
  hits <- union(
    grep("inflammatory", term_names),
    grep("immune", term_names)
  )
  list(id = ontology$id[hits], name = term_names[hits])
}
# GO: load the `go` data set and extract its inflammatory/immune terms
# with the id() helper defined above.
data(go)
goidname <- id(go)
# EFO: read the ontology from efo.obo in the working directory.
file <- "efo.obo"
get_relation_names(file)
efo <- get_ontology(file, extract_tags = "everything")
length(efo) # 89
length(efo$id) # 27962
efoidname <- id(efo)
# Descendants of two EFO terms, and the names of the EFO:0000540 ones.
diseases <- get_descendants(efo, "EFO:0000408")
efo_0000540 <- get_descendants(efo, "EFO:0000540")
efo_0000540name <- efo$name[efo_0000540]
isd <- data.frame(efo_0000540, efo_0000540name)
# save() does not create missing directories, so make sure work/ exists.
dir.create("work", showWarnings = FALSE, recursive = TRUE)
save(efo, diseases, isd, efoidname, goidname, file = "work/efo.rda")
write.table(isd, file = "efo_0000540.csv", col.names = FALSE, row.names = FALSE, sep = ",")
# Attach the plotting package before opening the device, so that a missing
# package cannot leave a PDF device open.
library(ontologyPlot)
pdf("efo_0000540.pdf", height = 15, width = 15)
onto_plot(efo, efo_0000540)
dev.off()

— FUMA GWAS —

https://fuma.ctglab.nl/ (https://github.com/Kyoko-wtnb/FUMA-webapp/)

— GRCh38 reference genome —

https://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/GRCh38_reference_genome/

— GTEx and eQTLGen —

— MetaMapLite —

https://metamap.nlm.nih.gov/MetaMapLite.shtml

— MR-Base/OpenGWAS —

— OmicsPred —

https://www.omicspred.org/

— PredictDB data repository —

http://predictdb.org/

— Proteomic research —

— RegulomeDB —

http://regulomedb.org/

— Roadmap —

http://www.roadmapepigenomics.org/

— snakemake workflow catalogue —

https://snakemake.github.io/snakemake-workflow-catalog/

— TWAS —

https://github.com/hakyimlab/MetaXcan

http://gusevlab.org/projects/fusion/

— eQTL Catalog —

https://www.ebi.ac.uk/eqtl/

— GWAS Catalog —

https://www.ebi.ac.uk/gwas/

— PGS Catalog —

https://www.pgscatalog.org/

— PheWAS Catalog —

https://phewascatalog.org/

— rentrez —

The relevant URLs are as follows,

with example code,

library(rentrez)
# Explore the available Entrez databases and the searchable PubMed fields.
entrez_dbs()
entrez_db_links("pubmed")
pubmed_fields <- entrez_db_searchable("pubmed")
# set_entrez_key("")
Sys.getenv("ENTREZ_KEY")

# Search PubMed and keep the result set on the NCBI history server so it
# can be downloaded in batches.
term <- "pQTLs OR (protein AND quantitative AND trait AND loci) AND human [MH] AND (plasma OR Serum)"
r <- entrez_search(db = "pubmed", term = term, use_history = TRUE)
class(r)
names(r)
r$web_history

# Start from clean output files because the loop below appends to them.
unlink(paste("pubmed", c("fetch", "summary"), sep = "."))
fields <- c("uid", "pubdate", "sortfirstauthor", "title", "source", "volume", "pages")
batch_size <- 50
# E-utilities' retstart is zero-based: the first record of the set is 0.
# Iterating over batch numbers also copes with an empty result set, for
# which no batch is requested at all.
for (batch in seq_len(ceiling(r$count / batch_size)))
{
  start <- (batch - 1) * batch_size
  f <- entrez_fetch(db = "pubmed", web_history = r$web_history, rettype = "text", retmax = batch_size, retstart = start)
  write.table(f, col.names = FALSE, row.names = FALSE, file = "pubmed.fetch", append = TRUE)
  s <- entrez_summary(db = "pubmed", web_history = r$web_history, rettype = "text", retmax = batch_size, retstart = start)
  e <- extract_from_esummary(s, fields)
  write.table(t(e), col.names = FALSE, row.names = FALSE, file = "pubmed.summary", append = TRUE, sep = "\t")
  # Report progress once the batch is on disk, capped at the total count.
  cat(min(start + batch_size, r$count), "records downloaded\r")
}

# Follow links from an OMIM record to ClinVar and on to dbSNP through the
# history server.
# NOTE: this assignment masks the id() helper defined earlier in this file
# if both snippets are run in the same session.
id <- 600807
upload <- entrez_post(db = "omim", id = id)
asthma_variants <- entrez_link(dbfrom = "omim", db = "clinvar", cmd = "neighbor_history", web_history = upload)
asthma_variants
snp_links <- entrez_link(dbfrom = "clinvar", db = "snp", web_history = asthma_variants$web_histories$omim_clinvar, cmd = "neighbor_history")
# NOTE(review): `id` holds the OMIM identifier used above but is passed
# here as a PubMed id - confirm that this is intended.
all_links <- entrez_link(dbfrom = "pubmed", id = id, db = "all")

— Sequence Ontology —

http://www.sequenceontology.org/

— TWAS-hub —

http://twas-hub.org/

— Biobanks —