Skip to contents
pkgs <- c("GenomicRanges", "TwoSampleMR", "biomaRt", "coloc", "dplyr", "gap", "ggplot2",
          "httr", "karyoploteR", "knitr", "pQTLtools", "rGREAT", "readr", "regioneR", "seqminer")
for (p in pkgs) if (length(grep(paste("^package:", p, "$", sep=""), search())) == 0) {
    if (!requireNamespace(p)) warning(paste0("This vignette needs package `", p, "'; please install"))
}
invisible(suppressMessages(lapply(pkgs, require, character.only=TRUE)))

cis/trans classification

options(width=200)
f <- file.path("~","pQTLtools","tests","INF1.merge")
merged <- read.delim(f,as.is=TRUE)
hits <- merge(merged[c("CHR","POS","MarkerName","prot","log10p")],
              inf1[c("prot","uniprot")],by="prot") %>%
        mutate(log10p=-log10p)
names(hits) <- c("prot","Chr","bp","SNP","log10p","uniprot")
cistrans <- cis.vs.trans.classification(hits,inf1,"uniprot")
cis.vs.trans <- with(cistrans,data)
kable(with(cistrans,table),caption="Table 1. cis/trans classification")
Table 1. cis/trans classification
cis trans total
ADA 1 0 1
CASP8 1 0 1
CCL11 1 4 5
CCL13 1 3 4
CCL19 1 3 4
CCL2 0 3 3
CCL20 1 1 2
CCL23 1 0 1
CCL25 1 3 4
CCL3 1 1 2
CCL4 1 1 2
CCL7 1 2 3
CCL8 1 1 2
CD244 1 2 3
CD274 1 0 1
CD40 1 0 1
CD5 1 2 3
CD6 1 1 2
CDCP1 1 2 3
CSF1 1 0 1
CST5 1 3 4
CX3CL1 1 2 3
CXCL1 1 0 1
CXCL10 1 1 2
CXCL11 1 3 4
CXCL5 1 3 4
CXCL6 1 1 2
CXCL9 1 2 3
DNER 1 0 1
EIF4EBP1 0 1 1
FGF19 0 3 3
FGF21 1 2 3
FGF23 0 2 2
FGF5 1 0 1
FLT3LG 0 6 6
GDNF 1 0 1
HGF 1 1 2
IL10 1 2 3
IL10RB 1 1 2
IL12B 1 7 8
IL15RA 1 0 1
IL17C 1 0 1
IL18 1 1 2
IL18R1 1 1 2
IL1A 0 1 1
IL6 0 1 1
IL7 1 0 1
IL8 1 0 1
KITLG 0 7 7
LIFR 0 1 1
LTA 1 2 3
MMP1 1 2 3
MMP10 1 1 2
NGF 1 1 2
NTF3 0 1 1
OSM 0 2 2
PLAU 1 5 6
S100A12 1 0 1
SIRT2 1 0 1
SLAMF1 1 4 5
SULT1A1 1 1 2
TGFA 1 0 1
TGFB1 1 0 1
TNFRSF11B 1 1 2
TNFRSF9 1 1 2
TNFSF10 1 7 8
TNFSF11 1 5 6
TNFSF12 1 4 5
TNFSF14 1 0 1
VEGFA 1 3 4
total 59 121 180
with(cistrans,total)
#> [1] 180
circos.cis.vs.trans.plot(hits=f,inf1,"uniprot")
cis/trans classification

cis/trans classification

A more recent implementation is the qtlClassifier function. For this example we have,

options(width=200)
geneSNP <- merge(merged[c("prot","MarkerName")],inf1[c("prot","gene")],by="prot")[c("gene","MarkerName","prot")]
SNPPos <- merged[c("MarkerName","CHR","POS")]
genePos <- inf1[c("gene","chr","start","end")]
cvt <- qtlClassifier(geneSNP,SNPPos,genePos,1e6)
kable(head(cvt))
gene MarkerName prot geneChrom geneStart geneEnd SNPChrom SNPPos Type
2 EIF4EBP1 chr4:187158034_A_G 4E.BP1 8 37887859 37917883 4 187158034 trans
3 ADA chr20:43255220_C_T ADA 20 43248163 43280874 20 43255220 cis
4 NGF chr1:115829943_A_C Beta.NGF 1 115828539 115880857 1 115829943 cis
5 NGF chr9:90362040_C_T Beta.NGF 1 115828539 115880857 9 90362040 trans
6 CASP8 chr2:202164805_C_G CASP.8 2 202098166 202152434 2 202164805 cis
7 CCL11 chr1:159175354_A_G CCL11 17 32612687 32615353 1 159175354 trans
cistrans.check <- merge(cvt[c("gene","MarkerName","Type")],cis.vs.trans[c("p.gene","SNP","cis.trans")],
                        by.x=c("gene","MarkerName"),by.y=c("p.gene","SNP"))
with(cistrans.check,table(Type,cis.trans))
#>        cis.trans
#> Type    cis trans
#>   cis    59     0
#>   trans   0   121

This is used to generate 2d-pQTL plot

t2d <- qtl2dplot(cis.vs.trans,xlab="pQTL position",ylab="Gene position")
2d pQTL plot

2d pQTL plot

whose result can be also viewed in a 2-d plotly style,

fig2d <- qtl2dplotly(cis.vs.trans,xlab="pQTL position",ylab="Gene position")
fig2d

2d pQTL plotly

# We can save the figure for a browser independently
htmlwidgets::saveWidget(fig2d,file="fig2d.html")

and 3-d counterpart

fig3d <- qtl3dplotly(cis.vs.trans,zmax=300,qtl.prefix="pQTL:",xlab="pQTL position",ylab="Gene position")
fig3d

3d pQTL plotly

# We can save the figure for a browser independently
htmlwidgets::saveWidget(fig3d,file="fig3d.html")

and ideogram (biomaRt is not always on so we include the plot below)

f <- file.path("~","pQTLtools","tests","INF1.merge")
INF1_merge <- read.delim(f)[c("Chrom","Start","End","prot","MarkerName")]
INF1_merge_cvt <- merge(INF1_merge,cis.vs.trans,by.x=c("prot","MarkerName"),by.y=c("prot","SNP"))
ord <- with(INF1_merge_cvt,order(Chr,bp))
INF1_merge_cvt <- INF1_merge_cvt[ord,]

set_config(config(ssl_verifypeer = 0L))
mart <- useMart(biomart = "ensembl", dataset = "hsapiens_gene_ensembl")
attrs <- c("hgnc_symbol", "chromosome_name", "start_position", "end_position", "band")
hgnc <- vector("character",180)
for(i in 1:180)
{
  v <- with(INF1_merge_cvt[i,],paste0(Chr,":",bp,":",bp))
  g <- subset(getBM(attributes = attrs, filters="chromosomal_region", values=v, mart=mart),!is.na(hgnc_symbol))
  hgnc[i] <- paste(g[["hgnc_symbol"]],collapse=";")
  cat(i,g[["hgnc_symbol"]],hgnc[i],"\n")
}
INF1_merge_cvt <- within(INF1_merge_cvt,{
  hgnc <- hgnc
  hgnc[cis] <- p.gene[cis]
})

with(INF1_merge_cvt, {
  png("karyoplot.png",res= 300, units="in", width=12, height=20)
  sentinels <- toGRanges(Chr,bp-1,bp,labels=hgnc)
  cis.regions <- toGRanges(Chr,cis.start,cis.end)
  loci <- toGRanges(Chr,Start,End)
  colors <- c("red","blue")
  seqlevelsStyle(sentinels) <- "UCSC"
  kp <- plotKaryotype(genome="hg19",chromosomes=levels(seqnames(sentinels)))
  kpAddBaseNumbers(kp)
  kpPlotRegions(kp,data=loci,r0=0.05,r1=0.15,border="black")
  kpPlotMarkers(kp, data=sentinels, labels=hgnc, text.orientation="vertical",
                cex=0.5, y=0.3*seq_along(hgnc)/length(hgnc), srt=30,
                ignore.chromosome.ends=TRUE,
                adjust.label.position=TRUE, label.color=colors[2-cis], label.dist=0.002,
                cex.axis=3, cex.lab=3)
  legend("bottomright", bty="n", pch=c(19,19), col=colors, pt.cex=0.4,
         legend=c("cis", "trans"), text.col=colors, cex=0.8, horiz=FALSE)
# panel <- toGRanges(p.chr,p.start,p.end,labels=p.gene)
# kpPlotLinks(kp, data=loci, data2=panel, col=colors[2-cis])
# dev.off()
})
Karyoplot

Karyoplot

Genomic regions enrichment analysis

It is now considerably easier with Genomic Regions Enrichment of Annotations Tool (GREAT).

post <- function(regions)
{
  job <- submitGreatJob(get(regions), species="hg19", version="3.0.0")
  et <- getEnrichmentTables(job,download_by = 'tsv')
  tb <- do.call('rbind',et)
  write.table(tb,file=paste0(regions,".tsv"),quote=FALSE,row.names=FALSE,sep="\t")
  invisible(list(job=job,tb=tb))
}

M <- 1e+6
INF1_merge <- read.delim(file.path("~","pQTLtools","tests","INF1.merge")) %>%
              dplyr::mutate(chr=Chrom, start=POS-M, end=POS+M) %>%
              dplyr::mutate(start=if_else(start<0,0,start)) %>%
              dplyr::select(prot,MarkerName,chr,start,end)
cistrans <- dplyr::select(INF1_merge, chr,start,end) %>%
            dplyr::arrange(chr,start,end) %>%
            dplyr::distinct()
# All regions
cistrans.post <- post("cistrans")
job <- with(cistrans.post,job)
plotRegionGeneAssociationGraphs(job,type=c(1,3))
GREAT plots

GREAT plots

availableOntologies(job)
# plot of the top term
par(mfcol=c(3,1))
plotRegionGeneAssociationGraphs(job, ontology="GO Molecular Function",
                                termID="GO:0005126", type=c(1,3))
GREAT plots

GREAT plots

plotRegionGeneAssociationGraphs(job, ontology="GO Biological Process",
                                termID="GO:0009611", type=c(1,3))
GREAT plots

GREAT plots

plotRegionGeneAssociationGraphs(job, ontology="GO Cellular Component",
                                termID="GO:0005615", type=c(1,3))
GREAT plots

GREAT plots

# Specific regions
IL12B <- dplyr::filter(INF1_merge,prot=="IL.12B") %>% select(chr,start,end)
KITLG <- dplyr::filter(INF1_merge,prot=="SCF") %>% select(chr,start,end)
TNFSF10 <- dplyr::filter(INF1_merge,prot=="TRAIL") %>% select(chr,start,end)
tb_all <- data.frame()
for (r in c("IL12B","KITLG","TNFSF10"))
{
  r.post <- post(r)
  tb_all <- rbind(tb_all,data.frame(gene=r,with(r.post,tb)))
}
#> Don't make too frequent requests. The time break is 60s.
#> Please wait for 53s for the next request.
#> The time break can be set by `request_interval` argument.
#> Don't make too frequent requests. The time break is 60s.
#> Please wait for 57s for the next request.
#> The time break can be set by `request_interval` argument.
#> 
#> Don't make too frequent requests. The time break is 60s.
#> Please wait for 57s for the next request.
#> The time break can be set by `request_interval` argument.
write.table(tb_all,file="IL12B-KITLG-TNFSF10.tsv",quote=FALSE,row.names=FALSE,sep="\t")

The top terms at Binomial p=1e-5 could be extracted as follows,

Table 2. GREAT IL12B-KITLG-TNFSF10 results
gene Ontology ID Desc BinomRank BinomP BinomBonfP BinomFdrQ RegionFoldEnrich ExpRegions ObsRegions GenomeFrac SetCov HyperRank HyperP HyperBonfP HyperFdrQ GeneFoldEnrich ExpGenes ObsGenes TotalGenes GeneSetCov TermCov Regions Genes
KITLG GO Biological Process GO: 0010874 regulation of cholesterol efflux 1 1.0e-07 0.0014154 0.0014154 272.459900 0.0110108 3 0.0015730 0.4285714 1 2.00e-07 0.0015878 0.0015878 265.30880 0.0113076 3 17 0.2500000 0.1764706 chr16:55993161-57993161,chr7:93953895-95953895,chr9:106661742-108661742 ABCA1,CETP,PON1
KITLG GO Biological Process GO: 0032374 regulation of cholesterol transport 2 4.0e-07 0.0044976 0.0022488 185.188600 0.0161997 3 0.0023142 0.4285714 2 1.10e-06 0.0115168 0.0057584 140.94530 0.0212849 3 32 0.2500000 0.0937500 chr16:55993161-57993161,chr7:93953895-95953895,chr9:106661742-108661742 ABCA1,CETP,PON1
KITLG GO Biological Process GO: 0032368 regulation of lipid transport 3 9.0e-06 0.0936233 0.0312078 67.045540 0.0447457 3 0.0063922 0.4285714 3 1.10e-05 0.1148072 0.0382691 66.32721 0.0452303 3 68 0.2500000 0.0441176 chr16:55993161-57993161,chr7:93953895-95953895,chr9:106661742-108661742 ABCA1,CETP,PON1
TNFSF10 GO Biological Process GO: 0030195 negative regulation of blood coagulation 1 6.0e-07 0.0061689 0.0061689 170.502500 0.0175951 3 0.0021994 0.3750000 1 3.30e-06 0.0340956 0.0340956 100.22780 0.0299318 3 36 0.2000000 0.0833333 chr17:63224775-65224775,chr19:43153100-45153100,chr3:185449122-187449122 APOH,KNG1,PLAUR
TNFSF10 GO Biological Process GO: 0050819 negative regulation of coagulation 2 1.3e-06 0.0140303 0.0070151 129.538500 0.0231591 3 0.0028949 0.3750000 2 4.50e-06 0.0470857 0.0235429 90.20500 0.0332576 3 40 0.2000000 0.0750000 chr17:63224775-65224775,chr19:43153100-45153100,chr3:185449122-187449122 APOH,KNG1,PLAUR
TNFSF10 GO Biological Process GO: 0072376 protein activation cascade 3 4.4e-06 0.0457415 0.0152472 87.207410 0.0344008 3 0.0043001 0.3750000 3 1.88e-05 0.1961954 0.0653985 56.37813 0.0532121 3 64 0.2000000 0.0468750 chr17:63224775-65224775,chr1:195710916-197710916,chr3:185449122-187449122 APOH,CFH,KNG1
TNFSF10 GO Cellular Component GO: 0005615 extracellular space 1 6.5e-06 0.0082242 0.0082242 9.342017 0.6422596 6 0.0802824 0.7500000 1 1.00e-07 0.0001862 0.0001862 10.93394 0.7316668 8 880 0.5333333 0.0090909 chr14:93844947-95844947,chr17:63224775-65224775,chr18:28804863-30804863,chr1:195710916-197710916,chr3:171274232-173274232,chr3:185449122-187449122 APOH,CFH,CFHR3,KNG1,MEP1B,SERPINA1,SERPINA6,TNFSF10

eQTL Catalog for colocalization analysis

See example associated with import_eQTLCatalogue. A related function is import_OpenGWAS used to fetch data from OpenGWAS. The cis-pQTLs and 1e+6 flanking regions were considered and data are actually fetched from files stored locally. Only the first sentinel was used, and the code is not executed.

INF <- Sys.getenv("INF")
HPC_WORK <- Sys.getenv("HPC_WORK")
f <- file.path(system.file(package="pQTLtools"),"eQTL-Catalogue","hg19ToHg38.over.chain")
chain <- rtracklayer::import.chain(f)
gwasvcf::set_bcftools(file.path(HPC_WORK,"bin","bcftools"))
f <- file.path(system.file(package="pQTLtools"),"eQTL-Catalogue","tabix_ftp_paths_gtex.tsv")
tabix_paths <- read.delim(f, sep = "\t", header = TRUE, stringsAsFactors = FALSE) %>%
               dplyr::as_tibble()
f <- file.path(system.file(package="pQTLtools"),"eQTL-Catalogue",
               "tabix_ftp_paths_gtex.tsv")
imported_tabix_paths <- within(read.delim(f, stringsAsFactors = FALSE) %>%
                        dplyr::as_tibble(),
      {ftp_path <- gsub("ftp://ftp.ebi.ac.uk/pub/databases/spot/eQTL/csv/GTEx_V8/ge",
                        paste0("~","/rds/public_databases/GTEx/csv"),ftp_path)})
M <- 1e6
sentinels <- subset(cis.vs.trans,cis)

liftRegion <- function(x,chain,flanking=1e6)
{
  gr <- with(x,GenomicRanges::GRanges(seqnames=chr,IRanges::IRanges(start,end))+flanking)
  seqlevelsStyle(gr) <- "UCSC"
  gr38 <- rtracklayer::liftOver(gr, chain)
  chr <- colnames(table(seqnames(gr38)))
  chr <- gsub("chr","",chr)
  start <- min(unlist(start(gr38)))
  end <- max(unlist(end(gr38)))
  invisible(list(chr=chr,start=start,end=end,region=paste0(chr,":",start,"-",end)))
}

sumstats <- function(prot,chr,region37)
{
  cat("GWAS sumstats\n")
  f <- file.path(INF,"METAL/gwas2vcf",paste0(prot,".vcf.gz"))
  gwas_stats <- gwasvcf::query_gwas(f, chrompos = region37)
  gwas_stats <- gwasvcf::vcf_to_granges(gwas_stats) %>%
                keepSeqlevels(chr) %>%
                renameSeqlevels(paste0("chr",chr))
  gwas_stats_hg38 <- rtracklayer::liftOver(gwas_stats, chain) %>%
                     unlist() %>%
                     renameSeqlevels(chr) %>%
                     dplyr::as_tibble() %>%
                     dplyr::transmute(chromosome = seqnames,
                                      position = start, REF, ALT, AF, ES, SE, LP, SS) %>%
                     dplyr::mutate(id = paste(chromosome, position, sep = ":")) %>%
                     dplyr::mutate(MAF = pmin(AF, 1-AF)) %>%
                     dplyr::group_by(id) %>%
                     dplyr::mutate(row_count = n()) %>%
                     dplyr::ungroup() %>%
                     dplyr::filter(row_count == 1)
}

coloc <- function(gwas_stats_hg38,ensGene,region38)
{
  cat("c. GTEx_v8 imported eQTL datasets\n")
  rnaseq_df <- dplyr::filter(imported_tabix_paths, quant_method == "ge") %>%
               dplyr::mutate(qtl_id = paste(study, qtl_group, sep = "_"))
  ftp_path_list <- setNames(as.list(rnaseq_df$ftp_path), rnaseq_df$qtl_id)
  hdr <- file.path(system.file(package="pQTLtools"),"eQTL-Catalogue","column_names.GTEx")
  column_names <- names(read.delim(hdr))
  safe_import <- purrr::safely(import_eQTLCatalogue)
  summary_list <- purrr::map(ftp_path_list, ~safe_import(., region38,
                             selected_gene_id = ensGene, column_names))
  result_list <- purrr::map(summary_list, ~.$result)
  result_list <- result_list[!unlist(purrr::map(result_list, is.null))]
  result_filtered <- purrr::map(result_list[lapply(result_list,nrow)!=0],
                                ~dplyr::filter(., !is.na(se)))
  purrr::map_df(result_filtered, ~run_coloc(., gwas_stats_hg38), .id = "qtl_id")
}

sentinel <- sentinels[1,]
isnpid <- within(gap::inv_chr_pos_a1_a2(sentinel[["SNP"]]),
{
    chr <- gsub("chr","",chr)
    pos <- as.integer(pos)
    start <- pos-M
    if (start<0) start <- 0
    end <- pos+M
})
chr <- with(isnpid,chr)
region37 <- with(isnpid, paste0(chr,":",start,"-",end))
ensRegion37 <- with(subset(inf1,prot==sentinel[["prot"]]),paste0(chr,":",start,"-",end))
region38 <- with(liftRegion(isnpid,chain),region)
ensGene <- subset(inf1,prot==sentinel[["prot"]])[["ensembl_gene_id"]]
ensRegion38 <- with(liftRegion(subset(inf1,prot==sentinel[["prot"]]),chain),region)
f <- file.path("~","pQTLtools","tests",with(sentinel,paste0(prot,"-",gsub(":","_",SNP),".rda")))
gwas_stats_hg38 <- sumstats(sentinel[["prot"]],chr,region37)
coloc_df_imported <- coloc(gwas_stats_hg38,ensGene,region38)
save(gwas_stats_hg38,sentinel,coloc_df_imported,file=f)

We load the data obtained from the code above,

load(file.path("~","pQTLtools","tests","OPG-chr8_120081031_C_T.rda"))
ggplot2::ggplot(gwas_stats_hg38, aes(x = position, y = LP)) +
ggplot2::theme_bw() +
ggplot2::geom_point() +
ggplot2::ggtitle(with(sentinel,paste0(prot,"-",SNP," association plot")))
Association plot

Association plot

if (nrow(coloc_df_imported)>0)
{
   dplyr::arrange(coloc_df_imported, -PP.H4.abf)
   ggplot(coloc_df_imported, aes(x = PP.H4.abf)) +
   ggplot2::theme_bw() +
   geom_histogram() +
   ggtitle(with(sentinel,paste0(prot,"-",SNP," PP4 histogram"))) +
   xlab("PP4") + ylab("Frequency")
}
#> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Association plot

Association plot

ktitle <- with(sentinel,paste0("Table 3. Colocalization results for ",prot,"-",SNP))
kable(coloc_df_imported, caption=ktitle)
Table 3. Colocalization results for OPG-chr8:120081031_C_T
qtl_id nsnps PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf
GTEx_V8_Adipose_Subcutaneous 6491 0 0 0.0000037 0.9999961 0.0000002
GTEx_V8_Adipose_Visceral_Omentum 6491 0 0 0.0877730 0.9085846 0.0036424
GTEx_V8_Adrenal_Gland 6490 0 0 0.4593515 0.3617935 0.1788549
GTEx_V8_Artery_Aorta 6491 0 0 0.5953200 0.3790719 0.0256082
GTEx_V8_Artery_Coronary 6489 0 0 0.4847137 0.4779834 0.0373029
GTEx_V8_Artery_Tibial 6491 0 0 0.5945402 0.3336653 0.0717945
GTEx_V8_Brain_Amygdala 6450 0 0 0.5612204 0.3595461 0.0792336
GTEx_V8_Brain_Anterior_cingulate_cortex_BA24 6479 0 0 0.5297845 0.4326513 0.0375642
GTEx_V8_Brain_Caudate_basal_ganglia 6490 0 0 0.5623609 0.4050108 0.0326282
GTEx_V8_Brain_Cerebellar_Hemisphere 6487 0 0 0.5448030 0.4129728 0.0422242
GTEx_V8_Brain_Cerebellum 6487 0 0 0.5455953 0.3764554 0.0779493
GTEx_V8_Brain_Cortex 6490 0 0 0.4788122 0.4670250 0.0541628
GTEx_V8_Brain_Frontal_Cortex_BA9 6485 0 0 0.4769094 0.4745536 0.0485370
GTEx_V8_Brain_Hippocampus 6482 0 0 0.5508764 0.3445222 0.1046014
GTEx_V8_Brain_Hypothalamus 6486 0 0 0.5031980 0.4493794 0.0474226
GTEx_V8_Brain_Nucleus_accumbens_basal_ganglia 6490 0 0 0.5637913 0.3754152 0.0607935
GTEx_V8_Brain_Putamen_basal_ganglia 6481 0 0 0.5773902 0.3803744 0.0422354
GTEx_V8_Brain_Spinal_cord_cervical_c-1 6476 0 0 0.5674380 0.3856918 0.0468702
GTEx_V8_Brain_Substantia_nigra 6455 0 0 0.5801170 0.3810887 0.0387944
GTEx_V8_Breast_Mammary_Tissue 6491 0 0 0.5517909 0.4207544 0.0274547
GTEx_V8_Cells_Cultured_fibroblasts 6491 0 0 0.0000000 1.0000000 0.0000000
GTEx_V8_Cells_EBV-transformed_lymphocytes 6479 0 0 0.4846865 0.4841672 0.0311463
GTEx_V8_Colon_Sigmoid 6491 0 0 0.5263532 0.4351477 0.0384991
GTEx_V8_Colon_Transverse 6491 0 0 0.0075211 0.9921292 0.0003497
GTEx_V8_Esophagus_Gastroesophageal_Junction 6491 0 0 0.5779762 0.3947891 0.0272347
GTEx_V8_Esophagus_Mucosa 6491 0 0 0.3668237 0.6161141 0.0170622
GTEx_V8_Esophagus_Muscularis 6491 0 0 0.0107519 0.9787420 0.0105062
GTEx_V8_Heart_Atrial_Appendage 6491 0 0 0.0005064 0.9994704 0.0000233
GTEx_V8_Heart_Left_Ventricle 6491 0 0 0.0246851 0.9741237 0.0011912
GTEx_V8_Kidney_Cortex 6382 0 0 0.5348624 0.4201667 0.0449709
GTEx_V8_Liver 6491 0 0 0.4685926 0.4736155 0.0577919
GTEx_V8_Lung 6491 0 0 0.0104079 0.9885490 0.0010430
GTEx_V8_Minor_Salivary_Gland 6471 0 0 0.5749675 0.3674231 0.0576094
GTEx_V8_Muscle_Skeletal 6491 0 0 0.6122075 0.3654167 0.0223758
GTEx_V8_Nerve_Tibial 6491 0 0 0.1864732 0.8056029 0.0079240
GTEx_V8_Ovary 6489 0 0 0.5503041 0.4127715 0.0369244
GTEx_V8_Pancreas 6491 0 0 0.0189411 0.9801002 0.0009587
GTEx_V8_Pituitary 6491 0 0 0.6120249 0.3519956 0.0359795
GTEx_V8_Prostate 6491 0 0 0.5131930 0.4522440 0.0345630
GTEx_V8_Skin_Not_Sun_Exposed_Suprapubic 6491 0 0 0.5513830 0.4267627 0.0218543
GTEx_V8_Skin_Sun_Exposed_Lower_leg 6491 0 0 0.4278374 0.5543397 0.0178229
GTEx_V8_Small_Intestine_Terminal_Ileum 6487 0 0 0.5695417 0.3523800 0.0780782
GTEx_V8_Spleen 6489 0 0 0.5456173 0.4073894 0.0469934
GTEx_V8_Stomach 6491 0 0 0.5807760 0.3769347 0.0422893
GTEx_V8_Testis 6491 0 0 0.6158313 0.3520954 0.0320733
GTEx_V8_Thyroid 6491 0 0 0.1501267 0.8433101 0.0065632
GTEx_V8_Uterus 6484 0 0 0.5720926 0.3891594 0.0387480
GTEx_V8_Vagina 6470 0 0 0.5955160 0.3630831 0.0414009

The function sumstats() obtained meta-analysis summary statistics (in build 37 and therefore lifted over to build 38) to be used in colocalization analysis. The output are saved in the .RDS files. Note that ftp_path changes from eQTL Catalog to local files.

pQTL-based Mendelian Randomisation (MR)

The function pqtlMR is derived from Zheng et al. (2020). It has an attractive feature that multiple pQTLs can be used together for conducting MR with a list of outcomes from MR-Base. For generic applications the run_TwoSampleMR() function can be used.

f <- file.path(system.file(package="pQTLtools"),"tests","Ins.csv")
ivs <- format_data(read.csv(f))
caption4 <- "Table 4. ABO/LIFR variants and CHD/FEV1"
kable(ivs, caption=paste(caption4,"(instruments)"))
Table 4. ABO/LIFR variants and CHD/FEV1 (instruments)
SNP effect_allele.exposure other_allele.exposure eaf.exposure beta.exposure se.exposure pval.exposure exposure mr_keep.exposure pval_origin.exposure id.exposure
rs505922 C T 0.313 1.298 0.014 0 ABO TRUE reported dU3C0x
rs635634 T C 0.180 -0.300 0.032 0 LIFR TRUE reported YmGdhW
ids <- c("ieu-a-7","ebi-a-GCST007432")
pqtlMR(ivs, ids)
#> API: public: http://gwas-api.mrcieu.ac.uk/
#> Extracting data for 2 SNP(s) from 2 GWAS(s)
#> Harmonising ABO (dU3C0x) and FEV1 || id:ebi-a-GCST007432 (ebi-a-GCST007432)
#> Harmonising LIFR (YmGdhW) and FEV1 || id:ebi-a-GCST007432 (ebi-a-GCST007432)
#> Harmonising ABO (dU3C0x) and Coronary heart disease || id:ieu-a-7 (ieu-a-7)
#> Harmonising LIFR (YmGdhW) and Coronary heart disease || id:ieu-a-7 (ieu-a-7)
#> Analysing 'dU3C0x' on 'ebi-a-GCST007432'
#> Analysing 'dU3C0x' on 'ieu-a-7'
#> Analysing 'YmGdhW' on 'ebi-a-GCST007432'
#> Analysing 'YmGdhW' on 'ieu-a-7'
result <- read.delim("pQTL-combined-result.txt",header=TRUE)
kable(result,caption=paste(caption4, "(result)"))
Table 4. ABO/LIFR variants and CHD/FEV1 (result)
id.exposure id.outcome outcome exposure method nsnp b se pval
dU3C0x ebi-a-GCST007432 FEV1 || id:ebi-a-GCST007432 ABO Wald ratio 1 -0.0109 0.00193 0.0e+00
dU3C0x ieu-a-7 Coronary heart disease || id:ieu-a-7 ABO Wald ratio 1 0.0334 0.00730 4.6e-06
YmGdhW ebi-a-GCST007432 FEV1 || id:ebi-a-GCST007432 LIFR Wald ratio 1 0.0620 0.01000 0.0e+00
YmGdhW ieu-a-7 Coronary heart disease || id:ieu-a-7 LIFR Wald ratio 1 -0.2572 0.03904 0.0e+00
single <- read.delim("pQTL-combined-single.txt",header=TRUE)
kable(subset(single,!grepl("All",SNP)), caption=paste(caption4, "(single)"))
Table 4. ABO/LIFR variants and CHD/FEV1 (single)
exposure outcome id.exposure id.outcome samplesize SNP b se p
1 ABO FEV1 || id:ebi-a-GCST007432 dU3C0x ebi-a-GCST007432 321047 rs505922 -0.0109 0.00193 1.35e-08
4 ABO Coronary heart disease || id:ieu-a-7 dU3C0x ieu-a-7 184305 rs505922 0.0334 0.00730 4.63e-06
7 LIFR FEV1 || id:ebi-a-GCST007432 YmGdhW ebi-a-GCST007432 321047 rs635634 0.0620 0.01000 5.65e-10
10 LIFR Coronary heart disease || id:ieu-a-7 YmGdhW ieu-a-7 184305 rs635634 -0.2572 0.03904 4.47e-11
invisible(sapply(c("harmonise","result","single"),
                 function(x) unlink(paste0("pQTL-combined-",x,".txt"))))

run_TwoSampleMR

The function has similiarity to Dimou and Tsilidis (2018). The documentation example is quoted here,

outcomes <- "ebi-a-GCST007432"
prot <- "MMP.10"
type <- "cis"
f <- paste0(prot,"-",type,".mrx")
d <- read.table(file.path(system.file(package="pQTLtools"),"tests",f),
                header=TRUE)
exposure <- format_data(within(d,{P=10^logP}), phenotype_col="prot", snp_col="rsid",
                        chr_col="Chromosome", pos_col="Posistion",
                        effect_allele_col="Allele1", other_allele_col="Allele2",
                        eaf_col="Freq1", beta_col="Effect", se_col="StdErr",
                        pval_col="P", log_pval=FALSE,
                        samplesize_col="N")
clump <- clump_data(exposure)
#> Please look at vignettes for options on running this locally if you need to run many instances of this command.
#> Clumping ZoHewj, 1106 variants, using EUR population reference
#> Removing 1102 of 1106 variants due to LD with other variants or absence from LD reference panel
outcome <- extract_outcome_data(snps=exposure$SNP,outcomes=outcomes)
#> Extracting data for 1106 SNP(s) from 1 GWAS(s)
#> Finding proxies for 155 SNPs in outcome ebi-a-GCST007432
#> Extracting data for 155 SNP(s) from 1 GWAS(s)
harmonise <- harmonise_data(clump,outcome)
#> Harmonising MMP.10 (ZoHewj) and FEV1 || id:ebi-a-GCST007432 (ebi-a-GCST007432)
prefix <- paste(outcomes,prot,type,sep="-")
run_TwoSampleMR(harmonise, mr_plot="pQTLtools", prefix=prefix)
#> Analysing 'ZoHewj' on 'ebi-a-GCST007432'
Two-sample MR

Two-sample MR

Two-sample MR

Two-sample MR

Two-sample MR

Two-sample MR

Two-sample MR

Two-sample MR

The output is contained in individual .txt files, together with the scatter, forest, funnel and leave-one-out plots.

Table 5. MMP.10 variants and FEV1 (result)
id.exposure id.outcome outcome exposure method nsnp b se pval
ZoHewj ebi-a-GCST007432 FEV1 || id:ebi-a-GCST007432 MMP.10 MR Egger 4 -0.000577 0.01001 0.959
ZoHewj ebi-a-GCST007432 FEV1 || id:ebi-a-GCST007432 MMP.10 Weighted median 4 0.002002 0.00628 0.750
ZoHewj ebi-a-GCST007432 FEV1 || id:ebi-a-GCST007432 MMP.10 Inverse variance weighted 4 0.002067 0.00594 0.728
ZoHewj ebi-a-GCST007432 FEV1 || id:ebi-a-GCST007432 MMP.10 Simple mode 4 0.004152 0.00994 0.704
ZoHewj ebi-a-GCST007432 FEV1 || id:ebi-a-GCST007432 MMP.10 Weighted mode 4 0.001134 0.00706 0.883
Table 5. MMP.10 variants and FEV1 (heterogeneity)
id.exposure id.outcome outcome exposure method Q Q_df Q_pval
ZoHewj ebi-a-GCST007432 FEV1 || id:ebi-a-GCST007432 MMP.10 MR Egger 0.114 2 0.944
ZoHewj ebi-a-GCST007432 FEV1 || id:ebi-a-GCST007432 MMP.10 Inverse variance weighted 0.222 3 0.974
Table 5. MMP.10 variants and FEV1 (pleiotropy)
id.exposure id.outcome outcome exposure egger_intercept se pval
ZoHewj ebi-a-GCST007432 FEV1 || id:ebi-a-GCST007432 MMP.10 0.00143 0.00434 0.774
Table 5. MMP.10 variants and FEV1 (single)
exposure outcome id.exposure id.outcome samplesize SNP b se p
MMP.10 FEV1 || id:ebi-a-GCST007432 ZoHewj ebi-a-GCST007432 321047 rs11225354 0.006425 0.01217 0.598
MMP.10 FEV1 || id:ebi-a-GCST007432 ZoHewj ebi-a-GCST007432 321047 rs142915220 -0.003993 0.02856 0.889
MMP.10 FEV1 || id:ebi-a-GCST007432 ZoHewj ebi-a-GCST007432 321047 rs17099622 0.004360 0.02229 0.845
MMP.10 FEV1 || id:ebi-a-GCST007432 ZoHewj ebi-a-GCST007432 321047 rs17860955 0.000616 0.00739 0.934
MMP.10 FEV1 || id:ebi-a-GCST007432 ZoHewj ebi-a-GCST007432 321047 All - Inverse variance weighted 0.002067 0.00594 0.728
MMP.10 FEV1 || id:ebi-a-GCST007432 ZoHewj ebi-a-GCST007432 321047 All - MR Egger -0.000577 0.01001 0.959
Table 5. MMP.10 variants and FEV1 (loo)
exposure outcome id.exposure id.outcome samplesize SNP b se p
MMP.10 FEV1 || id:ebi-a-GCST007432 ZoHewj ebi-a-GCST007432 321047 rs11225354 0.000703 0.00681 0.918
MMP.10 FEV1 || id:ebi-a-GCST007432 ZoHewj ebi-a-GCST007432 321047 rs142915220 0.002341 0.00608 0.700
MMP.10 FEV1 || id:ebi-a-GCST007432 ZoHewj ebi-a-GCST007432 321047 rs17099622 0.001891 0.00617 0.759
MMP.10 FEV1 || id:ebi-a-GCST007432 ZoHewj ebi-a-GCST007432 321047 rs17860955 0.004730 0.01001 0.636
MMP.10 FEV1 || id:ebi-a-GCST007432 ZoHewj ebi-a-GCST007432 321047 All 0.002067 0.00594 0.728

MR using cis, trans and cis+trans (pan) instruments

This is illustrated with IL-12B.

efo <- read.delim(file.path("~","pQTLtools","tests","efo.txt")) %>%
       dplyr::mutate(x=1:n()) %>%
       dplyr::select(MRBASEID,trait,x)
d3 <- read.delim(file.path("~","pQTLtools","tests","efo-result.txt")) %>%
      dplyr::filter(exposure=="IL.12B") %>%
      dplyr::mutate(MRBASEID=unlist(lapply(strsplit(outcome,"id:"),"[",2)),y=b) %>%
      dplyr::select(-outcome,-method) %>%
      left_join(efo) %>%
      dplyr::arrange(cistrans)
#> Joining, by = "MRBASEID"
kable(head(d3[c("MRBASEID","trait","cistrans","nsnp","b","se","pval")],29),caption="Table 6. MR with IL-12B variants")
Table 6. MR with IL-12B variants
MRBASEID trait cistrans nsnp b se pval
bbj-a-123 Graves disease cis 21 0.0078000 0.1008000 0.9380000
ebi-a-GCST003156 systemic lupus erythematosus cis 39 -0.0958000 0.0676000 0.1566000
ebi-a-GCST005528 juvenile idiopathic arthritis cis 1 -0.1670000 0.1140000 0.1400000
ebi-a-GCST005581 primary biliary cirrhosis cis 2 0.0056600 0.1630000 0.9720000
finn-a-AB1_HIV HIV infection cis 29 -0.1292000 0.2800000 0.6450000
finn-a-D3_SARCOIDOSIS Sarcoidosis cis 29 -0.0604000 0.1150000 0.6010000
finn-a-M13_SJOGREN Sjogren syndrome cis 29 -0.2480000 0.1410000 0.0802000
finn-a-MENINGITIS meningitis infection cis 29 -0.2172000 0.1910000 0.2570000
ieu-a-1081 IGA glomerulonephritis cis 3 0.2096000 0.1770000 0.2370000
ieu-a-1112 sclerosing cholangitis cis 32 0.0520400 0.0682000 0.4460000
ieu-a-276 celiac disease cis 3 -0.0172900 0.1214000 0.8870000
ieu-a-31 inflammatory bowel disease cis 56 0.3900000 0.0350000 0.0000000
ieu-b-18 multiple sclerosis cis 26 -0.0256000 0.0426000 0.5480000
ieu-b-69 sepsis cis 56 -0.0334000 0.0266000 0.2090000
ukb-a-115 Vitiligo cis 54 -0.0000624 0.0000757 0.4100000
ukb-a-552 Crohn’s disease cis 54 0.0008850 0.0002140 0.0000363
ukb-b-10537 psoriasis cis 17 -0.0035700 0.0007880 0.0000059
ukb-b-13251 gout cis 20 -0.0001410 0.0004710 0.7640000
ukb-b-15606 pneumonia cis 7 0.0002170 0.0002460 0.3760000
ukb-b-15622 Tuberculosis cis 7 0.0001340 0.0003120 0.6680000
ukb-b-16499 allergic rhinitis cis 40 0.0002750 0.0010300 0.7880000
ukb-b-16702 allergy cis 7 -0.0003850 0.0003460 0.2654000
ukb-b-18194 ankylosing spondylitis cis 5 0.0001030 0.0002750 0.7100000
ukb-b-19386 ulcerative colitis cis 7 0.0005620 0.0002730 0.0394000
ukb-b-19732 hypothyroidism cis 37 -0.0000555 0.0009270 0.9520000
ukb-b-20141 atopic eczema cis 26 -0.0000095 0.0006290 0.9880000
ukb-b-20208 asthma cis 7 0.0000822 0.0002650 0.7560000
ukb-b-8814 urinary tract infection cis 18 -0.0005110 0.0004730 0.2790000
ukb-b-9125 rheumatoid arthritis cis 17 -0.0001220 0.0006220 0.8450000
p <- ggplot2::ggplot(d3,aes(y = trait, x = y))+
     ggplot2::theme_bw()+
     ggplot2::geom_point()+
     ggplot2::facet_wrap(~cistrans,ncol=3,scales="free_x")+
     ggplot2::geom_segment(aes(x = b-1.96*se, xend = b+1.96*se, yend = trait))+
     ggplot2::geom_vline(lty=2, aes(xintercept=0), colour = 'red')+
     ggplot2::xlab("Effect size")+
     ggplot2::ylab("")
p
MR with cis, trans and cis+trans variants of IL-12B

MR with cis, trans and cis+trans variants of IL-12B

GSMR forest plots

This is based on GSMR results of TNFB and a range of immune-mediated traits,

Table 7. GSMR results for TNFB and immune-mediated traits
Outcome Effect StdErr
Multiple sclerosis 0.6905860 0.0592704
Systemic lupus erythematosus 0.7668750 0.0790005
Sclerosing cholangitis 0.6267150 0.0759547
Juvenile idiopathic arthritis -1.1757700 0.1602930
Psoriasis 0.0058259 0.0008000
Rheumatoid arthritis -0.0037807 0.0006252
Inflammatory bowel disease -0.1433420 0.0252725
Ankylosing spondylitis -0.0031685 0.0006262
Hypothyroidism -0.0043205 0.0009873
Allergic rhinitis 0.0039308 0.0009260
IgA glomerulonephritis -0.3269660 0.1052620
Atopic eczema -0.0020402 0.0006781

mr_forestplot(tnfb,colgap.forest.left="0.05cm", fontsize=14,
              leftcols=c("studlab"), leftlabs=c("Outcome"),
              plotwidth="5inch", sm="OR", sortvar=tnfb[["Effect"]],
              rightcols=c("effect","ci","pval"), rightlabs=c("OR","95%CI","GSMR P"),
              digits=3, digits.pval=2, scientific.pval=TRUE,
              common=FALSE, random=FALSE, print.I2=FALSE, print.pval.Q=FALSE, print.tau2=FALSE,
              addrow=TRUE, backtransf=TRUE, spacing=1.6)
#> Loading required namespace: meta
Forest plots based on MR results on TNFB

Forest plots based on MR results on TNFB

Literature on pQTLs

Those as identified by Sun et al. (2018) and Suhre, McCarthy, and Schwenk (2020) are included as EndNote libraries.

UniProt IDs

The function uniprot2ids converts UniProt IDs to others.

References

Dimou, Niki L., and Konstantinos K. Tsilidis. 2018. “A Primer in Mendelian Randomization Methodology with a Focus on Utilizing Published Summary Association Data.” In Genetic Epidemiology: Methods and Protocols, edited by Evangelos Evangelou, 211–30. New York: Springer Science+Business Media, LLC, part of Springer Nature.
Suhre, K., M. I. McCarthy, and J. M. Schwenk. 2020. “Genetics Meets Proteomics: Perspectives for Large Population-Based Studies.” Journal Article. Nat Rev Genet. https://doi.org/10.1038/s41576-020-0268-2.
Sun, B. B., J. C. Maranville, J. E. Peters, D. Stacey, J. R. Staley, J. Blackshaw, S. Burgess, et al. 2018. “Genomic Atlas of the Human Plasma Proteome.” Journal Article. Nature 558 (7708): 73–79. https://doi.org/10.1038/s41586-018-0175-2.
Zheng, J., V. Haberland, D. Baird, V. Walker, P. C. Haycock, M. R. Hurle, A. Gutteridge, et al. 2020. “Phenome-Wide Mendelian Randomization Mapping the Influence of the Plasma Proteome on Complex Diseases.” Journal Article. Nature Genetics 52 (10): 1122–31. https://doi.org/10.1038/s41588-020-0682-6.