The examples here are based on the SCALLOP work1.
pkgs <- c("GenomicRanges", "TwoSampleMR", "biomaRt", "coloc", "dplyr", "gap", "ggplot2", "httr",
"karyoploteR", "circlize", "knitr", "plotly", "pQTLtools", "rGREAT", "readr", "regioneR",
"seqminer")
for (p in pkgs) if (length(grep(paste("^package:", p, "$", sep=""), search())) == 0) {
if (!requireNamespace(p)) warning(paste0("This vignette needs package `", p, "'; please install"))
}
invisible(suppressMessages(lapply(pkgs, require, character.only=TRUE)))
1 Forest plots
We start with results on osteoprotegerin (OPG)2,
data(OPG,package="gap.datasets")
meta::settings.meta(method.tau="DL")
METAL_forestplot(OPGtbl,OPGall,OPGrsid,width=6.75,height=5,digits.TE=2,digits.se=2,
col.diamond="black",col.inside="black",col.square="black")
#> Joining with `by = join_by(MarkerName)`
#> Joining with `by = join_by(MarkerName)`

Figure 1.1: Forest plots

Figure 1.2: Forest plots
METAL_forestplot(OPGtbl,OPGall,OPGrsid,package="metafor",method="FE",xlab="Effect",
showweights=TRUE)
#> Joining with `by = join_by(MarkerName)`
#> Joining with `by = join_by(MarkerName)`

Figure 1.3: Forest plots

Figure 1.4: Forest plots
involving both a cis and a trans pQTLs. As meta
inherently includes random effects, we use a fixed effects (FE) model from metafor
.
2 cis/trans classification
2.1 pQTL signals and classification table
options(width=200)
f <- file.path(find.package("pQTLtools"),"tests","INF1.merge")
merged <- read.delim(f,as.is=TRUE)
hits <- merge(merged[c("CHR","POS","MarkerName","prot","log10p")],
pQTLdata::inf1[c("prot","uniprot")],by="prot") %>%
dplyr::mutate(log10p=-log10p)
names(hits) <- c("prot","Chr","bp","SNP","log10p","uniprot")
cistrans <- cis.vs.trans.classification(hits,pQTLdata::inf1,"uniprot")
cis.vs.trans <- with(cistrans,data)
kable(with(cistrans,table),caption="cis/trans classification")
cis | trans | total | |
---|---|---|---|
ADA | 1 | 0 | 1 |
CASP8 | 1 | 0 | 1 |
CCL11 | 1 | 4 | 5 |
CCL13 | 1 | 3 | 4 |
CCL19 | 1 | 3 | 4 |
CCL2 | 0 | 3 | 3 |
CCL20 | 1 | 1 | 2 |
CCL23 | 1 | 0 | 1 |
CCL25 | 1 | 3 | 4 |
CCL3 | 1 | 1 | 2 |
CCL4 | 1 | 1 | 2 |
CCL7 | 1 | 2 | 3 |
CCL8 | 1 | 1 | 2 |
CD244 | 1 | 2 | 3 |
CD274 | 1 | 0 | 1 |
CD40 | 1 | 0 | 1 |
CD5 | 1 | 2 | 3 |
CD6 | 1 | 1 | 2 |
CDCP1 | 1 | 2 | 3 |
CSF1 | 1 | 0 | 1 |
CST5 | 1 | 3 | 4 |
CX3CL1 | 1 | 2 | 3 |
CXCL1 | 1 | 0 | 1 |
CXCL10 | 1 | 1 | 2 |
CXCL11 | 1 | 3 | 4 |
CXCL5 | 1 | 3 | 4 |
CXCL6 | 1 | 1 | 2 |
CXCL9 | 1 | 2 | 3 |
DNER | 1 | 0 | 1 |
EIF4EBP1 | 0 | 1 | 1 |
FGF19 | 0 | 3 | 3 |
FGF21 | 1 | 2 | 3 |
FGF23 | 0 | 2 | 2 |
FGF5 | 1 | 0 | 1 |
FLT3LG | 0 | 6 | 6 |
GDNF | 1 | 0 | 1 |
HGF | 1 | 1 | 2 |
IL10 | 1 | 2 | 3 |
IL10RB | 1 | 1 | 2 |
IL12B | 1 | 7 | 8 |
IL15RA | 1 | 0 | 1 |
IL17C | 1 | 0 | 1 |
IL18 | 1 | 1 | 2 |
IL18R1 | 1 | 1 | 2 |
IL1A | 0 | 1 | 1 |
IL6 | 0 | 1 | 1 |
IL7 | 1 | 0 | 1 |
IL8 | 1 | 0 | 1 |
KITLG | 0 | 7 | 7 |
LIFR | 0 | 1 | 1 |
LTA | 1 | 2 | 3 |
MMP1 | 1 | 2 | 3 |
MMP10 | 1 | 1 | 2 |
NGF | 1 | 1 | 2 |
NTF3 | 0 | 1 | 1 |
OSM | 0 | 2 | 2 |
PLAU | 1 | 5 | 6 |
S100A12 | 1 | 0 | 1 |
SIRT2 | 1 | 0 | 1 |
SLAMF1 | 1 | 4 | 5 |
SULT1A1 | 1 | 1 | 2 |
TGFA | 1 | 0 | 1 |
TGFB1 | 1 | 0 | 1 |
TNFRSF11B | 1 | 1 | 2 |
TNFRSF9 | 1 | 1 | 2 |
TNFSF10 | 1 | 7 | 8 |
TNFSF11 | 1 | 5 | 6 |
TNFSF12 | 1 | 4 | 5 |
TNFSF14 | 1 | 0 | 1 |
VEGFA | 1 | 3 | 4 |
total | 59 | 121 | 180 |
with(cistrans,total)
#> [1] 180
T <- with(cistrans,table)
H <- T[rownames(T)!="total","total"]
merge <- merged[c("Chrom","Start","End","prot","MarkerName")]
merge_cvt <- merge(merge,cis.vs.trans,by.x=c("prot","MarkerName"),by.y=c("prot","SNP"))
ord <- with(merge_cvt,order(Chr,bp))
merge_cvt <- merge_cvt[ord,]
2.2 Genomic associations
This is visualised via a circos plot, highlighting the likely causal genes for pQTLs,
pQTLs <- transmute(merge_cvt,chr=paste0("chr",Chr),start=bp,end=bp,log10p)
cis.pQTLs <- subset(merge_cvt,cis) %>%
transmute(chr=paste0("chr",p.chr),start=p.start,end=p.end,gene=p.gene,cols="red")
pQTL_genes <- read.table(file.path(find.package("pQTLtools"),"tests","pQTL_genes.txt"),
col.names=c("chr","start","end","gene")) %>%
mutate(chr=gsub("hs","chr",chr)) %>%
left_join(cis.pQTLs) %>%
mutate(cols=ifelse(is.na(cols),"blue",cols))
#> Joining with `by = join_by(chr, start, end, gene)`
par(cex=0.7)
gap::circos.mhtplot2(pQTLs,pQTL_genes)
#> Warning: Some of the regions have end position values larger than the end of the chromosomes.
#> Note: 7 points are out of plotting region in sector 'chr1', track '5'.
#> Note: 3 points are out of plotting region in sector 'chr2', track '5'.
#> Note: 10 points are out of plotting region in sector 'chr3', track '5'.
#> Note: 7 points are out of plotting region in sector 'chr4', track '5'.
#> Note: 2 points are out of plotting region in sector 'chr5', track '5'.
#> Note: 4 points are out of plotting region in sector 'chr6', track '5'.
#> Note: 2 points are out of plotting region in sector 'chr8', track '5'.
#> Note: 2 points are out of plotting region in sector 'chr9', track '5'.
#> Note: 2 points are out of plotting region in sector 'chr10', track '5'.
#> Note: 4 points are out of plotting region in sector 'chr11', track '5'.
#> Note: 1 point is out of plotting region in sector 'chr12', track '5'.
#> Note: 1 point is out of plotting region in sector 'chr13', track '5'.
#> Note: 1 point is out of plotting region in sector 'chr14', track '5'.
#> Note: 2 points are out of plotting region in sector 'chr16', track '5'.
#> Note: 8 points are out of plotting region in sector 'chr17', track '5'.
#> Note: 8 points are out of plotting region in sector 'chr19', track '5'.
#> Note: 4 points are out of plotting region in sector 'chr20', track '5'.
#> Note: 1 point is out of plotting region in sector 'chr21', track '5'.

Figure 2.1: Genomic associations
par(cex=1)
where the red and blue colours indicate cis/trans classifications.
2.3 Bar chart and circos plot
barplot(table(H),xlab="No. of pQTL regions",ylab="No. of proteins",
ylim=c(0,25),col="darkgrey",border="black",cex=0.8,cex.axis=2,cex.names=2,las=1)

Figure 2.2: Bar chart
circos.cis.vs.trans.plot(hits=f,pQTLdata::inf1,"uniprot")

Figure 2.3: circos plot
The circos plot is based on target genes (those encoding proteins) and somewhat too busy.
2.4 SH2B3
Here we focus on SH2B3.
HOTSPOT <- "chr12:111884608_C_T"
a <- data.frame(chr="chr12",start=111884607,end=111884608,gene="SH2B3")
b <- filter(cis.vs.trans,SNP==HOTSPOT) %>%
mutate(p.chr=paste0("chr",p.chr)) %>%
rename(chr=p.chr,start=p.start,end=p.end,gene=p.gene,cistrans=cis.trans)
cols <- rep(12,nrow(b))
cols[b[["cis"]]] <- 10
labels <- bind_rows(select(b,chr,start,end,gene),a)
circos.clear()
circos.par(start.degree=90, track.height=0.1, cell.padding=c(0,0,0,0))
circos.initializeWithIdeogram(species="hg19", track.height=0.05, ideogram.height=0.06)
circos.genomicLabels(labels, labels.column=4, cex=1.1, font=3, side="inside")
circos.genomicLink(bind_rows(a,a,a,a,a,a), b[c("chr","start","end")], col=cols,
directional=1, border=10, lwd=2)

Figure 2.4: SH2B3 hotspot
A more recent implementation is the qtlClassifier
function. For this example we have,
options(width=200)
geneSNP <- merge(merged[c("prot","MarkerName")],pQTLdata::inf1[c("prot","gene")],by="prot")[c("gene","MarkerName","prot")]
SNPPos <- merged[c("MarkerName","CHR","POS")]
genePos <- pQTLdata::inf1[c("gene","chr","start","end")]
cvt <- qtlClassifier(geneSNP,SNPPos,genePos,1e6)
kable(head(cvt))
gene | MarkerName | prot | geneChrom | geneStart | geneEnd | SNPChrom | SNPPos | Type | |
---|---|---|---|---|---|---|---|---|---|
2 | EIF4EBP1 | chr4:187158034_A_G | 4E.BP1 | 8 | 37887859 | 37917883 | 4 | 187158034 | trans |
3 | ADA | chr20:43255220_C_T | ADA | 20 | 43248163 | 43280874 | 20 | 43255220 | cis |
4 | NGF | chr1:115829943_A_C | Beta.NGF | 1 | 115828539 | 115880857 | 1 | 115829943 | cis |
5 | NGF | chr9:90362040_C_T | Beta.NGF | 1 | 115828539 | 115880857 | 9 | 90362040 | trans |
6 | CASP8 | chr2:202164805_C_G | CASP.8 | 2 | 202098166 | 202152434 | 2 | 202164805 | cis |
7 | CCL11 | chr1:159175354_A_G | CCL11 | 17 | 32612687 | 32615353 | 1 | 159175354 | trans |
2.5 pQTL-gene plot
t2d <- qtl2dplot(cis.vs.trans,xlab="pQTL position",ylab="Gene position")

Figure 2.5: pQTL-gene plot
2.6 pQTL-gene plotly
The pQTL-gene plot above can be also viewed in a 2-d plotly style, fig2d.html,
fig2d <- qtl2dplotly(cis.vs.trans,xlab="pQTL position",ylab="Gene position")
htmlwidgets::saveWidget(fig2d,file="fig2d.html")
print(fig2d)
and 3-d counterpart, fig3d.html,
fig3d <- qtl3dplotly(cis.vs.trans,zmax=300,qtl.prefix="pQTL:",xlab="pQTL position",ylab="Gene position")
htmlwidgets::saveWidget(fig3d,file="fig3d.html")
print(fig3d)
Both plots are responsive.
2.7 Karyoplot
As biomaRt
is not always on, we keep a copy of hgnc.
set_config(config(ssl_verifypeer = 0L))
mart <- useMart(biomart = "ensembl", dataset = "hsapiens_gene_ensembl")
attrs <- c("hgnc_symbol", "chromosome_name", "start_position", "end_position", "band")
hgnc <- vector("character",180)
for(i in 1:180)
{
v <- with(merge_cvt[i,],paste0(Chr,":",bp,":",bp))
g <- subset(getBM(attributes = attrs, filters="chromosomal_region", values=v, mart=mart),!is.na(hgnc_symbol))
hgnc[i] <- paste(g[["hgnc_symbol"]],collapse=";")
cat(i,g[["hgnc_symbol"]],hgnc[i],"\n")
}
save(hgnc,file="hgnc.rda",compress="xz")
We now proceed with
load(file.path(find.package("pQTLtools"),"tests","hgnc.rda"))
merge_cvt <- within(merge_cvt,{
hgnc <- hgnc
hgnc[cis] <- p.gene[cis]
})
with(merge_cvt, {
sentinels <- toGRanges(Chr,bp-1,bp,labels=hgnc)
cis.regions <- toGRanges(Chr,cis.start,cis.end)
loci <- toGRanges(Chr,Start,End)
colors <- c("red","blue")
seqlevelsStyle(sentinels) <- "UCSC"
kp <- plotKaryotype(genome="hg19",chromosomes=levels(seqnames(sentinels)))
kpAddBaseNumbers(kp)
kpPlotRegions(kp, data=loci,r0=0.05,r1=0.15,border="black")
kpPlotMarkers(kp, data=sentinels, labels=hgnc, text.orientation="vertical",
cex=0.5, y=0.3*seq_along(hgnc)/length(hgnc), srt=30,
ignore.chromosome.ends=TRUE,
adjust.label.position=TRUE, label.color=colors[2-cis], label.dist=0.002,
cex.axis=3, cex.lab=3)
legend("bottomright", bty="n", pch=c(19,19), col=colors, pt.cex=0.4,
legend=c("cis", "trans"), text.col=colors, cex=0.8, horiz=FALSE)
# panel <- toGRanges(p.chr,p.start,p.end,labels=p.gene)
# kpPlotLinks(kp, data=loci, data2=panel, col=colors[2-cis])
})

Figure 2.6: Karyoplot of cis/trans pQTLs
3 Genomic regions enrichment analysis
It is now considerably easier with Genomic Regions Enrichment of Annotations Tool (GREAT).
post <- function(regions)
{
job <- submitGreatJob(get(regions), species="hg19", version="3.0.0")
et <- getEnrichmentTables(job,download_by = 'tsv')
tb <- do.call('rbind',et)
write.table(tb,file=paste0(regions,".tsv"),quote=FALSE,row.names=FALSE,sep="\t")
invisible(list(job=job,tb=tb))
}
M <- 1e+6
merge <- merged %>%
dplyr::mutate(chr=Chrom, start=POS-M, end=POS+M) %>%
dplyr::mutate(start=if_else(start<1,1,start)) %>%
dplyr::select(prot,MarkerName,chr,start,end)
cistrans <- dplyr::select(merge, chr,start,end) %>%
dplyr::arrange(chr,start,end) %>%
dplyr::distinct()
# All regions
cistrans.post <- post("cistrans")
job <- with(cistrans.post,job)
plotRegionGeneAssociationGraphs(job)

Figure 3.1: GREAT plots
availableOntologies(job)
# plot of the top term
par(mfcol=c(3,1))
plotRegionGeneAssociationGraphs(job, ontology="GO Molecular Function")
plotRegionGeneAssociationGraphs(job, ontology="GO Biological Process")

Figure 3.2: GREAT plots
plotRegionGeneAssociationGraphs(job, ontology="GO Cellular Component")
# Specific regions
IL12B <- dplyr::filter(merge,prot=="IL.12B") %>% dplyr::select(chr,start,end)
KITLG <- dplyr::filter(merge,prot=="SCF") %>% dplyr::select(chr,start,end)
TNFSF10 <- dplyr::filter(merge,prot=="TRAIL") %>% dplyr::select(chr,start,end)
tb_all <- data.frame()
for (r in c("IL12B","KITLG","TNFSF10"))
{
r.post <- post(r)
tb_all <- rbind(tb_all,data.frame(gene=r,with(r.post,tb)))
}
#> Don't make too frequent requests. The time break is 60s.
#> Please wait for 54s for the next request.
#> The time break can be set by `request_interval` argument.
#> Don't make too frequent requests. The time break is 60s.
#> Please wait for 57s for the next request.
#> The time break can be set by `request_interval` argument.
#>
#> Don't make too frequent requests. The time break is 60s.
#> Please wait for 57s for the next request.
#> The time break can be set by `request_interval` argument.
The top terms at Binomial p=1e-5 could be extracted as follows,
gene | Ontology | ID | Desc | BinomRank | BinomP | BinomBonfP | BinomFdrQ | RegionFoldEnrich | ExpRegions | ObsRegions | GenomeFrac | SetCov | HyperRank | HyperP | HyperBonfP | HyperFdrQ | GeneFoldEnrich | ExpGenes | ObsGenes | TotalGenes | GeneSetCov | TermCov | Regions | Genes | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
GO Biological Process.1100 | KITLG | GO Biological Process | GO: 0010874 | regulation of cholesterol efflux | 1 | 0 | 0.001 | 0.001 | 272.460 | 0.011 | 3 | 0.002 | 0.429 | 1 | 0 | 0.002 | 0.002 | 265.309 | 0.011 | 3 | 17 | 0.250 | 0.176 | /chr16:55993160-57993161,/chr7:93953894-95953895,/chr9:106661741-108661742 | ABCA1,CETP,PON1 |
GO Biological Process.2100 | KITLG | GO Biological Process | GO: 0032374 | regulation of cholesterol transport | 2 | 0 | 0.004 | 0.002 | 185.189 | 0.016 | 3 | 0.002 | 0.429 | 2 | 0 | 0.012 | 0.006 | 140.945 | 0.021 | 3 | 32 | 0.250 | 0.094 | /chr16:55993160-57993161,/chr7:93953894-95953895,/chr9:106661741-108661742 | ABCA1,CETP,PON1 |
GO Biological Process.3100 | KITLG | GO Biological Process | GO: 0032368 | regulation of lipid transport | 3 | 0 | 0.094 | 0.031 | 67.046 | 0.045 | 3 | 0.006 | 0.429 | 3 | 0 | 0.115 | 0.038 | 66.327 | 0.045 | 3 | 68 | 0.250 | 0.044 | /chr16:55993160-57993161,/chr7:93953894-95953895,/chr9:106661741-108661742 | ABCA1,CETP,PON1 |
GO Biological Process.1102 | TNFSF10 | GO Biological Process | GO: 0030195 | negative regulation of blood coagulation | 1 | 0 | 0.006 | 0.006 | 170.502 | 0.018 | 3 | 0.002 | 0.375 | 1 | 0 | 0.034 | 0.034 | 100.228 | 0.030 | 3 | 36 | 0.200 | 0.083 | /chr17:63224774-65224775,/chr19:43153099-45153100,/chr3:185449121-187449122 | APOH,KNG1,PLAUR |
GO Biological Process.2102 | TNFSF10 | GO Biological Process | GO: 0050819 | negative regulation of coagulation | 2 | 0 | 0.014 | 0.007 | 129.538 | 0.023 | 3 | 0.003 | 0.375 | 2 | 0 | 0.047 | 0.024 | 90.205 | 0.033 | 3 | 40 | 0.200 | 0.075 | /chr17:63224774-65224775,/chr19:43153099-45153100,/chr3:185449121-187449122 | APOH,KNG1,PLAUR |
GO Biological Process.3102 | TNFSF10 | GO Biological Process | GO: 0072376 | protein activation cascade | 3 | 0 | 0.046 | 0.015 | 87.207 | 0.034 | 3 | 0.004 | 0.375 | 3 | 0 | 0.196 | 0.065 | 56.378 | 0.053 | 3 | 64 | 0.200 | 0.047 | /chr17:63224774-65224775,/chr1:195710915-197710916,/chr3:185449121-187449122 | APOH,CFH,KNG1 |
GO Cellular Component.119 | TNFSF10 | GO Cellular Component | GO: 0005615 | extracellular space | 1 | 0 | 0.008 | 0.008 | 9.342 | 0.642 | 6 | 0.080 | 0.750 | 1 | 0 | 0.000 | 0.000 | 10.934 | 0.732 | 8 | 880 | 0.533 | 0.009 | /chr14:93844946-95844947,/chr17:63224774-65224775,/chr18:28804862-30804863,/chr1:195710915-197710916,/chr3:171274231-173274232,/chr3:185449121-187449122 | APOH,CFH,CFHR3,KNG1,MEP1B,SERPINA1,SERPINA6,TNFSF10 |
4 eQTL Catalog for colocalization analysis
See example associated with import_eQTLCatalogue
. A related function is import_OpenGWAS
used to fetch data from OpenGWAS. The cis-pQTLs and 1e+6
flanking regions were considered and data are actually fetched from files stored locally. Only the first sentinel was used (r=1).
liftRegion <- function(x,chain,flanking=1e6)
{
require(GenomicRanges)
gr <- with(x,GenomicRanges::GRanges(seqnames=chr,IRanges::IRanges(start,end))+flanking)
seqlevelsStyle(gr) <- "UCSC"
gr38 <- rtracklayer::liftOver(gr, chain)
chr <- gsub("chr","",colnames(table(seqnames(gr38))))
start <- min(unlist(start(gr38)))
end <- max(unlist(end(gr38)))
invisible(list(chr=chr[1],start=start,end=end,region=paste0(chr[1],":",start,"-",end)))
}
sumstats <- function(prot,chr,region37)
{
cat("GWAS sumstats\n")
vcf <- file.path(INF,"METAL/gwas2vcf",paste0(prot,".vcf.gz"))
gwas_stats <- gwasvcf::query_gwas(vcf, chrompos = region37) %>%
gwasvcf::vcf_to_granges() %>%
keepSeqlevels(chr) %>%
renameSeqlevels(paste0("chr",chr))
gwas_stats_hg38 <- rtracklayer::liftOver(gwas_stats, chain) %>%
unlist() %>%
dplyr::as_tibble() %>%
dplyr::transmute(chromosome = seqnames,
position = start, REF, ALT, AF, ES, SE, LP, SS) %>%
dplyr::mutate(id = paste(chromosome, position, sep = ":")) %>%
dplyr::mutate(MAF = pmin(AF, 1-AF)) %>%
dplyr::group_by(id) %>%
dplyr::mutate(row_count = n()) %>%
dplyr::ungroup() %>%
dplyr::filter(row_count == 1) %>%
dplyr::mutate(chromosome=gsub("chr","",chromosome))
s <- ggplot2::ggplot(gwas_stats_hg38, aes(x = position, y = LP)) +
ggplot2::theme_bw() +
ggplot2::geom_point() +
ggplot2::ggtitle(with(sentinel,paste0(prot,"-",SNP," association plot")))
s
gwas_stats_hg38
}
gtex <- function(gwas_stats_hg38,ensGene,region38)
{
cat("c. GTEx_v8 imported eQTL datasets\n")
fp <- file.path(find.package("pQTLtools"),"eQTL-Catalogue","tabix_ftp_paths_gtex.tsv")
web <- read.delim(fp, stringsAsFactors = FALSE) %>% dplyr::as_tibble()
local <- within(web %>% dplyr::as_tibble(),
{
f <- lapply(strsplit(ftp_path,"/imported/|/ge/"),"[",3);
ftp_path <- paste0("~/rds/public_databases/GTEx/csv/",f)
})
gtex_df <- dplyr::filter(local, quant_method == "ge") %>%
dplyr::mutate(qtl_id = paste(study, qtl_group, sep = "_"))
ftp_path_list <- setNames(as.list(gtex_df$ftp_path), gtex_df$qtl_id)
hdr <- file.path(find.package("pQTLtools"),"eQTL-Catalogue","column_names.GTEx")
column_names <- names(read.delim(hdr))
safe_import <- purrr::safely(import_eQTLCatalogue)
summary_list <- purrr::map(ftp_path_list,
~safe_import(., region38, selected_gene_id = ensGene, column_names))
result_list <- purrr::map(summary_list, ~.$result)
result_list <- result_list[!unlist(purrr::map(result_list, is.null))]
result_filtered <- purrr::map(result_list[lapply(result_list,nrow)!=0],
~dplyr::filter(., !is.na(se)))
purrr::map_df(result_filtered, ~run_coloc(., gwas_stats_hg38), .id = "qtl_id")
}
gtex_coloc <- function(prot,chr,ensGene,chain,region37,region38,out)
{
gwas_stats_hg38 <- sumstats(prot,chr,region37)
df_gtex <- gtex(gwas_stats_hg38,ensGene,region38)
if (!exists("df_gtex")) return
saveRDS(df_gtex,file=paste0(out,".RDS"))
dplyr::arrange(df_gtex, -PP.H4.abf)
p <- ggplot2::ggplot(df_gtex, aes(x = PP.H4.abf)) +
ggplot2::theme_bw() +
geom_histogram() +
ggtitle(with(sentinel,paste0(prot,"-",SNP," PP4 histogram"))) +
xlab("PP4") + ylab("Frequency")
p
}
single_run <- function()
{
chr <- with(sentinel,Chr)
ss <- subset(inf1,prot==sentinel[["prot"]])
ensRegion37 <- with(ss,
{
start <- start-M
if (start<0) start <- 0
end <- end+M
paste0(chr,":",start,"-",end)
})
ensGene <- ss[["ensembl_gene_id"]]
ensRegion38 <- with(liftRegion(ss,chain),region)
cat(chr,ensGene,ensRegion37,ensRegion38,"\n")
f <- with(sentinel,paste0(prot,"-",SNP))
gtex_coloc(sentinel[["prot"]],chr,ensGene,chain,ensRegion37,ensRegion38,f)
}
options(width=200)
HOME <- Sys.getenv("HOME")
HPC_WORK <- Sys.getenv("HPC_WORK")
INF <- Sys.getenv("INF")
M <- 1e6
sentinels <- subset(cis.vs.trans,cis)
f <- file.path(find.package("pQTLtools"),"eQTL-Catalogue","hg19ToHg38.over.chain")
chain <- rtracklayer::import.chain(f)
gwasvcf::set_bcftools(file.path(HPC_WORK,"bin","bcftools"))
r <- 1
sentinel <- sentinels[r,]
single_run()
#> 8 ENSG00000164761 8:118935796-120964439 8:117923557-119952199
#> GWAS sumstats
#> c. GTEx_v8 imported eQTL datasets
#> [1] "~/rds/public_databases/GTEx/csv/Adipose_Subcutaneous.tsv.gz"
#> [1] "~/rds/public_databases/GTEx/csv/Adipose_Visceral_Omentum.tsv.gz"
#> [1] "~/rds/public_databases/GTEx/csv/Adrenal_Gland.tsv.gz"
#> [1] "~/rds/public_databases/GTEx/csv/Artery_Aorta.tsv.gz"
#> [1] "~/rds/public_databases/GTEx/csv/Artery_Coronary.tsv.gz"
#> [1] "~/rds/public_databases/GTEx/csv/Artery_Tibial.tsv.gz"
#> [1] "~/rds/public_databases/GTEx/csv/Brain_Amygdala.tsv.gz"
#> [1] "~/rds/public_databases/GTEx/csv/Brain_Anterior_cingulate_cortex_BA24.tsv.gz"
#> [1] "~/rds/public_databases/GTEx/csv/Brain_Caudate_basal_ganglia.tsv.gz"
#> [1] "~/rds/public_databases/GTEx/csv/Brain_Cerebellar_Hemisphere.tsv.gz"
#> [1] "~/rds/public_databases/GTEx/csv/Brain_Cerebellum.tsv.gz"
#> [1] "~/rds/public_databases/GTEx/csv/Brain_Cortex.tsv.gz"
#> [1] "~/rds/public_databases/GTEx/csv/Brain_Frontal_Cortex_BA9.tsv.gz"
#> [1] "~/rds/public_databases/GTEx/csv/Brain_Hippocampus.tsv.gz"
#> [1] "~/rds/public_databases/GTEx/csv/Brain_Hypothalamus.tsv.gz"
#> [1] "~/rds/public_databases/GTEx/csv/Brain_Nucleus_accumbens_basal_ganglia.tsv.gz"
#> [1] "~/rds/public_databases/GTEx/csv/Brain_Putamen_basal_ganglia.tsv.gz"
#> [1] "~/rds/public_databases/GTEx/csv/Brain_Spinal_cord_cervical_c-1.tsv.gz"
#> [1] "~/rds/public_databases/GTEx/csv/Brain_Substantia_nigra.tsv.gz"
#> [1] "~/rds/public_databases/GTEx/csv/Breast_Mammary_Tissue.tsv.gz"
#> [1] "~/rds/public_databases/GTEx/csv/Cells_Cultured_fibroblasts.tsv.gz"
#> [1] "~/rds/public_databases/GTEx/csv/Cells_EBV-transformed_lymphocytes.tsv.gz"
#> [1] "~/rds/public_databases/GTEx/csv/Colon_Sigmoid.tsv.gz"
#> [1] "~/rds/public_databases/GTEx/csv/Colon_Transverse.tsv.gz"
#> [1] "~/rds/public_databases/GTEx/csv/Esophagus_Gastroesophageal_Junction.tsv.gz"
#> [1] "~/rds/public_databases/GTEx/csv/Esophagus_Mucosa.tsv.gz"
#> [1] "~/rds/public_databases/GTEx/csv/Esophagus_Muscularis.tsv.gz"
#> [1] "~/rds/public_databases/GTEx/csv/Heart_Atrial_Appendage.tsv.gz"
#> [1] "~/rds/public_databases/GTEx/csv/Heart_Left_Ventricle.tsv.gz"
#> [1] "~/rds/public_databases/GTEx/csv/Kidney_Cortex.tsv.gz"
#> [1] "~/rds/public_databases/GTEx/csv/Liver.tsv.gz"
#> [1] "~/rds/public_databases/GTEx/csv/Lung.tsv.gz"
#> [1] "~/rds/public_databases/GTEx/csv/Minor_Salivary_Gland.tsv.gz"
#> [1] "~/rds/public_databases/GTEx/csv/Muscle_Skeletal.tsv.gz"
#> [1] "~/rds/public_databases/GTEx/csv/Nerve_Tibial.tsv.gz"
#> [1] "~/rds/public_databases/GTEx/csv/Ovary.tsv.gz"
#> [1] "~/rds/public_databases/GTEx/csv/Pancreas.tsv.gz"
#> [1] "~/rds/public_databases/GTEx/csv/Pituitary.tsv.gz"
#> [1] "~/rds/public_databases/GTEx/csv/Prostate.tsv.gz"
#> [1] "~/rds/public_databases/GTEx/csv/Skin_Not_Sun_Exposed_Suprapubic.tsv.gz"
#> [1] "~/rds/public_databases/GTEx/csv/Skin_Sun_Exposed_Lower_leg.tsv.gz"
#> [1] "~/rds/public_databases/GTEx/csv/Small_Intestine_Terminal_Ileum.tsv.gz"
#> [1] "~/rds/public_databases/GTEx/csv/Spleen.tsv.gz"
#> [1] "~/rds/public_databases/GTEx/csv/Stomach.tsv.gz"
#> [1] "~/rds/public_databases/GTEx/csv/Testis.tsv.gz"
#> [1] "~/rds/public_databases/GTEx/csv/Thyroid.tsv.gz"
#> [1] "~/rds/public_databases/GTEx/csv/Uterus.tsv.gz"
#> [1] "~/rds/public_databases/GTEx/csv/Vagina.tsv.gz"
#> [1] "~/rds/public_databases/GTEx/csv/Whole_Blood.tsv.gz"
#> PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf
#> 2.43e-49 6.58e-44 3.69e-06 1.00e+00 1.66e-07
#> [1] "PP abf for shared variant: 1.66e-05%"
#> PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf
#> 5.76e-45 5.98e-44 8.76e-02 9.09e-01 3.63e-03
#> [1] "PP abf for shared variant: 0.363%"
#> PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf
#> 2.99e-44 2.42e-44 4.55e-01 3.68e-01 1.77e-01
#> [1] "PP abf for shared variant: 17.7%"
#> PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf
#> 3.86e-44 2.55e-44 5.87e-01 3.88e-01 2.53e-02
#> [1] "PP abf for shared variant: 2.53%"
#> PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf
#> 3.13e-44 3.20e-44 4.77e-01 4.87e-01 3.67e-02
#> [1] "PP abf for shared variant: 3.67%"
#> PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf
#> 3.87e-44 2.24e-44 5.88e-01 3.41e-01 7.10e-02
#> [1] "PP abf for shared variant: 7.1%"
#> PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf
#> 3.65e-44 2.42e-44 5.54e-01 3.67e-01 7.83e-02
#> [1] "PP abf for shared variant: 7.83%"
#> PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf
#> 3.44e-44 2.89e-44 5.23e-01 4.40e-01 3.71e-02
#> [1] "PP abf for shared variant: 3.71%"
#> PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf
#> 3.65e-44 2.71e-44 5.55e-01 4.12e-01 3.22e-02
#> [1] "PP abf for shared variant: 3.22%"
#> PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf
#> 3.51e-44 2.79e-44 5.34e-01 4.25e-01 4.14e-02
#> [1] "PP abf for shared variant: 4.14%"
#> PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf
#> 3.50e-44 2.58e-44 5.32e-01 3.92e-01 7.61e-02
#> [1] "PP abf for shared variant: 7.61%"
#> PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf
#> 3.11e-44 3.12e-44 4.73e-01 4.74e-01 5.35e-02
#> [1] "PP abf for shared variant: 5.35%"
#> PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf
#> 3.10e-44 3.16e-44 4.72e-01 4.80e-01 4.80e-02
#> [1] "PP abf for shared variant: 4.8%"
#> PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf
#> 3.56e-44 2.34e-44 5.42e-01 3.55e-01 1.03e-01
#> [1] "PP abf for shared variant: 10.3%"
#> PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf
#> 3.26e-44 3.01e-44 4.96e-01 4.57e-01 4.68e-02
#> [1] "PP abf for shared variant: 4.68%"
#> PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf
#> 3.64e-44 2.55e-44 5.53e-01 3.87e-01 5.96e-02
#> [1] "PP abf for shared variant: 5.96%"
#> PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf
#> 3.75e-44 2.56e-44 5.69e-01 3.89e-01 4.16e-02
#> [1] "PP abf for shared variant: 4.16%"
#> PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf
#> 3.69e-44 2.59e-44 5.60e-01 3.93e-01 4.63e-02
#> [1] "PP abf for shared variant: 4.63%"
#> PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf
#> 3.76e-44 2.57e-44 5.72e-01 3.90e-01 3.82e-02
#> [1] "PP abf for shared variant: 3.82%"
#> PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf
#> 3.57e-44 2.83e-44 5.43e-01 4.30e-01 2.70e-02
#> [1] "PP abf for shared variant: 2.7%"
#> PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf
#> 5.07e-55 6.58e-44 7.70e-12 1.00e+00 3.74e-13
#> [1] "PP abf for shared variant: 3.74e-11%"
#> PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf
#> 3.08e-44 3.31e-44 4.68e-01 5.02e-01 3.00e-02
#> [1] "PP abf for shared variant: 3%"
#> PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf
#> 3.40e-44 2.92e-44 5.18e-01 4.45e-01 3.79e-02
#> [1] "PP abf for shared variant: 3.79%"
#> PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf
#> 4.95e-46 6.53e-44 7.53e-03 9.92e-01 3.50e-04
#> [1] "PP abf for shared variant: 0.035%"
#> PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf
#> 3.74e-44 2.66e-44 5.69e-01 4.04e-01 2.68e-02
#> [1] "PP abf for shared variant: 2.68%"
#> PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf
#> 2.39e-44 4.07e-44 3.64e-01 6.19e-01 1.69e-02
#> [1] "PP abf for shared variant: 1.69%"
#> PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf
#> 7.06e-46 6.44e-44 1.07e-02 9.79e-01 1.05e-02
#> [1] "PP abf for shared variant: 1.05%"
#> PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf
#> 3.32e-47 6.57e-44 5.04e-04 9.99e-01 2.32e-05
#> [1] "PP abf for shared variant: 0.00232%"
#> PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf
#> 1.62e-45 6.41e-44 2.46e-02 9.74e-01 1.19e-03
#> [1] "PP abf for shared variant: 0.119%"
#> PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf
#> 3.47e-44 2.81e-44 5.28e-01 4.28e-01 4.44e-02
#> [1] "PP abf for shared variant: 4.44%"
#> PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf
#> 3.05e-44 3.15e-44 4.64e-01 4.79e-01 5.72e-02
#> [1] "PP abf for shared variant: 5.72%"
#> PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf
#> 6.84e-46 6.50e-44 1.04e-02 9.89e-01 1.04e-03
#> [1] "PP abf for shared variant: 0.104%"
#> PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf
#> 3.73e-44 2.47e-44 5.68e-01 3.76e-01 5.68e-02
#> [1] "PP abf for shared variant: 5.68%"
#> PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf
#> 3.98e-44 2.46e-44 6.04e-01 3.74e-01 2.21e-02
#> [1] "PP abf for shared variant: 2.21%"
#> PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf
#> 1.22e-44 5.30e-44 1.86e-01 8.06e-01 7.91e-03
#> [1] "PP abf for shared variant: 0.791%"
#> PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf
#> 3.58e-44 2.76e-44 5.44e-01 4.19e-01 3.65e-02
#> [1] "PP abf for shared variant: 3.65%"
#> PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf
#> 1.25e-45 6.45e-44 1.90e-02 9.80e-01 9.63e-04
#> [1] "PP abf for shared variant: 0.0963%"
#> PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf
#> 3.97e-44 2.37e-44 6.04e-01 3.60e-01 3.55e-02
#> [1] "PP abf for shared variant: 3.55%"
#> PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf
#> 3.34e-44 3.02e-44 5.07e-01 4.58e-01 3.42e-02
#> [1] "PP abf for shared variant: 3.42%"
#> PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf
#> 3.59e-44 2.84e-44 5.46e-01 4.32e-01 2.16e-02
#> [1] "PP abf for shared variant: 2.16%"
#> PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf
#> 2.79e-44 3.67e-44 4.25e-01 5.58e-01 1.77e-02
#> [1] "PP abf for shared variant: 1.77%"
#> PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf
#> 3.69e-44 2.38e-44 5.61e-01 3.62e-01 7.70e-02
#> [1] "PP abf for shared variant: 7.7%"
#> PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf
#> 3.52e-44 2.76e-44 5.35e-01 4.19e-01 4.61e-02
#> [1] "PP abf for shared variant: 4.61%"
#> PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf
#> 3.74e-44 2.56e-44 5.69e-01 3.90e-01 4.14e-02
#> [1] "PP abf for shared variant: 4.14%"
#> PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf
#> 3.99e-44 2.38e-44 6.06e-01 3.62e-01 3.16e-02
#> [1] "PP abf for shared variant: 3.16%"
#> PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf
#> 9.86e-45 5.55e-44 1.50e-01 8.44e-01 6.55e-03
#> [1] "PP abf for shared variant: 0.655%"
#> PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf
#> 3.70e-44 2.63e-44 5.63e-01 3.99e-01 3.81e-02
#> [1] "PP abf for shared variant: 3.81%"
#> PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf
#> 3.86e-44 2.45e-44 5.86e-01 3.73e-01 4.08e-02
#> [1] "PP abf for shared variant: 4.08%"

Figure 4.1: Association plot & histogram
# The results are also loadable as follows.
coloc_df <- readRDS(file.path(find.package("pQTLtools"),"tests","OPG-chr8:120081031_C_T.RDS")) %>%
dplyr::rename(Tissue=qtl_id, H0=PP.H0.abf,H1=PP.H1.abf,
H2=PP.H2.abf,H3=PP.H3.abf,H4=PP.H4.abf) %>%
mutate(Tissue=gsub("GTEx_V8_","",Tissue),
H0=round(H0,2),H1=round(H1,2),H2=round(H2,2),H3=round(H3,2),H4=round(H4,2)) %>%
arrange(-H4)
ktitle <- with(sentinel,paste0("Colocalization results for ",prot,"-",SNP))
kable(coloc_df,caption=ktitle)
Tissue | nsnps | H0 | H1 | H2 | H3 | H4 |
---|---|---|---|---|---|---|
Adrenal_Gland | 6741 | 0 | 0 | 0.46 | 0.37 | 0.18 |
Brain_Hippocampus | 6733 | 0 | 0 | 0.54 | 0.36 | 0.10 |
Brain_Amygdala | 6701 | 0 | 0 | 0.55 | 0.37 | 0.08 |
Brain_Cerebellum | 6738 | 0 | 0 | 0.53 | 0.39 | 0.08 |
Small_Intestine_Terminal_Ileum | 6738 | 0 | 0 | 0.56 | 0.36 | 0.08 |
Artery_Tibial | 6742 | 0 | 0 | 0.59 | 0.34 | 0.07 |
Brain_Nucleus_accumbens_basal_ganglia | 6741 | 0 | 0 | 0.55 | 0.39 | 0.06 |
Liver | 6742 | 0 | 0 | 0.46 | 0.48 | 0.06 |
Minor_Salivary_Gland | 6721 | 0 | 0 | 0.57 | 0.38 | 0.06 |
Brain_Cortex | 6741 | 0 | 0 | 0.47 | 0.47 | 0.05 |
Brain_Frontal_Cortex_BA9 | 6736 | 0 | 0 | 0.47 | 0.48 | 0.05 |
Brain_Hypothalamus | 6737 | 0 | 0 | 0.50 | 0.46 | 0.05 |
Brain_Spinal_cord_cervical_c-1 | 6725 | 0 | 0 | 0.56 | 0.39 | 0.05 |
Spleen | 6740 | 0 | 0 | 0.53 | 0.42 | 0.05 |
Artery_Coronary | 6740 | 0 | 0 | 0.48 | 0.49 | 0.04 |
Brain_Anterior_cingulate_cortex_BA24 | 6728 | 0 | 0 | 0.52 | 0.44 | 0.04 |
Brain_Cerebellar_Hemisphere | 6738 | 0 | 0 | 0.53 | 0.42 | 0.04 |
Brain_Putamen_basal_ganglia | 6732 | 0 | 0 | 0.57 | 0.39 | 0.04 |
Brain_Substantia_nigra | 6706 | 0 | 0 | 0.57 | 0.39 | 0.04 |
Colon_Sigmoid | 6742 | 0 | 0 | 0.52 | 0.44 | 0.04 |
Kidney_Cortex | 6625 | 0 | 0 | 0.53 | 0.43 | 0.04 |
Ovary | 6739 | 0 | 0 | 0.54 | 0.42 | 0.04 |
Pituitary | 6742 | 0 | 0 | 0.60 | 0.36 | 0.04 |
Stomach | 6742 | 0 | 0 | 0.57 | 0.39 | 0.04 |
Uterus | 6735 | 0 | 0 | 0.56 | 0.40 | 0.04 |
Vagina | 6719 | 0 | 0 | 0.59 | 0.37 | 0.04 |
Artery_Aorta | 6742 | 0 | 0 | 0.59 | 0.39 | 0.03 |
Brain_Caudate_basal_ganglia | 6741 | 0 | 0 | 0.56 | 0.41 | 0.03 |
Breast_Mammary_Tissue | 6742 | 0 | 0 | 0.54 | 0.43 | 0.03 |
Cells_EBV-transformed_lymphocytes | 6730 | 0 | 0 | 0.47 | 0.50 | 0.03 |
Esophagus_Gastroesophageal_Junction | 6742 | 0 | 0 | 0.57 | 0.40 | 0.03 |
Prostate | 6742 | 0 | 0 | 0.51 | 0.46 | 0.03 |
Testis | 6742 | 0 | 0 | 0.61 | 0.36 | 0.03 |
Esophagus_Mucosa | 6742 | 0 | 0 | 0.36 | 0.62 | 0.02 |
Muscle_Skeletal | 6742 | 0 | 0 | 0.60 | 0.37 | 0.02 |
Skin_Not_Sun_Exposed_Suprapubic | 6742 | 0 | 0 | 0.55 | 0.43 | 0.02 |
Skin_Sun_Exposed_Lower_leg | 6742 | 0 | 0 | 0.42 | 0.56 | 0.02 |
Esophagus_Muscularis | 6742 | 0 | 0 | 0.01 | 0.98 | 0.01 |
Nerve_Tibial | 6742 | 0 | 0 | 0.19 | 0.81 | 0.01 |
Thyroid | 6742 | 0 | 0 | 0.15 | 0.84 | 0.01 |
Adipose_Subcutaneous | 6742 | 0 | 0 | 0.00 | 1.00 | 0.00 |
Adipose_Visceral_Omentum | 6742 | 0 | 0 | 0.09 | 0.91 | 0.00 |
Cells_Cultured_fibroblasts | 6742 | 0 | 0 | 0.00 | 1.00 | 0.00 |
Colon_Transverse | 6742 | 0 | 0 | 0.01 | 0.99 | 0.00 |
Heart_Atrial_Appendage | 6742 | 0 | 0 | 0.00 | 1.00 | 0.00 |
Heart_Left_Ventricle | 6742 | 0 | 0 | 0.02 | 0.97 | 0.00 |
Lung | 6742 | 0 | 0 | 0.01 | 0.99 | 0.00 |
Pancreas | 6742 | 0 | 0 | 0.02 | 0.98 | 0.00 |
The function sumstats()
obtained meta-analysis summary statistics (in build 37 and therefore lifted over to build 38) to be used in colocalization
analysis. The output are saved in the .RDS
files. Note that ftp_path
changes from eQTL Catalog to local files.
5 Mendelian Randomisation (MR)
5.1 pQTL-based MR
The function pqtlMR
has an attractive feature that multiple pQTLs
can be used together for conducting MR with a list of outcomes from MR-Base. For generic applications
the run_TwoSampleMR()
function can be used.
f <- file.path(system.file(package="pQTLtools"),"tests","Ins.csv")
ivs <- format_data(read.csv(f))
caption4 <- "ABO/LIFR variants and CHD/FEV1"
kable(ivs, caption=paste(caption4,"(instruments)"),digits=3)
SNP | effect_allele.exposure | other_allele.exposure | eaf.exposure | beta.exposure | se.exposure | pval.exposure | exposure | mr_keep.exposure | pval_origin.exposure | id.exposure |
---|---|---|---|---|---|---|---|---|---|---|
rs505922 | C | T | 0.313 | 1.298 | 0.014 | 0 | ABO | TRUE | reported | dU3C0x |
rs635634 | T | C | 0.180 | -0.300 | 0.032 | 0 | LIFR | TRUE | reported | YmGdhW |
ids <- c("ieu-a-7","ebi-a-GCST007432")
pqtlMR(ivs, ids)
#> API: public: http://gwas-api.mrcieu.ac.uk/
#> Extracting data for 2 SNP(s) from 2 GWAS(s)
#> Harmonising ABO (dU3C0x) and FEV1 || id:ebi-a-GCST007432 (ebi-a-GCST007432)
#> Harmonising LIFR (YmGdhW) and FEV1 || id:ebi-a-GCST007432 (ebi-a-GCST007432)
#> Harmonising ABO (dU3C0x) and Coronary heart disease || id:ieu-a-7 (ieu-a-7)
#> Harmonising LIFR (YmGdhW) and Coronary heart disease || id:ieu-a-7 (ieu-a-7)
#> Analysing 'dU3C0x' on 'ebi-a-GCST007432'
#> Analysing 'dU3C0x' on 'ieu-a-7'
#> Analysing 'YmGdhW' on 'ebi-a-GCST007432'
#> Analysing 'YmGdhW' on 'ieu-a-7'
result <- read.delim("pQTL-combined-result.txt",header=TRUE)
kable(result,caption=paste(caption4, "(result)"),digits=3)
id.exposure | id.outcome | outcome | exposure | method | nsnp | b | se | pval |
---|---|---|---|---|---|---|---|---|
dU3C0x | ebi-a-GCST007432 | FEV1 || id:ebi-a-GCST007432 | ABO | Wald ratio | 1 | -0.011 | 0.002 | 0 |
dU3C0x | ieu-a-7 | Coronary heart disease || id:ieu-a-7 | ABO | Wald ratio | 1 | 0.033 | 0.007 | 0 |
YmGdhW | ebi-a-GCST007432 | FEV1 || id:ebi-a-GCST007432 | LIFR | Wald ratio | 1 | 0.062 | 0.010 | 0 |
YmGdhW | ieu-a-7 | Coronary heart disease || id:ieu-a-7 | LIFR | Wald ratio | 1 | -0.257 | 0.039 | 0 |
single <- read.delim("pQTL-combined-single.txt",header=TRUE)
kable(subset(single,!grepl("All",SNP)), caption=paste(caption4, "(single)"),digits=3)
exposure | outcome | id.exposure | id.outcome | samplesize | SNP | b | se | p | |
---|---|---|---|---|---|---|---|---|---|
1 | ABO | FEV1 || id:ebi-a-GCST007432 | dU3C0x | ebi-a-GCST007432 | 321047 | rs505922 | -0.0109 | 0.00193 | 1.35e-08 |
4 | ABO | Coronary heart disease || id:ieu-a-7 | dU3C0x | ieu-a-7 | 184305 | rs505922 | 0.0334 | 0.00730 | 4.63e-06 |
7 | LIFR | FEV1 || id:ebi-a-GCST007432 | YmGdhW | ebi-a-GCST007432 | 321047 | rs635634 | 0.0620 | 0.01000 | 5.65e-10 |
10 | LIFR | Coronary heart disease || id:ieu-a-7 | YmGdhW | ieu-a-7 | 184305 | rs635634 | -0.2572 | 0.03904 | 4.47e-11 |
5.2 Two-sample MR
The documentation example is quoted here,
outcomes <- "ebi-a-GCST007432"
prot <- "MMP.10"
type <- "cis"
f <- paste0(prot,"-",type,".mrx")
d <- read.table(file.path(system.file(package="pQTLtools"),"tests",f),
header=TRUE)
exposure <- format_data(within(d,{P=10^logP}), phenotype_col="prot", snp_col="rsid",
chr_col="Chromosome", pos_col="Posistion",
effect_allele_col="Allele1", other_allele_col="Allele2",
eaf_col="Freq1", beta_col="Effect", se_col="StdErr",
pval_col="P", log_pval=FALSE,
samplesize_col="N")
clump <- clump_data(exposure)
#> Please look at vignettes for options on running this locally if you need to run many instances of this command.
#> Clumping ZoHewj, 1106 variants, using EUR population reference
#> Removing 1102 of 1106 variants due to LD with other variants or absence from LD reference panel
outcome <- extract_outcome_data(snps=exposure$SNP,outcomes=outcomes)
#> Extracting data for 1106 SNP(s) from 1 GWAS(s)
#> Finding proxies for 155 SNPs in outcome ebi-a-GCST007432
#> Extracting data for 155 SNP(s) from 1 GWAS(s)
harmonise <- harmonise_data(clump,outcome)
#> Harmonising MMP.10 (ZoHewj) and FEV1 || id:ebi-a-GCST007432 (ebi-a-GCST007432)
prefix <- paste(outcomes,prot,type,sep="-")
run_TwoSampleMR(harmonise, mr_plot="pQTLtools", prefix=prefix)
#> Analysing 'ZoHewj' on 'ebi-a-GCST007432'

Figure 5.1: Two-sample MR

Figure 5.2: Two-sample MR

Figure 5.3: Two-sample MR

Figure 5.4: Two-sample MR
The output is contained in individual .txt
files, together with the scatter, forest, funnel and leave-one-out plots.
id.exposure | id.outcome | outcome | exposure | method | nsnp | b | se | pval |
---|---|---|---|---|---|---|---|---|
ZoHewj | ebi-a-GCST007432 | FEV1 || id:ebi-a-GCST007432 | MMP.10 | MR Egger | 4 | -0.001 | 0.010 | 0.959 |
ZoHewj | ebi-a-GCST007432 | FEV1 || id:ebi-a-GCST007432 | MMP.10 | Weighted median | 4 | 0.002 | 0.006 | 0.750 |
ZoHewj | ebi-a-GCST007432 | FEV1 || id:ebi-a-GCST007432 | MMP.10 | Inverse variance weighted | 4 | 0.002 | 0.006 | 0.728 |
ZoHewj | ebi-a-GCST007432 | FEV1 || id:ebi-a-GCST007432 | MMP.10 | Simple mode | 4 | 0.004 | 0.010 | 0.704 |
ZoHewj | ebi-a-GCST007432 | FEV1 || id:ebi-a-GCST007432 | MMP.10 | Weighted mode | 4 | 0.001 | 0.007 | 0.883 |
id.exposure | id.outcome | outcome | exposure | method | Q | Q_df | Q_pval |
---|---|---|---|---|---|---|---|
ZoHewj | ebi-a-GCST007432 | FEV1 || id:ebi-a-GCST007432 | MMP.10 | MR Egger | 0.114 | 2 | 0.944 |
ZoHewj | ebi-a-GCST007432 | FEV1 || id:ebi-a-GCST007432 | MMP.10 | Inverse variance weighted | 0.222 | 3 | 0.974 |
id.exposure | id.outcome | outcome | exposure | egger_intercept | se | pval |
---|---|---|---|---|---|---|
ZoHewj | ebi-a-GCST007432 | FEV1 || id:ebi-a-GCST007432 | MMP.10 | 0.001 | 0.004 | 0.774 |
exposure | outcome | id.exposure | id.outcome | samplesize | SNP | b | se | p |
---|---|---|---|---|---|---|---|---|
MMP.10 | FEV1 || id:ebi-a-GCST007432 | ZoHewj | ebi-a-GCST007432 | 321047 | rs11225354 | 0.006 | 0.012 | 0.598 |
MMP.10 | FEV1 || id:ebi-a-GCST007432 | ZoHewj | ebi-a-GCST007432 | 321047 | rs142915220 | -0.004 | 0.029 | 0.889 |
MMP.10 | FEV1 || id:ebi-a-GCST007432 | ZoHewj | ebi-a-GCST007432 | 321047 | rs17099622 | 0.004 | 0.022 | 0.845 |
MMP.10 | FEV1 || id:ebi-a-GCST007432 | ZoHewj | ebi-a-GCST007432 | 321047 | rs17860955 | 0.001 | 0.007 | 0.934 |
MMP.10 | FEV1 || id:ebi-a-GCST007432 | ZoHewj | ebi-a-GCST007432 | 321047 | All - Inverse variance weighted | 0.002 | 0.006 | 0.728 |
MMP.10 | FEV1 || id:ebi-a-GCST007432 | ZoHewj | ebi-a-GCST007432 | 321047 | All - MR Egger | -0.001 | 0.010 | 0.959 |
exposure | outcome | id.exposure | id.outcome | samplesize | SNP | b | se | p |
---|---|---|---|---|---|---|---|---|
MMP.10 | FEV1 || id:ebi-a-GCST007432 | ZoHewj | ebi-a-GCST007432 | 321047 | rs11225354 | 0.001 | 0.007 | 0.918 |
MMP.10 | FEV1 || id:ebi-a-GCST007432 | ZoHewj | ebi-a-GCST007432 | 321047 | rs142915220 | 0.002 | 0.006 | 0.700 |
MMP.10 | FEV1 || id:ebi-a-GCST007432 | ZoHewj | ebi-a-GCST007432 | 321047 | rs17099622 | 0.002 | 0.006 | 0.759 |
MMP.10 | FEV1 || id:ebi-a-GCST007432 | ZoHewj | ebi-a-GCST007432 | 321047 | rs17860955 | 0.005 | 0.010 | 0.636 |
MMP.10 | FEV1 || id:ebi-a-GCST007432 | ZoHewj | ebi-a-GCST007432 | 321047 | All | 0.002 | 0.006 | 0.728 |
5.3 MR using cis, trans and cis+trans (pan) instruments
This is illustrated with IL-12B.
efo <- read.delim(file.path(find.package("pQTLtools"),"tests","efo.txt"))
d3 <- read.delim(file.path(find.package("pQTLtools"),"tests","IL.12B.txt")) %>%
dplyr::mutate(MRBASEID=unlist(lapply(strsplit(outcome,"id:"),"[",2)),y=b) %>%
dplyr::select(-outcome,-method) %>%
left_join(efo) %>%
dplyr::arrange(cistrans)
#> Joining with `by = join_by(MRBASEID)`
kable(head(d3[c("MRBASEID","trait","cistrans","nsnp","b","se","pval")],29),caption="MR with IL-12B variants",digits=3)
MRBASEID | trait | cistrans | nsnp | b | se | pval |
---|---|---|---|---|---|---|
bbj-a-123 | Graves disease | cis | 21 | 0.008 | 0.101 | 0.938 |
ebi-a-GCST003156 | systemic lupus erythematosus | cis | 39 | -0.096 | 0.068 | 0.157 |
ebi-a-GCST005528 | juvenile idiopathic arthritis | cis | 1 | -0.167 | 0.114 | 0.140 |
ebi-a-GCST005581 | primary biliary cirrhosis | cis | 2 | 0.006 | 0.163 | 0.972 |
finn-a-AB1_HIV | HIV infection | cis | 29 | -0.129 | 0.280 | 0.645 |
finn-a-D3_SARCOIDOSIS | Sarcoidosis | cis | 29 | -0.060 | 0.115 | 0.601 |
finn-a-M13_SJOGREN | Sjogren syndrome | cis | 29 | -0.248 | 0.141 | 0.080 |
finn-a-MENINGITIS | meningitis infection | cis | 29 | -0.217 | 0.191 | 0.257 |
ieu-a-1081 | IGA glomerulonephritis | cis | 3 | 0.210 | 0.177 | 0.237 |
ieu-a-1112 | sclerosing cholangitis | cis | 32 | 0.052 | 0.068 | 0.446 |
ieu-a-276 | celiac disease | cis | 3 | -0.017 | 0.121 | 0.887 |
ieu-a-31 | inflammatory bowel disease | cis | 56 | 0.390 | 0.035 | 0.000 |
ieu-b-18 | multiple sclerosis | cis | 26 | -0.026 | 0.043 | 0.548 |
ieu-b-69 | sepsis | cis | 56 | -0.033 | 0.027 | 0.209 |
ukb-a-115 | Vitiligo | cis | 54 | 0.000 | 0.000 | 0.410 |
ukb-a-552 | Crohn’s disease | cis | 54 | 0.001 | 0.000 | 0.000 |
ukb-b-10537 | psoriasis | cis | 17 | -0.004 | 0.001 | 0.000 |
ukb-b-13251 | gout | cis | 20 | 0.000 | 0.000 | 0.764 |
ukb-b-15606 | pneumonia | cis | 7 | 0.000 | 0.000 | 0.376 |
ukb-b-15622 | Tuberculosis | cis | 7 | 0.000 | 0.000 | 0.668 |
ukb-b-16499 | allergic rhinitis | cis | 40 | 0.000 | 0.001 | 0.788 |
ukb-b-16702 | allergy | cis | 7 | 0.000 | 0.000 | 0.265 |
ukb-b-18194 | ankylosing spondylitis | cis | 5 | 0.000 | 0.000 | 0.710 |
ukb-b-19386 | ulcerative colitis | cis | 7 | 0.001 | 0.000 | 0.039 |
ukb-b-19732 | hypothyroidism | cis | 37 | 0.000 | 0.001 | 0.952 |
ukb-b-20141 | atopic eczema | cis | 26 | 0.000 | 0.001 | 0.988 |
ukb-b-20208 | asthma | cis | 7 | 0.000 | 0.000 | 0.756 |
ukb-b-8814 | urinary tract infection | cis | 18 | -0.001 | 0.000 | 0.279 |
ukb-b-9125 | rheumatoid arthritis | cis | 17 | 0.000 | 0.001 | 0.845 |
p <- ggplot2::ggplot(d3,aes(y = trait, x = y))+
ggplot2::theme_bw()+
ggplot2::geom_point()+
ggplot2::facet_wrap(~cistrans,ncol=3,scales="free_x")+
ggplot2::geom_segment(ggplot2::aes(x = b-1.96*se, xend = b+1.96*se, yend = trait))+
ggplot2::geom_vline(lty=2, ggplot2::aes(xintercept=0), colour = 'red')+
ggplot2::xlab("Effect size")+
ggplot2::ylab("")
p

Figure 5.5: MR with cis, trans and cis+trans variants of IL-12B
5.4 GSMR forest plots
This is based on GSMR results of TNFB and a range of immune-mediated traits,
Outcome | Effect | StdErr |
---|---|---|
Multiple sclerosis | 0.691 | 0.059 |
Systemic lupus erythematosus | 0.767 | 0.079 |
Sclerosing cholangitis | 0.627 | 0.076 |
Juvenile idiopathic arthritis | -1.176 | 0.160 |
Psoriasis | 0.006 | 0.001 |
Rheumatoid arthritis | -0.004 | 0.001 |
Inflammatory bowel disease | -0.143 | 0.025 |
Ankylosing spondylitis | -0.003 | 0.001 |
Hypothyroidism | -0.004 | 0.001 |
Allergic rhinitis | 0.004 | 0.001 |
IgA glomerulonephritis | -0.327 | 0.105 |
Atopic eczema | -0.002 | 0.001 |
mr_forestplot(tnfb,colgap.forest.left="0.05cm", fontsize=14,
leftcols=c("studlab"), leftlabs=c("Outcome"),
plotwidth="5inch", sm="OR", sortvar=tnfb[["Effect"]],
rightcols=c("effect","ci","pval"), rightlabs=c("OR","95%CI","GSMR P"),
digits=3, digits.pval=2, scientific.pval=TRUE,
common=FALSE, random=FALSE, print.I2=FALSE, print.pval.Q=FALSE, print.tau2=FALSE,
addrow=TRUE, backtransf=TRUE, spacing=1.6, col.inside="black", col.square="black")

Figure 5.6: Forest plots based on MR results on TNFB
6 Literature on pQTLs
References3 4 are included as EndNote libraries which is now part of pQTLdata package.