Skip to contents

This function maps consequences (CSQ) of a given list of variants to those in an annotated set based on facilities such as variant effect predictor (VEP). In the case of protein-altering variants (PAVs), many consequences can be involved. The procedure also takes into account linkage disequilibrium (LD) in flanking regions.

Usage

csq(
  query_loci,
  annotated_loci,
  pattern,
  ldops = NULL,
  flanking = 1e+06,
  pop = "EUR",
  verbose = TRUE
)

Arguments

query_loci

A data.frame of loci whose consequences are to be obtained.

annotated_loci

A data.frame of annotated loci.

pattern

A character string or pattern to match the consequences against.

ldops

Arguments for ieugwasr::ld_matrix_local(), typically a list with bfile and plink paths.

flanking

A numeric value specifying the flanking distance (default is 1e6).

pop

A character string specifying the reference population for ieugwasr::ld_matrix() (default is "EUR").

verbose

A logical flag to show nonexistent variants (default is TRUE).

Value

A data.frame with an indicator showing whether the consequences are present or absent.

Examples

if (FALSE) { # \dontrun{
options(width=2000)
suppressMessages(require(dplyr))
suppressMessages(require(stringr))
# SCALLOP-INF list
METAL <- read.delim(file.path(find.package("pQTLtools"), "tests", "INF1.METAL")) %>%
         dplyr::left_join(gap.datasets::inf1[c("prot", "gene")]) %>%
         dplyr::mutate(prot = gene, chr = Chromosome, pos = Position)
# VEP output
vep <- "/rds/project/rds-zuZwCZMsS0w/Caprion_proteomics/analysis/bgen/vep"
pattern <- paste(protein_altering_variants, collapse = "|")
suppressMessages(require(GenomicRanges))
INF <- "/rds/project/rds-zuZwCZMsS0w/olink_proteomics/scallop/INF"
plink <- "/rds/user/jhz22/hpc-work/bin/plink"
# CSQ
b <- list()
for (i in unique(dplyr::pull(METAL, Chromosome))) {
  m <- dplyr::filter(METAL, Chromosome %in% i) %>%
       dplyr::select(chr, pos, MarkerName, prot) %>%
       dplyr::mutate(rsid = gsub("chr", "", MarkerName)) %>%
       dplyr::select(-MarkerName)
  u <- read.delim(file.path(vep, paste0("chr", i, ".tab.gz"))) %>%
       dplyr::select(Chrom, Pos, X.Uploaded_variation, Consequence) %>%
       setNames(c("chr", "pos", "rsid", "csq"))
  bfile <- file.path(INF, "INTERVAL", "per_chr", paste0("snpid", i))
  b[[i]] <- csq(m, u, pattern, ldops = list(bfile = bfile, plink = plink))
}
r <- dplyr::bind_rows(b) %>%
     dplyr::filter(r2 >= 0.8) %>%
     dplyr::rename(gene = prot) %>%
     dplyr::mutate(seqnames = as.integer(seqnames), pos = as.integer(pos)) %>%
     dplyr::arrange(seqnames, pos) %>%
     dplyr::select(-ref.seqnames, -ref.start, -ref.end, -seqnames, -pos, -start, -end)
w <- r %>%
     group_by(gene,rsid) %>%
     summarize(ref.rsid.all=paste(ref.rsid,collapse=";"),
               ref.pos.all=paste(ref.pos,collapse=";"),
               ref.csq.all=paste(ref.csq,collapse=";"),
               r2.all=paste(r2,collapse=";"))
} # }