In [None]:
import pandas as pd
import numpy as np
import tensorqtl
from tensorqtl import genotypeio, cis, trans
import matplotlib.pyplot as plt

# --- Input locations and output prefix -------------------------------------
# plink_prefix_path is handed to the PLINK reader below; expression_bed and
# covariates_file are the phenotype and covariate inputs; prefix is used by
# later cells when naming result files.
plink_prefix_path = '1'
expression_bed = '1.expression.bed.gz'
covariates_file = '1.covariates.txt'
prefix = 'caprion'

# --- Phenotypes -------------------------------------------------------------
# read_phenotype_bed returns two objects: the phenotype table and a companion
# table of phenotype positions (used later for cis filtering / cis mapping).
phenotype_df, phenotype_pos_df = tensorqtl.read_phenotype_bed(expression_bed)

# --- Covariates -------------------------------------------------------------
# Tab-separated file, first column used as the index, then transposed.
# NOTE(review): presumably the transpose puts samples on the rows -- confirm
# against the layout of the covariates file.
covariates_df = pd.read_csv(
    covariates_file,
    sep='\t',
    index_col=0,
).transpose()

# --- Genotypes --------------------------------------------------------------
# Load genotypes through the PLINK reader and build a variant lookup table
# indexed by the 'snp' column, keeping only 'chrom' and 'pos'.
pr = genotypeio.PlinkReader(plink_prefix_path)
genotype_df = pr.load_genotypes()
variant_df = pr.bim.set_index('snp').loc[:, ['chrom', 'pos']]

### *trans*-QTL mapping

In [None]:
# Run trans-QTL mapping of every phenotype against the loaded genotypes,
# adjusting for the covariates.
# To limit output size, a sparse result is requested (return_sparse=True) and
# only associations with p-value <= 5e-2 (pval_threshold) are returned.
# maf_threshold=0.05 is passed through to map_trans as the allele-frequency
# cut-off.
trans_df = trans.map_trans(genotype_df, phenotype_df, covariates_df, batch_size=20000,
                           return_sparse=True, pval_threshold=5e-2, maf_threshold=0.05)

# Persist the thresholded associations, naming the file from `prefix` so that it
# stays in sync with the other outputs of this notebook.
# Note: this is written BEFORE cis filtering, so the file still contains
# cis-associations.
trans_df.to_csv('{}.trans'.format(prefix), index=False)

# Remove cis-associations, using the phenotype positions (as a dict keyed by
# phenotype) and the variant positions.
# NOTE(review): window=5000000 is presumably in base pairs -- confirm.
trans_filter = trans.filter_cis(trans_df, phenotype_pos_df.T.to_dict(), variant_df, window=5000000)
trans_filter.head()

### *cis*-QTL mapping

In [None]:
# Nominal p-values for all variant-phenotype pairs.
# The call's return value is not used; the results are read back from
# '<prefix>.cis_qtl_pairs.parquet' (presumably written by map_nominal for the
# given prefix -- confirm against the installed tensorqtl version).
cis.map_nominal(genotype_df, variant_df, phenotype_df, phenotype_pos_df, covariates_df, prefix)
pairs_df = pd.read_parquet('{}.cis_qtl_pairs.parquet'.format(prefix))

# A bare expression in the middle of a cell is evaluated but never rendered,
# so show the preview explicitly. `display` is provided by the Jupyter/IPython
# kernel without an import.
display(pairs_df.head())

# Export the nominal pairs as CSV, naming the file from `prefix` like the
# parquet file read above.
pairs_df.to_csv('{}.cis'.format(prefix), index=False)

# Empirical p-values for phenotypes.
cis_df = cis.map_cis(genotype_df, variant_df, phenotype_df, phenotype_pos_df, covariates_df)
cis_df.head()