Skip to contents
es_pkgs <- c("Biobase", "arrayQualityMetrics", "dplyr", "knitr", "mclust", "pQTLtools", "rgl", "scatterplot3d")
se_pkgs <- c("BiocGenerics","GenomicRanges","IRanges","MsCoreUtils","S4Vectors","SummarizedExperiment")
pkgs <- c(es_pkgs,se_pkgs)
for (p in pkgs) if (length(grep(paste("^package:", p, "$", sep=""), search())) == 0) {
    if (!requireNamespace(p)) warning(paste0("This vignette needs package `", p, "'; please install"))
}
invisible(suppressMessages(lapply(pkgs, require, character.only = TRUE)))

We start with Bioconductor/Biobase’s ExpressionSet example and finish with a real application.

dataDirectory <- system.file("extdata", package="Biobase")
exprsFile <- file.path(dataDirectory, "exprsData.txt")
exprs <- as.matrix(read.table(exprsFile, header=TRUE, sep="\t", row.names=1, as.is=TRUE))
pDataFile <- file.path(dataDirectory, "pData.txt")
pData <- read.table(pDataFile, row.names=1, header=TRUE, sep="\t")
all(rownames(pData)==colnames(exprs))
metadata <- data.frame(labelDescription=c("Patient gender",
                                          "Case/control status",
                                          "Tumor progress on XYZ scale"),
                       row.names=c("gender", "type", "score"))
phenoData <- Biobase::AnnotatedDataFrame(data=pData, varMetadata=metadata)
experimentData <- Biobase::MIAME(name="Pierre Fermat",
                                 lab="Francis Galton Lab",
                                 contact="pfermat@lab.not.exist",
                                 title="Smoking-Cancer Experiment",
                                 abstract="An example ExpressionSet",
                                 url="www.lab.not.exist",
                                 other=list(notes="Created from text files"))
exampleSet <- pQTLtools::make_ExpressionSet(exprs,phenoData,experimentData=experimentData,
                                            annotation="hgu95av2")
data(sample.ExpressionSet, package="Biobase")
identical(exampleSet,sample.ExpressionSet)

1 data.frame

The great benefit is to use the object directly as a data.frame.

lm.result <- Biobase::esApply(exampleSet,1,function(x) lm(score~gender+x))
beta.x <- unlist(lapply(lapply(lm.result,coef),"[",3))
beta.x[1]
#> AFFX-MurIL2_at.x 
#>    -0.0001907472
lm(score~gender+AFFX.MurIL2_at,data=exampleSet)
#> 
#> Call:
#> lm(formula = score ~ gender + AFFX.MurIL2_at, data = exampleSet)
#> 
#> Coefficients:
#>    (Intercept)      genderMale  AFFX.MurIL2_at  
#>      0.5531725       0.0098932      -0.0001907

2 Composite plots

We wish to examine the distribution of each feature via histogram, scatter and boxplot. One could resort to esApply() for its simplicity as before

invisible(Biobase::esApply(exampleSet[1:2],1,function(x)
                           {par(mfrow=c(3,1));boxplot(x);hist(x);plot(x)}
))

but it is nicer to add feature name in the title.

par(mfrow=c(1,3))
f <- Biobase::featureNames(exampleSet[1:2])
invisible(sapply(f,function(x) {
                     d <- t(Biobase::exprs(exampleSet[x]))
                     fn <- Biobase::featureNames(exampleSet[x])
                     hist(d,main="",xlab=fn); plot(d, ylab=fn); boxplot(d,ylab=fn)
                   }
          )
)
Histogram, scatter & boxplots

Figure 2.1: Histogram, scatter & boxplots

Histogram, scatter & boxplots

Figure 2.2: Histogram, scatter & boxplots

where the expression set is indexed using feature name(s).

3 Outlier detections

This illustrates one mechanism,

list_outliers <- function(es, method="upperquartile")
                 arrayQualityMetrics::outliers(exprs(es),method=method)
for (method in c("KS","sum","upperquartile"))
{
  ZWK_outliers <- list_outliers(protein_ZWK,method=method)
  print(ZWK_outliers@statistic[ZWK_outliers@which])
}

4 Clustering

We employ model-based clustering absed on principal compoents to see potential groupings in the data,

  pca <- prcomp(na.omit(t(Biobase::exprs(exampleSet))), rank=10, scale=TRUE)
  pc1pc2pc3 <- with(pca,x)[,1:3]
  mc <- mclust::Mclust(pc1pc2pc3,G=3)
  with(mc, {
      cols <- c("blue","red", "purple")
      s3d <- scatterplot3d::scatterplot3d(with(pca,x[,c(2,1,3)]),
                                          color=cols[classification],
                                          pch=16,
                                          type="h",
                                          main="Plot of the PC1, PC2 and PC3")
      s3d.coords <- s3d$xyz.convert(with(pca,x[,c(2,1,3)]))
      text(s3d.coords$x, 
           s3d.coords$y,   
           cex = 1.2,
           col = cols[classification],
           labels = row.names(pc1pc2pc3),
           pos = 4)
      legend("right", legend=levels(as.factor(classification)), col=cols[classification], pch=16)
      rgl::open3d(width = 500, height = 500)
      rgl::plot3d(with(pca,x[,c(2,1,3)]),cex=1.2,col=cols[classification],size=5)
      rgl::text3d(with(pca,x[,c(2,1,3)]),cex=1.2,col=cols[classification],texts=row.names(pc1pc2pc3))
      htmlwidgets::saveWidget(rgl::rglwidget(), file = "mcpca3d.html")
  })
Three-group Clustering

Figure 4.1: Three-group Clustering

#> Warning in par3d(userMatrix = structure(c(1, 0, 0, 0, 0, 0.342020143325668, :
#> parameter "width" cannot be set
#> Warning in par3d(userMatrix = structure(c(1, 0, 0, 0, 0, 0.342020143325668, :
#> parameter "height" cannot be set

An interactive version is also available,

5 Data transformation

Suppose we wish use log2 for those greater than zero but set those negative values to be missing.

log2.na <- function(x) log2(ifelse(x>0, x, NA))
Biobase::exprs(exampleSet) <- log2.na(Biobase::exprs(exampleSet))

6 Limit of detection (LOD)

We generate a lod.max ~ U[0,1] variable to experiment

Biobase::fData(exampleSet)
#> data frame with 0 columns and 500 rows
Biobase::fData(exampleSet)$lod.max <- apply(Biobase::exprs(exampleSet),1,quantile,runif(nrow(exampleSet)))
lod <- pQTLtools::get.prop.below.LLOD(exampleSet)
x <- dplyr::arrange(Biobase::fData(lod),desc(pc.belowLOD.new))
knitr::kable(head(lod))
AFFX.MurIL2_at AFFX.MurIL10_at AFFX.MurIL4_at AFFX.MurFAS_at AFFX.BioB.5_at AFFX.BioB.M_at gender type score
A 192.7420 97.13700 45.81920 22.54450 96.7875 89.0730 Female Control 0.75
B 85.7533 126.19600 8.83135 3.60093 30.4380 25.8461 Male Case 0.40
C 176.7570 77.92160 33.06320 14.68830 46.1271 57.2033 Male Control 0.73
D 135.5750 93.37130 28.70720 12.33970 70.9319 69.9766 Male Case 0.42
E 64.4939 24.39860 5.94492 36.86630 56.1744 49.5822 Female Case 0.93
F 76.3569 85.50880 28.29250 11.25680 42.6756 26.1262 Male Control 0.22
G 160.5050 98.90860 30.96940 23.00340 86.5156 75.0083 Male Case 0.96
H 65.9631 81.69320 14.79230 16.21340 30.7927 42.3352 Male Case 0.79
I 56.9039 97.80150 14.23990 12.03750 19.7183 41.1207 Female Case 0.37
J 135.6080 90.48380 34.48740 4.54978 46.3520 91.5307 Male Control 0.63
K 63.4432 70.57330 20.35210 8.51782 39.1326 39.9136 Male Case 0.26
L 78.2126 94.54180 14.15540 27.28520 41.7698 49.8397 Female Control 0.36
M 83.0943 75.34550 20.62510 10.16160 80.2197 63.4794 Male Case 0.41
N 89.3372 68.58270 15.92310 20.24880 36.4903 24.7007 Male Case 0.80
O 91.0615 87.40500 20.15790 15.78490 36.4021 47.4641 Female Case 0.10
P 95.9377 84.45810 27.81390 14.32760 35.3054 47.3578 Female Control 0.41
Q 179.8450 87.68060 32.79110 15.94880 58.6239 58.1331 Female Case 0.16
R 152.4670 108.03200 33.52920 14.67530 114.0620 104.1220 Male Control 0.72
S 180.8340 134.26300 19.81720 -7.91911 93.4402 115.8310 Male Case 0.17
T 85.4146 91.40310 20.41900 12.88750 22.5168 58.1224 Female Case 0.74
U 157.9890 -8.68811 26.87200 11.91860 48.6462 73.4221 Male Control 0.35
V 146.8000 85.02120 31.14880 12.83240 90.2215 64.6066 Female Control 0.77
W 93.8829 79.29980 22.34200 11.13900 42.0053 40.3068 Male Control 0.27
X 103.8550 71.65520 19.01350 7.55564 57.5738 41.8209 Male Control 0.98
Y 64.4340 64.23690 12.16860 19.98490 44.8216 46.1087 Female Case 0.94
Z 175.6150 78.70680 17.37800 8.96849 61.7044 49.4122 Female Case 0.32
plot(x[,2], main="Random quantile cutoff", ylab="<lod%")
LOD based on a random cutoff

Figure 6.1: LOD based on a random cutoff

The quantity has been shown to have a big impact on protein abundance and therefore pQTL detection as is shown with a real example.

rm(list=ls())
dir <- "~/rds/post_qc_data/interval/phenotype/olink_proteomics/post-qc/"
eset <- readRDS(paste0(dir,"eset.inf1.flag.out.outlier.in.rds"))
x <- pQTLtools::get.prop.below.LLOD(eset)
annot <- Biobase::fData(x)
annot$MissDataProp <- as.numeric(gsub("\\%$", "", annot$MissDataProp))
plot(annot$MissDataProp, annot$pc.belowLOD.new, xlab="% <LLOD in Original",
     ylab="% <LLOD in post QC dataset", pch=19)
INF <- Sys.getenv("INF")
np <- read.table(paste(INF, "work", "INF1.merge.nosig", sep="/"), header=FALSE,
                 col.names = c("prot", "uniprot"))
kable(np, caption="Proteins with no pQTL")
annot$pQTL <- rep(NA, nrow(annot))
no.pQTL.ind <- which(annot$uniprot.id %in% np$uniprot)
annot$pQTL[no.pQTL.ind] <- "red"
annot$pQTL[-no.pQTL.ind] <- "blue"
annot <- annot[order(annot$pc.belowLOD.new, decreasing = TRUE),]
annot <- annot[-grep("^BDNF$", annot$ID),]
saveRDS(annot,file=file.path("~","pQTLtools","tests","annot.RDS"))
annot <- readRDS(file.path(find.package("pQTLtools"),"tests","annot.RDS")) %>%
         dplyr::left_join(pQTLdata::inf1[c("prot","target.short","alt_name")],by=c("ID"="prot")) %>%
         dplyr::mutate(prot=if_else(is.na(alt_name),target.short,alt_name),order=1:n()) %>%
         dplyr::arrange(desc(order))
xtick <- seq(1, nrow(annot))
attach(annot)
par(mar=c(10,5,1,1))
plot(100-pc.belowLOD.new,cex=2,pch=19,col=pQTL,xaxt="n",xlab="",ylab="",cex.axis=0.8)
text(66,16,"IL-17C",offset=0,pos=2,cex=1.5,font=2,srt=0)
arrows(67,16,71,16,lwd=2)
axis(1, at=xtick, labels=prot, lwd.tick=0.5, lwd=0, las=2, hadj=1, cex.axis=0.8)
mtext("% samples above LLOD",side=2,line=2.5,cex=1.2)
mtext("Ordered protein",side=1,line=6.5,cex=1.2,font=1)
legend(x=1,y=25,c("without pQTL","with pQTL"),box.lwd=0,cex=2,col=c("red","blue"),pch=19)
LOD in SCALLOP-INF/INTERVAL

Figure 6.2: LOD in SCALLOP-INF/INTERVAL

detach(annot)
options(width=120)
knitr::kable(annot,caption="Summary statistics",row.names=FALSE)
Table 6.1: Summary statistics
ID dichot olink.id uniprot.id lod.max MissDataProp pc.belowLOD.new pQTL target.short alt_name prot order
CSF.1 FALSE 196_CSF-1 P09603 1.02 0.02 0.0000000 blue CSF-1 NA CSF-1 91
TNFB FALSE 195_TNFB P01374 1.10 0.04 0.0000000 blue TNFB NA TNFB 90
ADA FALSE 194_ADA P00813 1.80 0.04 0.0000000 blue ADA NA ADA 89
STAMPB FALSE 192_STAMPB O95630 1.40 0.06 0.0000000 red STAMPB NA STAMPB 88
CCL20 FALSE 190_CCL20 P78556 1.42 0.02 0.0000000 blue CCL20 NA CCL20 87
TWEAK FALSE 189_TWEAK O43508 1.78 0.02 0.0000000 blue TWEAK NA TWEAK 86
TNFRSF9 FALSE 187_TNFRSF9 Q07011 1.86 0.02 0.0000000 blue TNFRSF9 NA TNFRSF9 85
CX3CL1 FALSE 186_CX3CL1 P78423 1.76 0.02 0.0000000 blue CX3CL1 NA CX3CL1 84
CCL25 FALSE 185_CCL25 O15444 1.32 0.02 0.0000000 blue CCL25 NA CCL25 83
CASP.8 FALSE 184_CASP-8 Q14790 1.38 0.04 0.0000000 blue CASP-8 NA CASP-8 82
MCP.2 FALSE 183_MCP-2 P80075 2.10 0.02 0.0000000 blue MCP-2 CCL8 CCL8 81
FGF.19 FALSE 179_FGF-19 O95750 1.25 0.02 0.0000000 blue FGF-19 NA FGF-19 80
CD40 FALSE 176_CD40 P25942 1.91 0.04 0.0000000 blue CD40 NA CD40 79
DNER FALSE 174_DNER Q8NFT8  1.63 0.02 0.0000000 blue DNER NA DNER 78
4E.BP1 FALSE 170_4E-BP1 Q13541 1.04 0.10 0.0000000 blue 4EBP1 NA 4EBP1 77
CXCL10 FALSE 169_CXCL10 P02778 2.03 0.02 0.0000000 blue CXCL10 NA CXCL10 76
CXCL6 FALSE 168_CXCL6 P80162 1.80 0.02 0.0000000 blue CXCL6 CXCL6 CXCL6 75
Flt3L FALSE 167_Flt3L P49771 2.12 0.02 0.0000000 blue FIt3L NA FIt3L 74
CD5 FALSE 165_CD5 P06127 1.42 0.02 0.0000000 blue CD5 NA CD5 73
CCL23 FALSE 164_CCL23 P55773 2.10 0.02 0.0000000 blue CCL23 NA CCL23 72
MMP.10 FALSE 161_MMP-10 P09238 1.56 0.02 0.0000000 blue MMP-10 NA MMP-10 71
IL.12B FALSE 157_IL-12B P29460 0.85 0.02 0.0000000 blue IL-12B NA IL-12B 70
HGF FALSE 156_HGF P14210 1.77 0.02 0.0000000 blue HGF NA HGF 69
TRANCE FALSE 155_TRANCE O14788 1.51 0.04 0.0000000 blue TRANCE NA TRANCE 68
CXCL5 FALSE 154_CXCL5 P42830 1.92 0.02 0.0000000 blue CXCL5 NA CXCL5 67
PD.L1 FALSE 152_PD-L1 Q9NZQ7 2.14 0.04 0.0000000 blue PD-L1 NA PD-L1 66
IL.18R1 FALSE 151_IL-18R1 Q13478 1.21 0.02 0.0000000 blue IL-18R1 NA IL-18R1 65
IL.10RB FALSE 149_IL-10RB Q08334 1.72 0.02 0.0000000 blue IL10RB NA IL10RB 64
CCL19 FALSE 145_CCL19 Q99731 2.13 0.02 0.0000000 blue CCL19 NA CCL19 63
FGF.21 FALSE 144_FGF-21 Q9NSA1 1.14 0.02 0.0000000 blue FGF-21 NA FGF-21 62
LIF.R FALSE 143_LIF-R P42702 2.08 0.04 0.0000000 blue LIF-R NA LIF-R 61
FGF.23 FALSE 139_FGF-23 Q9GZV9 0.66 0.04 0.0000000 blue FGF-23 NA FGF-23 60
TNFSF14 FALSE 138_TNFSF14 O43557 1.81 0.02 0.0000000 blue TNFSF14 NA TNFSF14 59
CCL11 FALSE 137_CCL11 P51671 2.09 0.02 0.0000000 blue CCL11 CCL11 CCL11 58
MCP.4 FALSE 136_MCP-4 Q99616 0.56 0.04 0.0000000 blue MCP-4 CCL13 CCL13 57
TGF.alpha FALSE 135_TGF-alpha P01135 0.07 0.02 0.0000000 blue TGF-alpha NA TGF-alpha 56
IL.18 FALSE 133_IL-18 Q14116 2.79 0.02 0.0000000 blue IL-18 NA IL-18 55
SCF FALSE 132_SCF P21583 2.30 0.02 0.0000000 blue SCF NA SCF 54
CD6 FALSE 131_CD6 Q8WWJ7 1.78 0.04 0.0000000 blue CD6 NA CD6 53
CCL4 FALSE 130_CCL4 P13236 2.15 0.02 0.0000000 blue CCL4 NA CCL4 52
CXCL1 FALSE 128_CXCL1 P09341 3.45 0.02 0.0000000 blue CXCL1 NA CXCL1 51
OSM FALSE 126_OSM P13725 1.32 0.06 0.0000000 blue OSM NA OSM 50
CST5 FALSE 123_CST5 P28325 4.34 0.04 0.0000000 blue CST5 NA CST5 49
CXCL9 FALSE 122_CXCL9 Q07325 1.04 0.02 0.0000000 blue CXCL9 NA CXCL9 48
TRAIL FALSE 120_TRAIL P50591 1.22 0.02 0.0000000 blue TRAIL NA TRAIL 47
CXCL11 FALSE 117_CXCL11 O14625 1.40 0.02 0.0000000 blue CXCL11 NA CXCL11 46
MCP.1 FALSE 115_MCP-1 P13500 1.90 0.02 0.0000000 blue MCP-1 CCL2 CCL2 45
uPA FALSE 112_uPA P00749 1.75 0.02 0.0000000 blue uPA NA uPA 44
LAP.TGF.beta.1 FALSE 111_LAP TGF-beta-1 P01137 0.86 0.02 0.0000000 blue LAP TGF-beta-1 NA LAP TGF-beta-1 43
OPG FALSE 110_OPG O00300 1.81 0.02 0.0000000 blue OPG NA OPG 42
CD244 FALSE 108_CD244 Q9BZW8 1.09 0.02 0.0000000 blue CD244 NA CD244 41
CDCP1 FALSE 107_CDCP1 Q9H5V8 0.21 0.04 0.0000000 blue CDCP1 NA CDCP1 40
VEGF.A FALSE 102_VEGF-A P15692 3.15 0.02 0.0000000 blue VEGF_A NA VEGF_A 39
MIP.1.alpha FALSE 166_MIP-1 alpha P10147 1.92 0.06 0.0203998 blue MIP-1 alpha NA MIP-1 alpha 38
MMP.1 FALSE 142_MMP-1 P03956 1.89 0.06 0.0407997 blue MMP-1 NA MMP-1 37
SIRT2 FALSE 172_SIRT2 Q8IXJ6 1.52 1.00 0.0815993 blue SIRT2 NA SIRT2 36
IL.8 FALSE 101_IL-8 P10145 3.17 0.25 0.2447980 blue IL-8 NA IL-8 35
IL.7 FALSE 109_IL-7 P13232 1.39 1.43 0.7547940 blue IL-7 NA IL-7 34
SLAMF1 FALSE 134_SLAMF1 Q13291 1.55 2.13 1.5299878 blue SLAMF1 NA SLAMF1 33
EN.RAGE FALSE 175_EN-RAGE P80511 1.57 3.28 3.5291718 blue EN-RAGE NA EN-RAGE 32
Beta.NGF FALSE 153_Beta-NGF P01138 1.23 3.96 4.2227662 blue Beta-NGF NA Beta-NGF 31
CCL28 FALSE 173_CCL28 Q9NRJ3 0.92 5.49 5.2835577 red CCL28 NA CCL28 30
IL.10 FALSE 162_IL-10 P22301 1.50 7.71 7.0583435 blue IL-10 NA IL-10 29
AXIN1 FALSE 118_AXIN1 O15169 1.30 13.04 10.8731130 red AXIN1 NA AXIN1 28
NT.3 FALSE 188_NT-3 P20783 0.85 12.77 13.2394941 blue NT-3 NA NT-3 27
FGF.5 FALSE 141_FGF-5 Q8NF90 1.00 31.71 30.3957568 blue FGF-5 NA FGF-5 26
ST1A1 FALSE 191_ST1A1 P50225 1.34 32.68 31.2525500 blue ST1A1 NA ST1A1 25
GDNF FALSE 106_GDNF P39905 1.13 43.26 42.4520604 blue hGDNF NA hGDNF 24
IL.10RA FALSE 140_IL-10RA Q13651 1.14 53.30 53.9779682 red IL-10RA NA IL-10RA 23
IL.6 FALSE 113_IL-6 P05231 1.97 69.06 68.6250510 blue IL-6 NA IL-6 22
IL.5 FALSE 193_IL-5 P05113 1.55 78.21 77.8457772 red IL-5 NA IL-5 21
MCP.3 FALSE 105_MCP-3 P80098 1.31 79.30 79.0697674 blue MCP-3 CCL7 CCL7 20
IL.17C FALSE 114_IL-17C Q9P0M4 1.28 83.07 83.3741330 blue IL-17C NA IL-17C 19
IL.17A FALSE 116_IL-17A Q16552 0.97 84.73 84.7001224 red IL-17A NA IL-17A 18
IL.4 FALSE 180_IL-4 P05112 1.11 93.91 94.4104447 red IL-4 NA IL-4 17
LIF FALSE 181_LIF P15018 1.45 94.90 94.8184415 red LIF NA LIF 16
TNF FALSE 163_TNF P01375 1.19 95.22 95.1244390 red TNF NA TNF 15
IL.20RA FALSE 121_IL-20RA Q9UHF4 0.79 95.16 95.7772338 red IL-20RA NA IL-20RA 14
ARTN FALSE 160_ARTN Q5T4W7 0.73 95.84 95.9404325 red ARTN NA ARTN 13
IL.15RA FALSE 148_IL-15RA Q13261 1.18 96.10 96.1648307 blue IL-15RA NA IL-15RA 12
IL.13 FALSE 159_IL-13 P35225 1.71 96.51 96.5524276 red IL-13 NA IL-13 11
IL.20 FALSE 171_IL-20 Q9NYY1 1.30 96.72 96.7156263 red IL-20 NA IL-20 10
IL.24 FALSE 158_IL-24 Q13007 1.95 97.13 97.0828233 red IL-24 NA IL-24 9
IL.2RB FALSE 124_IL-2RB P14784 1.47 97.11 97.1032232 red IL-2RB NA IL-2RB 8
NRTN FALSE 182_NRTN Q99748 1.39 97.11 97.1236230 red NRTN NA NRTN 7
IL.33 FALSE 177_IL-33 O95760 1.17 98.54 98.5516116 red IL-33 NA IL-33 6
TSLP FALSE 129_TSLP Q969D9 1.80 99.49 99.4900041 red TSLP NA TSLP 5
IFN.gamma FALSE 178_IFN-gamma P01579 1.40 99.53 99.5308038 red IFN-gamma NA IFN-gamma 4
IL.22.RA1 FALSE 150_IL-22 RA1 Q8N6P7 1.79 99.61 99.6124031 red IL-22RA1 NA IL-22RA1 3
IL.2 FALSE 127_IL-2 P60568 0.93 99.86 99.8776010 red IL-2 NA IL-2 2
IL.1.alpha FALSE 125_IL-1 alpha P01583 4.76 99.88 99.8776010 blue IL-1 alpha NA IL-1 alpha 1

7 Combination of proteins and other variables

This is available by design.

8 maEndtoEnd

Web: https://bioconductor.org/packages/release/workflows/html/maEndToEnd.html.

Examples can be found on PCA, heatmap, normalisation, linear models, and enrichment analysis from this Bioconductor package.

9 SummarizedExperiment

This is more modern construct. The following script is adapted from the documentation example, illustrating range summarized experiment (rse) adding missing values.

set.seed(123)
nrows <- 20
ncols <- 4
counts <- matrix(runif(nrows * ncols, 1, 1e4), nrows)
missing_indices <- sample(length(counts), size = 5, replace = FALSE)
counts[missing_indices] <- NA
rowRanges <- GenomicRanges::GRanges(rep(c("chr1", "chr2"), c(5, 15)),
                            IRanges::IRanges(floor(runif(nrows, 1e5, 1e6)), width=ncols),
                            strand=sample(c("+", "-"), nrows, TRUE),
                            feature_id=sprintf("ID%03d", 1:nrows))
colData <- S4Vectors::DataFrame(Treatment=rep(c("ChIP", "Input"), ncols/2),
                                row.names=LETTERS[1:ncols])
rse <- SummarizedExperiment(assays=S4Vectors::SimpleList(counts=counts),
                            rowRanges=rowRanges, colData=colData)
assay(rse)
#>               A         B         C           D
#>  [1,] 2876.4876 8895.5036 1428.8574 6651.486831
#>  [2,] 7883.2630 6928.3413 4146.0488  949.311769
#>  [3,] 4090.3602 6405.4276 4137.8295 3840.312408
#>  [4,] 8830.2910 9942.7035 3689.0857 2744.562062
#>  [5,] 9404.7324 6557.4023 1525.2950 8146.585749
#>  [6,]        NA 7085.5962 1388.9218 4485.714898
#>  [7,] 5281.5268 5441.1162 2331.1080 8100.833466
#>  [8,] 8924.2980 5941.8261 4660.1585 8124.082706
#>  [9,] 5514.7987 2892.3082 2660.4604 7943.628869
#> [10,] 4566.6907 1471.9894        NA 4398.877044
#> [11,] 9568.3766        NA  459.2658 7544.997111
#> [12,] 4533.8882 9023.0882 4422.5585          NA
#> [13,] 6776.0288 6907.3621 7989.4495 7102.113831
#> [14,] 5726.7614 7954.8787 1219.8707    7.247108
#> [15,] 1030.1439  247.1122 5609.9189 4753.690424
#> [16,] 8998.3499 4778.4819 2066.1074 2201.968733
#> [17,] 2461.6313 7584.8369 1276.1890 3798.785561
#> [18,]  421.5533 2164.8630 7533.3253 6128.097262
#> [19,] 3279.8793        NA 8950.5585 3518.627295
#> [20,] 9545.0820 2317.0262 3745.2533 1112.243108
imputed <- MsCoreUtils::impute_knn(as.matrix(SummarizedExperiment::assay(rse)),2)
imputed_counts <- MsCoreUtils::impute_RF(as.matrix(assay(rse)),2)
imputed-imputed_counts
#>              A         B        C        D
#>  [1,]    0.000    0.0000    0.000    0.000
#>  [2,]    0.000    0.0000    0.000    0.000
#>  [3,]    0.000    0.0000    0.000    0.000
#>  [4,]    0.000    0.0000    0.000    0.000
#>  [5,]    0.000    0.0000    0.000    0.000
#>  [6,] 2559.339    0.0000    0.000    0.000
#>  [7,]    0.000    0.0000    0.000    0.000
#>  [8,]    0.000    0.0000    0.000    0.000
#>  [9,]    0.000    0.0000    0.000    0.000
#> [10,]    0.000    0.0000 1791.909    0.000
#> [11,]    0.000 -709.2786    0.000    0.000
#> [12,]    0.000    0.0000    0.000 2467.614
#> [13,]    0.000    0.0000    0.000    0.000
#> [14,]    0.000    0.0000    0.000    0.000
#> [15,]    0.000    0.0000    0.000    0.000
#> [16,]    0.000    0.0000    0.000    0.000
#> [17,]    0.000    0.0000    0.000    0.000
#> [18,]    0.000    0.0000    0.000    0.000
#> [19,]    0.000 1298.3456    0.000    0.000
#> [20,]    0.000    0.0000    0.000    0.000
SummarizedExperiment::assays(rse) <- S4Vectors::SimpleList(counts=imputed_counts)
SummarizedExperiment::assay(rse)
#>               A         B         C           D
#>  [1,] 2876.4876 8895.5036 1428.8574 6651.486831
#>  [2,] 7883.2630 6928.3413 4146.0488  949.311769
#>  [3,] 4090.3602 6405.4276 4137.8295 3840.312408
#>  [4,] 8830.2910 9942.7035 3689.0857 2744.562062
#>  [5,] 9404.7324 6557.4023 1525.2950 8146.585749
#>  [6,] 4439.2947 7085.5962 1388.9218 4485.714898
#>  [7,] 5281.5268 5441.1162 2331.1080 8100.833466
#>  [8,] 8924.2980 5941.8261 4660.1585 8124.082706
#>  [9,] 5514.7987 2892.3082 2660.4604 7943.628869
#> [10,] 4566.6907 1471.9894 4812.3872 4398.877044
#> [11,] 9568.3766 5545.7565  459.2658 7544.997111
#> [12,] 4533.8882 9023.0882 4422.5585 4611.837102
#> [13,] 6776.0288 6907.3621 7989.4495 7102.113831
#> [14,] 5726.7614 7954.8787 1219.8707    7.247108
#> [15,] 1030.1439  247.1122 5609.9189 4753.690424
#> [16,] 8998.3499 4778.4819 2066.1074 2201.968733
#> [17,] 2461.6313 7584.8369 1276.1890 3798.785561
#> [18,]  421.5533 2164.8630 7533.3253 6128.097262
#> [19,] 3279.8793 4566.2289 8950.5585 3518.627295
#> [20,] 9545.0820 2317.0262 3745.2533 1112.243108
SummarizedExperiment::assays(rse) <- S4Vectors::endoapply(SummarizedExperiment::assays(rse), asinh)
SummarizedExperiment::assay(rse)
#>              A        B        C        D
#>  [1,] 8.657472 9.786448 7.957778 9.495743
#>  [2,] 9.665644 9.536523 9.023058 7.548885
#>  [3,] 9.009536 9.458048 9.021074 8.946456
#>  [4,] 9.779090 9.897741 8.906281 8.610524
#>  [5,] 9.842115 9.481497 8.023090 9.698501
#>  [6,] 9.091398 9.558966 7.929430 9.101800
#>  [7,] 9.265118 9.294887 8.447246 9.692869
#>  [8,] 9.789680 9.382919 9.139952 9.695735
#>  [9,] 9.308338 8.662957 8.579402 9.673273
#> [10,] 9.119691 7.987517 9.172096 9.082252
#> [11,] 9.859366 9.313936 6.822778 9.621787
#> [12,] 9.112482 9.800689 9.087621 9.129529
#> [13,] 9.514294 9.533490 9.679024 9.561295
#> [14,] 9.346053 9.674688 7.799647 2.678476
#> [15,] 7.630601 6.202994 9.325439 9.159824
#> [16,] 9.797944 9.165025 8.326569 8.390254
#> [17,] 8.501727 9.627054 7.844781 8.935584
#> [18,] 6.737095 8.373260 9.620239 9.413787
#> [19,] 8.788709 9.119590 9.792618 8.858973
#> [20,] 9.856929 8.441187 8.921392 7.707281

SummarizedExperiment::rowRanges(rse)
#> GRanges object with 20 ranges and 1 metadata column:
#>        seqnames        ranges strand |  feature_id
#>           <Rle>     <IRanges>  <Rle> | <character>
#>    [1]     chr1 409164-409167      - |       ID001
#>    [2]     chr1 691082-691085      + |       ID002
#>    [3]     chr1 388335-388338      + |       ID003
#>    [4]     chr1 268922-268925      - |       ID004
#>    [5]     chr1 804064-804067      - |       ID005
#>    ...      ...           ...    ... .         ...
#>   [16]     chr2 647861-647864      + |       ID016
#>   [17]     chr2 469620-469623      + |       ID017
#>   [18]     chr2 232385-232388      + |       ID018
#>   [19]     chr2 941769-941772      - |       ID019
#>   [20]     chr2 371106-371109      + |       ID020
#>   -------
#>   seqinfo: 2 sequences from an unspecified genome; no seqlengths
SummarizedExperiment::rowData(rse)
#> DataFrame with 20 rows and 1 column
#>      feature_id
#>     <character>
#> 1         ID001
#> 2         ID002
#> 3         ID003
#> 4         ID004
#> 5         ID005
#> ...         ...
#> 16        ID016
#> 17        ID017
#> 18        ID018
#> 19        ID019
#> 20        ID020
SummarizedExperiment::colData(rse)
#> DataFrame with 4 rows and 1 column
#>     Treatment
#>   <character>
#> A        ChIP
#> B       Input
#> C        ChIP
#> D       Input

rse[, rse$Treatment == "ChIP"]
#> class: RangedSummarizedExperiment 
#> dim: 20 2 
#> metadata(0):
#> assays(1): counts
#> rownames: NULL
#> rowData names(1): feature_id
#> colnames(2): A C
#> colData names(1): Treatment

# the same ranges but different samples:
rse1 <- rse
rse2 <- rse1[, 1:3]
colnames(rse2) <- letters[1:ncol(rse2)]
cmb1 <- cbind(rse1, rse2)
dim(cmb1)
#> [1] 20  7
dimnames(cmb1)
#> [[1]]
#> NULL
#> 
#> [[2]]
#> [1] "A" "B" "C" "D" "a" "b" "c"

# the same samples but different ranges:
rse1 <- rse
rse2 <- rse1[1:20, ]
rownames(rse2) <- letters[1:nrow(rse2)]
cmb2 <- rbind(rse1, rse2)
dim(cmb2)
#> [1] 40  4
dimnames(cmb2)
#> [[1]]
#>  [1] ""  ""  ""  ""  ""  ""  ""  ""  ""  ""  ""  ""  ""  ""  ""  ""  ""  ""  ""  ""  "a" "b" "c" "d" "e" "f" "g" "h" "i"
#> [30] "j" "k" "l" "m" "n" "o" "p" "q" "r" "s" "t"
#> 
#> [[2]]
#> [1] "A" "B" "C" "D"

se0 <- as(rse, "SummarizedExperiment")
se0
#> class: SummarizedExperiment 
#> dim: 20 4 
#> metadata(0):
#> assays(1): counts
#> rownames: NULL
#> rowData names(1): feature_id
#> colnames(4): A B C D
#> colData names(1): Treatment
as(se0, "RangedSummarizedExperiment")
#> class: RangedSummarizedExperiment 
#> dim: 20 4 
#> metadata(0):
#> assays(1): counts
#> rownames: NULL
#> rowData names(1): feature_id
#> colnames(4): A B C D
#> colData names(1): Treatment

# rowRanges(a SummarizedExperiment object) --> RangedSummarizedExperiment object:
se <- se0
rowRanges(se) <- rowRanges
se
#> class: RangedSummarizedExperiment 
#> dim: 20 4 
#> metadata(0):
#> assays(1): counts
#> rownames: NULL
#> rowData names(1): feature_id
#> colnames(4): A B C D
#> colData names(1): Treatment

stopifnot(identical(SummarizedExperiment::assays(se0), SummarizedExperiment::assays(rse)))
stopifnot(identical(dim(se0), dim(rse)))
stopifnot(identical(dimnames(se0), dimnames(rse)))
stopifnot(identical(SummarizedExperiment::rowData(se0), SummarizedExperiment::rowData(rse)))
stopifnot(identical(SummarizedExperiment::colData(se0), SummarizedExperiment::colData(rse)))