Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@
.Ruserdata
doc
Meta
inst/doc
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ export(compileFeatureScores)
export(compileFeatures)
export(confusionMatrix)
export(convertProfileToNetworks)
export(convertToMAE)
export(countIntType)
export(countIntType_batch)
export(countPatientsInNet)
Expand Down
9 changes: 7 additions & 2 deletions R/buildPredictor.R
Original file line number Diff line number Diff line change
Expand Up @@ -247,8 +247,13 @@ for (k in seq_len(length(exprs))) {
tmp <- exprs[[k]]
df <- sampleMap(dataList)[which(sampleMap(dataList)$assay==names(exprs)[k]),]
colnames(tmp) <- df$primary[match(df$colname,colnames(tmp))]
tmp <- as.matrix(assays(tmp)[[1]]) # convert to matrix
datList2[[names(exprs)[k]]]<- tmp
if ("matrix" %in% class(tmp)) {
datList2[[names(exprs)[k]]] <- tmp
} else {
tmp <- as.matrix(assays(tmp)[[1]]) # convert to matrix
datList2[[names(exprs)[k]]]<- tmp
}

}
if ("clinical" %in% names(groupList)) {
tmp <- colData(dataList)
Expand Down
84 changes: 84 additions & 0 deletions R/convertToMAE.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
#' Wrapper that converts an input list into a MultiAssayExperiment object
#'
#' @details This function takes in a list of key-value pairs (keys: data types,
#' values: matrices/dataframes) and passes them to MultiAssayExperiment():
#' every element not labelled pheno becomes one experiment (keeping its key as
#' the experiment name) and the pheno element is supplied as colData. If
#' duplicate sample names (column names) are found in the assay data, only the
#' first instance is kept; an assay reduced to a single sample keeps its
#' two-dimensional shape.
#' @param dataList (list) input key-value pairs (keys: data types, values:
#' data in the form of matrices/dataframes); must have a key-value pair that
#' corresponds to patient IDs/metadata labelled pheno (exact name).
#' @return MAE (MultiAssayExperiment) data from input list incorporated into a
#' MultiAssayExperiment object, compatible with further analysis using the
#' netDx algorithm.
#' @examples
#' data(xpr, pheno)
#'
#' # Generate random proteomic data
#' prot <- matrix(rnorm(100*20), ncol=20)
#' colnames(prot) <- sample(pheno$ID, 20)
#' rownames(prot) <- sprintf("protein%i",1:100)
#'
#' myList <- list(rna = xpr, proteomic = prot, pheno = pheno)
#'
#' MAE <- convertToMAE(myList)
#' @export
convertToMAE <- function(dataList) {

  # Check input data.
  # inherits() rather than class(x) != "list": class() can return several
  # entries, which would make the if() condition longer than one.
  if (!inherits(dataList, "list")) {
    stop("dataList must be a list. \n")
  }
  # Exact lookup with [[ ]]: `$` partially matches, so a key such as
  # "phenotype" would otherwise be accepted as pheno.
  if (is.null(dataList[["pheno"]])) {
    stop("dataList must have key-value pair labelled pheno.\n")
  }
  # Everything that is not labelled pheno is treated as assay data.
  isAssay <- !(names(dataList) %in% "pheno")
  if (!any(isAssay)) {
    stop(paste("dataList must have assay data to incorporate into a",
               "MultiAssayExperiment object"))
  }

  # Note that a MultiAssayExperiment object requires an ExperimentList and
  # colData (sampleMap optional if each assay uses the same colnames)

  # Possible elements for ExperimentList:
  # - base::matrix (gene expression, microRNA, metabolomics, microbiome data)
  # - SummarizedExperiment::SummarizedExperiment (same as matrix, but capable
  #   of storing additional assay-level metadata)
  # - Biobase::ExpressionSet (legacy representation, use SummarizedExperiment)
  # - SummarizedExperiment::RangedSummarizedExperiment (range-based datasets;
  #   gene expression, methylation, data types that refer to genomic positions)
  # - RaggedExperiment::RaggedExperiment (range-based datasets; copy number and
  #   mutation data, measurements by genomic positions)

  # Assumes that pheno is a DataFrame (or coerceable to be a DataFrame)
  patientPheno <- dataList[["pheno"]]

  # Generate the experiment list from the assay entries. lapply() keeps the
  # keys as names, so no separate index/name bookkeeping is needed.
  # Assumes that data is of matrix class
  # *(maybe implement matrix conversion into SummarizedExperiment in future)
  experimentList <- lapply(dataList[isAssay], function(assay) {
    # Remove duplicated columns (we keep the first column) in the assay data.
    isDup <- duplicated(colnames(assay))
    if (any(isDup)) {
      # drop = FALSE: without it an assay left with one column would be
      # collapsed to a plain vector and lose its sample name.
      assay <- assay[, !isDup, drop = FALSE]
    }
    assay
  })

  MultiAssayExperiment(experiments = experimentList, colData = patientPheno)
}
2 changes: 1 addition & 1 deletion man/buildPredictor_sparseGenetic.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

41 changes: 41 additions & 0 deletions man/convertToMAE.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

75 changes: 75 additions & 0 deletions tests/testthat/test_convertToMAE.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
# test convertToMAE.R

test_that("convertToMAE works", {
  # Sample sheet: 20 patients, first 10 labelled case, last 10 control.
  # Only the ID column is used below, to name the samples.
  patients <- data.frame(
    ID = sprintf("PAT%i", 1:20),
    STATUS = rep(c("case", "control"), each = 10)
  )

  # Expression matrix: 100 dummy genes (rows) by 20 patients (columns)
  expr <- matrix(rnorm(100 * 20), nrow = 100)
  dimnames(expr) <- list(sprintf("gene%i", 1:100), patients$ID)

  # One dummy clinical variable (AGE), one row per patient
  clinical <- matrix(runif(20, 10, 50), ncol = 1)
  dimnames(clinical) <- list(patients$ID, "AGE")

  # Input for convertToMAE: a single assay plus the pheno entry
  input <- list(rna = expr, pheno = clinical)

  mae <- convertToMAE(input)
  expect_is(mae, "MultiAssayExperiment")
})

test_that("convertToMAE works with more than one assay", {
  # Sample sheet: 20 patients, first 10 labelled case, last 10 control.
  # Only the ID column is used below, to name the samples.
  patients <- data.frame(
    ID = sprintf("PAT%i", 1:20),
    STATUS = rep(c("case", "control"), each = 10)
  )

  # Expression matrix: 100 dummy genes (rows) by 20 patients (columns)
  expr <- matrix(rnorm(100 * 20), nrow = 100)
  dimnames(expr) <- list(sprintf("gene%i", 1:100), patients$ID)

  # Proteomic matrix: 200 dummy proteins (rows) by 20 patients (columns)
  proteins <- matrix(rnorm(200 * 20), nrow = 200)
  dimnames(proteins) <- list(sprintf("protein%i", 1:200), patients$ID)

  # One dummy clinical variable (AGE), one row per patient
  clinical <- matrix(runif(20, 10, 50), ncol = 1)
  dimnames(clinical) <- list(patients$ID, "AGE")

  # Input for convertToMAE: two assays plus the pheno entry
  input <- list(rna = expr, proteomics = proteins, pheno = clinical)

  mae <- convertToMAE(input)
  expect_is(mae, "MultiAssayExperiment")
})

test_that("convertToMAE removes duplicated sample", {
  # 20 patients, 10 case, 10 control (only the ID column is used below)
  pheno <- data.frame(ID = sprintf("PAT%i", 1:20),
                      STATUS = rep(c("case", "control"), each = 10))
  # 100 dummy genes; the first sample is appended again as column 21 so the
  # rna input carries one duplicated sample name
  rna <- matrix(rnorm(100 * 20), nrow = 100)
  colnames(rna) <- pheno$ID
  rownames(rna) <- sprintf("gene%i", 1:100)
  rna <- cbind(rna, rna[, 1])
  colnames(rna)[21] <- colnames(rna)[1]
  # 200 dummy proteins
  prot <- matrix(rnorm(200 * 20), nrow = 200)
  colnames(prot) <- pheno$ID
  rownames(prot) <- sprintf("protein%i", 1:200)
  # 1 dummy clin variable (AGE), one row per patient
  clin <- t(data.frame(AGE = runif(20, 10, 50)))
  colnames(clin) <- pheno$ID
  clin <- t(clin)

  # netDx files
  dataList <- list(rna = rna, proteomics = prot, pheno = clin)

  x <- convertToMAE(dataList)
  expect_is(x, "MultiAssayExperiment")

  # The stored rna assay itself must have lost the duplicated column; the
  # checks further down only compare the untouched input against colData.
  rnaStored <- experiments(x)[["rna"]]
  expect_equal(ncol(rnaStored), 20)
  expect_equal(anyDuplicated(colnames(rnaStored)), 0)

  # number of samples in rna assay vs colData should differ by 1
  expect_equal((dim(rna)[2] - dim(colData(x))[1]), 1)
  # number of samples in metadata should agree with colData
  expect_equal((dim(clin)[1] - dim(colData(x))[1]), 0)
})
Loading