diff --git a/DESCRIPTION b/DESCRIPTION index 267fa1c5..3f1cd9e5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -17,7 +17,6 @@ Depends: R (>= 3.6) Suggests: curatedTCGAData, - TCGAutils, rmarkdown, testthat, knitr, diff --git a/NAMESPACE b/NAMESPACE index 94091be0..98139214 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -15,6 +15,7 @@ export(convertProfileToNetworks) export(countIntType) export(countIntType_batch) export(countPatientsInNet) +export(createNetFuncFromSimList) export(createPSN_MultiData) export(dataList2List) export(enrichLabelNets) @@ -64,6 +65,7 @@ export(sparsify2) export(sparsify3) export(splitTestTrain) export(splitTestTrain_resampling) +export(subsampleValidationData) export(tSNEPlotter) export(thresholdSmoothedMutations) export(updateNets) diff --git a/R/buildPredictor.R b/R/buildPredictor.R index a84fcebb..df05dad3 100644 --- a/R/buildPredictor.R +++ b/R/buildPredictor.R @@ -29,6 +29,9 @@ #' So keys(groupList[["rna"]]) would have pathway names, generating one PSN #' per pathways, and values(groupList[["rna"]]) would be genes that would be #' grouped for the corresponding pathwayList. +#' @param sims (list) rules to create similarity networks from input data. Keys are names of +#' data layers and should be identical to names(groupList). Each value is either a character +#' string naming a built-in similarity function (call allowedSims() to see the full list) or a custom function. #' @param makeNetFunc (function) user-defined function for creating the set #' of input PSN provided to netDx. See createPSN_MultiData()::customFunc. #' @param outDir (char) directory where results will be stored. 
If this @@ -171,7 +174,8 @@ #' # makeNetFunc=makeNets, ### custom network creation function #' # outDir=paste(tempdir(),"pred_output",sep=getFileSep()), ## absolute path #' # numCores=16L,featScoreMax=2L, featSelCutoff=1L,numSplits=2L) -buildPredictor <- function(dataList,groupList,outDir=tempdir(),makeNetFunc, +buildPredictor <- function(dataList,groupList,outDir=tempdir(), + makeNetFunc=NULL,sims=NULL, featScoreMax=10L,trainProp=0.8,numSplits=10L,numCores,JavaMemory=4L, featSelCutoff=9L,keepAllData=FALSE,startAt=1L, preFilter=FALSE, impute=FALSE,preFilterGroups=NULL, imputeGroups=NULL,logging="default", @@ -196,7 +200,7 @@ if (logging == "all") { verbose_predict <- FALSE } -# Check input +# Check input - error handling if (missing(dataList)) stop("dataList must be supplied.\n") if (missing(groupList)) stop("groupList must be supplied.\n") if (length(groupList)<1) stop("groupList must be of length 1+\n") @@ -213,6 +217,9 @@ if (!is(groupList,"list") || not_list || names_nomatch ) { stop(paste(msg,sep="")) } +# checks either/or provided, sets missing var to NULL +x <- checkMakeNetFuncSims(makeNetFunc=makeNetFunc, sims=sims,groupList=groupList) + if (!is(dataList,"MultiAssayExperiment")) stop("dataList must be a MultiAssayExperiment") @@ -220,6 +227,8 @@ if (trainProp <= 0 | trainProp >= 1) stop("trainProp must be greater than 0 and less than 1") if (startAt > numSplits) stop("startAt should be between 1 and numSplits") +# end check input error handling + megaDir <- outDir if (file.exists(megaDir)) { stop(paste("outDir seems to already exist!", @@ -274,7 +283,6 @@ if (verbose_default){ } } - outList <- list() # create master list of possible networks @@ -289,8 +297,14 @@ colnames(tmp) <- c("NetType","NetName") outList[["inputNets"]] <- tmp if (verbose_default) { - message("\n\nCustom function to generate input nets:") - print(makeNetFunc) + if (!is.null(makeNetFunc)){ + message("\n\nCustom function to generate input nets:") + print(makeNetFunc) + + } else { + 
message("Similarity metrics provided:") + print(sims) + } message(sprintf("-------------------------------\n")) } @@ -386,8 +400,8 @@ for (rngNum in startAt:numSplits) { if (verbose_default) message("** Creating features") createPSN_MultiData(dataList=dats_train,groupList=groupList, - pheno=pheno_id, - netDir=netDir,customFunc=makeNetFunc,numCores=numCores, + pheno=pheno_id, + netDir=netDir,makeNetFunc=makeNetFunc,sims=sims, numCores=numCores, verbose=verbose_makeFeatures) if (verbose_default) message("** Compiling features") dbDir <- compileFeatures(netDir,outDir, numCores=numCores, @@ -512,7 +526,8 @@ for (rngNum in startAt:numSplits) { pheno_id <- setupFeatureDB(pheno,netDir) createPSN_MultiData(dataList=dats_tmp,groupList=groupList, pheno=pheno_id, - netDir=netDir,customFunc=makeNetFunc,numCores=numCores, + netDir=netDir,makeNetFunc=makeNetFunc,sims=sims, + numCores=numCores, filterSet=pTally,verbose=verbose_default) dbDir <- compileFeatures(netDir,outDir=pDir,numCores=numCores, verbose=verbose_compileNets,debugMode=debugMode) diff --git a/R/compileFeatures.R b/R/compileFeatures.R index f5bb54b0..e7e7d4b8 100644 --- a/R/compileFeatures.R +++ b/R/compileFeatures.R @@ -58,7 +58,7 @@ #' #' pheno_id <- setupFeatureDB(pheno,netDir) #' netList <- createPSN_MultiData(dataList=dataList, groupList=groupList, -#' pheno=pheno_id,netDir=netDir,customFunc=makeNets,verbose=TRUE) +#' pheno=pheno_id,netDir=netDir,makeNetFunc=makeNets,verbose=TRUE) #' #' outDir <- paste(tmpDir,'dbdir',sep=getFileSep()); #' dir.create(outDir) diff --git a/R/createPSN_MultiData.R b/R/createPSN_MultiData.R index ea6e976c..719497cd 100644 --- a/R/createPSN_MultiData.R +++ b/R/createPSN_MultiData.R @@ -12,11 +12,15 @@ #' with internally-generated identifiers. #' @param netDir (char) path to directory where networks will be stored #' @param filterSet (char) vector of networks to include -#' @param customFunc (function) custom user-function to create PSN. 
+#' @param makeNetFunc (function) custom user-function to create PSN. #' Must take dataList,groupList,netDir as parameters. Must #' check if a given groupList is empty (no networks to create) before #' the makePSN call for it. This is to avoid trying to make nets for datatypes #' that did not pass feature selection +#' @param sims (list) Similarity metric settings for patient data. +#' Keys must be identical to those of groupList. +#' Values are either of type character, used for built-in similarity functions, +#' or are functions, when a custom function is provided. #' @param verbose (logical) print messages #' @param ... other parameters to makePSN_NamedMatrix() or makePSN_RangedSets() #' @return (char) vector of network names. Side effect of creating the nets @@ -95,24 +99,24 @@ #' pheno_id <- setupFeatureDB(colData(brca),netDir) #' createPSN_MultiData(dataList=datList2,groupList=groupList, #' pheno=pheno_id, -#' netDir=netDir,customFunc=makeNets,numCores=1) +#' netDir=netDir,makeNetFunc=makeNets,numCores=1) #' @export createPSN_MultiData <- function(dataList, groupList, pheno, netDir=tempdir(), filterSet = NULL, - verbose = TRUE, customFunc, ...) { + verbose = TRUE, makeNetFunc=NULL, sims=NULL, ...) { if (missing(dataList)) stop("dataList must be supplied.\n") if (missing(groupList)) stop("groupList must be supplied.\n") - + # resolve user-provided IDs with internal IDs dataList <- lapply(dataList, function(x) { midx <- match(colnames(x), pheno$ID) colnames(x) <- pheno$INTERNAL_ID[midx] x }) - + if (!is.null(filterSet)) { if (length(filterSet) < 1) { s1 <- "filterSet is empty." 
@@ -120,8 +124,8 @@ createPSN_MultiData <- function(dataList, groupList, pheno, netDir=tempdir(), stop(paste(s1, s2, sep = " ")) } } - if (missing(customFunc)) - stop("customFunc must be suppled.\n") + + # Filter for nets (potentially feature-selected ones) if (!is.null(filterSet)) { @@ -139,12 +143,22 @@ createPSN_MultiData <- function(dataList, groupList, pheno, netDir=tempdir(), } } groupList <- groupList2 + sims <- sims[which(names(sims) %in% names(groupList))] rm(groupList2) } + if (!is.null(makeNetFunc)){ # call user-defined function for making PSN - netList <- customFunc(dataList = dataList, groupList = groupList, + netList <- makeNetFunc(dataList = dataList, groupList = groupList, netDir = netDir, ...) + } else { + netList <- createNetFuncFromSimList(dataList=dataList, + groupList = groupList, + netDir = netDir, + sims = sims, + ... + ) + } if (length(netList) < 1) stop("\n\nNo features created! Filters may be too stringent.\n") diff --git a/R/helper.R b/R/helper.R index e5e0bdec..02aa0e6c 100755 --- a/R/helper.R +++ b/R/helper.R @@ -159,8 +159,11 @@ return(list( #' @param EMapPctPass (numeric between 0 and 1) percent of splits for which feature must have score in range #' [EMapMinScore,EMapMaxScore] to be included for EnrichmentMap visualization #' @param outDir (char) directory where files should be written -#' @return -#' @export +#' @return (list) 1) GMTfiles (char): GMT files used to create EnrichmentMap in Cytoscape. +#' 2) NodeStyles (char): .txt files used to assign node attributes in Cytoscape. Importantly, +#' attributes include node fill, which indicates the highest consistent score for a given +#' feature. +#' @export makeInputForEnrichmentMap <- function(model,results,pathwayList, EMapMinScore=0L, EMapMaxScore=1L, EMapPctPass=0.5,outDir) @@ -214,8 +217,10 @@ return(list(GMTfiles=gmtFiles,NodeStyles=nodeAttrFiles)) #' same class, relative to those of other classes, using Dijkstra distance (calcShortestPath flag). 
#' @param dat (MultiAssayExperiment) input data #' @param groupList (list) feature groups, identical to groupList provided for buildPredictor() -#' @param makeNets (function) Function used to create patient similarity networks. Identical to +#' @param makeNetFunc (function) Function used to create patient similarity networks. Identical to #' makeNets provided to buildPredictor() +#' @param sims (list) rules for creating PSN. Preferred over makeNetFunc. See buildPredictor() +#' for details. #' @param selectedFeatures (list) selected features for each class (key of list). This object is returned as #' part of a call to getResults(), after running buildPredictor(). #' @param plotCytoscape (logical) If TRUE, plots network in Cytoscape. @@ -245,9 +250,18 @@ return(list(GMTfiles=gmtFiles,NodeStyles=nodeAttrFiles)) #' colours (colour) #' 6) outDir (char) value of outDir parameter #' @export -getPSN <- function(dat, groupList, makeNets, selectedFeatures, plotCytoscape=FALSE, - aggFun="MEAN", prune_pctX=0.30, prune_useTop=TRUE,numCores=1L,calcShortestPath=FALSE +getPSN <- function(dat, groupList, + makeNetFunc=NULL, sims=NULL, + selectedFeatures, plotCytoscape=FALSE, + aggFun="MEAN", prune_pctX=0.30, prune_useTop=TRUE, + numCores=1L,calcShortestPath=FALSE ){ + + +# checks either/or provided, sets missing var to NULL +x <- checkMakeNetFuncSims(makeNetFunc=makeNetFunc, + sims=sims,groupList=groupList) + topPath <- gsub(".profile","", unique(unlist(selectedFeatures))) topPath <- gsub("_cont.txt","",topPath) @@ -262,8 +276,10 @@ for (nm in names(groupList)) { message("* Making integrated PSN") psn <- - plotIntegratedPatientNetwork(dat, - groupList=g2, makeNetFunc=makeNets, + plotIntegratedPatientNetwork( + dataList=dat, + groupList=g2, makeNetFunc=makeNetFunc, + sims=sims, aggFun=aggFun, prune_pctX=prune_pctX, prune_useTop=prune_useTop, diff --git a/R/plotEmap.R b/R/plotEmap.R index 2e1ff6bc..a15a994f 100644 --- a/R/plotEmap.R +++ b/R/plotEmap.R @@ -80,14 +80,14 @@ plotEmap <- 
function(gmtFile, nodeAttrFile, netName = "generic", } ####################################### create EM using given parameters - if (netName %in% getNetworkList()) { + if (netName %in% RCy3::getNetworkList()) { RCy3::deleteNetwork(netName) } em_command <- paste("enrichmentmap build analysisType=\"generic\"", "gmtFile=", gmtFile, "pvalue=", 1, "qvalue=", 1, "similaritycutoff=", 0.05, "coefficients=", "JACCARD") response <- RCy3::commandsGET(em_command) - renameNetwork(netName, getNetworkSuid()) + RCy3::renameNetwork(netName, RCy3::getNetworkSuid()) ### #annotate the network using AutoAnnotate app aa_command <- paste("autoannotate annotate-clusterBoosted", diff --git a/R/plotIntegratedPatientNetwork.R b/R/plotIntegratedPatientNetwork.R index cd9ed4c4..4cb86750 100644 --- a/R/plotIntegratedPatientNetwork.R +++ b/R/plotIntegratedPatientNetwork.R @@ -19,6 +19,7 @@ #' list of lists, where the outer list corresponds to assay (e.g. mRNA, #' clinical) and inner list to features to generate from that datatype. #' @param makeNetFunc (function) function to create features +#' @param sims (list) rules for creating PSN. 
Preferred over makeNetFunc #' @param setName (char) name to assign the network in Cytoscape #' @param numCores (integer) number of cores for parallel processing #' @param prune_pctX (numeric between 0 and 1) fraction of most/least @@ -59,7 +60,8 @@ #' @importFrom RColorBrewer brewer.pal #' @importFrom stats wilcox.test qexp density #' @export -plotIntegratedPatientNetwork <- function(dataList,groupList,makeNetFunc, +plotIntegratedPatientNetwork <- function(dataList,groupList, + makeNetFunc=NULL,sims=NULL, setName="predictor",prune_pctX=0.05, prune_useTop=TRUE, aggFun="MAX",calcShortestPath=FALSE, showStats=FALSE, @@ -67,6 +69,11 @@ plotIntegratedPatientNetwork <- function(dataList,groupList,makeNetFunc, nodeTransparency=155L,plotCytoscape=FALSE, verbose=FALSE) { + +# checks either/or provided, sets missing var to NULL +checkMakeNetFuncSims(makeNetFunc=makeNetFunc, + sims=sims,groupList=groupList) + if (missing(dataList)) stop("dataList is missing.") dat <- dataList2List(dataList, groupList) @@ -81,7 +88,9 @@ pheno_id <- setupFeatureDB(pheno,outDir) createPSN_MultiData(dataList=dat$assays,groupList=groupList, pheno=pheno_id, - netDir=outDir,customFunc=makeNetFunc,numCores=numCores, + netDir=outDir, + makeNetFunc=makeNetFunc,sims=sims, + numCores=numCores, verbose=FALSE) convertProfileToNetworks( netDir=profDir, diff --git a/R/predict.R b/R/predict.R index 4fd8ee0a..0df38814 100644 --- a/R/predict.R +++ b/R/predict.R @@ -5,12 +5,12 @@ #' @param testMAE (MultiAssayExperiment) new patient dataset for testing model. Assays must be the same as for trainMAE. #' @param groupList (list) list of features used to train the model. Keys are data types, and values are lists for groupings within those datatypes. #' e.g. keys could include {'clinical','rna','methylation'}, and values within 'rna' could include pathway names {'cell cycle', 'DNA repair'}, etc., -#' featSel will be used to subset -#' @param featSel (list) selected features to be used in the predictive model. 
+#' selectedFeatures will be used to subset +#' @param selectedFeatures (list) selected features to be used in the predictive model. #' keys are patient labels (e.g. "responder/nonresponder"), and values are feature names #' identified by running buildPredictor(). Feature names must correspond to names of groupList, from which they will be subset. #' @param makeNetFunc (function) function to create PSN features from patient data. See makeNetFunc in buildPredictor() for details -#' @param impute (logical) if TRUE imputes train and test samples separately before creating features. Currently unsupported. +#' @param sims (list) rules for creating PSN. Preferred over makeNetFunc. #' @param outDir (char) directory for results #' @param verbose (logical) print messages #' @param numCores (integer) number of CPU cores for parallel processing @@ -20,19 +20,20 @@ #' columns are: 1) ID, 2) STATUS (ground truth), 3)