diff --git a/R/FeatureEngineering.R b/R/FeatureEngineering.R index 0f40d38b1..486616e92 100644 --- a/R/FeatureEngineering.R +++ b/R/FeatureEngineering.R @@ -41,40 +41,13 @@ createFeatureEngineeringSettings <- function(type = 'none'){ } -#' Create the settings for defining any feature selection that will be done -#' -#' @details -#' Returns an object of class \code{featureEngineeringSettings} that specifies the sampling function that will be called and the settings -#' -#' @param k This function returns the K features most associated (univariately) to the outcome -#' -#' @return -#' An object of class \code{featureEngineeringSettings} -# createUnivariateFeatureSelection <- function(k = 100){ -# -# if (inherits(k, 'numeric')) { -# k <- as.integer(k) -# } -# -# checkIsClass(k, 'integer') -# checkHigherEqual(k, 0) -# -# featureEngineeringSettings <- list(k = k) -# -# attr(featureEngineeringSettings, "fun") <- "univariateFeatureSelection" -# class(featureEngineeringSettings) <- "featureEngineeringSettings" -# -# return(featureEngineeringSettings) -# -# } - #' Create the settings for random foreat based feature selection #' #' @details #' Returns an object of class \code{featureEngineeringSettings} that specifies the sampling function that will be called and the settings #' -#' @param ntrees number of tree in forest -#' @param maxDepth MAx depth of each tree +#' @param ntrees Number of tree in forest +#' @param maxDepth Max depth of each tree #' #' @return #' An object of class \code{featureEngineeringSettings} @@ -97,67 +70,6 @@ createRandomForestFeatureSelection <- function(ntrees = 2000, maxDepth = 17){ return(featureEngineeringSettings) } -# univariateFeatureSelection <- function( -# trainData, -# featureEngineeringSettings, -# covariateIdsInclude = NULL){ -# -# if(is.null(covariateIdsInclude)){ -# #convert data into matrix: -# mappedData <- toSparseM(trainData, trainData$labels) -# -# matrixData <- mappedData$dataMatrix -# labels <- mappedData$labels -# covariateMap <- mappedData$covariateMap -# -# X <- reticulate::r_to_py(matrixData) -# y <- reticulate::r_to_py(labels[,'outcomeCount']) -# -# np <- reticulate::import('numpy') -# os <- reticulate::import('os') -# sys <- reticulate::import('sys') -# math <- reticulate::import('math') -# scipy <- reticulate::import('scipy') -# -# sklearn <- reticulate::import('sklearn') -# -# SelectKBest <- sklearn$feature_selection$SelectKBest -# chi2 <- sklearn$feature_selection$chi2 -# -# kbest <- SelectKBest(chi2, k = featureEngineeringSettings$k)$fit(X, y$outcomeCount) -# kbest$scores_ <- np$nan_to_num(kbest$scores_) -# -# # taken from sklearn code, matches the application during transform call -# k <- featureEngineeringSettings$k -# mask <- np$zeros(length(kbest$scores_), dtype='bool') -# mask[np$argsort(kbest$scores_, kind="mergesort")+1][(length(kbest$scores_)-k+1):length(kbest$scores_)] <- TRUE -# -# covariateIdsInclude <- covariateMap[mask,]$covariateId -# } -# -# trainData$covariateData$covariates <- trainData$covariateData$covariates %>% -# dplyr::filter(.data$covariateId %in% covariateIdsInclude) -# -# trainData$covariateData$covariateRef <- trainData$covariateData$covariateRef %>% -# dplyr::filter(.data$covariateId %in% covariateIdsInclude) -# -# featureEngineering <- list( -# funct = 'univariateFeatureSelection', -# settings = list( -# featureEngineeringSettings = featureEngineeringSettings, -# covariateIdsInclude = covariateIdsInclude -# ) -# ) -# -# attr(trainData, 'metaData')$featureEngineering = listAppend( -# attr(trainData, 'metaData')$featureEngineering, -# featureEngineering -# ) -# -# return(trainData) -# -# } - randomForestFeatureSelection <- function( trainData, diff --git a/man/createRandomForestFeatureSelection.Rd b/man/createRandomForestFeatureSelection.Rd index 78977aa4b..ca7157573 100644 --- a/man/createRandomForestFeatureSelection.Rd +++ b/man/createRandomForestFeatureSelection.Rd @@ -2,28 +2,21 @@ % Please edit documentation in R/FeatureEngineering.R \name{createRandomForestFeatureSelection} \alias{createRandomForestFeatureSelection} -\title{Create the settings for defining any feature selection that will be done} +\title{Create the settings for random foreat based feature selection} \usage{ createRandomForestFeatureSelection(ntrees = 2000, maxDepth = 17) } \arguments{ -\item{ntrees}{number of tree in forest} +\item{ntrees}{Number of tree in forest} -\item{maxDepth}{MAx depth of each tree} - -\item{k}{This function returns the K features most associated (univariately) to the outcome} +\item{maxDepth}{Max depth of each tree} } \value{ -An object of class \code{featureEngineeringSettings} -Create the settings for random foreat based feature selection - An object of class \code{featureEngineeringSettings} } \description{ -Create the settings for defining any feature selection that will be done +Create the settings for random foreat based feature selection } \details{ -Returns an object of class \code{featureEngineeringSettings} that specifies the sampling function that will be called and the settings - Returns an object of class \code{featureEngineeringSettings} that specifies the sampling function that will be called and the settings } diff --git a/man/loadTrainTestData.Rd b/man/loadTrainTestData.Rd deleted file mode 100644 index 092f9ee30..000000000 --- a/man/loadTrainTestData.Rd +++ /dev/null @@ -1,27 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/SaveLoadData.R -\name{loadTrainTestData} -\alias{loadTrainTestData} -\title{Load the cohort data from a folder} -\usage{ -loadTrainTestData(file, readOnly = TRUE) -} -\arguments{ -\item{file}{The name of the folder containing the data.} - -\item{readOnly}{If true, the data is opened read only.} -} -\value{ -Data object before runModelDevelopment. -} -\description{ -\code{loadTrainTestData} loads an data object before runModelDevelopment from a folder in the file. -system. -} -\details{ -The data will be written to a set of files in the folder specified by the user. -} -\examples{ -# todo - -} diff --git a/man/saveTrainTestData.Rd b/man/saveTrainTestData.Rd deleted file mode 100644 index 79245cb9b..000000000 --- a/man/saveTrainTestData.Rd +++ /dev/null @@ -1,28 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/SaveLoadData.R -\name{saveTrainTestData} -\alias{saveTrainTestData} -\title{Save the cohort data to folder} -\usage{ -saveTrainTestData(data, file, test = T, envir = NULL, overwrite = F) -} -\arguments{ -\item{data}{Data object before runModelDevelopment.} - -\item{file}{The name of the folder where the data will be written. The folder should -not yet exist.} - -\item{envir}{The environment for to evaluate variables when saving.} - -\item{overwrite}{Whether to force overwrite an existing file} -} -\description{ -\code{saveTrainTestData} saves an data object before runModelDevelopment to folder. -} -\details{ -The data will be written to a set of files in the folder specified by the user. -} -\examples{ -# todo - -}