/*
* Copyright 2016 IBM Corp.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
var Utils = require('../../utils.js');
var RDD = require('../../rdd/RDD.js');
var gKernelP;
/**
 * A decision tree learning algorithm for classification and regression.
 * It supports both continuous and categorical features.
 *
 * The constructor declares no formal parameters; whatever arguments it is
 * called with are handed unchanged to Utils.handleConstructor together with
 * the module-level gKernelP.
 *
 * @param {module:eclairjs/mllib/tree/configuration.Strategy} strategy The configuration
 *     parameters for the tree algorithm which specify the type of algorithm
 *     (classification, regression, etc.), feature type (continuous, categorical),
 *     depth of the tree, quantile calculation strategy, etc.
 * @class
 * @memberof module:eclairjs/mllib/tree
 */
function DecisionTree() {
    var constructorArgs = arguments;
    Utils.handleConstructor(this, constructorArgs, gKernelP);
}
/**
 * Method to train a decision tree model over an RDD.
 *
 * This method is a stub: every call throws and nothing is returned.
 *
 * @param {module:eclairjs/rdd.RDD} input Training data: RDD of {@link LabeledPoint}
 * @returns {module:eclairjs/mllib/tree/model.DecisionTreeModel} DecisionTreeModel that can be used for prediction
 * @throws {string} Always; the method is not implemented by EclairJS.
 */
DecisionTree.prototype.run = function(input) {
    // TODO: implement by passing Utils.generate a descriptor with
    //   target: this, method: 'run',
    //   args: [{ value: input, type: 'RDD' }],
    //   returnType: DecisionTreeModel
    throw "not implemented by EclairJS";
};
//
// static methods
//
/**
 * Method to train a decision tree model.
 * The method supports binary and multiclass classification and regression.
 *
 * Note: Using [[org.apache.spark.mllib.tree.DecisionTree$#trainClassifier]]
 * and [[org.apache.spark.mllib.tree.DecisionTree$#trainRegressor]]
 * is recommended to clearly separate classification and regression.
 *
 * This method is a stub: every call throws and nothing is returned.
 *
 * @param {module:eclairjs/rdd.RDD} input Training dataset: RDD of {@link LabeledPoint}.
 *     For classification, labels should take values {0, 1, ..., numClasses-1}.
 *     For regression, labels are real numbers.
 * @param {module:eclairjs/mllib/tree/configuration.Strategy} strategy The configuration
 *     parameters for the tree algorithm which specify the type of algorithm
 *     (classification, regression, etc.), feature type (continuous, categorical),
 *     depth of the tree, quantile calculation strategy, etc.
 * @returns {module:eclairjs/mllib/tree/model.DecisionTreeModel} DecisionTreeModel that can be used for prediction
 * @throws {string} Always; the method is not implemented by EclairJS.
 */
DecisionTree.train0 = function(input,strategy) {
    // TODO: implement by passing Utils.generate a descriptor with
    //   target: DecisionTree, method: 'train',
    //   args: [{ value: input, type: 'RDD' },
    //          { value: strategy, type: 'Strategy' }],
    //   returnType: DecisionTreeModel
    throw "not implemented by EclairJS";
};
/**
 * Method to train a decision tree model.
 * The method supports binary and multiclass classification and regression.
 *
 * Note: Using [[org.apache.spark.mllib.tree.DecisionTree$#trainClassifier]]
 * and [[org.apache.spark.mllib.tree.DecisionTree$#trainRegressor]]
 * is recommended to clearly separate classification and regression.
 *
 * This method is a stub: every call throws and nothing is returned.
 *
 * @param {module:eclairjs/rdd.RDD} input Training dataset: RDD of {@link LabeledPoint}.
 *     For classification, labels should take values {0, 1, ..., numClasses-1}.
 *     For regression, labels are real numbers.
 * @param {Algo} algo algorithm, classification or regression
 * @param {Impurity} impurity impurity criterion used for information gain calculation
 * @param {number} maxDepth Maximum depth of the tree.
 *     E.g., depth 0 means 1 leaf node; depth 1 means 1 internal node + 2 leaf nodes.
 * @returns {module:eclairjs/mllib/tree/model.DecisionTreeModel} DecisionTreeModel that can be used for prediction
 * @throws {string} Always; the method is not implemented by EclairJS.
 */
DecisionTree.train1 = function(input,algo,impurity,maxDepth) {
    // TODO: implement by passing Utils.generate a descriptor with
    //   target: DecisionTree, method: 'train',
    //   args: [{ value: input, type: 'RDD' },
    //          { value: algo, type: 'Algo' },
    //          { value: impurity, type: 'Impurity' },
    //          { value: maxDepth, type: 'number' }],
    //   returnType: DecisionTreeModel
    throw "not implemented by EclairJS";
};
/**
 * Method to train a decision tree model.
 * The method supports binary and multiclass classification and regression.
 *
 * Note: Using [[org.apache.spark.mllib.tree.DecisionTree$#trainClassifier]]
 * and [[org.apache.spark.mllib.tree.DecisionTree$#trainRegressor]]
 * is recommended to clearly separate classification and regression.
 *
 * This method is a stub: every call throws and nothing is returned.
 *
 * @param {module:eclairjs/rdd.RDD} input Training dataset: RDD of {@link LabeledPoint}.
 *     For classification, labels should take values {0, 1, ..., numClasses-1}.
 *     For regression, labels are real numbers.
 * @param {Algo} algo algorithm, classification or regression
 * @param {Impurity} impurity impurity criterion used for information gain calculation
 * @param {number} maxDepth Maximum depth of the tree.
 *     E.g., depth 0 means 1 leaf node; depth 1 means 1 internal node + 2 leaf nodes.
 * @param {number} numClasses number of classes for classification. Default value of 2.
 * @returns {module:eclairjs/mllib/tree/model.DecisionTreeModel} DecisionTreeModel that can be used for prediction
 * @throws {string} Always; the method is not implemented by EclairJS.
 */
DecisionTree.train2 = function(input,algo,impurity,maxDepth,numClasses) {
    // TODO: implement by passing Utils.generate a descriptor with
    //   target: DecisionTree, method: 'train',
    //   args: [{ value: input, type: 'RDD' },
    //          { value: algo, type: 'Algo' },
    //          { value: impurity, type: 'Impurity' },
    //          { value: maxDepth, type: 'number' },
    //          { value: numClasses, type: 'number' }],
    //   returnType: DecisionTreeModel
    throw "not implemented by EclairJS";
};
/**
 * Method to train a decision tree model.
 * The method supports binary and multiclass classification and regression.
 *
 * Note: Using [[org.apache.spark.mllib.tree.DecisionTree$#trainClassifier]]
 * and [[org.apache.spark.mllib.tree.DecisionTree$#trainRegressor]]
 * is recommended to clearly separate classification and regression.
 *
 * This method is a stub: every call throws and nothing is returned.
 *
 * @param {module:eclairjs/rdd.RDD} input Training dataset: RDD of {@link LabeledPoint}.
 *     For classification, labels should take values {0, 1, ..., numClasses-1}.
 *     For regression, labels are real numbers.
 * @param {Algo} algo classification or regression
 * @param {Impurity} impurity criterion used for information gain calculation
 * @param {number} maxDepth Maximum depth of the tree.
 *     E.g., depth 0 means 1 leaf node; depth 1 means 1 internal node + 2 leaf nodes.
 * @param {number} numClasses number of classes for classification. Default value of 2.
 * @param {number} maxBins maximum number of bins used for splitting features
 * @param {QuantileStrategy} quantileCalculationStrategy algorithm for calculating quantiles
 * @param {Map} categoricalFeaturesInfo Map storing arity of categorical features.
 *     E.g., an entry (n -> k) indicates that feature n is categorical
 *     with k categories indexed from 0: {0, 1, ..., k-1}.
 * @returns {module:eclairjs/mllib/tree/model.DecisionTreeModel} DecisionTreeModel that can be used for prediction
 * @throws {string} Always; the method is not implemented by EclairJS.
 */
DecisionTree.train3 = function(input,algo,impurity,maxDepth,numClasses,maxBins,quantileCalculationStrategy,categoricalFeaturesInfo) {
    // TODO: implement by passing Utils.generate a descriptor with
    //   target: DecisionTree, method: 'train',
    //   args: [{ value: input, type: 'RDD' },
    //          { value: algo, type: 'Algo' },
    //          { value: impurity, type: 'Impurity' },
    //          { value: maxDepth, type: 'number' },
    //          { value: numClasses, type: 'number' },
    //          { value: maxBins, type: 'number' },
    //          { value: quantileCalculationStrategy, type: 'QuantileStrategy' },
    //          { value: categoricalFeaturesInfo, type: 'Map' }],
    //   returnType: DecisionTreeModel
    throw "not implemented by EclairJS";
};
/**
 * Method to train a decision tree model for binary or multiclass classification.
 *
 * Builds a static-call descriptor for the 'trainClassifier' method, wrapping
 * every argument actually passed with Utils.wrapArguments, and returns
 * whatever Utils.generate produces for that descriptor.
 *
 * @param {module:eclairjs/rdd.RDD} input Training dataset: RDD of {@link LabeledPoint}.
 *     Labels should take values {0, 1, ..., numClasses-1}.
 * @param {number} numClasses number of classes for classification.
 * @param {object} categoricalFeaturesInfo object name key pair map storing arity of categorical features.
 *     E.g., an entry (n -> k) indicates that feature n is categorical
 *     with k categories indexed from 0: {0, 1, ..., k-1}.
 * @param {string} impurity Criterion used for information gain calculation.
 *     Supported values: "gini" (recommended) or "entropy".
 * @param {number} maxDepth Maximum depth of the tree.
 *     E.g., depth 0 means 1 leaf node; depth 1 means 1 internal node + 2 leaf nodes.
 *     (suggested value: 5)
 * @param {number} maxBins maximum number of bins used for splitting features
 *     (suggested value: 32)
 * @returns {module:eclairjs/mllib/tree/model.DecisionTreeModel} DecisionTreeModel that can be used for prediction
 */
DecisionTree.trainClassifier = function(input,numClasses,categoricalFeaturesInfo,impurity,maxDepth,maxBins) {
    // Nothing in this file sets a kernelP property on the constructor, so the
    // module-level gKernelP (also used for the call below) is passed instead.
    // NOTE(review): assumes the model module's factory accepts the kernel the
    // same way this module's export does - confirm.
    var DecisionTreeModel = require('./model/DecisionTreeModel.js')(gKernelP);
    var args = {
        // Name the class explicitly so the call does not depend on how `this`
        // is bound when the static method is invoked detached.
        target: DecisionTree,
        method: 'trainClassifier',
        args: Utils.wrapArguments(arguments),
        returnType: DecisionTreeModel,
        kernelP: gKernelP,
        static: true
    };
    return Utils.generate(args);
};
/**
 * Method to train a decision tree model for regression.
 * Corresponds to [[org.apache.spark.mllib.tree.DecisionTree$#trainRegressor]].
 *
 * This method is a stub: every call throws and nothing is returned.
 *
 * @param {module:eclairjs/rdd.RDD} input Training dataset: RDD of {@link LabeledPoint}.
 *     Labels are real numbers.
 * @param {Map} categoricalFeaturesInfo Map storing arity of categorical features.
 *     E.g., an entry (n -> k) indicates that feature n is categorical
 *     with k categories indexed from 0: {0, 1, ..., k-1}.
 * @param {string} impurity Criterion used for information gain calculation.
 *     Supported values: "variance".
 * @param {number} maxDepth Maximum depth of the tree.
 *     E.g., depth 0 means 1 leaf node; depth 1 means 1 internal node + 2 leaf nodes.
 *     (suggested value: 5)
 * @param {number} maxBins maximum number of bins used for splitting features
 *     (suggested value: 32)
 * @returns {module:eclairjs/mllib/tree/model.DecisionTreeModel} DecisionTreeModel that can be used for prediction
 * @throws {string} Always; the method is not implemented by EclairJS.
 */
DecisionTree.trainRegressorwithnumber = function(input,categoricalFeaturesInfo,impurity,maxDepth,maxBins) {
    // Defined exactly once: a second assignment to the same property would
    // silently replace this one.
    // TODO: implement by passing Utils.generate a descriptor with
    //   target: DecisionTree, method: 'trainRegressor',
    //   args: [{ value: input, type: 'RDD' },
    //          { value: categoricalFeaturesInfo, type: 'Map' },
    //          { value: impurity, type: 'string' },
    //          { value: maxDepth, type: 'number' },
    //          { value: maxBins, type: 'number' }],
    //   returnType: DecisionTreeModel
    throw "not implemented by EclairJS";
};
DecisionTree.moduleLocation = '/mllib/tree/DecisionTree';
module.exports = function(kP) {
if (kP) gKernelP = kP;
return DecisionTree;
};