Source: eclairjs/mllib/tree/DecisionTree.js

/*                                                                         
 * Copyright 2016 IBM Corp.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

(function () {
    var JavaWrapper = require(EclairJS_Globals.NAMESPACE + '/JavaWrapper');
    var Logger = require(EclairJS_Globals.NAMESPACE + '/Logger');
    var Utils = require(EclairJS_Globals.NAMESPACE + '/Utils');
    /**
     * A class which implements a decision tree learning algorithm for classification and regression.
     * It supports both continuous and categorical features.
     * @param strategy The configuration parameters for the tree algorithm which specify the type
     *                 of algorithm (classification, regression, etc.), feature type (continuous,
     *                 categorical), depth of the tree, quantile calculation strategy, etc.
     * @classdesc
     */

    /**
     * JavaScript wrapper around org.apache.spark.mllib.tree.DecisionTree.
     * The wrapped JVM object is either built from a Strategy or adopted as is when an existing
     * JVM DecisionTree is supplied.
     * @param {module:eclairjs/mllib/tree/configuration.Strategy} strategy  The configuration parameters
     *                 for the tree algorithm. An org.apache.spark.mllib.tree.DecisionTree instance is
     *                 also accepted, in which case it is wrapped without building a new one.
     * @throws {string} "DecisionTree invalid constructor parameter" when the argument is neither a
     *                 Strategy nor a JVM DecisionTree.
     * @class
     * @memberof module:eclairjs/mllib/tree
     */
    var DecisionTree = function (strategy) {
        this.logger = Logger.getLogger("DecisionTree_js");
        var jvmObject;
        // The JVM-instance check comes first so that wrapping an existing object never depends on
        // Strategy being resolvable.
        if (strategy instanceof org.apache.spark.mllib.tree.DecisionTree) {
            jvmObject = strategy;
        } else if (strategy instanceof Strategy) {
            // NOTE(review): Strategy is not required in the visible part of this file; presumably it
            // is available as a global at runtime - confirm.
            jvmObject = new org.apache.spark.mllib.tree.DecisionTree(Utils.unwrapObject(strategy));
        } else {
            throw "DecisionTree invalid constructor parameter";
        }

        JavaWrapper.call(this, jvmObject);

    };

    // Inherit from JavaWrapper and restore the constructor reference replaced by Object.create.
    DecisionTree.prototype = Object.create(JavaWrapper.prototype);

    DecisionTree.prototype.constructor = DecisionTree;


    /**
     * Method to train a decision tree model over an RDD.
     * Not implemented by EclairJS yet: every call throws.
     * @param {module:eclairjs.RDD} input  Training data: RDD of {@link LabeledPoint}
     * @returns {module:eclairjs/mllib/tree/model.DecisionTreeModel}  DecisionTreeModel that can be used
     *                 for prediction (never returned at present, see the throws clause)
     * @throws {string} Always throws "not implemented by EclairJS".
     */
    DecisionTree.prototype.run = function (input) {
        throw "not implemented by EclairJS";
        // Sketch of the intended implementation, kept for whoever implements this method:
        //   var input_uw = Utils.unwrapObject(input);
        //   var javaObject =  this.getJavaObject().run(input_uw);
        //   return new DecisionTreeModel(javaObject);
    };
//
// static methods
//


    /**
     * Method to train a decision tree model.
     * The method supports binary and multiclass classification and regression.
     * Not implemented by EclairJS yet: every call throws.
     *
     * Note: Using {@link module:eclairjs/mllib/tree.DecisionTree.trainClassifier}
     *       and {@link module:eclairjs/mllib/tree.DecisionTree.trainRegressor}
     *       is recommended to clearly separate classification and regression.
     *
     * @param {module:eclairjs.RDD} input  Training dataset: RDD of {@link LabeledPoint}.
     *              For classification, labels should take values {0, 1, ..., numClasses-1}.
     *              For regression, labels are real numbers.
     * @param {module:eclairjs/mllib/tree/configuration.Strategy} strategy  The configuration parameters for the tree algorithm which specify the type
     *                 of algorithm (classification, regression, etc.), feature type (continuous,
     *                 categorical), depth of the tree, quantile calculation strategy, etc.
     * @returns {module:eclairjs/mllib/tree/model.DecisionTreeModel}  DecisionTreeModel that can be used
     *                 for prediction (never returned at present, see the throws clause)
     * @throws {string} Always throws "not implemented by EclairJS".
     */
    DecisionTree.train0 = function (input, strategy) {
        throw "not implemented by EclairJS";
        // Sketch of the intended implementation, kept for whoever implements this method:
        //   var input_uw = Utils.unwrapObject(input);
        //   var strategy_uw = Utils.unwrapObject(strategy);
        //   var javaObject =  org.apache.spark.mllib.tree.DecisionTree.train(input_uw,strategy_uw);
        //   return new DecisionTreeModel(javaObject);
    };


    /**
     * Method to train a decision tree model.
     * The method supports binary and multiclass classification and regression.
     * Not implemented by EclairJS yet: every call throws.
     *
     * Note: Using {@link module:eclairjs/mllib/tree.DecisionTree.trainClassifier}
     *       and {@link module:eclairjs/mllib/tree.DecisionTree.trainRegressor}
     *       is recommended to clearly separate classification and regression.
     *
     * @param {module:eclairjs.RDD} input  Training dataset: RDD of {@link LabeledPoint}.
     *              For classification, labels should take values {0, 1, ..., numClasses-1}.
     *              For regression, labels are real numbers.
     * @param {Algo} algo  algorithm, classification or regression
     * @param {Impurity} impurity  impurity criterion used for information gain calculation
     * @param {number} maxDepth  Maximum depth of the tree.
     *                 E.g., depth 0 means 1 leaf node; depth 1 means 1 internal node + 2 leaf nodes.
     * @returns {module:eclairjs/mllib/tree/model.DecisionTreeModel}  DecisionTreeModel that can be used
     *                 for prediction (never returned at present, see the throws clause)
     * @throws {string} Always throws "not implemented by EclairJS".
     */
    DecisionTree.train1 = function (input, algo, impurity, maxDepth) {
        throw "not implemented by EclairJS";
        // Sketch of the intended implementation, kept for whoever implements this method:
        //   var input_uw = Utils.unwrapObject(input);
        //   var algo_uw = Utils.unwrapObject(algo);
        //   var impurity_uw = Utils.unwrapObject(impurity);
        //   var javaObject =  org.apache.spark.mllib.tree.DecisionTree.train(input_uw,algo_uw,impurity_uw,maxDepth);
        //   return new DecisionTreeModel(javaObject);
    };


    /**
     * Method to train a decision tree model.
     * The method supports binary and multiclass classification and regression.
     * Not implemented by EclairJS yet: every call throws.
     *
     * Note: Using {@link module:eclairjs/mllib/tree.DecisionTree.trainClassifier}
     *       and {@link module:eclairjs/mllib/tree.DecisionTree.trainRegressor}
     *       is recommended to clearly separate classification and regression.
     *
     * @param {module:eclairjs.RDD} input  Training dataset: RDD of {@link LabeledPoint}.
     *              For classification, labels should take values {0, 1, ..., numClasses-1}.
     *              For regression, labels are real numbers.
     * @param {Algo} algo  algorithm, classification or regression
     * @param {Impurity} impurity  impurity criterion used for information gain calculation
     * @param {number} maxDepth  Maximum depth of the tree.
     *                 E.g., depth 0 means 1 leaf node; depth 1 means 1 internal node + 2 leaf nodes.
     * @param {number} numClasses  number of classes for classification. Default value of 2.
     * @returns {module:eclairjs/mllib/tree/model.DecisionTreeModel}  DecisionTreeModel that can be used
     *                 for prediction (never returned at present, see the throws clause)
     * @throws {string} Always throws "not implemented by EclairJS".
     */
    DecisionTree.train2 = function (input, algo, impurity, maxDepth, numClasses) {
        throw "not implemented by EclairJS";
        // Sketch of the intended implementation, kept for whoever implements this method:
        //   var input_uw = Utils.unwrapObject(input);
        //   var algo_uw = Utils.unwrapObject(algo);
        //   var impurity_uw = Utils.unwrapObject(impurity);
        //   var javaObject =  org.apache.spark.mllib.tree.DecisionTree.train(input_uw,algo_uw,impurity_uw,maxDepth,numClasses);
        //   return new DecisionTreeModel(javaObject);
    };


    /**
     * Method to train a decision tree model.
     * The method supports binary and multiclass classification and regression.
     * Not implemented by EclairJS yet: every call throws.
     *
     * Note: Using {@link module:eclairjs/mllib/tree.DecisionTree.trainClassifier}
     *       and {@link module:eclairjs/mllib/tree.DecisionTree.trainRegressor}
     *       is recommended to clearly separate classification and regression.
     *
     * @param {module:eclairjs.RDD} input  Training dataset: RDD of {@link LabeledPoint}.
     *              For classification, labels should take values {0, 1, ..., numClasses-1}.
     *              For regression, labels are real numbers.
     * @param {Algo} algo  classification or regression
     * @param {Impurity} impurity  criterion used for information gain calculation
     * @param {number} maxDepth  Maximum depth of the tree.
     *                 E.g., depth 0 means 1 leaf node; depth 1 means 1 internal node + 2 leaf nodes.
     * @param {number} numClasses  number of classes for classification. Default value of 2.
     * @param {number} maxBins  maximum number of bins used for splitting features
     * @param {QuantileStrategy} quantileCalculationStrategy   algorithm for calculating quantiles
     * @param {Map} categoricalFeaturesInfo  Map storing arity of categorical features.
     *                                E.g., an entry (n -> k) indicates that feature n is categorical
     *                                with k categories indexed from 0: {0, 1, ..., k-1}.
     * @returns {module:eclairjs/mllib/tree/model.DecisionTreeModel}  DecisionTreeModel that can be used
     *                 for prediction (never returned at present, see the throws clause)
     * @throws {string} Always throws "not implemented by EclairJS".
     */
    DecisionTree.train3 = function (input, algo, impurity, maxDepth, numClasses, maxBins, quantileCalculationStrategy, categoricalFeaturesInfo) {
        throw "not implemented by EclairJS";
        // Sketch of the intended implementation, kept for whoever implements this method:
        //   var input_uw = Utils.unwrapObject(input);
        //   var algo_uw = Utils.unwrapObject(algo);
        //   var impurity_uw = Utils.unwrapObject(impurity);
        //   var quantileCalculationStrategy_uw = Utils.unwrapObject(quantileCalculationStrategy);
        //   var categoricalFeaturesInfo_uw = Utils.unwrapObject(categoricalFeaturesInfo);
        //   var javaObject =  org.apache.spark.mllib.tree.DecisionTree.train(input_uw,algo_uw,impurity_uw,maxDepth,numClasses,maxBins,quantileCalculationStrategy_uw,categoricalFeaturesInfo_uw);
        //   return new DecisionTreeModel(javaObject);
    };


    /**
     * Trains a decision tree model for binary or multiclass classification by delegating to
     * org.apache.spark.mllib.tree.DecisionTree.trainClassifier.
     *
     * @param {module:eclairjs.RDD} input  Training dataset: RDD of {@link LabeledPoint}.
     *              Labels should take values {0, 1, ..., numClasses-1}.
     * @param {number} numClasses  number of classes for classification.
     * @param {object} categoricalFeaturesInfo  plain object of key/value pairs storing the arity of
     *                                categorical features. E.g., an entry (n -> k) indicates that
     *                                feature n is categorical with k categories indexed from 0:
     *                                {0, 1, ..., k-1}. It is converted with Utils.createJavaHashMap
     *                                before being passed on.
     * @param {string} impurity  Criterion used for information gain calculation.
     *                 Supported values: "gini" (recommended) or "entropy".
     * @param {number} maxDepth  Maximum depth of the tree.
     *                 E.g., depth 0 means 1 leaf node; depth 1 means 1 internal node + 2 leaf nodes.
     *                  (suggested value: 5)
     * @param {number} maxBins  maximum number of bins used for splitting features
     *                 (suggested value: 32)
     * @returns {module:eclairjs/mllib/tree/model.DecisionTreeModel}  DecisionTreeModel that can be used for prediction
     */
    DecisionTree.trainClassifier = function (input, numClasses, categoricalFeaturesInfo, impurity, maxDepth, maxBins) {
        // Convert the JavaScript-side arguments into what the JVM call receives.
        var trainingData = Utils.unwrapObject(input),
            featureArity = Utils.createJavaHashMap(categoricalFeaturesInfo);

        var trainedModel = org.apache.spark.mllib.tree.DecisionTree.trainClassifier(
            trainingData,
            numClasses,
            featureArity,
            impurity,
            maxDepth,
            maxBins
        );

        // The JVM result goes through Utils.javaToJs before being handed back to the caller.
        return Utils.javaToJs(trainedModel);
    };


    /**
     * Trains a decision tree model for regression by delegating to
     * org.apache.spark.mllib.tree.DecisionTree.trainRegressor.
     *
     * @param {module:eclairjs.RDD} input  Training dataset: RDD of {@link LabeledPoint}.
     *              Labels are real numbers.
     * @param {object} categoricalFeaturesInfo  plain object of key/value pairs storing the arity of
     *                                categorical features. E.g., an entry (n -> k) indicates that
     *                                feature n is categorical with k categories indexed from 0:
     *                                {0, 1, ..., k-1}. It is converted with Utils.createJavaHashMap
     *                                before being passed on.
     * @param {string} impurity  Criterion used for information gain calculation.
     *                 Supported values: "variance".
     * @param {number} maxDepth  Maximum depth of the tree.
     *                 E.g., depth 0 means 1 leaf node; depth 1 means 1 internal node + 2 leaf nodes.
     *                  (suggested value: 5)
     * @param {number} maxBins  maximum number of bins used for splitting features
     *                 (suggested value: 32)
     * @returns {module:eclairjs/mllib/tree/model.DecisionTreeModel}  DecisionTreeModel that can be used for prediction
     */
    DecisionTree.trainRegressor = function (input, categoricalFeaturesInfo, impurity, maxDepth, maxBins) {
        // Convert the JavaScript-side arguments into what the JVM call receives.
        var trainingData = Utils.unwrapObject(input),
            featureArity = Utils.createJavaHashMap(categoricalFeaturesInfo);

        var trainedModel = org.apache.spark.mllib.tree.DecisionTree.trainRegressor(
            trainingData,
            featureArity,
            impurity,
            maxDepth,
            maxBins
        );

        // The JVM result goes through Utils.javaToJs before being handed back to the caller.
        return Utils.javaToJs(trainedModel);
    };

    module.exports = DecisionTree;

})();