Source: mllib/tree/GradientBoostedTrees.js

/*
 * Copyright 2016 IBM Corp.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

var Utils = require('../../utils.js');
var RDD = require('../../rdd/RDD.js');
var GradientBoostedTreesModel = require('./model/GradientBoostedTreesModel.js')();

var gKernelP;

/**
 * A class that implements
 * {@link http://en.wikipedia.org/wiki/Gradient_boosting|Stochastic Gradient Boosting}
 * for regression and binary classification.
 *
 * The implementation is based upon:
 *   J.H. Friedman.  "Stochastic Gradient Boosting."  1999.
 *
 * Notes on Gradient Boosting vs. TreeBoost:
 *  - This implementation is for Stochastic Gradient Boosting, not for TreeBoost.
 *  - Both algorithms learn tree ensembles by minimizing loss functions.
 *  - TreeBoost (Friedman, 1999) additionally modifies the outputs at tree leaf nodes
 *    based on the loss function, whereas the original gradient boosting method does not.
 *     - When the loss is SquaredError, these methods give the same result, but they could differ
 *       for other loss functions.
 *
 * @param {module:eclairjs/mllib/tree/configuration.BoostingStrategy} boostingStrategy Parameters for the gradient boosting algorithm.
 * @class
 * @memberof module:eclairjs/mllib/tree
 */
function GradientBoostedTrees() {
  // No formal parameters are declared: whatever the caller supplied is handed
  // over untouched, as the raw `arguments` object, together with the new
  // instance and the module-level `gKernelP` value.
  var ctorArgs = arguments;

  Utils.handleConstructor(this, ctorArgs, gKernelP);
}

/**
 * Method to train a gradient boosting model
 * @param {module:eclairjs/rdd.RDD} input  Training dataset: RDD of {@link LabeledPoint}.
 * @returns {GradientBoostedTreesModel}  a gradient boosted trees model that can be used for prediction
 */
GradientBoostedTrees.prototype.run = function(input) {
  // Wrap the complete `arguments` object (not only `input`) with the shared helper.
  var wrappedArgs = Utils.wrapArguments(arguments);

  // Hand Utils.generate a descriptor for calling `run` on this instance, with
  // GradientBoostedTreesModel as the declared return type, and pass its result back.
  return Utils.generate({
    target: this,
    method: 'run',
    args: wrappedArgs,
    returnType: GradientBoostedTreesModel
  });
};


/**
 * Method to train a gradient boosting model while evaluating it against a separate validation dataset
 * @param {module:eclairjs/rdd.RDD} input  Training dataset: RDD of {@link LabeledPoint}.
 * @param {module:eclairjs/rdd.RDD} validationInput  Validation dataset.
 *                        This dataset should be different from the training dataset,
 *                        but it should follow the same distribution.
 *                        E.g., these two datasets could be created from an original dataset
 *                        by using the RDD randomSplit() method (org.apache.spark.rdd.RDD.randomSplit in Spark).
 * @returns {GradientBoostedTreesModel}  a gradient boosted trees model that can be used for prediction
 */
GradientBoostedTrees.prototype.runWithValidation = function(input,validationInput) {
  // Wrap the complete `arguments` object (both datasets plus anything extra
  // the caller passed) with the shared helper.
  var wrappedArgs = Utils.wrapArguments(arguments);

  // Hand Utils.generate a descriptor for calling `runWithValidation` on this
  // instance, with GradientBoostedTreesModel as the declared return type.
  return Utils.generate({
    target: this,
    method: 'runWithValidation',
    args: wrappedArgs,
    returnType: GradientBoostedTreesModel
  });
};

//
// static methods
//


/**
 * Method to train a gradient boosting model.
 *
 * @param {module:eclairjs/rdd.RDD} input  Training dataset: RDD of {@link LabeledPoint}.
 *              For classification, labels should take values {0, 1, ..., numClasses-1}.
 *              For regression, labels are real numbers.
 * @param {module:eclairjs/mllib/tree/configuration.BoostingStrategy} boostingStrategy  Configuration options for the boosting algorithm.
 * @returns {GradientBoostedTreesModel}  a gradient boosted trees model that can be used for prediction
 */
GradientBoostedTrees.train = function(input, boostingStrategy) {
  // Wrap the complete `arguments` object with the shared helper.
  var wrappedArgs = Utils.wrapArguments(arguments);

  // Static call: the target is the constructor itself rather than an instance,
  // so the descriptor is flagged `static` and carries the module-level
  // `gKernelP` value explicitly.
  return Utils.generate({
    target: GradientBoostedTrees,
    method: 'train',
    args: wrappedArgs,
    static: true,
    kernelP: gKernelP,
    returnType: GradientBoostedTreesModel
  });
};

GradientBoostedTrees.moduleLocation = '/mllib/tree/GradientBoostedTrees';

module.exports = function(kP) {
  if (kP) gKernelP = kP;

  return GradientBoostedTrees;
};