Source: mllib/tree/DecisionTree.js

  1. /*
  2. * Copyright 2016 IBM Corp.
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. module.exports = function(kernelP) {
  17. return (function() {
  18. var Utils = require('../../utils.js');
  19. var RDD = require('../../rdd/RDD.js');
  20. var gKernelP = kernelP;
  /**
   * A decision tree learning algorithm for classification and regression.
   * It supports both continuous and categorical features.
   *
   * Construction is delegated to Utils.handleConstructor, which is handed the
   * new instance, the raw constructor arguments and the kernel promise that was
   * supplied to this module's factory function.
   *
   * @class
   * @memberof module:eclairjs/mllib/tree
   * @param {module:eclairjs/mllib/tree/configuration.Strategy} strategy The configuration
   *     parameters for the tree algorithm, which specify the type of algorithm
   *     (classification, regression, etc.), feature type (continuous, categorical),
   *     depth of the tree, quantile calculation strategy, etc.
   */
  function DecisionTree() {
    // Forward the untouched arguments object; handleConstructor decides how to use it.
    var ctorArgs = arguments;
    Utils.handleConstructor(this, ctorArgs, gKernelP);
  }
  /**
   * Method to train a decision tree model over an RDD.
   *
   * Not implemented yet: calling this method always throws.
   *
   * @param {module:eclairjs/rdd.RDD} input Training data: RDD of {@link LabeledPoint}
   * @returns {module:eclairjs/mllib/tree/model.DecisionTreeModel} DecisionTreeModel that can be used for prediction
   *     (intended contract once implemented)
   * @throws {Error} Always, because this method is not implemented by EclairJS.
   */
  DecisionTree.prototype.run = function(input) {
    // TODO: forward to the kernel through Utils.generate (target: this, method: 'run',
    // one RDD argument, returnType: DecisionTreeModel) once this call is supported.
    throw new Error('not implemented by EclairJS');
  };
  56. //
  57. // static methods
  58. //
  /**
   * Method to train a decision tree model.
   * The method supports binary and multiclass classification and regression.
   *
   * Note: Using [[org.apache.spark.mllib.tree.DecisionTree$#trainClassifier]]
   * and [[org.apache.spark.mllib.tree.DecisionTree$#trainRegressor]]
   * is recommended to clearly separate classification and regression.
   *
   * Not implemented yet: calling this method always throws.
   *
   * @param {module:eclairjs/rdd.RDD} input Training dataset: RDD of {@link LabeledPoint}.
   *     For classification, labels should take values {0, 1, ..., numClasses-1}.
   *     For regression, labels are real numbers.
   * @param {module:eclairjs/mllib/tree/configuration.Strategy} strategy The configuration parameters
   *     for the tree algorithm which specify the type of algorithm (classification,
   *     regression, etc.), feature type (continuous, categorical), depth of the tree,
   *     quantile calculation strategy, etc.
   * @returns {module:eclairjs/mllib/tree/model.DecisionTreeModel} DecisionTreeModel that can be used for prediction
   *     (intended contract once implemented)
   * @throws {Error} Always, because this method is not implemented by EclairJS.
   */
  DecisionTree.train0 = function(input,strategy) {
    // TODO: forward to the static 'train' method through Utils.generate with
    // (input: RDD, strategy: Strategy) once this overload is supported.
    throw new Error('not implemented by EclairJS');
  };
  /**
   * Method to train a decision tree model.
   * The method supports binary and multiclass classification and regression.
   *
   * Note: Using [[org.apache.spark.mllib.tree.DecisionTree$#trainClassifier]]
   * and [[org.apache.spark.mllib.tree.DecisionTree$#trainRegressor]]
   * is recommended to clearly separate classification and regression.
   *
   * Not implemented yet: calling this method always throws.
   *
   * @param {module:eclairjs/rdd.RDD} input Training dataset: RDD of {@link LabeledPoint}.
   *     For classification, labels should take values {0, 1, ..., numClasses-1}.
   *     For regression, labels are real numbers.
   * @param {Algo} algo algorithm, classification or regression
   * @param {Impurity} impurity impurity criterion used for information gain calculation
   * @param {number} maxDepth Maximum depth of the tree.
   *     E.g., depth 0 means 1 leaf node; depth 1 means 1 internal node + 2 leaf nodes.
   * @returns {module:eclairjs/mllib/tree/model.DecisionTreeModel} DecisionTreeModel that can be used for prediction
   *     (intended contract once implemented)
   * @throws {Error} Always, because this method is not implemented by EclairJS.
   */
  DecisionTree.train1 = function(input,algo,impurity,maxDepth) {
    // TODO: forward to the static 'train' method through Utils.generate with
    // (input: RDD, algo: Algo, impurity: Impurity, maxDepth: number) once supported.
    throw new Error('not implemented by EclairJS');
  };
  /**
   * Method to train a decision tree model.
   * The method supports binary and multiclass classification and regression.
   *
   * Note: Using [[org.apache.spark.mllib.tree.DecisionTree$#trainClassifier]]
   * and [[org.apache.spark.mllib.tree.DecisionTree$#trainRegressor]]
   * is recommended to clearly separate classification and regression.
   *
   * Not implemented yet: calling this method always throws.
   *
   * @param {module:eclairjs/rdd.RDD} input Training dataset: RDD of {@link LabeledPoint}.
   *     For classification, labels should take values {0, 1, ..., numClasses-1}.
   *     For regression, labels are real numbers.
   * @param {Algo} algo algorithm, classification or regression
   * @param {Impurity} impurity impurity criterion used for information gain calculation
   * @param {number} maxDepth Maximum depth of the tree.
   *     E.g., depth 0 means 1 leaf node; depth 1 means 1 internal node + 2 leaf nodes.
   * @param {number} numClasses number of classes for classification. Default value of 2.
   * @returns {module:eclairjs/mllib/tree/model.DecisionTreeModel} DecisionTreeModel that can be used for prediction
   *     (intended contract once implemented)
   * @throws {Error} Always, because this method is not implemented by EclairJS.
   */
  DecisionTree.train2 = function(input,algo,impurity,maxDepth,numClasses) {
    // TODO: forward to the static 'train' method through Utils.generate with
    // (input: RDD, algo: Algo, impurity: Impurity, maxDepth: number,
    // numClasses: number) once supported.
    throw new Error('not implemented by EclairJS');
  };
  /**
   * Method to train a decision tree model.
   * The method supports binary and multiclass classification and regression.
   *
   * Note: Using [[org.apache.spark.mllib.tree.DecisionTree$#trainClassifier]]
   * and [[org.apache.spark.mllib.tree.DecisionTree$#trainRegressor]]
   * is recommended to clearly separate classification and regression.
   *
   * Not implemented yet: calling this method always throws.
   *
   * @param {module:eclairjs/rdd.RDD} input Training dataset: RDD of {@link LabeledPoint}.
   *     For classification, labels should take values {0, 1, ..., numClasses-1}.
   *     For regression, labels are real numbers.
   * @param {Algo} algo classification or regression
   * @param {Impurity} impurity criterion used for information gain calculation
   * @param {number} maxDepth Maximum depth of the tree.
   *     E.g., depth 0 means 1 leaf node; depth 1 means 1 internal node + 2 leaf nodes.
   * @param {number} numClasses number of classes for classification. Default value of 2.
   * @param {number} maxBins maximum number of bins used for splitting features
   * @param {QuantileStrategy} quantileCalculationStrategy algorithm for calculating quantiles
   * @param {Map} categoricalFeaturesInfo Map storing arity of categorical features.
   *     E.g., an entry (n -> k) indicates that feature n is categorical
   *     with k categories indexed from 0: {0, 1, ..., k-1}.
   * @returns {module:eclairjs/mllib/tree/model.DecisionTreeModel} DecisionTreeModel that can be used for prediction
   *     (intended contract once implemented)
   * @throws {Error} Always, because this method is not implemented by EclairJS.
   */
  DecisionTree.train3 = function(input,algo,impurity,maxDepth,numClasses,maxBins,quantileCalculationStrategy,categoricalFeaturesInfo) {
    // TODO: forward to the static 'train' method through Utils.generate with
    // (input: RDD, algo: Algo, impurity: Impurity, maxDepth: number,
    // numClasses: number, maxBins: number,
    // quantileCalculationStrategy: QuantileStrategy, categoricalFeaturesInfo: Map)
    // once supported.
    throw new Error('not implemented by EclairJS');
  };
  /**
   * Method to train a decision tree model for binary or multiclass classification.
   *
   * Builds a static-call descriptor for 'trainClassifier' (all received arguments
   * wrapped by Utils.wrapArguments) and hands it to Utils.generate, whose result
   * is returned unchanged.
   *
   * @param {module:eclairjs/rdd.RDD} input Training dataset: RDD of {@link LabeledPoint}.
   *     Labels should take values {0, 1, ..., numClasses-1}.
   * @param {number} numClasses number of classes for classification.
   * @param {object} categoricalFeaturesInfo object name key pair map storing arity of categorical features.
   *     E.g., an entry (n -> k) indicates that feature n is categorical
   *     with k categories indexed from 0: {0, 1, ..., k-1}.
   * @param {string} impurity Criterion used for information gain calculation.
   *     Supported values: "gini" (recommended) or "entropy".
   * @param {number} maxDepth Maximum depth of the tree.
   *     E.g., depth 0 means 1 leaf node; depth 1 means 1 internal node + 2 leaf nodes.
   *     (suggested value: 5)
   * @param {number} maxBins maximum number of bins used for splitting features
   *     (suggested value: 32)
   * @returns {module:eclairjs/mllib/tree/model.DecisionTreeModel} DecisionTreeModel that can be used for prediction
   */
  DecisionTree.trainClassifier = function(input,numClasses,categoricalFeaturesInfo,impurity,maxDepth,maxBins) {
    // This is a static method, so there is no instance to read a kernel from.
    // Use the kernel promise captured by the module factory: nothing in this file
    // sets `kernelP` on the constructor, so `this.kernelP` would hand `undefined`
    // to the model factory.
    var DecisionTreeModel = require('./model/DecisionTreeModel.js')(gKernelP);

    var args = {
      // Name the class explicitly so the call still works when the function is
      // invoked detached from DecisionTree (e.g. stored in a variable first).
      target: DecisionTree,
      method: 'trainClassifier',
      args: Utils.wrapArguments(arguments),
      returnType: DecisionTreeModel,
      kernelP: gKernelP,
      static: true
    };

    return Utils.generate(args);
  };
  /**
   * Method to train a decision tree model for regression.
   *
   * This property was previously assigned twice with identical bodies (once for
   * the RDD signature and once for the Java-friendly
   * [[org.apache.spark.mllib.tree.DecisionTree$#trainRegressor]] signature); the
   * second assignment simply overwrote the first, so a single definition is kept.
   *
   * Not implemented yet: calling this method always throws.
   *
   * @param {module:eclairjs/rdd.RDD} input Training dataset: RDD of {@link LabeledPoint}.
   *     Labels are real numbers.
   * @param {Map} categoricalFeaturesInfo Map storing arity of categorical features.
   *     E.g., an entry (n -> k) indicates that feature n is categorical
   *     with k categories indexed from 0: {0, 1, ..., k-1}.
   * @param {string} impurity Criterion used for information gain calculation.
   *     Supported values: "variance".
   * @param {number} maxDepth Maximum depth of the tree.
   *     E.g., depth 0 means 1 leaf node; depth 1 means 1 internal node + 2 leaf nodes.
   *     (suggested value: 5)
   * @param {number} maxBins maximum number of bins used for splitting features
   *     (suggested value: 32)
   * @returns {module:eclairjs/mllib/tree/model.DecisionTreeModel} DecisionTreeModel that can be used for prediction
   *     (intended contract once implemented)
   * @throws {Error} Always, because this method is not implemented by EclairJS.
   */
  DecisionTree.trainRegressorwithnumber = function(input,categoricalFeaturesInfo,impurity,maxDepth,maxBins) {
    // TODO: forward to the static 'trainRegressor' method through Utils.generate with
    // (input: RDD, categoricalFeaturesInfo: Map, impurity: string,
    // maxDepth: number, maxBins: number) once supported.
    throw new Error('not implemented by EclairJS');
  };
  296. DecisionTree.moduleLocation = '/mllib/tree/DecisionTree';
  297. return DecisionTree;
  298. })();
  299. };