Source: eclairjs/mllib/fpm/FPGrowth.js

  1. /*
  2. * Copyright 2016 IBM Corp.
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. (function () {
  17. var JavaWrapper = require(EclairJS_Globals.NAMESPACE + '/JavaWrapper');
  18. var Logger = require(EclairJS_Globals.NAMESPACE + '/Logger');
  19. var Utils = require(EclairJS_Globals.NAMESPACE + '/Utils');
  20. var FPGrowthModel = require(EclairJS_Globals.NAMESPACE + '/mllib/fpm/FPGrowthModel');
  /**
   * @classdesc
   * A parallel FP-growth algorithm to mine frequent itemsets. The algorithm is described in
   * Li et al., "PFP: Parallel FP-Growth for Query Recommendation"
   * ({@link http://dx.doi.org/10.1145/1454008.1454027}). PFP distributes computation in such a
   * way that each worker executes an independent group of mining tasks. The FP-Growth algorithm
   * itself is described in Han et al., "Mining frequent patterns without candidate generation"
   * ({@link http://dx.doi.org/10.1145/335191.335372}).
   *
   * Tunable settings (configured through the setters, not through the constructor):
   *  - minSupport: the minimal support level of a frequent pattern; any pattern that appears
   *    more than (minSupport * size-of-the-dataset) times will be output.
   *  - numPartitions: number of partitions used by parallel FP-growth.
   *
   * Constructs an instance with default parameters {minSupport: `0.3`, numPartitions: same
   * as the input data} unless an existing JVM FPGrowth object is supplied.
   *
   * @see {@link http://en.wikipedia.org/wiki/Association_rule_learning Association rule learning (Wikipedia)}
   *
   * @class
   * @memberof module:eclairjs/mllib/fpm
   * @param {object} [obj] an existing org.apache.spark.mllib.fpm.FPGrowth JVM object to wrap;
   *        any other value (or no value) results in a newly created JVM instance
   */
  var FPGrowth = function(obj) {
    this.logger = Logger.getLogger("FPGrowth_js");
    var SparkFPGrowth = org.apache.spark.mllib.fpm.FPGrowth;
    // Reuse a JVM FPGrowth handed in by the caller; anything else gets a fresh default instance.
    var jvmObject = (obj instanceof SparkFPGrowth) ? obj : new SparkFPGrowth();
    JavaWrapper.call(this, jvmObject);
  };

  // Inherit from JavaWrapper, then point `constructor` back at FPGrowth because the
  // object produced by Object.create() would otherwise inherit JavaWrapper's.
  FPGrowth.prototype = Object.create(JavaWrapper.prototype);
  FPGrowth.prototype.constructor = FPGrowth;
  /**
   * Sets the minimal support level (default: `0.3`).
   *
   * The value is forwarded to `setMinSupport` on the wrapped JVM object, and whatever that
   * call returns is handed to the FPGrowth constructor, so calls can be chained.
   *
   * @param {float} minSupport minimal support level to apply
   * @returns {module:eclairjs/mllib/fpm.FPGrowth} an FPGrowth built from the JVM setter's result
   */
  FPGrowth.prototype.setMinSupport = function(minSupport) {
    return new FPGrowth(this.getJavaObject().setMinSupport(minSupport));
  };
  /**
   * Sets the number of partitions used by parallel FP-growth (default: same as input data).
   *
   * The value is forwarded to `setNumPartitions` on the wrapped JVM object, and whatever that
   * call returns is handed to the FPGrowth constructor, so calls can be chained.
   *
   * @param {integer} numPartitions number of partitions to use
   * @returns {module:eclairjs/mllib/fpm.FPGrowth} an FPGrowth built from the JVM setter's result
   */
  FPGrowth.prototype.setNumPartitions = function(numPartitions) {
    return new FPGrowth(this.getJavaObject().setNumPartitions(numPartitions));
  };
  /**
   * Computes an FP-Growth model that contains frequent itemsets.
   *
   * The input is first passed through `Utils.unwrapObject`, then given to `run` on the
   * wrapped JVM object; the result is wrapped in an FPGrowthModel.
   *
   * @param {module:eclairjs.RDD} data input data set, each element contains a transaction
   * @returns {module:eclairjs/mllib/fpm.FPGrowthModel} the model produced by the JVM `run` call
   */
  FPGrowth.prototype.run = function(data) {
    // Unwrap before touching the JVM object so the evaluation order stays: unwrap, then run.
    var rawData = Utils.unwrapObject(data);
    return new FPGrowthModel(this.getJavaObject().run(rawData));
  };
  88. module.exports = FPGrowth;
  89. })();