Source: eclairjs/ml/feature/HashingTF.js

  1. /*
  2. * Copyright 2016 IBM Corp.
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. (function () {
  17. var Logger = require(EclairJS_Globals.NAMESPACE + '/Logger');
  18. var Utils = require(EclairJS_Globals.NAMESPACE + '/Utils');
  19. var Transformer = require(EclairJS_Globals.NAMESPACE + '/ml/Transformer');
  /**
   * @classdesc
   * Maps a sequence of terms to their term frequencies using the hashing trick.
   * Currently we use Austin Appleby's MurmurHash 3 algorithm (MurmurHash3_x86_32)
   * to calculate the hash code value for the term object.
   * Since a simple modulo is used to transform the hash function to a column index,
   * it is advisable to use a power of two as the numFeatures parameter;
   * otherwise the features will not be mapped evenly to the columns.
   *
   * The constructor also accepts an existing org.apache.spark.ml.feature.HashingTF
   * instance in place of the uid, in which case that instance is wrapped as-is.
   * @class
   * @memberof module:eclairjs/ml/feature
   * @extends module:eclairjs/ml.Transformer
   * @param {string} [uid] Optional uid handed to the Java constructor; when falsy,
   *     the no-argument Java constructor is used.
   * @constructor
   */
  var HashingTF = function(uid) {
      this.logger = Logger.getLogger("ml_feature_HashingTF_js");

      // Local alias for the wrapped Java class, used for both the type check and construction.
      var JavaHashingTF = org.apache.spark.ml.feature.HashingTF;
      var wrapped;

      if (!uid) {
          wrapped = new JavaHashingTF();
      } else if (uid instanceof JavaHashingTF) {
          // Already a Java HashingTF: wrap it rather than creating a new one.
          wrapped = uid;
      } else {
          wrapped = new JavaHashingTF(uid);
      }

      Transformer.call(this, wrapped);
  };

  // Prototype chain: HashingTF inherits from Transformer.
  HashingTF.prototype = Object.create(Transformer.prototype);
  HashingTF.prototype.constructor = HashingTF;
  /**
   * An immutable unique ID for the object and its derivatives.
   * Read from the wrapped Java object.
   * @returns {string}
   */
  HashingTF.prototype.uid = function () {
      var delegate = this.getJavaObject();
      return delegate.uid();
  };
  /**
   * Calls setInputCol on the wrapped Java object and wraps the result
   * in a new HashingTF.
   * @param {string} value Passed through unchanged to the Java setInputCol.
   * @returns {module:eclairjs/ml/feature.HashingTF}
   */
  HashingTF.prototype.setInputCol = function(value) {
      return new HashingTF(this.getJavaObject().setInputCol(value));
  };
  /**
   * Calls setOutputCol on the wrapped Java object and wraps the result
   * in a new HashingTF.
   * @param {string} value Passed through unchanged to the Java setOutputCol.
   * @returns {module:eclairjs/ml/feature.HashingTF}
   */
  HashingTF.prototype.setOutputCol = function(value) {
      return new HashingTF(this.getJavaObject().setOutputCol(value));
  };
  /**
   * Returns the value of getNumFeatures() from the wrapped Java object.
   * @returns {number}
   */
  HashingTF.prototype.getNumFeatures = function() {
      var delegate = this.getJavaObject();
      return delegate.getNumFeatures();
  };
  /**
   * Returns the numFeatures param of the wrapped Java object, converted
   * to its JavaScript counterpart via Utils.javaToJs.
   * @returns {module:eclairjs/ml/param.Param}
   */
  HashingTF.prototype.numFeatures = function() {
      var javaParam = this.getJavaObject().numFeatures();
      return Utils.javaToJs(javaParam);
  };
  /**
   * Calls setNumFeatures on the wrapped Java object and wraps the result
   * in a new HashingTF.
   * @param {number} value Passed through unchanged to the Java setNumFeatures.
   * @returns {module:eclairjs/ml/feature.HashingTF}
   */
  HashingTF.prototype.setNumFeatures = function(value) {
      return new HashingTF(this.getJavaObject().setNumFeatures(value));
  };
  /**
   * Returns the value of getBinary() from the wrapped Java object.
   * @returns {boolean}
   */
  HashingTF.prototype.getBinary = function() {
      var delegate = this.getJavaObject();
      return delegate.getBinary();
  };
  /**
   * Calls setBinary on the wrapped Java object and wraps the result
   * in a new HashingTF.
   * @param {boolean} value Passed through unchanged to the Java setBinary.
   * @returns {module:eclairjs/ml/feature.HashingTF}
   */
  HashingTF.prototype.setBinary = function(value) {
      return new HashingTF(this.getJavaObject().setBinary(value));
  };
  /**
   * Unwraps the given dataset, passes it to transform() on the wrapped Java
   * object, and converts the Java result back with Utils.javaToJs.
   * @param {module:eclairjs/sql.Dataset} dataset
   * @returns {module:eclairjs/sql.Dataset}
   */
  HashingTF.prototype.transform = function(dataset) {
      // Unwrap first so the Java call receives the underlying Java object.
      var javaDataset = Utils.unwrapObject(dataset);
      return Utils.javaToJs(this.getJavaObject().transform(javaDataset));
  };
  /**
   * Unwraps the given schema, passes it to transformSchema() on the wrapped
   * Java object, and converts the Java result back with Utils.javaToJs.
   * @param {module:eclairjs/sql/types.StructType} schema
   * @returns {module:eclairjs/sql/types.StructType}
   */
  HashingTF.prototype.transformSchema = function(schema) {
      // Unwrap first so the Java call receives the underlying Java object.
      var javaSchema = Utils.unwrapObject(schema);
      return Utils.javaToJs(this.getJavaObject().transformSchema(javaSchema));
  };
  /**
   * Unwraps the given param map, passes it to copy() on the wrapped Java
   * object, and wraps the returned Java object in a new HashingTF.
   * @param {module:eclairjs/ml/param.ParamMap} extra
   * @returns {module:eclairjs/ml/feature.HashingTF}
   */
  HashingTF.prototype.copy = function(extra) {
      // Unwrap first so the Java call receives the underlying Java object.
      var javaExtra = Utils.unwrapObject(extra);
      return new HashingTF(this.getJavaObject().copy(javaExtra));
  };
  131. //
  132. // static methods
  133. //
  /**
   * Calls the static org.apache.spark.ml.feature.HashingTF.load with the
   * given path and wraps the returned Java object in a new HashingTF.
   * @param {string} path
   * @returns {module:eclairjs/ml/feature.HashingTF}
   */
  HashingTF.load = function(path) {
      return new HashingTF(org.apache.spark.ml.feature.HashingTF.load(path));
  };
  142. module.exports = HashingTF;
  143. })();