Source: ml/clustering/BisectingKMeans.js

/*                                                                         
* Copyright 2016 IBM Corp.                                                 
*                                                                          
* Licensed under the Apache License, Version 2.0 (the "License");          
* you may not use this file except in compliance with the License.         
* You may obtain a copy of the License at                                  
*                                                                          
*      http://www.apache.org/licenses/LICENSE-2.0                          
*                                                                          
* Unless required by applicable law or agreed to in writing, software      
* distributed under the License is distributed on an "AS IS" BASIS,        
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the License for the specific language governing permissions and      
* limitations under the License.                                           
*/ 

module.exports = function(kernelP) {
  return (function() {    var Utils = require('../../utils.js');

    var Estimator = require('../Estimator')();

    var gKernelP = kernelP;

    /**
     * @classdesc
     * :: Experimental ::
     *
     * Bisecting k-means, following "A comparison of document clustering techniques"
     * by Steinbach, Karypis, and Kumar, with modifications to fit Spark.
     * The algorithm begins with one cluster holding every point. It then repeatedly
     * looks for divisible clusters on the bottom level and splits each of them in two
     * with k-means, stopping once there are `k` leaf clusters in total or no leaf
     * cluster can be divided any further.
     * Bisecting steps for clusters on the same level are grouped together to increase
     * parallelism. If bisecting every divisible cluster on the bottom level would
     * result in more than `k` leaf clusters, larger clusters get higher priority.
     *
     * @see [[http://glaros.dtc.umn.edu/gkhome/fetch/papers/docclusterKDDTMW00.pdf
     *     Steinbach, Karypis, and Kumar, A comparison of document clustering techniques,
     *     KDD Workshop on Text Mining, 2000.]]
     * @class
     * @extends module:eclairjs/ml.Estimator
     * @memberof module:eclairjs/ml/clustering
     * @param {string} uid
     * @constructor
     */
    function BisectingKMeans() {
        // All construction work is delegated: the new instance, the caller's raw
        // `arguments` object and the module-level kernel promise are handed to Utils.
        var forwarded = arguments;
        Utils.handleConstructor(this, forwarded, gKernelP);
    }

    // Inherit from Estimator: the new prototype object delegates to
    // Estimator.prototype and carries its own `constructor` property pointing back
    // at BisectingKMeans (plain data property, same attributes an assignment yields).
    BisectingKMeans.prototype = Object.create(Estimator.prototype, {
      constructor: {
        value: BisectingKMeans,
        writable: true,
        enumerable: true,
        configurable: true
      }
    });

    /**
     * An immutable unique ID for the object and its derivatives.
     *
     * Issues a `uid` call on this instance through Utils.generate, asking for a
     * String result.
     * @returns {Promise.<string>}
     */
    BisectingKMeans.prototype.uid = function () {
      return Utils.generate({
        target: this,
        method: 'uid',
        args: Utils.wrapArguments(arguments),
        returnType: String
      });
    };

    /**
     * Issues a `copy` call on this instance through Utils.generate, forwarding the
     * caller's arguments and requesting a BisectingKMeans result.
     * @param {module:eclairjs/ml/param.ParamMap} extra
     * @returns {BisectingKMeans}
     */
    BisectingKMeans.prototype.copy = function(extra) {
      return Utils.generate({
        target: this,
        method: 'copy',
        args: Utils.wrapArguments(arguments),
        returnType: BisectingKMeans
      });
    };


    /**
     * Issues a `setFeaturesCol` call on this instance through Utils.generate,
     * forwarding the caller's arguments.
     * @param {string} value
     * @returns {BisectingKMeans} result of Utils.generate with returnType
     *     BisectingKMeans (presumably usable for chaining further setters - confirm)
     */
    BisectingKMeans.prototype.setFeaturesCol = function(value) {
      return Utils.generate({
        target: this,
        method: 'setFeaturesCol',
        args: Utils.wrapArguments(arguments),
        returnType: BisectingKMeans
      });
    };


    /**
     * Issues a `setPredictionCol` call on this instance through Utils.generate,
     * forwarding the caller's arguments.
     * @param {string} value
     * @returns {BisectingKMeans} result of Utils.generate with returnType
     *     BisectingKMeans (presumably usable for chaining further setters - confirm)
     */
    BisectingKMeans.prototype.setPredictionCol = function(value) {
      return Utils.generate({
        target: this,
        method: 'setPredictionCol',
        args: Utils.wrapArguments(arguments),
        returnType: BisectingKMeans
      });
    };


    /**
     * Issues a `setK` call on this instance through Utils.generate, forwarding the
     * caller's arguments.
     * @param {number} value presumably the `k` (leaf cluster count) referred to in
     *     the class description - confirm
     * @returns {BisectingKMeans} result of Utils.generate with returnType
     *     BisectingKMeans (presumably usable for chaining further setters - confirm)
     */
    BisectingKMeans.prototype.setK = function(value) {
      return Utils.generate({
        target: this,
        method: 'setK',
        args: Utils.wrapArguments(arguments),
        returnType: BisectingKMeans
      });
    };


    /**
     * Issues a `setMaxIter` call on this instance through Utils.generate,
     * forwarding the caller's arguments.
     * @param {number} value
     * @returns {BisectingKMeans} result of Utils.generate with returnType
     *     BisectingKMeans (presumably usable for chaining further setters - confirm)
     */
    BisectingKMeans.prototype.setMaxIter = function(value) {
      return Utils.generate({
        target: this,
        method: 'setMaxIter',
        args: Utils.wrapArguments(arguments),
        returnType: BisectingKMeans
      });
    };


    /**
     * Issues a `setSeed` call on this instance through Utils.generate, forwarding
     * the caller's arguments.
     * @param {number} value
     * @returns {BisectingKMeans} result of Utils.generate with returnType
     *     BisectingKMeans (presumably usable for chaining further setters - confirm)
     */
    BisectingKMeans.prototype.setSeed = function(value) {
      return Utils.generate({
        target: this,
        method: 'setSeed',
        args: Utils.wrapArguments(arguments),
        returnType: BisectingKMeans
      });
    };


    /**
     * Issues a `setMinDivisibleClusterSize` call on this instance through
     * Utils.generate, forwarding the caller's arguments.
     * @param {number} value
     * @returns {BisectingKMeans} result of Utils.generate with returnType
     *     BisectingKMeans (presumably usable for chaining further setters - confirm)
     */
    BisectingKMeans.prototype.setMinDivisibleClusterSize = function(value) {
      return Utils.generate({
        target: this,
        method: 'setMinDivisibleClusterSize',
        args: Utils.wrapArguments(arguments),
        returnType: BisectingKMeans
      });
    };


    /**
     * Issues a `fit` call on this instance through Utils.generate, forwarding the
     * caller's arguments and requesting a BisectingKMeansModel result.
     * @param {module:eclairjs/sql.Dataset} dataset
     * @returns {BisectingKMeansModel}
     */
    BisectingKMeans.prototype.fit = function(dataset) {
      // Resolved at call time rather than at module load.
      // NOTE(review): presumably deferred to sidestep a circular require - confirm.
      var ModelClass = require('./BisectingKMeansModel')();

      return Utils.generate({
        target: this,
        method: 'fit',
        args: Utils.wrapArguments(arguments),
        returnType: ModelClass
      });
    };


    /**
     * Issues a `transformSchema` call on this instance through Utils.generate,
     * forwarding the caller's arguments and requesting a StructType result.
     * @param {module:eclairjs/sql/types.StructType} schema
     * @returns {StructType}
     */
    BisectingKMeans.prototype.transformSchema = function(schema) {
      // NOTE(review): unlike the Estimator and BisectingKMeansModel requires in this
      // file, the module export is used as-is here (no trailing `()` invocation).
      // Verify that StructType.js exports the class itself rather than a factory.
      var SchemaType = require('../../sql/types/StructType.js');

      return Utils.generate({
        target: this,
        method: 'transformSchema',
        args: Utils.wrapArguments(arguments),
        returnType: SchemaType
      });
    };

    //
    // static methods
    //

    /**
     * Static counterpart of the instance methods: issues a `load` call through
     * Utils.generate with the BisectingKMeans constructor itself as the target and
     * the descriptor flagged `static: true`.
     * @param {string} path
     * @returns {BisectingKMeans}
     */
    BisectingKMeans.load = function(path) {
      return Utils.generate({
        target: BisectingKMeans,
        method: 'load',
        args: Utils.wrapArguments(arguments),
        static: true,
        returnType: BisectingKMeans
      });
    };

    // Module path tag attached to the constructor as a static property.
    // NOTE(review): presumably read by Utils to locate the matching module on the
    // kernel side - confirm against utils.js.
    BisectingKMeans.moduleLocation = '/ml/clustering/BisectingKMeans';

    return BisectingKMeans;
  })();
};