Source: ml/clustering/LDA.js

/*
 * Copyright 2016 IBM Corp.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

module.exports = function(kernelP) {
  return (function() {
    var Utils = require('../../utils.js');

    var Estimator = require('../Estimator')();

    var gKernelP = kernelP;

    /**
     * @classdesc
     *
     * Latent Dirichlet Allocation (LDA), a topic model designed for text documents.
     *
     * Terminology:
     *  - "term" = "word": an element of the vocabulary
     *  - "token": instance of a term appearing in a document
     *  - "topic": multinomial distribution over terms representing some concept
     *  - "document": one piece of text, corresponding to one row in the input data
     *
     *  Original LDA paper (journal version):
     *    Blei, Ng, and Jordan.  "Latent Dirichlet Allocation."  JMLR, 2003.
     *
     * Input data (featuresCol):
     *  LDA is given a collection of documents as input data, via the featuresCol parameter.
     *  Each document is specified as a {@link Vector} of length vocabSize, where each entry is the
     *  count for the corresponding term (word) in the document.  Feature transformers such as
     *  {@link Tokenizer} and {@link CountVectorizer}
     *  can be useful for converting text to word count vectors.
     *
     * @see [Latent Dirichlet allocation(Wikipedia)]{@link http://en.wikipedia.org/wiki/Latent_Dirichlet_allocation}
     * @class
     * @extends module:eclairjs/ml.Estimator
     * @memberof module:eclairjs/ml/clustering
     * @param {string} [uid]
     */
    function LDA() {
      // Hand the new instance, the raw constructor arguments and the module-level
      // kernel promise to the shared constructor helper.
      var self = this;
      var ctorArgs = arguments;

      Utils.handleConstructor(self, ctorArgs, gKernelP);
    }

    // Inherit from Estimator and restore the `constructor` back-reference in one step.
    // The descriptor flags mirror what a plain property assignment would create.
    LDA.prototype = Object.create(Estimator.prototype, {
      constructor: {
        value: LDA,
        writable: true,
        enumerable: true,
        configurable: true
      }
    });

    /**
     * An immutable unique ID for the object and its derivatives.
     * Issues a `uid` call for this instance through `Utils.generate`, with the
     * result typed as a string.
     * @returns {Promise.<string>}
     */
    LDA.prototype.uid = function () {
      var wrappedArgs = Utils.wrapArguments(arguments);

      return Utils.generate({
        target: this,
        method: 'uid',
        args: wrappedArgs,
        returnType: String
      });
    };

    /**
     * The features for LDA should be a {@link Vector} representing the word counts in a document.
     * The vector should be of length vocabSize, with counts for each term (word).
     * Issues a `setFeaturesCol` call for this instance through `Utils.generate`.
     * @param {string} value
     * @returns {module:eclairjs/ml/clustering.LDA}
     */
    LDA.prototype.setFeaturesCol = function(value) {
      var wrappedArgs = Utils.wrapArguments(arguments);

      return Utils.generate({
        target: this,
        method: 'setFeaturesCol',
        args: wrappedArgs,
        returnType: LDA
      });
    };

    /**
     * Issues a `setMaxIter` call for this instance through `Utils.generate`.
     * @param {number} value
     * @returns {module:eclairjs/ml/clustering.LDA}
     */
    LDA.prototype.setMaxIter = function(value) {
      var wrappedArgs = Utils.wrapArguments(arguments);

      return Utils.generate({
        target: this,
        method: 'setMaxIter',
        args: wrappedArgs,
        returnType: LDA
      });
    };

    /**
     * Issues a `setSeed` call for this instance through `Utils.generate`.
     * @param {number} value
     * @returns {module:eclairjs/ml/clustering.LDA}
     */
    LDA.prototype.setSeed = function(value) {
      var wrappedArgs = Utils.wrapArguments(arguments);

      return Utils.generate({
        target: this,
        method: 'setSeed',
        args: wrappedArgs,
        returnType: LDA
      });
    };

    /**
     * Issues a `setCheckpointInterval` call for this instance through `Utils.generate`.
     * @param {number} value
     * @returns {module:eclairjs/ml/clustering.LDA}
     */
    LDA.prototype.setCheckpointInterval = function(value) {
      var wrappedArgs = Utils.wrapArguments(arguments);

      return Utils.generate({
        target: this,
        method: 'setCheckpointInterval',
        args: wrappedArgs,
        returnType: LDA
      });
    };

    /**
     * Issues a `setK` call for this instance through `Utils.generate`.
     * @param {number} value
     * @returns {module:eclairjs/ml/clustering.LDA}
     */
    LDA.prototype.setK = function(value) {
      var wrappedArgs = Utils.wrapArguments(arguments);

      return Utils.generate({
        target: this,
        method: 'setK',
        args: wrappedArgs,
        returnType: LDA
      });
    };

    /**
     * Issues a `setDocConcentration` call for this instance through `Utils.generate`.
     * @param {number[]|number} value
     * @returns {module:eclairjs/ml/clustering.LDA}
     */
    LDA.prototype.setDocConcentration = function(value) {
      var wrappedArgs = Utils.wrapArguments(arguments);

      return Utils.generate({
        target: this,
        method: 'setDocConcentration',
        args: wrappedArgs,
        returnType: LDA
      });
    };

    /**
     * Issues a `setTopicConcentration` call for this instance through `Utils.generate`.
     * @param {number} value
     * @returns {module:eclairjs/ml/clustering.LDA}
     */
    LDA.prototype.setTopicConcentration = function(value) {
      var wrappedArgs = Utils.wrapArguments(arguments);

      return Utils.generate({
        target: this,
        method: 'setTopicConcentration',
        args: wrappedArgs,
        returnType: LDA
      });
    };

    /**
     * Issues a `setOptimizer` call for this instance through `Utils.generate`.
     * @param {string} value
     * @returns {module:eclairjs/ml/clustering.LDA}
     */
    LDA.prototype.setOptimizer = function(value) {
      var wrappedArgs = Utils.wrapArguments(arguments);

      return Utils.generate({
        target: this,
        method: 'setOptimizer',
        args: wrappedArgs,
        returnType: LDA
      });
    };

    /**
     * Issues a `setTopicDistributionCol` call for this instance through `Utils.generate`.
     * @param {string} value
     * @returns {module:eclairjs/ml/clustering.LDA}
     */
    LDA.prototype.setTopicDistributionCol = function(value) {
      var wrappedArgs = Utils.wrapArguments(arguments);

      return Utils.generate({
        target: this,
        method: 'setTopicDistributionCol',
        args: wrappedArgs,
        returnType: LDA
      });
    };

    /**
     * Issues a `setLearningOffset` call for this instance through `Utils.generate`.
     * @param {number} value
     * @returns {module:eclairjs/ml/clustering.LDA}
     */
    LDA.prototype.setLearningOffset = function(value) {
      var wrappedArgs = Utils.wrapArguments(arguments);

      return Utils.generate({
        target: this,
        method: 'setLearningOffset',
        args: wrappedArgs,
        returnType: LDA
      });
    };

    /**
     * Issues a `setLearningDecay` call for this instance through `Utils.generate`.
     * @param {number} value
     * @returns {module:eclairjs/ml/clustering.LDA}
     */
    LDA.prototype.setLearningDecay = function(value) {
      var wrappedArgs = Utils.wrapArguments(arguments);

      return Utils.generate({
        target: this,
        method: 'setLearningDecay',
        args: wrappedArgs,
        returnType: LDA
      });
    };

    /**
     * Issues a `setSubsamplingRate` call for this instance through `Utils.generate`.
     * @param {number} value
     * @returns {module:eclairjs/ml/clustering.LDA}
     */
    LDA.prototype.setSubsamplingRate = function(value) {
      var wrappedArgs = Utils.wrapArguments(arguments);

      return Utils.generate({
        target: this,
        method: 'setSubsamplingRate',
        args: wrappedArgs,
        returnType: LDA
      });
    };

    /**
     * Issues a `setOptimizeDocConcentration` call for this instance through `Utils.generate`.
     * @param {boolean} value
     * @returns {module:eclairjs/ml/clustering.LDA}
     */
    LDA.prototype.setOptimizeDocConcentration = function(value) {
      var wrappedArgs = Utils.wrapArguments(arguments);

      return Utils.generate({
        target: this,
        method: 'setOptimizeDocConcentration',
        args: wrappedArgs,
        returnType: LDA
      });
    };

    /**
     * Issues a `copy` call for this instance through `Utils.generate`.
     * @param {module:eclairjs/ml/param.ParamMap} extra
     * @returns {module:eclairjs/ml/clustering.LDA}
     */
    LDA.prototype.copy = function(extra) {
      var wrappedArgs = Utils.wrapArguments(arguments);

      return Utils.generate({
        target: this,
        method: 'copy',
        args: wrappedArgs,
        returnType: LDA
      });
    };

    /**
     * Issues a `setKeepLastCheckpoint` call for this instance through `Utils.generate`.
     * @param {boolean} value
     * @returns {module:eclairjs/ml/clustering.LDA}
     */
    LDA.prototype.setKeepLastCheckpoint = function(value) {
      var wrappedArgs = Utils.wrapArguments(arguments);

      return Utils.generate({
        target: this,
        method: 'setKeepLastCheckpoint',
        args: wrappedArgs,
        returnType: LDA
      });
    };


    /**
     * Issues a `fit` call for this instance through `Utils.generate`, with the
     * result typed as the LDAModel class from the sibling `./LDAModel` module.
     * @param {module:eclairjs/sql.Dataset} dataset
     * @returns {module:eclairjs/ml/clustering.LDAModel}
     */
    LDA.prototype.fit = function(dataset) {
      // The model class is resolved at call time rather than at module load.
      var LDAModel = require('./LDAModel')();
      var wrappedArgs = Utils.wrapArguments(arguments);

      return Utils.generate({
        target: this,
        method: 'fit',
        args: wrappedArgs,
        returnType: LDAModel
      });
    };

    /**
     * Issues a `transformSchema` call for this instance through `Utils.generate`,
     * with the result typed as StructType.
     * @param {module:eclairjs/sql/types.StructType} schema
     * @returns {module:eclairjs/sql/types.StructType}
     */
    LDA.prototype.transformSchema = function(schema) {
      // StructType is resolved at call time rather than at module load.
      var StructType = require('../../sql/types/StructType')();
      var wrappedArgs = Utils.wrapArguments(arguments);

      return Utils.generate({
        target: this,
        method: 'transformSchema',
        args: wrappedArgs,
        returnType: StructType
      });
    };

    //
    // static methods
    //


    /**
     * Static loader: issues a `load` call on the LDA class itself through
     * `Utils.generate`, with the result typed as LDA.
     * @param {string} path
     * @returns {module:eclairjs/ml/clustering.LDA}
     */
    LDA.load = function(path) {
      // Target and return type are the LDA class defined in this module
      // (ml/clustering). Requiring the mllib LDA module into a local `LDA` here
      // would shadow that class with an uninvoked module export.
      var args = {
        target: LDA,
        method: 'load',
        // A static call has no instance to carry the kernel, so the module-level
        // kernel promise is passed explicitly.
        // NOTE(review): confirm Utils.generate reads `kernelP` for static calls.
        kernelP: gKernelP,
        static: true,
        args: Utils.wrapArguments(arguments),
        returnType: LDA
      };

      return Utils.generate(args);
    };

    // Static property recording this class's location within the package.
    // NOTE(review): presumably read by Utils when building remote calls — confirm.
    LDA.moduleLocation = '/ml/clustering/LDA';

    return LDA;
  })();
};