Source: eclairjs/ml/feature/CountVectorizerModel.js

/*                                                                         
 * Copyright 2016 IBM Corp.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

(function () {

    var JavaWrapper = require(EclairJS_Globals.NAMESPACE + '/JavaWrapper');
    var Logger = require(EclairJS_Globals.NAMESPACE + '/Logger');
    var Utils = require(EclairJS_Globals.NAMESPACE + '/Utils');


    /**
     * @classdesc
     * :: Experimental ::
     * Converts a text document to a sparse vector of token counts.
     *
     * The first argument may also be an existing JVM
     * org.apache.spark.ml.feature.CountVectorizerModel instance, in which case it is
     * wrapped as-is and uid is ignored.
     * @class
     * @memberof module:eclairjs/ml/feature
     * @param {string[]} vocabulary  An Array over terms. Only the terms in the vocabulary will be counted.
     * @param {string} [uid]  When truthy, passed to the JVM constructor ahead of the vocabulary.
     */
    var CountVectorizerModel = function (vocabulary, uid) {
        this.logger = Logger.getLogger("ml.feature.CountVectorizerModel_js");

        var JvmModel = org.apache.spark.ml.feature.CountVectorizerModel;
        var jvmObject;
        if (vocabulary instanceof JvmModel) {
            // Already a JVM model: wrap it without constructing a new one.
            jvmObject = vocabulary;
        } else if (uid) {
            jvmObject = new JvmModel(uid, vocabulary);
        } else {
            jvmObject = new JvmModel(vocabulary);
        }

        // JavaWrapper is the base constructor; it receives the JVM object to wrap.
        JavaWrapper.call(this, jvmObject);
    };

    // Inherit from JavaWrapper (not MLWritable) and restore the constructor reference
    // as a plain writable/enumerable/configurable own property.
    CountVectorizerModel.prototype = Object.create(JavaWrapper.prototype, {
        constructor: {
            value: CountVectorizerModel,
            writable: true,
            enumerable: true,
            configurable: true
        }
    });

    /**
     * An immutable unique ID for the object and its derivatives.
     * Read directly from the wrapped JVM model.
     * @returns {string}
     */
    CountVectorizerModel.prototype.uid = function () {
        var jvmModel = this.getJavaObject();
        return jvmModel.uid();
    };

    /**
     * Returns the vocabulary held by the wrapped JVM model, exactly as the JVM call returns it.
     * @returns {string[]}
     */
    CountVectorizerModel.prototype.vocabulary = function () {
        var jvmModel = this.getJavaObject();
        return jvmModel.vocabulary();
    };

    /**
     * Sets the input column on the wrapped JVM model and wraps the object returned
     * by that call in a new CountVectorizerModel.
     * @param {string} value
     * @returns {module:eclairjs/ml/feature.CountVectorizerModel}
     */
    CountVectorizerModel.prototype.setInputCol = function (value) {
        return new CountVectorizerModel(this.getJavaObject().setInputCol(value));
    };


    /**
     * Sets the output column on the wrapped JVM model and wraps the object returned
     * by that call in a new CountVectorizerModel.
     * @param {string} value
     * @returns {module:eclairjs/ml/feature.CountVectorizerModel}
     */
    CountVectorizerModel.prototype.setOutputCol = function (value) {
        return new CountVectorizerModel(this.getJavaObject().setOutputCol(value));
    };


    /**
     * Sets minTF on the wrapped JVM model and wraps the object returned
     * by that call in a new CountVectorizerModel.
     * @param {float} value
     * @returns {module:eclairjs/ml/feature.CountVectorizerModel}
     */
    CountVectorizerModel.prototype.setMinTF = function (value) {
        return new CountVectorizerModel(this.getJavaObject().setMinTF(value));
    };


    /**
     * Unwraps the given dataset, runs transform on the wrapped JVM model, and converts
     * the JVM result back with Utils.javaToJs.
     * @param {module:eclairjs/sql.DataFrame} dataset
     * @returns {module:eclairjs/sql.DataFrame}
     */
    CountVectorizerModel.prototype.transform = function (dataset) {
        var jvmDataset = Utils.unwrapObject(dataset);
        return Utils.javaToJs(this.getJavaObject().transform(jvmDataset));
    };


    /**
     * Unwraps the given schema, runs transformSchema on the wrapped JVM model, and
     * converts the JVM result back with Utils.javaToJs.
     * @param {module:eclairjs/sql/types.StructType} schema
     * @returns {module:eclairjs/sql/types.StructType}
     */
    CountVectorizerModel.prototype.transformSchema = function (schema) {
        var jvmSchema = Utils.unwrapObject(schema);
        return Utils.javaToJs(this.getJavaObject().transformSchema(jvmSchema));
    };


    /**
     * Unwraps the given param map, calls copy on the wrapped JVM model, and wraps the
     * resulting JVM object in a new CountVectorizerModel.
     * @param {module:eclairjs/ml/param.ParamMap} extra
     * @returns {module:eclairjs/ml/feature.CountVectorizerModel}
     */
    CountVectorizerModel.prototype.copy = function (extra) {
        var jvmExtra = Utils.unwrapObject(extra);
        return new CountVectorizerModel(this.getJavaObject().copy(jvmExtra));
    };


    /**
     * @returns {module:eclairjs/ml/util.MLWriter}
     */
    CountVectorizerModel.prototype.write = function () {
        var MLWriter = require(EclairJS_Globals.NAMESPACE + '/ml/util/MLWriter');
        var javaObject = this.getJavaObject().write();
        /*
         the object is an inner class so don't use Utils.javaToJs
         to create the MLWriter object.
         */
        return new MLWriter(javaObject);
    };

    /**
     * Max size of the vocabulary. CountVectorizer will build a vocabulary that only considers the top vocabSize
     * terms ordered by term frequency across the corpus.
     * Default: 2^18
     *
     * The param object comes from the wrapped JVM model and is converted with Utils.javaToJs.
     * @returns {module:eclairjs/ml/param.IntParam}
     */
    CountVectorizerModel.prototype.vocabSize = function () {
        return Utils.javaToJs(this.getJavaObject().vocabSize());
    };

    /**
     * Returns the value of getVocabSize() on the wrapped JVM model.
     * @returns {integer}
     */
    CountVectorizerModel.prototype.getVocabSize = function () {
        var jvmModel = this.getJavaObject();
        return jvmModel.getVocabSize();
    };

    /**
     * Specifies the minimum number of different documents a term must appear in to be included in the vocabulary.
     * If this is an integer >= 1, this specifies the number of documents the term must appear in;
     * if this is a double in [0,1), then this specifies the fraction of documents.
     *
     * The param object comes from the wrapped JVM model and is converted with Utils.javaToJs.
     * @returns {module:eclairjs/ml/param.DoubleParam}
     */
    CountVectorizerModel.prototype.minDF = function () {
        return Utils.javaToJs(this.getJavaObject().minDF());
    };

    /**
     * Returns the value of getMinDF() on the wrapped JVM model.
     * @returns {float}
     */
    CountVectorizerModel.prototype.getMinDF = function () {
        var jvmModel = this.getJavaObject();
        return jvmModel.getMinDF();
    };

    /**
     * Validates and transforms the input schema.
     * The schema is unwrapped, passed to the wrapped JVM model, and the result is
     * converted back with Utils.javaToJs.
     * @param {module:eclairjs/sql/types.StructType} schema
     * @returns {module:eclairjs/sql/types.StructType}
     */
    CountVectorizerModel.prototype.validateAndTransformSchema = function (schema) {
        var jvmSchema = Utils.unwrapObject(schema);
        return Utils.javaToJs(this.getJavaObject().validateAndTransformSchema(jvmSchema));
    };

    /**
     * Returns the value of getMinTF() on the wrapped JVM model.
     * @returns {float}
     */
    CountVectorizerModel.prototype.getMinTF = function () {
        var jvmModel = this.getJavaObject();
        return jvmModel.getMinTF();
    };

    //
    // static methods
    //


    /**
     * @returns {module:eclairjs/ml/util.MLReader}
     */
    CountVectorizerModel.read = function () {
        var MLReader = require(EclairJS_Globals.NAMESPACE + '/ml/util/MLReader');
        var javaObject = org.apache.spark.ml.feature.CountVectorizerModel.read();
        /*
         The object is and inner class so don't user Utils.javaToJs
         to create th MLReader.
         */
        return new MLReader(javaObject);
    };


    /**
     * Loads a model through the JVM CountVectorizerModel.load call and wraps the
     * result in a new CountVectorizerModel.
     * @param {string} path
     * @returns {module:eclairjs/ml/feature.CountVectorizerModel}
     */
    CountVectorizerModel.load = function (path) {
        return new CountVectorizerModel(org.apache.spark.ml.feature.CountVectorizerModel.load(path));
    };

    // Expose the CountVectorizerModel constructor (with its static read/load) as this module's export.
    module.exports = CountVectorizerModel;
})();