Source: eclairjs/ml/feature/StringIndexer.js

/*                                                                         
 * Copyright 2016 IBM Corp.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

(function () {

    var PipelineStage = require(EclairJS_Globals.NAMESPACE + '/ml/PipelineStage');
    var Logger = require(EclairJS_Globals.NAMESPACE + '/Logger');
    var Utils = require(EclairJS_Globals.NAMESPACE + '/Utils');


    /**
     * @classdesc
     * Label indexer that maps a string column of labels to an ML column of label indices.
     * A numeric input column is cast to string and its string values are indexed.
     * Indices lie in [0, numLabels) and are ordered by label frequency, so the most
     * frequent label receives index 0.
     *
     * If the constructor argument is already an org.apache.spark.ml.feature.StringIndexer
     * instance, it is wrapped as-is. Any other truthy argument is handed to the Java
     * constructor as the uid. A missing or falsy argument selects the no-arg Java constructor.
     *
     * @see {@link module:eclairjs/ml/feature.IndexToString} for the inverse transformation
     * @class
     * @extends module:eclairjs/ml.PipelineStage
     * @memberof module:eclairjs/ml/feature
     * @param {string} [uid]
     */
    var StringIndexer = function (uid) {
        this.logger = Logger.getLogger("ml.feature.StringIndexer_js");

        var JavaStringIndexer = org.apache.spark.ml.feature.StringIndexer;
        var wrapped;

        if (!uid) {
            // Nothing usable supplied: use the no-arg Java constructor.
            wrapped = new JavaStringIndexer();
        } else if (uid instanceof JavaStringIndexer) {
            // Already a JVM StringIndexer: wrap it without creating a new one.
            wrapped = uid;
        } else {
            wrapped = new JavaStringIndexer(uid);
        }

        // Delegate storage of the JVM object to the parent constructor.
        PipelineStage.call(this, wrapped);
    };

    // Inherit from PipelineStage, then point `constructor` back at StringIndexer
    // because replacing the prototype object discards the default one.
    StringIndexer.prototype = Object.create(PipelineStage.prototype);
    StringIndexer.prototype.constructor = StringIndexer;

    /**
     * An immutable unique ID for the object and its derivatives,
     * as reported by the wrapped Java object's uid().
     * @returns {string}
     */
    StringIndexer.prototype.uid = function () {
        var javaIndexer = this.getJavaObject();
        return javaIndexer.uid();
    };


    /**
     * Forwards the value to setHandleInvalid on the wrapped Java object and
     * wraps whatever that call returns in a new StringIndexer.
     * @param {string} value
     * @returns {module:eclairjs/ml/feature.StringIndexer}
     */
    StringIndexer.prototype.setHandleInvalid = function (value) {
        return new StringIndexer(this.getJavaObject().setHandleInvalid(value));
    };


    /**
     * Forwards the value to setInputCol on the wrapped Java object and
     * wraps whatever that call returns in a new StringIndexer.
     * @param {string} value
     * @returns {module:eclairjs/ml/feature.StringIndexer}
     */
    StringIndexer.prototype.setInputCol = function (value) {
        return new StringIndexer(this.getJavaObject().setInputCol(value));
    };


    /**
     * Forwards the value to setOutputCol on the wrapped Java object and
     * wraps whatever that call returns in a new StringIndexer.
     * @param {string} value
     * @returns {module:eclairjs/ml/feature.StringIndexer}
     */
    StringIndexer.prototype.setOutputCol = function (value) {
        return new StringIndexer(this.getJavaObject().setOutputCol(value));
    };


    /**
     * Unwraps the dataset, calls fit on the wrapped Java object, and converts
     * the result back with Utils.javaToJs.
     * @param {module:eclairjs/sql.Dataset} dataset
     * @returns {module:eclairjs/ml/feature.StringIndexerModel}
     */
    StringIndexer.prototype.fit = function (dataset) {
        var rawDataset = Utils.unwrapObject(dataset);
        var javaModel = this.getJavaObject().fit(rawDataset);
        return Utils.javaToJs(javaModel);
    };


    /**
     * Unwraps the schema, calls transformSchema on the wrapped Java object, and
     * converts the result back with Utils.javaToJs.
     * @param {module:eclairjs/sql/types.StructType} schema
     * @returns {module:eclairjs/sql/types.StructType}
     */
    StringIndexer.prototype.transformSchema = function (schema) {
        var rawSchema = Utils.unwrapObject(schema);
        return Utils.javaToJs(this.getJavaObject().transformSchema(rawSchema));
    };


    /**
     * Unwraps the extra param map, calls copy on the wrapped Java object, and
     * converts the result back with Utils.javaToJs.
     * @param {module:eclairjs/ml/param.ParamMap} extra
     * @returns {module:eclairjs/ml/feature.StringIndexer}
     */
    StringIndexer.prototype.copy = function (extra) {
        var rawExtra = Utils.unwrapObject(extra);
        var javaCopy = this.getJavaObject().copy(rawExtra);
        return Utils.javaToJs(javaCopy);
    };

    /**
     * Unwraps the schema, calls validateAndTransformSchema on the wrapped Java
     * object, and converts the result back with Utils.javaToJs.
     * @param {module:eclairjs/sql/types.StructType} schema
     * @returns {module:eclairjs/sql/types.StructType}
     */
    StringIndexer.prototype.validateAndTransformSchema = function (schema) {
        var rawSchema = Utils.unwrapObject(schema);
        return Utils.javaToJs(this.getJavaObject().validateAndTransformSchema(rawSchema));
    };

    /**
     * Returns the result of inputCol() on the wrapped Java object, converted
     * with Utils.javaToJs.
     * @returns {module:eclairjs/ml/param.Param}
     */
    StringIndexer.prototype.inputCol = function () {
        return Utils.javaToJs(this.getJavaObject().inputCol());
    };

    /**
     * Returns the value of getInputCol() on the wrapped Java object, unconverted.
     * @returns {string}
     */
    StringIndexer.prototype.getInputCol = function () {
        var javaIndexer = this.getJavaObject();
        return javaIndexer.getInputCol();
    };

    /**
     * Returns the result of outputCol() on the wrapped Java object, converted
     * with Utils.javaToJs.
     * @returns {module:eclairjs/ml/param.Param}
     */
    StringIndexer.prototype.outputCol = function () {
        return Utils.javaToJs(this.getJavaObject().outputCol());
    };

    /**
     * Returns the value of getOutputCol() on the wrapped Java object, unconverted.
     * @returns {string}
     */
    StringIndexer.prototype.getOutputCol = function () {
        var javaIndexer = this.getJavaObject();
        return javaIndexer.getOutputCol();
    };

    /**
     * Param for how to handle invalid entries. Options are "skip" (which filters out rows
     * with bad values) or "error" (which throws an error). More options may be added later.
     * The Param is obtained from the wrapped Java object and converted with Utils.javaToJs.
     * @returns {module:eclairjs/ml/param.Param}
     */
    StringIndexer.prototype.handleInvalid = function () {
        return Utils.javaToJs(this.getJavaObject().handleInvalid());
    };

    /**
     * Returns the value of getHandleInvalid() on the wrapped Java object, unconverted.
     * @returns {string}
     */
    StringIndexer.prototype.getHandleInvalid = function () {
        var javaIndexer = this.getJavaObject();
        return javaIndexer.getHandleInvalid();
    };

    //
    // static methods
    //


    /**
     * Reads a StringIndexer from the given path by calling the static load(path)
     * of org.apache.spark.ml.feature.StringIndexer, and returns it wrapped in the
     * EclairJS StringIndexer so the result exposes the same API as instances
     * created with the constructor.
     * @param {string} path
     * @returns {module:eclairjs/ml/feature.StringIndexer}
     */
    StringIndexer.load = function (path) {
        var javaObject = org.apache.spark.ml.feature.StringIndexer.load(path);
        // Wrap the JVM instance rather than returning it raw: the documented return
        // type is the EclairJS wrapper, and the setters in this file wrap the same way.
        return new StringIndexer(javaObject);
    };

    // Expose the StringIndexer constructor (with its static load) as this module's export.
    module.exports = StringIndexer;
})();