// Source: eclairjs/mllib/evaluation/BinaryClassificationMetrics.js

/*                                                                         
* Copyright 2016 IBM Corp.                                                 
*                                                                          
* Licensed under the Apache License, Version 2.0 (the "License");          
* you may not use this file except in compliance with the License.         
* You may obtain a copy of the License at                                  
*                                                                          
*      http://www.apache.org/licenses/LICENSE-2.0                          
*                                                                          
* Unless required by applicable law or agreed to in writing, software      
* distributed under the License is distributed on an "AS IS" BASIS,        
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the License for the specific language governing permissions and      
* limitations under the License.                                           
*/ 
(function () {

    var JavaWrapper = require(EclairJS_Globals.NAMESPACE + '/JavaWrapper');
    var Logger = require(EclairJS_Globals.NAMESPACE + '/Logger');
    var Utils = require(EclairJS_Globals.NAMESPACE + '/Utils');

    var RDD = require(EclairJS_Globals.NAMESPACE + '/RDD');

    /**
     * @classdesc
     * Evaluator for binary classification.
     *
     * @param {module:eclairjs.RDD} scoreAndLabels an RDD of (score, label) pairs. An existing JVM
     *                org.apache.spark.mllib.evaluation.BinaryClassificationMetrics instance is
     *                also accepted; it is wrapped as-is and numBins is ignored.
     * @param {number} [numBins=0] if greater than 0, then the curves (ROC curve, PR curve) computed
     *                internally will be down-sampled to this many "bins". If 0 (the default when
     *                omitted), no down-sampling will occur.
     *                This is useful because the curve contains a point for each distinct score
     *                in the input, and this could be as large as the input itself -- millions of
     *                points or more, when thousands may be entirely sufficient to summarize
     *                the curve. After down-sampling, the curves will be made of approximately
     *                `numBins` points instead. Points are made from bins of equal numbers of
     *                consecutive points. The size of each bin is
     *                `floor(scoreAndLabels.count() / numBins)`, which means the resulting number
     *                of bins may not exactly equal numBins. The last bin in each partition may
     *                be smaller as a result, meaning there may be an extra sample at
     *                partition boundaries.
     * @class
     * @memberof module:eclairjs/mllib/evaluation
     */
    var BinaryClassificationMetrics = function(scoreAndLabels, numBins) {
        var JavaMetrics = org.apache.spark.mllib.evaluation.BinaryClassificationMetrics;
        var jvmObject;

        if (scoreAndLabels instanceof JavaMetrics) {
            // Already a JVM metrics object: wrap it directly.
            jvmObject = scoreAndLabels;
        } else {
            // Fall back to 0 ("no down-sampling") so that an omitted numBins is never
            // forwarded to the JVM constructor as undefined/null.
            var bins = (numBins === undefined || numBins === null) ? 0 : numBins;
            jvmObject = new JavaMetrics(Utils.unwrapObject(scoreAndLabels).rdd(), bins);
        }

        this.logger = Logger.getLogger("BinaryClassificationMetrics_js");
        JavaWrapper.call(this, jvmObject);
    };

    // Derive the prototype from JavaWrapper.prototype, then point `constructor`
    // back at this class, since the freshly created prototype object has no
    // `constructor` property of its own.
    var metricsPrototype = Object.create(JavaWrapper.prototype);
    metricsPrototype.constructor = BinaryClassificationMetrics;
    BinaryClassificationMetrics.prototype = metricsPrototype;



    /**
     * Unpersists the intermediate RDDs used in the computation by delegating
     * to the wrapped JVM metrics object. Returns nothing.
     */
    BinaryClassificationMetrics.prototype.unpersist = function() {
        var javaMetrics = this.getJavaObject();
        javaMetrics.unpersist();
    };


    /**
     * Returns the thresholds, in descending order.
     * The JVM result is converted with toJavaRDD() and wrapped in an EclairJS RDD.
     * @returns {module:eclairjs.RDD}
     */
    BinaryClassificationMetrics.prototype.thresholds = function() {
        return new RDD(this.getJavaObject().thresholds().toJavaRDD());
    };


    /**
     * Returns the receiver operating characteristic (ROC) curve,
     * which is an RDD of (false positive rate, true positive rate)
     * with (0.0, 0.0) prepended and (1.0, 1.0) appended to it.
     *
     * NOTE: this method is not implemented by EclairJS; every call throws.
     * @see http://en.wikipedia.org/wiki/Receiver_operating_characteristic
     * @returns {module:eclairjs.RDD} never returns normally in the current implementation.
     * @throws {string} always throws the string "not implemented by EclairJS".
     */
    BinaryClassificationMetrics.prototype.roc = function() {
        // A plain string is thrown (rather than an Error) to keep the thrown type
        // unchanged for existing callers.
        // TODO: once supported, return new RDD(this.getJavaObject().roc().toJavaRDD()).
        throw "not implemented by EclairJS";
    };


    /**
     * Computes the area under the receiver operating characteristic (ROC) curve
     * by delegating to the wrapped JVM metrics object.
     * @returns {number}
     */
    BinaryClassificationMetrics.prototype.areaUnderROC = function() {
        var javaMetrics = this.getJavaObject();
        return javaMetrics.areaUnderROC();
    };


    /**
     * Returns the precision-recall curve as an RDD of (recall, precision) pairs --
     * NOT (precision, recall) -- with (0.0, 1.0) prepended to it.
     * @see http://en.wikipedia.org/wiki/Precision_and_recall
     * @returns {module:eclairjs.RDD}
     */
    BinaryClassificationMetrics.prototype.pr = function() {
        var javaMetrics = this.getJavaObject();
        var javaCurve = javaMetrics.pr().toJavaRDD();
        return new RDD(javaCurve);
    };


    /**
     * Computes the area under the precision-recall curve by delegating
     * to the wrapped JVM metrics object.
     * @returns {number}
     */
    BinaryClassificationMetrics.prototype.areaUnderPR = function() {
        var javaMetrics = this.getJavaObject();
        return javaMetrics.areaUnderPR();
    };


    /**
     * Returns the (threshold, F-Measure) curve.
     * @param {number} [beta]  the beta factor in F-Measure computation. When omitted
     *                 (undefined or null) the JVM method is called without an argument.
     *                 An explicit 0 is forwarded as given.
     * @see http://en.wikipedia.org/wiki/F1_score
     * @returns {module:eclairjs.RDD}  an RDD of (threshold, F-Measure) pairs.
     */
    BinaryClassificationMetrics.prototype.fMeasureByThreshold = function(beta) {
        var javaMetrics = this.getJavaObject();
        // Check for "not supplied" explicitly: a truthiness test would treat a
        // caller-supplied beta of 0 as missing and silently drop it.
        var hasBeta = beta !== undefined && beta !== null;
        var javaCurve = hasBeta
            ? javaMetrics.fMeasureByThreshold(beta)
            : javaMetrics.fMeasureByThreshold();
        return new RDD(javaCurve.toJavaRDD());
    };


    /**
     * Returns the (threshold, precision) curve.
     * The JVM result is converted with toJavaRDD() and wrapped in an EclairJS RDD.
     * @returns {module:eclairjs.RDD}
     */
    BinaryClassificationMetrics.prototype.precisionByThreshold = function() {
        var javaMetrics = this.getJavaObject();
        var javaCurve = javaMetrics.precisionByThreshold().toJavaRDD();
        return new RDD(javaCurve);
    };


    /**
     * Returns the (threshold, recall) curve.
     * The JVM result is converted with toJavaRDD() and wrapped in an EclairJS RDD.
     * @returns {module:eclairjs.RDD}
     */
    BinaryClassificationMetrics.prototype.recallByThreshold = function() {
        var javaMetrics = this.getJavaObject();
        var javaCurve = javaMetrics.recallByThreshold().toJavaRDD();
        return new RDD(javaCurve);
    };

    // Export the BinaryClassificationMetrics constructor as this module's value.
    module.exports = BinaryClassificationMetrics;

})();