/*
* Copyright 2016 IBM Corp.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
var Utils = require('../utils.js');
var FloatRDD = require('./FloatRDD.js');
// Value passed as the third argument to Utils.handleConstructor by the PairRDD
// constructor (presumably a kernel Promise, judging by the name — confirm).
// NOTE(review): no assignment to this variable is visible in this file, so the
// constructor appears to pass `undefined` unless it is set elsewhere.
var gKernelP;
/**
 * Client-side handle for an RDD whose elements are [Tuple(value, value)]{@link Tuple} pairs.
 *
 * Construction is delegated entirely to `Utils.handleConstructor`, which receives
 * the new instance, the raw constructor arguments and the module-level `gKernelP`.
 *
 * @param {module:eclairjs/rdd.RDD} rdd of [Tuple(value, value)]{@link Tuple}.
 * @class
 * @memberof module:eclairjs/rdd
 * @extends RDD
 */
function PairRDD() {
  // Pass `arguments` through untouched so the shared helper decides how to use them.
  Utils.handleConstructor(
    this,
    arguments,
    gKernelP
  );
}
/**
 * Caches this PairRDD using the default storage level (`MEMORY_ONLY`).
 *
 * The request is dispatched through `Utils.generate` as the `cache` method
 * with no arguments.
 *
 * @returns {module:eclairjs/rdd.PairRDD}
 */
PairRDD.prototype.cache = function() {
  return Utils.generate({
    target: this,
    method: 'cache',
    returnType: PairRDD
  });
};
/**
 * Asynchronously counts the elements of this PairRDD.
 *
 * Dispatched through `Utils.generate` as the `count` method with a `Number`
 * return type.
 *
 * @returns {Promise.<Number>} A Promise that resolves to the number of elements in the PairRDD.
 */
PairRDD.prototype.count = function() {
  return Utils.generate({
    target: this,
    method: 'count',
    returnType: Number
  });
};
/**
 * Asynchronously fetches every element of this PairRDD.
 *
 * Dispatched through `Utils.generate` as the `collect` method; the result is
 * requested as an array of objects with the `stringify` flag set.
 *
 * @returns {Promise.<Array>} A Promise that resolves to an array containing all elements in the PairRDD.
 */
PairRDD.prototype.collect = function() {
  return Utils.generate({
    target: this,
    method: 'collect',
    returnType: [Object],
    stringify: true
  });
};
/**
 * Returns a sampled subset of this PairRDD.
 *
 * Every argument actually supplied by the caller is forwarded (via
 * `Utils.wrapArguments(arguments)`), so omitting `seed` sends only two values.
 *
 * @param {boolean} withReplacement
 * @param {number} fraction
 * @param {number} [seed]
 * @returns {module:eclairjs/rdd.PairRDD}
 */
PairRDD.prototype.sample = function(withReplacement, fraction, seed) {
  var forwarded = Utils.wrapArguments(arguments);
  return Utils.generate({
    target: this,
    method: 'sample',
    args: forwarded,
    returnType: PairRDD
  });
};
/**
 * Writes this PairRDD out as a text file, using string representations of elements.
 *
 * No `returnType` is declared for this call; the supplied arguments are
 * forwarded as-is through `Utils.wrapArguments(arguments)`.
 *
 * @param {string} path
 * @returns {Promise.<void>}
 */
PairRDD.prototype.saveAsTextFile = function(path) {
  var forwarded = Utils.wrapArguments(arguments);
  return Utils.generate({
    target: this,
    method: 'saveAsTextFile',
    args: forwarded
  });
};
/**
 * Asynchronously fetches the first `num` elements of this PairRDD.
 *
 * The result is requested as an array of objects with the `stringify` flag set.
 *
 * @param {Number} num
 * @returns {Promise.<Array>} A Promise that resolves to an array containing the first num elements in this PairRDD.
 */
PairRDD.prototype.take = function(num) {
  var forwarded = Utils.wrapArguments(arguments);
  return Utils.generate({
    target: this,
    method: 'take',
    args: forwarded,
    returnType: [Object],
    stringify: true
  });
};
/**
 * Asynchronously returns the first k (smallest) elements from this RDD as defined by
 * the specified ordering, and maintains the ordering. This does the opposite of top.
 *
 * Three argument descriptors are sent: `num` typed as 'number', `func` typed as
 * 'lambda', and the wrapped `bindArgs` marked optional.
 *
 * @param {Number} num
 * @param {function} func - (undocumented) Function with one parameter
 * @param {Object[]} [bindArgs] - array whose values will be added to func's argument list.
 * @returns {Promise.<Array>} A Promise that resolves to an array containing the first num elements in this RDD.
 */
PairRDD.prototype.takeOrdered = function(num, func, bindArgs) {
  var countArg = {value: num, type: 'number'};
  var lambdaArg = {value: func, type: 'lambda'};
  var boundArg = {value: Utils.wrapBindArgs(bindArgs), optional: true};
  return Utils.generate({
    target: this,
    method: 'takeOrdered',
    args: [countArg, lambdaArg, boundArg],
    returnType: [Object],
    stringify: true
  });
};
/**
 * Returns a fixed-size sampled subset of this PairRDD as an array.
 *
 * All supplied arguments are forwarded through `Utils.wrapArguments(arguments)`;
 * the result is requested as an array of objects with the `stringify` flag set.
 *
 * @param {boolean} withReplacement whether sampling is done with replacement
 * @param {number} num size of the returned sample
 * @param {number} seed seed for the random number generator
 * @returns {Promise.<Array>} A Promise that resolves to an array containing the specified number of elements in this PairRDD.
 */
PairRDD.prototype.takeSample = function(withReplacement, num, seed) {
  var forwarded = Utils.wrapArguments(arguments);
  return Utils.generate({
    target: this,
    method: 'takeSample',
    args: forwarded,
    returnType: [Object],
    stringify: true
  });
};
/**
 * Placeholder for returning all elements of this PairRDD as an array.
 *
 * This operation is not supported: the function throws synchronously on every
 * call and never returns a value. The `collect` method on this class is
 * documented as returning all elements and can be used instead.
 *
 * @throws {Error} Always, with the message "not implemented by EclairJS".
 */
PairRDD.prototype.toArray = function() {
  // Throw a real Error (not a bare string) so callers get a stack trace and
  // can rely on `instanceof Error` / `.message`.
  throw new Error('not implemented by EclairJS');
};
/**
 * Requests the string form of this PairRDD.
 *
 * Dispatched through `Utils.generate` as the `toString` method with a `String`
 * return type; whatever `Utils.generate` returns is handed back unchanged.
 * NOTE(review): sibling methods document a Promise result from `Utils.generate`;
 * if that also holds here, this override does not yield a plain string — confirm.
 *
 * @returns {*} The result of `Utils.generate` for the `toString` request.
 */
PairRDD.prototype.toString = function() {
  return Utils.generate({
    target: this,
    method: 'toString',
    returnType: String
  });
};
/**
 * Returns a new PairRDD produced by applying a function to all elements of this RDD.
 *
 * `func` is sent as a 'lambda' descriptor, followed by the wrapped `bindArgs`
 * marked optional.
 *
 * @param {Function} func
 * @param {Object[]} [bindArgs] - Optional array whose values will be added to func's argument list.
 * @returns {module:eclairjs/rdd.PairRDD}
 */
PairRDD.prototype.map = function(func, bindArgs) {
  var lambdaArg = {value: func, type: 'lambda'};
  var boundArg = {value: Utils.wrapBindArgs(bindArgs), optional: true};
  return Utils.generate({
    target: this,
    method: 'map',
    args: [lambdaArg, boundArg],
    returnType: PairRDD
  });
};
/**
 * Returns a new PairRDD produced by the `mapValues` operation with the given function.
 * NOTE(review): presumably the function is applied to the value of each pair
 * while keys are kept, as in Spark's `mapValues` — confirm against the Spark API.
 *
 * `func` is sent as a 'lambda' descriptor, followed by the wrapped `bindArgs`
 * marked optional.
 *
 * @param {Function} func
 * @param {Object[]} [bindArgs] - Optional array whose values will be added to func's argument list.
 * @returns {module:eclairjs/rdd.PairRDD}
 */
PairRDD.prototype.mapValues = function(func, bindArgs) {
  var lambdaArg = {value: func, type: 'lambda'};
  var boundArg = {value: Utils.wrapBindArgs(bindArgs), optional: true};
  return Utils.generate({
    target: this,
    method: 'mapValues',
    args: [lambdaArg, boundArg],
    returnType: PairRDD
  });
};
/**
 * Returns a new PairRDD by first applying a function to all elements of this
 * PairRDD, and then flattening the results.
 *
 * `func` is sent as a 'lambda' descriptor, followed by the wrapped `bindArgs`
 * marked optional.
 *
 * @param {Function} func
 * @param {Object[]} [bindArgs] - Optional array whose values will be added to func's argument list.
 * @returns {module:eclairjs/rdd.PairRDD}
 */
PairRDD.prototype.flatMap = function(func, bindArgs) {
  var lambdaArg = {value: func, type: 'lambda'};
  var boundArg = {value: Utils.wrapBindArgs(bindArgs), optional: true};
  return Utils.generate({
    target: this,
    method: 'flatMap',
    args: [lambdaArg, boundArg],
    returnType: PairRDD
  });
};
/**
 * Returns a new PairRDD containing only the elements that satisfy a predicate.
 *
 * `func` is sent as a 'lambda' descriptor, followed by the wrapped `bindArgs`
 * marked optional.
 *
 * @param {function} func
 * @param {Object[]} [bindArgs] - array whose values will be added to func's argument list.
 * @returns {module:eclairjs/rdd.PairRDD}
 */
PairRDD.prototype.filter = function(func, bindArgs) {
  var lambdaArg = {value: func, type: 'lambda'};
  var boundArg = {value: Utils.wrapBindArgs(bindArgs), optional: true};
  return Utils.generate({
    target: this,
    method: 'filter',
    args: [lambdaArg, boundArg],
    returnType: PairRDD
  });
};
/**
 * Returns a new PairRDD produced by applying a function to all elements of this RDD.
 *
 * Dispatched as the `mapToPair` method; `func` is sent as a 'lambda' descriptor,
 * followed by the wrapped `bindArgs` marked optional.
 *
 * @param {Function} func
 * @param {Object[]} [bindArgs] - Optional array whose values will be added to func's argument list.
 * @returns {module:eclairjs/rdd.PairRDD}
 */
PairRDD.prototype.mapToPair = function(func, bindArgs) {
  var lambdaArg = {value: func, type: 'lambda'};
  var boundArg = {value: Utils.wrapBindArgs(bindArgs), optional: true};
  return Utils.generate({
    target: this,
    method: 'mapToPair',
    args: [lambdaArg, boundArg],
    returnType: PairRDD
  });
};
/**
 * Group the values for each key in the RDD into a single sequence.
 *
 * Note: if you are grouping in order to perform an aggregation (such as a sum or
 * average) over each key, using PairRDD.reduceByKey will provide much better
 * performance.
 *
 * The declared parameter is for documentation only: the call forwards every
 * argument actually supplied (via `Utils.wrapArguments(arguments)`), so calling
 * with no arguments sends an empty argument list.
 *
 * @param {number} [numPartitions] number of partitions
 * @returns {module:eclairjs/rdd.PairRDD}
 */
PairRDD.prototype.groupByKey = function(numPartitions) {
  var forwarded = Utils.wrapArguments(arguments);
  return Utils.generate({
    target: this,
    method: 'groupByKey',
    args: forwarded,
    returnType: PairRDD
  });
};
/**
 * Returns a new FloatRDD produced by applying a function to all elements of this RDD.
 *
 * `func` is sent as a 'lambda' descriptor, followed by the wrapped `bindArgs`
 * marked optional; the declared return type is `FloatRDD`.
 *
 * @param {Function} func
 * @param {Object[]} [bindArgs] - Optional array whose values will be added to func's argument list.
 * @returns {module:eclairjs/rdd.FloatRDD}
 */
PairRDD.prototype.mapToFloat = function(func, bindArgs) {
  var lambdaArg = {value: func, type: 'lambda'};
  var boundArg = {value: Utils.wrapBindArgs(bindArgs), optional: true};
  return Utils.generate({
    target: this,
    method: 'mapToFloat',
    args: [lambdaArg, boundArg],
    returnType: FloatRDD
  });
};
/**
 * Randomly splits this PairRDD with the provided weights.
 *
 * All supplied arguments are forwarded through `Utils.wrapArguments(arguments)`;
 * the declared return type is an array of PairRDD.
 *
 * @param {number[]} weights - weights for splits, will be normalized if they don't sum to 1
 * @param {number} seed - random seed
 * @returns {Promise.<PairRDD[]>} A Promise that resolves to the array of split up RDDs
 * @private
 */
PairRDD.prototype.randomSplit = function(weights, seed) {
  var forwarded = Utils.wrapArguments(arguments);
  return Utils.generate({
    target: this,
    method: 'randomSplit',
    args: forwarded,
    returnType: [PairRDD]
  });
};
/**
 * Reduces the elements of this PairRDD using the specified commutative and
 * associative binary operator.
 *
 * `func` is sent as a 'lambda' descriptor, followed by the wrapped `bindArgs`
 * marked optional; the declared return type is `Object`.
 *
 * @param {function} func - (undocumented) Function with two parameters
 * @param {Object[]} [bindArgs] - Optional array whose values will be added to func's argument list.
 * @returns {Promise.<Object>}
 */
PairRDD.prototype.reduce = function(func, bindArgs) {
  var lambdaArg = {value: func, type: 'lambda'};
  var boundArg = {value: Utils.wrapBindArgs(bindArgs), optional: true};
  return Utils.generate({
    target: this,
    method: 'reduce',
    args: [lambdaArg, boundArg],
    returnType: Object
  });
};
/**
 * Merges the values for each key using an associative reduce function. This will
 * also perform the merging locally on each mapper before sending results to a
 * reducer, similarly to a "combiner" in MapReduce.
 *
 * `func` is sent as a 'lambda' descriptor, followed by the wrapped `bindArgs`
 * marked optional.
 *
 * @param {function} func
 * @param {Object[]} [bindArgs] - array whose values will be added to func's argument list.
 * @returns {module:eclairjs/rdd.PairRDD}
 */
PairRDD.prototype.reduceByKey = function(func, bindArgs) {
  var lambdaArg = {value: func, type: 'lambda'};
  var boundArg = {value: Utils.wrapBindArgs(bindArgs), optional: true};
  return Utils.generate({
    target: this,
    method: 'reduceByKey',
    args: [lambdaArg, boundArg],
    returnType: PairRDD
  });
};
/**
 * Returns this PairRDD sorted by key.
 *
 * All supplied arguments are forwarded through `Utils.wrapArguments(arguments)`.
 *
 * @param {Boolean} ascending
 * @returns {module:eclairjs/rdd.PairRDD}
 */
PairRDD.prototype.sortByKey = function(ascending) {
  var forwarded = Utils.wrapArguments(arguments);
  return Utils.generate({
    target: this,
    method: 'sortByKey',
    args: forwarded,
    returnType: PairRDD
  });
};
/**
 * Applies a function func to all rows.
 *
 * `func` is sent as a 'lambda' descriptor, followed by the wrapped `bindArgs`
 * marked optional. No `returnType` is declared for this call.
 *
 * @param {function} func
 * @param {Object[]} [bindArgs] - array whose values will be added to func's argument list.
 * @returns {Promise.<Void>} A Promise that resolves to nothing.
 */
PairRDD.prototype.foreach = function(func, bindArgs) {
  var lambdaArg = {value: func, type: 'lambda'};
  var boundArg = {value: Utils.wrapBindArgs(bindArgs), optional: true};
  return Utils.generate({
    target: this,
    method: 'foreach',
    args: [lambdaArg, boundArg]
  });
};
/**
 * Static factory that produces a PairRDD from an existing RDD.
 *
 * The call targets the `PairRDD` constructor itself (flagged `static`) and
 * reuses the source RDD's `kernelP` for the request.
 *
 * @param {module:eclairjs/rdd.RDD} rdd
 * @returns {module:eclairjs/rdd.PairRDD}
 */
PairRDD.fromRDD = function(rdd) {
  var forwarded = Utils.wrapArguments(arguments);
  return Utils.generate({
    target: PairRDD,
    method: 'fromRDD',
    args: forwarded,
    returnType: PairRDD,
    kernelP: rdd.kernelP,
    static: true
  });
};
/**
 * Returns a PairRDD containing all pairs of elements with matching keys in `this`
 * and `other`. Each pair of elements will be returned as a (k, (v1, v2)) tuple,
 * where (k, v1) is in `this` and (k, v2) is in `other`. Performs a hash join
 * across the cluster.
 *
 * All supplied arguments are forwarded through `Utils.wrapArguments(arguments)`,
 * so omitting `numPartitions` sends only `other`.
 *
 * @param {module:eclairjs/rdd.PairRDD} other
 * @param {number} [numPartitions]
 * @returns {module:eclairjs/rdd.PairRDD}
 */
PairRDD.prototype.join = function(other, numPartitions) {
  var forwarded = Utils.wrapArguments(arguments);
  return Utils.generate({
    target: this,
    method: 'join',
    args: forwarded,
    returnType: PairRDD
  });
};
/**
 * Returns a PairRDD with the values of each tuple.
 *
 * Dispatched through `Utils.generate` as the `values` method with no arguments.
 *
 * @returns {module:eclairjs/rdd.PairRDD}
 */
PairRDD.prototype.values = function() {
  return Utils.generate({
    target: this,
    method: 'values',
    returnType: PairRDD
  });
};
// Location string attached to the constructor as a static property.
// NOTE(review): presumably consumed by Utils to resolve the matching
// server-side module — confirm against utils.js.
PairRDD.moduleLocation = '/PairRDD';
// Expose the PairRDD constructor as this module's sole export.
module.exports = PairRDD;