Source: mllib/recommendation/ALS.js

/*
 * Copyright 2016 IBM Corp.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */


var Utils = require('../../utils.js');

var gKernelP;

/**
 * Alternating Least Squares matrix factorization.
 *
 * ALS attempts to estimate the ratings matrix `R` as the product of two lower-rank matrices,
 * `X` and `Y`, i.e. `X * Yt = R`. Typically these approximations are called 'factor' matrices.
 * The general approach is iterative. During each iteration, one of the factor matrices is held
 * constant, while the other is solved for using least squares. The newly-solved factor matrix is
 * then held constant while solving for the other factor matrix.
 *
 * This is a blocked implementation of the ALS factorization algorithm that groups the two sets
 * of factors (referred to as "users" and "products") into blocks and reduces communication by only
 * sending one copy of each user vector to each product block on each iteration, and only for the
 * product blocks that need that user's feature vector. This is achieved by precomputing some
 * information about the ratings matrix to determine the "out-links" of each user (which blocks of
 * products it will contribute to) and "in-link" information for each product (which of the feature
 * vectors it receives from each user block it will depend on). This allows us to send only an
 * array of feature vectors between each user block and product block, and have the product block
 * find the users' ratings and update the products based on these messages.
 *
 * For implicit preference data, the algorithm used is based on
 * "Collaborative Filtering for Implicit Feedback Datasets", available at
 * [[http://dx.doi.org/10.1109/ICDM.2008.22]], adapted for the blocked approach used here.
 *
 * Essentially instead of finding the low-rank approximations to the rating matrix `R`,
 * this finds the approximations for a preference matrix `P` where the elements of `P` are 1 if
 * r > 0 and 0 if r <= 0. The ratings then act as 'confidence' values related to strength of
 * indicated user
 * preferences rather than explicit ratings given to items.
 * @classdesc
 */

/**
 * Constructs an ALS instance with default parameters: {numBlocks: -1, rank: 10, iterations: 10,
 * lambda: 0.01, implicitPrefs: false, alpha: 1.0}.
 * @returns {??}
 * @class
 * @memberof module:eclairjs/mllib/recommendation
 */
function ALS() {
  Utils.handleConstructor(this, arguments, gKernelP);
}

/**
 * Set the number of blocks for both user blocks and product blocks to parallelize the computation
 * into; pass -1 for an auto-configured number of blocks. Default: -1.
 * @param {number} numBlocks
 * @returns {}
 */
ALS.prototype.setBlocks = function(numBlocks) {
  throw "not implemented by ElairJS";
//   var args ={
//     target: this,
//     method: 'setBlocks',
//     args: [
//       { value: numBlocks, type: 'number' }
//     ],
//     returnType:
//
//   };
//
//   return Utils.generate(args);
};


/**
 * Set the number of user blocks to parallelize the computation.
 * @param {number} numUserBlocks
 * @returns {}
 */
ALS.prototype.setUserBlocks = function(numUserBlocks) {
  throw "not implemented by ElairJS";
//   var args ={
//     target: this,
//     method: 'setUserBlocks',
//     args: [
//       { value: numUserBlocks, type: 'number' }
//     ],
//     returnType:
//
//   };
//
//   return Utils.generate(args);
};


/**
 * Set the number of product blocks to parallelize the computation.
 * @param {number} numProductBlocks
 * @returns {}
 */
ALS.prototype.setProductBlocks = function(numProductBlocks) {
  throw "not implemented by ElairJS";
//   var args ={
//     target: this,
//     method: 'setProductBlocks',
//     args: [
//       { value: numProductBlocks, type: 'number' }
//     ],
//     returnType:
//
//   };
//
//   return Utils.generate(args);
};


/**
 * @param {number} rank
 * @returns {}
 */
ALS.prototype.setRank = function(rank) {
  throw "not implemented by ElairJS";
//   var args ={
//     target: this,
//     method: 'setRank',
//     args: [
//       { value: rank, type: 'number' }
//     ],
//     returnType:
//
//   };
//
//   return Utils.generate(args);
};


/**
 * @param {number} iterations
 * @returns {}
 */
ALS.prototype.setIterations = function(iterations) {
  throw "not implemented by ElairJS";
//   var args ={
//     target: this,
//     method: 'setIterations',
//     args: [
//       { value: iterations, type: 'number' }
//     ],
//     returnType:
//
//   };
//
//   return Utils.generate(args);
};


/**
 * @param {number} lambda
 * @returns {}
 */
ALS.prototype.setLambda = function(lambda) {
  throw "not implemented by ElairJS";
//   var args ={
//     target: this,
//     method: 'setLambda',
//     args: [
//       { value: lambda, type: 'number' }
//     ],
//     returnType:
//
//   };
//
//   return Utils.generate(args);
};


/**
 * @param {boolean} implicitPrefs
 * @returns {}
 */
ALS.prototype.setImplicitPrefs = function(implicitPrefs) {
  throw "not implemented by ElairJS";
//   var args ={
//     target: this,
//     method: 'setImplicitPrefs',
//     args: [
//       { value: implicitPrefs, type: 'boolean' }
//     ],
//     returnType:
//
//   };
//
//   return Utils.generate(args);
};


/**
 * Sets the constant used in computing confidence in implicit ALS. Default: 1.0.
 * @param {number} alpha
 * @returns {}
 */
ALS.prototype.setAlpha = function(alpha) {
  throw "not implemented by ElairJS";
//   var args ={
//     target: this,
//     method: 'setAlpha',
//     args: [
//       { value: alpha, type: 'number' }
//     ],
//     returnType:
//
//   };
//
//   return Utils.generate(args);
};


/**
 * @param {number} seed
 * @returns {}
 */
ALS.prototype.setSeed = function(seed) {
  throw "not implemented by ElairJS";
//   var args ={
//     target: this,
//     method: 'setSeed',
//     args: [
//       { value: seed, type: 'number' }
//     ],
//     returnType:
//
//   };
//
//   return Utils.generate(args);
};


/**
 * Set whether the least-squares problems solved at each iteration should have
 * nonnegativity constraints.
 * @param {boolean} b
 * @returns {}
 */
ALS.prototype.setNonnegative = function(b) {
  throw "not implemented by ElairJS";
//   var args ={
//     target: this,
//     method: 'setNonnegative',
//     args: [
//       { value: b, type: 'boolean' }
//     ],
//     returnType:
//
//   };
//
//   return Utils.generate(args);
};


/**
 * Run ALS with the configured parameters on an input RDD of (user, product, rating) triples.
 * Returns a MatrixFactorizationModel with feature vectors for each user and product.
 * @param {module:eclairjs/rdd.RDD} ratings
 * @returns {module:eclairjs/mllib/recommendation.MatrixFactorizationModel}
 */
ALS.prototype.runwithRDD = function(ratings) {
  throw "not implemented by ElairJS";
//   var args ={
//     target: this,
//     method: 'run',
//     args: [
//       { value: ratings, type: 'RDD' }
//     ],
//     returnType: MatrixFactorizationModel
//
//   };
//
//   return Utils.generate(args);
};


/**
 * Java-friendly version of {@link run}.
 * @param {JavaRDD} ratings
 * @returns {module:eclairjs/mllib/recommendation.MatrixFactorizationModel}
 */
ALS.prototype.runwithJavaRDD = function(ratings) {
  throw "not implemented by ElairJS";
//   var args ={
//     target: this,
//     method: 'run',
//     args: [
//       { value: ratings, type: 'JavaRDD' }
//     ],
//     returnType: MatrixFactorizationModel
//
//   };
//
//   return Utils.generate(args);
};

//
// static methods
//

/**
 * Train a matrix factorization model given an RDD of ratings given by users to some products,
 * in the form of (userID, productID, rating) pairs. We approximate the ratings matrix as the
 * product of two lower-rank matrices of a given rank (number of features). To solve for these
 * features, we run a given number of iterations of ALS. This is done using a level of
 * parallelism given by `blocks`.
 *
 * @param {module:eclairjs/rdd.RDD} ratings     RDD of (userID, productID, rating) pairs
 * @param {number} rank        number of features to use
 * @param {number} iterations  number of iterations of ALS (recommended: 10-20)
 * @param {number} [lambda]  regularization factor (recommended: 0.01)
 * @param {number} [blocks]  level of parallelism to split computation into
 * @param {number} [seed]  random seed
 * @returns {module:eclairjs/mllib/recommendation.MatrixFactorizationModel}
 */
ALS.train = function() {
  var MatrixFactorizationModel = require('./MatrixFactorizationModel.js')(this.kernelP);

  var args = {
    target: ALS,
    method: 'train',
    args: Utils.wrapArguments(arguments),
    static: true,
    kernelP: gKernelP,
    returnType: MatrixFactorizationModel
  };

  return Utils.generate(args);
};

/**
 * Train a matrix factorization model given an RDD of 'implicit preferences' given by users
 * to some products, in the form of (userID, productID, preference) pairs. We approximate the
 * ratings matrix as the product of two lower-rank matrices of a given rank (number of features).
 * To solve for these features, we run a given number of iterations of ALS. This is done using
 * a level of parallelism given by `blocks`.
 *
 * @param {module:eclairjs/rdd.RDD} ratings     RDD of (userID, productID, rating) pairs
 * @param {number} rank        number of features to use
 * @param {number} iterations  number of iterations of ALS (recommended: 10-20)
 * @param {number} lambda      regularization factor (recommended: 0.01)
 * @param {number} blocks      level of parallelism to split computation into
 * @param {number} alpha       confidence parameter
 * @param {number} seed        random seed
 * @returns {module:eclairjs/mllib/recommendation.MatrixFactorizationModel}
 */
ALS.trainImplicit0 = function(ratings,rank,iterations,lambda,blocks,alpha,seed) {
  throw "not implemented by ElairJS";
//   var args ={
//     target: ALS,
//     method: 'trainImplicit',
//     args: [
//       { value: ratings, type: 'RDD' },
//       { value: rank, type: 'number' },
//       { value: iterations, type: 'number' },
//       { value: lambda, type: 'number' },
//       { value: blocks, type: 'number' },
//       { value: alpha, type: 'number' },
//       { value: seed, type: 'number' }
//     ],
//     returnType: MatrixFactorizationModel
//
//   };
//
//   return Utils.generate(args);
};


/**
 * Train a matrix factorization model given an RDD of 'implicit preferences' given by users
 * to some products, in the form of (userID, productID, preference) pairs. We approximate the
 * ratings matrix as the product of two lower-rank matrices of a given rank (number of features).
 * To solve for these features, we run a given number of iterations of ALS. This is done using
 * a level of parallelism given by `blocks`.
 *
 * @param {module:eclairjs/rdd.RDD} ratings     RDD of (userID, productID, rating) pairs
 * @param {number} rank        number of features to use
 * @param {number} iterations  number of iterations of ALS (recommended: 10-20)
 * @param {number} lambda      regularization factor (recommended: 0.01)
 * @param {number} blocks      level of parallelism to split computation into
 * @param {number} alpha       confidence parameter
 * @returns {module:eclairjs/mllib/recommendation.MatrixFactorizationModel}
 */
ALS.trainImplicit1 = function(ratings,rank,iterations,lambda,blocks,alpha) {
  throw "not implemented by ElairJS";
//   var args ={
//     target: ALS,
//     method: 'trainImplicit',
//     args: [
//       { value: ratings, type: 'RDD' },
//       { value: rank, type: 'number' },
//       { value: iterations, type: 'number' },
//       { value: lambda, type: 'number' },
//       { value: blocks, type: 'number' },
//       { value: alpha, type: 'number' }
//     ],
//     returnType: MatrixFactorizationModel
//
//   };
//
//   return Utils.generate(args);
};


/**
 * Train a matrix factorization model given an RDD of 'implicit preferences' given by users to
 * some products, in the form of (userID, productID, preference) pairs. We approximate the
 * ratings matrix as the product of two lower-rank matrices of a given rank (number of features).
 * To solve for these features, we run a given number of iterations of ALS. The level of
 * parallelism is determined automatically based on the number of partitions in `ratings`.
 *
 * @param {module:eclairjs/rdd.RDD} ratings     RDD of (userID, productID, rating) pairs
 * @param {number} rank        number of features to use
 * @param {number} iterations  number of iterations of ALS (recommended: 10-20)
 * @param {number} lambda      regularization factor (recommended: 0.01)
 * @param {number} alpha       confidence parameter
 * @returns {module:eclairjs/mllib/recommendation.MatrixFactorizationModel}
 */
ALS.trainImplicit2 = function(ratings,rank,iterations,lambda,alpha) {
  throw "not implemented by ElairJS";
//   var args ={
//     target: ALS,
//     method: 'trainImplicit',
//     args: [
//       { value: ratings, type: 'RDD' },
//       { value: rank, type: 'number' },
//       { value: iterations, type: 'number' },
//       { value: lambda, type: 'number' },
//       { value: alpha, type: 'number' }
//     ],
//     returnType: MatrixFactorizationModel
//
//   };
//
//   return Utils.generate(args);
};


/**
 * Train a matrix factorization model given an RDD of 'implicit preferences' ratings given by
 * users to some products, in the form of (userID, productID, rating) pairs. We approximate the
 * ratings matrix as the product of two lower-rank matrices of a given rank (number of features).
 * To solve for these features, we run a given number of iterations of ALS. The level of
 * parallelism is determined automatically based on the number of partitions in `ratings`.
 * Model parameters `alpha` and `lambda` are set to reasonable default values
 *
 * @param {module:eclairjs/rdd.RDD} ratings     RDD of (userID, productID, rating) pairs
 * @param {number} rank        number of features to use
 * @param {number} iterations  number of iterations of ALS (recommended: 10-20)
 * @returns {module:eclairjs/mllib/recommendation.MatrixFactorizationModel}
 */
ALS.trainImplicit3 = function(ratings,rank,iterations) {
  throw "not implemented by ElairJS";
//   var args ={
//     target: ALS,
//     method: 'trainImplicit',
//     args: [
//       { value: ratings, type: 'RDD' },
//       { value: rank, type: 'number' },
//       { value: iterations, type: 'number' }
//     ],
//     returnType: MatrixFactorizationModel
//
//   };
//
//   return Utils.generate(args);
};

ALS.moduleLocation = '/mllib/recommendation/ALS';

module.exports = function(kP) {
  if (kP) gKernelP = kP;

  return ALS;
};