/*
* Copyright 2016 IBM Corp.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
var Utils = require('../../utils.js');
// Module-level kernel reference shared by every Word2Vec instance.
// It is assigned by the factory exported from this module (when a truthy
// value is supplied) and forwarded to Utils.handleConstructor by the
// Word2Vec constructor. Presumably a promise for the kernel, judging by
// the name - confirm against Utils.
var gKernelP;
/**
* Word2Vec creates vector representation of words in a text corpus.
* The algorithm first constructs a vocabulary from the corpus
* and then learns vector representation of words in the vocabulary.
* The vector representation can be used as features in
* natural language processing and machine learning algorithms.
*
* We use the skip-gram model in our implementation and the hierarchical softmax
* method to train the model. The variable names in the implementation
* match the original C implementation.
*
* For original C implementation, see https://code.google.com/p/word2vec/
* For research papers, see
* Efficient Estimation of Word Representations in Vector Space
* and
* Distributed Representations of Words and Phrases and their Compositionality.
* @memberof module:eclairjs/mllib/feature
* @classdesc
* @class
*/
function Word2Vec() {
  // Construction is delegated to the shared Utils helper, which receives the
  // raw constructor arguments together with the module-level kernel reference.
  Utils.handleConstructor(this, arguments, gKernelP);
}

/**
 * Raises the error shared by every Word2Vec method that is not supported yet.
 * A real Error (rather than a bare string) is thrown so that callers get a
 * stack trace and can rely on `instanceof Error`.
 * @private
 * @param {string} methodName name of the unsupported Word2Vec method
 * @throws {Error} always
 */
function throwNotImplemented(methodName) {
  throw new Error('Word2Vec.' + methodName + ' is not implemented by EclairJS');
}

/**
 * Sets vector size (default: 100).
 * Not supported yet: calling this method always throws.
 * @param {number} vectorSize
 * @throws {Error} always, because this setter is not implemented by EclairJS
 */
Word2Vec.prototype.setVectorSize = function(vectorSize) {
  throwNotImplemented('setVectorSize');
};

/**
 * Sets initial learning rate (default: 0.025).
 * Not supported yet: calling this method always throws.
 * @param {number} learningRate
 * @throws {Error} always, because this setter is not implemented by EclairJS
 */
Word2Vec.prototype.setLearningRate = function(learningRate) {
  throwNotImplemented('setLearningRate');
};

/**
 * Sets number of partitions (default: 1). Use a small number for accuracy.
 * Not supported yet: calling this method always throws.
 * @param {number} numPartitions
 * @throws {Error} always, because this setter is not implemented by EclairJS
 */
Word2Vec.prototype.setNumPartitions = function(numPartitions) {
  throwNotImplemented('setNumPartitions');
};

/**
 * Sets number of iterations (default: 1), which should be smaller than or
 * equal to number of partitions.
 * Not supported yet: calling this method always throws.
 * @param {number} numIterations
 * @throws {Error} always, because this setter is not implemented by EclairJS
 */
Word2Vec.prototype.setNumIterations = function(numIterations) {
  throwNotImplemented('setNumIterations');
};

/**
 * Sets random seed (default: a random long integer).
 * Not supported yet: calling this method always throws.
 * @param {number} seed
 * @throws {Error} always, because this setter is not implemented by EclairJS
 */
Word2Vec.prototype.setSeed = function(seed) {
  throwNotImplemented('setSeed');
};

/**
 * Sets the window of words (default: 5).
 * Not supported yet: calling this method always throws.
 * @param {number} window
 * @throws {Error} always, because this setter is not implemented by EclairJS
 */
Word2Vec.prototype.setWindowSize = function(window) {
  throwNotImplemented('setWindowSize');
};

/**
 * Sets minCount, the minimum number of times a token must appear to be
 * included in the word2vec model's vocabulary (default: 5).
 * Not supported yet: calling this method always throws.
 * @param {number} minCount
 * @throws {Error} always, because this setter is not implemented by EclairJS
 */
Word2Vec.prototype.setMinCount = function(minCount) {
  throwNotImplemented('setMinCount');
};

/**
 * Computes the vector representation of each word in vocabulary.
 * @param {module:eclairjs/rdd.RDD} dataset an RDD of words
 * @returns {module:eclairjs/mllib/feature.Word2VecModel} a Word2VecModel
 */
Word2Vec.prototype.fit = function(dataset) {
  // The model module is required at call time and bound to this instance's
  // kernelP (presumably set up by Utils.handleConstructor - confirm there).
  var Word2VecModel = require('./Word2VecModel.js')(this.kernelP);

  var args = {
    target: this,
    method: 'fit',
    // Forward every argument actually passed, not just `dataset`.
    args: Utils.wrapArguments(arguments),
    returnType: Word2VecModel
  };

  return Utils.generate(args);
};

// Location of this class relative to the eclairjs module root.
Word2Vec.moduleLocation = '/mllib/feature/Word2Vec';
module.exports = function(kP) {
if (kP) gKernelP = kP;
return Word2Vec;
};