// Source: sql/DataFrameWriter.js

/*
 * Copyright 2015 IBM Corp.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

var Utils = require('../utils.js');
var RDD = require('../rdd/RDD.js');
var DataFrame = require('./DataFrame.js');

/**
 * :: Experimental ::
 *
 * @since EclairJS 0.1 Spark 1.4.0
 * @constructor
 * @memberof module:eclairjs/sql
 * @classdesc Writer interface for persisting a {@link DataFrame} to external storage
 * systems (e.g. file systems, key-value stores, etc). Use {@link write} to obtain an instance.
 * @param {*} kernelP Stored unchanged on the instance as `kernelP`.
 *   NOTE(review): presumably a Promise for the remote kernel - confirm against callers.
 * @param {*} refIdP Stored unchanged on the instance as `refIdP`.
 *   NOTE(review): presumably a Promise for the remote object's reference id - confirm.
 */
function DataFrameWriter(kernelP, refIdP) {
  // Both handles are kept verbatim; nothing is validated or resolved here.
  this.kernelP = kernelP;
  this.refIdP = refIdP;
}

/**
 * Selects the output data source to write with. Built-in options include
 * "parquet", "json", etc.
 *
 * @since EclairJS 0.1 Spark 1.4.0
 * @param {string} source Name of the output data source.
 * @returns {module:eclairjs/sql.DataFrameWriter}
 */
DataFrameWriter.prototype.format = function(source) {
  // Forward everything the caller passed, not only the named parameter.
  var wrapped = Utils.wrapArguments(arguments);

  return Utils.generate({
    target: this,
    method: 'format',
    args: wrapped,
    returnType: DataFrameWriter
  });
};

/**
 * Inserts the content of the {@link DataFrame} into the named table. The schema of the
 * {@link DataFrame} must be the same as the schema of the table.
 *
 * Because the data goes into an existing table, format or options will be ignored.
 *
 * @since EclairJS 0.1 Spark 1.4.0
 * @param {string} tableName Name of the existing table to insert into.
 * @returns {Promise.<Void>} A Promise that resolves to nothing.
 */
DataFrameWriter.prototype.insertInto = function(tableName) {
  var wrapped = Utils.wrapArguments(arguments);

  // No returnType is supplied for this call.
  return Utils.generate({
    target: this,
    method: 'insertInto',
    args: wrapped
  });
};

/**
 * Saves the content of the {@link DataFrame} to an external database table via JDBC. If the
 * table already exists in the external database, the behavior of this function depends on the
 * save mode, specified by the `mode` function (defaults to throwing an exception).
 *
 * Don't create too many partitions in parallel on a large cluster; otherwise Spark might crash
 * your external database systems.
 *
 * @param {string} url  JDBC database url of the form `jdbc:subprotocol:subname`
 * @param {string} table  Name of the table in the external database.
 * @param {object} connectionProperties  JDBC database connection arguments, a list of arbitrary
 *                             string tag/value pairs. Normally at least a "user" and "password"
 *                             property should be included.
 * @returns {Promise.<Void>} A Promise that resolves to nothing.
 */
DataFrameWriter.prototype.jdbc = function(url,table,connectionProperties) {
  var wrapped = Utils.wrapArguments(arguments);

  // No returnType is supplied for this call.
  return Utils.generate({
    target: this,
    method: 'jdbc',
    args: wrapped
  });
};

/**
 * Writes the content of the {@link DataFrame} in JSON format at the given path.
 * This is equivalent to:
 * @example
 *   format("json").save(path)
 *
 * @since EclairJS 0.1 Spark 1.4.0
 * @param {string} path Destination path for the JSON output.
 * @returns {Promise.<Void>} A Promise that resolves to nothing.
 */
DataFrameWriter.prototype.json = function(path) {
  var wrapped = Utils.wrapArguments(arguments);

  // No returnType is supplied for this call.
  return Utils.generate({
    target: this,
    method: 'json',
    args: wrapped
  });
};

/**
 * Chooses what happens when the data or table already exists. Options include:
 *   - `overwrite`: overwrite the existing data.
 *   - `append`: append the data.
 *   - `ignore`: ignore the operation (i.e. no-op).
 *   - `error`: default option, throw an exception at runtime.
 *
 * @since EclairJS 0.1 Spark 1.4.0
 * @param {string} saveMode One of the options listed above.
 * @returns {module:eclairjs/sql.DataFrameWriter}
 */
DataFrameWriter.prototype.mode = function(saveMode) {
  var wrapped = Utils.wrapArguments(arguments);

  return Utils.generate({
    target: this,
    method: 'mode',
    args: wrapped,
    returnType: DataFrameWriter
  });
};

/**
 * Sets a single option for the underlying data source.
 *
 * @since EclairJS 0.1 Spark 1.4.0
 * @param {string} key Option name.
 * @param {string} value Option value.
 * @returns {module:eclairjs/sql.DataFrameWriter}
 */
DataFrameWriter.prototype.option = function(key, value) {
  var wrapped = Utils.wrapArguments(arguments);

  return Utils.generate({
    target: this,
    method: 'option',
    args: wrapped,
    returnType: DataFrameWriter
  });
};

/**
 * Sets several options at once for the underlying data source.
 *
 * @since EclairJS 0.1 Spark 1.4.0
 * @param {Map} options Option names and their values.
 * @returns {module:eclairjs/sql.DataFrameWriter}
 */
DataFrameWriter.prototype.options = function(options) {
  var wrapped = Utils.wrapArguments(arguments);

  return Utils.generate({
    target: this,
    method: 'options',
    args: wrapped,
    returnType: DataFrameWriter
  });
};

/**
 * Writes the content of the {@link DataFrame} in Parquet format at the given path.
 * This is equivalent to:
 * @example
 *   format("parquet").save(path)
 *
 * @since EclairJS 0.1 Spark 1.4.0
 * @param {string} path Destination path for the Parquet output.
 * @returns {Promise.<Void>} A Promise that resolves to nothing.
 */
DataFrameWriter.prototype.parquet = function(path) {
  var wrapped = Utils.wrapArguments(arguments);

  // No returnType is supplied for this call.
  return Utils.generate({
    target: this,
    method: 'parquet',
    args: wrapped
  });
};

/**
 * Writes the content of the {@link DataFrame} in ORC format at the given path.
 * This is equivalent to:
 * @example
 *   format("orc").save(path)
 *
 * @since EclairJS 0.1 Spark 1.5.0
 * @note Currently, this method can only be used together with `HiveContext`.
 * @param {string} path Destination path for the ORC output.
 * @returns {Promise.<Void>} A Promise that resolves to nothing.
 */
DataFrameWriter.prototype.orc = function(path) {
  var wrapped = Utils.wrapArguments(arguments);

  // No returnType is supplied for this call.
  return Utils.generate({
    target: this,
    method: 'orc',
    args: wrapped
  });
};

/**
 * Partitions the output by the given columns on the file system. If specified, the output is
 * laid out on the file system similar to Hive's partitioning scheme.
 *
 * This is only applicable for Parquet at the moment.
 *
 * @since EclairJS 0.1 Spark 1.4.0
 * @param {...string} colNames One or more column names, passed as separate arguments.
 * @returns {module:eclairjs/sql.DataFrameWriter}
 */
DataFrameWriter.prototype.partitionBy = function(colNames) {
  // Every argument is forwarded, so any number of column names may be given
  // even though only one parameter is declared.
  var wrapped = Utils.wrapArguments(arguments);

  return Utils.generate({
    target: this,
    method: 'partitionBy',
    args: wrapped,
    returnType: DataFrameWriter
  });
};

/**
 * Saves the content of the {@link DataFrame}. When `path` is supplied, the content is
 * saved at that path.
 *
 * @since EclairJS 0.1 Spark 1.4.0
 * @param {string} [path] Optional path at which to save the content of the {@link DataFrame}.
 * @returns {Promise.<Void>} A Promise that resolves to nothing.
 */
DataFrameWriter.prototype.save = function(path) {
  // When called with no arguments, whatever Utils.wrapArguments returns for an
  // empty arguments object is forwarded unchanged.
  var wrapped = Utils.wrapArguments(arguments);

  // No returnType is supplied for this call.
  return Utils.generate({
    target: this,
    method: 'save',
    args: wrapped
  });
};

/**
 * Saves the content of the {@link DataFrame} as the specified table.
 *
 * If the table already exists, the behavior of this function depends on the
 * save mode, specified by the `mode` function (defaults to throwing an exception).
 * When `mode` is `Overwrite`, the schema of the {@link DataFrame} does not need to be
 * the same as that of the existing table.
 * When `mode` is `Append`, the schema of the {@link DataFrame} needs to be
 * the same as that of the existing table, and format or options will be ignored.
 *
 * When the DataFrame is created from a non-partitioned {@link HadoopFsRelation} with a single
 * input path, and the data source provider can be mapped to an existing Hive builtin SerDe
 * (i.e. ORC and Parquet), the table is persisted in a Hive compatible format, which means other
 * systems like Hive will be able to read this table. Otherwise, the table is persisted in a
 * Spark SQL specific format.
 *
 * @since EclairJS 0.1 Spark 1.4.0
 * @param {string} tableName Name of the table to save as.
 * @returns {Promise.<Void>} A Promise that resolves to nothing.
 */
DataFrameWriter.prototype.saveAsTable = function(tableName) {
  var wrapped = Utils.wrapArguments(arguments);

  // No returnType is supplied for this call.
  return Utils.generate({
    target: this,
    method: 'saveAsTable',
    args: wrapped
  });
};

// Static property on the constructor holding this module's location string.
// NOTE(review): presumably read by ../utils.js to locate the matching
// server-side module when generating calls - confirm against its consumers.
DataFrameWriter.moduleLocation = '/sql/DataFrameWriter';

// The constructor itself is the module's sole export.
module.exports = DataFrameWriter;