// JSDoc: Source: eclairjs/sql/streaming/DataStreamWriter.js

/*                                                                         
* Copyright 2016 IBM Corp.                                                 
*                                                                          
* Licensed under the Apache License, Version 2.0 (the "License");          
* you may not use this file except in compliance with the License.         
* You may obtain a copy of the License at                                  
*                                                                          
*      http://www.apache.org/licenses/LICENSE-2.0                          
*                                                                          
* Unless required by applicable law or agreed to in writing, software      
* distributed under the License is distributed on an "AS IS" BASIS,        
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the License for the specific language governing permissions and      
* limitations under the License.                                           
*/ 

(function () {

    
    /**
     * @classdesc
     * 
     * Interface used to write a streaming {@link module:eclairjs/sql.Dataset} to external storage
     * systems (e.g. file systems, key-value stores, etc). Use `writeStream()` to access this,
     * as in `df.writeStream()`.
     *
     * The JavaScript binding is the Java wrapper class of the same name, loaded through
     * Nashorn's `Java.type`.
     *
     * @since EclairJS 0.7 Spark  2.0.0
     * @class DataStreamWriter
     * @memberof module:eclairjs/sql/streaming
     */
    var DataStreamWriter = Java.type('org.eclairjs.nashorn.wrap.sql.streaming.DataStreamWriter');
    
    
    /**
     * 
     * Specifies how data of a streaming DataFrame/Dataset is written to a streaming sink.
     *   - `append`: only the new rows in the streaming DataFrame/Dataset will be
     *                            written to the sink
     *   - `complete`: all the rows in the streaming DataFrame/Dataset will be written
     *                              to the sink every time there are some updates
     * @function
     * @name module:eclairjs/sql/streaming.DataStreamWriter#outputMode
     * @since EclairJS 0.7 Spark  2.0.0
     * @param {string} outputMode
     * @returns {module:eclairjs/sql/streaming.DataStreamWriter} 
     */

    
    
    /**
     * 
     * Set the trigger for the stream query. The default value is `ProcessingTime(0)` and it will run
     * the query as fast as possible.
     *
     * @function
     * @name module:eclairjs/sql/streaming.DataStreamWriter#trigger
     *
     * @example 
     *   df.writeStream().trigger(ProcessingTime.create("10 seconds"))
     *
     * @since EclairJS 0.7 Spark  2.0.0
     * @param {module:eclairjs/sql/streaming.Trigger} trigger
     * @returns {module:eclairjs/sql/streaming.DataStreamWriter} 
     */
    
    
    /**
     * 
     * Specifies the name of the {@link StreamingQuery} that can be started with `start()`.
     * This name must be unique among all the currently active queries in the associated SQLContext.
     *
     * @function
     * @name module:eclairjs/sql/streaming.DataStreamWriter#queryName
     * @since EclairJS 0.7 Spark  2.0.0
     * @param {string} queryName
     * @returns {module:eclairjs/sql/streaming.DataStreamWriter} 
     */

    
    /**
     * 
     * Specifies the underlying output data source. Built-in options include "parquet", "json", etc.
     *
     * @function
     * @name module:eclairjs/sql/streaming.DataStreamWriter#format
     * @since EclairJS 0.7 Spark  2.0.0
     * @param {string} source
     * @returns {module:eclairjs/sql/streaming.DataStreamWriter} 
     */
    
    
    /**
     * Partitions the output by the given columns on the file system. If specified, the output is
     * laid out on the file system similar to Hive's partitioning scheme. As an example, when we
     * partition a dataset by year and then month, the directory layout would look like:
     *
     *   - year=2016/month=01/
     *   - year=2016/month=02/
     *
     * Partitioning is one of the most widely used techniques to optimize physical data layout.
     * It provides a coarse-grained index for skipping unnecessary data reads when queries have
     * predicates on the partitioned columns. In order for partitioning to work well, the number
     * of distinct values in each column should typically be less than tens of thousands.
     *
     * This was initially applicable for Parquet but in 1.5+ covers JSON, text, ORC and avro as well.
     *
     * @function
     * @name module:eclairjs/sql/streaming.DataStreamWriter#partitionBy
     * @since EclairJS 0.7 Spark  2.0.0
     * @param {...string} colNames
     * @returns {module:eclairjs/sql/streaming.DataStreamWriter} 
     */
    
    
    /**
     * 
     * Adds an output option for the underlying data source.
     *
     * @function
     * @name module:eclairjs/sql/streaming.DataStreamWriter#option
     * @since EclairJS 0.7 Spark  2.0.0
     * @param {string} key
     * @param {string} value
     * @returns {module:eclairjs/sql/streaming.DataStreamWriter} 
     */
    
    
    /**
     * 
     * Starts the execution of the streaming query, which will continually output results to the given
     * path as new data arrives. The returned {@link StreamingQuery} object can be used to interact with
     * the stream.
     *
     * @function
     * @name module:eclairjs/sql/streaming.DataStreamWriter#start
     * @since EclairJS 0.7 Spark  2.0.0
     * @param {string} [path]
     * @returns {module:eclairjs/sql/streaming.StreamingQuery} 
     */

    
    /**
     * Sets up the streaming query to continually send its results to the processCallback
     * as new data arrives. The processCallback can be used to send the data
     * generated by the DataFrame/{@link module:eclairjs/sql.Dataset} to an external system.
     * As the example below shows, the query itself is started by calling `start()` on the returned writer.
     *
     * @function
     * @name module:eclairjs/sql/streaming.DataStreamWriter#foreach
     *
     * @example
     * var query = counts.writeStream().foreach(function(partitionId, version) {
     *       // open connection
     *      var socket = new java.net.Socket(serverAddress, port);
     *      return socket;
     *  },
     *  function(socket, value) {
     *     var out = new java.io.PrintWriter(socket.getOutputStream(), true);
     *     out.print(JSON.stringify(value));
     *     out.close();
     *   },
     *  function(socket) {
     *     socket.close();
     *  }).start();
     *  
     *
     * @since EclairJS 0.7 Spark  2.0.0
     * @param {module:eclairjs/sql/streaming.DataStreamWriter~openCallback} openCallback Used to open connection to external system.
     * @param {module:eclairjs/sql/streaming.DataStreamWriter~processCallback} processCallback Used to send the data to the external system.
     * @param {module:eclairjs/sql/streaming.DataStreamWriter~closeCallback} closeFunction Used to close connection to external system.
     * @param {object[]} [openFunctionBindArgs]
     * @param {object[]} [processFunctionBindArgs]
     * @param {object[]} [closeFunctionBindArgs]
     * @returns {module:eclairjs/sql/streaming.DataStreamWriter}
     */

    /**
     * This callback is used to open a connection to the external system.
     * @callback module:eclairjs/sql/streaming.DataStreamWriter~openCallback
     * @param {number} partitionId
     * @param {number} version
     * @param {object[]} [bindArgs]
     * @returns {object} connection that is passed to {@link module:eclairjs/sql/streaming.DataStreamWriter~processCallback}
     * and {@link module:eclairjs/sql/streaming.DataStreamWriter~closeCallback}
     */

    /**
     * This callback consumes the data generated by a StreamingQuery. Typically this is used to send the generated data
     * to external systems from each partition so you usually should do all the initialization (e.g. opening a connection
     * or initiating a transaction) in the {@link module:eclairjs/sql/streaming.DataStreamWriter~openCallback}.
     * @callback module:eclairjs/sql/streaming.DataStreamWriter~processCallback
     * @param {object} connection
     * @param {*} value The item generated by the query that is to be sent to the external system.
     * @param {object[]} [bindArgs]
     */

    /**
     * This callback is used to close the connection to the external system.
     * @callback module:eclairjs/sql/streaming.DataStreamWriter~closeCallback
     * @param {object} connection
     * @param {object[]} [bindArgs]
     */
    
    // Expose the Java-backed DataStreamWriter wrapper class as this module's export.
    module.exports = DataStreamWriter;
})();