Source: eclairjs/sql/SparkSession.js

/*
 * Copyright 2016 IBM Corp.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
(function () {
/**
 * @classdesc
 * The entry point to programming Spark with the Dataset and DataFrame API.
 *
 * In environments where this has been created up front (e.g. REPL, notebooks), use the builder
 * to get an existing session:
 *
 * @example
 * SparkSession.builder().getOrCreate()
 *
 * The builder can also be used to create a new session:
 *
 * @example
 * SparkSession.builder()
 *   .master("local")
 *   .appName("Word Count")
 *   .config("spark.some.config.option", "some-value")
 *   .getOrCreate()
 *
 * @class
 * @memberof module:eclairjs/sql
 */
var SparkSession = Java.type('org.eclairjs.nashorn.wrap.sql.SparkSession');
/**
 * The version of Spark on which this application is running.
 *
 * @since EclairJS 0.6 Spark 2.0.0
 * @function
 * @name module:eclairjs/sql.SparkSession#version
 * @returns {string}
 */
/**
 * The underlying SparkContext.
 *
 * @since EclairJS 0.6 Spark 2.0.0
 * @function
 * @name module:eclairjs/sql.SparkSession#sparkContext
 * @returns {module:eclairjs.SparkContext}
 */
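// Usage sketch for the two accessors documented above. Both are declared as functions on
// the wrapped SparkSession; the `sparkSession` variable is assumed to have been obtained
// beforehand via SparkSession.builder().getOrCreate():
//
//   var sparkVersion = sparkSession.version();   // e.g. "2.0.0"
//   var sc = sparkSession.sparkContext();        // module:eclairjs.SparkContext
//   print("Running on Spark " + sparkVersion);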
/**
 * A collection of methods for registering user-defined functions (UDF).
 * Note that the user-defined functions must be deterministic. Due to optimization,
 * duplicate invocations may be eliminated or the function may even be invoked more times than
 * it is present in the query.
 *
 * @since EclairJS 0.6 Spark 2.0.0
 * @function
 * @name module:eclairjs/sql.SparkSession#udf
 * @returns {module:eclairjs/sql.UDFRegistration}
 */
/*
SparkSession.prototype.udf = function() {
    var javaObject = this.getJavaObject().udf();
    return Utils.javaToJs(javaObject);
};
*/
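// Registration sketch for the udf() accessor above. The register(name, fn, returnType)
// signature and the DataTypes module path mirror the Spark UDFRegistration API and are
// assumptions about this EclairJS build; the "people" temp view is assumed to have been
// registered elsewhere:
//
//   var DataTypes = require(EclairJS_Globals.NAMESPACE + '/sql/types/DataTypes');
//   sparkSession.udf().register("shout", function (s) {
//       return s.toUpperCase() + "!";
//   }, DataTypes.StringType);
//   sparkSession.sql("SELECT shout(name) FROM people").show();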
/**
 * :: Experimental ::
 * Returns a {@link StreamingQueryManager} that allows managing all the
 * {@link StreamingQuery StreamingQueries} active on `this`.
 *
 * @since EclairJS 0.6 Spark 2.0.0
 * @function
 * @name module:eclairjs/sql.SparkSession#streams
 * @returns {module:eclairjs/sql/streaming.StreamingQueryManager}
 */
/*
SparkSession.prototype.streams = function() {
    var javaObject = this.getJavaObject().streams();
    return Utils.javaToJs(javaObject);
};
*/
/**
 * Start a new session in which SQL configurations, temporary tables, and registered
 * functions are isolated, but which shares the underlying {@link SparkContext} and cached data.
 *
 * Note: Other than the {@link SparkContext}, all shared state is initialized lazily.
 * This method will force the initialization of the shared state to ensure that parent
 * and child sessions are set up with the same shared state. If the underlying catalog
 * implementation is Hive, this will initialize the metastore, which may take some time.
 *
 * @since EclairJS 0.6 Spark 2.0.0
 * @function
 * @name module:eclairjs/sql.SparkSession#newSession
 * @returns {module:eclairjs/sql.SparkSession}
 */
/*
SparkSession.prototype.newSession = function() {
    var javaObject = this.getJavaObject().newSession();
    return Utils.javaToJs(javaObject);
};
*/
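// Isolation sketch for newSession(): the child session shares the SparkContext and cached
// data, but temp views are session-scoped. The view name is illustrative, and
// createOrReplaceTempView is assumed to be available on the wrapped Dataset as in Spark 2.0:
//
//   var child = sparkSession.newSession();
//   sparkSession.range(0, 5).createOrReplaceTempView("ids");   // registered in the parent only
//   sparkSession.sql("SELECT * FROM ids").show();              // resolves in the parent
//   // child.sql("SELECT * FROM ids") would fail: temp views are not shared with child sessions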
/**
 * :: Experimental ::
 * Creates a new {@link Dataset} of type T containing zero elements.
 *
 * @since EclairJS 0.6 Spark 2.0.0
 * @function
 * @name module:eclairjs/sql.SparkSession#emptyDataset
 * @returns {module:eclairjs/sql.Dataset}
 */
/*
SparkSession.prototype.emptyDataset = function() {
    var javaObject = this.getJavaObject().emptyDataset();
    return Utils.javaToJs(javaObject);
};
*/
/**
 * Creates a {@link Dataset} from an {@link RDD} of Rows using the given schema.
 * @function
 * @name module:eclairjs/sql.SparkSession#createDataFrame
 * @param {module:eclairjs.RDD<module:eclairjs/sql.Row> | module:eclairjs/sql.Row[]} rowRDD_or_values An RDD of [Rows]{@link Row}, or an array of arrays containing values of valid {@link DataTypes}
 * @param {module:eclairjs/sql/types.StructType} schema - the schema describing the columns of the resulting DataFrame
 * @returns {module:eclairjs/sql.DataFrame}
 * @example
 * var df = sparkSession.createDataFrame([[1,1], [1,2], [2,1], [2,1], [2,3], [3,2], [3,3]], schema);
 */
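// Sketch of building the `schema` referenced in the example above. The module path and the
// createStructType/createStructField helpers mirror the Spark SQL types API and are
// assumptions about this EclairJS build:
//
//   var DataTypes = require(EclairJS_Globals.NAMESPACE + '/sql/types/DataTypes');
//   var schema = DataTypes.createStructType([
//       DataTypes.createStructField("key", DataTypes.IntegerType, true),
//       DataTypes.createStructField("value", DataTypes.IntegerType, true)
//   ]);
//   var df = sparkSession.createDataFrame([[1, 1], [1, 2], [2, 1]], schema);
//   df.show();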
/**
 * Creates a {@link Dataset} from an RDD of JSON objects.
 * @function
 * @name module:eclairjs/sql.SparkSession#createDataFrameFromJson
 * @param {module:eclairjs.RDD<object>} json - RDD of JSON objects
 * @param {object} schema - object with keys corresponding to JSON field names (or getter functions), and values indicating Datatype
 * @returns {module:eclairjs/sql.Dataset}
 * @example
 * var df = sparkSession.createDataFrameFromJson([{"id":1,"name":"jim"},{"id":2,"name":"tom"}], {"id":"Integer","name":"String"});
 */
/**
 * Convert a {@link BaseRelation} created for external data sources into a {@link DataFrame}.
 *
 * @since EclairJS 0.6 Spark 2.0.0
 * @function
 * @name module:eclairjs/sql.SparkSession#baseRelationToDataFrame
 * @param {module:eclairjs/sql/sources.BaseRelation} baseRelation
 * @returns {module:eclairjs/sql.DataFrame}
 */
/*
SparkSession.prototype.baseRelationToDataFrame = function(baseRelation) {
    var baseRelation_uw = Utils.unwrapObject(baseRelation);
    var javaObject = this.getJavaObject().baseRelationToDataFrame(baseRelation_uw);
    return Utils.javaToJs(javaObject);
};
*/
//
// /**
//  * :: Experimental ::
//  * Creates a {@link Dataset} from a local Seq of data of a given type. This method requires an
//  * encoder (to convert a JVM object of type `T` to and from the internal Spark SQL representation)
//  * that is generally created automatically through implicits from a `SparkSession`, or can be
//  * created explicitly by calling static methods on {@link Encoders}.
//  *
//  * == Example ==
//  *
//  * @example
//  *
//  * import spark.implicits._
//  * case class Person(name: String, age: Long)
//  * val data = Seq(Person("Michael", 29), Person("Andy", 30), Person("Justin", 19))
//  * val ds = spark.createDataset(data)
//  *
//  * ds.show()
//  * // +-------+---+
//  * // |   name|age|
//  * // +-------+---+
//  * // |Michael| 29|
//  * // |   Andy| 30|
//  * // | Justin| 19|
//  * // +-------+---+
//  *
//  * @since EclairJS 0.6 Spark 2.0.0
//  * @param {object[]} data
//  * @returns {module:eclairjs/sql.Dataset}
//  */
// SparkSession.prototype.createDataset0 = function(data) {
//   throw "not implemented by EclairJS";
// //   var data_uw = Utils.unwrapObject(data);
// //   var javaObject = this.getJavaObject().createDataset(data_uw);
// //   return new Dataset(javaObject);
// };
//
//
// /**
//  * :: Experimental ::
//  * Creates a {@link Dataset} from an RDD of a given type. This method requires an
//  * encoder (to convert a JVM object of type `T` to and from the internal Spark SQL representation)
//  * that is generally created automatically through implicits from a `SparkSession`, or can be
//  * created explicitly by calling static methods on {@link Encoders}.
//  *
//  * @since EclairJS 0.6 Spark 2.0.0
//  * @param {module:eclairjs/rdd.RDD} data
//  * @returns {module:eclairjs/sql.Dataset}
//  */
// SparkSession.prototype.createDataset1 = function(data) {
//   throw "not implemented by EclairJS";
// //   var data_uw = Utils.unwrapObject(data);
// //   var javaObject = this.getJavaObject().createDataset(data_uw);
// //   return new Dataset(javaObject);
// };
//
//
// /**
//  * :: Experimental ::
//  * Creates a {@link Dataset} from a {@link List} of a given type. This method requires an
//  * encoder (to convert a JVM object of type `T` to and from the internal Spark SQL representation)
//  * that is generally created automatically through implicits from a `SparkSession`, or can be
//  * created explicitly by calling static methods on {@link Encoders}.
//  *
//  * == Java Example ==
//  *
//  * @example
//  * List<String> data = Arrays.asList("hello", "world");
//  * Dataset<String> ds = spark.createDataset(data, Encoders.STRING());
//  *
//  * @since EclairJS 0.6 Spark 2.0.0
//  * @param {[]} data
//  * @returns {module:eclairjs/sql.Dataset}
//  */
// SparkSession.prototype.createDataset2 = function(data) {
//   throw "not implemented by EclairJS";
// //   var javaObject = this.getJavaObject().createDataset(data);
// //   return new Dataset(javaObject);
// };
/**
 * :: Experimental ::
 * Creates a {@link Dataset} with a single {@link LongType} column named `id`, containing elements
 * in a range from `start` to `end` (exclusive) with a step value, with partition number
 * specified.
 *
 * @since EclairJS 0.6 Spark 2.0.0
 * @function
 * @name module:eclairjs/sql.SparkSession#range
 * @param {number} start
 * @param {number} end
 * @param {number} [step]
 * @param {number} [numPartitions]
 * @returns {module:eclairjs/sql.Dataset}
 */
/*
SparkSession.prototype.range3 = function(start,end,step,numPartitions) {
    throw "not implemented by EclairJS";
//   var javaObject = this.getJavaObject().range(start,end,step,numPartitions);
//   return Utils.javaToJs(javaObject);
};
*/
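// Usage sketch for range(), assuming the wrapper exposes the argument list documented
// above: a Dataset with one LongType column named `id`, end-exclusive.
//
//   var ids = sparkSession.range(0, 100, 10, 2);   // 0, 10, 20, ... 90 in 2 partitions
//   ids.show();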
/**
 * Returns the specified table as a {@link module:eclairjs/sql.Dataset}.
 *
 * @since EclairJS 0.6 Spark 2.0.0
 * @function
 * @name module:eclairjs/sql.SparkSession#table
 * @param {string} tableName
 * @returns {module:eclairjs/sql.Dataset}
 */
/*
SparkSession.prototype.table = function(tableName) {
    var javaObject = this.getJavaObject().table(tableName);
    return Utils.javaToJs(javaObject);
};
*/
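// Usage sketch for table(): the name must refer to a table or temp view already known to
// the session's catalog; "people" below is illustrative:
//
//   var people = sparkSession.table("people");
//   people.show();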
/**
 * Executes a SQL query using Spark, returning the result as a {@link module:eclairjs/sql.Dataset}.
 * The dialect that is used for SQL parsing can be configured with 'spark.sql.dialect'.
 *
 * @since EclairJS 0.6 Spark 2.0.0
 * @function
 * @name module:eclairjs/sql.SparkSession#sql
 * @param {string} sqlText
 * @returns {module:eclairjs/sql.Dataset}
 */
/*
SparkSession.prototype.sql = function(sqlText) {
    var javaObject = this.getJavaObject().sql(sqlText);
    return Utils.javaToJs(javaObject);
};
*/
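// Usage sketch for sql(): load data, expose it as a temp view, then query it. The file path
// and view name are illustrative, and createOrReplaceTempView is assumed to be available on
// the wrapped Dataset as it is in Spark 2.0:
//
//   var people = sparkSession.read().json("/path/to/people.json");
//   people.createOrReplaceTempView("people");
//   var adults = sparkSession.sql("SELECT name, age FROM people WHERE age >= 18");
//   adults.show();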
/**
 * Returns a {@link DataFrameReader} that can be used to read non-streaming data in as a
 * {@link DataFrame}.
 * @example
 * sparkSession.read().parquet("/path/to/file.parquet")
 * sparkSession.read().schema(schema).json("/path/to/file.json")
 *
 * @since EclairJS 0.6 Spark 2.0.0
 * @function
 * @name module:eclairjs/sql.SparkSession#read
 * @returns {module:eclairjs/sql.DataFrameReader}
 */
/*
SparkSession.prototype.read = function() {
    var javaObject = this.getJavaObject().read();
    return new DataFrameReader(javaObject);
};
*/
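// Usage sketch for read(): the option()/csv() calls mirror the Spark 2.0 DataFrameReader
// API, and their availability in this wrapper is an assumption; the path is illustrative:
//
//   var df = sparkSession.read()
//       .option("header", "true")
//       .csv("/path/to/people.csv");
//   df.printSchema();
//   df.show(5);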
/**
 * :: Experimental ::
 * Returns a {@link DataStreamReader} that can be used to read streaming data in as a {@link DataFrame}.
 * @example
 * sparkSession.readStream().parquet("/path/to/directory/of/parquet/files")
 * sparkSession.readStream().schema(schema).json("/path/to/directory/of/json/files")
 *
 * @since EclairJS 0.6 Spark 2.0.0
 * @function
 * @name module:eclairjs/sql.SparkSession#readStream
 * @returns {module:eclairjs/sql/streaming.DataStreamReader}
 */
/*
SparkSession.prototype.readStream = function() {
    var javaObject = this.getJavaObject().readStream();
    return Utils.javaToJs(javaObject);
};
*/
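// Streaming sketch for readStream(): a schema is required for file sources, and the
// writeStream()/format()/start() chain mirrors the Spark structured streaming API; its
// availability in this wrapper is an assumption:
//
//   var streamingDF = sparkSession.readStream()
//       .schema(schema)                              // schema built with the DataTypes helpers
//       .json("/path/to/directory/of/json/files");
//   var query = streamingDF.writeStream()
//       .format("console")
//       .start();
//   query.awaitTermination();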
/**
 * Stop the underlying {@link module:eclairjs.SparkContext}.
 *
 * @function
 * @name module:eclairjs/sql.SparkSession#stop
 * @since EclairJS 0.6 Spark 2.0.0
 */
//
// static methods
//
/**
 * Creates a {@link module:eclairjs/sql.SparkSessionBuilder} for constructing a {@link module:eclairjs/sql.SparkSession}.
 *
 * @since EclairJS 0.6 Spark 2.0.0
 * @function
 * @static
 * @name module:eclairjs/sql.SparkSession#builder
 * @returns {module:eclairjs/sql.SparkSessionBuilder}
 */
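// End-to-end sketch tying builder() and stop() together. The app name, master, and file
// path are illustrative, and text() on the reader mirrors the Spark 2.0 DataFrameReader
// API; its availability here is an assumption:
//
//   var spark = SparkSession.builder()
//       .master("local[*]")
//       .appName("Word Count")
//       .getOrCreate();
//   var lines = spark.read().text("/path/to/words.txt");
//   print("line count: " + lines.count());
//   spark.stop();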
/**
 * Changes the SparkSession that will be returned in this thread and its children when
 * SparkSession.getOrCreate() is called. This can be used to ensure that a given thread receives
 * a SparkSession with an isolated session, instead of the global (first created) context.
 *
 * @since EclairJS 0.6 Spark 2.0.0
 * @function
 * @static
 * @name module:eclairjs/sql.SparkSession#setActiveSession
 * @param {module:eclairjs/sql.SparkSession} session
 */
/**
 * Clears the active SparkSession for the current thread. Subsequent calls to getOrCreate will
 * return the first created context instead of a thread-local override.
 *
 * @since EclairJS 0.6 Spark 2.0.0
 * @function
 * @static
 * @name module:eclairjs/sql.SparkSession#clearActiveSession
 */
/**
 * Sets the default SparkSession that is returned by the builder.
 *
 * @since EclairJS 0.6 Spark 2.0.0
 * @function
 * @static
 * @name module:eclairjs/sql.SparkSession#setDefaultSession
 * @param {module:eclairjs/sql.SparkSession} session
 */
/**
 * Clears the default SparkSession that is returned by the builder.
 *
 * @since EclairJS 0.6 Spark 2.0.0
 * @function
 * @static
 * @name module:eclairjs/sql.SparkSession#clearDefaultSession
 */

module.exports = SparkSession;

})();