Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 51 additions & 50 deletions notebook/2A94M5J1Z/note.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
"groups": [],
"scatter": {}
},
"editorHide": false
"editorHide": true
},
"settings": {
"params": {},
Expand All @@ -32,42 +32,9 @@
"status": "FINISHED",
"progressUpdateIntervalMs": 500
},
{
"title": "Prepare data",
"text": "import sys.process._\n//you will need \u0027wget\u0027 tool to download\n\"wget http://archive.ics.uci.edu/ml/machine-learning-databases/00222/bank.zip\" !\n\"mkdir data\" !\n\"unzip bank.zip -d data\" !\n\"rm bank.zip\" !",
"config": {
"colWidth": 12.0,
"graph": {
"mode": "table",
"height": 300.0,
"optionOpen": false,
"keys": [],
"values": [],
"groups": [],
"scatter": {}
},
"title": true
},
"settings": {
"params": {},
"forms": {}
},
"jobName": "paragraph_1417656535623_-196593192",
"id": "20141204-102855_1590713432",
"result": {
"code": "SUCCESS",
"type": "TEXT",
"msg": "import sys.process._\nwarning: there were 1 feature warning(s); re-run with -feature for details\nres1: Int \u003d 0\nwarning: there were 1 feature warning(s); re-run with -feature for details\nres2: Int \u003d 0\nwarning: there were 1 feature warning(s); re-run with -feature for details\nres3: Int \u003d 0\nwarning: there were 1 feature warning(s); re-run with -feature for details\nres4: Int \u003d 0\n"
},
"dateCreated": "Dec 4, 2014 10:28:55 AM",
"dateStarted": "Apr 1, 2015 9:11:12 PM",
"dateFinished": "Apr 1, 2015 9:11:22 PM",
"status": "FINISHED",
"progressUpdateIntervalMs": 500
},
{
"title": "Load data into table",
"text": "import sys.process._\n// Zeppelin creates and injects sc (SparkContext) and sqlContext (HiveContext or SqlContext)\n// So you don\u0027t need create them manually\n\nval zeppelinHome \u003d (\"pwd\" !!).replace(\"\\n\", \"\")\nval bankText \u003d sc.textFile(s\"file://$zeppelinHome/data/bank-full.csv\")\n\ncase class Bank(age: Integer, job: String, marital: String, education: String, balance: Integer)\n\nval bank \u003d bankText.map(s \u003d\u003e s.split(\";\")).filter(s \u003d\u003e s(0) !\u003d \"\\\"age\\\"\").map(\n s \u003d\u003e Bank(s(0).toInt, \n s(1).replaceAll(\"\\\"\", \"\"),\n s(2).replaceAll(\"\\\"\", \"\"),\n s(3).replaceAll(\"\\\"\", \"\"),\n s(5).replaceAll(\"\\\"\", \"\").toInt\n )\n).toDF()\nbank.registerTempTable(\"bank\")\n\n",
"text": "import org.apache.commons.io.IOUtils\nimport java.net.URL\nimport java.nio.charset.Charset\n\n// Zeppelin creates and injects sc (SparkContext) and sqlContext (HiveContext or SqlContext)\n// So you don\u0027t need create them manually\n\n// load bank data\nval bankText \u003d sc.parallelize(\n IOUtils.toString(\n new URL(\"https://s3.amazonaws.com/apache-zeppelin/tutorial/bank/bank.csv\"),\n Charset.forName(\"utf8\")).split(\"\\n\"))\n\ncase class Bank(age: Integer, job: String, marital: String, education: String, balance: Integer)\n\nval bank \u003d bankText.map(s \u003d\u003e s.split(\";\")).filter(s \u003d\u003e s(0) !\u003d \"\\\"age\\\"\").map(\n s \u003d\u003e Bank(s(0).toInt, \n s(1).replaceAll(\"\\\"\", \"\"),\n s(2).replaceAll(\"\\\"\", \"\"),\n s(3).replaceAll(\"\\\"\", \"\"),\n s(5).replaceAll(\"\\\"\", \"\").toInt\n )\n).toDF()\nbank.registerTempTable(\"bank\")",
"config": {
"colWidth": 12.0,
"graph": {
Expand All @@ -90,11 +57,11 @@
"result": {
"code": "SUCCESS",
"type": "TEXT",
"msg": "import sys.process._\nsqlContext: org.apache.spark.sql.SQLContext \u003d org.apache.spark.sql.SQLContext@2c91e2d6\nwarning: there were 1 feature warning(s); re-run with -feature for details\nzeppelinHome: String \u003d /home/langley/lab/incubator-zeppelin\nbankText: org.apache.spark.rdd.RDD[String] \u003d /home/langley/lab/incubator-zeppelin/data/bank-full.csv MapPartitionsRDD[1] at textFile at \u003cconsole\u003e:31\ndefined class Bank\nbank: org.apache.spark.sql.DataFrame \u003d [age: int, job: string, marital: string, education: string, balance: int]\n"
"msg": "import org.apache.commons.io.IOUtils\nimport java.net.URL\nimport java.nio.charset.Charset\nbankText: org.apache.spark.rdd.RDD[String] \u003d ParallelCollectionRDD[32] at parallelize at \u003cconsole\u003e:65\ndefined class Bank\nbank: org.apache.spark.sql.DataFrame \u003d [age: int, job: string, marital: string, education: string, balance: int]\n"
},
"dateCreated": "Feb 10, 2015 1:52:59 AM",
"dateStarted": "Apr 1, 2015 9:11:28 PM",
"dateFinished": "Apr 1, 2015 9:11:39 PM",
"dateStarted": "Jul 3, 2015 1:43:40 PM",
"dateFinished": "Jul 3, 2015 1:43:45 PM",
"status": "FINISHED",
"progressUpdateIntervalMs": 500
},
Expand Down Expand Up @@ -144,11 +111,11 @@
"result": {
"code": "SUCCESS",
"type": "TABLE",
"msg": "age\tvalue\n18\t12\n19\t35\n20\t50\n21\t79\n22\t129\n23\t202\n24\t302\n25\t527\n26\t805\n27\t909\n28\t1038\n29\t1185\n"
"msg": "age\tvalue\n19\t4\n20\t3\n21\t7\n22\t9\n23\t20\n24\t24\n25\t44\n26\t77\n27\t94\n28\t103\n29\t97\n"
},
"dateCreated": "Feb 10, 2015 1:53:02 AM",
"dateStarted": "Apr 1, 2015 9:11:43 PM",
"dateFinished": "Apr 1, 2015 9:11:45 PM",
"dateStarted": "Jul 3, 2015 1:43:17 PM",
"dateFinished": "Jul 3, 2015 1:43:23 PM",
"status": "FINISHED",
"progressUpdateIntervalMs": 500
},
Expand Down Expand Up @@ -206,11 +173,11 @@
"result": {
"code": "SUCCESS",
"type": "TABLE",
"msg": "age\tvalue\n18\t12\n19\t35\n20\t50\n21\t79\n22\t129\n23\t202\n24\t302\n25\t527\n26\t805\n27\t909\n28\t1038\n29\t1185\n30\t1757\n31\t1996\n32\t2085\n33\t1972\n34\t1930\n"
"msg": "age\tvalue\n19\t4\n20\t3\n21\t7\n22\t9\n23\t20\n24\t24\n25\t44\n26\t77\n27\t94\n28\t103\n29\t97\n30\t150\n31\t199\n32\t224\n33\t186\n34\t231\n"
},
"dateCreated": "Feb 12, 2015 2:54:04 PM",
"dateStarted": "Apr 1, 2015 9:12:03 PM",
"dateFinished": "Apr 1, 2015 9:12:03 PM",
"dateStarted": "Jul 3, 2015 1:43:28 PM",
"dateFinished": "Jul 3, 2015 1:43:29 PM",
"status": "FINISHED",
"progressUpdateIntervalMs": 500
},
Expand Down Expand Up @@ -279,11 +246,11 @@
"result": {
"code": "SUCCESS",
"type": "TABLE",
"msg": "age\tvalue\n18\t12\n19\t35\n20\t47\n21\t74\n22\t120\n23\t175\n24\t248\n25\t423\n26\t615\n27\t658\n28\t697\n29\t683\n30\t1012\n31\t1017\n32\t941\n33\t746\n34\t650\n35\t631\n36\t538\n37\t453\n38\t394\n39\t346\n40\t257\n41\t241\n42\t218\n43\t183\n44\t170\n45\t146\n46\t130\n47\t100\n48\t124\n49\t101\n50\t76\n51\t72\n52\t62\n53\t71\n54\t55\n55\t54\n56\t45\n57\t38\n58\t35\n59\t36\n60\t27\n61\t5\n63\t2\n66\t5\n67\t3\n68\t4\n69\t2\n70\t1\n71\t1\n72\t5\n73\t2\n77\t1\n83\t2\n86\t1\n"
"msg": "age\tvalue\n19\t4\n20\t3\n21\t7\n22\t9\n23\t17\n24\t13\n25\t33\n26\t56\n27\t64\n28\t78\n29\t56\n30\t92\n31\t86\n32\t105\n33\t61\n34\t75\n35\t46\n36\t50\n37\t43\n38\t44\n39\t30\n40\t25\n41\t19\n42\t23\n43\t21\n44\t20\n45\t15\n46\t14\n47\t12\n48\t12\n49\t11\n50\t8\n51\t6\n52\t9\n53\t4\n55\t3\n56\t3\n57\t2\n58\t7\n59\t2\n60\t5\n66\t2\n69\t1\n"
},
"dateCreated": "Feb 13, 2015 11:04:22 PM",
"dateStarted": "Apr 1, 2015 9:12:10 PM",
"dateFinished": "Apr 1, 2015 9:12:10 PM",
"dateStarted": "Jul 3, 2015 1:43:33 PM",
"dateFinished": "Jul 3, 2015 1:43:34 PM",
"status": "FINISHED",
"progressUpdateIntervalMs": 500
},
Expand All @@ -299,7 +266,8 @@
"values": [],
"groups": [],
"scatter": {}
}
},
"editorHide": true
},
"settings": {
"params": {},
Expand All @@ -319,22 +287,55 @@
"progressUpdateIntervalMs": 500
},
{
"config": {},
"text": "%md\n\nAbout bank data\n\n```\nCitation Request:\n This dataset is public available for research. The details are described in [Moro et al., 2011]. \n Please include this citation if you plan to use this database:\n\n [Moro et al., 2011] S. Moro, R. Laureano and P. Cortez. Using Data Mining for Bank Direct Marketing: An Application of the CRISP-DM Methodology. \n In P. Novais et al. (Eds.), Proceedings of the European Simulation and Modelling Conference - ESM\u00272011, pp. 117-121, Guimarães, Portugal, October, 2011. EUROSIS.\n\n Available at: [pdf] http://hdl.handle.net/1822/14838\n [bib] http://www3.dsi.uminho.pt/pcortez/bib/2011-esm-1.txt\n```",
"config": {
"colWidth": 12.0,
"graph": {
"mode": "table",
"height": 300.0,
"optionOpen": false,
"keys": [],
"values": [],
"groups": [],
"scatter": {}
},
"editorHide": true
},
"settings": {
"params": {},
"forms": {}
},
"jobName": "paragraph_1427420818407_872443482",
"id": "20150326-214658_12335843",
"result": {
"code": "SUCCESS",
"type": "HTML",
"msg": "\u003cp\u003eAbout bank data\u003c/p\u003e\n\u003cpre\u003e\u003ccode\u003eCitation Request:\n This dataset is public available for research. The details are described in [Moro et al., 2011]. \n Please include this citation if you plan to use this database:\n\n [Moro et al., 2011] S. Moro, R. Laureano and P. Cortez. Using Data Mining for Bank Direct Marketing: An Application of the CRISP-DM Methodology. \n In P. Novais et al. (Eds.), Proceedings of the European Simulation and Modelling Conference - ESM\u00272011, pp. 117-121, Guimarães, Portugal, October, 2011. EUROSIS.\n\n Available at: [pdf] http://hdl.handle.net/1822/14838\n [bib] http://www3.dsi.uminho.pt/pcortez/bib/2011-esm-1.txt\n\u003c/code\u003e\u003c/pre\u003e\n"
},
"dateCreated": "Mar 26, 2015 9:46:58 PM",
"dateStarted": "Jul 3, 2015 1:44:56 PM",
"dateFinished": "Jul 3, 2015 1:44:56 PM",
"status": "FINISHED",
"progressUpdateIntervalMs": 500
},
{
"config": {},
"settings": {
"params": {},
"forms": {}
},
"jobName": "paragraph_1435955447812_-158639899",
"id": "20150703-133047_853701097",
"dateCreated": "Jul 3, 2015 1:30:47 PM",
"status": "READY",
"progressUpdateIntervalMs": 500
}
],
"name": "Zeppelin Tutorial",
"id": "2A94M5J1Z",
"angularObjects": {},
"config": {
"looknfeel": "default"
},
"info": {}
}
}