pyspark-test2.ipynb
Author: Harald Schilly

PySpark on CoCalc

Run Spark locally to learn about its API...

In [1]:
import sys
sys.version
'3.6.9 (default, Apr 18 2020, 01:56:04) \n[GCC 8.4.0]'
In [2]:
import os, sys

# point PySpark at the local Python 3 interpreter, the Spark installation, and Java 8
os.environ['PYSPARK_PYTHON'] = '/usr/bin/python3'
os.environ['SPARK_HOME'] = '/ext/spark/default'
os.environ['JAVA_HOME'] = '/usr/lib/jvm/java-1.8.0-openjdk-amd64'

# make the PySpark package bundled with Spark importable
sys.path.insert(0, os.environ['SPARK_HOME'] + '/python')

import pyspark
sc = pyspark.SparkContext('local')
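The 'local' master string runs the driver and executors inside this single process, with no cluster involved. A quick sanity check (a sketch; the exact version string depends on whatever /ext/spark/default provides):

sc.version   # e.g. '2.4.x'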
In [3]:
sc.range(100).filter(lambda x : (x+1) % 7 == 0).collect()
[6, 13, 20, 27, 34, 41, 48, 55, 62, 69, 76, 83, 90, 97]
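Transformations such as filter are lazy; nothing executes until an action like collect() is called. A minimal sketch of the same pattern with other standard RDD methods (the numbers here are illustrative, not from the original notebook):

# build an RDD from a local range and square each element;
# reduce() is an action, so this line is where the work actually runs
squares = sc.parallelize(range(10)).map(lambda x: x * x)
squares.reduce(lambda a, b: a + b)   # 285 = 0^2 + 1^2 + ... + 9^2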
In [4]:
fn = 'spark-data.txt'
!cat $fn
4
5
1
-9
9
In [5]:
sc.textFile(fn).map(int).sum()
10
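The same line-oriented pattern extends to the classic word count. A minimal sketch using the sc created above and only standard RDD methods; the sample sentences are made up for illustration:

lines = sc.parallelize(["to be or not to be", "to learn spark"])
counts = (lines.flatMap(lambda line: line.split())   # split every line into words
               .map(lambda word: (word, 1))          # pair each word with a count of 1
               .reduceByKey(lambda a, b: a + b))     # sum the counts per word
counts.collect()
# e.g. [('to', 3), ('be', 2), ('or', 1), ('not', 1), ('learn', 1), ('spark', 1)]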
In [ ]: