Spark: Difference between revisions

From Chorke Wiki
Jump to navigation Jump to search
Line 127: Line 127:
| valign="top" |
| valign="top" |
* [https://stackoverflow.com/questions/19943766/ Spark » Unable to load native-hadoop library]
* [https://stackoverflow.com/questions/19943766/ Spark » Unable to load native-hadoop library]
* [https://arrow.apache.org/cookbook/java/ Spark » Apache Arrow Java Cookbook]
* [https://community.cloudera.com/t5/Support-Questions/Spark-access-remote-HDFS-in-cross-realm-trust-setup/td-p/87813 Spark » Access remote HDFS]
* [https://community.cloudera.com/t5/Support-Questions/Spark-access-remote-HDFS-in-cross-realm-trust-setup/td-p/87813 Spark » Access remote HDFS]
* [https://spark.apache.org/docs/latest/api/python/index.html Spark » API Docs » Python]
* [https://spark.apache.org/docs/latest/api/python/index.html Spark » API Docs » Python]

Revision as of 03:20, 2 January 2023

export PYSPARK_PYTHON='/usr/bin/python3';\
export SPARK_HOME='/opt/cli/spark-3.3.0-bin-hadoop3';\
export JAVA_HOME='/usr/lib/jvm/java-17-openjdk-amd64';\
export PATH=$PATH:$SPARK_HOME/bin:$SPARK_HOME/sbin
spark-shell
pyspark
http://localhost:8080/
spark://localhost:7077
http://localhost:4040/
ssh -L 8080:localhost:8080 <user>@<spark-master-host>
ssh -L 7077:localhost:7077 <user>@<spark-master-host>

Master Node

sudo apt -qq update;\
export PYSPARK_PYTHON='/usr/bin/python3';\
export SPARK_HOME='/opt/cli/spark-3.3.0-bin-hadoop3';\
export JAVA_HOME='/usr/lib/jvm/java-17-openjdk-arm64';\
bash <(curl -s 'https://cdn.chorke.org/exec/cli/bash/install/apache-spark-master/3.3.0.sh.txt')
sudo systemctl daemon-reload
sudo systemctl enable spark-master.service
sudo systemctl start  spark-master.service
sudo systemctl status spark-master.service

Worker Node

sudo apt -qq update;\
export PYSPARK_PYTHON='/usr/bin/python3';\
export SPARK_MASTER='spark://ns12-pc04:7077';\
export SPARK_HOME='/opt/cli/spark-3.3.0-bin-hadoop3';\
export JAVA_HOME='/usr/lib/jvm/java-17-openjdk-amd64';\
bash <(curl -s 'https://cdn.chorke.org/exec/cli/bash/install/apache-spark-slave/3.3.0.sh.txt')
sudo systemctl daemon-reload
sudo systemctl enable spark-slave.service
sudo systemctl start  spark-slave.service
sudo systemctl status spark-slave.service

Knowledge

readlink -f /usr/bin/java | sed "s:bin/java::"
sudo apt-get install pdsh
sudo apt-get install ssh

sudo apt dist-upgrade
sudo do-release-upgrade

sudo apt --fix-broken install
sudo apt install ubuntu-desktop
[Service]
User=spark
Group=spark
Type=forking
SuccessExitStatus=143

References