Spark: Difference between revisions

From Chorke Wiki
Jump to navigation Jump to search
 
(44 intermediate revisions by the same user not shown)
Line 1: Line 1:
==Spark Master==
<source lang="bash">
sudo tee -a /etc/systemd/system/spark-master.service >/dev/null <<EOF
export PYSPARK_PYTHON='/usr/bin/python3';\
[Unit]
export SPARK_HOME='/opt/cli/spark-3.3.0-bin-hadoop3';\
Description=Apache Spark Master
export JAVA_HOME='/usr/lib/jvm/java-17-openjdk-amd64';\
Documentation=https://spark.apache.org/docs/3.3.0
export PATH=$PATH:$SPARK_HOME/bin:$SPARK_HOME/sbin
Wants=network-online.target
</source>
After=network-online.target
 
After=systemd-user-sessions.service
spark-shell
pyspark
 
http://localhost:8080/
http://localhost:7077/
http://localhost:4040/


[Service]
ssh -L 8080:localhost:8080 user@spark.chorke.org
User=spark
ssh -L 7077:localhost:7077 user@spark.chorke.org
Type=forking
EnvironmentFile=${SPARK_HOME}/etc/.env
ExecStart=${SPARK_HOME}/sbin/start-master.sh
ExecStop=${SPARK_HOME}/sbin/stop-master.sh


[Install]
==Master Node==
WantedBy=multi-user.target
<source lang="bash">
EOF
sudo apt -qq update;\
export PYSPARK_PYTHON='/usr/bin/python3';\
export SPARK_HOME='/opt/cli/spark-3.3.0-bin-hadoop3';\
export JAVA_HOME='/usr/lib/jvm/java-17-openjdk-arm64';\
bash <(curl -s 'https://cdn.chorke.org/exec/cli/bash/install/apache-spark-master/3.3.0.sh.txt')
</source>


==Slave Daemon==
sudo systemctl daemon-reload
sudo systemctl enable spark-master.service
sudo systemctl start  spark-master.service
sudo systemctl status spark-master.service
 
==Worker Node==
<source lang="bash">
sudo tee -a /etc/systemd/system/spark-slave.service >/dev/null <<EOF
sudo apt -qq update;\
[Unit]
export PYSPARK_PYTHON='/usr/bin/python3';\
Description=Apache Spark Slave
export SPARK_MASTER='spark://ns12-pc04:7077';\
Documentation=https://spark.apache.org/docs/3.3.0
export SPARK_HOME='/opt/cli/spark-3.3.0-bin-hadoop3';\
Wants=network-online.target
export JAVA_HOME='/usr/lib/jvm/java-17-openjdk-amd64';\
After=network-online.target
bash <(curl -s 'https://cdn.chorke.org/exec/cli/bash/install/apache-spark-slave/3.3.0.sh.txt')
</source>
 
sudo systemctl daemon-reload
sudo systemctl enable spark-slave.service
sudo systemctl start  spark-slave.service
sudo systemctl status spark-slave.service
 
== Knowledge ==
{|
|valign="top" colspan="3"|
ssh-keygen -b 4096 -t rsa -f ~/.ssh/id_rsa -q -N '' -C "spark@${HOSTNAME}"
readlink -f /usr/bin/java | sed "s:bin/java::"
sudo apt-get install pdsh
sudo apt-get install ssh
 
|-
|colspan="3"|
----
|-
|valign="top" colspan="3"|
su - spark
sudo -u spark -H sh -c 'whoami; echo $HOME'


sh $SPARK_HOME/bin/spark-shell
sh $SPARK_HOME/bin/pyspark
http://127.0.0.1:8080
http://127.0.0.1:8088
http://127.0.0.1:9870
http://127.0.0.1:4040
|-
|colspan="3"|
----
|-
|valign="bottom" colspan="2"|
sudo apt dist-upgrade
sudo do-release-upgrade
sudo apt --fix-broken install
sudo apt install ubuntu-desktop
|valign="top"|
<source lang="ini">
[Service]
[Service]
User=spark
User=spark
Group=spark
Type=forking
Type=forking
EnvironmentFile=${SPARK_HOME}/etc/.env
SuccessExitStatus=143
WorkingDirectory=${SPARK_HOME}/sbin
</source>
ExecStart=${SPARK_HOME}/sbin/start-slave.sh spark://10.20.22.10:7077
ExecStop=${SPARK_HOME}/sbin/stop-slave.sh
Restart=on-failure
RestartSec=10s


[Install]
|-
WantedBy=multi-user.target
|colspan="3"|
EOF
----
|-
|valign="top" colspan="3"|
<source lang="bash">
if [ -f '/etc/os-release' ];then
        HOST_OS_ID=$(grep -oP '(?<=^ID=).+'        /etc/os-release | tr -d '"')
    HOST_OS_ID_LIKE=$(grep -oP '(?<=^ID_LIKE=).+'    /etc/os-release | tr -d '"')
    HOST_OS_VERSION=$(grep -oP '(?<=^VERSION_ID=).+' /etc/os-release | tr -d '"')
fi
</source>
|}


==References==
==References==
{|
{|
| valign="top" |
| valign="top" |
* [https://stackoverflow.com/questions/73853179/ Spark » Systemd » Failed to stop Master/Slave Node]
* [https://archive.apache.org/dist/spark/ Spark » Download » Archive]
* [https://archive.apache.org/dist/spark/ Spark » Download » Archive]
* [https://stackoverflow.com/questions/36083783/ Spark » Change WebUI Port]
* [https://spark.apache.org/sql/ Spark » SQL & DataFrames]
* [https://spark.apache.org/sql/ Spark » SQL & DataFrames]
* [https://spark.apache.org/mllib/ Spark » Machine Learning]
* [https://spark.apache.org/mllib/ Spark » Machine Learning]
Line 55: Line 115:
* [https://spark.apache.org/examples.html Spark » Examples]
* [https://spark.apache.org/examples.html Spark » Examples]
* [https://spark.apache.org/graphx/ Spark » GraphX]
* [https://spark.apache.org/graphx/ Spark » GraphX]
* [[Hadoop]]
* [https://cdn.chorke.org/exec/cli/bash/install/ Spark » Install]
* [[Jupyter]]
* [[NLP]]


| valign="top" |
| valign="top" |
* [https://stackoverflow.com/questions/31450846/ Spark » Concatenate columns in DataFrame]
* [https://spark.apache.org/docs/latest/structured-streaming-programming-guide.html Spark » Structured Streaming Programming]
* [https://spark.apache.org/docs/latest/structured-streaming-programming-guide.html Spark » Structured Streaming Programming]
* [https://dev.to/kirekov/apache-spark-hive-and-spring-boot-testing-guide-mdp Spark » Apache Spark, Hive & Spring Boot]
* [https://dev.to/kirekov/apache-spark-hive-and-spring-boot-testing-guide-mdp Spark » Apache Spark, Hive & Spring Boot]
Line 66: Line 125:
* [https://stackoverflow.com/questions/59249135/ Spark » Install Slave as a Daemon]
* [https://stackoverflow.com/questions/59249135/ Spark » Install Slave as a Daemon]
* [https://stackoverflow.com/questions/40166056/ Spark » As a Linux Service]
* [https://stackoverflow.com/questions/40166056/ Spark » As a Linux Service]
* [[VS Code on iPad Pro]]
* [https://camel.apache.org/components/3.18.x/spark-component.html Spark » Apache Camel]
* [[Machine Learning]]
* [https://www.baeldung.com/apache-spark Spark » Introduction]
* [https://hive.apache.org/ Apache Hive]
* [https://github.com/vim89/spark-spring-boot/blob/master/src/main/java/com/vitthalmirji/spring/spark/ApplicationConfig.java Spark » Spring Boot]
* [https://aerospike.com/ Aerospike]


| valign="top" |
| valign="top" |
* [https://stackoverflow.com/questions/61472322/ KillSignal interact with TimeoutStopSec]
* [https://unix.stackexchange.com/questions/388483/ Delay Systemd Service If File Exist]
* [https://unix.stackexchange.com/questions/388483/ Delay Systemd Service If File Exist]
* [https://www.freedesktop.org/software/systemd/man/systemd.unit.html#Conditions%20and%20Asserts System Unit Conditions & Asserts]
* [https://www.freedesktop.org/software/systemd/man/systemd.unit.html#Conditions%20and%20Asserts System Unit Conditions & Asserts]
* [[VS Code on iPad Pro]]
* [[Apache Camel]]
* [[Mosquitto]]
* [[ActiveMQ]]
* [[Keycloak]]
* [[Keycloak]]
* [[GraphQL]]
* [[GraphQL]]
Line 83: Line 146:
|-
|-
| valign="top" |
| valign="top" |
* [https://www.linode.com/docs/guides/install-configure-run-spark-on-top-of-hadoop-yarn-cluster/ Spark » Running Spark on Top of a Hadoop YARN]
* [https://spark.apache.org/docs/latest/cluster-overview.html Spark » Deploy » Cluster Mode Overview]
* [https://spark.apache.org/docs/latest/cluster-overview.html Spark » Deploy » Cluster Mode Overview]
* [https://spark.apache.org/docs/latest/spark-standalone.html Spark » Deploy » Standalone Mode]
* [https://spark.apache.org/docs/latest/spark-standalone.html Spark » Deploy » Standalone Mode]
* [https://spark.apache.org/docs/latest/submitting-applications.html Spark » Deploy » Spark Submit]
* [https://spark.apache.org/docs/latest/submitting-applications.html Spark » Deploy » Spark Submit]
* [https://spark.apache.org/docs/latest/running-on-kubernetes.html Spark » Deploy » Kubernetes]
* [https://spark.apache.org/docs/latest/running-on-kubernetes.html Spark » Deploy » Kubernetes]
* [https://medium.com/ymedialabs-innovation/apache-spark-on-a-multi-node-cluster-b75967c8cb2b Spark » Multi-Node Cluster]
* [https://spark.apache.org/docs/latest/cluster-overview.html Spark » Deploy » Cluster]
* [https://spark.apache.org/docs/latest/cluster-overview.html Spark » Deploy » Cluster]
* [https://spark.apache.org/docs/latest/running-on-mesos.html Spark » Deploy » Mesos]
* [https://spark.apache.org/docs/latest/running-on-mesos.html Spark » Deploy » Mesos]
Line 92: Line 157:


| valign="top" |
| valign="top" |
* [https://stackoverflow.com/questions/19943766/ Spark » Unable to load native-hadoop library]
* [https://arrow.apache.org/cookbook/java/ Spark » Apache Arrow Java Cookbook]
* [https://community.cloudera.com/t5/Support-Questions/Spark-access-remote-HDFS-in-cross-realm-trust-setup/td-p/87813 Spark » Access remote HDFS]
* [https://spark.apache.org/docs/latest/api/python/index.html Spark » API Docs » Python]
* [https://spark.apache.org/docs/latest/api/python/index.html Spark » API Docs » Python]
* [https://spark.apache.org/docs/latest/api/scala/org/apache/spark/index.html Spark » API Docs » Scala]
* [https://spark.apache.org/docs/latest/api/scala/org/apache/spark/index.html Spark » API Docs » Scala]
Line 97: Line 165:
* [https://spark.apache.org/docs/latest/api/sql/index.html Spark » API Docs » SQL]
* [https://spark.apache.org/docs/latest/api/sql/index.html Spark » API Docs » SQL]
* [https://spark.apache.org/docs/latest/api/R/index.html Spark » API Docs » R]
* [https://spark.apache.org/docs/latest/api/R/index.html Spark » API Docs » R]
| valign="top" |
* [https://dev.to/aws-builders/ssh-setup-and-tunneling-via-bastion-host-3kcc AWS » Bastion Host SSH Tunneling]
* [https://pub.towardsai.net/how-to-set-up-your-environment-for-spark-7820b84491ef Set Up Environment for Spark]
* [[Machine Learning]]
* [[Linux Containers]]
* [https://hive.apache.org/ Apache Hive]
* [https://aerospike.com/ Aerospike]
* [[Hadoop]]
* [[Jupyter]]
* [[NLP]]
|-
| colspan="3" |
----
|-
| valign="top" |
* [https://superuser.com/questions/513159/ Systemd » Safe Remove Services]
* [[Bastion SSH Tunneling]]
| valign="top" |


| valign="top" |
| valign="top" |


|}
|}

Latest revision as of 10:18, 25 January 2024

export PYSPARK_PYTHON='/usr/bin/python3';\
export SPARK_HOME='/opt/cli/spark-3.3.0-bin-hadoop3';\
export JAVA_HOME='/usr/lib/jvm/java-17-openjdk-amd64';\
export PATH=$PATH:$SPARK_HOME/bin:$SPARK_HOME/sbin
spark-shell
pyspark
http://localhost:8080/
http://localhost:7077/
http://localhost:4040/
ssh -L 8080:localhost:8080 user@spark.chorke.org
ssh -L 7077:localhost:7077 user@spark.chorke.org

Master Node

sudo apt -qq update;\
export PYSPARK_PYTHON='/usr/bin/python3';\
export SPARK_HOME='/opt/cli/spark-3.3.0-bin-hadoop3';\
export JAVA_HOME='/usr/lib/jvm/java-17-openjdk-arm64';\
bash <(curl -s 'https://cdn.chorke.org/exec/cli/bash/install/apache-spark-master/3.3.0.sh.txt')
sudo systemctl daemon-reload
sudo systemctl enable spark-master.service
sudo systemctl start  spark-master.service
sudo systemctl status spark-master.service

Worker Node

sudo apt -qq update;\
export PYSPARK_PYTHON='/usr/bin/python3';\
export SPARK_MASTER='spark://ns12-pc04:7077';\
export SPARK_HOME='/opt/cli/spark-3.3.0-bin-hadoop3';\
export JAVA_HOME='/usr/lib/jvm/java-17-openjdk-amd64';\
bash <(curl -s 'https://cdn.chorke.org/exec/cli/bash/install/apache-spark-slave/3.3.0.sh.txt')
sudo systemctl daemon-reload
sudo systemctl enable spark-slave.service
sudo systemctl start  spark-slave.service
sudo systemctl status spark-slave.service

Knowledge

ssh-keygen -b 4096 -t rsa -f ~/.ssh/id_rsa -q -N '' -C "spark@${HOSTNAME}"
readlink -f /usr/bin/java | sed "s:bin/java::"
sudo apt-get install pdsh
sudo apt-get install ssh

su - spark
sudo -u spark -H sh -c 'whoami; echo $HOME'
sh $SPARK_HOME/bin/spark-shell
sh $SPARK_HOME/bin/pyspark
http://127.0.0.1:8080
http://127.0.0.1:8088
http://127.0.0.1:9870
http://127.0.0.1:4040

sudo apt dist-upgrade
sudo do-release-upgrade

sudo apt --fix-broken install
sudo apt install ubuntu-desktop
[Service]
User=spark
Group=spark
Type=forking
SuccessExitStatus=143

if [ -f '/etc/os-release' ];then
         HOST_OS_ID=$(grep -oP '(?<=^ID=).+'         /etc/os-release | tr -d '"')
    HOST_OS_ID_LIKE=$(grep -oP '(?<=^ID_LIKE=).+'    /etc/os-release | tr -d '"')
    HOST_OS_VERSION=$(grep -oP '(?<=^VERSION_ID=).+' /etc/os-release | tr -d '"')
fi

References