Spark: Difference between revisions

From Chorke Wiki
Jump to navigation Jump to search
No edit summary
 
Line 1: Line 1:
<source lang="bash">
<syntaxhighlight lang="bash">
export PYSPARK_PYTHON='/usr/bin/python3';\
export PYSPARK_PYTHON='/usr/bin/python3';\
export SPARK_HOME='/opt/cli/spark-3.3.0-bin-hadoop3';\
export SPARK_HOME='/opt/cli/spark-3.3.0-bin-hadoop3';\
export JAVA_HOME='/usr/lib/jvm/java-17-openjdk-amd64';\
export JAVA_HOME='/usr/lib/jvm/java-17-openjdk-amd64';\
export PATH=$PATH:$SPARK_HOME/bin:$SPARK_HOME/sbin
export PATH=$PATH:$SPARK_HOME/bin:$SPARK_HOME/sbin
</source>
</syntaxhighlight>


  spark-shell
  spark-shell
Line 17: Line 17:


==Master Node==
==Master Node==
<source lang="bash">
<syntaxhighlight lang="bash">
sudo apt -qq update;\
sudo apt -qq update;\
export PYSPARK_PYTHON='/usr/bin/python3';\
export PYSPARK_PYTHON='/usr/bin/python3';\
Line 23: Line 23:
export JAVA_HOME='/usr/lib/jvm/java-17-openjdk-arm64';\
export JAVA_HOME='/usr/lib/jvm/java-17-openjdk-arm64';\
bash <(curl -s 'https://cdn.chorke.org/exec/cli/bash/install/apache-spark-master/3.3.0.sh.txt')
bash <(curl -s 'https://cdn.chorke.org/exec/cli/bash/install/apache-spark-master/3.3.0.sh.txt')
</source>
</syntaxhighlight>


  sudo systemctl daemon-reload
  sudo systemctl daemon-reload
Line 31: Line 31:


==Worker Node==
==Worker Node==
<source lang="bash">
<syntaxhighlight lang="bash">
sudo apt -qq update;\
sudo apt -qq update;\
export PYSPARK_PYTHON='/usr/bin/python3';\
export PYSPARK_PYTHON='/usr/bin/python3';\
Line 38: Line 38:
export JAVA_HOME='/usr/lib/jvm/java-17-openjdk-amd64';\
export JAVA_HOME='/usr/lib/jvm/java-17-openjdk-amd64';\
bash <(curl -s 'https://cdn.chorke.org/exec/cli/bash/install/apache-spark-slave/3.3.0.sh.txt')
bash <(curl -s 'https://cdn.chorke.org/exec/cli/bash/install/apache-spark-slave/3.3.0.sh.txt')
</source>
</syntaxhighlight>


  sudo systemctl daemon-reload
  sudo systemctl daemon-reload
Line 81: Line 81:


|valign="top"|
|valign="top"|
<source lang="ini">
<syntaxhighlight lang="ini">
[Service]
[Service]
User=spark
User=spark
Line 87: Line 87:
Type=forking
Type=forking
SuccessExitStatus=143
SuccessExitStatus=143
</source>
</syntaxhighlight>


|-
|-
Line 94: Line 94:
|-
|-
|valign="top" colspan="3"|
|valign="top" colspan="3"|
<source lang="bash">
<syntaxhighlight lang="bash">
if [ -f '/etc/os-release' ];then
if [ -f '/etc/os-release' ];then
         HOST_OS_ID=$(grep -oP '(?<=^ID=).+'        /etc/os-release | tr -d '"')
         HOST_OS_ID=$(grep -oP '(?<=^ID=).+'        /etc/os-release | tr -d '"')
Line 100: Line 100:
     HOST_OS_VERSION=$(grep -oP '(?<=^VERSION_ID=).+' /etc/os-release | tr -d '"')
     HOST_OS_VERSION=$(grep -oP '(?<=^VERSION_ID=).+' /etc/os-release | tr -d '"')
fi
fi
</source>
</syntaxhighlight>
|}
|}


Line 184: Line 184:
* [https://superuser.com/questions/513159/ Systemd » Safe Remove Services]
* [https://superuser.com/questions/513159/ Systemd » Safe Remove Services]
* [[Bastion SSH Tunneling]]
* [[Bastion SSH Tunneling]]
* [[Linux User Creation]]


| valign="top" |
| valign="top" |

Latest revision as of 16:42, 12 December 2024

export PYSPARK_PYTHON='/usr/bin/python3';\
export SPARK_HOME='/opt/cli/spark-3.3.0-bin-hadoop3';\
export JAVA_HOME='/usr/lib/jvm/java-17-openjdk-amd64';\
export PATH=$PATH:$SPARK_HOME/bin:$SPARK_HOME/sbin
spark-shell
pyspark
http://localhost:8080/
http://localhost:7077/
http://localhost:4040/
ssh -L 8080:localhost:8080 user@example.com
ssh -L 7077:localhost:7077 user@example.com

Master Node

sudo apt -qq update;\
export PYSPARK_PYTHON='/usr/bin/python3';\
export SPARK_HOME='/opt/cli/spark-3.3.0-bin-hadoop3';\
export JAVA_HOME='/usr/lib/jvm/java-17-openjdk-arm64';\
bash <(curl -s 'https://cdn.chorke.org/exec/cli/bash/install/apache-spark-master/3.3.0.sh.txt')
sudo systemctl daemon-reload
sudo systemctl enable spark-master.service
sudo systemctl start  spark-master.service
sudo systemctl status spark-master.service

Worker Node

sudo apt -qq update;\
export PYSPARK_PYTHON='/usr/bin/python3';\
export SPARK_MASTER='spark://ns12-pc04:7077';\
export SPARK_HOME='/opt/cli/spark-3.3.0-bin-hadoop3';\
export JAVA_HOME='/usr/lib/jvm/java-17-openjdk-amd64';\
bash <(curl -s 'https://cdn.chorke.org/exec/cli/bash/install/apache-spark-slave/3.3.0.sh.txt')
sudo systemctl daemon-reload
sudo systemctl enable spark-slave.service
sudo systemctl start  spark-slave.service
sudo systemctl status spark-slave.service

Knowledge

ssh-keygen -b 4096 -t rsa -f ~/.ssh/id_rsa -q -N "spark@${HOSTNAME}"
readlink -f /usr/bin/java | sed "s:bin/java::"
sudo apt-get install pdsh
sudo apt-get install ssh

su -h spark
sudo -u spark -H sh -c "whoami; echo ${HOME}"
sh $SPARK_HOME/bin/spark-shell
sh $SPARK_HOME/bin/pyspark
http://127.0.0.1:8080
http://127.0.0.1:8088
http://127.0.0.1:9870
http://127.0.0.1:4040

sudo apt dist-upgrade
sudo do-release-upgrade

sudo apt --fix-broken install
sudo apt install ubuntu-desktop
[Service]
User=spark
Group=spark
Type=forking
SuccessExitStatus=143

if [ -f '/etc/os-release' ];then
         HOST_OS_ID=$(grep -oP '(?<=^ID=).+'         /etc/os-release | tr -d '"')
    HOST_OS_ID_LIKE=$(grep -oP '(?<=^ID_LIKE=).+'    /etc/os-release | tr -d '"')
    HOST_OS_VERSION=$(grep -oP '(?<=^VERSION_ID=).+' /etc/os-release | tr -d '"')
fi

References