Hadoop: Difference between revisions

From Chorke Wiki
Jump to navigation Jump to search
Line 68: Line 68:
== Knowledge ==
== Knowledge ==
{|
{|
|valign="top" colspan="2"|
|valign="top" colspan="3"|
  ssh-keygen -b 4096 -t rsa -f ~/.ssh/id_rsa -q -N "[email protected]"
  ssh-keygen -b 4096 -t rsa -f ~/.ssh/id_rsa -q -N "[email protected]"
  readlink -f /usr/bin/java | sed "s:bin/java::"
  readlink -f /usr/bin/java | sed "s:bin/java::"
Line 75: Line 75:


|-
|-
|colspan="2"|
|colspan="3"|
----
----
|-
|-
|valign="top" colspan="2"|
|valign="top" colspan="3"|
  su -h hadoop
  su -h hadoop
  hdfs namenode -format
  hdfs namenode -format
Line 90: Line 90:


|-
|-
|colspan="2"|
|colspan="3"|
----
----
|-
|-
|valign="bottom"|
|valign="bottom" colspan="2"|
  sudo apt dist-upgrade
  sudo apt dist-upgrade
  sudo do-release-upgrade
  sudo do-release-upgrade
Line 110: Line 110:


|-
|-
|colspan="2"|
|colspan="3"|
----
----
|-
|-
|valign="top" colspan="2"|
|valign="top" colspan="3"|
<source lang="bash">
<source lang="bash">
if [ -f '/etc/os-release' ];then
if [ -f '/etc/os-release' ];then
Line 121: Line 121:
fi
fi
</source>
</source>
|-
|colspan="3"|
----
|-
|valign="top"|
<source lang='bash'>
lxc launch images:fedora/37 robotics &&
lxc exec robotics bash <<'EOF'
sleep 5
dnf install -y curl java-11-openjdk
java -version
EOF
</source>
lxc snapshot robotics curl:java
lxc stop    robotics
lxc publish  robotics --alias\
  fedora/37:curl:java
lxc publish  robotics/curl:java --alias\
  fedora/37:curl:java
|valign="top"|
<source lang='bash'>
lxc launch images:opensuse/15.3 agronomy &&
lxc exec agronomy bash <<'EOF'
sleep 5
zypper install -y curl java-11-openjdk
java -version
EOF
</source>
lxc snapshot agronomy curl:java
lxc stop    agronomy
lxc publish  agronomy --alias\
  opensuse/15.3:curl:java
lxc publish  agronomy/curl:java --alias\
  opensuse/15.3:curl:java
|valign="top"|
<source lang='bash'>
lxc launch images:ubuntu/22.04 software &&
lxc exec software bash <<'EOF'
sleep 5
apt-get install -y curl openjdk-11-jdk
java -version
EOF
</source>
lxc snapshot software curl:java
lxc stop    software
lxc publish  software --alias\
  ubuntu:22.04:curl:java
lxc publish  software/curl:java --alias\
  ubuntu:22.04:curl:java
|}
|}



Revision as of 11:13, 28 December 2022

Hadoop is a Java-based programming framework that supports the processing and storage of extremely large datasets on a cluster of inexpensive machines. It was the first major open source project in the big data playing field and is sponsored by the Apache Software Foundation. Hadoop is comprised of four main layers:

  1. Hadoop Common is the collection of utilities and libraries that support other Hadoop modules.
  2. HDFS, which stands for Hadoop Distributed File System, is responsible for persisting data to disk.
  3. YARN, short for Yet Another Resource Negotiator, is the "operating system" for HDFS.
  4. MapReduce is the original processing model for Hadoop clusters. It distributes work within the cluster or map, then organizes and reduces the results from the nodes into a response to a query. Many other processing models are available for the 3.x version of Hadoop.

Configuration

# Create the HDFS storage directories referenced by the Hadoop configuration.
mkdir -p /home/hadoop/hdfs/{datanode,namenode}/
# Write core-site.xml. The file is overwritten rather than appended to
# (no "tee -a"): an XML document may have only one <configuration> root, and
# the file shipped with Hadoop already contains one.
# ${HADOOP_HOME:?} aborts instead of writing to /etc/hadoop when the variable
# is unset; the quoted 'EOF' keeps the XML literal.
sudo tee "${HADOOP_HOME:?HADOOP_HOME must be set}/etc/hadoop/core-site.xml" >/dev/null <<'EOF'
<configuration>
    <property>
        <name>hadoop.tmp.dir</name>
        <value>/home/hadoop/tmp</value>
    </property>
    <property>
        <!-- fs.defaultFS replaces the deprecated fs.default.name key -->
        <name>fs.defaultFS</name>
        <value>hdfs://0.0.0.0:9000</value>
        <description>The default file system URI</description>
    </property>
</configuration>
EOF

# Create the NameNode and DataNode directories named in hdfs-site.xml below.
mkdir -p /home/hadoop/hdfs/{datanode,namenode}
# Write hdfs-site.xml. The file is overwritten rather than appended to
# (no "tee -a"): appending would add a second <configuration> root element
# and leave the file malformed.
# ${HADOOP_HOME:?} aborts when the variable is unset; the quoted 'EOF' keeps
# the XML literal.
sudo tee "${HADOOP_HOME:?HADOOP_HOME must be set}/etc/hadoop/hdfs-site.xml" >/dev/null <<'EOF'
<configuration>
    <property>
      <name>dfs.replication</name>
      <value>1</value>
    </property>
    <property>
      <name>dfs.namenode.name.dir</name>
      <value>/home/hadoop/hdfs/namenode/</value>
    </property>
    <property>
      <name>dfs.datanode.data.dir</name>
      <value>/home/hadoop/hdfs/datanode/</value>
    </property>
</configuration>
EOF

# Write mapred-site.xml so MapReduce jobs run on YARN. The file is overwritten
# rather than appended to (no "tee -a"): appending would add a second
# <configuration> root element and leave the file malformed.
# ${HADOOP_HOME:?} aborts when the variable is unset.
sudo tee "${HADOOP_HOME:?HADOOP_HOME must be set}/etc/hadoop/mapred-site.xml" >/dev/null <<'EOF'
<configuration>
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
</configuration>
EOF

# Write yarn-site.xml enabling the MapReduce shuffle auxiliary service. The
# file is overwritten rather than appended to (no "tee -a"): appending would
# add a second <configuration> root element and leave the file malformed.
# ${HADOOP_HOME:?} aborts when the variable is unset.
sudo tee "${HADOOP_HOME:?HADOOP_HOME must be set}/etc/hadoop/yarn-site.xml" >/dev/null <<'EOF'
<configuration>
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
</configuration>
EOF

Knowledge

# Generate a 4096-bit RSA key pair at ~/.ssh/id_rsa without prompting (-q).
# NOTE(review): -N sets the key passphrase; the value below looks like a
# scrubbed e-mail address, which would normally be passed as a comment via -C.
# Confirm the intent (passwordless SSH needs an empty passphrase).
ssh-keygen -b 4096 -t rsa -f ~/.ssh/id_rsa -q -N "[email protected]"
# Resolve the /usr/bin/java symlink and strip the trailing "bin/java" to print
# the Java installation directory.
readlink -f /usr/bin/java | sed "s:bin/java::"
# Install pdsh and ssh from the distribution packages.
sudo apt-get install pdsh
sudo apt-get install ssh

# Switch to the hadoop account with a login shell ("su -h" only prints help).
su - hadoop
# Initialise the NameNode metadata directory.
hdfs namenode -format
# Show the user and home directory seen by the hadoop account. Single quotes
# make the target shell expand ${HOME}, not the calling shell.
sudo -u hadoop -H sh -c 'whoami; echo ${HOME}'
# The Hadoop start scripts are bash scripts, so run them with bash, not sh.
bash "${HADOOP_HOME}/sbin/start-dfs.sh"
http://127.0.0.1:9870
# The Hadoop start scripts are bash scripts, so run them with bash, not sh.
bash "${HADOOP_HOME}/sbin/start-yarn.sh"
http://127.0.0.1:8088

# Upgrade installed packages, then run the release upgrader.
sudo apt dist-upgrade
sudo do-release-upgrade

# Repair broken dependencies, then install the ubuntu-desktop package.
sudo apt --fix-broken install
sudo apt install ubuntu-desktop
[Service]
User=hadoop
Group=hadoop
Type=forking
SuccessExitStatus=143

# Read the distribution identifiers from /etc/os-release into HOST_OS_ID,
# HOST_OS_ID_LIKE and HOST_OS_VERSION, with double quotes stripped.
# Uses POSIX sed instead of "grep -P", which is a GNU/PCRE-only extension.
# A key that is absent from the file yields an empty value; the variables are
# left untouched when the file does not exist.
if [ -f '/etc/os-release' ]; then
         HOST_OS_ID=$(sed -n 's/^ID=\(..*\)$/\1/p'         /etc/os-release | tr -d '"')
    HOST_OS_ID_LIKE=$(sed -n 's/^ID_LIKE=\(..*\)$/\1/p'    /etc/os-release | tr -d '"')
    HOST_OS_VERSION=$(sed -n 's/^VERSION_ID=\(..*\)$/\1/p' /etc/os-release | tr -d '"')
fi

# Fedora 37: launch the "robotics" container and, only if the launch
# succeeded, install curl and OpenJDK 11 inside it.
# The short sleep presumably gives the new container time to come up.
lxc launch images:fedora/37 robotics \
  && lxc exec robotics bash <<'EOF'
sleep 5
dnf install -y curl java-11-openjdk
java -version
EOF
# Snapshot the provisioned container, stop it, and publish it under an alias.
lxc snapshot robotics curl:java
lxc stop robotics
lxc publish robotics --alias fedora/37:curl:java

# Publish from the snapshot instead of the container, under the same alias.
lxc publish robotics/curl:java --alias fedora/37:curl:java
# openSUSE 15.3: launch the "agronomy" container and, only if the launch
# succeeded, install curl and OpenJDK 11 inside it.
# The short sleep presumably gives the new container time to come up.
lxc launch images:opensuse/15.3 agronomy \
  && lxc exec agronomy bash <<'EOF'
sleep 5
zypper install -y curl java-11-openjdk
java -version
EOF
# Snapshot the provisioned container, stop it, and publish it under an alias.
lxc snapshot agronomy curl:java
lxc stop agronomy
lxc publish agronomy --alias opensuse/15.3:curl:java

# Publish from the snapshot instead of the container, under the same alias.
lxc publish agronomy/curl:java --alias opensuse/15.3:curl:java
# Ubuntu 22.04: launch the "software" container and, only if the launch
# succeeded, install curl and OpenJDK 11 inside it.
# The short sleep presumably gives the new container time to come up.
lxc launch images:ubuntu/22.04 software \
  && lxc exec software bash <<'EOF'
sleep 5
apt-get install -y curl openjdk-11-jdk
java -version
EOF
# Snapshot the provisioned container, stop it, and publish it under an alias.
lxc snapshot software curl:java
lxc stop software
lxc publish software --alias ubuntu:22.04:curl:java

# Publish from the snapshot instead of the container, under the same alias.
lxc publish software/curl:java --alias ubuntu:22.04:curl:java

References