Hadoop is a Java-based programming framework that supports the processing and storage of extremely large datasets on a cluster of inexpensive machines. It was the first major open source project in the big data playing field and is sponsored by the Apache Software Foundation. Hadoop consists of four main layers:
- Hadoop Common is the collection of utilities and libraries that support other Hadoop modules.
- HDFS, which stands for Hadoop Distributed File System, is responsible for persisting data to disk.
- YARN, short for Yet Another Resource Negotiator, is the "operating system" for the cluster: it manages compute resources and schedules the jobs that process the data stored in HDFS.
- MapReduce is the original processing model for Hadoop clusters. It distributes work across the nodes of the cluster (the map step), then organizes and reduces the results from the nodes into a response to a query (the reduce step). Many other processing models are available for the 3.x version of Hadoop.
Configuration
# Create the local directories that back the HDFS NameNode and DataNode
# (referenced below by dfs.namenode.name.dir / dfs.datanode.data.dir).
mkdir -p /home/hadoop/hdfs/{datanode,namenode}/

# Each site file is OVERWRITTEN rather than appended to (`tee`, not `tee -a`):
# the files shipped with Hadoop already contain a <configuration> root element,
# and appending a second one yields malformed XML.
# ${HADOOP_HOME:?} aborts instead of writing to /etc/hadoop/... when the
# variable is unset. The heredoc delimiter is quoted so the XML is literal.

# core-site.xml: temp directory and default file system URI.
# fs.defaultFS replaces the deprecated fs.default.name key.
sudo tee "${HADOOP_HOME:?HADOOP_HOME must be set}/etc/hadoop/core-site.xml" >/dev/null <<'EOF'
<?xml version="1.0" encoding="UTF-8"?>
<configuration>
<property>
<name>hadoop.tmp.dir</name>
<value>/home/hadoop/tmp</value>
</property>
<property>
<name>fs.defaultFS</name>
<value>hdfs://0.0.0.0:9000</value>
<description>The default file system URI</description>
</property>
</configuration>
EOF

# hdfs-site.xml: single replica plus the storage directories created above.
sudo tee "${HADOOP_HOME:?HADOOP_HOME must be set}/etc/hadoop/hdfs-site.xml" >/dev/null <<'EOF'
<?xml version="1.0" encoding="UTF-8"?>
<configuration>
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>/home/hadoop/hdfs/namenode/</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>/home/hadoop/hdfs/datanode/</value>
</property>
</configuration>
EOF

# mapred-site.xml: run MapReduce jobs on YARN.
sudo tee "${HADOOP_HOME:?HADOOP_HOME must be set}/etc/hadoop/mapred-site.xml" >/dev/null <<'EOF'
<?xml version="1.0" encoding="UTF-8"?>
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
</configuration>
EOF

# yarn-site.xml: enable the MapReduce shuffle auxiliary service on NodeManagers.
sudo tee "${HADOOP_HOME:?HADOOP_HOME must be set}/etc/hadoop/yarn-site.xml" >/dev/null <<'EOF'
<?xml version="1.0" encoding="UTF-8"?>
<configuration>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
</configuration>
EOF
Unit Testing
# Launch a Debian 12 container named "agronomy"; if that succeeds, feed the
# script below to a bash shell inside it to install curl and a Java runtime.
# NOTE(review): Debian 12 (bookworm) appears to ship only openjdk-17 in its
# default repositories — confirm that openjdk-11-jre is installable there.
lxc launch images:debian/12 agronomy &&
lxc exec agronomy bash <<'EOF'
sleep 5
# Fresh images carry no (or stale) package lists; refresh before installing.
apt-get update
apt-get install -y curl openjdk-11-jre
java -version
EOF
|
# Launch a Fedora 37 container named "robotics"; if that succeeds, feed the
# script below to a bash shell inside it. The script installs curl and
# OpenJDK 11, then prints the Java version as a smoke test.
# NOTE(review): the `sleep 5` presumably gives the container's network time to
# come up before dnf runs — confirm it is long enough on your host.
lxc launch images:fedora/37 robotics &&
lxc exec robotics bash <<'EOF'
sleep 5
dnf install -y curl java-11-openjdk
java -version
EOF
|
# Launch an Ubuntu 22.04 container named "software"; if that succeeds, feed the
# script below to a bash shell inside it to install curl and OpenJDK 11.
lxc launch images:ubuntu/22.04 software &&
lxc exec software bash <<'EOF'
sleep 5
# Fresh images carry no (or stale) package lists; refresh before installing.
apt-get update
apt-get install -y curl openjdk-11-jre
java -version
EOF
|
|
create alias:
lxc stop agronomy
# Alias kept on the same line as --alias: a bare `--alias\` followed by an
# unindented line would be joined into the single word `--aliasdebian/...`.
lxc publish agronomy --alias debian/12:curl:java
create alias from snapshot:
lxc snapshot agronomy curl:java
lxc publish agronomy/curl:java --alias debian/12:curl:java
lxc delete agronomy
launch alias:
lxc launch debian/12:curl:java agronomy &&
lxc exec agronomy bash
lxc stop agronomy && lxc delete agronomy
|
create alias:
lxc stop robotics
# Alias kept on the same line as --alias: a bare `--alias\` followed by an
# unindented line would be joined into the single word `--aliasfedora/...`.
lxc publish robotics --alias fedora/37:curl:java
create alias from snapshot:
lxc snapshot robotics curl:java
lxc publish robotics/curl:java --alias fedora/37:curl:java
lxc delete robotics
launch alias:
lxc launch fedora/37:curl:java robotics &&
lxc exec robotics bash
lxc stop robotics && lxc delete robotics
|
create alias:
lxc stop software
# Alias kept on the same line as --alias: a bare `--alias\` followed by an
# unindented line would be joined into the single word `--aliasubuntu/...`.
lxc publish software --alias ubuntu/22.04:curl:java
create alias from snapshot:
lxc snapshot software curl:java
lxc publish software/curl:java --alias ubuntu/22.04:curl:java
lxc delete software
launch alias:
lxc launch ubuntu/22.04:curl:java software &&
lxc exec software bash
lxc stop software && lxc delete software
|
Knowledge
# Generate a 4096-bit RSA key pair without prompting.
# -N is the passphrase: it is left empty so SSH logins can run unattended.
# -C is the key comment; the address below is a placeholder (the original
# value was lost to e-mail obfuscation) — replace it with your own.
ssh-keygen -b 4096 -t rsa -f ~/.ssh/id_rsa -q -N "" -C "user@example.com"
# Resolve the java symlink and strip the trailing "bin/java" to print the
# Java installation root (presumably for use as JAVA_HOME — confirm).
readlink -f /usr/bin/java | sed "s:bin/java::"
sudo apt-get install pdsh
sudo apt-get install ssh
|
|
# Open a login shell as the hadoop user (`su -h` only prints su's help text).
su - hadoop
# Format the NameNode storage directory (this discards existing HDFS metadata).
hdfs namenode -format
# Sanity-check the hadoop account. Single quotes make the hadoop user's shell,
# not the calling shell, expand ${HOME}.
sudo -u hadoop -H sh -c 'whoami; echo "${HOME}"'
# The start scripts are bash scripts: execute them directly so their own
# interpreter line is honoured, instead of forcing them through `sh`.
"$HADOOP_HOME/sbin/start-dfs.sh"
http://127.0.0.1:9870
"$HADOOP_HOME/sbin/start-yarn.sh"
http://127.0.0.1:8088
|
|
# Bring every installed package up to date, adding or removing packages where
# dependencies require it.
sudo apt dist-upgrade
# Upgrade the system to the next Ubuntu release.
sudo do-release-upgrade
# Repair packages left with broken or unmet dependencies.
sudo apt --fix-broken install
# Install the Ubuntu desktop metapackage.
sudo apt install ubuntu-desktop
|
# systemd [Service] section fragment; the rest of the unit file is not shown here.
[Service]
# Run the service's processes as the hadoop user and group.
User=hadoop
Group=hadoop
# The start command is expected to fork and exit while the daemon keeps running.
Type=forking
# Treat exit status 143 (128 + SIGTERM) as a clean stop — presumably because
# the JVM exits with that status when terminated; confirm.
SuccessExitStatus=143
|
|
#######################################
# Read distribution identity fields from an os-release file.
# Uses only shell built-ins, so it also works where grep lacks -P
# (BusyBox, BSD), and strips either quoting style (double or single)
# from the values.
# Globals:   HOST_OS_ID, HOST_OS_ID_LIKE, HOST_OS_VERSION (written; set to
#            the empty string when the file exists but lacks the key, left
#            untouched when the file does not exist)
# Arguments: $1 - os-release file to read (default: /etc/os-release)
# Returns:   0
#######################################
detect_host_os() {
  local os_release_file=${1:-/etc/os-release}
  local key value

  [ -f "$os_release_file" ] || return 0

  HOST_OS_ID=''
  HOST_OS_ID_LIKE=''
  HOST_OS_VERSION=''

  # `|| [ -n "$key" ]` keeps a final line that has no trailing newline.
  while IFS='=' read -r key value || [ -n "$key" ]; do
    # Strip one pair of matching surrounding quotes, if present.
    case $value in
      \"*\") value=${value#\"}; value=${value%\"} ;;
      \'*\') value=${value#\'}; value=${value%\'} ;;
    esac
    case $key in
      ID) HOST_OS_ID=$value ;;
      ID_LIKE) HOST_OS_ID_LIKE=$value ;;
      VERSION_ID) HOST_OS_VERSION=$value ;;
    esac
  done < "$os_release_file"

  return 0
}

detect_host_os
|
References