Hadoop: Difference between revisions
Jump to navigation
Jump to search
Line 68: | Line 68: | ||
== Knowledge == | == Knowledge == | ||
{| | {| | ||
|valign="top" colspan=" | |valign="top" colspan="3"| | ||
ssh-keygen -b 4096 -t rsa -f ~/.ssh/id_rsa -q -N "[email protected]" | ssh-keygen -b 4096 -t rsa -f ~/.ssh/id_rsa -q -N "[email protected]" | ||
readlink -f /usr/bin/java | sed "s:bin/java::" | readlink -f /usr/bin/java | sed "s:bin/java::" | ||
Line 75: | Line 75: | ||
|- | |- | ||
|colspan=" | |colspan="3"| | ||
---- | ---- | ||
|- | |- | ||
|valign="top" colspan=" | |valign="top" colspan="3"| | ||
su -h hadoop | su -h hadoop | ||
hdfs namenode -format | hdfs namenode -format | ||
Line 90: | Line 90: | ||
|- | |- | ||
|colspan=" | |colspan="3"| | ||
---- | ---- | ||
|- | |- | ||
|valign="bottom"| | |valign="bottom" colspan="2"| | ||
sudo apt dist-upgrade | sudo apt dist-upgrade | ||
sudo do-release-upgrade | sudo do-release-upgrade | ||
Line 110: | Line 110: | ||
|- | |- | ||
|colspan=" | |colspan="3"| | ||
---- | ---- | ||
|- | |- | ||
|valign="top" colspan=" | |valign="top" colspan="3"| | ||
<source lang="bash"> | <source lang="bash"> | ||
if [ -f '/etc/os-release' ];then | if [ -f '/etc/os-release' ];then | ||
Line 121: | Line 121: | ||
fi | fi | ||
</source> | </source> | ||
|- | |||
|colspan="3"| | |||
---- | |||
|- | |||
|valign="top"| | |||
<source lang='bash'> | |||
lxc launch images:fedora/37 robotics && | |||
lxc exec robotics bash <<'EOF' | |||
sleep 5 | |||
dnf install -y curl java-11-openjdk | |||
java -version | |||
EOF | |||
</source> | |||
lxc snapshot robotics curl:java | |||
lxc stop robotics | |||
lxc publish robotics --alias\ | |||
fedora/37:curl:java | |||
lxc publish robotics/curl:java --alias\ | |||
fedora/37:curl:java | |||
|valign="top"| | |||
<source lang='bash'> | |||
lxc launch images:opensuse/15.3 agronomy && | |||
lxc exec agronomy bash <<'EOF' | |||
sleep 5 | |||
zypper install -y curl java-11-openjdk | |||
java -version | |||
EOF | |||
</source> | |||
lxc snapshot agronomy curl:java | |||
lxc stop agronomy | |||
lxc publish agronomy --alias\ | |||
opensuse/15.3:curl:java | |||
lxc publish agronomy/curl:java --alias\ | |||
opensuse/15.3:curl:java | |||
|valign="top"| | |||
<source lang='bash'> | |||
lxc launch images:ubuntu/22.04 software && | |||
lxc exec software bash <<'EOF' | |||
sleep 5 | |||
apt-get install -y curl openjdk-11-jdk | |||
java -version | |||
EOF | |||
</source> | |||
lxc snapshot software curl:java | |||
lxc stop software | |||
lxc publish software --alias\ | |||
ubuntu:22.04:curl:java | |||
lxc publish software/curl:java --alias\ | |||
ubuntu:22.04:curl:java | |||
|} | |} | ||
Revision as of 11:13, 28 December 2022
Hadoop is a Java-based programming framework that supports the processing and storage of extremely large datasets on a cluster of inexpensive machines. It was the first major open source project in the big data playing field and is sponsored by the Apache Software Foundation. Hadoop is comprised of four main layers:
- Hadoop Common is the collection of utilities and libraries that support other Hadoop modules.
- HDFS, which stands for Hadoop Distributed File System, is responsible for persisting data to disk.
- YARN, short for Yet Another Resource Negotiator, is the resource manager and job scheduler, often described as the "operating system" of a Hadoop cluster.
- MapReduce is the original processing model for Hadoop clusters. It distributes work across the nodes of the cluster (the map step), then organizes and reduces the results from the nodes into a response to a query (the reduce step). Many other processing models are available for the 3.x version of Hadoop.
Configuration
# Write the four Hadoop site configuration files for a single-node setup.
# Requires: HADOOP_HOME pointing at the Hadoop installation, and sudo rights
# to write below it.

# Abort early instead of writing to /etc/hadoop/... when HADOOP_HOME is unset.
: "${HADOOP_HOME:?HADOOP_HOME must be set}"

# Local directories referenced by dfs.namenode.name.dir / dfs.datanode.data.dir.
mkdir -p /home/hadoop/hdfs/{datanode,namenode}/

# Each file is overwritten (tee without -a): appending a second
# <configuration> root element to an existing file would leave invalid XML.
# The heredoc delimiters are quoted because nothing inside needs expansion.

# core-site.xml: temp directory and default file system URI.
# fs.defaultFS is the current name of the deprecated fs.default.name key.
sudo tee "$HADOOP_HOME/etc/hadoop/core-site.xml" >/dev/null <<'EOF'
<?xml version="1.0" encoding="UTF-8"?>
<configuration>
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/home/hadoop/tmp</value>
  </property>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://0.0.0.0:9000</value>
    <description>The default file system URI</description>
  </property>
</configuration>
EOF

# hdfs-site.xml: replication factor 1 and the storage directories created above.
sudo tee "$HADOOP_HOME/etc/hadoop/hdfs-site.xml" >/dev/null <<'EOF'
<?xml version="1.0" encoding="UTF-8"?>
<configuration>
  <property>
    <name>dfs.replication</name>
    <value>1</value>
  </property>
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>/home/hadoop/hdfs/namenode/</value>
  </property>
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>/home/hadoop/hdfs/datanode/</value>
  </property>
</configuration>
EOF

# mapred-site.xml: run MapReduce jobs on YARN.
sudo tee "$HADOOP_HOME/etc/hadoop/mapred-site.xml" >/dev/null <<'EOF'
<?xml version="1.0" encoding="UTF-8"?>
<configuration>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
</configuration>
EOF

# yarn-site.xml: enable the MapReduce shuffle auxiliary service.
sudo tee "$HADOOP_HOME/etc/hadoop/yarn-site.xml" >/dev/null <<'EOF'
<?xml version="1.0" encoding="UTF-8"?>
<configuration>
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
</configuration>
EOF
Knowledge
ssh-keygen -b 4096 -t rsa -f ~/.ssh/id_rsa -q -N "[email protected]" readlink -f /usr/bin/java | sed "s:bin/java::" sudo apt-get install pdsh sudo apt-get install ssh | ||
| ||
su - hadoop hdfs namenode -format sudo -u hadoop -H sh -c "whoami; echo ${HOME}" sh $HADOOP_HOME/sbin/start-dfs.sh http://127.0.0.1:9870 sh $HADOOP_HOME/sbin/start-yarn.sh http://127.0.0.1:8088 | ||
| ||
sudo apt dist-upgrade sudo do-release-upgrade sudo apt --fix-broken install sudo apt install ubuntu-desktop |
[Service]
User=hadoop
Group=hadoop
Type=forking
SuccessExitStatus=143
| |
| ||
# Detect the host distribution from the os-release file.
# Sets: HOST_OS_ID       - value of ID          (e.g. "ubuntu")
#       HOST_OS_ID_LIKE  - value of ID_LIKE     (empty when not declared)
#       HOST_OS_VERSION  - value of VERSION_ID  (empty when not declared)
# All three are left empty when no os-release file is found.
#
# The file is a list of shell-compatible assignments, so it is sourced in a
# subshell instead of being parsed with GNU-only `grep -P`. The shell then
# handles both double- and single-quoted values, and the subshell keeps the
# file's other variables (NAME, VERSION, ...) out of the current environment.
HOST_OS_ID=''
HOST_OS_ID_LIKE=''
HOST_OS_VERSION=''
for os_release_file in /etc/os-release /usr/lib/os-release; do
  if [ -f "$os_release_file" ]; then
    HOST_OS_ID=$(. "$os_release_file" && printf '%s' "${ID-}")
    HOST_OS_ID_LIKE=$(. "$os_release_file" && printf '%s' "${ID_LIKE-}")
    HOST_OS_VERSION=$(. "$os_release_file" && printf '%s' "${VERSION_ID-}")
    break  # /etc/os-release takes precedence over /usr/lib/os-release
  fi
done
unset os_release_file
| ||
| ||
# Build a local Fedora 37 image with curl and OpenJDK 11 preinstalled.

# Launch the container "robotics" and provision it through a shell fed from
# the heredoc (quoted delimiter: nothing is expanded on the host side).
lxc launch images:fedora/37 robotics &&
lxc exec robotics bash <<'EOF'
sleep 5  # short pause after launch before using the package manager
dnf install -y curl java-11-openjdk
java -version
EOF

# Snapshot the provisioned state, then stop the container before publishing.
lxc snapshot robotics curl:java
lxc stop robotics

# Publish the stopped container as a local image.
lxc publish robotics --alias fedora/37:curl:java

# Alternative: publish from the snapshot instead. Both commands use the same
# alias, so run only one of them.
# lxc publish robotics/curl:java --alias fedora/37:curl:java
# Build a local openSUSE 15.3 image with curl and OpenJDK 11 preinstalled.

# Launch the container "agronomy" and provision it through a shell fed from
# the heredoc (quoted delimiter: nothing is expanded on the host side).
lxc launch images:opensuse/15.3 agronomy &&
lxc exec agronomy bash <<'EOF'
sleep 5  # short pause after launch before using the package manager
zypper install -y curl java-11-openjdk
java -version
EOF

# Snapshot the provisioned state, then stop the container before publishing.
lxc snapshot agronomy curl:java
lxc stop agronomy

# Publish the stopped container as a local image.
lxc publish agronomy --alias opensuse/15.3:curl:java

# Alternative: publish from the snapshot instead. Both commands use the same
# alias, so run only one of them.
# lxc publish agronomy/curl:java --alias opensuse/15.3:curl:java
# Build a local Ubuntu 22.04 image with curl and OpenJDK 11 preinstalled.

# Launch the container "software" and provision it through a shell fed from
# the heredoc (quoted delimiter: nothing is expanded on the host side).
lxc launch images:ubuntu/22.04 software &&
lxc exec software bash <<'EOF'
sleep 5  # short pause after launch before using the package manager
# Refresh the package index first; without it apt-get may not find the packages.
apt-get update &&
DEBIAN_FRONTEND=noninteractive apt-get install -y curl openjdk-11-jdk
java -version
EOF

# Snapshot the provisioned state, then stop the container before publishing.
lxc snapshot software curl:java
lxc stop software

# Publish the stopped container as a local image. The alias uses the same
# "<distro>/<release>:curl:java" form as the Fedora and openSUSE images.
# NOTE(review): a leading "ubuntu:" would presumably be read as a remote name
# when the alias is used later - confirm.
lxc publish software --alias ubuntu/22.04:curl:java

# Alternative: publish from the snapshot instead. Both commands use the same
# alias, so run only one of them.
# lxc publish software/curl:java --alias ubuntu/22.04:curl:java