documentation
software
all nodes
- apt -y install oracle-java8-jdk
- write to /etc/profile.d/jdk.sh
export J2SDKDIR=/usr/lib/jvm/jdk-8-oracle-x64
export J2REDIR=/usr/lib/jvm/jdk-8-oracle-x64/jre
export PATH=$PATH:/usr/lib/jvm/jdk-8-oracle-x64/bin:/usr/lib/jvm/jdk-8-oracle-x64/db/bin:/usr/lib/jvm/jdk-8-oracle-x64/jre/bin
export JAVA_HOME=/usr/lib/jvm/jdk-8-oracle-x64
export DERBY_HOME=/usr/lib/jvm/jdk-8-oracle-x64/db
- and to /etc/profile.d/jdk.csh
setenv J2SDKDIR /usr/lib/jvm/jdk-8-oracle-x64
setenv J2REDIR /usr/lib/jvm/jdk-8-oracle-x64/jre
setenv PATH ${PATH}:/usr/lib/jvm/jdk-8-oracle-x64/bin:/usr/lib/jvm/jdk-8-oracle-x64/db/bin:/usr/lib/jvm/jdk-8-oracle-x64/jre/bin
setenv JAVA_HOME /usr/lib/jvm/jdk-8-oracle-x64
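- a quick sanity check of the environment after a new login (example session):
. /etc/profile.d/jdk.sh   # or log in again
java -version             # should report the Oracle JDK 8
echo $JAVA_HOME           # /usr/lib/jvm/jdk-8-oracle-x64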
configuration
all nodes
- write to /etc/hadoop/conf/core-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://namenode:9000</value>
</property>
</configuration>
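- the effective setting can be verified on any node with
hdfs getconf -confKey fs.defaultFS   # should print hdfs://namenode:9000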
namenode
- write to /etc/hadoop/conf/hdfs-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>dfs.replication</name>
<value>3</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>file:/data/hdfs/namenode</value>
</property>
</configuration>
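- the name directory must exist and belong to the hdfs user before the first start; a sketch of the initial steps (paths as configured above):
mkdir -p /data/hdfs/namenode
chown -R hdfs:hdfs /data/hdfs
sudo -u hdfs hdfs namenode -format   # only on a fresh cluster
systemctl start hadoop-hdfs-namenode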
datanode
- write to /etc/hadoop/conf/hdfs-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>dfs.datanode.data.dir</name>
<value>file:/data/hdfs/datanode</value>
</property>
</configuration>
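- analogously on each datanode (assuming the service is called hadoop-hdfs-datanode like the other hadoop-hdfs units):
mkdir -p /data/hdfs/datanode
chown -R hdfs:hdfs /data/hdfs
systemctl start hadoop-hdfs-datanode
sudo -u hdfs hdfs dfsadmin -report   # the new datanode should appear in the report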
using policies
- hdfs storagepolicies -listPolicies shows the available policies and storage types
- on the datanode modify the /etc/hadoop/conf/hdfs-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>dfs.datanode.data.dir</name>
<value>[SSD]file:/data/HDFS_FAST</value>
</property>
</configuration>
- instead of [SSD] other storage types can be used ([DISK], [ARCHIVE], [RAM_DISK])
- set or get the policy on the file or directory level by
hdfs storagepolicies -setStoragePolicy -path /somepath/somefile -policy ALL_SSD
- instead of ALL_SSD other policies can be used, e.g. HOT, WARM, COLD, ONE_SSD, LAZY_PERSIST (see the example below)
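- example round trip, assuming a test file /somepath/somefile exists:
hdfs storagepolicies -setStoragePolicy -path /somepath/somefile -policy ALL_SSD
hdfs storagepolicies -getStoragePolicy -path /somepath/somefile
sudo -u hdfs hdfs mover -p /somepath/somefile   # migrates already written blocks to match the policy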
rack awareness
- add the following lines to /etc/hadoop/conf/core-site.xml
<property>
<name>net.topology.script.file.name</name>
<value>/usr/local/bin/topology.sh</value>
</property>
- create the script /usr/local/bin/topology.sh
- the script gets one or more IP addresses as arguments and must print the corresponding rack, e.g. /rack-'some number', for each of them; a sketch follows below
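- a minimal sketch of such a script, assuming the rack number can be derived from the third octet of the IPv4 address (adapt the mapping to the real network):
#!/bin/bash
# hadoop may pass several addresses in one call; print one rack per argument
for ip in "$@"; do
    octet=$(echo "$ip" | cut -d. -f3)
    echo -n "/rack-${octet:-default} "
done
echo
- make it executable (chmod +x /usr/local/bin/topology.sh) and verify the mapping with hdfs dfsadmin -printTopology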
snapshot
- the following commands work at least on a datanode (see the example session below)
- hdfs dfsadmin -allowSnapshot <dir>
- hdfs dfs -createSnapshot <dir> [<snapshotName>]
- hdfs dfs -ls <dir>/.snapshot
- hdfs dfs -deleteSnapshot <dir> <snapshotName>
- hdfs dfsadmin -disallowSnapshot <dir>
- snapshots can be directly accessed by going into <dir>/.snapshot
- hdfs snapshotDiff <dir> <fromSnapshot> <toSnapshot>
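- example round trip on a hypothetical directory /data/projects:
sudo -u hdfs hdfs dfsadmin -allowSnapshot /data/projects
hdfs dfs -createSnapshot /data/projects before-cleanup
hdfs dfs -ls /data/projects/.snapshot
hdfs snapshotDiff /data/projects before-cleanup .   # "." stands for the current state
hdfs dfs -deleteSnapshot /data/projects before-cleanup
sudo -u hdfs hdfs dfsadmin -disallowSnapshot /data/projects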
deletion
- the deletion of the underlying blocks works automatically once files are removed
- see the logs in /var/log/hadoop-hdfs
nodes down
- if a datanode goes down while the namenode operates normally, its replicas are automatically re-created on other nodes after a configurable timeout
- the namenode can be brought into safe mode manually, as shown below
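- example (as user hdfs):
hdfs dfsadmin -safemode enter   # no namespace changes are accepted
hdfs dfsadmin -safemode get     # query the current state
hdfs dfsadmin -safemode leave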
HA setup
what is required
- at least 3 zookeeper servers are needed (scr01, scr02, scr03)
- 3 or more journal nodes (scr01, scr02, scr03)
- 2 name nodes (scr01,scr02)
software
- apt -y install hadoop-hdfs-zkfc hadoop-hdfs-journalnode hadoop-hdfs-namenode zookeeper-server
zookeeper
- create the /etc/zookeeper/conf/zoo.cfg
maxClientCnxns=50
tickTime=2000
initLimit=10
syncLimit=5
dataDir=/var/lib/zookeeper
clientPort=2181
server.1=scr01:2888:3888
server.2=scr02:2888:3888
server.3=scr03:2888:3888
- write the numeric id of each server (1, 2, or 3, matching its server.N entry) into /var/lib/zookeeper/myid on the respective host
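- the health of the ensemble can be checked with the zookeeper four-letter commands, e.g.:
echo ruok | nc scr01 2181   # answers imok if the server is alive
echo stat | nc scr01 2181   # shows whether the node is leader or follower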
journal and namenode
- write into the /etc/hadoop/conf/core-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>net.topology.script.file.name</name>
<value>/usr/local/bin/topology.sh</value>
</property>
<property>
<name>net.topology.script.number.args</name>
<value>1</value>
</property>
<property>
<name>fs.defaultFS</name>
<value>hdfs://ha-cluster</value>
</property>
<property>
<name>ha.zookeeper.quorum</name>
<value>scr01:2181,scr02:2181,scr03:2181</value>
</property>
</configuration>
- write to the /etc/hadoop/conf/hdfs-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>dfs.nameservices</name>
<value>ha-cluster</value>
</property>
<property>
<name>dfs.ha.namenodes.ha-cluster</name>
<value>nn1,nn2</value>
</property>
<property>
<name>dfs.namenode.rpc-address.ha-cluster.nn1</name>
<value>scr01:8020</value>
</property>
<property>
<name>dfs.namenode.rpc-address.ha-cluster.nn2</name>
<value>scr02:8020</value>
</property>
<property>
<name>dfs.namenode.servicerpc-address.ha-cluster.nn1</name>
<value>scr01:8022</value>
</property>
<property>
<name>dfs.namenode.servicerpc-address.ha-cluster.nn2</name>
<value>scr02:8022</value>
</property>
<property>
<name>dfs.namenode.http-address.ha-cluster.nn1</name>
<value>scr01:50070</value>
</property>
<property>
<name>dfs.namenode.http-address.ha-cluster.nn2</name>
<value>scr02:50070</value>
</property>
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://scr01:8485;scr02:8485;scr03:8485/ha-cluster</value>
</property>
<property>
<name>dfs.client.failover.proxy.provider.ha-cluster</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<property>
<name>dfs.ha.fencing.methods</name>
<value>sshfence</value>
</property>
<property>
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>/var/lib/hadoop-hdfs/.ssh/id_rsa</value>
</property>
<property>
<name>dfs.journalnode.edits.dir</name>
<value>/data/hdfs/journal</value>
</property>
<property>
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>file:///data/hdfs/namenode</value>
</property>
</configuration>
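- the sshfence method requires that the hdfs user can log into the other namenode without a password; a sketch of the key setup (key path as configured above):
sudo -u hdfs ssh-keygen -t rsa -N '' -f /var/lib/hadoop-hdfs/.ssh/id_rsa
# append id_rsa.pub to /var/lib/hadoop-hdfs/.ssh/authorized_keys on both namenodes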
- start the zookeeper by systemctl start zookeeper-server
- run on one of the name nodes as user hdfs: hdfs zkfc -formatZK
- start the journalnode by systemctl start hadoop-hdfs-journalnode
- start the zkfc daemon by systemctl start hadoop-hdfs-zkfc
- format the namenode as user hdfs on the first namenode: hdfs namenode -format
- bootstrap the namenode on the other namenode as user hdfs by: hdfs namenode -bootstrapStandby
- start the namenodes by systemctl start hadoop-hdfs-namenode
- access the web interface of the namenodes on port 50070
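- the HA state can be queried and a failover can be triggered manually (as user hdfs):
hdfs haadmin -getServiceState nn1   # active or standby
hdfs haadmin -getServiceState nn2
hdfs haadmin -failover nn1 nn2      # manual failover test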
datanode config
- the /etc/hadoop/conf/core-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>dfs.nameservices</name>
<value>ha-cluster</value>
</property>
<property>
<name>dfs.ha.namenodes.ha-cluster</name>
<value>nn1,nn2</value>
</property>
<property>
<name>dfs.namenode.rpc-address.ha-cluster.nn1</name>
<value>scr01:8020</value>
</property>
<property>
<name>dfs.namenode.rpc-address.ha-cluster.nn2</name>
<value>scr02:8020</value>
</property>
<property>
<name>dfs.namenode.servicerpc-address.ha-cluster.nn1</name>
<value>scr01:8022</value>
</property>
<property>
<name>dfs.namenode.servicerpc-address.ha-cluster.nn2</name>
<value>scr02:8022</value>
</property>
</configuration>
- the /etc/hadoop/conf/hdfs-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>dfs.replication</name>
<value>3</value>
</property>
<property>
<name>dfs.datanode.max.locked.memory</name>
<value>2147483648</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>[SSD]file:/data/HDFS_SSD,[DISK]file:/data/HDFS_DISK</value>
</property>
</configuration>
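- note: dfs.datanode.max.locked.memory only takes effect if the memlock ulimit of the datanode process is at least as large; a quick check in the datanode's environment:
ulimit -l   # reported in kB, must cover the 2 GB configured above
systemctl restart hadoop-hdfs-datanode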
-- HenningFehrmann - 17 Jan 2017