You are here: Foswiki>Main Web>HaDoop (02 Feb 2017, HenningFehrmann)Edit Attach

documentation

Software

all nodes

  • apt -y install oracle-java8-jdk
  • write to /etc/profile.d/jdk.sh
export J2SDKDIR=/usr/lib/jvm/jdk-8-oracle-x64
export J2REDIR=/usr/lib/jvm/jdk-8-oracle-x64/jre
export PATH=$PATH:/usr/lib/jvm/jdk-8-oracle-x64/bin:/usr/lib/jvm/jdk-8-oracle-x64/db/bin:/usr/lib/jvm/jdk-8-oracle-x64/jre/bin
export JAVA_HOME=/usr/lib/jvm/jdk-8-oracle-x64
export DERBY_HOME=/usr/lib/jvm/jdk-8-oracle-x64/db
  • and write to /etc/profile.d/jdk.csh
setenv J2SDKDIR /usr/lib/jvm/jdk-8-oracle-x64
setenv J2REDIR /usr/lib/jvm/jdk-8-oracle-x64/jre
setenv PATH ${PATH}:/usr/lib/jvm/jdk-8-oracle-x64/bin:/usr/lib/jvm/jdk-8-oracle-x64/db/bin:/usr/lib/jvm/jdk-8-oracle-x64/jre/bin
setenv JAVA_HOME /usr/lib/jvm/jdk-8-oracle-x64

configuration

all nodes

<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<configuration>
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://namenode:9000</value>
    </property>
</configuration>

namenode

  • write to /etc/hadoop/conf/hdfs-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
        <property>
                <name>dfs.replication</name>
                <value>3</value>
        </property>
        <property>
                <name>dfs.namenode.name.dir</name>
                <value>file:/data/hdfs/namenode</value>
        </property>
</configuration>

datanode

  • write to /etc/hadoop/conf/hdfs-site.xml

<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
        <property>
                <name>dfs.datanode.data.dir</name>
                <value>file:/data/hdfs/datanode</value>
        </property>
</configuration>

using policies

  • hdfs storagepolicies -listPolicies shows policies and storageTypes
  • on the datanode modify the /etc/hadoop/conf/hdfs-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
        <property>
                <name>dfs.datanode.data.dir</name>
                <value>[SSD]file:/data/HDFS_FAST</value>
        </property>
</configuration>
  • instead of [SSD], other storage types can be used
  • on the file or directory level set or get the policy by
hdfs storagepolicies  -setStoragePolicy -path /somepath/somefile -policy ALL_SSD
  • instead of ALL_SSD, other policies can be used

rack awareness

  • add lines /etc/hadoop/conf/core-site.xml
    <property>
        <name>net.topology.script.file.name</name>
        <value>/usr/local/bin/topology.sh</value>
    </property>
  • create the script /usr/local/bin/topology.sh
  • the script receives an IP address as its argument and should print the rack in the form /rack-<number>

snapshot

  • it works at least on the datanode
  • hdfs dfsadmin -allowSnapshot
  • hdfs dfs -createSnapshot
  • hdfs dfs -ls /.snapshot
  • hdfs dfs -deleteSnapshot
  • hdfs dfsadmin -disallowSnapshot
  • can be directly accessed by going into /.snapshot
  • hdfs snapshotDiff

deletion

  • works automatically
  • see the logs in /var/log/hadoop-hdfs

nodes down

  • replicas are automatically shifted after a configurable time out, if the datanode is down and the namenode is operating normally.
  • bringing the namenode into the safemode

HA setup

what is required

  • at least 3 ZooKeeper servers are needed (scr01, scr02, scr03)
  • 3 or more journal nodes (scr01, scr02, scr03)
  • 2 name nodes (scr01,scr02)

software

  • apt -y install hadoop-hdfs-zkfc hadoop-hdfs-journalnode hadoop-hdfs-namenode zookeeper-server

zookeeper

  • create the /etc/zookeeper/conf/zoo.cfg
maxClientCnxns=50
tickTime=2000
initLimit=10
syncLimit=5
dataDir=/var/lib/zookeeper
clientPort=2181

server.1=scr01:2888:3888
server.2=scr02:2888:3888
server.3=scr03:2888:3888

journal and namenode

  • write into the /etc/hadoop/conf/core-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<!-- Put site-specific property overrides in this file. -->

<configuration>
        <property>
                <name>net.topology.script.file.name</name>
                <value>/usr/local/bin/topology.sh</value>
        </property>
        <property>
                <name>net.topology.script.number.args</name>
                <value>1</value>
        </property>
        <property>
                <name>fs.defaultFS</name>
                <value>hdfs://ha-cluster</value>
        </property>
        <property>
                <name>ha.zookeeper.quorum</name>
                <value>scr01:2181,scr02:2181,scr03:2181</value>
        </property>
</configuration>
  • write the /etc/hadoop/conf/hdfs-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<!-- Put site-specific property overrides in this file. -->

<configuration>
        <property>
                <name>dfs.nameservices</name>
                <value>ha-cluster</value>
        </property>
        <property>
                <name>dfs.ha.namenodes.ha-cluster</name>
                <value>nn1,nn2</value>
        </property>
        <property>
                <name>dfs.namenode.rpc-address.ha-cluster.nn1</name>
                <value>scr01:8020</value>
        </property>
        <property>
                <name>dfs.namenode.rpc-address.ha-cluster.nn2</name>
                <value>scr02:8020</value>
        </property>
        <property>
                <name>dfs.namenode.servicerpc-address.ha-cluster.nn1</name>
                <value>scr01:8022</value>
        </property>
        <property>
                <name>dfs.namenode.servicerpc-address.ha-cluster.nn2</name>
                <value>scr02:8022</value>
        </property>
        <property>
                <name>dfs.namenode.http-address.ha-cluster.nn1</name>
                <value>scr01:50070</value>
        </property>
        <property>
                <name>dfs.namenode.http-address.ha-cluster.nn2</name>
                <value>scr02:50070</value>
        </property>
        <property>
                <name>dfs.namenode.shared.edits.dir</name>
                <value>qjournal://scr01:8485;scr02:8485;scr03:8485/ha-cluster</value>
        </property>
        <property>
                <name>dfs.client.failover.proxy.provider.ha-cluster</name>
                <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
        </property>
        <property>
                <name>dfs.ha.fencing.methods</name>
                <value>sshfence</value>
        </property>
        <property>
                <name>dfs.ha.fencing.ssh.private-key-files</name>
                <value>/var/lib/hadoop-hdfs/.ssh/id_rsa</value>
        </property>
        <property>
                <name>dfs.journalnode.edits.dir</name>
                <value>/data/hdfs/journal</value>
        </property>

        <property>
                <name>dfs.ha.automatic-failover.enabled</name>
                <value>true</value>
        </property>

        <property>
                <name>dfs.namenode.name.dir</name>
                <value>file:///data/hdfs/namenode</value>
        </property>
</configuration>

  • start the zookeeper by systemctl start zookeeper-server
  • run on one of the name nodes as user hdfs: hdfs zkfc -formatZK
  • start the journalnode by systemctl start hadoop-hdfs-journalnode
  • start the zkfc daemon by systemctl start hadoop-hdfs-zkfc
  • format the namenode as user hdfs on the first namenode: hdfs namenode -format
  • bootstrap the namenode on the other namenode as user hdfs by: hdfs namenode -bootstrapStandby
  • start the namenodes by systemctl start hadoop-hdfs-namenode
  • access the webpage of the nameserver port 50070

datanode config

  • the /etc/hadoop/conf/core-site.xml

<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
        <property>
                <name>dfs.nameservices</name>
                <value>ha-cluster</value>
        </property>
        <property>
                <name>dfs.ha.namenodes.ha-cluster</name>
                <value>nn1,nn2</value>
        </property>
        <property>
                <name>dfs.namenode.rpc-address.ha-cluster.nn1</name>
                <value>scr01:8020</value>
        </property>
        <property>
                <name>dfs.namenode.rpc-address.ha-cluster.nn2</name>
                <value>scr02:8020</value>
        </property>
        <property>
                <name>dfs.namenode.servicerpc-address.ha-cluster.nn1</name>
                <value>scr01:8022</value>
        </property>
        <property>
                <name>dfs.namenode.servicerpc-address.ha-cluster.nn2</name>
                <value>scr02:8022</value>
        </property>
</configuration>

  • the /etc/hadoop/conf/hdfs-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<!-- Put site-specific property overrides in this file. -->

<configuration>
         <property>
                <name>dfs.replication</name>
                <value>3</value>
        </property>
        <property>
                <name>dfs.datanode.max.locked.memory</name>
                <value>2147483648</value>
        </property>
        <property>
                <name>dfs.datanode.data.dir</name>
                <value>[SSD]file:/data/HDFS_SSD,[DISK]file:/data/HDFS_DISK</value>
        </property>
</configuration>

-- HenningFehrmann - 17 Jan 2017
Topic revision: r15 - 02 Feb 2017, HenningFehrmann
This site is powered by FoswikiCopyright © by the contributing authors. All material on this collaboration platform is the property of the contributing authors.
Ideas, requests, problems regarding Foswiki? Send feedback