Hadoop Installation



A detailed walkthrough of building a fully distributed Apache Hadoop cluster with NameNode HA and ResourceManager HA for high availability.
Scripts directory: /root/hadoop-2.6.0/sbin
Start the YARN job history server: ./mr-jobhistory-daemon.sh start historyserver

Cluster nodes: 192.168.10.90 - 192.168.10.102

Hadoop Installation
1. Download the package

wget http://mirrors.hust.edu.cn/apache/hadoop/common/stable2/hadoop-2.7.3.tar.gz

2. Extract the archive

tar -zvxf hadoop-2.7.3.tar.gz -C /usr/local/
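
With the archive extracted, it helps to put Hadoop's bin and sbin directories on the PATH so the hadoop, hdfs and yarn commands used below resolve. A minimal sketch, assuming the lines are appended to /etc/profile:

# hypothetical environment setup; adjust paths to your install
export HADOOP_HOME=/usr/local/hadoop-2.7.3
export PATH=$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$PATH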

3. Configure core-site.xml

	<configuration>
		<!-- Set the HDFS nameservice ID to ns1 -->
		<property>
			<name>fs.defaultFS</name>
			<value>hdfs://ns1/</value>
		</property>
		<!-- Hadoop temporary directory -->
		<property>
			<name>hadoop.tmp.dir</name>
			<value>/usr/local/hadoop-2.7.3/tmp</value>
		</property>
		<property>
			<name>dfs.journalnode.edits.dir</name>
			<value>/usr/local/hadoop-2.7.3/journal</value>
		</property>
		<!-- ZooKeeper quorum used for HA -->
		<property>
			<name>ha.zookeeper.quorum</name>
			<value>10.130.213.53:2181,10.130.213.54:2181,10.130.213.55:2181</value>
		</property>
		<property>
			<name>hadoop.native.lib</name>
			<value>true</value>
			<description>Should native hadoop libraries, if present, be used.</description>
		</property>
	</configuration>
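
The tmp and journal directories referenced above are not created automatically; making them up front avoids startup errors (paths as configured above):

mkdir -p /usr/local/hadoop-2.7.3/tmp /usr/local/hadoop-2.7.3/journal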

4. Configure hdfs-site.xml
Manual forced failover: sudo -u hdfs hdfs haadmin -transitionToActive / -transitionToStandby (note: with automatic failover enabled, as configured below, these commands require the --forcemanual flag).

<configuration>
	<!-- The HDFS nameservice ID, ns1; must match core-site.xml -->
	<property>
		<name>dfs.nameservices</name>
		<value>ns1</value>
	</property>
	<!-- ns1 has two NameNodes: nn1 and nn2 -->
	<property>
		<name>dfs.ha.namenodes.ns1</name>
		<value>nn1,nn2</value>
	</property>
	<!-- RPC address of nn1 -->
	<property>
		<name>dfs.namenode.rpc-address.ns1.nn1</name>
		<value>192.168.10.90:9000</value>
	</property>
	<!-- HTTP address of nn1 -->
	<property>
		<name>dfs.namenode.http-address.ns1.nn1</name>
		<value>192.168.10.90:50070</value>
	</property>
	<!-- RPC address of nn2 -->
	<property>
		<name>dfs.namenode.rpc-address.ns1.nn2</name>
		<value>192.168.10.91:9000</value>
	</property>
	<!-- HTTP address of nn2 -->
	<property>
		<name>dfs.namenode.http-address.ns1.nn2</name>
		<value>192.168.10.91:50070</value>
	</property>
	<!-- Where the NameNode edit log is shared on the JournalNodes -->
	<property>
		<name>dfs.namenode.shared.edits.dir</name>
		<value>qjournal://S1PA222:8485;S1PA223:8485;S1PA224:8485/ns1</value>
	</property>
	<!-- Where each JournalNode stores its data on local disk -->
	<property>
		<name>dfs.journalnode.edits.dir</name>
		<value>/usr/local/hadoop-2.7.3/journal</value>
	</property>
	<!-- Enable automatic NameNode failover via ZKFC -->
	<property>
		<name>dfs.ha.automatic-failover.enabled.ns1</name>
		<value>true</value>
	</property>
	<!-- Failover proxy provider: how clients locate the active NameNode -->
	<property>
		<name>dfs.client.failover.proxy.provider.ns1</name>
		<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
	</property>
	<!-- Fencing methods; separate multiple methods with newlines, one per line -->
	<property>
		<name>dfs.ha.fencing.methods</name>
		<value>
			sshfence
			shell(/bin/true)
		</value>
	</property>
	<!-- sshfence requires passwordless SSH to the other NameNode -->
	<property>
		<name>dfs.ha.fencing.ssh.private-key-files</name>
		<value>/root/.ssh/id_rsa</value>
	</property>
	<!-- sshfence connection timeout -->
	<property>
		<name>dfs.ha.fencing.ssh.connect-timeout</name>
		<value>30000</value>
	</property>

	<!-- Enable the WebHDFS REST API -->
	<property>
		<name>dfs.webhdfs.enabled</name>
		<value>true</value>
	</property>

</configuration>
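
The JournalNodes must already be running before the NameNode is formatted in step 8. A minimal sketch, run on each host listed in dfs.namenode.shared.edits.dir:

hadoop-daemon.sh start journalnode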

5. Configure mapred-site.xml

	<configuration>
		<!-- Run MapReduce on YARN -->
		<property>
			<name>mapreduce.framework.name</name>
			<value>yarn</value>
		</property>

		<property>
			<name>mapred.hosts.exclude</name>
			<value>/usr/local/hadoop-2.7.3/conf/excludes</value>
			<final>true</final>
		</property>
	</configuration>
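
Note that a stock 2.7.x distribution ships only a template for this file; copy it before editing (assuming the default etc/hadoop layout):

cp /usr/local/hadoop-2.7.3/etc/hadoop/mapred-site.xml.template /usr/local/hadoop-2.7.3/etc/hadoop/mapred-site.xml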

6. Configure yarn-site.xml
Start a ResourceManager: sudo -u yarn yarn-daemon.sh start resourcemanager
Manual RM failover: sudo -u yarn yarn rmadmin -transitionToActive / -transitionToStandby

<configuration>
    <!-- Site specific YARN configuration properties -->
    <!-- Enable ResourceManager HA -->
    <property>
       <name>yarn.resourcemanager.ha.enabled</name>
       <value>true</value>
    </property>
    <!-- Cluster ID for the RM pair -->
    <property>
       <name>yarn.resourcemanager.cluster-id</name>
       <value>yrc</value>
    </property>
    <!-- Logical IDs of the two RMs -->
    <property>
       <name>yarn.resourcemanager.ha.rm-ids</name>
       <value>rm1,rm2</value>
    </property>
    <!-- Address of each RM -->
    <property>
       <name>yarn.resourcemanager.hostname.rm1</name>
       <value>10.130.213.52</value>
    </property>
    <property>
       <name>yarn.resourcemanager.hostname.rm2</name>
       <value>10.130.213.53</value>
    </property>
    <!-- ZooKeeper quorum address -->
    <property>
       <name>yarn.resourcemanager.zk-address</name>
       <value>10.130.213.53:2181,10.130.213.54:2181,10.130.213.55:2181</value>
    </property>
    <property>
       <name>yarn.nodemanager.aux-services</name>
       <value>mapreduce_shuffle</value>
    </property>
</configuration>
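
Once both ResourceManagers are up, a quick way to confirm HA is working is to query their states (rm1/rm2 as configured above):

yarn rmadmin -getServiceState rm1
yarn rmadmin -getServiceState rm2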

7. Configure slaves

10.130.213.52
10.130.213.53
10.130.213.54
10.130.213.55
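
Every node needs the same configuration. A sketch that pushes etc/hadoop to the nodes listed above (assumes root SSH access, set up in step 10):

for h in 10.130.213.52 10.130.213.53 10.130.213.54 10.130.213.55; do
    scp -r /usr/local/hadoop-2.7.3/etc/hadoop root@$h:/usr/local/hadoop-2.7.3/etc/
done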

8. Format the NameNode

hdfs namenode -format (hadoop namenode -format is the deprecated older form)
Possible error: java.net.UnknownHostException: iZ2ze1vepnzhenh9aaz723Z: unknown error
Running the hostname command shows:

[shirdrn@localhost bin]# hostname
localhost.localdomain
In other words, when Hadoop formatted HDFS it obtained the hostname localhost.localdomain from the hostname command, then failed to find a mapping for it in /etc/hosts. Here is the content of my /etc/hosts:

[root@localhost bin]# cat /etc/hosts
# Do not remove the following line, or various programs
# that require network functionality will fail.
127.0.0.1               localhost       localhost
192.168.1.103           localhost       localhost
That is, localhost.localdomain cannot be resolved to any IP address, hence the error.

Now look at /etc/sysconfig/network:

NETWORKING=yes
NETWORKING_IPV6=yes
HOSTNAME=localhost.localdomain

As you can see, hostname returns the HOSTNAME value configured here.
Fix: add a matching entry to /etc/hosts, or change HOSTNAME in /etc/sysconfig/network.
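
For example, a minimal repair, assuming the node's real IP is 192.168.10.90 and picking the hostname master (both values are illustrative):

# map the hostname in /etc/hosts (illustrative values)
echo "192.168.10.90  master" >> /etc/hosts
# set it as the running hostname; also update HOSTNAME= in /etc/sysconfig/network to persist it
hostname master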

9. Format the ZKFC state in ZooKeeper

On a NameNode host: hdfs zkfc -formatZK
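
After both format steps, a typical HA bring-up order looks like this sketch (host roles assumed from the hdfs-site.xml above):

hadoop-daemon.sh start namenode     # on nn1 (192.168.10.90)
hdfs namenode -bootstrapStandby     # on nn2 (192.168.10.91): copies nn1's metadata
start-dfs.sh                        # starts NameNodes, DataNodes, JournalNodes, ZKFCs
start-yarn.sh                       # starts the ResourceManager and NodeManagers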

10. Set up passwordless SSH

On the NameNode: ssh-keygen -t rsa
Press Enter through all the prompts.
cat /root/.ssh/id_rsa.pub >> /root/.ssh/authorized_keys
Then copy the master's authorized_keys to every other node (one way is sketched below).
First, to avoid permission problems, set the file and directory permissions up front:
# authorized_keys must be 600
chmod 600 /root/.ssh/authorized_keys
# the .ssh directory itself must be 700
chmod 700 /root/.ssh
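
A convenient way to distribute the key, assuming ssh-copy-id is available; the node list below reuses earlier IPs as placeholders:

for h in 192.168.10.91 10.130.213.52 10.130.213.53 10.130.213.54 10.130.213.55; do
    ssh-copy-id -i /root/.ssh/id_rsa.pub root@$h
done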

11. Migrate data between clusters

hadoop distcp hdfs://172.18.16.44:9000/MP4 hdfs://101.201.56.236:50071/
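
distcp runs as a MapReduce job against the two clusters' NameNode RPC addresses. For repeated migrations, -update copies only files that have changed and -p preserves file attributes (a sketch with the same endpoints as above):

hadoop distcp -update -p hdfs://172.18.16.44:9000/MP4 hdfs://101.201.56.236:50071/MP4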

ZooKeeper Installation
1. Download the package

wget http://mirror.bit.edu.cn/apache/zookeeper/zookeeper-3.4.10/zookeeper-3.4.10.tar.gz

2. Extract the archive

tar -zvxf zookeeper-3.4.10.tar.gz -C /usr/local/

3. Configure conf/zoo.cfg

# The number of milliseconds of each tick
tickTime=2000
# The number of ticks that the initial
# synchronization phase can take
initLimit=10
# The number of ticks that can pass between
# sending a request and getting an acknowledgement
syncLimit=5
# the directory where the snapshot is stored.
# do not use /tmp for storage, /tmp here is just
# example sakes.
dataDir=/usr/local/zookeeper-3.4.10/data/
dataLogDir=/usr/local/zookeeper-3.4.10/logs
# the port at which the clients will connect
clientPort=2181
# the maximum number of client connections.
# increase this if you need to handle more clients
#maxClientCnxns=60
#
# Be sure to read the maintenance section of the
# administrator guide before turning on autopurge.
#
# http://zookeeper.apache.org/doc/current/zookeeperAdmin.html#sc_maintenance
#
# The number of snapshots to retain in dataDir
#autopurge.snapRetainCount=3
# Purge task interval in hours
# Set to "0" to disable auto purge feature
#autopurge.purgeInterval=1
server.1=10.130.213.53:2888:3888
server.2=10.130.213.54:2888:3888
server.3=10.130.213.55:2888:3888
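
Create the dataDir and dataLogDir from the config, then start and verify each node (run on every ZooKeeper host):

mkdir -p /usr/local/zookeeper-3.4.10/data /usr/local/zookeeper-3.4.10/logs
/usr/local/zookeeper-3.4.10/bin/zkServer.sh start
/usr/local/zookeeper-3.4.10/bin/zkServer.sh status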

4. Install the JDK

cd /home/es/setup/
rpm -ivh jdk-8u40-linux-x64.rpm

5. Configure the Java environment variables

vi /etc/profile
#set environment
export JAVA_HOME=/usr/java/jdk1.8.0_40
export CLASSPATH=.:$JAVA_HOME/lib/tools.jar
export PATH=$JAVA_HOME/bin:$PATH
source /etc/profile
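
Verify that the JDK is picked up:

java -version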

6. If startup fails, check the log: vi /usr/local/zookeeper-3.4.10/bin/zookeeper.out
[myid:] - ERROR [main:QuorumPeerMain@85] - Invalid config, exiting abnormally
org.apache.zookeeper.server.quorum.QuorumPeerConfig$ConfigException: Error processing /usr/local/zookeeper-3.4.10/bin/../conf/zoo.cfg

    at org.apache.zookeeper.server.quorum.QuorumPeerConfig.parse(QuorumPeerConfig.java:154)
    at org.apache.zookeeper.server.quorum.QuorumPeerMain.initializeAndRun(QuorumPeerMain.java:101)
    at org.apache.zookeeper.server.quorum.QuorumPeerMain.main(QuorumPeerMain.java:78)

Caused by: java.lang.IllegalArgumentException: /usr/local/zookeeper-3.4.10/data/myid file is missing

    at org.apache.zookeeper.server.quorum.QuorumPeerConfig.parseProperties(QuorumPeerConfig.java:406)
    at org.apache.zookeeper.server.quorum.QuorumPeerConfig.parse(QuorumPeerConfig.java:150)
    ... 2 more

Invalid config, exiting abnormally

Fix: echo '1' > /usr/local/zookeeper-3.4.10/data/myid
Note: the number must match this node's server.N ID in conf/zoo.cfg, as sketched below.
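
On each quorum member, write the ID that matches its server.N line in zoo.cfg above:

echo 1 > /usr/local/zookeeper-3.4.10/data/myid    # on 10.130.213.53 (server.1)
echo 2 > /usr/local/zookeeper-3.4.10/data/myid    # on 10.130.213.54 (server.2)
echo 3 > /usr/local/zookeeper-3.4.10/data/myid    # on 10.130.213.55 (server.3)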
