Hadoop High Availability Cluster Deployment

This guide deploys a Hadoop high availability (HA) cluster on CentOS virtual machines running in VMware Workstation.

Design

| hadoop112 | hadoop113 | hadoop114 |
| --- | --- | --- |
| NameNode | NameNode | NameNode |
| ZKFC | ZKFC | ZKFC |
| JournalNode | JournalNode | JournalNode |
| ZK | ZK | ZK |
| ResourceManager | ResourceManager | ResourceManager |
| NodeManager | NodeManager | NodeManager |

Server Configuration

Here I simply clone the pseudo-distributed hadoop111 VM; deploying again from scratch by following my earlier post works just as well.

Set a Static IP

vim /etc/sysconfig/network-scripts/ifcfg-ens33

TYPE="Ethernet"
PROXY_METHOD="none"
BROWSER_ONLY="no"
BOOTPROTO="static"
DEFROUTE="yes"
IPV4_FAILURE_FATAL="no"
IPV6INIT="yes"
IPV6_AUTOCONF="yes"
IPV6_DEFROUTE="yes"
IPV6_FAILURE_FATAL="no"
IPV6_ADDR_GEN_MODE="stable-privacy"
NAME="ens33"
UUID="cafa4500-048b-41a3-a880-0ea13ff55b07"
DEVICE="ens33"
ONBOOT="yes"
IPADDR=192.168.64.112
NETMASK=255.255.255.0
GATEWAY=192.168.64.2
DNS1=192.168.64.2
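After saving the file, restart networking so the new address takes effect. A minimal sketch, assuming CentOS 7 (newer releases use NetworkManager/nmcli instead):

# Apply the new static IP (CentOS 7)
systemctl restart network
# Confirm the address on ens33
ip addr show ens33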


vim /etc/hostname
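The file should contain nothing but this node's hostname, hadoop112 here. As an alternative sketch, hostnamectl writes and applies it in one step:

# Equivalent to editing /etc/hostname, takes effect without a reboot
hostnamectl set-hostname hadoop112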


vim /etc/hosts

192.168.64.111 hadoop111
192.168.64.112 hadoop112
192.168.64.113 hadoop113
192.168.64.114 hadoop114
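A quick sanity check that the names resolve (the hadoop113/hadoop114 machines do not have to be running yet):

getent hosts hadoop112 hadoop113 hadoop114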

Install ZooKeeper

Use scp to copy apache-zookeeper-3.5.7-bin.tar.gz into /devops, then extract and configure it:

tar -zxvf apache-zookeeper-3.5.7-bin.tar.gz
cd /devops/apache-zookeeper-3.5.7-bin
mkdir -p zkData
cd /devops/apache-zookeeper-3.5.7-bin/conf
cp zoo_sample.cfg zoo.cfg
vim zoo.cfg

dataDir=/devops/apache-zookeeper-3.5.7-bin/zkData

#######################cluster##########################
server.2=hadoop112:2888:3888
server.3=hadoop113:2888:3888
server.4=hadoop114:2888:3888

cd /devops/apache-zookeeper-3.5.7-bin/zkData
touch myid
Note: since I am currently on hadoop112, write 2 into myid; on hadoop113 and hadoop114 write 3 and 4 respectively. The value must match the N in the server.N entries of zoo.cfg (2888 is the follower-to-leader port, 3888 the leader-election port).
vim myid

2

sudo vim /etc/profile.d/my_env.sh

export ZOOKEEPER_HOME=/devops/apache-zookeeper-3.5.7-bin
export PATH=$ZOOKEEPER_HOME/bin:$PATH

source /etc/profile
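A quick check that the environment took effect (just a sketch, not part of the original steps):

echo $ZOOKEEPER_HOME
command -v zkServer.sh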

Hadoop Configuration

sudo chown -R hadoop:hadoop /devops

vim /devops/hadoop-3.1.3/etc/hadoop/core-site.xml

<configuration>
<!-- Assemble the addresses of multiple NameNodes into one logical cluster, mycluster -->
<property>
<name>fs.defaultFS</name>
<value>hdfs://mycluster</value>
</property>

<!-- Hadoop data storage directory -->
<property>
<name>hadoop.tmp.dir</name>
<value>/devops/hadoop-3.1.3/data</value>
</property>

<!-- Static user for the HDFS web UI: hadoop -->
<property>
<name>hadoop.http.staticuser.user</name>
<value>hadoop</value>
</property>

<!-- Hosts from which the hadoop (superuser) is allowed to impersonate other users -->
<property>
<name>hadoop.proxyuser.hadoop.hosts</name>
<value>*</value>
</property>
<!-- Groups whose users the hadoop (superuser) is allowed to impersonate -->
<property>
<name>hadoop.proxyuser.hadoop.groups</name>
<value>*</value>
</property>
<!-- Users that the hadoop (superuser) is allowed to impersonate -->
<property>
<name>hadoop.proxyuser.hadoop.users</name>
<value>*</value>
</property>
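<!-- ZooKeeper quorum used by ZKFC for automatic NameNode failover -->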
<property>
<name>ha.zookeeper.quorum</name>
<value>hadoop112:2181,hadoop113:2181,hadoop114:2181</value>
</property>
</configuration>

vim /devops/hadoop-3.1.3/etc/hadoop/hdfs-site.xml

<configuration>
<!-- NameNode data storage directory -->
<property>
<name>dfs.namenode.name.dir</name>
<value>file://${hadoop.tmp.dir}/name</value>
</property>
<!-- DataNode data storage directory -->
<property>
<name>dfs.datanode.data.dir</name>
<value>file://${hadoop.tmp.dir}/data</value>
</property>
<!-- JournalNode edits storage directory -->
<property>
<name>dfs.journalnode.edits.dir</name>
<value>${hadoop.tmp.dir}/jn</value>
</property>
<!-- Name of the fully distributed cluster (nameservice) -->
<property>
<name>dfs.nameservices</name>
<value>mycluster</value>
</property>
<!-- The NameNodes that make up the cluster -->
<property>
<name>dfs.ha.namenodes.mycluster</name>
<value>nn1,nn2,nn3</value>
</property>
<!-- RPC addresses of the NameNodes -->
<property>
<name>dfs.namenode.rpc-address.mycluster.nn1</name>
<value>hadoop112:8020</value>
</property>
<property>
<name>dfs.namenode.rpc-address.mycluster.nn2</name>
<value>hadoop113:8020</value>
</property>
<property>
<name>dfs.namenode.rpc-address.mycluster.nn3</name>
<value>hadoop114:8020</value>
</property>
<!-- HTTP addresses of the NameNodes -->
<property>
<name>dfs.namenode.http-address.mycluster.nn1</name>
<value>hadoop112:9870</value>
</property>
<property>
<name>dfs.namenode.http-address.mycluster.nn2</name>
<value>hadoop113:9870</value>
</property>
<property>
<name>dfs.namenode.http-address.mycluster.nn3</name>
<value>hadoop114:9870</value>
</property>
<!-- Where the NameNode metadata (shared edits) is stored on the JournalNodes -->
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://hadoop112:8485;hadoop113:8485;hadoop114:8485/mycluster</value>
</property>
<!-- Failover proxy provider: how clients determine which NameNode is Active -->
<property>
<name>dfs.client.failover.proxy.provider.mycluster</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<!-- Fencing method, so that only one NameNode serves clients at any given time -->
<property>
<name>dfs.ha.fencing.methods</name>
<value>sshfence</value>
</property>
<!-- sshfence requires passwordless SSH with this private key -->
<property>
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>/home/hadoop/.ssh/id_rsa</value>
</property>
<!-- Enable automatic NameNode failover -->
<property>
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
</property>
</configuration>

vim /devops/hadoop-3.1.3/etc/hadoop/yarn-site.xml

<configuration>
<!-- Use the MapReduce shuffle auxiliary service -->
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<!-- Environment variables inherited by containers -->
<property>
<name>yarn.nodemanager.env-whitelist</name>
<value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME</value>
</property>
<!-- Minimum and maximum memory a YARN container may be allocated -->
<property>
<name>yarn.scheduler.minimum-allocation-mb</name>
<value>512</value>
</property>
<property>
<name>yarn.scheduler.maximum-allocation-mb</name>
<value>4096</value>
</property>
<!-- Physical memory YARN is allowed to manage on this node -->
<property>
<name>yarn.nodemanager.resource.memory-mb</name>
<value>4096</value>
</property>
<!-- Disable YARN's physical and virtual memory limit checks -->
<property>
<name>yarn.nodemanager.pmem-check-enabled</name>
<value>false</value>
</property>
<property>
<name>yarn.nodemanager.vmem-check-enabled</name>
<value>false</value>
</property>
<!-- Enable log aggregation -->
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
<!-- Log aggregation server address -->
<property>
<name>yarn.log.server.url</name>
<value>http://hadoop112:19888/jobhistory/logs</value>
</property>
<!-- Keep aggregated logs for 7 days -->
<property>
<name>yarn.log-aggregation.retain-seconds</name>
<value>604800</value>
</property>
<!-- Enable ResourceManager HA -->
<property>
<name>yarn.resourcemanager.ha.enabled</name>
<value>true</value>
</property>

<!-- Declare the ResourceManager HA cluster (cluster id) -->
<property>
<name>yarn.resourcemanager.cluster-id</name>
<value>cluster-yarn1</value>
</property>
<!-- Logical list of ResourceManager ids -->
<property>
<name>yarn.resourcemanager.ha.rm-ids</name>
<value>rm1,rm2,rm3</value>
</property>

<!-- ========== rm1 configuration ========== -->
<!-- Hostname of rm1 -->
<property>
<name>yarn.resourcemanager.hostname.rm1</name>
<value>hadoop112</value>
</property>
<!-- Web UI address of rm1 -->
<property>
<name>yarn.resourcemanager.webapp.address.rm1</name>
<value>hadoop112:8088</value>
</property>
<!-- Internal communication address of rm1 -->
<property>
<name>yarn.resourcemanager.address.rm1</name>
<value>hadoop112:8032</value>
</property>
<!-- Address ApplicationMasters use to request resources from rm1 -->
<property>
<name>yarn.resourcemanager.scheduler.address.rm1</name>
<value>hadoop112:8030</value>
</property>
<!-- Address NodeManagers use to connect to rm1 -->
<property>
<name>yarn.resourcemanager.resource-tracker.address.rm1</name>
<value>hadoop112:8031</value>
</property>

<!-- ========== rm2 configuration ========== -->
<!-- Hostname of rm2 -->
<property>
<name>yarn.resourcemanager.hostname.rm2</name>
<value>hadoop113</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address.rm2</name>
<value>hadoop113:8088</value>
</property>
<property>
<name>yarn.resourcemanager.address.rm2</name>
<value>hadoop113:8032</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address.rm2</name>
<value>hadoop113:8030</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address.rm2</name>
<value>hadoop113:8031</value>
</property>

<!-- ========== rm3 configuration ========== -->
<!-- Hostname of rm3 -->
<property>
<name>yarn.resourcemanager.hostname.rm3</name>
<value>hadoop114</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address.rm3</name>
<value>hadoop114:8088</value>
</property>
<property>
<name>yarn.resourcemanager.address.rm3</name>
<value>hadoop114:8032</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address.rm3</name>
<value>hadoop114:8030</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address.rm3</name>
<value>hadoop114:8031</value>
</property>

<!-- ZooKeeper ensemble address -->
<property>
<name>yarn.resourcemanager.zk-address</name>
<value>hadoop112:2181,hadoop113:2181,hadoop114:2181</value>
</property>

<!-- Enable automatic recovery -->
<property>
<name>yarn.resourcemanager.recovery.enabled</name>
<value>true</value>
</property>

<!-- Store ResourceManager state in the ZooKeeper ensemble -->
<property>
<name>yarn.resourcemanager.store.class</name>
<value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
</property>
</configuration>
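One thing worth double-checking, although it is not shown above (presumably the cloned hadoop111 image already had it): in Hadoop 3.x the etc/hadoop/workers file lists the hosts on which start-dfs.sh / start-yarn.sh launch the worker daemons. For this cluster it would contain:

hadoop112
hadoop113
hadoop114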

sudo vim /etc/hosts


Clone hadoop112

Create full clones of hadoop112 named hadoop113 and hadoop114, and configure their IP addresses and hostnames.


Configure each node's ZooKeeper myid
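On each clone the myid has to be changed to match its server.N entry in zoo.cfg. A minimal sketch, assuming the clones keep the same /devops layout as hadoop112:

# On hadoop113
echo 3 > /devops/apache-zookeeper-3.5.7-bin/zkData/myid
# On hadoop114
echo 4 > /devops/apache-zookeeper-3.5.7-bin/zkData/myid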


Run the following on hadoop112, hadoop113, and hadoop114:

ssh-copy-id hadoop112
ssh-copy-id hadoop113
ssh-copy-id hadoop114
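ssh-copy-id assumes the hadoop user on each node already has a key pair; if not, generate one first. The same private key is also what the sshfence method in hdfs-site.xml expects at /home/hadoop/.ssh/id_rsa. A sketch:

# Run once per node as the hadoop user if ~/.ssh/id_rsa does not exist yet
ssh-keygen -t rsa
# Then distribute the public key with the ssh-copy-id commands above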

Deploying without cloning hadoop112

To challenge yourself, you can create fresh CentOS systems instead; just remember to transfer all the relevant files and configuration with scp.

scp example

scp -r /devops/apache-zookeeper-3.5.7-bin hadoop@hadoop113:/devops
scp -r /devops/apache-zookeeper-3.5.7-bin hadoop@hadoop114:/devops
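If you go the non-clone route, the Hadoop installation and the environment file have to be copied as well. A hedged sketch, assuming the same paths as on hadoop112:

scp -r /devops/hadoop-3.1.3 hadoop@hadoop113:/devops
scp -r /devops/hadoop-3.1.3 hadoop@hadoop114:/devops
# /etc/profile.d/my_env.sh also needs to reach each node (root permission required on the target)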

Start the Hadoop HA Cluster

Run on hadoop112, hadoop113, and hadoop114:
zkServer.sh start

Run on hadoop112, hadoop113, and hadoop114:
cd /devops/hadoop-3.1.3
rm -rf data/ logs/
hdfs --daemon start journalnode

Run on hadoop112:
hdfs namenode -format
hdfs --daemon start namenode

Run on hadoop113 and hadoop114:
hdfs namenode -bootstrapStandby

Run on hadoop112:
stop-all.sh
hdfs zkfc -formatZK

Then start everything again:
start-all.sh
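A few commands to verify that HA actually came up; this is only a sketch, with the nn/rm ids taken from hdfs-site.xml and yarn-site.xml:

# Check the Java processes on every node
jps
# Exactly one NameNode should report active, the others standby
hdfs haadmin -getServiceState nn1
hdfs haadmin -getServiceState nn2
hdfs haadmin -getServiceState nn3
# Same check for the ResourceManagers
yarn rmadmin -getServiceState rm1
yarn rmadmin -getServiceState rm2
yarn rmadmin -getServiceState rm3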


Note: also configure the hosts file on your local machine, otherwise you cannot reach the web UIs via the hadoop112, hadoop113, and hadoop114 hostnames.
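For example, on the machine running the browser (C:\Windows\System32\drivers\etc\hosts on Windows, /etc/hosts on Linux/macOS), add:

192.168.64.112 hadoop112
192.168.64.113 hadoop113
192.168.64.114 hadoop114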
