Commit initial version of the Ansible deployment playbooks for each component

This commit is contained in:
qidaijie
2024-01-18 15:35:33 +08:00
parent f0bd05d565
commit 0cc392df5c
262 changed files with 15927 additions and 0 deletions

View File

@@ -0,0 +1,5 @@
[zookeeper]
192.168.45.102
[hdfs]
192.168.45.102

View File

@@ -0,0 +1,7 @@
- hosts: hdfs
remote_user: root
roles:
- role
vars_files:
- role/vars/main.yml

View File

@@ -0,0 +1,23 @@
#The default installation location
deploy_dir: /data/olap
#The default data storage location, used for storing application data, logs and configuration files
data_dir: /data/olap
hadoop:
namenode:
#Running memory of the Hadoop Namenode.
java_opt: '-Xmx1024m -Xms1024m'
#The number of Namenode RPC server threads that listen to requests from clients.
dfs.namenode.handler.count: 30
datanode:
#Running memory of the Hadoop Datanode.
java_opt: '-Xmx1024m -Xms1024m'
#The number of server threads for the datanode.
dfs.datanode.handler.count: 40
journalnode:
#Running memory of the Hadoop JournalNode.
java_opt: '-Xmx1024m -Xms1024m'
zkfc:
#Running memory of the Hadoop DFSZKFailoverController.
java_opt: '-Xmx1024m -Xms1024m'

View File

@@ -0,0 +1,223 @@
- name: Setting node_nums variable
set_fact: node_nums="{{groups.hdfs|length}}"
- name: To terminate execution
fail:
msg: "Fully Distributed Mode at least 3 nodes, please checking configurations/hosts -> hdfs"
when: node_nums < 3
- name: check Jdk version
shell: source /etc/profile && java -version 2>&1 | grep {{ java_version }} | wc -l
ignore_errors: false
register: jdk_out
- name: To terminate execution
fail:
msg: "JDK is not installed in the target cluster, please check!"
when: jdk_out.stdout != '2'
run_once: true
delegate_to: 127.0.0.1
- name: create hadoop package path:{{ deploy_dir }}
file:
state: directory
path: '{{ item.path }}'
with_items:
- { path: '{{ hdfs_data_dir }}' }
- { path: '{{ deploy_dir }}' }
- name: master_ip to ansible variable
set_fact: master_ip={{groups.hdfs[0]}}
- name: slave1_ip to ansible variable
set_fact: slave1_ip={{groups.hdfs[1]}}
- name: slave2_ip to ansible variable
set_fact: slave2_ip={{groups.hdfs[2]}}
# Unpack the Hadoop tarball
- name: unpack hadoop-2.7.1.tar.gz to {{ deploy_dir }}/
unarchive:
src: 'files/{{ hadoop_version }}.tar.gz'
dest: '{{ deploy_dir }}/'
- name: Copying hadoop config files
template:
src: '{{ item.src }}'
dest: '{{ deploy_dir }}/{{ hadoop_version }}/{{ item.dest }}'
mode: '{{ item.mode }}'
backup: false
with_items:
- { src: 'core-site.xml.j2', dest: 'etc/hadoop/core-site.xml', mode: '0644' }
- { src: 'hdfs-site.xml.j2', dest: 'etc/hadoop/hdfs-site.xml', mode: '0644' }
- { src: 'mapred-site.xml.j2', dest: 'etc/hadoop/mapred-site.xml', mode: '0644' }
- { src: 'slaves.j2', dest: 'etc/hadoop/slaves', mode: '0644' }
- { src: 'hadoop-env.sh.j2', dest: 'etc/hadoop/hadoop-env.sh', mode: '0755' }
- { src: 'set_hdfs_env.sh.j2', dest: 'bin/set_hdfs_env.sh', mode: '0755' }
- name: Copying HDFS config to {{ master_ip }}
template:
src: '{{ item.src }}'
dest: '{{ item.dest }}'
mode: 0755
backup: false
with_items:
- { src: 'daemonscript/dae-hdfsjournal.sh.j2', dest: '{{ deploy_dir }}/{{ hadoop_version }}/sbin/dae-hdfsjournal.sh' }
- { src: 'daemonscript/dae-hdfsmaster.sh.j2', dest: '{{ deploy_dir }}/{{ hadoop_version }}/sbin/dae-hdfsmaster.sh' }
- { src: 'daemonscript/keephdfsmaster.j2', dest: '/etc/init.d/keephdfsmaster' }
- { src: 'daemonscript/keephdfsjournal.j2', dest: '/etc/init.d/keephdfsjournal' }
- { src: 'ini_hdfs.sh.j2', dest: '{{ deploy_dir }}/{{ hadoop_version }}/bin/ini_hdfs.sh' }
run_once: true
delegate_to: "{{ master_ip }}"
- name: Copying HDFS config to {{ slave1_ip }}
template:
src: 'daemonscript/{{ item.src }}'
dest: '{{ item.dest }}'
mode: 0755
backup: yes
with_items:
- { src: 'dae-hdfsjournal.sh.j2', dest: '{{ deploy_dir }}/{{ hadoop_version }}/sbin/dae-hdfsjournal.sh' }
- { src: 'dae-hdfsslave.sh.j2', dest: '{{ deploy_dir }}/{{ hadoop_version }}/sbin/dae-hdfsslave.sh' }
- { src: 'keephdfsslave.j2', dest: '/etc/init.d/keephdfsslave' }
- { src: 'keephdfsjournal.j2', dest: '/etc/init.d/keephdfsjournal' }
run_once: true
delegate_to: "{{ slave1_ip }}"
- name: Copying HDFS config to {{ slave2_ip }}
template:
src: 'daemonscript/{{ item.src }}'
dest: '{{ item.dest }}'
mode: 0755
backup: yes
with_items:
- { src: 'dae-hdfsjournal.sh.j2', dest: '{{ deploy_dir }}/{{ hadoop_version }}/sbin/dae-hdfsjournal.sh' }
- { src: 'keephdfsjournal.j2', dest: '/etc/init.d/keephdfsjournal' }
run_once: true
delegate_facts: true
delegate_to: "{{ slave2_ip }}"
- name: Copying HDFS config to worker nodes
template:
src: 'daemonscript/{{ item.src }}'
dest: '{{ item.dest }}'
mode: 0755
backup: yes
with_items:
- { src: 'dae-hdfsworker.sh.j2', dest: '{{ deploy_dir }}/{{ hadoop_version }}/sbin/dae-hdfsworker.sh' }
- { src: 'keephdfsworker.j2', dest: '/etc/init.d/keephdfsworker' }
- name: set hadoop env
shell: cd {{ deploy_dir }}/{{ hadoop_version }}/bin/ && ./set_hdfs_env.sh {{ item.operation }}
with_items:
- { operation: 'chkconfig' }
- { operation: 'journal' }
- name: Waiting for the JournalNode to start (sleep 10s)
shell: sleep 10
- block:
- name: checking JournalNode status
shell: source /etc/profile && jps | grep JournalNode | grep -v grep | wc -l
register: status_out
- name: checking JournalNode
fail:
msg: "JournalNode failed to start. Please log in to {{ inventory_hostname }} and keep the logs for feedback. Log path: {{ deploy_dir }}/{{ hadoop_version }}/logs/*journalnode*"
when: status_out.stdout != '1'
run_once: true
delegate_to: 127.0.0.1
when: inventory_hostname in [master_ip,slave1_ip,slave2_ip]
- name: Initialization NameNode/ZKFC,Start master NameNode
block:
- name: initialization hadoop NameNode
shell: sh {{ deploy_dir }}/{{ hadoop_version }}/bin/ini_hdfs.sh namenode | grep "yes" | grep -v grep | wc -l
register: ini_namenode_out
- name: checking namenode init status
fail:
msg: "NameNode initialization failed. Please log in to [{{ master_ip }}] and keep the logs for feedback. Log path: {{ deploy_dir }}/{{ hadoop_version }}/logs/*namenode*"
when: ini_namenode_out.stdout != '1'
run_once: true
delegate_to: 127.0.0.1
- name: initialization hadoop ZKFC
shell: sh {{ deploy_dir }}/{{ hadoop_version }}/bin/ini_hdfs.sh zkfc | grep "yes" | grep -v grep | wc -l
register: ini_zkfc_out
- name: checking hadoop-zk init status
fail:
msg: "hadoop-zk (ZKFC) initialization failed. Please log in to [{{ master_ip }}] and keep the logs for feedback. Log path: {{ deploy_dir }}/{{ hadoop_version }}/logs/"
when: ini_zkfc_out.stdout != '1'
run_once: true
delegate_to: 127.0.0.1
- name: start hadoop Master node
shell: cd {{ deploy_dir }}/{{ hadoop_version }}/bin/ && ./set_hdfs_env.sh master
- name: Waiting for the master NameNode to start (sleep 20s)
shell: sleep 20
- name: checking {{ master_ip }} NameNode status
shell: source /etc/profile && jps | grep NameNode | grep -v grep | wc -l
register: master_namenode_status
- name: checking master NameNode
fail:
msg: "The master NameNode is not running. Please log in to [{{ master_ip }}] and keep the logs for feedback. Log path: {{ deploy_dir }}/{{ hadoop_version }}/logs/*namenode*"
when: master_namenode_status.stdout != '1'
run_once: true
delegate_to: 127.0.0.1
run_once: true
delegate_facts: true
delegate_to: "{{ master_ip }}"
- name: Start slave NameNode
block:
- name: copying {{ master_ip }} NameNode files to Slave
shell: "yes | {{ deploy_dir }}/{{ hadoop_version }}/bin/hdfs namenode -bootstrapStandby"
- name: start hadoop Slave node
shell: cd {{ deploy_dir }}/{{ hadoop_version }}/bin/ && ./set_hdfs_env.sh slave
- name: Waiting for the slave NameNode to start (sleep 60s)
shell: sleep 60
- name: checking {{ slave1_ip }} NameNode status
shell: source /etc/profile && jps | grep NameNode | grep -v grep | wc -l
register: slave1_namenode_status
- name: checking slave1 NameNode
fail:
msg: "The slave NameNode is not running. Please log in to [{{ slave1_ip }}] and keep the logs for feedback. Log path: {{ deploy_dir }}/{{ hadoop_version }}/logs/*namenode*"
when: slave1_namenode_status.stdout != '1'
run_once: true
delegate_to: 127.0.0.1
run_once: true
delegate_facts: true
delegate_to: "{{ slave1_ip }}"
- name: Start DataNode
block:
- name: start hadoop Worker nodes
shell: cd {{ deploy_dir }}/{{ hadoop_version }}/bin/ && ./set_hdfs_env.sh worker
- name: Waiting for the DataNode to start (sleep 60s)
shell: sleep 60
- name: checking DataNode status
shell: source /etc/profile && jps | grep DataNode | grep -v grep | wc -l
register: datanode_status
- name: checking DataNode
fail:
msg: "DataNode is not running. Please log in to [{{ inventory_hostname }}] and keep the logs for feedback. Log path: {{ deploy_dir }}/{{ hadoop_version }}/logs/*datanode*"
when: datanode_status.stdout != '1'
run_once: true
delegate_to: 127.0.0.1
- name: delete {{ deploy_dir }}/hadoop-2.7.1.tar.gz
file:
path: "{{ deploy_dir }}/{{ hadoop_version }}.tar.gz"
state: absent

View File

@@ -0,0 +1,9 @@
- block:
- include: uninstall.yml
- include: deploy.yml
- include: status-check.yml
when: (operation) == "install"
- block:
- include: uninstall.yml
when: (operation) == "uninstall"

View File

@@ -0,0 +1,53 @@
- name: Setting node_nums variable
set_fact: node_nums="{{groups.hdfs|length}}"
- name: Waiting for HDFS to start (sleep 30s)
shell: sleep 30
- block:
- name: checking JournalNode status
shell: source /etc/profile && jps | grep JournalNode | grep -v grep | wc -l
register: status_out
- name: checking JournalNode
fail:
msg: "JournalNode failed to start. Please log in to {{ inventory_hostname }} and keep the logs for feedback. Log path: {{ deploy_dir }}/{{ hadoop_version }}/logs/*journalnode*"
when: status_out.stdout != '1'
run_once: true
delegate_to: 127.0.0.1
when: inventory_hostname in groups['hdfs'][0:3]
- block:
- name: checking DFSZKFailoverController status
shell: source /etc/profile && jps | grep DFSZKFailoverController | grep -v grep | wc -l
register: status_out
- name: checking DFSZKFailoverController
fail:
msg: "DFSZKFailoverController failed to start. Please log in to {{ inventory_hostname }} and keep the logs for feedback. Log path: {{ deploy_dir }}/{{ hadoop_version }}/logs/*zkfc*"
when: status_out.stdout != '1'
run_once: true
delegate_to: 127.0.0.1
- name: checking NameNode status
shell: source /etc/profile && jps | grep NameNode | grep -v grep | wc -l
register: status_out
- name: checking NameNode
fail:
msg: "NameNode failed to start. Please log in to {{ inventory_hostname }} and keep the logs for feedback. Log path: {{ deploy_dir }}/{{ hadoop_version }}/logs/*namenode*"
when: status_out.stdout != '1'
run_once: true
delegate_to: 127.0.0.1
when: inventory_hostname in groups['hdfs'][0:2]
- name: checking DataNode status
shell: source /etc/profile && jps | grep DataNode | grep -v grep | wc -l
register: status_out
- name: checking DataNode
fail:
msg: "DataNode failed to start. Please log in to {{ inventory_hostname }} and keep the logs for feedback. Log path: {{ deploy_dir }}/{{ hadoop_version }}/logs/*datanode*"
when: status_out.stdout != '1'
run_once: true
delegate_to: 127.0.0.1

View File

@@ -0,0 +1,38 @@
- block:
- name: copy unload_hdfs.sh to {{ deploy_dir }}/
template:
src: 'unload_hdfs.sh.j2'
dest: '{{ deploy_dir }}/unload_hdfs.sh'
force: true
mode: 0755
- name: unload hadoop
shell: cd {{ deploy_dir }} && sh unload_hdfs.sh
- name: Ansible delete {{ deploy_dir }}/unload_hdfs.sh
file:
path: "{{ deploy_dir }}/unload_hdfs.sh"
state: absent
- name: Checking ZooKeeper has Hadoop nodes
shell: docker exec zookeeper zkCli.sh ls / | grep -w "hadoop-ha" | wc -l
run_once: true
delegate_to: "{{ groups.zookeeper[0] }}"
register: has_zknode
- name: Delete Hadoop nodes in ZooKeeper
shell: "docker exec zookeeper zkCli.sh rmr /hadoop-ha"
run_once: true
delegate_to: "{{ groups.zookeeper[0] }}"
when: has_zknode.stdout >= '1'
- name: Check if the Hadoop service already exists
shell: source /etc/profile && jps -l | egrep "org.apache.hadoop.hdfs.qjournal.server.JournalNode|org.apache.hadoop.hdfs.tools.DFSZKFailoverController|org.apache.hadoop.hdfs.server.datanode.DataNode|org.apache.hadoop.hdfs.server.namenode.NameNode" | wc -l
register: check_out
- name: To terminate execution
fail:
msg: "Uninstall failed. The component may not have been deployed by this installer; please uninstall it manually before continuing with the installation."
run_once: true
delegate_to: 127.0.0.1
when: check_out.stdout >= '1'

View File

@@ -0,0 +1,67 @@
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://ns1</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>file:{{ hdfs_data_dir }}/tmp</value>
</property>
<property>
<name>io.file.buffer.size</name>
<value>131702</value>
</property>
<property>
<name>hadoop.proxyuser.root.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.root.groups</name>
<value>*</value>
</property>
<property>
<name>hadoop.logfile.size</name>
<value>10000000</value>
<description>The max size of each log file</description>
</property>
<property>
<name>hadoop.logfile.count</name>
<value>1</value>
<description>The max number of log files</description>
</property>
<property>
<name>ha.zookeeper.quorum</name>
<value>{% for dev_info in groups.zookeeper %}{{ dev_info }}:2181{% if not loop.last %},{% endif %}{% endfor %}</value>
</property>
<property>
<name>ipc.client.connect.timeout</name>
<value>90000</value>
</property>
</configuration>

View File

@@ -0,0 +1,42 @@
#!/bin/bash
source /etc/profile
BASE_DIR={{ deploy_dir }}
VERSION={{ hadoop_version }}
function set_log(){
RES_SUM_FILE=$BASE_DIR/$VERSION/logs
if [ ! -d "$RES_SUM_FILE" ]
then
mkdir -p $RES_SUM_FILE
fi
if [ ! -f "$RES_SUM_FILE/$1" ];then
echo "0" > $RES_SUM_FILE/$1
fi
OLD_NUM=`cat $RES_SUM_FILE/$1`
RESTART_NUM=`expr $OLD_NUM + 1`
echo $RESTART_NUM > $RES_SUM_FILE/$1
if [ $OLD_NUM -eq "0" ];then
echo "`date "+%Y-%m-%d %H:%M:%S"` - Hadoop $2 service started for the first time" >> $BASE_DIR/$VERSION/logs/restart.log
else
echo "`date +%Y-%m-%d` `date +%H:%M:%S` - Hadoop $2 service was down and has been restarted - restart count -> $RESTART_NUM." >> $BASE_DIR/$VERSION/logs/restart.log
fi
}
while true ; do
HAS_JN=`ps -ef | grep JournalNode | grep -v grep | wc -l`
if [ $HAS_JN -eq "0" ];then
yes | $BASE_DIR/$VERSION/sbin/hadoop-daemon.sh start journalnode > /dev/null
set_log jnRes_sum JournalNode
fi
sleep 60
done

View File

@@ -0,0 +1,53 @@
#!/bin/bash
source /etc/profile
BASE_DIR={{ deploy_dir }}
VERSION={{ hadoop_version }}
function set_log(){
RES_SUM_FILE=$BASE_DIR/$VERSION/logs
if [ ! -d "$RES_SUM_FILE" ]
then
mkdir -p $RES_SUM_FILE
fi
if [ ! -f "$RES_SUM_FILE/$1" ];then
echo "0" > $RES_SUM_FILE/$1
fi
OLD_NUM=`cat $RES_SUM_FILE/$1`
RESTART_NUM=`expr $OLD_NUM + 1`
echo $RESTART_NUM > $RES_SUM_FILE/$1
if [ $OLD_NUM -eq "0" ];then
echo "`date "+%Y-%m-%d %H:%M:%S"` - Hadoop $2 service started for the first time" >> $BASE_DIR/$VERSION/logs/restart.log
else
echo "`date +%Y-%m-%d` `date +%H:%M:%S` - Hadoop $2 service was down and has been restarted - restart count -> $RESTART_NUM." >> $BASE_DIR/$VERSION/logs/restart.log
fi
}
while true ; do
HAS_NN=`ps -ef | grep NameNode | grep -v grep | wc -l`
HAS_ZKFC=`ps -ef | grep DFSZKFailoverController | grep -v grep | wc -l`
#HAS_NM=`ps -ef | grep NodeManager | grep -v grep | wc -l`
if [ $HAS_NN -eq "0" ];then
yes | $BASE_DIR/$VERSION/sbin/hadoop-daemon.sh start namenode > /dev/null
set_log nnRes_sum NameNode
fi
if [ $HAS_ZKFC -eq "0" ];then
yes | $BASE_DIR/$VERSION/sbin/hadoop-daemon.sh start zkfc > /dev/null
set_log zkfcRes_sum DFSZKFailoverController
fi
#if [ $HAS_NM -eq "0" ];then
# $BASE_DIR/$VERSION/sbin/yarn-daemon.sh start nodemanager > /dev/null
# set_log nmRes_sum NodeManager
#fi
sleep 60
done

View File

@@ -0,0 +1,60 @@
#!/bin/bash
source /etc/profile
BASE_DIR={{ deploy_dir }}
VERSION={{ hadoop_version }}
function set_log(){
RES_SUM_FILE=$BASE_DIR/$VERSION/logs
if [ ! -d "$RES_SUM_FILE" ]
then
mkdir -p $RES_SUM_FILE
fi
if [ ! -f "$RES_SUM_FILE/$1" ];then
echo "0" > $RES_SUM_FILE/$1
fi
OLD_NUM=`cat $RES_SUM_FILE/$1`
RESTART_NUM=`expr $OLD_NUM + 1`
echo $RESTART_NUM > $RES_SUM_FILE/$1
if [ $OLD_NUM -eq "0" ];then
echo "`date "+%Y-%m-%d %H:%M:%S"` - Hadoop $2 service started for the first time" >> $BASE_DIR/$VERSION/logs/restart.log
else
echo "`date +%Y-%m-%d` `date +%H:%M:%S` - Hadoop $2 service was down and has been restarted - restart count -> $RESTART_NUM." >> $BASE_DIR/$VERSION/logs/restart.log
fi
}
while true ; do
HAS_NN=`ps -ef | grep NameNode | grep -v grep | wc -l`
HAS_ZKFC=`ps -ef | grep DFSZKFailoverController | grep -v grep | wc -l`
#HAS_NM=`ps -ef | grep NodeManager | grep -v grep | wc -l`
#HAS_RM=`ps -ef | grep ResourceManager | grep -v grep | wc -l`
if [ $HAS_NN -eq "0" ];then
yes | $BASE_DIR/$VERSION/sbin/hadoop-daemon.sh start namenode > /dev/null
set_log nnRes_sum NameNode
fi
if [ $HAS_ZKFC -eq "0" ];then
yes | $BASE_DIR/$VERSION/sbin/hadoop-daemon.sh start zkfc > /dev/null
set_log zkfcRes_sum DFSZKFailoverController
fi
#if [ $HAS_NM -eq "0" ];then
# $BASE_DIR/$VERSION/sbin/yarn-daemon.sh start nodemanager > /dev/null
# set_log nmRes_sum NodeManager
#fi
#if [ $HAS_RM -eq "0" ];then
# $BASE_DIR/$VERSION/sbin/yarn-daemon.sh start resourcemanager > /dev/null
# set_log RMRes_sum ResourceManager
#fi
sleep 60
done

View File

@@ -0,0 +1,47 @@
#!/bin/bash
source /etc/profile
BASE_DIR={{ deploy_dir }}
VERSION={{ hadoop_version }}
function set_log(){
RES_SUM_FILE=$BASE_DIR/$VERSION/logs
if [ ! -d "$RES_SUM_FILE" ]
then
mkdir -p $RES_SUM_FILE
fi
if [ ! -f "$RES_SUM_FILE/$1" ];then
echo "0" > $RES_SUM_FILE/$1
fi
OLD_NUM=`cat $RES_SUM_FILE/$1`
RESTART_NUM=`expr $OLD_NUM + 1`
echo $RESTART_NUM > $RES_SUM_FILE/$1
if [ $OLD_NUM -eq "0" ];then
echo "`date "+%Y-%m-%d %H:%M:%S"` - Hadoop $2 service started for the first time" >> $BASE_DIR/$VERSION/logs/restart.log
else
echo "`date +%Y-%m-%d` `date +%H:%M:%S` - Hadoop $2 service was down and has been restarted - restart count -> $RESTART_NUM." >> $BASE_DIR/$VERSION/logs/restart.log
fi
}
while true ; do
HAS_DN=`ps -ef | grep DataNode | grep -v grep | wc -l`
#HAS_NM=`ps -ef | grep NodeManager | grep -v grep | wc -l`
if [ $HAS_DN -eq "0" ];then
yes | $BASE_DIR/$VERSION/sbin/hadoop-daemon.sh start datanode > /dev/null
set_log dnRes_sum DataNode
fi
#if [ $HAS_NM -eq "0" ];then
# $BASE_DIR/$VERSION/sbin/yarn-daemon.sh start nodemanager > /dev/null
# set_log nmRes_sum NodeManager
#fi
sleep 60
done

View File

@@ -0,0 +1,47 @@
#!/bin/bash
#
# keephdfsjournal    Watchdog init script that keeps the HDFS JournalNode daemon running.
#
# chkconfig: 123456 40 60
# description: keephdfsjournal
source /etc/profile
PRO_NAME=keephdfsjournal
INS_DIR={{ deploy_dir }}
# Hadoop version
VERSION={{ hadoop_version }}
case $1 in
start)
journal=`ps -ef | grep dae-hdfsjournal.sh | grep -v grep | wc -l`
if [ $journal -lt 1 ];then
nohup $INS_DIR/$VERSION/sbin/dae-hdfsjournal.sh > /dev/null 2>&1 &
fi
;;
stop)
HAS_KEEP_SHELL=`ps -ef | grep dae-hdfsjournal.sh | grep -v grep | awk '{print $2}'`
if [ $HAS_KEEP_SHELL ];then
echo "Daemon PID: $HAS_KEEP_SHELL"
kill -9 $HAS_KEEP_SHELL
fi
sh $INS_DIR/$VERSION/sbin/hadoop-daemon.sh stop journalnode > /dev/null
;;
status)
num=`ps -ef | grep JournalNode | grep -v grep | wc -l`
if [ "$num" -eq "1" ];then
echo "JournalNode process is running"
else
echo "JournalNode process is not running"
fi
;;
* )
echo "use keephdfsjournal [start|stop|status]"
;;
esac

View File

@@ -0,0 +1,42 @@
#!/bin/bash
#
# keephdfsmaster    Watchdog init script that keeps the HDFS master NameNode and ZKFC daemons running.
#
# chkconfig: 123456 40 60
# description: keephdfsmaster
source /etc/profile
PRO_NAME=keephdfsmaster
INS_DIR={{ deploy_dir }}
# Hadoop version
VERSION={{ hadoop_version }}
case $1 in
start)
master=`ps -ef | grep dae-hdfsmaster.sh | grep -v grep | wc -l`
if [ $master -lt 1 ];then
nohup $INS_DIR/$VERSION/sbin/dae-hdfsmaster.sh > /dev/null 2>&1 &
fi
;;
stop)
HAS_KEEP_SHELL=`ps -ef | grep dae-hdfsmaster.sh | grep -v grep | awk '{print $2}'`
if [ $HAS_KEEP_SHELL ];then
echo "Daemon PID: $HAS_KEEP_SHELL"
kill -9 $HAS_KEEP_SHELL
fi
sh $INS_DIR/$VERSION/sbin/hadoop-daemon.sh stop namenode > /dev/null
sh $INS_DIR/$VERSION/sbin/hadoop-daemon.sh stop zkfc > /dev/null
;;
status)
hdfs haadmin -getServiceState nn1
hdfs dfsadmin -report
;;
* )
echo "use keephdfsmaster [start|stop|status]"
;;
esac

View File

@@ -0,0 +1,42 @@
#!/bin/bash
#
# keephdfsslave    Watchdog init script that keeps the HDFS standby NameNode and ZKFC daemons running.
#
# chkconfig: 123456 40 60
# description: keephdfsslave
source /etc/profile
PRO_NAME=keephdfsslave
INS_DIR={{ deploy_dir }}
# Hadoop version
VERSION={{ hadoop_version }}
case $1 in
start)
slave=`ps -ef | grep dae-hdfsslave.sh | grep -v grep | wc -l`
if [ $slave -lt 1 ];then
nohup $INS_DIR/$VERSION/sbin/dae-hdfsslave.sh > /dev/null 2>&1 &
fi
;;
stop)
HAS_KEEP_SHELL=`ps -ef | grep dae-hdfsslave.sh | grep -v grep | awk '{print $2}'`
if [ $HAS_KEEP_SHELL ];then
echo "Daemon PID: $HAS_KEEP_SHELL"
kill -9 $HAS_KEEP_SHELL
fi
sh $INS_DIR/$VERSION/sbin/hadoop-daemon.sh stop namenode > /dev/null
sh $INS_DIR/$VERSION/sbin/hadoop-daemon.sh stop zkfc > /dev/null
;;
status)
hdfs haadmin -getServiceState nn2
hdfs dfsadmin -report
;;
* )
echo "use keephdfsslave [start|stop|status]"
;;
esac

View File

@@ -0,0 +1,47 @@
#!/bin/bash
#
# keephdfsworker    Watchdog init script that keeps the HDFS DataNode daemon running.
#
# chkconfig: 123456 40 60
# description: keephdfsworker
source /etc/profile
PRO_NAME=keephdfsworker
INS_DIR={{ deploy_dir }}
# Hadoop version
VERSION={{ hadoop_version }}
case $1 in
start)
worker=`ps -ef | grep dae-hdfsworker.sh | grep -v grep | wc -l`
if [ $worker -lt 1 ];then
nohup $INS_DIR/$VERSION/sbin/dae-hdfsworker.sh > /dev/null 2>&1 &
fi
;;
stop)
HAS_KEEP_SHELL=`ps -ef | grep dae-hdfsworker.sh | grep -v grep | awk '{print $2}'`
if [ $HAS_KEEP_SHELL ];then
echo "Daemon PID: $HAS_KEEP_SHELL"
kill -9 $HAS_KEEP_SHELL
fi
sh $INS_DIR/$VERSION/sbin/hadoop-daemon.sh stop datanode > /dev/null
;;
status)
num=`ps -ef | grep DataNode | grep -v grep | wc -l`
if [ "$num" -eq "1" ];then
echo "DataNode process is running"
else
echo "DataNode process is not running"
fi
;;
* )
echo "use keephdfsworker [start|stop|status]"
;;
esac

View File

@@ -0,0 +1,105 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Set Hadoop-specific environment variables here.
# The only required environment variable is JAVA_HOME. All others are
# optional. When running a distributed configuration it is best to
# set JAVA_HOME in this file, so that it is correctly defined on
# remote nodes.
export HADOOP_NAMENODE_JMX_OPTS="-Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.local.only=false -javaagent:{{ deploy_dir }}/{{ hadoop_version }}/monitor/jmx_prometheus_javaagent-0.12.0.jar=9905:{{ deploy_dir }}/{{ hadoop_version }}/monitor/hdfs.yaml"
export HADOOP_DATANODE_JMX_OPTS="-Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.local.only=false -javaagent:{{ deploy_dir }}/{{ hadoop_version }}/monitor/jmx_prometheus_javaagent-0.12.0.jar=9906:{{ deploy_dir }}/{{ hadoop_version }}/monitor/hdfs.yaml"
# The java implementation to use.
#export HADOOP_HEAPSIZE=m
#export JAVA_HOME=/usr/local/jdk/jdk1.8.0_73
export JAVA_HOME=$JAVA_HOME
# The jsvc implementation to use. Jsvc is required to run secure datanodes
# that bind to privileged ports to provide authentication of data transfer
# protocol. Jsvc is not required if SASL is configured for authentication of
# data transfer protocol using non-privileged ports.
#export JSVC_HOME=${JSVC_HOME}
export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-"/etc/hadoop"}
# Extra Java CLASSPATH elements. Automatically insert capacity-scheduler.
for f in $HADOOP_HOME/contrib/capacity-scheduler/*.jar; do
if [ "$HADOOP_CLASSPATH" ]; then
export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:$f
else
export HADOOP_CLASSPATH=$f
fi
done
# The maximum amount of heap to use, in MB. Default is 1000.
#export HADOOP_HEAPSIZE=
#export HADOOP_NAMENODE_INIT_HEAPSIZE=""
# Extra Java runtime options. Empty by default.
export HADOOP_OPTS="$HADOOP_OPTS -Djava.net.preferIPv4Stack=true"
# Command specific options appended to HADOOP_OPTS when specified
export HADOOP_NAMENODE_OPTS="$HADOOP_NAMENODE_OPTS {{ hadoop.namenode.java_opt }} -Xss256k -XX:MetaspaceSize=128m -XX:MaxMetaspaceSize=256m -XX:SurvivorRatio=2 -XX:+UseConcMarkSweepGC -XX:+UseParNewGC -XX:+CMSParallelRemarkEnabled -XX:MaxTenuringThreshold=15 -XX:+UseCMSCompactAtFullCollection -XX:CMSFullGCsBeforeCompaction=1 -XX:+UseCMSInitiatingOccupancyOnly -XX:CMSInitiatingOccupancyFraction=70 -XX:-DisableExplicitGC -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Xloggc:{{ deploy_dir }}/{{ hadoop_version }}/logs/gc-namenode-%t.log -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=5 -XX:GCLogFileSize=100M -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath={{ deploy_dir }}/{{ hadoop_version }}/logs/ -Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender}"
export HADOOP_DATANODE_OPTS="$HADOOP_DATANODE_OPTS {{ hadoop.datanode.java_opt }} -Xss256k -XX:MetaspaceSize=128m -XX:MaxMetaspaceSize=256m -XX:SurvivorRatio=2 -XX:+UseConcMarkSweepGC -XX:+UseParNewGC -XX:+CMSParallelRemarkEnabled -XX:MaxTenuringThreshold=15 -XX:+UseCMSCompactAtFullCollection -XX:CMSFullGCsBeforeCompaction=1 -XX:+UseCMSInitiatingOccupancyOnly -XX:CMSInitiatingOccupancyFraction=70 -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Xloggc:{{ deploy_dir }}/{{ hadoop_version }}/logs/gc-datanode-%t.log -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=5 -XX:GCLogFileSize=100M -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath={{ deploy_dir }}/{{ hadoop_version }}/logs/ -Dhadoop.security.logger=ERROR,RFAS"
export HADOOP_JOURNALNODE_OPTS="$HADOOP_JOURNALNODE_OPTS {{ hadoop.journalnode.java_opt }}"
export HADOOP_ZKFC_OPTS="$HADOOP_ZKFC_OPTS {{ hadoop.zkfc.java_opt }}"
export HADOOP_SECONDARYNAMENODE_OPTS="-Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender} $HADOOP_SECONDARYNAMENODE_OPTS"
export HADOOP_NFS3_OPTS="$HADOOP_NFS3_OPTS"
export HADOOP_PORTMAP_OPTS="-Xmx512m $HADOOP_PORTMAP_OPTS"
# The following applies to multiple commands (fs, dfs, fsck, distcp etc)
export HADOOP_CLIENT_OPTS="-Xmx512m $HADOOP_CLIENT_OPTS"
#HADOOP_JAVA_PLATFORM_OPTS="-XX:-UsePerfData $HADOOP_JAVA_PLATFORM_OPTS"
# On secure datanodes, user to run the datanode as after dropping privileges.
# This **MUST** be uncommented to enable secure HDFS if using privileged ports
# to provide authentication of data transfer protocol. This **MUST NOT** be
# defined if SASL is configured for authentication of data transfer protocol
# using non-privileged ports.
export HADOOP_SECURE_DN_USER=${HADOOP_SECURE_DN_USER}
# Where log files are stored. $HADOOP_HOME/logs by default.
#export HADOOP_LOG_DIR=${HADOOP_LOG_DIR}/$USER
# Where log files are stored in the secure data environment.
export HADOOP_SECURE_DN_LOG_DIR=${HADOOP_LOG_DIR}/${HADOOP_HDFS_USER}
###
# HDFS Mover specific parameters
###
# Specify the JVM options to be used when starting the HDFS Mover.
# These options will be appended to the options specified as HADOOP_OPTS
# and therefore may override any similar flags set in HADOOP_OPTS
#
# export HADOOP_MOVER_OPTS=""
###
# Advanced Users Only!
###
# The directory where pid files are stored. /tmp by default.
# NOTE: this should be set to a directory that can only be written to by
# the user that will run the hadoop daemons. Otherwise there is the
# potential for a symlink attack.
export HADOOP_PID_DIR={{ deploy_dir }}/{{ hadoop_version }}/pids
export HADOOP_SECURE_DN_PID_DIR=${HADOOP_PID_DIR}
# A string representing this instance of hadoop. $USER by default.
export HADOOP_IDENT_STRING=$USER

View File

@@ -0,0 +1,142 @@
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>dfs.namenode.name.dir</name>
<value>file:{{ hdfs_data_dir }}/dfs/name</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>file:{{ hdfs_data_dir }}/dfs/data</value>
</property>
<property>
<name>dfs.replication</name>
<value>2</value>
</property>
<property>
<name>dfs.webhdfs.enabled</name>
<value>true</value>
</property>
<property>
<name>dfs.permissions</name>
<value>false</value>
</property>
<property>
<name>dfs.permissions.enabled</name>
<value>false</value>
</property>
<property>
<name>dfs.nameservices</name>
<value>ns1</value>
</property>
<property>
<name>dfs.blocksize</name>
<value>134217728</value>
</property>
<property>
<name>dfs.ha.namenodes.ns1</name>
<value>nn1,nn2</value>
</property>
<!-- RPC address of nn1 (the host where nn1 runs) -->
<property>
<name>dfs.namenode.rpc-address.ns1.nn1</name>
<value>{{ groups.hdfs[0] }}:9000</value>
</property>
<!-- HTTP address of nn1 (address used for external access) -->
<property>
<name>dfs.namenode.http-address.ns1.nn1</name>
<value>{{ groups.hdfs[0] }}:50070</value>
</property>
<!-- RPC address of nn2 (the host where nn2 runs) -->
<property>
<name>dfs.namenode.rpc-address.ns1.nn2</name>
<value>{{ groups.hdfs[1] }}:9000</value>
</property>
<!-- HTTP address of nn2 (address used for external access) -->
<property>
<name>dfs.namenode.http-address.ns1.nn2</name>
<value>{{ groups.hdfs[1] }}:50070</value>
</property>
<!-- Where the NameNode shared edits (metadata) are stored on the JournalNodes (usually deployed together with ZooKeeper) -->
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://{{groups.hdfs[0]}}:8485;{{groups.hdfs[1]}}:8485;{{groups.hdfs[2]}}:8485/ns1</value>
</property>
<!-- Where the JournalNode stores its data on local disk -->
<property>
<name>dfs.journalnode.edits.dir</name>
<value>{{ hdfs_data_dir }}/journal</value>
</property>
<!-- Java class that HDFS clients use to reach the NameNode through a proxy; it determines which NameNode is currently Active -->
<property>
<name>dfs.client.failover.proxy.provider.ns1</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<!-- Fencing methods used during automatic failover; sshfence logs in to the old Active NameNode over SSH and kills it, with shell(true) as a fallback (see the official docs for other methods) -->
<property>
<name>dfs.ha.fencing.methods</name>
<value>sshfence
shell(true)</value>
</property>
<!-- SSH passwordless-login private key, required only when the sshfence fencing method is used -->
<property>
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>/root/.ssh/id_rsa</value>
</property>
<!-- Timeout for the sshfence fencing method; can be omitted when a shell-script method is used for failover -->
<property>
<name>dfs.ha.fencing.ssh.connect-timeout</name>
<value>30000</value>
</property>
<!-- Enable automatic failover; can be left unset if automatic failover is not needed -->
<property>
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
</property>
<property>
<name>dfs.datanode.max.transfer.threads</name>
<value>8192</value>
</property>
<!-- Number of NameNode RPC handler threads; increasing this value costs little extra resources -->
<property>
<name>dfs.namenode.handler.count</name>
<value>{{ hadoop.namenode['dfs.namenode.handler.count'] }}</value>
</property>
<!-- Number of DataNode handler threads; increasing this value uses more memory -->
<property>
<name>dfs.datanode.handler.count</name>
<value>{{ hadoop.datanode['dfs.datanode.handler.count'] }}</value>
</property>
<!-- Bandwidth that the balancer may use -->
<property>
<name>dfs.balance.bandwidthPerSec</name>
<value>104857600</value>
</property>
<!-- Reserved disk space that HDFS will not use, in bytes -->
<property>
<name>dfs.datanode.du.reserved</name>
<value>53687091200</value>
</property>
<!-- DataNode-to-NameNode connection timeout in milliseconds: 2 * heartbeat.recheck.interval + 30000 -->
<property>
<name>heartbeat.recheck.interval</name>
<value>100000</value>
</property>
</configuration>

View File

@@ -0,0 +1,46 @@
#!/bin/bash
MASTER_IP={{ groups.hdfs[0] }}
SLAVE1_IP={{ groups.hdfs[1] }}
BASE_DIR={{ deploy_dir }}
VERSION={{ hadoop_version }}
function ini_namenode() {
cd $BASE_DIR/$VERSION/bin
yes | ./hadoop namenode -format
if [ $? -eq "0" ];then
# scp -r $BASE_DIR/hadoop/ root@$SLAVE1_IP:$BASE_DIR/
echo yes
else
echo no
fi
}
function ini_zk() {
cd $BASE_DIR/$VERSION/bin
yes | ./hdfs zkfc -formatZK
if [ $? -eq "0" ];then
echo yes
else
echo no
fi
}
case $1 in
namenode)
ini_namenode
;;
zkfc)
ini_zk
;;
* )
echo "Please enter a valid command: namenode or zkfc."
;;
esac

View File

@@ -0,0 +1,33 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.jobhistory.address</name>
<value>{{ groups.hdfs[0] }}:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>{{ groups.hdfs[0] }}:19888</value>
</property>
</configuration>

View File

@@ -0,0 +1,71 @@
#!/bin/bash
source /etc/profile
function setChkconfig(){
echo -e "\n#hadoop\nexport HADOOP_HOME={{ deploy_dir }}/{{ hadoop_version }}\nexport PATH=\$HADOOP_HOME/sbin:\$PATH\nexport PATH=\$HADOOP_HOME/bin:\$PATH\nexport HADOOP_CLASSPATH=\`hadoop classpath\`" >> /etc/profile.d/hadoop.sh
chmod +x /etc/profile.d/hadoop.sh
if [ -x '/etc/init.d/keephdfsmaster' ];then
chkconfig --add keephdfsmaster
chkconfig keephdfsmaster on
fi
if [ -x '/etc/init.d/keephdfsslave' ];then
chkconfig --add keephdfsslave
chkconfig keephdfsslave on
fi
if [ -x '/etc/init.d/keephdfsworker' ];then
chkconfig --add keephdfsworker
chkconfig keephdfsworker on
fi
if [ -x '/etc/init.d/keephdfsjournal' ];then
chkconfig --add keephdfsjournal
chkconfig keephdfsjournal on
fi
}
case $1 in
journal)
if [ -x '/etc/init.d/keephdfsjournal' ];then
service keephdfsjournal start && sleep 5
journal_dae=`ps -ef | grep dae-hdfsjournal.sh | grep -v grep | wc -l`
if [ $journal_dae -lt 1 ];then
nohup {{ deploy_dir }}/{{ hadoop_version }}/sbin/dae-hdfsjournal.sh > /dev/null 2>&1 &
fi
fi
;;
master)
if [ -x '/etc/init.d/keephdfsmaster' ];then
service keephdfsmaster start && sleep 5
master_dae=`ps -ef | grep dae-hdfsmaster.sh | grep -v grep | wc -l`
if [ $master_dae -lt 1 ];then
nohup {{ deploy_dir }}/{{ hadoop_version }}/sbin/dae-hdfsmaster.sh > /dev/null 2>&1 &
fi
fi
;;
slave)
if [ -x '/etc/init.d/keephdfsslave' ];then
service keephdfsslave start && sleep 5
slave_dae=`ps -ef | grep dae-hdfsslave.sh | grep -v grep | wc -l`
if [ $slave_dae -lt 1 ];then
nohup {{ deploy_dir }}/{{ hadoop_version }}/sbin/dae-hdfsslave.sh > /dev/null 2>&1 &
fi
fi
;;
worker)
if [ -x '/etc/init.d/keephdfsworker' ];then
service keephdfsworker start && sleep 5
worker_dae=`ps -ef | grep dae-hdfsworker.sh | grep -v grep | wc -l`
if [ $worker_dae -lt 1 ];then
nohup {{ deploy_dir }}/{{ hadoop_version }}/sbin/dae-hdfsworker.sh > /dev/null 2>&1 &
fi
fi
;;
chkconfig)
setChkconfig;;
* )
;;
esac

View File

@@ -0,0 +1,4 @@
{% set combined_group = groups.hdfs %}
{% for dev_info in combined_group %}
{{dev_info}}
{% endfor %}

View File

@@ -0,0 +1,86 @@
#!/bin/bash
source /etc/profile
function killService(){
keeppath='/etc/init.d/keephdfsjournal'
if [ -x $keeppath ];then
service keephdfsjournal stop
chkconfig keephdfsjournal off
systemctl daemon-reload
rm -rf /etc/init.d/keephdfsjournal
fi
keeppath='/etc/init.d/keephdfsmaster'
if [ -x $keeppath ];then
service keephdfsmaster stop
chkconfig keephdfsmaster off
systemctl daemon-reload
rm -rf /etc/init.d/keephdfsmaster
fi
keeppath='/etc/init.d/keephdfsslave'
if [ -x $keeppath ];then
service keephdfsslave stop
chkconfig keephdfsslave off
systemctl daemon-reload
rm -rf /etc/init.d/keephdfsslave
fi
keeppath='/etc/init.d/keephdfsworker'
if [ -x $keeppath ];then
service keephdfsworker stop
chkconfig keephdfsworker off
systemctl daemon-reload
rm -rf /etc/init.d/keephdfsworker
fi
}
function killPid(){
livenum=`jps -l | egrep -w "org.apache.hadoop.hdfs.qjournal.server.JournalNode" | grep -v grep |wc -l`
if [ $livenum -ne 0 ];then
keeppid=`jps -l |egrep -w "org.apache.hadoop.hdfs.qjournal.server.JournalNode" | awk '{print $1}'`
kill -9 $keeppid
fi
livenum=`jps -l | egrep -w "org.apache.hadoop.hdfs.tools.DFSZKFailoverController" | grep -v grep |wc -l`
if [ $livenum -ne 0 ];then
keeppid=`jps -l |egrep -w "org.apache.hadoop.hdfs.tools.DFSZKFailoverController" | awk '{print $1}'`
kill -9 $keeppid
fi
livenum=`jps -l | egrep -w "org.apache.hadoop.hdfs.server.datanode.DataNode" | grep -v grep |wc -l`
if [ $livenum -ne 0 ];then
keeppid=`jps -l |egrep -w "org.apache.hadoop.hdfs.server.datanode.DataNode" | awk '{print $1}'`
kill -9 $keeppid
fi
livenum=`jps -l | egrep -w "org.apache.hadoop.hdfs.server.namenode.NameNode" | grep -v grep |wc -l`
if [ $livenum -ne 0 ];then
keeppid=`jps -l |egrep -w "org.apache.hadoop.hdfs.server.namenode.NameNode" | awk '{print $1}'`
kill -9 $keeppid
fi
}
function drop_folder(){
FOLDER_NAME=$1
if [ -d "$FOLDER_NAME" ];then
rm -rf $FOLDER_NAME
fi
}
function drop_file(){
FILE_NAME=$1
if [ -f "$FILE_NAME" ];then
rm -rf $FILE_NAME
fi
}
killService
sleep 15
killPid
drop_folder {{ deploy_dir }}/{{ hadoop_version }}
drop_folder {{ data_dir }}/{{ hadoop_version }}
drop_file /etc/profile.d/hadoop.sh

View File

@@ -0,0 +1,8 @@
# Hadoop version
hadoop_version: hadoop-2.7.1
# Data directory
hdfs_data_dir: "{{ data_dir }}/{{ hadoop_version }}/data/hadoop"
# JDK version
java_version: 1.8.0_73

View File

@@ -0,0 +1,7 @@
[zookeeper]
192.168.45.102
[hdfs]
[yarn]
192.168.45.102

View File

@@ -0,0 +1,7 @@
- hosts: yarn
remote_user: root
roles:
- role
vars_files:
- role/vars/main.yml

View File

@@ -0,0 +1,56 @@
#The default installation location
deploy_dir: /data/olap
#The default data storage location, used for storing application data, logs and configuration files
data_dir: /data/olap
hadoop:
namenode:
#Running memory of the Hadoop Namenode.
java_opt: '-Xmx1024m -Xms1024m'
#The number of Namenode RPC server threads that listen to requests from clients.
dfs.namenode.handler.count: 30
datanode:
#Running memory of the Hadoop Datanode.
java_opt: '-Xmx1024m -Xms1024m'
#The number of server threads for the datanode.
dfs.datanode.handler.count: 40
journalnode:
#Running memory of the Hadoop JournalNode.
java_opt: '-Xmx1024m -Xms1024m'
zkfc:
#Running memory of the Hadoop DFSZKFailoverController.
java_opt: '-Xmx1024m -Xms1024m'
yarn:
resourcemanager:
#Running memory of the Hadoop ResourceManager.
java_opt: '-Xmx1024m -Xms1024m'
nodemanager:
#Running memory of the Hadoop NodeManager.
java_opt: '-Xmx1024m -Xms1024m'
#Amount of physical memory, in MB, that can be allocated for containers.
yarn.nodemanager.resource.memory-mb: 16384
#The maximum allocation for every container request at the RM in MBs.
yarn.scheduler.maximum-allocation-mb: 16384
#Number of vcores that can be allocated for containers. This is used by the RM scheduler when allocating resources for containers.
yarn.nodemanager.resource.cpu-vcores: 48
#The maximum allocation for every container request at the RM in terms of virtual CPU cores.
yarn.scheduler.maximum-allocation-vcores: 48
flink:
#Total Process Memory size for the JobManager.
jobmanager.memory.process.size: 1024M
#Total Process Memory size for the TaskExecutors.
taskmanager.memory.process.size: 2048M
#This is the size of off-heap memory managed for sorting, hash tables, caching of intermediate results and state backend.
taskmanager.memory.managed.size: 128M
#Framework Off-Heap Memory size for TaskExecutors. This is the size of off-heap memory reserved for TaskExecutor framework
taskmanager.memory.framework.off-heap.size: 128M
#JVM Metaspace Size for the TaskExecutors.
taskmanager.memory.jvm-metaspace.size: 256M
#Max Network Memory size for TaskExecutors. Network Memory is off-heap memory reserved for ShuffleEnvironment.
taskmanager.memory.network.max: 256M
#The number of parallel operator or user function instances that a single TaskManager can run.
#This value is typically proportional to the number of physical CPU cores that the TaskManager's machine has (e.g., equal to the number of cores, or half the number of cores).
taskmanager.numberOfTaskSlots: 1

View File

@@ -0,0 +1,194 @@
- name: Setting node_nums variable
set_fact: node_nums="{{groups.hdfs|length}}"
- name: To terminate execution
fail:
msg: "Fully Distributed Mode at least 3 nodes, please checking configurations/hosts -> hdfs"
when: node_nums < 3
- name: check Jdk version
shell: source /etc/profile && java -version 2>&1 | grep {{ java_version }} | wc -l
ignore_errors: false
register: jdk_out
- name: To terminate execution
fail:
msg: "JDK is not installed in the target cluster, please check!"
when: jdk_out.stdout != '2'
run_once: true
delegate_to: 127.0.0.1
- name: create hadoop package path:{{ deploy_dir }}
file:
state: directory
path: '{{ deploy_dir }}'
- block:
- name: unpack hadoop-2.7.1.tar.gz to {{ deploy_dir }}/
unarchive:
src: 'files/{{ hadoop_version }}.tar.gz'
dest: '{{ deploy_dir }}/'
- name: copying yarn master config files
template:
src: '{{ item.src }}'
dest: '{{ deploy_dir }}/{{ hadoop_version }}/{{ item.dest }}'
mode: '{{ item.mode }}'
backup: false
with_items:
- { src: 'yarn-site.xml.j2', dest: 'etc/hadoop/yarn-site.xml', mode: '0644' }
- { src: 'slaves.j2', dest: 'etc/hadoop/slaves', mode: '0644' }
- { src: 'set_yarn_env.sh.j2', dest: 'bin/set_yarn_env.sh', mode: '0755' }
- { src: 'core-site.xml.j2', dest: 'etc/hadoop/core-site.xml', mode: '0644' }
- { src: 'hdfs-site.xml.j2', dest: 'etc/hadoop/hdfs-site.xml', mode: '0644' }
- { src: 'mapred-site.xml.j2', dest: 'etc/hadoop/mapred-site.xml', mode: '0644' }
- { src: 'capacity-scheduler.xml.j2', dest: 'etc/hadoop/capacity-scheduler.xml', mode: '0644' }
- { src: 'yarn-env.sh.j2', dest: 'etc/hadoop/yarn-env.sh', mode: '0755' }
- { src: 'hadoop-env.sh.j2', dest: 'etc/hadoop/hadoop-env.sh', mode: '0755' }
when: inventory_hostname not in groups['hdfs']
- name: copying yarn master config files
template:
src: '{{ item.src }}'
dest: '{{ deploy_dir }}/{{ hadoop_version }}/{{ item.dest }}'
mode: '{{ item.mode }}'
backup: false
with_items:
- { src: 'yarn-site.xml.j2', dest: 'etc/hadoop/yarn-site.xml', mode: '0644' }
- { src: 'slaves.j2', dest: 'etc/hadoop/slaves', mode: '0644' }
- { src: 'mapred-site.xml.j2', dest: 'etc/hadoop/mapred-site.xml', mode: '0644' }
- { src: 'yarn-env.sh.j2', dest: 'etc/hadoop/yarn-env.sh', mode: '0755' }
- { src: 'set_yarn_env.sh.j2', dest: 'bin/set_yarn_env.sh', mode: '0755' }
- { src: 'capacity-scheduler.xml.j2', dest: 'etc/hadoop/capacity-scheduler.xml', mode: '0644' }
when: inventory_hostname in groups['hdfs']
- block:
- name: copying yarn master daemon scripts
template:
src: 'daemonscript/{{ item.src }}'
dest: '{{ item.dest }}'
mode: 0755
backup: yes
with_items:
- { src: 'dae-yarnhistory.sh.j2', dest: '{{ deploy_dir }}/{{ hadoop_version }}/sbin/dae-yarnhistory.sh' }
- { src: 'dae-yarnmaster.sh.j2', dest: '{{ deploy_dir }}/{{ hadoop_version }}/sbin/dae-yarnmaster.sh' }
- { src: 'keepyarnhistory.j2', dest: '/etc/init.d/keepyarnhistory' }
- { src: 'keepyarnmaster.j2', dest: '/etc/init.d/keepyarnmaster' }
when: inventory_hostname in groups['yarn'][0:2]
- block:
- name: Start ResourceManager and JobHistoryServer
shell: cd {{ deploy_dir }}/{{ hadoop_version }}/bin/ && ./set_yarn_env.sh {{ item.operation }}
with_items:
- { operation: 'chkconfig' }
- { operation: 'master' }
- { operation: 'history' }
- name: Waiting for the ResourceManager to start (sleep 60s)
shell: sleep 60
- name: checking ResourceManager status
shell: source /etc/profile && jps -l | grep "org.apache.hadoop.yarn.server.resourcemanager.ResourceManager" | grep -v grep | wc -l
register: resourcemanager_check
- name: checking ResourceManager
fail:
msg: "ResourceManager failed to start. Please log in to {{ inventory_hostname }} and keep the logs for feedback. Log path: {{ deploy_dir }}/{{ hadoop_version }}/logs/"
when: resourcemanager_check.stdout != '1'
run_once: true
delegate_to: 127.0.0.1
- name: checking JobHistoryServer status
shell: source /etc/profile && jps -l | grep "org.apache.hadoop.mapreduce.v2.hs.JobHistoryServer" | grep -v grep | wc -l
register: history_check
- name: checking JobHistoryServer
fail:
msg: "JobHistoryServer failed to start. Please log in to {{ inventory_hostname }} and keep the logs for feedback. Log path: {{ deploy_dir }}/{{ hadoop_version }}/logs/"
when: history_check.stdout != '1'
run_once: true
delegate_to: 127.0.0.1
when: inventory_hostname in groups['yarn'][0:2]
- block:
- name: copying yarn worker
template:
src: 'daemonscript/{{ item.src }}'
dest: '{{ item.dest }}'
mode: 0755
backup: yes
with_items:
- { src: 'dae-yarnworker.sh.j2', dest: '{{ deploy_dir }}/{{ hadoop_version }}/sbin/dae-yarnworker.sh' }
- { src: 'keepyarnworker.j2', dest: '/etc/init.d/keepyarnworker' }
- name: Start NodeManager
shell: cd {{ deploy_dir }}/{{ hadoop_version }}/bin/ && ./set_yarn_env.sh {{ item.operation }}
with_items:
- { operation: 'chkconfig' }
- { operation: 'worker' }
- name: Waiting for the NodeManager to start (sleep 60s)
shell: sleep 60
- name: checking NodeManager status
shell: source /etc/profile && jps -l | grep "org.apache.hadoop.yarn.server.nodemanager.NodeManager" | grep -v grep | wc -l
register: datanode_status
- name: checking NodeManager
fail:
msg: "NodeManager is not running. Please log in to [{{ inventory_hostname }}] and keep the logs for feedback. Log path: {{ deploy_dir }}/{{ hadoop_version }}/logs/"
when: datanode_status.stdout != '1'
run_once: true
delegate_to: 127.0.0.1
when: node_nums >= cluster_limit and inventory_hostname not in groups['yarn'][0:2]
- block:
- name: copying yarn worker
template:
src: 'daemonscript/{{ item.src }}'
dest: '{{ item.dest }}'
mode: 0755
backup: yes
with_items:
- { src: 'dae-yarnworker.sh.j2', dest: '{{ deploy_dir }}/{{ hadoop_version }}/sbin/dae-yarnworker.sh' }
- { src: 'keepyarnworker.j2', dest: '/etc/init.d/keepyarnworker' }
- name: Start NodeManager
shell: cd {{ deploy_dir }}/{{ hadoop_version }}/bin/ && ./set_yarn_env.sh {{ item.operation }}
with_items:
- { operation: 'chkconfig' }
- { operation: 'worker' }
- name: Waiting for the NodeManager to start (sleep 60s)
shell: sleep 60
- name: checking NodeManager status
shell: source /etc/profile && jps -l | grep "org.apache.hadoop.yarn.server.nodemanager.NodeManager" | grep -v grep | wc -l
register: datanode_status
- name: checking NodeManager
fail:
msg: "NodeManager is not running. Please log in to [{{ inventory_hostname }}] and keep the logs for feedback. Log path: {{ deploy_dir }}/{{ hadoop_version }}/logs/"
when: datanode_status.stdout != '1'
run_once: true
delegate_to: 127.0.0.1
when: node_nums < cluster_limit
#--------------------------------------------Flink----------------------------------------------#
- name: Copying Flink installation package
unarchive:
src: 'files/{{ flink_version }}.tgz'
dest: '{{ deploy_dir }}/{{ hadoop_version }}/'
- name: Config flink configuration
template:
src: '{{ item.src }}'
dest: '{{ item.dest }}'
mode: '{{ item.mode }}'
with_items:
- { src: 'flink/flink.sh.j2', dest: '/etc/profile.d/flink.sh', mode: '0755' }
- { src: 'flink/flink-conf.yaml.j2', dest: '{{ deploy_dir }}/{{ hadoop_version }}/{{ flink_version }}/conf/flink-conf.yaml', mode: '0644' }
- { src: 'yarn-site.xml.j2', dest: '{{ deploy_dir }}/{{ hadoop_version }}/{{ flink_version }}/conf/yarn-site.xml', mode: '0644' }
- { src: 'core-site.xml.j2', dest: '{{ deploy_dir }}/{{ hadoop_version }}/{{ flink_version }}/conf/core-site.xml', mode: '0644' }
- { src: 'hdfs-site.xml.j2', dest: '{{ deploy_dir }}/{{ hadoop_version }}/{{ flink_version }}/conf/hdfs-site.xml', mode: '0644' }

View File

@@ -0,0 +1,136 @@
- name: check Jdk version
shell: source /etc/profile && java -version 2>&1 | grep {{ java_version }} | wc -l
ignore_errors: false
register: jdk_out
- name: To terminate execution
fail:
msg: "JDK is not installed in the target cluster, please check!"
when: jdk_out.stdout != '2'
run_once: true
delegate_to: 127.0.0.1
- name: create hadoop package path:{{ deploy_dir }}
file:
state: directory
path: '{{ deploy_dir }}'
- name: unpack hadoop-2.7.1.tar.gz to {{ deploy_dir }}/
unarchive:
src: 'files/{{ hadoop_version }}.tar.gz'
dest: '{{ deploy_dir }}/'
- name: copying yarn master config files
template:
src: '{{ item.src }}'
dest: '{{ deploy_dir }}/{{ hadoop_version }}/{{ item.dest }}'
mode: '{{ item.mode }}'
backup: false
with_items:
- { src: 'standalone/yarn-site.xml.j2', dest: 'etc/hadoop/yarn-site.xml', mode: '0644' }
- { src: 'standalone/hdfs-site.xml.j2', dest: 'etc/hadoop/hdfs-site.xml', mode: '0644' }
- { src: 'standalone/core-site.xml.j2', dest: 'etc/hadoop/core-site.xml', mode: '0644' }
- { src: 'slaves.j2', dest: 'etc/hadoop/slaves', mode: '0644' }
- { src: 'set_yarn_env.sh.j2', dest: 'bin/set_yarn_env.sh', mode: '0755' }
- { src: 'mapred-site.xml.j2', dest: 'etc/hadoop/mapred-site.xml', mode: '0644' }
- { src: 'capacity-scheduler.xml.j2', dest: 'etc/hadoop/capacity-scheduler.xml', mode: '0644' }
- { src: 'yarn-env.sh.j2', dest: 'etc/hadoop/yarn-env.sh', mode: '0755' }
- { src: 'hadoop-env.sh.j2', dest: 'etc/hadoop/hadoop-env.sh', mode: '0755' }
- name: copying yarn master daemon scripts
template:
src: 'daemonscript/{{ item.src }}'
dest: '{{ item.dest }}'
mode: 0755
backup: yes
with_items:
- { src: 'dae-yarnhistory.sh.j2', dest: '{{ deploy_dir }}/{{ hadoop_version }}/sbin/dae-yarnhistory.sh' }
- { src: 'dae-yarnmaster.sh.j2', dest: '{{ deploy_dir }}/{{ hadoop_version }}/sbin/dae-yarnmaster.sh' }
- { src: 'keepyarnhistory.j2', dest: '/etc/init.d/keepyarnhistory' }
- { src: 'keepyarnmaster.j2', dest: '/etc/init.d/keepyarnmaster' }
- block:
- name: Start ResourceManager and JobHistoryServer
shell: cd {{ deploy_dir }}/{{ hadoop_version }}/bin/ && ./set_yarn_env.sh {{ item.operation }}
with_items:
- { operation: 'chkconfig' }
- { operation: 'master' }
- { operation: 'history' }
- name: Waiting for the ResourceManager to start (sleep 60s)
shell: sleep 60
- name: checking ResourceManager status
shell: source /etc/profile && jps -l | grep "org.apache.hadoop.yarn.server.resourcemanager.ResourceManager" | grep -v grep | wc -l
register: resourcemanager_check
- name: checking ResourceManager
fail:
msg: "ResourceManager failed to start. Please log in to {{ inventory_hostname }} and keep the logs for feedback. Log path: {{ deploy_dir }}/{{ hadoop_version }}/logs/"
when: resourcemanager_check.stdout != '1'
run_once: true
delegate_to: 127.0.0.1
- name: checking JobHistoryServer status
shell: source /etc/profile && jps -l | grep "org.apache.hadoop.mapreduce.v2.hs.JobHistoryServer" | grep -v grep | wc -l
register: history_check
- name: checking JobHistoryServer
fail:
msg: "JobHistoryServer failed to start. Please log in to {{ inventory_hostname }} and keep the logs for feedback. Log path: {{ deploy_dir }}/{{ hadoop_version }}/logs/"
when: history_check.stdout != '1'
run_once: true
delegate_to: 127.0.0.1
when: inventory_hostname in groups['yarn'][0:2]
- block:
- name: copying yarn worker
template:
src: 'daemonscript/{{ item.src }}'
dest: '{{ item.dest }}'
mode: 0755
backup: yes
with_items:
- { src: 'dae-yarnworker.sh.j2', dest: '{{ deploy_dir }}/{{ hadoop_version }}/sbin/dae-yarnworker.sh' }
- { src: 'keepyarnworker.j2', dest: '/etc/init.d/keepyarnworker' }
- name: Start NodeManager
shell: cd {{ deploy_dir }}/{{ hadoop_version }}/bin/ && ./set_yarn_env.sh {{ item.operation }}
with_items:
- { operation: 'chkconfig' }
- { operation: 'worker' }
- name: Waiting for the NodeManager to start (sleep 60s)
shell: sleep 60
- name: checking NodeManager status
shell: source /etc/profile && jps -l | grep "org.apache.hadoop.yarn.server.nodemanager.NodeManager" | grep -v grep | wc -l
register: datanode_status
- name: checking NodeManager
fail:
msg: "NodeManager is not running. Please log in to [{{ inventory_hostname }}] and keep the logs for feedback. Log path: {{ deploy_dir }}/{{ hadoop_version }}/logs/"
when: datanode_status.stdout != '1'
run_once: true
delegate_to: 127.0.0.1
#--------------------------------------------Flink----------------------------------------------#
- name: Copying Flink installation package
unarchive:
src: 'files/{{ flink_version }}.tgz'
dest: '{{ deploy_dir }}/{{ hadoop_version }}/'
- name: Config flink configuration
template:
src: '{{ item.src }}'
dest: '{{ item.dest }}'
mode: '{{ item.mode }}'
with_items:
- { src: 'flink/flink.sh.j2', dest: '/etc/profile.d/flink.sh', mode: '0755' }
- { src: 'flink/flink-conf.yaml.j2', dest: '{{ deploy_dir }}/{{ hadoop_version }}/{{ flink_version }}/conf/flink-conf.yaml', mode: '0644' }
- { src: 'standalone/yarn-site.xml.j2', dest: '{{ deploy_dir }}/{{ hadoop_version }}/{{ flink_version }}/conf/yarn-site.xml', mode: '0644' }
- { src: 'standalone/core-site.xml.j2', dest: '{{ deploy_dir }}/{{ hadoop_version }}/{{ flink_version }}/conf/core-site.xml', mode: '0644' }
- { src: 'standalone/hdfs-site.xml.j2', dest: '{{ deploy_dir }}/{{ hadoop_version }}/{{ flink_version }}/conf/hdfs-site.xml', mode: '0644' }
- name: Start flink session
shell: source /etc/profile && cd {{ deploy_dir }}/{{ hadoop_version }}/{{ flink_version }}/bin/ && ./yarn-session.sh -d

View File

@@ -0,0 +1,12 @@
- block:
- include: uninstall.yml
- include: "{{ playbook_name }}"
vars:
playbook_name: "{{ 'deploy-cluster.yml' if groups.yarn | length > 1 else 'deploy-standalone.yml' }}"
- include: status-check.yml
when: (operation) == "install"
- block:
- include: uninstall.yml
when: (operation) == "uninstall"

View File

@@ -0,0 +1,57 @@
- name: Setting node_nums variable
set_fact: node_nums="{{groups.yarn|length}}"
- name: Waiting for YARN to start (sleep 30s)
shell: sleep 30
- block:
- name: checking ResourceManager status
shell: source /etc/profile && jps -l | grep "org.apache.hadoop.yarn.server.resourcemanager.ResourceManager" | grep -v grep | wc -l
register: resourcemanager_check
- name: checking ResourceManager
fail:
msg: "ResourceManager failed to start. Please log in to {{ inventory_hostname }} and keep the logs for feedback. Log path: {{ deploy_dir }}/{{ hadoop_version }}/logs/"
when: resourcemanager_check.stdout != '1'
run_once: true
delegate_to: 127.0.0.1
- name: checking JobHistoryServer status
shell: source /etc/profile && jps -l | grep "org.apache.hadoop.mapreduce.v2.hs.JobHistoryServer" | grep -v grep | wc -l
register: history_check
- name: checking JobHistoryServer
fail:
msg: "JobHistoryServer failed to start. Please log in to {{ inventory_hostname }} and keep the logs for feedback. Log path: {{ deploy_dir }}/{{ hadoop_version }}/logs/"
when: history_check.stdout != '1'
run_once: true
delegate_to: 127.0.0.1
when: inventory_hostname in groups['yarn'][0:2]
- block:
- name: checking NodeManager status
shell: source /etc/profile && jps -l | grep "org.apache.hadoop.yarn.server.nodemanager.NodeManager" | grep -v grep | wc -l
register: datanode_status
- name: checking NodeManager
fail:
msg: "NodeManager is not running. Please log in to [{{ inventory_hostname }}] and keep the logs for feedback. Log path: {{ deploy_dir }}/{{ hadoop_version }}/logs/"
when: datanode_status.stdout != '1'
run_once: true
delegate_to: 127.0.0.1
when: node_nums >= cluster_limit and inventory_hostname not in groups['yarn'][0:2]
- block:
- name: checking NodeManager status
shell: source /etc/profile && jps -l | grep "org.apache.hadoop.yarn.server.nodemanager.NodeManager" | grep -v grep | wc -l
register: datanode_status
- name: checking NodeManager
fail:
msg: "NodeManager is not running. Please log in to [{{ inventory_hostname }}] and keep the logs for feedback. Log path: {{ deploy_dir }}/{{ hadoop_version }}/logs/"
when: datanode_status.stdout != '1'
run_once: true
delegate_to: 127.0.0.1
when: node_nums < cluster_limit

View File

@@ -0,0 +1,55 @@
- block:
- name: copy unload_hadoop_yarn.sh to {{ deploy_dir }}/
template:
src: 'unload_hadoop_yarn.sh.j2'
dest: '{{ deploy_dir }}/unload_hadoop_yarn.sh'
force: true
mode: 0755
- name: unload hadoop
shell: cd {{ deploy_dir }} && sh unload_hadoop_yarn.sh
- name: Ansible delete {{ deploy_dir }}/unload_hadoop_yarn.sh
file:
path: "{{ deploy_dir }}/unload_hadoop_yarn.sh"
state: absent
- name: Ansible delete old /etc/profile.d/flink.sh
file:
path: '/etc/profile.d/flink.sh'
state: absent
- name: Checking ZooKeeper has yarn nodes
shell: "docker exec zookeeper zkCli.sh ls / | grep rmstore | wc -l"
run_once: true
delegate_to: "{{ groups.zookeeper[0] }}"
register: rmstore_zknode
- name: Delete Hadoop nodes in ZooKeeper
shell: "docker exec zookeeper zkCli.sh rmr /rmstore"
run_once: true
delegate_to: "{{ groups.zookeeper[0] }}"
when: rmstore_zknode.stdout >= '1'
- name: Checking ZooKeeper has yarn nodes
shell: docker exec zookeeper zkCli.sh ls / | grep "yarn-leader-election" | wc -l
run_once: true
delegate_to: "{{ groups.zookeeper[0] }}"
register: leader_zknode
- name: Delete Hadoop nodes in ZooKeeper
shell: "docker exec zookeeper zkCli.sh rmr /yarn-leader-election"
run_once: true
delegate_to: "{{ groups.zookeeper[0] }}"
when: leader_zknode.stdout >= '1'
- name: Check if the Hadoop service already exists
shell: source /etc/profile && jps -l | egrep "org.apache.hadoop.yarn.server.resourcemanager.ResourceManager|org.apache.hadoop.yarn.server.nodemanager.NodeManager|org.apache.hadoop.mapreduce.v2.hs.JobHistoryServer" | wc -l
register: check_out
- name: To terminate execution
fail:
msg: "卸载失败,组件可能非本安装部署,请手动卸载后继续安装"
run_once: true
delegate_to: 127.0.0.1
when: check_out.stdout >= '1'
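A final assertion that the YARN znodes really are gone can catch a half-finished cleanup; a sketch of such a check (illustrative, not part of this commit):

- name: confirm no YARN znodes remain in ZooKeeper (illustrative check)
  shell: docker exec zookeeper zkCli.sh ls / | egrep -c "rmstore|yarn-leader-election"
  register: leftover_znodes
  failed_when: leftover_znodes.stdout != '0'
  run_once: true
  delegate_to: "{{ groups.zookeeper[0] }}"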

View File

@@ -0,0 +1,134 @@
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<configuration>
<property>
<name>yarn.scheduler.capacity.maximum-applications</name>
<value>10000</value>
<description>
Maximum number of applications that can be pending and running.
</description>
</property>
<property>
<name>yarn.scheduler.capacity.maximum-am-resource-percent</name>
<value>0.5</value>
<description>
Maximum percent of resources in the cluster which can be used to run
application masters i.e. controls number of concurrent running
applications.
</description>
</property>
<property>
<name>yarn.scheduler.capacity.resource-calculator</name>
<value>org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator</value>
<description>
The ResourceCalculator implementation to be used to compare
Resources in the scheduler.
The default i.e. DefaultResourceCalculator only uses Memory while
DominantResourceCalculator uses dominant-resource to compare
multi-dimensional resources such as Memory, CPU etc.
</description>
</property>
<property>
<name>yarn.scheduler.capacity.root.queues</name>
<value>default</value>
<description>
The queues at the this level (root is the root queue).
</description>
</property>
<property>
<name>yarn.scheduler.capacity.root.default.capacity</name>
<value>100</value>
<description>Default queue target capacity.</description>
</property>
<property>
<name>yarn.scheduler.capacity.root.default.user-limit-factor</name>
<value>1</value>
<description>
Default queue user limit a percentage from 0.0 to 1.0.
</description>
</property>
<property>
<name>yarn.scheduler.capacity.root.default.maximum-capacity</name>
<value>100</value>
<description>
The maximum capacity of the default queue.
</description>
</property>
<property>
<name>yarn.scheduler.capacity.root.default.state</name>
<value>RUNNING</value>
<description>
The state of the default queue. State can be one of RUNNING or STOPPED.
</description>
</property>
<property>
<name>yarn.scheduler.capacity.root.default.acl_submit_applications</name>
<value>*</value>
<description>
The ACL of who can submit jobs to the default queue.
</description>
</property>
<property>
<name>yarn.scheduler.capacity.root.default.acl_administer_queue</name>
<value>*</value>
<description>
The ACL of who can administer jobs on the default queue.
</description>
</property>
<property>
<name>yarn.scheduler.capacity.node-locality-delay</name>
<value>40</value>
<description>
Number of missed scheduling opportunities after which the CapacityScheduler
attempts to schedule rack-local containers.
Typically this should be set to number of nodes in the cluster, By default is setting
approximately number of nodes in one rack which is 40.
</description>
</property>
<property>
<name>yarn.scheduler.capacity.queue-mappings</name>
<value></value>
<description>
A list of mappings that will be used to assign jobs to queues
The syntax for this list is [u|g]:[name]:[queue_name][,next mapping]*
Typically this list will be used to map users to queues,
for example, u:%user:%user maps all users to queues with the same name
as the user.
</description>
</property>
<property>
<name>yarn.scheduler.capacity.queue-mappings-override.enable</name>
<value>false</value>
<description>
If a queue mapping is present, will it override the value specified
by the user? This can be used by administrators to place jobs in queues
that are different than the one specified by the user.
The default is false.
</description>
</property>
</configuration>

View File

@@ -0,0 +1,77 @@
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://ns1</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>file:{{ hdfs_data_dir }}/tmp</value>
</property>
<property>
<name>io.file.buffer.size</name>
<value>131702</value>
</property>
<property>
<name>hadoop.proxyuser.root.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.root.groups</name>
<value>*</value>
</property>
<property>
<name>hadoop.logfile.size</name>
<value>10000000</value>
<description>The max size of each log file</description>
</property>
<property>
<name>hadoop.logfile.count</name>
<value>1</value>
<description>The max number of log files</description>
</property>
<property>
<name>ha.zookeeper.quorum</name>
{% for dev_info in groups.zookeeper -%}
{% if loop.last -%}
{{dev_info}}:2181</value>
{% elif loop.first %}
<value>{{dev_info}}:2181,
{%- else %}
{{dev_info}}:2181,
{%- endif %}
{%- endfor %}
</property>
<property>
<name>ipc.client.connect.timeout</name>
<value>90000</value>
</property>
</configuration>
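The loop above assembles ha.zookeeper.quorum as one comma-separated string; an equivalent, more compact Jinja form (a sketch that yields the same value for the same zookeeper group, and still emits the opening <value> tag when the group has only a single member, which the first/last branches above would drop):

<value>{% for zk in groups.zookeeper %}{{ zk }}:2181{% if not loop.last %},{% endif %}{% endfor %}</value>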

View File

@@ -0,0 +1,41 @@
#!/bin/bash
source /etc/profile
BASE_DIR={{ deploy_dir }}
VERSION={{ hadoop_version }}
function set_log(){
RES_SUM_FILE=$BASE_DIR/$VERSION/logs
if [ ! -f "$RES_SUM_FILE/" ]
then
mkdir -p $RES_SUM_FILE
fi
if [ ! -d "$RES_SUM_FILE/$1" ];then
echo "0" > $RES_SUM_FILE/$1
fi
OLD_NUM=`cat $RES_SUM_FILE/$1`
RESTART_NUM=`expr $OLD_NUM + 1`
echo $RESTART_NUM > $RES_SUM_FILE/$1
if [ $OLD_NUM -eq "0" ];then
echo "`date "+%Y-%m-%d %H:%M:%S"` - Yarn $2服务初次启动" >> $BASE_DIR/$VERSION/logs/restart.log
else
echo "`date +%Y-%m-%d` `date +%H:%M:%S` - Yarn $2服务异常 - 重启次数 -> $RESTART_NUM." >> $BASE_DIR/$VERSION/logs/restart.log
fi
}
while true ; do
HAS_HISTORY=`ps -ef | grep "org.apache.hadoop.mapreduce.v2.hs.JobHistoryServer" | grep -v grep | wc -l`
if [ $HAS_HISTORY -eq "0" ];then
$BASE_DIR/$VERSION/sbin/mr-jobhistory-daemon.sh start historyserver > /dev/null
set_log historyRes_sum JobHistoryServer
fi
sleep 60
done

View File

@@ -0,0 +1,41 @@
#!/bin/bash
source /etc/profile
BASE_DIR={{ deploy_dir }}
VERSION={{ hadoop_version }}
function set_log(){
RES_SUM_FILE=$BASE_DIR/$VERSION/logs
if [ ! -f "$RES_SUM_FILE/" ]
then
mkdir -p $RES_SUM_FILE
fi
if [ ! -d "$RES_SUM_FILE/$1" ];then
echo "0" > $RES_SUM_FILE/$1
fi
OLD_NUM=`cat $RES_SUM_FILE/$1`
RESTART_NUM=`expr $OLD_NUM + 1`
echo $RESTART_NUM > $RES_SUM_FILE/$1
if [ $OLD_NUM -eq "0" ];then
echo "`date "+%Y-%m-%d %H:%M:%S"` - Yarn $2服务初次启动" >> $BASE_DIR/$VERSION/logs/restart.log
else
echo "`date +%Y-%m-%d` `date +%H:%M:%S` - Yarn $2服务异常 - 重启次数 -> $RESTART_NUM." >> $BASE_DIR/$VERSION/logs/restart.log
fi
}
while true ; do
HAS_RM=`ps -ef | grep "org.apache.hadoop.yarn.server.resourcemanager.ResourceManager" | grep -v grep | wc -l`
if [ $HAS_RM -eq "0" ];then
$BASE_DIR/$VERSION/sbin/yarn-daemon.sh start resourcemanager > /dev/null
set_log rmRes_sum ResourceManager
fi
sleep 60
done

View File

@@ -0,0 +1,41 @@
#!/bin/bash
source /etc/profile
BASE_DIR={{ deploy_dir }}
VERSION={{ hadoop_version }}
function set_log(){
RES_SUM_FILE=$BASE_DIR/$VERSION/logs
if [ ! -f "$RES_SUM_FILE/" ]
then
mkdir -p $RES_SUM_FILE
fi
if [ ! -d "$RES_SUM_FILE/$1" ];then
echo "0" > $RES_SUM_FILE/$1
fi
OLD_NUM=`cat $RES_SUM_FILE/$1`
RESTART_NUM=`expr $OLD_NUM + 1`
echo $RESTART_NUM > $RES_SUM_FILE/$1
if [ $OLD_NUM -eq "0" ];then
echo "`date "+%Y-%m-%d %H:%M:%S"` - Yarn $2服务初次启动" >> $BASE_DIR/$VERSION/logs/restart.log
else
echo "`date +%Y-%m-%d` `date +%H:%M:%S` - Yarn $2服务异常 - 重启次数 -> $RESTART_NUM." >> $BASE_DIR/$VERSION/logs/restart.log
fi
}
while true ; do
HAS_NM=`ps -ef | grep "org.apache.hadoop.yarn.server.nodemanager.NodeManager" | grep -v grep | wc -l`
if [ $HAS_NM -eq "0" ];then
$BASE_DIR/$VERSION/sbin/yarn-daemon.sh start nodemanager > /dev/null
set_log nmRes_sum NodeManager
fi
sleep 60
done

View File

@@ -0,0 +1,46 @@
#!/bin/bash
#
# keepyarnhistory    Watchdog init script that keeps the YARN JobHistoryServer running.
#
# chkconfig: 123456 40 60
# description: keepyarnhistory
source /etc/profile
PRO_NAME=keepyarnhistory
INS_DIR={{ deploy_dir }}
# Version
VERSION={{ hadoop_version }}
case $1 in
start)
master=`ps -ef | grep "dae-yarnhistory.sh" | grep -v grep | wc -l`
if [ $master -lt 1 ];then
nohup $INS_DIR/$VERSION/sbin/dae-yarnhistory.sh > /dev/null 2>&1 &
fi
;;
stop)
HAS_KEEP_SHELL=`ps -ef | grep "dae-yarnhistory.sh" | grep -v grep | awk '{print $2}'`
if [ $HAS_KEEP_SHELL ];then
echo "守护进程PID$HAS_KEEP_SHELL"
kill -9 $HAS_KEEP_SHELL
fi
sh $INS_DIR/$VERSION/sbin/mr-jobhistory-daemon.sh stop historyserver > /dev/null
;;
status)
HAS_HISTORY=`ps -ef | grep "org.apache.hadoop.mapreduce.v2.hs.JobHistoryServer" | grep -v grep | wc -l`
if [ $HAS_HISTORY -eq "0" ];then
echo "JobHistoryServer not running!"
else
echo "JobHistoryServer is running!"
fi
;;
* )
echo "use keepyarnhistory [start|stop|status]"
;;
esac

View File

@@ -0,0 +1,40 @@
#!/bin/bash
#
# keepyarnmaster    Watchdog init script that keeps the YARN ResourceManager running.
#
# chkconfig: 123456 40 60
# description: keepyarnmaster
source /etc/profile
PRO_NAME=keepyarnmaster
INS_DIR={{ deploy_dir }}
# Version
VERSION={{ hadoop_version }}
case $1 in
start)
master=`ps -ef | grep "dae-yarnmaster.sh" | grep -v grep | wc -l`
if [ $master -lt 1 ];then
nohup $INS_DIR/$VERSION/sbin/dae-yarnmaster.sh > /dev/null 2>&1 &
fi
;;
stop)
HAS_KEEP_SHELL=`ps -ef | grep "dae-yarnmaster.sh" | grep -v grep | awk '{print $2}'`
if [ $HAS_KEEP_SHELL ];then
echo "守护进程PID$HAS_KEEP_SHELL"
kill -9 $HAS_KEEP_SHELL
fi
sh $INS_DIR/$VERSION/sbin/yarn-daemon.sh stop resourcemanager > /dev/null
;;
status)
yarn rmadmin -getServiceState rsm1
;;
* )
echo "use keepyarnmaster [start|stop|status]"
;;
esac
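For a quick manual health check after installation, the rmadmin call used in the status branch can be issued for both RM ids from any YARN host; a sketch as an Ansible task (illustrative, assumes the rsm1/rsm2 ids defined in the HA yarn-site template):

- name: query ResourceManager HA states (illustrative check)
  shell: source /etc/profile && yarn rmadmin -getServiceState rsm1 && yarn rmadmin -getServiceState rsm2
  register: rm_states
  changed_when: false
  run_once: true
  delegate_to: "{{ groups.yarn[0] }}"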

View File

@@ -0,0 +1,46 @@
#!/bin/bash
#
# keepyarnworker    Watchdog init script that keeps the YARN NodeManager running.
#
# chkconfig: 123456 40 60
# description: keepyarnworker
source /etc/profile
PRO_NAME=keepyarnworker
INS_DIR={{ deploy_dir }}
# Version
VERSION={{ hadoop_version }}
case $1 in
start)
master=`ps -ef | grep "dae-yarnworker.sh" | grep -v grep | wc -l`
if [ $master -lt 1 ];then
nohup $INS_DIR/$VERSION/sbin/dae-yarnworker.sh > /dev/null 2>&1 &
fi
;;
stop)
HAS_KEEP_SHELL=`ps -ef | grep "dae-yarnworker.sh" | grep -v grep | awk '{print $2}'`
if [ $HAS_KEEP_SHELL ];then
echo "守护进程PID$HAS_KEEP_SHELL"
kill -9 $HAS_KEEP_SHELL
fi
sh $INS_DIR/$VERSION/sbin/yarn-daemon.sh stop nodemanager > /dev/null
;;
status)
HAS_NM=`ps -ef | grep "org.apache.hadoop.yarn.server.nodemanager.NodeManager" | grep -v grep | wc -l`
if [ $HAS_NM -eq "0" ];then
echo "NodeManager not running!"
else
echo "NodeManager is running!"
fi
;;
* )
echo "use keepyarnworker [start|stop|status]"
;;
esac

View File

@@ -0,0 +1,198 @@
################################################################################
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################
#==============================================================================
# Common
#==============================================================================
# The external address of the host on which the JobManager runs and can be
# reached by the TaskManagers and any clients which want to connect. This setting
# is only used in Standalone mode and may be overwritten on the JobManager side
# by specifying the --host <hostname> parameter of the bin/jobmanager.sh executable.
# In high availability mode, if you use the bin/start-cluster.sh script and setup
# the conf/masters file, this will be taken care of automatically. Yarn/Mesos
# automatically configure the host name based on the hostname of the node where the
# JobManager runs.
jobmanager.rpc.address: {{ groups.yarn[0] }}
# JobManager RPC port
jobmanager.rpc.port: 6123
# Spread tasks evenly across all TaskManagers
cluster.evenly-spread-out-slots: true
# Do not fail with a metaspace OOM error; let the Flink JVM process exit instead
classloader.fail-on-metaspace-oom-error: false
# Work around classloader leaks caused by third-party libraries
classloader.check-leaked-classloader: false
# Prevent the TaskManager from being killed when a task cannot be cancelled in time
task.cancellation.timeout: 0
# Total Flink-related memory of the JobManager process
jobmanager.memory.process.size: {{ flink['jobmanager.memory.process.size'] }}
# Total Flink-related memory of the TaskManager process
taskmanager.memory.process.size: {{ flink['taskmanager.memory.process.size'] }}
# Size of the TaskManager managed (off-heap) memory
taskmanager.memory.managed.size: 128M
# taskmanager.memory.off-heap defaults to false and controls whether Flink Managed Memory is kept on-heap or off-heap
# Heap is used by default; enabling off-heap takes away a further share of the resources
taskmanager.memory.off-heap: false
# Framework off-heap memory, allocated as direct memory
taskmanager.memory.framework.off-heap.size: {{ flink['taskmanager.memory.framework.off-heap.size'] }}
# TaskManager JVM metaspace size, default 256M
taskmanager.memory.jvm-metaspace.size: {{ flink['taskmanager.memory.jvm-metaspace.size'] }}
# Minimum number of network buffers per sort-merge blocking result partition, default 64. For production it is recommended to raise this to 2048 to improve the compression ratio and reduce small network packets; doing so requires more total network memory.
taskmanager.network.sort-shuffle.min-buffers: 64
# Memory used for reading shuffle data, currently only for sort-merge shuffle. It is taken out of framework.off-heap.size (default 32M); when this value is increased, framework.off-heap.size must be increased as well.
taskmanager.memory.framework.off-heap.batch-shuffle.size: 8M
# Maximum number of buffers each channel may use, default 10. It speeds up checkpoint alignment by preventing excessive growth of buffered in-flight data under data skew and a high floating-buffer count.
taskmanager.network.memory.max-buffers-per-channel: 10
# The number of task slots that each TaskManager offers. Each slot runs one parallel pipeline.
taskmanager.numberOfTaskSlots: {{ flink['taskmanager.numberOfTaskSlots'] }}
# The parallelism used for programs that did not specify and other parallelism.
parallelism.default: 1
# The default file system scheme and authority.
#
# By default file paths without scheme are interpreted relative to the local
# root file system 'file:///'. Use this to override the default and interpret
# relative paths relative to a different file system,
# for example 'hdfs://mynamenode:12345'
#
# fs.default-scheme
#==============================================================================
# NetWork
#==============================================================================
# Number of floating network buffers per gate, default 8. Helps relieve backpressure caused by uneven data distribution across subpartitions.
taskmanager.network.memory.floating-buffers-per-gate: 8
# Number of exclusive network buffers per input/output channel; configure at least 2.
taskmanager.network.memory.buffers-per-channel: 2
# Network memory used for shuffles and broadcasts between TaskManagers and for data transfer with external components
#Min
taskmanager.memory.network.min: 128M
#Max
taskmanager.memory.network.max: {{ flink['taskmanager.memory.network.max'] }}
#==============================================================================
# High Availability
#==============================================================================
# The high-availability mode. Possible options are 'NONE' or 'zookeeper'.
#
# high-availability: zookeeper
# The path where metadata for master recovery is persisted. While ZooKeeper stores
# the small ground truth for checkpoint and leader election, this location stores
# the larger objects, like persisted dataflow graphs.
#
# Must be a durable file system that is accessible from all nodes
# (like HDFS, S3, Ceph, nfs, ...)
#
# high-availability.storageDir: hdfs:///flink/ha/
# The list of ZooKeeper quorum peers that coordinate the high-availability
# setup. This must be a list of the form:
# "host1:clientPort,host2:clientPort,..." (default clientPort: 2181)
{% if groups.yarn | length > 1 %}
state.checkpoints.dir: hdfs:///flink/checkpoint/
{% elif groups.yarn | length == 1 %}
state.checkpoints.dir: file://{{ deploy_dir }}/{{ hadoop_version }}/{{ flink_version }}/checkpoint
{% endif %}
heartbeat.timeout: 180000
heartbeat.interval: 20000
akka.ask.timeout: 300 s
# ACL options are based on https://zookeeper.apache.org/doc/r3.1.2/zookeeperProgrammers.html#sc_BuiltinACLSchemes
# It can be either "creator" (ZOO_CREATE_ALL_ACL) or "open" (ZOO_OPEN_ACL_UNSAFE)
# The default value is "open" and it can be changed to "creator" if ZK security is enabled
#
# high-availability.zookeeper.client.acl: open
# The failover strategy, i.e., how the job computation recovers from task failures.
# Only restart tasks that may have been affected by the task failure, which typically includes
# downstream tasks and potentially upstream tasks if their produced data is no longer available for consumption.
jobmanager.execution.failover-strategy: region
restart-strategy: fixed-delay
restart-strategy.fixed-delay.attempts: 2147483647
yarn.application-attempts: 10000
restart-strategy.fixed-delay.delay: 5 s
web.submit.enable: false
#==============================================================================
# Advanced
#==============================================================================
# Override the directories for temporary files. If not specified, the
# system-specific Java temporary directory (java.io.tmpdir property) is taken.
#
# For framework setups on Yarn or Mesos, Flink will automatically pick up the
# containers' temp directories without any need for configuration.
#
# Add a delimited list for multiple directories, using the system directory
# delimiter (colon ':' on unix) or a comma, e.g.:
# /data1/tmp:/data2/tmp:/data3/tmp
#
# Note: Each directory entry is read from and written to by a different I/O
# thread. You can include the same directory multiple times in order to create
# multiple I/O threads against that directory. This is for example relevant for
# high-throughput RAIDs.
#
# io.tmp.dirs: /tmp
# The classloading resolve order. Possible values are 'child-first' (Flink's default)
# and 'parent-first' (Java's default).
#
# Child first classloading allows users to use different dependency/library
# versions in their application than those in the classpath. Switching back
# to 'parent-first' may help with debugging dependency issues.
#
# classloader.resolve-order: child-first
classloader.resolve-order: parent-first
metrics.reporter.promgateway.class: org.apache.flink.metrics.prometheus.PrometheusPushGatewayReporter
metrics.reporter.promgateway.randomJobNameSuffix: true
metrics.reporter.promgateway.deleteOnShutdown: true
metrics.reporter.promgateway.interval: 10 SECONDS
metrics.reporter.promgateway.host: 127.0.0.1
metrics.reporter.promgateway.port: 9091
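The templated sizes above are taken from group vars; purely for orientation, one internally consistent set of example values (illustrative only, not the values shipped with this commit) could look like:

flink:
  'jobmanager.memory.process.size': 1600m
  'taskmanager.memory.process.size': 4096m
  'taskmanager.memory.framework.off-heap.size': 128m
  'taskmanager.memory.jvm-metaspace.size': 256m
  'taskmanager.memory.network.max': 512m
  'taskmanager.numberOfTaskSlots': 2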

View File

@@ -0,0 +1,4 @@
#flink
export FLINK_HOME={{ deploy_dir }}/{{ hadoop_version }}/{{ flink_version }}
export PATH=$FLINK_HOME/bin:$PATH

View File

@@ -0,0 +1,105 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Set Hadoop-specific environment variables here.
# The only required environment variable is JAVA_HOME. All others are
# optional. When running a distributed configuration it is best to
# set JAVA_HOME in this file, so that it is correctly defined on
# remote nodes.
export HADOOP_NAMENODE_JMX_OPTS="-Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.local.only=false -javaagent:{{ deploy_dir }}/{{ hadoop_version }}/monitor/jmx_prometheus_javaagent-0.12.0.jar=9905:{{ deploy_dir }}/{{ hadoop_version }}/monitor/hdfs.yaml"
export HADOOP_DATANODE_JMX_OPTS="-Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.local.only=false -javaagent:{{ deploy_dir }}/{{ hadoop_version }}/monitor/jmx_prometheus_javaagent-0.12.0.jar=9906:{{ deploy_dir }}/{{ hadoop_version }}/monitor/hdfs.yaml"
# The java implementation to use.
#export HADOOP_HEAPSIZE=m
#export JAVA_HOME=/usr/local/jdk/jdk1.8.0_73
export JAVA_HOME=$JAVA_HOME
# The jsvc implementation to use. Jsvc is required to run secure datanodes
# that bind to privileged ports to provide authentication of data transfer
# protocol. Jsvc is not required if SASL is configured for authentication of
# data transfer protocol using non-privileged ports.
#export JSVC_HOME=${JSVC_HOME}
export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-"/etc/hadoop"}
# Extra Java CLASSPATH elements. Automatically insert capacity-scheduler.
for f in $HADOOP_HOME/contrib/capacity-scheduler/*.jar; do
if [ "$HADOOP_CLASSPATH" ]; then
export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:$f
else
export HADOOP_CLASSPATH=$f
fi
done
# The maximum amount of heap to use, in MB. Default is 1000.
#export HADOOP_HEAPSIZE=
#export HADOOP_NAMENODE_INIT_HEAPSIZE=""
# Extra Java runtime options. Empty by default.
export HADOOP_OPTS="$HADOOP_OPTS -Djava.net.preferIPv4Stack=true"
# Command specific options appended to HADOOP_OPTS when specified
export HADOOP_NAMENODE_OPTS="$HADOOP_NAMENODE_OPTS {{ hadoop.namenode.java_opt }} -Xss256k -XX:MetaspaceSize=128m -XX:MaxMetaspaceSize=256m -XX:SurvivorRatio=2 -XX:+UseConcMarkSweepGC -XX:+UseParNewGC -XX:+CMSParallelRemarkEnabled -XX:MaxTenuringThreshold=15 -XX:+UseCMSCompactAtFullCollection -XX:CMSFullGCsBeforeCompaction=1 -XX:+UseCMSInitiatingOccupancyOnly -XX:CMSInitiatingOccupancyFraction=70 -XX:-DisableExplicitGC -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Xloggc:{{ deploy_dir }}/{{ hadoop_version }}/logs/gc-namenode-%t.log -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=5 -XX:GCLogFileSize=100M -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath={{ deploy_dir }}/{{ hadoop_version }}/logs/ -Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender}"
export HADOOP_DATANODE_OPTS="$HADOOP_DATANODE_OPTS {{ hadoop.datanode.java_opt }} -Xss256k -XX:MetaspaceSize=128m -XX:MaxMetaspaceSize=256m -XX:SurvivorRatio=2 -XX:+UseConcMarkSweepGC -XX:+UseParNewGC -XX:+CMSParallelRemarkEnabled -XX:MaxTenuringThreshold=15 -XX:+UseCMSCompactAtFullCollection -XX:CMSFullGCsBeforeCompaction=1 -XX:+UseCMSInitiatingOccupancyOnly -XX:CMSInitiatingOccupancyFraction=70 -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Xloggc:{{ deploy_dir }}/{{ hadoop_version }}/logs/gc-datanode-%t.log -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=5 -XX:GCLogFileSize=100M -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath={{ deploy_dir }}/{{ hadoop_version }}/logs/ -Dhadoop.security.logger=ERROR,RFAS"
export HADOOP_JOURNALNODE_OPTS="$HADOOP_JOURNALNODE_OPTS {{ hadoop.journalnode.java_opt }}"
export HADOOP_ZKFC_OPTS="$HADOOP_ZKFC_OPTS {{ hadoop.zkfc.java_opt }}"
export HADOOP_SECONDARYNAMENODE_OPTS="-Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender} $HADOOP_SECONDARYNAMENODE_OPTS"
export HADOOP_NFS3_OPTS="$HADOOP_NFS3_OPTS"
export HADOOP_PORTMAP_OPTS="-Xmx512m $HADOOP_PORTMAP_OPTS"
# The following applies to multiple commands (fs, dfs, fsck, distcp etc)
export HADOOP_CLIENT_OPTS="-Xmx512m $HADOOP_CLIENT_OPTS"
#HADOOP_JAVA_PLATFORM_OPTS="-XX:-UsePerfData $HADOOP_JAVA_PLATFORM_OPTS"
# On secure datanodes, user to run the datanode as after dropping privileges.
# This **MUST** be uncommented to enable secure HDFS if using privileged ports
# to provide authentication of data transfer protocol. This **MUST NOT** be
# defined if SASL is configured for authentication of data transfer protocol
# using non-privileged ports.
export HADOOP_SECURE_DN_USER=${HADOOP_SECURE_DN_USER}
# Where log files are stored. $HADOOP_HOME/logs by default.
#export HADOOP_LOG_DIR=${HADOOP_LOG_DIR}/$USER
# Where log files are stored in the secure data environment.
export HADOOP_SECURE_DN_LOG_DIR=${HADOOP_LOG_DIR}/${HADOOP_HDFS_USER}
###
# HDFS Mover specific parameters
###
# Specify the JVM options to be used when starting the HDFS Mover.
# These options will be appended to the options specified as HADOOP_OPTS
# and therefore may override any similar flags set in HADOOP_OPTS
#
# export HADOOP_MOVER_OPTS=""
###
# Advanced Users Only!
###
# The directory where pid files are stored. /tmp by default.
# NOTE: this should be set to a directory that can only be written to by
# the user that will run the hadoop daemons. Otherwise there is the
# potential for a symlink attack.
export HADOOP_PID_DIR={{ deploy_dir }}/{{ hadoop_version }}/pids
export HADOOP_SECURE_DN_PID_DIR=${HADOOP_PID_DIR}
# A string representing this instance of hadoop. $USER by default.
export HADOOP_IDENT_STRING=$USER

View File

@@ -0,0 +1,142 @@
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>dfs.namenode.name.dir</name>
<value>file:{{ hdfs_data_dir }}/dfs/name</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>file:{{ hdfs_data_dir }}/dfs/data</value>
</property>
<property>
<name>dfs.replication</name>
<value>2</value>
</property>
<property>
<name>dfs.webhdfs.enabled</name>
<value>true</value>
</property>
<property>
<name>dfs.permissions</name>
<value>false</value>
</property>
<property>
<name>dfs.permissions.enabled</name>
<value>false</value>
</property>
<property>
<name>dfs.nameservices</name>
<value>ns1</value>
</property>
<property>
<name>dfs.blocksize</name>
<value>134217728</value>
</property>
<property>
<name>dfs.ha.namenodes.ns1</name>
<value>nn1,nn2</value>
</property>
<!-- RPC address of nn1 (the host where nn1 runs) -->
<property>
<name>dfs.namenode.rpc-address.ns1.nn1</name>
<value>{{ groups.hdfs[0] }}:9000</value>
</property>
<!-- HTTP address of nn1 (externally reachable web address) -->
<property>
<name>dfs.namenode.http-address.ns1.nn1</name>
<value>{{ groups.hdfs[0] }}:50070</value>
</property>
<!-- RPC address of nn2 (the host where nn2 runs) -->
<property>
<name>dfs.namenode.rpc-address.ns1.nn2</name>
<value>{{ groups.hdfs[1] }}:9000</value>
</property>
<!-- HTTP address of nn2 (externally reachable web address) -->
<property>
<name>dfs.namenode.http-address.ns1.nn2</name>
<value>{{ groups.hdfs[1] }}:50070</value>
</property>
<!-- Where the NameNode shared edit log is stored on the JournalNodes (usually co-located with ZooKeeper) -->
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://{{groups.hdfs[0]}}:8485;{{groups.hdfs[1]}}:8485;{{groups.hdfs[2]}}:8485/ns1</value>
</property>
<!-- Local disk directory where the JournalNode stores its data -->
<property>
<name>dfs.journalnode.edits.dir</name>
<value>{{ hdfs_data_dir }}/journal</value>
</property>
<!-- Proxy provider class that HDFS clients use to talk to the NameNodes and to determine which one is active -->
<property>
<name>dfs.client.failover.proxy.provider.ns1</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<!-- Fencing methods used during automatic failover; sshfence logs in to the stale NameNode over SSH and kills it, shell(true) is the always-succeeding fallback -->
<property>
<name>dfs.ha.fencing.methods</name>
<value>sshfence
shell(true)</value>
</property>
<!-- Passwordless SSH private key, only needed when the sshfence fencing method is used -->
<property>
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>/root/.ssh/id_rsa</value>
</property>
<!-- Timeout for the sshfence fencing method; like the key above, it can be left out when a script-based fencing method is used -->
<property>
<name>dfs.ha.fencing.ssh.connect-timeout</name>
<value>30000</value>
</property>
<!-- Enables automatic failover; can be left unset if automatic failover is not used -->
<property>
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
</property>
<property>
<name>dfs.datanode.max.transfer.threads</name>
<value>8192</value>
</property>
<!-- Number of NameNode threads handling RPC requests; increasing it costs little extra resource -->
<property>
<name>dfs.namenode.handler.count</name>
<value>{{ hadoop.namenode['dfs.namenode.handler.count'] }}</value>
</property>
<!-- Number of DataNode threads handling RPC requests; increasing it uses more memory -->
<property>
<name>dfs.datanode.handler.count</name>
<value>{{ hadoop.datanode['dfs.datanode.handler.count'] }}</value>
</property>
<!-- Bandwidth the balancer may use -->
<property>
<name>dfs.balance.bandwidthPerSec</name>
<value>104857600</value>
</property>
<!-- Reserved disk space that HDFS will not use, in bytes -->
<property>
<name>dfs.datanode.du.reserved</name>
<value>53687091200</value>
</property>
<!-- DataNode-NameNode connection timeout in milliseconds: 2 * heartbeat.recheck.interval + 30000 -->
<property>
<name>heartbeat.recheck.interval</name>
<value>100000</value>
</property>
</configuration>
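Once both NameNodes are up, the nn1/nn2 ids defined above can be queried to confirm exactly one of them is active; a sketch of such a post-install check (illustrative, not part of this commit):

- name: check NameNode HA states (illustrative check)
  shell: source /etc/profile && hdfs haadmin -getServiceState nn1 && hdfs haadmin -getServiceState nn2
  register: nn_states
  changed_when: false
  run_once: true
  delegate_to: "{{ groups.hdfs[0] }}"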

View File

@@ -0,0 +1,33 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.jobhistory.address</name>
<value>{{ groups.yarn[0] }}:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>{{ groups.yarn[0] }}:19888</value>
</property>
</configuration>
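The JobHistoryServer web address configured above can also be probed over its REST API to confirm the daemon answers; a sketch (illustrative, the REST path assumes the standard MapReduce history server API):

- name: probe the JobHistoryServer web UI (illustrative check)
  uri:
    url: "http://{{ groups.yarn[0] }}:19888/ws/v1/history/info"
    status_code: 200
  run_once: true
  delegate_to: 127.0.0.1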

View File

@@ -0,0 +1,58 @@
#!/bin/bash
source /etc/profile
function setChkconfig(){
echo -e "\n#hadoop\nexport HADOOP_HOME={{ deploy_dir }}/{{ hadoop_version }}\nexport PATH=\$HADOOP_HOME/sbin:\$PATH\nexport PATH=\$HADOOP_HOME/bin:\$PATH\nexport HADOOP_CLASSPATH=\`hadoop classpath\`" >> /etc/profile.d/hadoop.sh
chmod +x /etc/profile.d/hadoop.sh
if [ -x '/etc/init.d/keepyarnhistory' ];then
chkconfig --add keepyarnhistory
chkconfig keepyarnhistory on
fi
if [ -x '/etc/init.d/keepyarnmaster' ];then
chkconfig --add keepyarnmaster
chkconfig keepyarnmaster on
fi
if [ -x '/etc/init.d/keepyarnworker' ];then
chkconfig --add keepyarnworker
chkconfig keepyarnworker on
fi
}
case $1 in
history)
if [ -x '/etc/init.d/keepyarnhistory' ];then
service keepyarnhistory start && sleep 5
history_dae=`ps -ef | grep "dae-yarnhistory.sh" | grep -v grep | wc -l`
if [ $history_dae -lt 1 ];then
nohup {{ deploy_dir }}/{{ hadoop_version }}/sbin/dae-yarnhistory.sh > /dev/null 2>&1 &
fi
fi
;;
master)
if [ -x '/etc/init.d/keepyarnmaster' ];then
service keepyarnmaster start && sleep 5
master_dae=`ps -ef | grep "dae-yarnmaster.sh" | grep -v grep | wc -l`
if [ $master_dae -lt 1 ];then
nohup {{ deploy_dir }}/{{ hadoop_version }}/sbin/dae-yarnmaster.sh > /dev/null 2>&1 &
fi
fi
;;
worker)
if [ -x '/etc/init.d/keepyarnworker' ];then
service keepyarnworker start && sleep 5
worker_dae=`ps -ef | grep dae-yarnworker.sh | grep -v grep | wc -l`
if [ $worker_dae -lt 1 ];then
nohup {{ deploy_dir }}/{{ hadoop_version }}/sbin/dae-yarnworker.sh > /dev/null 2>&1 &
fi
fi
;;
chkconfig)
setChkconfig;;
* )
;;
esac

View File

@@ -0,0 +1,4 @@
{% set combined_group = groups.yarn | union(groups.hdfs) %}
{% for dev_info in combined_group %}
{{dev_info}}
{% endfor %}
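For illustration, with hypothetical groups yarn = [192.168.45.102, 192.168.45.103] and hdfs = [192.168.45.103, 192.168.45.104], the union above deduplicates the shared host and the rendered slaves file becomes:

192.168.45.102
192.168.45.103
192.168.45.104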

View File

@@ -0,0 +1,65 @@
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>hadoop.tmp.dir</name>
<value>file:{{ hdfs_data_dir }}/tmp</value>
</property>
<property>
<name>io.file.buffer.size</name>
<value>131702</value>
</property>
<property>
<name>hadoop.proxyuser.root.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.root.groups</name>
<value>*</value>
</property>
<property>
<name>hadoop.logfile.size</name>
<value>10000000</value>
<description>The max size of each log file</description>
</property>
<property>
<name>hadoop.logfile.count</name>
<value>1</value>
<description>The max number of log files</description>
</property>
<property>
<name>ha.zookeeper.quorum</name>
<value>{{inventory_hostname}}:2181</value>
</property>
<property>
<name>ipc.client.connect.timeout</name>
<value>90000</value>
</property>
</configuration>

View File

@@ -0,0 +1,22 @@
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
</configuration>

View File

@@ -0,0 +1,183 @@
<?xml version="1.0"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<configuration>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.resourcemanager.ha.enabled</name>
<value>false</value>
</property>
<!-- ResourceManager cluster id -->
<property>
<name>yarn.resourcemanager.cluster-id</name>
<value>rsmcluster</value>
</property>
<!-- ResourceManager configuration -->
<!-- ResourceManager hostname -->
<property>
<name>yarn.resourcemanager.hostname</name>
<value>{{ groups.yarn[0] }}</value>
</property>
<!-- ResourceManager web application address -->
<property>
<name>yarn.resourcemanager.webapp.address</name>
<value>{{ groups.yarn[0] }}:8080</value>
</property>
<!-- ResourceManager scheduler address, default port 8030 -->
<property>
<name>yarn.resourcemanager.scheduler.address</name>
<value>{{ groups.yarn[0] }}:8030</value>
</property>
<!-- Resource tracker address, default port 8031 -->
<property>
<name>yarn.resourcemanager.resource-tracker.address</name>
<value>{{ groups.yarn[0] }}:8031</value>
</property>
<!-- Applications manager interface address, default port 8032 -->
<property>
<name>yarn.resourcemanager.address</name>
<value>{{ groups.yarn[0] }}:8032</value>
</property>
<!-- Admin address, default port 8033 -->
<property>
<name>yarn.resourcemanager.admin.address</name>
<value>{{ groups.yarn[0] }}:8033</value>
</property>
<property>
<name>yarn.resourcemanager.ha.admin.address</name>
<value>{{ groups.yarn[0] }}:23142</value>
</property>
<!-- ZooKeeper quorum address -->
<property>
<name>yarn.resourcemanager.zk-address</name>
<value>{{inventory_hostname}}:2181</value>
</property>
<!-- Enable ResourceManager recovery so running applications survive an RM failure; default false -->
<property>
<name>yarn.resourcemanager.recovery.enabled</name>
<value>true</value>
</property>
<!-- Enable NodeManager recovery; default false -->
<property>
<name>yarn.nodemanager.recovery.enabled</name>
<value>true</value>
</property>
<!-- Local filesystem directory where the NodeManager persists its running state -->
<property>
<name>yarn.nodemanager.recovery.dir</name>
<value>{{ deploy_dir }}/{{ hadoop_version }}/yarn</value>
</property>
<property>
<name>yarn.resourcemanager.store.class</name>
<value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
</property>
<!-- NodeManager RPC address; the default ${yarn.nodemanager.hostname}:0 uses an ephemeral port that changes after a cluster restart, so a fixed port is set here to keep NM restart/recovery working -->
<property>
<name>yarn.nodemanager.address</name>
<value>${yarn.nodemanager.hostname}:9923</value>
</property>
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
<property>
<name>yarn.nodemanager.log-aggregation.roll-monitoring-interval-seconds</name>
<value>3600</value>
</property>
<property>
<name>yarn.nodemanager.remote-app-log-dir</name>
<value>{{ deploy_dir }}/{{ hadoop_version }}/logs/app-logs/</value>
</property>
<!-- Physical memory, in MB, that the NodeManager can allocate to containers; default 8192 -->
<property>
<name>yarn.nodemanager.resource.memory-mb</name>
<value>{{ hadoop.yarn.nodemanager['yarn.nodemanager.resource.memory-mb'] }}</value>
</property>
<!-- Minimum allocation, in MB, for every container request at the RM; default 1024 -->
<property>
<name>yarn.scheduler.minimum-allocation-mb</name>
<value>1024</value>
</property>
<!-- Maximum allocation, in MB, for every container request at the RM, usually kept equal to yarn.nodemanager.resource.memory-mb; default 8192 -->
<property>
<name>yarn.scheduler.maximum-allocation-mb</name>
<value>{{ hadoop.yarn.nodemanager['yarn.scheduler.maximum-allocation-mb'] }}</value>
</property>
<!-- Number of vcores that can be allocated to containers; used by the RM scheduler when allocating resources, not a limit on the physical cores YARN containers may use. Default 8, usually set to the server's total CPU core count -->
<property>
<name>yarn.nodemanager.resource.cpu-vcores</name>
<value>{{ hadoop.yarn.nodemanager['yarn.nodemanager.resource.cpu-vcores'] }}</value>
</property>
<!-- Minimum allocation, in vcores, for every container request at the RM; default 1 -->
<property>
<name>yarn.scheduler.minimum-allocation-vcores</name>
<value>1</value>
</property>
<!-- Maximum allocation, in vcores, for every container request at the RM; default 32, usually set slightly below yarn.nodemanager.resource.cpu-vcores, and task slots should not exceed it -->
<property>
<name>yarn.scheduler.maximum-allocation-vcores</name>
<value>{{ hadoop.yarn.nodemanager['yarn.scheduler.maximum-allocation-vcores'] }}</value>
</property>
<property>
<name>yarn.nodemanager.vmem-check-enabled</name>
<value>false</value>
</property>
<property>
<name>yarn.nodemanager.pmem-check-enabled</name>
<value>false</value>
</property>
<!-- Maximum number of ApplicationMaster attempts; default 2 when HA is configured, can be raised for production -->
<property>
<name>yarn.resourcemanager.am.max-attempts</name>
<value>10000</value>
</property>
<property>
<name>yarn.log.server.url</name>
<value>http://{{ groups.yarn[0] }}:19888/jobhistory/logs</value>
</property>
</configuration>

View File

@@ -0,0 +1,79 @@
#!/bin/bash
source /etc/profile
function killService(){
keeppath='/etc/init.d/keepyarnhistory'
if [ -x $keeppath ];then
service keepyarnhistory stop
chkconfig keepyarnhistory off
systemctl daemon-reload
rm -rf /etc/init.d/keepyarnhistory
fi
keeppath='/etc/init.d/keepyarnmaster'
if [ -x $keeppath ];then
service keepyarnmaster stop
chkconfig keepyarnmaster off
systemctl daemon-reload
rm -rf /etc/init.d/keepyarnmaster
fi
keeppath='/etc/init.d/keepyarnworker'
if [ -x $keeppath ];then
service keepyarnworker stop
chkconfig keepyarnworker off
systemctl daemon-reload
rm -rf /etc/init.d/keepyarnworker
fi
}
function killPid(){
livenum=`jps -l | egrep -w "org.apache.hadoop.yarn.server.resourcemanager.ResourceManager" | grep -v grep |wc -l`
if [ $livenum -ne 0 ];then
keeppid=`jps -l |egrep -w "org.apache.hadoop.yarn.server.resourcemanager.ResourceManager" | awk '{print $1}'`
kill -9 $keeppid
fi
livenum=`jps -l | egrep -w "org.apache.hadoop.yarn.server.nodemanager.NodeManager" | grep -v grep |wc -l`
if [ $livenum -ne 0 ];then
keeppid=`jps -l |egrep -w "org.apache.hadoop.yarn.server.nodemanager.NodeManager" | awk '{print $1}'`
kill -9 $keeppid
fi
livenum=`jps -l | egrep -w "org.apache.hadoop.mapreduce.v2.hs.JobHistoryServer" | grep -v grep |wc -l`
if [ $livenum -ne 0 ];then
keeppid=`jps -l |egrep -w "org.apache.hadoop.mapreduce.v2.hs.JobHistoryServer" | awk '{print $1}'`
kill -9 $keeppid
fi
}
function drop_folder(){
FOLDER_NAME=$1
if [ -d "$FOLDER_NAME" ];then
rm -rf $FOLDER_NAME
fi
}
function drop_file(){
FILE_NAME=$1
if [ -f "$FILE_NAME" ];then
rm -rf $FILE_NAME
fi
}
killService
sleep 15
killPid
HAS_HDFS=`jps -l | egrep "org.apache.hadoop.hdfs.qjournal.server.JournalNode|org.apache.hadoop.hdfs.tools.DFSZKFailoverController|org.apache.hadoop.hdfs.server.datanode.DataNode|org.apache.hadoop.hdfs.server.namenode.NameNode" | wc -l`
if [ $HAS_HDFS -eq "0" ];then
drop_folder {{ deploy_dir }}/{{ hadoop_version }}
drop_folder {{ deploy_dir }}/hadoop
drop_folder {{ data_dir }}/hadoop
drop_file /etc/profile.d/hadoop.sh
fi

View File

@@ -0,0 +1,127 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
export YARN_RESOURCEMANAGER_JMX_OPTS="-Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.local.only=false -javaagent:{{ deploy_dir }}/{{ hadoop_version }}/monitor/jmx_prometheus_javaagent-0.12.0.jar=9909:{{ deploy_dir }}/{{ hadoop_version }}/monitor/yarn.yaml"
export YARN_NODEMANAGER_JMX_OPTS="-Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.local.only=false -javaagent:{{ deploy_dir }}/{{ hadoop_version }}/monitor/jmx_prometheus_javaagent-0.12.0.jar=9910:{{ deploy_dir }}/{{ hadoop_version }}/monitor/yarn.yaml"
# User for YARN daemons
export HADOOP_YARN_USER=${HADOOP_YARN_USER:-yarn}
# resolve links - $0 may be a softlink
export YARN_CONF_DIR="${YARN_CONF_DIR:-$HADOOP_YARN_HOME/conf}"
# some Java parameters
export JAVA_HOME=$JAVA_HOME
if [ "$JAVA_HOME" != "" ]; then
#echo "run java in $JAVA_HOME"
JAVA_HOME=$JAVA_HOME
fi
if [ "$JAVA_HOME" = "" ]; then
echo "Error: JAVA_HOME is not set."
exit 1
fi
JAVA=$JAVA_HOME/bin/java
JAVA_HEAP_MAX=-Xmx1000m
# For setting YARN specific HEAP sizes please use this
# Parameter and set appropriately
# YARN_HEAPSIZE=1000
# check envvars which might override default args
if [ "$YARN_HEAPSIZE" != "" ]; then
JAVA_HEAP_MAX="-Xmx""$YARN_HEAPSIZE""m"
fi
# Resource Manager specific parameters
# Specify the max Heapsize for the ResourceManager using a numerical value
# in the scale of MB. For example, to specify an jvm option of -Xmx1000m, set
# the value to 1000.
# This value will be overridden by an Xmx setting specified in either YARN_OPTS
# and/or YARN_RESOURCEMANAGER_OPTS.
# If not specified, the default value will be picked from either YARN_HEAPMAX
# or JAVA_HEAP_MAX with YARN_HEAPMAX as the preferred option of the two.
#export YARN_RESOURCEMANAGER_HEAPSIZE=1000
export YARN_RESOURCEMANAGER_OPTS="$YARN_RESOURCEMANAGER_OPTS {{ hadoop.yarn.resourcemanager.java_opt }}"
# Specify the max Heapsize for the timeline server using a numerical value
# in the scale of MB. For example, to specify an jvm option of -Xmx1000m, set
# the value to 1000.
# This value will be overridden by an Xmx setting specified in either YARN_OPTS
# and/or YARN_TIMELINESERVER_OPTS.
# If not specified, the default value will be picked from either YARN_HEAPMAX
# or JAVA_HEAP_MAX with YARN_HEAPMAX as the preferred option of the two.
#export YARN_TIMELINESERVER_HEAPSIZE=1000
# Specify the JVM options to be used when starting the ResourceManager.
# These options will be appended to the options specified as YARN_OPTS
# and therefore may override any similar flags set in YARN_OPTS
#export YARN_RESOURCEMANAGER_OPTS=
# Node Manager specific parameters
# Specify the max Heapsize for the NodeManager using a numerical value
# in the scale of MB. For example, to specify an jvm option of -Xmx1000m, set
# the value to 1000.
# This value will be overridden by an Xmx setting specified in either YARN_OPTS
# and/or YARN_NODEMANAGER_OPTS.
# If not specified, the default value will be picked from either YARN_HEAPMAX
# or JAVA_HEAP_MAX with YARN_HEAPMAX as the preferred option of the two.
#export YARN_NODEMANAGER_HEAPSIZE=1000
export YARN_NODEMANAGER_OPTS="$YARN_NODEMANAGER_OPTS {{ hadoop.yarn.nodemanager.java_opt }}"
# Specify the JVM options to be used when starting the NodeManager.
# These options will be appended to the options specified as YARN_OPTS
# and therefore may override any similar flags set in YARN_OPTS
#export YARN_NODEMANAGER_OPTS=
# so that filenames w/ spaces are handled correctly in loops below
IFS=
# default log directory & file
if [ "$YARN_LOG_DIR" = "" ]; then
YARN_LOG_DIR="$HADOOP_YARN_HOME/logs"
fi
if [ "$YARN_LOGFILE" = "" ]; then
YARN_LOGFILE='yarn.log'
fi
# default policy file for service-level authorization
if [ "$YARN_POLICYFILE" = "" ]; then
YARN_POLICYFILE="hadoop-policy.xml"
fi
# restore ordinary behaviour
unset IFS
YARN_OPTS="$YARN_OPTS -Dhadoop.log.dir=$YARN_LOG_DIR"
YARN_OPTS="$YARN_OPTS -Dyarn.log.dir=$YARN_LOG_DIR"
YARN_OPTS="$YARN_OPTS -Dhadoop.log.file=$YARN_LOGFILE"
YARN_OPTS="$YARN_OPTS -Dyarn.log.file=$YARN_LOGFILE"
YARN_OPTS="$YARN_OPTS -Dyarn.home.dir=$YARN_COMMON_HOME"
YARN_OPTS="$YARN_OPTS -Dyarn.id.str=$YARN_IDENT_STRING"
YARN_OPTS="$YARN_OPTS -Dhadoop.root.logger=${YARN_ROOT_LOGGER:-INFO,console}"
YARN_OPTS="$YARN_OPTS -Dyarn.root.logger=${YARN_ROOT_LOGGER:-INFO,console}"
if [ "x$JAVA_LIBRARY_PATH" != "x" ]; then
YARN_OPTS="$YARN_OPTS -Djava.library.path=$JAVA_LIBRARY_PATH"
fi
YARN_OPTS="$YARN_OPTS -Dyarn.policy.file=$YARN_POLICYFILE"

View File

@@ -0,0 +1,232 @@
<?xml version="1.0"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<configuration>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.resourcemanager.ha.enabled</name>
<value>true</value>
</property>
<!-- Declare the two ResourceManagers -->
<property>
<name>yarn.resourcemanager.cluster-id</name>
<value>rsmcluster</value>
</property>
<property>
<name>yarn.resourcemanager.ha.rm-ids</name>
<value>rsm1,rsm2</value>
</property>
<!-- rm1 configuration -->
<!-- rm1 hostname -->
<property>
<name>yarn.resourcemanager.hostname.rsm1</name>
<value>{{ groups.yarn[0] }}</value>
</property>
<!-- rm1 web application address -->
<property>
<name>yarn.resourcemanager.webapp.address.rsm1</name>
<value>{{ groups.yarn[0] }}:8080</value>
</property>
<!-- rm1 scheduler address, default port 8030 -->
<property>
<name>yarn.resourcemanager.scheduler.address.rsm1</name>
<value>{{ groups.yarn[0] }}:8030</value>
</property>
<!-- rm1 resource tracker address, default port 8031 -->
<property>
<name>yarn.resourcemanager.resource-tracker.address.rsm1</name>
<value>{{ groups.yarn[0] }}:8031</value>
</property>
<!-- rm1 applications manager interface address, default port 8032 -->
<property>
<name>yarn.resourcemanager.address.rsm1</name>
<value>{{ groups.yarn[0] }}:8032</value>
</property>
<!-- rm1 admin address, default port 8033 -->
<property>
<name>yarn.resourcemanager.admin.address.rsm1</name>
<value>{{ groups.yarn[0] }}:8033</value>
</property>
<property>
<name>yarn.resourcemanager.ha.admin.address.rsm1</name>
<value>{{ groups.yarn[0] }}:23142</value>
</property>
<!-- rm2 configuration -->
<property>
<name>yarn.resourcemanager.hostname.rsm2</name>
<value>{{ groups.yarn[1] }}</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address.rsm2</name>
<value>{{ groups.yarn[1] }}:8080</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address.rsm2</name>
<value>{{ groups.yarn[1] }}:8030</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address.rsm2</name>
<value>{{ groups.yarn[1] }}:8031</value>
</property>
<property>
<name>yarn.resourcemanager.address.rsm2</name>
<value>{{ groups.yarn[1] }}:8032</value>
</property>
<property>
<name>yarn.resourcemanager.admin.address.rsm2</name>
<value>{{ groups.yarn[1] }}:8033</value>
</property>
<property>
<name>yarn.resourcemanager.ha.admin.address.rsm2</name>
<value>{{ groups.yarn[1] }}:23142</value>
</property>
<!-- ZooKeeper quorum addresses -->
<property>
<name>yarn.resourcemanager.zk-address</name>
{% for dev_info in groups.zookeeper -%}
{% if loop.last -%}
{{dev_info}}:2181</value>
{% elif loop.first %}
<value>{{dev_info}}:2181,
{%- else %}
{{dev_info}}:2181,
{%- endif %}
{%- endfor %}
</property>
<!-- Enable ResourceManager recovery so running applications survive an RM failure; default false -->
<property>
<name>yarn.resourcemanager.recovery.enabled</name>
<value>true</value>
</property>
<!-- Enable NodeManager recovery; default false -->
<property>
<name>yarn.nodemanager.recovery.enabled</name>
<value>true</value>
</property>
<!-- Local filesystem directory where the NodeManager persists its running state -->
<property>
<name>yarn.nodemanager.recovery.dir</name>
<value>{{ deploy_dir }}/{{ hadoop_version }}/yarn</value>
</property>
<property>
<name>yarn.resourcemanager.store.class</name>
<value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
</property>
<!-- NodeManager RPC address; the default ${yarn.nodemanager.hostname}:0 uses an ephemeral port that changes after a cluster restart, so a fixed port is set here to keep NM restart/recovery working -->
<property>
<name>yarn.nodemanager.address</name>
<value>${yarn.nodemanager.hostname}:9923</value>
</property>
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
<property>
<name>yarn.nodemanager.log-aggregation.roll-monitoring-interval-seconds</name>
<value>3600</value>
</property>
<property>
<name>yarn.nodemanager.remote-app-log-dir</name>
<value>{{ deploy_dir }}/{{ hadoop_version }}/logs/app-logs/</value>
</property>
<!-- Physical memory, in MB, that the NodeManager can allocate to containers; default 8192 -->
<property>
<name>yarn.nodemanager.resource.memory-mb</name>
<value>{{ hadoop.yarn.nodemanager['yarn.nodemanager.resource.memory-mb'] }}</value>
</property>
<!-- Minimum allocation, in MB, for every container request at the RM; default 1024 -->
<property>
<name>yarn.scheduler.minimum-allocation-mb</name>
<value>1024</value>
</property>
<!-- Maximum allocation, in MB, for every container request at the RM, usually kept equal to yarn.nodemanager.resource.memory-mb; default 8192 -->
<property>
<name>yarn.scheduler.maximum-allocation-mb</name>
<value>{{ hadoop.yarn.nodemanager['yarn.scheduler.maximum-allocation-mb'] }}</value>
</property>
<!-- Number of vcores that can be allocated to containers; used by the RM scheduler when allocating resources, not a limit on the physical cores YARN containers may use. Default 8, usually set to the server's total CPU core count -->
<property>
<name>yarn.nodemanager.resource.cpu-vcores</name>
<value>{{ hadoop.yarn.nodemanager['yarn.nodemanager.resource.cpu-vcores'] }}</value>
</property>
<!-- Minimum allocation, in vcores, for every container request at the RM; default 1 -->
<property>
<name>yarn.scheduler.minimum-allocation-vcores</name>
<value>1</value>
</property>
<!-- Maximum allocation, in vcores, for every container request at the RM; default 32, usually set slightly below yarn.nodemanager.resource.cpu-vcores, and task slots should not exceed it -->
<property>
<name>yarn.scheduler.maximum-allocation-vcores</name>
<value>{{ hadoop.yarn.nodemanager['yarn.scheduler.maximum-allocation-vcores'] }}</value>
</property>
<property>
<name>yarn.nodemanager.vmem-check-enabled</name>
<value>false</value>
</property>
<property>
<name>yarn.nodemanager.pmem-check-enabled</name>
<value>false</value>
</property>
<!-- Maximum number of ApplicationMaster attempts; default 2 when HA is configured, can be raised for production -->
<property>
<name>yarn.resourcemanager.am.max-attempts</name>
<value>10000</value>
</property>
<property>
<name>yarn.log.server.url</name>
<value>http://{{ groups.yarn[0] }}:19888/jobhistory/logs</value>
</property>
</configuration>

View File

@@ -0,0 +1,15 @@
#Hadoop version
hadoop_version: hadoop-2.7.1
#Flink version
flink_version: flink-1.13.1
#JDK version
java_version: 1.8.0_73
#Data directory
hdfs_data_dir: "{{ data_dir }}/{{ hadoop_version }}/data/hadoop"
#Clusters with 5 or more nodes: the first two hosts run ResourceManager and the remaining hosts run NodeManager
#Clusters with fewer than 5 nodes: the first two hosts run ResourceManager and every host runs NodeManager
cluster_limit: "5"
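The two comments above describe how hosts are split once the group size reaches cluster_limit; the same split written out as Jinja expressions (a sketch, the helper variable names are hypothetical):

rm_hosts: "{{ groups.yarn[0:2] }}"
nm_hosts: "{{ groups.yarn if (groups.yarn | length | int) < (cluster_limit | int) else groups.yarn[2:] }}"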