Commit the initial version of the Ansible deployment playbooks for each component

This commit is contained in:
qidaijie
2024-01-18 15:35:33 +08:00
parent f0bd05d565
commit 0cc392df5c
262 changed files with 15927 additions and 0 deletions

View File

@@ -0,0 +1,11 @@
[zookeeper]
192.168.45.102
[mariadb]
192.168.45.102
[hdfs]
[druid]
192.168.45.102

View File

@@ -0,0 +1,6 @@
- hosts: druid
remote_user: root
roles:
- role
vars_files:
- role/vars/main.yml

View File

@@ -0,0 +1,44 @@
#The default installation location
deploy_dir: /data/olap
#The default data storage location, used for storing application data, logs and configuration files
data_dir: /data/olap
druid:
common:
druid.zk.service.host: '{% for dev_info in groups.zookeeper -%}{% if loop.last -%}{{dev_info}}:2181{%- else %}{{dev_info}}:2181,{%- endif %}{%- endfor %}'
druid.metadata.storage.connector.connectURI: 'jdbc:mysql://{{ vrrp_instance.default.virtual_ipaddress }}:3306/druid'
druid.metadata.storage.connector.password: '{{ mariadb_default_pin }}'
broker:
#Running memory of the Druid-Broker.
java_opts: -Xmx1024m -Xms1024m
#Worker tasks also use off-heap ("direct") memory. Set the amount of direct memory available (-XX:MaxDirectMemorySize) to at least (druid.processing.numThreads + 1) * druid.processing.buffer.sizeBytes
MaxDirectMemorySize: 512m
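#Illustrative sizing check using the defaults in this file (an assumption for tuning, not a requirement):
#(druid.processing.numThreads 5 + 1) * druid.processing.buffer.sizeBytes 50000000 = 300000000 bytes (~300 MB), which fits within the 512m set above.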
#This specifies a buffer size (less than 2GiB) for the storage of intermediate results
druid.processing.buffer.sizeBytes: 50000000
#The number of direct memory buffers available for merging query results.
druid.processing.numMergeBuffers: 4
#The number of processing threads to have available for parallel processing of segments.
druid.processing.numThreads: 5
coordinator:
#Running memory of the Druid-Coordinator.
java_opts: -Xmx1024m -Xms1024m
historical:
#Running memory of the Druid-Historical.
java_opts: -Xmx1024m -Xms1024m
#The maximum on-disk segment cache size of the Historical process, in bytes (300000000000 ≈ 300 GB)
druid.segmentCache.locations: 300000000000
#Worker tasks also use off-heap ("direct") memory. Set the amount of direct memory available (-XX:MaxDirectMemorySize) to at least (druid.processing.numThreads + 1) * druid.processing.buffer.sizeBytes
MaxDirectMemorySize: 512m
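#Sanity check with the defaults below (illustrative only): (5 processing threads + 1) * 50000000 bytes = 300000000 bytes (~300 MB) <= 512m.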
#This specifies a buffer size (less than 2GiB) for the storage of intermediate results
druid.processing.buffer.sizeBytes: 50000000
#The number of direct memory buffers available for merging query results.
druid.processing.numMergeBuffers: 4
#The number of processing threads to have available for parallel processing of segments.
druid.processing.numThreads: 5
middlemanager:
#Running memory of the Druid-Middlemanager.
java_opts: -Xmx1024m -Xms1024m
druid.indexer.fork.property.druid.processing.numMergeBuffers: 2
druid.indexer.fork.property.druid.processing.buffer.sizeBytes: 20000000
druid.indexer.fork.property.druid.processing.numThreads: 1

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,38 @@
- name: Loading Image
docker_image:
name: '{{ image_name }}'
tag: '{{ image_tag }}'
load_path: '{{ deploy_dir }}/{{ container_name }}/{{ image_name }}-{{ image_tag }}.tar'
source: load
force_tag: yes
force_source: yes
timeout: 300
- name: Stop Container
docker_container:
name: '{{ container_name }}'
state: absent
- name: Start Container
docker_compose:
project_src: '{{ deploy_dir }}/{{ container_name }}/'
- name: Removing Image
docker_image:
name: '{{ image_name }}'
tag: '{{ image_tag }}'
state: absent
- name: Loading Exporter Image
docker_image:
name: 'druid_exporter'
tag: '1.0.0'
load_path: '{{ deploy_dir }}/{{ container_name }}/monitor/druid_exporter-1.0.0.tar'
source: load
force_tag: yes
force_source: yes
timeout: 300
- name: Start Exporter Container
docker_compose:
project_src: '{{ deploy_dir }}/{{ container_name }}/monitor/'

View File

@@ -0,0 +1,156 @@
- block:
- name: To terminate execution
fail:
msg: "Druid Cluster mode at least 3 nodes,please checking configurations/hosts -> druid"
when: node_nums < (min_cluster_num)
- name: Check the Zookeeper status
shell: netstat -anlp | egrep "2181" | grep LISTEN | wc -l
register: port_out
delegate_to: "{{ groups.zookeeper[0] }}"
- name: To terminate execution
fail:
msg: "Port 2181 of the zookeeper node is not monitored. The status may be abnormal"
run_once: true
delegate_to: 127.0.0.1
when: port_out.stdout != '1'
- name: Checking Hadoop DataNode status
shell: source /etc/profile && hadoop dfsadmin -report | grep "Live datanodes" | grep -E -o "[0-9]"
async: 10
register: datanode_out
run_once: true
delegate_to: "{{ groups.hdfs[0] }}"
- name: Checking Hadoop NameNode status
shell: source /etc/profile && hadoop dfsadmin -report |grep 50010 | wc -l
async: 10
register: namenode_out
run_once: true
delegate_to: "{{ groups.hdfs[0] }}"
- name: To terminate execution
fail:
msg: "If the dependency test fails, check whether the Hadoop cluster is normal"
when: datanode_out.stdout <= '1' and namenode_out.stdout <= '1'
- name: Creating directory
file:
state: directory
path: '{{ deploy_dir }}/{{ container_name }}/{{ item.dir }}'
with_items:
- { dir: 'var' }
- { dir: 'log' }
- { dir: 'monitor' }
- name: Copying config
unarchive:
src: 'files/conf.zip'
dest: '{{ deploy_dir }}/{{ container_name }}/'
- name: Copying image to {{ deploy_dir }}/{{ container_name }}/
copy:
src: '{{ role_path }}/../../../software-packages/{{ image_name }}-{{ image_tag }}.tar'
dest: '{{ deploy_dir }}/{{ container_name }}/'
force: true
notify:
- Loading Image
- name: Copying Druid config files
template:
src: '{{ item.src }}'
dest: '{{ deploy_dir }}/{{ container_name }}/conf/druid/cluster/{{ item.dest }}'
backup: false
with_items:
- { src: 'common.runtime.properties.j2', dest: '_common/common.runtime.properties' }
- { src: 'broker_runtime.j2', dest: 'query/broker/runtime.properties' }
- { src: 'broker_jvm.j2', dest: 'query/broker/jvm.config' }
- { src: 'historical_runtime.j2', dest: 'data/historical/runtime.properties' }
- { src: 'historical_jvm.j2', dest: 'data/historical/jvm.config' }
- { src: 'middleManager_jvm.j2', dest: 'data/middleManager/jvm.config' }
- { src: 'middleManager_runtime.properties.j2', dest: 'data/middleManager/runtime.properties' }
- { src: 'coordinator_jvm.j2', dest: 'master/coordinator-overlord/jvm.config' }
- { src: 'router_runtime.properties.j2', dest: 'query/router/runtime.properties' }
- name: Fetching Hadoop config files to /tmp
ansible.builtin.fetch:
src: "{{ deploy_dir }}/hadoop-2.7.1/etc/hadoop/{{ item.filename }}"
dest: "/tmp/"
flat: yes
loop: "{{ hadoop_config_files }}"
run_once: true
delegate_to: "{{ groups.hdfs[0] }}"
- name: Copying Hadoop config files to other nodes
ansible.builtin.copy:
src: "/tmp/{{ item.filename }}"
dest: "{{ deploy_dir }}/{{ container_name }}/conf/druid/cluster/_common/"
loop: "{{ hadoop_config_files }}"
- name: Create a new database with name {{ druid_database }}
shell: mysql -uroot -p{{ mariadb_default_pin }} -P3306 -h{{ groups.mariadb[0] }} -e "create database {{ druid_database }} default character set utf8mb4 collate utf8mb4_general_ci;"
run_once: true
delegate_to: "{{ groups.mariadb[0] }}"
- block:
- name: Setting startup_mode variable
set_fact: startup_mode="cluster-all-server"
- name: Copying Druid docker-compose
template:
src: 'docker-compose.yml.j2'
dest: '{{ deploy_dir }}/{{ container_name }}/docker-compose.yml'
mode: 0644
notify:
- Loading Image
- Start Container
when: node_nums <= (min_cluster_num)
- block:
- name: Setting startup_mode variable
set_fact: startup_mode="cluster-query-server"
- name: Copying Druid docker-compose
template:
src: 'docker-compose.yml.j2'
dest: '{{ deploy_dir }}/{{ container_name }}/docker-compose.yml'
mode: 0644
notify:
- Loading Image
- Start Container
when: node_nums > (min_cluster_num) and inventory_hostname in groups['druid'][:2]
- block:
- name: Setting startup_mode variable
set_fact: startup_mode="cluster-data-server"
- name: Copying Druid docker-compose
template:
src: 'docker-compose.yml.j2'
dest: '{{ deploy_dir }}/{{ container_name }}/docker-compose.yml'
mode: 0644
notify:
- Loading Image
- Start Container
when: node_nums > (min_cluster_num) and inventory_hostname not in groups['druid'][:2]
- name: Copying image to {{ deploy_dir }}/{{ container_name }}/monitor
copy:
src: '{{ role_path }}/../../../software-packages/druid_exporter-1.0.0.tar'
dest: '{{ deploy_dir }}/{{ container_name }}/monitor/'
force: true
notify:
- Loading Exporter Image
- name: Config exporter config files
template:
src: 'docker-compose_exporter.yml.j2'
dest: '{{ deploy_dir }}/{{ container_name }}/monitor/docker-compose.yml'
mode: 0644
notify:
- Start Exporter Container
- meta: flush_handlers

View File

@@ -0,0 +1,19 @@
- block:
- include: uninstall.yml
- include: deploy.yml
- include: status-check.yml
when: (operation) == "install" and (groups.druid|length) > 1
- block:
- include: uninstall.yml
when: (operation) == "uninstall" and (groups.druid|length) > 1
- block:
- include: standalone/uninstall.yml
- include: standalone/deploy.yml
- include: status-check.yml
when: (operation) == "install" and (groups.druid|length) == 1
- block:
- include: standalone/uninstall.yml
when: (operation) == "uninstall" and (groups.druid|length) == 1

View File

@@ -0,0 +1,93 @@
- name: Setting node_nums variable
set_fact: node_nums="{{groups.druid|length}}"
- block:
- name: To terminate execution
fail:
msg: "Druid Standanloe mode at max 1 nodes,please checking configurations/hosts -> druid"
when: node_nums != '1'
- name: Check the Zookeeper status
shell: netstat -anlp | egrep "2181" | grep LISTEN | wc -l
register: port_out
delegate_to: "{{ groups.zookeeper[0] }}"
- name: To terminate execution
fail:
msg: "Port 2181 of the zookeeper node is not monitored. The status may be abnormal"
run_once: true
delegate_to: 127.0.0.1
when: port_out.stdout != '1'
- name: Creating directory
file:
state: directory
path: '{{ deploy_dir }}/{{ container_name }}/{{ item.dir }}'
with_items:
- { dir: 'var' }
- { dir: 'log' }
- { dir: 'monitor' }
- name: Copying config
unarchive:
src: 'files/conf.zip'
dest: '{{ deploy_dir }}/{{ container_name }}/'
- name: Copying image to {{ deploy_dir }}/{{ container_name }}/
copy:
src: 'files/{{ image_name }}-{{ image_tag }}.tar'
dest: '{{ deploy_dir }}/{{ container_name }}/'
force: true
notify:
- Loading Image
- name: Copying Druid config files
template:
src: '{{ item.src }}'
dest: '{{ deploy_dir }}/{{ container_name }}/conf/druid/single-server/medium/{{ item.dest }}'
backup: false
with_items:
- { src: 'common.runtime.properties.j2', dest: '_common/common.runtime.properties' }
- { src: 'broker_runtime.j2', dest: 'broker/runtime.properties' }
- { src: 'broker_jvm.j2', dest: 'broker/jvm.config' }
- { src: 'historical_runtime.j2', dest: 'historical/runtime.properties' }
- { src: 'historical_jvm.j2', dest: 'historical/jvm.config' }
- { src: 'middleManager_jvm.j2', dest: 'middleManager/jvm.config' }
- { src: 'middleManager_runtime.properties.j2', dest: 'middleManager/runtime.properties' }
- { src: 'coordinator_jvm.j2', dest: 'coordinator-overlord/jvm.config' }
- { src: 'router_runtime.properties.j2', dest: 'router/runtime.properties' }
- name: Create a new database with name {{ druid_database }}
shell: mysql -uroot -p{{ mariadb_default_pin }} -P3306 -h{{ groups.mariadb[0] }} -e "create database {{ druid_database }} default character set utf8mb4 collate utf8mb4_general_ci;"
run_once: true
delegate_to: "{{ groups.mariadb[0] }}"
- name: Setting startup_mode variable
set_fact: startup_mode="single-server-medium"
- name: Copying Druid docker-compose
template:
src: 'docker-compose.yml.j2'
dest: '{{ deploy_dir }}/{{ container_name }}/docker-compose.yml'
mode: 0644
notify:
- Loading Image
- Start Container
- name: Copying image to {{ deploy_dir }}/{{ container_name }}/monitor
copy:
src: 'files/druid_exporter-1.0.0.tar'
dest: '{{ deploy_dir }}/{{ container_name }}/monitor/'
force: true
notify:
- Loading Exporter Image
- name: Config exporter config files
template:
src: 'docker-compose_exporter.yml.j2'
dest: '{{ deploy_dir }}/{{ container_name }}/monitor/docker-compose.yml'
mode: 0644
notify:
- Start Exporter Container
- meta: flush_handlers

View File

@@ -0,0 +1,50 @@
- name: copy mysql to /usr/bin/
copy:
src: 'files/mysql'
dest: '/usr/bin/'
force: true
mode: 0755
- name: Stopping and removing {{ container_name }} container
docker_container:
name: '{{ container_name }}'
state: absent
- name: Removing old {{ image_name }} image
docker_image:
name: '{{ image_name }}'
tag: '{{ image_tag }}'
state: absent
- name: Ansible delete old {{ deploy_dir }}/{{ container_name }}
file:
path: '{{ deploy_dir }}/{{ container_name }}'
state: absent
- name: Drop the {{ druid_database }} database if it exists
shell: mysql -s -uroot -p{{ mariadb_default_pin }} -P3306 -h{{ groups.mariadb[0] }} -e "DROP DATABASE IF EXISTS {{ druid_database }};"
run_once: true
delegate_to: "{{ groups.druid[0] }}"
- name: Checking ZooKeeper has druid nodes
shell: "docker exec -it zookeeper zkCli.sh ls / | grep druid | wc -l"
run_once: true
delegate_to: "{{ groups.zookeeper[0] }}"
register: has_zknode
- name: Delete druid nodes in ZooKeeper
shell: "docker exec -it zookeeper zkCli.sh rmr /druid"
run_once: true
delegate_to: "{{ groups.zookeeper[0] }}"
when: has_zknode.stdout >= '1'
- name: Check if the Druid service already exists
shell: ps -ef |grep "org.apache.druid.cli.Main server" | grep -v grep | grep -v json | wc -l
register: check_out
- name: To terminate execution
fail:
msg: "Uninstall failed, the Druid process is still running, please check!"
run_once: true
delegate_to: 127.0.0.1
when: check_out.stdout >= '1'

View File

@@ -0,0 +1,41 @@
- name: Waiting for Druid to start, 60s
shell: sleep 60
- block:
- name: Check if the Druid already exists
shell: ps -ef | grep -v grep | grep "org.apache.druid.cli.Main server" | wc -l
register: process_out
- name: To terminate execution
fail:
msg: "Druid on node {{ inventory_hostname }} is not started. Please check"
run_once: true
delegate_to: 127.0.0.1
when: process_out.stdout != '5'
when: node_nums <= (min_cluster_num)
- block:
- name: Check if the Druid already exists
shell: ps -ef | grep -v grep | grep "org.apache.druid.cli.Main server" | wc -l
register: process_out
- name: To terminate execution
fail:
msg: "Druid on node {{ inventory_hostname }} is not started. Please check"
run_once: true
delegate_to: 127.0.0.1
when: process_out.stdout != '3'
when: node_nums > (min_cluster_num) and inventory_hostname in groups['druid'][:2]
- block:
- name: Check if the Druid already exists
shell: ps -ef | grep -v grep | grep "org.apache.druid.cli.Main server" | wc -l
register: process_out
- name: To terminate execution
fail:
msg: "Druid on node {{ inventory_hostname }} is not started. Please check"
run_once: true
delegate_to: 127.0.0.1
when: process_out.stdout != '2'
when: node_nums > (min_cluster_num) and inventory_hostname not in groups['druid'][:2]

View File

@@ -0,0 +1,64 @@
- name: copy mysql to /usr/bin/
copy:
src: 'files/mysql'
dest: '/usr/bin/'
force: true
mode: 0755
- block:
- name: Stopping and removing {{ container_name }} container
docker_container:
name: '{{ container_name }}'
state: absent
- name: Removing old {{ image_name }} image
docker_image:
name: '{{ image_name }}'
tag: '{{ image_tag }}'
state: absent
- name: Ansible delete old {{ deploy_dir }}/{{ container_name }}
file:
path: '{{ deploy_dir }}/{{ container_name }}'
state: absent
- block:
- name: Drop the {{ druid_database }} database if it exists
shell: mysql -s -uroot -p{{ mariadb_default_pin }} -P3306 -h{{ groups.mariadb[0] }} -e "DROP DATABASE IF EXISTS {{ druid_database }};"
run_once: true
delegate_to: "{{ groups.druid[0] }}"
- name: Checking ZooKeeper has druid nodes
shell: "docker exec -it zookeeper zkCli.sh ls / | grep druid | wc -l"
run_once: true
delegate_to: "{{ groups.zookeeper[0] }}"
register: has_zknode
- name: Delete druid nodes in ZooKeeper
shell: "docker exec -it zookeeper zkCli.sh rmr /druid"
run_once: true
delegate_to: "{{ groups.zookeeper[0] }}"
when: has_zknode.stdout >= '1'
- name: Checking HDFS has Druid folder
shell: source /etc/profile && hdfs dfs -ls / | grep druid | wc -l
register: folder_exists
run_once: true
delegate_to: "{{ groups.hdfs[0] }}"
- name: Delete Druid data folder in HDFS
shell: source /etc/profile && hadoop fs -rm -r /druid
run_once: true
delegate_to: "{{ groups.hdfs[0] }}"
when: folder_exists.stdout >= '1'
- name: Check if the Druid service already exists
shell: ps -ef |grep "org.apache.druid.cli.Main server" | grep -v grep | grep -v json | wc -l
register: check_out
- name: To terminate execution
fail:
msg: "Uninstall failed, the Druid process is still running, please check!"
run_once: true
delegate_to: 127.0.0.1
when: check_out.stdout >= '1'

View File

@@ -0,0 +1,9 @@
-server
{{ druid.broker.java_opts }}
-XX:MaxDirectMemorySize={{ druid.broker.MaxDirectMemorySize }}
-Duser.timezone=UTC
-Dfile.encoding=UTF-8
-Djava.io.tmpdir=var/tmp
-Dlogfile.name=broker
-Djava.util.logging.manager=org.apache.logging.log4j.jul.LogManager

View File

@@ -0,0 +1,41 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
druid.service=druid/broker
druid.plaintextPort=8082
# HTTP server settings
druid.server.http.numThreads=60
# HTTP client settings
druid.broker.http.numConnections=50
druid.broker.http.maxQueuedBytes=10000000
# Processing threads and buffers
druid.processing.buffer.sizeBytes={{ druid.broker['druid.processing.buffer.sizeBytes'] }}
druid.processing.numMergeBuffers={{ druid.broker['druid.processing.numMergeBuffers'] }}
druid.processing.numThreads={{ druid.broker['druid.processing.numThreads'] }}
druid.processing.tmpDir=var/druid/processing
# Query cache disabled -- push down caching and merging instead
druid.broker.cache.useCache=false
druid.broker.cache.populateCache=false
druid.query.groupBy.maxMergingDictionarySize=10000000000
druid.query.groupBy.maxOnDiskStorage=10000000000

View File

@@ -0,0 +1,169 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# Extensions specified in the load list will be loaded by Druid
# We are using local fs for deep storage - not recommended for production - use S3, HDFS, or NFS instead
# We are using local derby for the metadata store - not recommended for production - use MySQL or Postgres instead
# If you specify `druid.extensions.loadList=[]`, Druid won't load any extension from file system.
# If you don't specify `druid.extensions.loadList`, Druid will load all the extensions under root extension directory.
# More info: https://druid.apache.org/docs/latest/operations/including-extensions.html
druid.extensions.loadList=["druid-hdfs-storage", "druid-kafka-indexing-service", "druid-datasketches", "druid-multi-stage-query","mysql-metadata-storage","druid-hlld", "druid-hdrhistogram"]
# If you have a different version of Hadoop, place your Hadoop client jar files in your hadoop-dependencies directory
# and uncomment the line below to point to your directory.
#druid.extensions.hadoopDependenciesDir=/my/dir/hadoop-dependencies
#
# Hostname
#
druid.host={{ inventory_hostname }}
#
# Logging
#
# Log all runtime properties on startup. Disable to avoid logging properties on startup:
druid.startup.logging.logProperties=true
#
# Zookeeper
#
druid.zk.service.host={{ druid.common['druid.zk.service.host'] }}
druid.zk.paths.base=/druid
#
# Metadata storage
#
# For Derby server on your Druid Coordinator (only viable in a cluster with a single Coordinator, no fail-over):
#druid.metadata.storage.type=derby
#druid.metadata.storage.connector.connectURI=jdbc:derby://localhost:1527/var/druid/metadata.db;create=true
#druid.metadata.storage.connector.host=localhost
#druid.metadata.storage.connector.port=1527
# For MySQL (make sure to include the MySQL JDBC driver on the classpath):
druid.metadata.storage.type=mysql
druid.metadata.storage.connector.connectURI={{ druid.common['druid.metadata.storage.connector.connectURI'] }}
druid.metadata.storage.connector.user=root
druid.metadata.storage.connector.password={{ druid.common['druid.metadata.storage.connector.password'] }}
# For PostgreSQL:
#druid.metadata.storage.type=postgresql
#druid.metadata.storage.connector.connectURI=jdbc:postgresql://db.example.com:5432/druid
#druid.metadata.storage.connector.user=...
#druid.metadata.storage.connector.password=...
#
# Deep storage
#
# For local disk (only viable in a cluster if this is a network mount):
{% if groups.druid | length == 1 %}
druid.storage.type=local
druid.storage.storageDirectory=var/druid/segments
{% elif groups.druid | length >= 3 %}
# For HDFS:
druid.storage.type=hdfs
druid.storage.storageDirectory=/druid/segments
{% endif %}
# For S3:
#druid.storage.type=s3
#druid.storage.bucket=your-bucket
#druid.storage.baseKey=druid/segments
#druid.s3.accessKey=...
#druid.s3.secretKey=...
#
# Indexing service logs
#
# For local disk (only viable in a cluster if this is a network mount):
{% if groups.druid | length == 1 %}
druid.indexer.logs.type=file
druid.indexer.logs.directory=var/druid/indexing-logs
{% elif groups.druid | length >= 3 %}
# For HDFS:
druid.indexer.logs.type=hdfs
druid.indexer.logs.directory=/druid/indexing-logs
{% endif %}
druid.indexer.logs.kill.enabled=true
druid.indexer.logs.kill.durationToRetain=604800000
druid.indexer.logs.kill.delay=21600000
# For S3:
#druid.indexer.logs.type=s3
#druid.indexer.logs.s3Bucket=your-bucket
#druid.indexer.logs.s3Prefix=druid/indexing-logs
#
# Service discovery
#
druid.selectors.indexing.serviceName=druid/overlord
druid.selectors.coordinator.serviceName=druid/coordinator
#
# Monitoring
#
druid.monitoring.monitors=["org.apache.druid.java.util.metrics.JvmMonitor"]
druid.emitter=http
druid.emitter.logging.logLevel=info
druid.emitter.http.recipientBaseUrl=http://{{ inventory_hostname }}:9903
# Storage type of double columns
# omitting this will lead to index double as float at the storage layer
druid.indexing.doubleStorage=double
#
# Security
#
druid.server.hiddenProperties=["druid.s3.accessKey","druid.s3.secretKey","druid.metadata.storage.connector.password", "password", "key", "token", "pwd"]
#
# SQL
#
druid.sql.enable=true
#
# Lookups
#
druid.lookup.enableLookupSyncOnStartup=false
# Planning SQL query when there is aggregate distinct in the statement
druid.sql.planner.useGroupingSetForExactDistinct=true
# Expression processing config
druid.expressions.useStrictBooleans=true
# Http client
druid.global.http.eagerInitialization=false
#Set to false to store and query data in SQL compatible mode. When set to true (legacy mode), null values will be stored as '' for string columns and 0 for numeric columns.
druid.generic.useDefaultValueForNull=false

View File

@@ -0,0 +1,10 @@
-server
{{ druid.coordinator.java_opts }}
-XX:+UseG1GC
-Duser.timezone=UTC
-Dfile.encoding=UTF-8
-Djava.io.tmpdir=var/tmp
-Dlogfile.name=coordinator
-Djava.util.logging.manager=org.apache.logging.log4j.jul.LogManager
-Dderby.stream.error.file=var/druid/derby.log

View File

@@ -0,0 +1,18 @@
version: '3'
services:
druid-master:
image: {{ image_name }}:{{ image_tag }}
restart: always
container_name: {{ container_name }}
privileged: true
user: root
environment:
#cluster-data-server,cluster-query-server,cluster-all-server,single-server-small,single-server-medium,single-server-large,single-server-xlarge
MODE: {{ startup_mode }}
volumes:
- "{{ deploy_dir }}/{{ container_name }}/conf:/{{ component_version }}/conf"
- "{{ deploy_dir }}/{{ container_name }}/var:/{{ component_version }}/var"
- "{{ deploy_dir }}/{{ container_name }}/log:/{{ component_version }}/log"
network_mode: "host"

View File

@@ -0,0 +1,17 @@
version: '3.3'
services:
druid_exporter:
image: druid_exporter:1.0.0
container_name: druid_exporter
restart: always
ports:
- 9903:9903
environment:
JVM_MEM: "-Xmx1024m -Xms128m"
networks:
olap:
ipv4_address: 172.20.88.11
networks:
olap:
external: true

View File

@@ -0,0 +1,9 @@
-server
{{ druid.historical.java_opts }}
-XX:MaxDirectMemorySize={{ druid.historical.MaxDirectMemorySize }}
-Duser.timezone=UTC
-Dfile.encoding=UTF-8
-Djava.io.tmpdir=var/tmp
-Dlogfile.name=historical
-Djava.util.logging.manager=org.apache.logging.log4j.jul.LogManager

View File

@@ -0,0 +1,42 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
druid.service=druid/historical
druid.plaintextPort=8083
# HTTP server threads
druid.server.http.numThreads=60
# Processing threads and buffers
druid.processing.buffer.sizeBytes={{ druid.historical['druid.processing.buffer.sizeBytes'] }}
druid.processing.numMergeBuffers={{ druid.historical['druid.processing.numMergeBuffers'] }}
druid.processing.numThreads={{ druid.historical['druid.processing.numThreads'] }}
druid.processing.tmpDir=var/druid/processing
# Segment storage
druid.segmentCache.locations=[{"path":"var/druid/segment-cache","maxSize":{{ druid.historical['druid.segmentCache.locations'] }}}]
# Query cache
druid.historical.cache.useCache=true
druid.historical.cache.populateCache=true
druid.cache.type=caffeine
druid.cache.sizeInBytes=256000000
druid.query.groupBy.maxMergingDictionarySize=10000000000
druid.query.groupBy.maxOnDiskStorage=10000000000

View File

@@ -0,0 +1,7 @@
-server
{{ druid.middlemanager.java_opts }}
-Duser.timezone=UTC
-Dfile.encoding=UTF-8
-Djava.io.tmpdir=var/tmp
-Dlogfile.name=middleManager
-Djava.util.logging.manager=org.apache.logging.log4j.jul.LogManager

View File

@@ -0,0 +1,43 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
druid.service=druid/middleManager
druid.plaintextPort=8091
# Number of tasks per middleManager
druid.worker.capacity=200
# Task launch parameters
druid.worker.baseTaskDirs=[\"var/druid/task\"]
druid.indexer.runner.javaOptsArray=["-server","-Xms1024m","-Xmx1024m","-XX:MaxDirectMemorySize=1024m","-Duser.timezone=UTC","-Dfile.encoding=UTF-8","-Djava.util.logging.manager=org.apache.logging.log4j.jul.LogManager","-Dlog4j.configurationFile=conf/druid/cluster/_common/log4j2-task.xml"]
# HTTP server threads
druid.server.http.numThreads=60
# Processing threads and buffers on Peons
druid.indexer.fork.property.druid.processing.numMergeBuffers={{ druid.middlemanager['druid.indexer.fork.property.druid.processing.numMergeBuffers'] }}
druid.indexer.fork.property.druid.processing.buffer.sizeBytes={{ druid.middlemanager['druid.indexer.fork.property.druid.processing.buffer.sizeBytes'] }}
druid.indexer.fork.property.druid.processing.numThreads={{ druid.middlemanager['druid.indexer.fork.property.druid.processing.numThreads'] }}
# Hadoop indexing
druid.indexer.task.hadoopWorkingPath=var/druid/hadoop-tmp
druid.query.groupBy.maxMergingDictionarySize=10000000000
druid.query.groupBy.maxOnDiskStorage=10000000000

View File

@@ -0,0 +1,34 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
druid.service=druid/router
druid.plaintextPort=8088
# HTTP proxy
druid.router.http.numConnections=50
druid.router.http.readTimeout=PT5M
druid.router.http.numMaxThreads=100
druid.server.http.numThreads=100
# Service discovery
druid.router.defaultBrokerServiceName=druid/broker
druid.router.coordinatorServiceName=druid/coordinator
# Management proxy to coordinator / overlord: required for unified web console.
druid.router.managementProxy.enabled=true

View File

@@ -0,0 +1,23 @@
#Image name
image_name: druid
#Image tag
image_tag: 26.0.0
#Container name
container_name: druid
#Component version
component_version: apache-druid-26.0.0
#Minimum number of cluster nodes
min_cluster_num: '3'
#MySQL database name
druid_database: druid
#HDFS config files required in cluster mode
hadoop_config_files:
- { filename: 'hdfs-site.xml' }
- { filename: 'core-site.xml' }

View File

@@ -0,0 +1,8 @@
[zookeeper]
192.168.45.102
[hdfs]
[hbase]
192.168.45.102

View File

@@ -0,0 +1,7 @@
- hosts: hbase
remote_user: root
roles:
- role
vars_files:
- role/vars/main.yml

View File

@@ -0,0 +1,22 @@
#The default installation location
deploy_dir: /data/olap
#The default data storage location, used for storing application data, logs and configuration files
data_dir: /data/olap
hbase:
common:
#The HBase resource isolation function is used to group tables for storage.
enable_rsgroup: true
hmaster:
#Running memory of the HBase HMaster.
java_opt: '-Xmx1024m -Xms1024m'
regionserver:
#Running memory of the HBase HRegionserver.
java_opt: '-Xmx1024m -Xms1024m -Xmn128m'
#This defines the number of threads the region server keeps open to serve requests to tables; it should generally be set to (number of cores - 1)
hbase.regionserver.handler.count: 40
#If any one of a column families' HStoreFiles has grown to exceed this value, the hosting HRegion is split in two.
hbase.hregion.max.filesize: 10737418240
#Indicates the memory used by all read caches. The value can be the actual memory value, expressed in MB
hbase.bucketcache.size: 100

Binary file not shown.

View File

@@ -0,0 +1,27 @@
- name: Loading Image
docker_image:
name: '{{ image_name }}'
tag: '{{ image_tag }}'
load_path: '{{ deploy_dir }}/{{ container_name }}/{{ image_name }}-{{ image_tag }}.tar'
source: load
force_tag: yes
force_source: yes
timeout: 300
- name: Stop Container
docker_container:
name: '{{ item }}'
state: absent
with_items:
- ['HMaster']
- ['HRegionServer']
- name: Start Container
docker_compose:
project_src: '{{ deploy_dir }}/{{ container_name }}/'
- name: Removing Image
docker_image:
name: '{{ image_name }}'
tag: '{{ image_tag }}'
state: absent

View File

@@ -0,0 +1,88 @@
- name: Setting node_nums variable
set_fact: node_nums="{{groups.hbase|length}}"
- name: To terminate execution
fail:
msg: "HBase Cluster mode at least 3 nodes,please checking configurations/hosts -> hbase"
when: node_nums < 3
- name: Checking Hadoop DataNode status
shell: source /etc/profile && hadoop dfsadmin -report | grep "Live datanodes" | grep -E -o "[0-9]"
async: 10
register: datanode_out
run_once: true
delegate_to: "{{ groups.hdfs[0] }}"
- name: Checking Hadoop NameNode status
shell: source /etc/profile && hadoop dfsadmin -report |grep 50010 | wc -l
async: 10
register: namenode_out
run_once: true
delegate_to: "{{ groups.hdfs[0] }}"
- name: To terminate execution
fail:
msg: "If the dependency test fails, check whether the Hadoop cluster is normal"
when: datanode_out.stdout <= '1' and namenode_out.stdout <= '1'
- name: Creating directory
file:
state: directory
path: '{{ deploy_dir }}/{{ container_name }}/{{ item.dir }}'
with_items:
- { dir: 'logs' }
- { dir: 'data' }
- { dir: 'conf' }
- { dir: 'init' }
- name: Unarchiving phoenix and conf
unarchive:
src: 'files/{{ item.file_name }}'
dest: '{{ deploy_dir }}/{{ container_name }}/'
force: yes
with_items:
- { file_name: 'phoenix-hbase-2.2-5.1.2-bin.tar' }
- { file_name: 'conf.zip' }
- name: Copying image to {{ deploy_dir }}/{{ container_name }}/
copy:
src: 'files/{{ image_name }}-{{ image_tag }}.tar'
dest: '{{ deploy_dir }}/{{ container_name }}/'
force: true
notify:
- Loading Image
- name: Fetching Hadoop config files to /tmp
ansible.builtin.fetch:
src: "{{ deploy_dir }}/hadoop-2.7.1/etc/hadoop/{{ item.filename }}"
dest: "/tmp/"
flat: yes
loop: "{{ hadoop_config_files }}"
run_once: true
delegate_to: "{{ groups.hdfs[0] }}"
- name: Copying Hadoop config files to other nodes
ansible.builtin.copy:
src: "/tmp/{{ item.filename }}"
dest: "{{ deploy_dir }}/{{ container_name }}/conf/"
loop: "{{ hadoop_config_files }}"
- name: Copying HBase config files
template:
src: '{{ item.src }}'
dest: '{{ item.dest }}'
mode: '{{ item.mode }}'
with_items:
- { src: 'hbase-site.xml.j2', dest: '{{ deploy_dir }}/{{ container_name }}/phoenix-hbase-2.2-5.1.2-bin/bin/hbase-site.xml', mode: '0644' }
- { src: 'startsql.sh.j2', dest: '{{ deploy_dir }}/{{ container_name }}/phoenix-hbase-2.2-5.1.2-bin/bin/startsql.sh', mode: '0755' }
- { src: 'hbase-site.xml.j2', dest: '{{ deploy_dir }}/{{ container_name }}/conf/hbase-site.xml', mode: '0644' }
- { src: 'regionservers.j2', dest: '{{ deploy_dir }}/{{ container_name }}/conf/regionservers', mode: '0644' }
- { src: 'backup-masters.j2', dest: '{{ deploy_dir }}/{{ container_name }}/conf/backup-masters', mode: '0644' }
- { src: 'hbase-env.sh.j2', dest: '{{ deploy_dir }}/{{ container_name }}/conf/hbase-env.sh', mode: '0755' }
- { src: 'rsgroup.sh.j2', dest: '{{ deploy_dir }}/{{ container_name }}/init/rsgroup.sh', mode: '0755' }
- { src: 'docker-compose.yml.j2', dest: '{{ deploy_dir }}/{{ container_name }}/docker-compose.yml', mode: '0644' }
notify:
- Loading Image
- Start Container
- meta: flush_handlers

View File

@@ -0,0 +1,44 @@
- name: Creating directory
file:
state: directory
path: '{{ deploy_dir }}/{{ container_name }}/{{ item.dir }}'
with_items:
- { dir: 'logs' }
- { dir: 'data' }
- { dir: 'conf' }
- { dir: 'init' }
- name: Unarchiving phoenix and conf
unarchive:
src: 'files/{{ item.file_name }}'
dest: '{{ deploy_dir }}/{{ container_name }}/'
force: yes
with_items:
- { file_name: 'phoenix-hbase-2.2-5.1.2-bin.tar' }
- { file_name: 'conf.zip' }
- name: Copying image to {{ deploy_dir }}/{{ container_name }}/
copy:
src: 'files/{{ image_name }}-{{ image_tag }}.tar'
dest: '{{ deploy_dir }}/{{ container_name }}/'
force: true
notify:
- Loading Image
- name: Copying HBase config files
template:
src: '{{ item.src }}'
dest: '{{ item.dest }}'
mode: '{{ item.mode }}'
with_items:
- { src: 'hbase-site.xml.j2', dest: '{{ deploy_dir }}/{{ container_name }}/phoenix-hbase-2.2-5.1.2-bin/bin/hbase-site.xml', mode: '0644' }
- { src: 'startsql.sh.j2', dest: '{{ deploy_dir }}/{{ container_name }}/phoenix-hbase-2.2-5.1.2-bin/bin/startsql.sh', mode: '0755' }
- { src: 'hbase-site.xml.j2', dest: '{{ deploy_dir }}/{{ container_name }}/conf/hbase-site.xml', mode: '0644' }
- { src: 'regionservers.j2', dest: '{{ deploy_dir }}/{{ container_name }}/conf/regionservers', mode: '0644' }
- { src: 'hbase-env.sh.j2', dest: '{{ deploy_dir }}/{{ container_name }}/conf/hbase-env.sh', mode: '0755' }
- { src: 'docker-compose.yml.j2', dest: '{{ deploy_dir }}/{{ container_name }}/docker-compose.yml', mode: '0644' }
notify:
- Loading Image
- Start Container
- meta: flush_handlers

View File

@@ -0,0 +1,11 @@
- block:
- include: uninstall.yml
- include: "{{ playbook_name }}"
vars:
playbook_name: "{{ 'deploy-cluster.yml' if groups.hbase | length > 1 else 'deploy-standalone.yml' }}"
- include: status-check.yml
when: (operation) == "install"
- block:
- include: uninstall.yml
when: (operation) == "uninstall"

View File

@@ -0,0 +1,47 @@
- name: Creating directory
file:
state: directory
path: '{{ deploy_dir }}/{{ container_name }}/{{ item.dir }}'
with_items:
- { dir: 'logs' }
- { dir: 'data' }
- { dir: 'conf' }
- { dir: 'init' }
- name: Copying conf to {{ deploy_dir }}/{{ container_name }}/
copy:
src: 'files/conf'
dest: '{{ deploy_dir }}/{{ container_name }}/'
force: true
- name: Unarchiving phoenix
unarchive:
src: '{{ role_path }}/../../../software-packages/phoenix-hbase-2.2-5.1.2-bin.tar'
dest: '{{ deploy_dir }}/{{ container_name }}/'
force: yes
- name: Copying image to {{ deploy_dir }}/{{ container_name }}/
copy:
src: '{{ role_path }}/../../../software-packages/{{ image_name }}-{{ image_tag }}.tar'
dest: '{{ deploy_dir }}/{{ container_name }}/'
force: true
notify:
- Loading Image
- name: Copying HBase config files
template:
src: '{{ item.src }}'
dest: '{{ item.dest }}'
mode: '{{ item.mode }}'
with_items:
- { src: 'hbase-site.xml.j2', dest: '{{ deploy_dir }}/{{ container_name }}/phoenix-hbase-2.2-5.1.2-bin/bin/hbase-site.xml', mode: '0644' }
- { src: 'startsql.sh.j2', dest: '{{ deploy_dir }}/{{ container_name }}/phoenix-hbase-2.2-5.1.2-bin/bin/startsql.sh', mode: '0755' }
- { src: 'hbase-site.xml.j2', dest: '{{ deploy_dir }}/{{ container_name }}/conf/hbase-site.xml', mode: '0644' }
- { src: 'regionservers.j2', dest: '{{ deploy_dir }}/{{ container_name }}/conf/regionservers', mode: '0644' }
- { src: 'hbase-env.sh.j2', dest: '{{ deploy_dir }}/{{ container_name }}/conf/hbase-env.sh', mode: '0755' }
- { src: 'docker-compose.yml.j2', dest: '{{ deploy_dir }}/{{ container_name }}/docker-compose.yml', mode: '0644' }
notify:
- Loading Image
- Start Container
- meta: flush_handlers

View File

@@ -0,0 +1,31 @@
- block:
- name: Stopping and removing container
docker_container:
name: '{{ item }}'
state: absent
with_items:
- ['HMaster']
- ['HRegionServer']
- name: Removing old {{ image_name }} image
docker_image:
name: '{{ image_name }}'
tag: '{{ image_tag }}'
state: absent
- name: Ansible delete old {{ deploy_dir }}/{{ container_name }}
file:
path: '{{ deploy_dir }}/{{ container_name }}'
state: absent
- name: Checking ZooKeeper has HBase nodes
shell: "docker exec -it zookeeper zkCli.sh ls / | grep hbase | wc -l"
run_once: true
delegate_to: "{{ groups.zookeeper[0] }}"
register: has_zknode
- name: Delete HBase nodes in ZooKeeper
shell: "docker exec -it zookeeper zkCli.sh rmr /hbase"
run_once: true
delegate_to: "{{ groups.zookeeper[0] }}"
when: has_zknode.stdout >= '1'

View File

@@ -0,0 +1,36 @@
- name: Waiting for HBase to start, 10s
shell: sleep 10
- block:
- name: Check the HBase Master node status
shell: ps -ef | grep "org.apache.hadoop.hbase.master.HMaster" | grep -v grep |wc -l
register: check_master
- name: To terminate execution
fail:
msg: "检测到{{ inventory_hostname }}节点HBase未正常启动请保留日志反馈路径{{ deploy_dir }}/{{ container_name }}/logs"
run_once: true
delegate_to: 127.0.0.1
when: check_master.stdout != '1'
when: inventory_hostname in groups['hbase'][0:3]
- block:
- name: Check the HBase HRegionServer node status
shell: ps -ef | egrep "org.apache.hadoop.hbase.regionserver.HRegionServer" | grep -v grep |wc -l
register: check_region
- name: To terminate execution
fail:
msg: "检测到{{ inventory_hostname }}节点HBase未正常启动请保留日志反馈路径{{ deploy_dir }}/{{ container_name }}/logs"
run_once: true
delegate_to: 127.0.0.1
when: check_region.stdout != '1'
- name: Initializing phoenix
shell: cd {{ deploy_dir }}/{{ container_name }}/phoenix-hbase-2.2-5.1.2-bin/bin/ && ./startsql.sh
- name: Enable RsGroup
shell: cd {{ deploy_dir }}/{{ container_name }}/init/ && ./rsgroup.sh | grep ERROR | egrep -v "already exists|Target RSGroup important is same as source|Source RSGroup important is same as target"
register: result
failed_when: "'ERROR' in result.stdout"
when: hbase.common.enable_rsgroup

View File

@@ -0,0 +1,45 @@
- block:
- name: Stopping and removing container
docker_container:
name: '{{ item }}'
state: absent
with_items:
- ['HMaster']
- ['HRegionServer']
- name: Removing old {{ image_name }} image
docker_image:
name: '{{ image_name }}'
tag: '{{ image_tag }}'
state: absent
- name: Ansible delete old {{ deploy_dir }}/{{ container_name }}
file:
path: '{{ deploy_dir }}/{{ container_name }}'
state: absent
- name: Checking ZooKeeper has HBase nodes
shell: "docker exec -it zookeeper zkCli.sh ls / | grep hbase | wc -l"
run_once: true
delegate_to: "{{ groups.zookeeper[0] }}"
register: has_zknode
- name: Delete HBase nodes in ZooKeeper
shell: "docker exec -it zookeeper zkCli.sh rmr /hbase"
run_once: true
delegate_to: "{{ groups.zookeeper[0] }}"
when: has_zknode.stdout >= '1'
- block:
- name: Checking HDFS has hbase folder
shell: source /etc/profile && hdfs dfs -ls / | grep hbase | wc -l
register: folder_exists
run_once: true
delegate_to: "{{ groups.hdfs[0] }}"
- name: Delete HBase data folder in HDFS
shell: source /etc/profile && hadoop fs -rm -r /hbase
run_once: true
delegate_to: "{{ groups.hdfs[0] }}"
when: folder_exists.stdout >= '1'
when: (groups.hbase) | length > 1

View File

@@ -0,0 +1,2 @@
{{ groups.hbase[1] }}
{{ groups.hbase[2] }}

View File

@@ -0,0 +1,45 @@
version: "3"
services:
{% if inventory_hostname in groups['hbase'][0:3] %}
hmaster:
image: {{ image_name }}:{{ image_tag }}
restart: always
container_name: HMaster
hostname: {{ansible_hostname}}
environment:
MODE: master
volumes:
- "{{ deploy_dir }}/{{ container_name }}/data:/opt/hbase-2.2.3/data"
- "{{ deploy_dir }}/{{ container_name }}/logs:/opt/hbase-2.2.3/logs"
- "{{ deploy_dir }}/{{ container_name }}/conf:/opt/hbase-2.2.3/conf"
network_mode: "host"
regionserver:
image: {{ image_name }}:{{ image_tag }}
restart: always
container_name: HRegionServer
hostname: {{ansible_hostname}}
environment:
MODE: regionserver
volumes:
- "{{ deploy_dir }}/{{ container_name }}/data:/opt/hbase-2.2.3/data"
- "{{ deploy_dir }}/{{ container_name }}/logs:/opt/hbase-2.2.3/logs"
- "{{ deploy_dir }}/{{ container_name }}/conf:/opt/hbase-2.2.3/conf"
network_mode: "host"
depends_on:
- hmaster
{% else %}
regionserver:
image: {{ image_name }}:{{ image_tag }}
restart: always
container_name: HRegionServer
hostname: {{ansible_hostname}}
environment:
MODE: regionserver
volumes:
- "{{ deploy_dir }}/{{ container_name }}/data:/opt/hbase-2.2.3/data"
- "{{ deploy_dir }}/{{ container_name }}/logs:/opt/hbase-2.2.3/logs"
- "{{ deploy_dir }}/{{ container_name }}/conf:/opt/hbase-2.2.3/conf"
network_mode: "host"
{% endif %}

View File

@@ -0,0 +1,143 @@
#
#/**
# * Licensed to the Apache Software Foundation (ASF) under one
# * or more contributor license agreements. See the NOTICE file
# * distributed with this work for additional information
# * regarding copyright ownership. The ASF licenses this file
# * to you under the Apache License, Version 2.0 (the
# * "License"); you may not use this file except in compliance
# * with the License. You may obtain a copy of the License at
# *
# * http://www.apache.org/licenses/LICENSE-2.0
# *
# * Unless required by applicable law or agreed to in writing, software
# * distributed under the License is distributed on an "AS IS" BASIS,
# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# * See the License for the specific language governing permissions and
# * limitations under the License.
# */
# Set environment variables here.
# This script sets variables multiple times over the course of starting an hbase process,
# so try to keep things idempotent unless you want to take an even deeper look
# into the startup scripts (bin/hbase, etc.)
# The java implementation to use. Java 1.7+ required.
export JAVA_HOME=/opt/jdk1.8.0_202
# Extra Java CLASSPATH elements. Optional.
# export HBASE_CLASSPATH=
# The maximum amount of heap to use. Default is left to JVM default.
#export HBASE_HEAPSIZE={heap}
# Uncomment below if you intend to use off heap cache. For example, to allocate 8G of
# offheap, set the value to "8G".
#export HBASE_OFFHEAPSIZE=5G
# Extra Java runtime options.
# Below are what we set by default. May only work with SUN JVM.
# For more on why as well as other possible settings,
# see http://wiki.apache.org/hadoop/PerformanceTuning
export HBASE_OPTS="-XX:+UseConcMarkSweepGC "
# Configure PermSize. Only needed in JDK7. You can safely remove it for JDK8+
export HBASE_REGIONSERVER_OPTS="$HBASE_REGIONSERVER_OPTS {{ hbase.regionserver.java_opt }} -Xss256k -XX:MetaspaceSize=512m -XX:MaxMetaspaceSize=512m -XX:SurvivorRatio=2 -XX:+UseConcMarkSweepGC -XX:+UseParNewGC -XX:+CMSParallelRemarkEnabled -XX:MaxTenuringThreshold=15 -XX:+UseCMSCompactAtFullCollection -XX:CMSFullGCsBeforeCompaction=1 -XX:+UseCMSInitiatingOccupancyOnly -XX:CMSInitiatingOccupancyFraction=70 -XX:-DisableExplicitGC -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Xloggc:/opt/{{ component_version }}/logs/gc-regionserver-%t.log -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=5 -XX:GCLogFileSize=100M -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/opt/{{ component_version }}/logs/"
export HBASE_JMX_BASE="-Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.authenticate=false"
export HBASE_MASTER_OPTS="$HBASE_MASTER_OPTS $HBASE_JMX_BASE {{ hbase.hmaster.java_opt }} -Xss256k -XX:MetaspaceSize=128m -XX:MaxMetaspaceSize=128m -XX:SurvivorRatio=2 -XX:+UseConcMarkSweepGC -XX:+UseParNewGC -XX:+CMSParallelRemarkEnabled -XX:MaxTenuringThreshold=15 -XX:+UseCMSCompactAtFullCollection -XX:CMSFullGCsBeforeCompaction=1 -XX:+UseCMSInitiatingOccupancyOnly -XX:CMSInitiatingOccupancyFraction=70 -XX:-DisableExplicitGC -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Xloggc:/opt/{{ component_version }}/logs/gc-master-%t.log -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=5 -XX:GCLogFileSize=100M -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/opt/{{ component_version }}/logs/ -javaagent:/opt/{{ component_version }}/monitor/jmx_prometheus_javaagent-0.12.0.jar=9907:/opt/{{ component_version }}/monitor/hbase.yaml"
export HBASE_REGIONSERVER_JMX_OPTS="$HBASE_JMX_BASE -javaagent:/opt/{{ component_version }}/monitor/jmx_prometheus_javaagent-0.12.0.jar=9908:/opt/{{ component_version }}/monitor/hbase.yaml"
# Uncomment one of the below three options to enable java garbage collection logging for the server-side processes.
# This enables basic gc logging to the .out file.
# export SERVER_GC_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps"
# This enables basic gc logging to its own file.
# If FILE-PATH is not replaced, the log file(.gc) would still be generated in the HBASE_LOG_DIR .
# export SERVER_GC_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Xloggc:<FILE-PATH>"
# This enables basic GC logging to its own file with automatic log rolling. Only applies to jdk 1.6.0_34+ and 1.7.0_2+.
# If FILE-PATH is not replaced, the log file(.gc) would still be generated in the HBASE_LOG_DIR .
# export SERVER_GC_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Xloggc:<FILE-PATH> -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=1 -XX:GCLogFileSize=512M"
# Uncomment one of the below three options to enable java garbage collection logging for the client processes.
# This enables basic gc logging to the .out file.
# export CLIENT_GC_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps"
# This enables basic gc logging to its own file.
# If FILE-PATH is not replaced, the log file(.gc) would still be generated in the HBASE_LOG_DIR .
# export CLIENT_GC_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Xloggc:<FILE-PATH>"
# This enables basic GC logging to its own file with automatic log rolling. Only applies to jdk 1.6.0_34+ and 1.7.0_2+.
# If FILE-PATH is not replaced, the log file(.gc) would still be generated in the HBASE_LOG_DIR .
# export CLIENT_GC_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Xloggc:<FILE-PATH> -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=1 -XX:GCLogFileSize=512M"
# See the package documentation for org.apache.hadoop.hbase.io.hfile for other configurations
# needed setting up off-heap block caching.
# Uncomment and adjust to enable JMX exporting
# See jmxremote.password and jmxremote.access in $JRE_HOME/lib/management to configure remote password access.
# More details at: http://java.sun.com/javase/6/docs/technotes/guides/management/agent.html
# NOTE: HBase provides an alternative JMX implementation to fix the random ports issue, please see JMX
# section in HBase Reference Guide for instructions.
# export HBASE_JMX_BASE="-Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.authenticate=false"
# export HBASE_MASTER_OPTS="$HBASE_MASTER_OPTS $HBASE_JMX_BASE -Dcom.sun.management.jmxremote.port=10101"
# export HBASE_REGIONSERVER_OPTS="$HBASE_REGIONSERVER_OPTS $HBASE_JMX_BASE -Dcom.sun.management.jmxremote.port=10102"
# export HBASE_THRIFT_OPTS="$HBASE_THRIFT_OPTS $HBASE_JMX_BASE -Dcom.sun.management.jmxremote.port=10103"
# export HBASE_ZOOKEEPER_OPTS="$HBASE_ZOOKEEPER_OPTS $HBASE_JMX_BASE -Dcom.sun.management.jmxremote.port=10104"
# export HBASE_REST_OPTS="$HBASE_REST_OPTS $HBASE_JMX_BASE -Dcom.sun.management.jmxremote.port=10105"
# File naming hosts on which HRegionServers will run. $HBASE_HOME/conf/regionservers by default.
# export HBASE_REGIONSERVERS=${HBASE_HOME}/conf/regionservers
# Uncomment and adjust to keep all the Region Server pages mapped to be memory resident
#HBASE_REGIONSERVER_MLOCK=true
#HBASE_REGIONSERVER_UID="hbase"
# File naming hosts on which backup HMaster will run. $HBASE_HOME/conf/backup-masters by default.
# export HBASE_BACKUP_MASTERS=${HBASE_HOME}/conf/backup-masters
# Extra ssh options. Empty by default.
# export HBASE_SSH_OPTS="-o ConnectTimeout=1 -o SendEnv=HBASE_CONF_DIR"
# Where log files are stored. $HBASE_HOME/logs by default.
export HBASE_LOG_DIR=/opt/{{ component_version }}/logs
# Enable remote JDWP debugging of major HBase processes. Meant for Core Developers
# export HBASE_MASTER_OPTS="$HBASE_MASTER_OPTS -Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=8070"
# export HBASE_REGIONSERVER_OPTS="$HBASE_REGIONSERVER_OPTS -Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=8071"
# export HBASE_THRIFT_OPTS="$HBASE_THRIFT_OPTS -Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=8072"
# export HBASE_ZOOKEEPER_OPTS="$HBASE_ZOOKEEPER_OPTS -Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=8073"
# A string representing this instance of hbase. $USER by default.
# export HBASE_IDENT_STRING=$USER
# The scheduling priority for daemon processes. See 'man nice'.
# export HBASE_NICENESS=10
# The directory where pid files are stored. /tmp by default.
export HBASE_PID_DIR=/opt/{{ component_version }}/pids
# Seconds to sleep between slave commands. Unset by default. This
# can be useful in large clusters, where, e.g., slave rsyncs can
# otherwise arrive faster than the master can service them.
# export HBASE_SLAVE_SLEEP=0.1
# Tell HBase whether it should manage its own instance of Zookeeper or not.
export HBASE_MANAGES_ZK=false
# The default log rolling policy is RFA, where the log file is rolled as per the size defined for the
# RFA appender. Please refer to the log4j.properties file to see more details on this appender.
# In case one needs to do log rolling on a date change, one should set the environment property
# HBASE_ROOT_LOGGER to "<DESIRED_LOG LEVEL>,DRFA".
# For example:
#HBASE_ROOT_LOGGER=INFO,DRFA
HBASE_ROOT_LOGGER=ERROR,DRFA
# The reason for changing default to RFA is to avoid the boundary case of filling out disk space as
# DRFA doesn't put any cap on the log size. Please refer to HBase-5655 for more context.

View File

@@ -0,0 +1,274 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
/**
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-->
<configuration>
{% if groups.hbase | length > 1 %}
<property>
<name>hbase.rootdir</name>
<value>hdfs://ns1/hbase</value>
</property>
{% elif groups.hbase | length == 1 %}
<property>
<name>hbase.rootdir</name>
<value>/opt/hbase-2.2.3/data</value>
</property>
{% endif %}
<property>
<name>hbase.cluster.distributed</name>
<value>true</value>
</property>
{% if groups.hbase | length > 1 %}
<property>
<name>hbase.zookeeper.quorum</name>
<value>{{ groups.zookeeper | join(',') }}</value>
</property>
{% elif groups.hbase | length == 1 %}
<property>
<name>hbase.zookeeper.quorum</name>
<value>{{inventory_hostname}}</value>
</property>
{% endif %}
<property>
<name>hbase.zookeeper.property.clientPort</name>
<value>2181</value>
</property>
<property>
<name>hbase.master.info.port</name>
<value>60010</value>
</property>
<property>
<name>hbase.server.keyvalue.maxsize</name>
<value>5368709120</value>
</property>
<property>
<name>zookeeper.znode.parent</name>
<value>/hbase</value>
</property>
<property>
<name>hbase.rpc.timeout</name>
<value>300000</value>
</property>
<property>
<name>zookeeper.session.timeout</name>
<value>300000</value>
</property>
<!-- Files smaller than this value will be merged during MOB compaction -->
<property>
<name>hbase.mob.compaction.mergeable.threshold</name>
<value>1342177280</value>
</property>
<property>
<name>hbase.mob.file.cache.size</name>
<value>1000</value>
</property>
<!-- MOB cache eviction period -->
<property>
<name>hbase.mob.cache.evict.period</name>
<value>3600</value>
</property>
<!-- Ratio of files retained in the MOB cache after eviction; eviction runs when the cache exceeds hbase.mob.file.cache.size -->
<property>
<name>hbase.mob.cache.evict.remain.ratio</name>
<value>0.5f</value>
</property>
<!-- Enable MOB -->
<property>
<name>hfile.format.version</name>
<value>3</value>
</property>
<property>
<name>hbase.hregion.memstore.flush.size</name>
<value>534217728</value>
</property>
<!-- Number of flush threads -->
<property>
<name>hbase.hstore.flusher.count</name>
<value>8</value>
</property>
<property>
<name>hbase.regionserver.global.memstore.size.lower.limit</name>
<value>0.95</value>
</property>
<property>
<name>hbase.regionserver.global.memstore.size</name>
<value>0.45</value>
</property>
<property>
<name>hfile.block.cache.size</name>
<value>0.3</value>
</property>
<property>
<name>hbase.hregion.memstore.block.multiplier</name>
<value>10</value>
</property>
<property>
<name>hbase.ipc.server.max.callqueue.length</name>
<value>1073741824</value>
</property>
<property>
<name>hbase.regionserver.handler.count</name>
<value>{{ hbase.regionserver['hbase.regionserver.handler.count'] }}</value>
<description>Count of RPC Listener instances spun up on RegionServers.
Same property is used by the Master for count of master handlers.</description>
</property>
<property>
<name>hbase.zookeeper.property.maxClientCnxns</name>
<value>1000</value>
</property>
<property>
<name>hbase.ipc.max.request.size</name>
<value>1173741824</value>
</property>
<property>
<name>hbase.hstore.blockingWaitTime</name>
<value>30000</value>
</property>
<property>
<name>hbase.hstore.blockingStoreFiles</name>
<value>100</value>
</property>
  <!-- Region split parameters -->
  <property>
    <name>hbase.hregion.max.filesize</name>
    <value>{{ hbase.regionserver['hbase.hregion.max.filesize'] }}</value>
  </property>
<property>
<name>hbase.regionserver.regionSplitLimit</name>
<value>1000</value>
</property>
<!-- phoenix -->
  <property>
    <name>phoenix.schema.isNamespaceMappingEnabled</name>
    <value>true</value>
  </property>
  <property>
    <name>phoenix.schema.mapSystemTablesToNamespace</name>
    <value>true</value>
  </property>
<!-- RsGroup -->
<property>
<name>hbase.coprocessor.master.classes</name>
<value>org.apache.hadoop.hbase.rsgroup.RSGroupAdminEndpoint</value>
</property>
<property>
<name>hbase.master.loadbalancer.class</name>
<value>org.apache.hadoop.hbase.rsgroup.RSGroupBasedLoadBalancer</value>
</property>
  <!-- Balance regions per table -->
  <property>
    <name>hbase.master.loadbalance.bytable</name>
    <value>true</value>
  </property>
<property>
<name>hbase.bucketcache.ioengine</name>
<value>offheap</value>
</property>
<property>
<name>hbase.bucketcache.size</name>
<value>{{ hbase.regionserver['hbase.bucketcache.size'] }}</value>
</property>
  <!-- A compaction is triggered once a store has more than this many storefiles -->
<property>
<name>hbase.hstore.compactionThreshold</name>
<value>5</value>
</property>
<property>
<name>hbase.hstore.compaction.min</name>
<value>5</value>
</property>
  <!-- Maximum number of storefiles selected for a single compaction -->
<property>
<name>hbase.hstore.compaction.max</name>
<value>20</value>
</property>
<property>
<name>hbase.hstore.compaction.min.size</name>
<value>134217728</value>
</property>
<property>
<name>hbase.hstore.compaction.max.size</name>
<value>10737418240</value>
</property>
<property>
<name>hbase.regionserver.thread.compaction.small</name>
<value>5</value>
</property>
<property>
<name>hbase.regionserver.thread.compaction.large</name>
<value>5</value>
</property>
<property>
<name>hbase.hregion.majorcompaction</name>
<value>604800000</value>
</property>
</configuration>

View File

@@ -0,0 +1,3 @@
{% for dev_info in groups.hbase %}
{{dev_info}}
{% endfor %}

View File

@@ -0,0 +1,23 @@
#!/bin/bash
source /etc/profile
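# Create an 'important' RSGroup, move the first two RegionServers into it, and pin the report/job result tables to that group so they are served by dedicated RegionServers.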
docker exec -it hbase hbase shell <<EOF
add_rsgroup 'important'
move_servers_rsgroup 'important',['{{ hostvars[groups.hbase[0]]['ansible_hostname'] }}:16020']
move_servers_rsgroup 'important',['{{ hostvars[groups.hbase[1]]['ansible_hostname'] }}:16020']
flush 'tsg:report_result'
move_tables_rsgroup 'important',['tsg:report_result']
flush 'tsg_galaxy:job_result'
move_tables_rsgroup 'important',['tsg_galaxy:job_result']
EOF

View File

@@ -0,0 +1,23 @@
#!/bin/bash
source /etc/profile
BASE_DIR=$(cd $(dirname $0); pwd)
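# Build the ZooKeeper connection string from the [zookeeper] inventory group, then run a non-interactive sqlline.py session against it to verify Phoenix connectivity.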
{% for dev_info in groups.zookeeper -%}
{% if loop.last -%}
{{dev_info}}
{% elif loop.first %}
ZK_SERVER={{dev_info}},
{%- else %}
{{dev_info}},
{%- endif %}
{%- endfor %}
cd $BASE_DIR
exec python sqlline.py $ZK_SERVER <<EOF
!quit
EOF

View File

@@ -0,0 +1,15 @@
#Image name
image_name: hbase
#Image tag
image_tag: 2.2.3-alp-2
#Container name
container_name: hbase
#Component version
component_version: hbase-2.2.3
hadoop_config_files:
- { filename: 'hdfs-site.xml' }
- { filename: 'core-site.xml' }

View File

@@ -0,0 +1,5 @@
[zookeeper]
192.168.45.102
[hdfs]
192.168.45.102

View File

@@ -0,0 +1,7 @@
- hosts: hdfs
remote_user: root
roles:
- role
vars_files:
- role/vars/main.yml

View File

@@ -0,0 +1,23 @@
#The default installation location
deploy_dir: /data/olap
#The default data storage location, used for storing application data, logs and configuration files
data_dir: /data/olap
hadoop:
namenode:
#Running memory of the Hadoop Namenode.
java_opt: '-Xmx1024m -Xms1024m'
#The number of Namenode RPC server threads that listen to requests from clients.
dfs.namenode.handler.count: 30
datanode:
#Running memory of the Hadoop Datanode.
java_opt: '-Xmx1024m -Xms1024m'
#The number of server threads for the datanode.
dfs.datanode.handler.count: 40
journalnode:
#Running memory of the Hadoop JournalNode.
java_opt: '-Xmx1024m -Xms1024m'
zkfc:
#Running memory of the Hadoop DFSZKFailoverController.
java_opt: '-Xmx1024m -Xms1024m'

View File

@@ -0,0 +1,223 @@
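# HDFS HA deployment flow: validate cluster size and the JDK, render the Hadoop configs and watchdog scripts,
# start the JournalNodes, format the NameNode/ZKFC state on the first node, bootstrap the standby NameNode,
# and finally bring up the DataNodes.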
- name: Setting node_nums variable
set_fact: node_nums="{{groups.hdfs|length}}"
- name: To terminate execution
fail:
msg: "Fully Distributed Mode at least 3 nodes, please checking configurations/hosts -> hdfs"
when: node_nums < 3
- name: check Jdk version
shell: source /etc/profile && java -version 2>&1 | grep {{ java_version }} | wc -l
ignore_errors: false
register: jdk_out
- name: To terminate execution
fail:
msg: "JDK is not installed in the target cluster, please check!"
when: jdk_out.stdout != '2'
run_once: true
delegate_to: 127.0.0.1
- name: create hadoop package path:{{ deploy_dir }}
file:
state: directory
path: '{{ item.path }}'
with_items:
- { path: '{{ hdfs_data_dir }}' }
- { path: '{{ deploy_dir }}' }
- name: master_ip to ansible variable
set_fact: master_ip={{groups.hdfs[0]}}
- name: slave1_ip to ansible variable
set_fact: slave1_ip={{groups.hdfs[1]}}
- name: slave2_ip to ansible variable
set_fact: slave2_ip={{groups.hdfs[2]}}
#Unpack the Hadoop tarball
- name: unpack hadoop-2.7.1.tar.gz to {{ deploy_dir }}/
unarchive:
src: 'files/{{ hadoop_version }}.tar.gz'
dest: '{{ deploy_dir }}/'
- name: Copying hadoop config files
template:
src: '{{ item.src }}'
dest: '{{ deploy_dir }}/{{ hadoop_version }}/{{ item.dest }}'
mode: '{{ item.mode }}'
backup: false
with_items:
- { src: 'core-site.xml.j2', dest: 'etc/hadoop/core-site.xml', mode: '0644' }
- { src: 'hdfs-site.xml.j2', dest: 'etc/hadoop/hdfs-site.xml', mode: '0644' }
- { src: 'mapred-site.xml.j2', dest: 'etc/hadoop/mapred-site.xml', mode: '0644' }
- { src: 'slaves.j2', dest: 'etc/hadoop/slaves', mode: '0644' }
- { src: 'hadoop-env.sh.j2', dest: 'etc/hadoop/hadoop-env.sh', mode: '0755' }
- { src: 'set_hdfs_env.sh.j2', dest: 'bin/set_hdfs_env.sh', mode: '0755' }
- name: Copying HDFS config to {{ master_ip }}
template:
src: '{{ item.src }}'
dest: '{{ item.dest }}'
mode: 0755
backup: false
with_items:
- { src: 'daemonscript/dae-hdfsjournal.sh.j2', dest: '{{ deploy_dir }}/{{ hadoop_version }}/sbin/dae-hdfsjournal.sh' }
- { src: 'daemonscript/dae-hdfsmaster.sh.j2', dest: '{{ deploy_dir }}/{{ hadoop_version }}/sbin/dae-hdfsmaster.sh' }
- { src: 'daemonscript/keephdfsmaster.j2', dest: '/etc/init.d/keephdfsmaster' }
- { src: 'daemonscript/keephdfsjournal.j2', dest: '/etc/init.d/keephdfsjournal' }
- { src: 'ini_hdfs.sh.j2', dest: '{{ deploy_dir }}/{{ hadoop_version }}/bin/ini_hdfs.sh' }
run_once: true
delegate_to: "{{ master_ip }}"
- name: Copying HDFS config to {{ slave1_ip }}
template:
src: 'daemonscript/{{ item.src }}'
dest: '{{ item.dest }}'
mode: 0755
backup: yes
with_items:
- { src: 'dae-hdfsjournal.sh.j2', dest: '{{ deploy_dir }}/{{ hadoop_version }}/sbin/dae-hdfsjournal.sh' }
- { src: 'dae-hdfsslave.sh.j2', dest: '{{ deploy_dir }}/{{ hadoop_version }}/sbin/dae-hdfsslave.sh' }
- { src: 'keephdfsslave.j2', dest: '/etc/init.d/keephdfsslave' }
- { src: 'keephdfsjournal.j2', dest: '/etc/init.d/keephdfsjournal' }
run_once: true
delegate_to: "{{ slave1_ip }}"
- name: Copying HDFS config to {{ slave2_ip }}
template:
src: 'daemonscript/{{ item.src }}'
dest: '{{ item.dest }}'
mode: 0755
backup: yes
with_items:
- { src: 'dae-hdfsjournal.sh.j2', dest: '{{ deploy_dir }}/{{ hadoop_version }}/sbin/dae-hdfsjournal.sh' }
- { src: 'keephdfsjournal.j2', dest: '/etc/init.d/keephdfsjournal' }
run_once: true
delegate_facts: true
delegate_to: "{{ slave2_ip }}"
- name: Copying HDFS config to worker nodes
template:
src: 'daemonscript/{{ item.src }}'
dest: '{{ item.dest }}'
mode: 0755
backup: yes
with_items:
- { src: 'dae-hdfsworker.sh.j2', dest: '{{ deploy_dir }}/{{ hadoop_version }}/sbin/dae-hdfsworker.sh' }
- { src: 'keephdfsworker.j2', dest: '/etc/init.d/keephdfsworker' }
- name: set hadoop env
  shell: cd {{ deploy_dir }}/{{ hadoop_version }}/bin/ && ./set_hdfs_env.sh {{ item.operation }}
  with_items:
    - { operation: 'chkconfig' }
    - { operation: 'journal' }
- name: Waiting for the JournalNode to start, sleep 10s
shell: sleep 10
- block:
- name: checking JournalNode status
shell: source /etc/profile && jps | grep JournalNode | grep -v grep | wc -l
register: status_out
- name: checking JournalNode
fail:
msg: "JournalNode节点启动异常请登陆{{ inventory_hostname }},保留日志反馈,路径:{{ deploy_dir }}/{{ hadoop_version }}/logs/*journalnode*"
when: status_out.stdout != '1'
run_once: true
delegate_to: 127.0.0.1
when: inventory_hostname in [master_ip,slave1_ip,slave2_ip]
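# The initialization block below runs once, delegated to the first HDFS host (master_ip); the standby is bootstrapped later instead of being formatted.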
- name: Initialize NameNode/ZKFC and start the master NameNode
block:
- name: initialization hadoop NameNode
shell: sh {{ deploy_dir }}/{{ hadoop_version }}/bin/ini_hdfs.sh namenode | grep "yes" | grep -v grep | wc -l
register: ini_namenode_out
- name: checking namenode init status
fail:
msg: "namenode 初始化异常,请登陆[{{ master_ip }}],保留日志反馈,路径:{{ deploy_dir }}/{{ hadoop_version }}/logs/*namenode*"
when: ini_namenode_out.stdout != '1'
run_once: true
delegate_to: 127.0.0.1
- name: initialization hadoop ZKFC
shell: sh {{ deploy_dir }}/{{ hadoop_version }}/bin/ini_hdfs.sh zkfc | grep "yes" | grep -v grep | wc -l
register: ini_zkfc_out
- name: checking hadoop-zk init status
fail:
msg: "hadoop-zk 初始化异常,请登陆[{{ master_ip }}],保留日志反馈,路径:{{ deploy_dir }}/{{ hadoop_version }}/logs/"
when: ini_zkfc_out.stdout != '1'
run_once: true
delegate_to: 127.0.0.1
- name: start hadoop Master node
shell: cd {{ deploy_dir }}/{{ hadoop_version }}/bin/ && ./set_hdfs_env.sh master
  - name: Waiting for the Master-namenode to start, sleep 20s
shell: sleep 20
- name: checking {{ master_ip }} NameNode status
shell: source /etc/profile && jps | grep NameNode | grep -v grep | wc -l
register: master_namenode_status
- name: checking master NameNode
fail:
msg: "NameNode-master未启动,请登陆[{{ master_ip }}],保留日志反馈,路径:{{ deploy_dir }}/{{ hadoop_version }}/logs/*namenode*"
when: master_namenode_status.stdout != '1'
run_once: true
delegate_to: 127.0.0.1
run_once: true
delegate_facts: true
delegate_to: "{{ master_ip }}"
- name: Start slave NameNode
block:
- name: copying {{ master_ip }} NameNode files to Slave
shell: "yes | {{ deploy_dir }}/{{ hadoop_version }}/bin/hdfs namenode -bootstrapStandby"
- name: start hadoop Slave node
shell: cd {{ deploy_dir }}/{{ hadoop_version }}/bin/ && ./set_hdfs_env.sh slave
  - name: Waiting for the Slave-namenode to start, sleep 60s
shell: sleep 60
- name: checking {{ slave1_ip }} NameNode status
shell: source /etc/profile && jps | grep NameNode | grep -v grep | wc -l
register: slave1_namenode_status
  - name: checking slave1 NameNode
fail:
msg: "NameNode-slave未启动,请登陆[{{ slave1_ip }}],保留日志反馈,路径:{{ deploy_dir }}/{{ hadoop_version }}/logs/*namenode*"
when: slave1_namenode_status.stdout != '1'
run_once: true
delegate_to: 127.0.0.1
run_once: true
delegate_facts: true
delegate_to: "{{ slave1_ip }}"
- name: Start DataNode
block:
- name: start hadoop Worker nodes
shell: cd {{ deploy_dir }}/{{ hadoop_version }}/bin/ && ./set_hdfs_env.sh worker
  - name: Waiting for the DataNode to start, sleep 60s
shell: sleep 60
- name: checking DataNode status
shell: source /etc/profile && jps | grep DataNode | grep -v grep | wc -l
register: datanode_status
- name: checking DataNode
fail:
msg: "DataNode未启动,请登陆[{{ inventory_hostname }}],保留日志反馈,路径:{{ deploy_dir }}/{{ hadoop_version }}/logs/*datanode*"
when: datanode_status.stdout != '1'
run_once: true
delegate_to: 127.0.0.1
- name: delete {{ deploy_dir }}/hadoop-2.7.1.tar.gz
file:
path: "{{ deploy_dir }}/{{ hadoop_version }}.tar.gz"
state: absent

View File

@@ -0,0 +1,9 @@
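# Role entry point: an install always uninstalls any previous deployment first, then deploys and runs the status checks.
# The `operation` variable is expected to be supplied by the caller (e.g. `ansible-playbook ... -e operation=install`); the exact invocation is an assumption.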
- block:
- include: uninstall.yml
- include: deploy.yml
- include: status-check.yml
when: (operation) == "install"
- block:
- include: uninstall.yml
when: (operation) == "uninstall"

View File

@@ -0,0 +1,53 @@
- name: Setting node_nums variable
set_fact: node_nums="{{groups.hdfs|length}}"
- name: Waiting for HDFS to start, sleep 30s
shell: sleep 30
- block:
- name: checking JournalNode status
shell: source /etc/profile && jps | grep JournalNode | grep -v grep | wc -l
register: status_out
- name: checking JournalNode
fail:
msg: "JournalNode节点启动异常请登陆{{ inventory_hostname }},保留日志反馈,路径:{{ deploy_dir }}/{{ hadoop_version }}/logs/*journalnode*"
when: status_out.stdout != '1'
run_once: true
delegate_to: 127.0.0.1
when: inventory_hostname in groups['hdfs'][0:3]
- block:
- name: checking DFSZKFailoverController status
shell: source /etc/profile && jps | grep DFSZKFailoverController | grep -v grep | wc -l
register: status_out
- name: checking DFSZKFailoverController
fail:
msg: "DFSZKFailoverController节点启动异常请登陆{{ inventory_hostname }},保留日志反馈,路径:{{ deploy_dir }}/{{ hadoop_version }}/logs/*zkfc*"
when: status_out.stdout != '1'
run_once: true
delegate_to: 127.0.0.1
- name: checking NameNode status
shell: source /etc/profile && jps | grep NameNode | grep -v grep | wc -l
register: status_out
- name: checking NameNode
fail:
msg: "NameNode节点启动异常请登陆{{ inventory_hostname }},保留日志反馈,路径:{{ deploy_dir }}/{{ hadoop_version }}/logs/*namenode*"
when: status_out.stdout != '1'
run_once: true
delegate_to: 127.0.0.1
when: inventory_hostname in groups['hdfs'][0:2]
- name: checking DataNode status
shell: source /etc/profile && jps | grep DataNode | grep -v grep | wc -l
register: status_out
- name: checking DataNode
fail:
msg: "DFSZKFailoverController节点启动异常请登陆{{ inventory_hostname }},保留日志反馈,路径:{{ deploy_dir }}/{{ hadoop_version }}/logs/*datanode*"
when: status_out.stdout != '1'
run_once: true
delegate_to: 127.0.0.1

View File

@@ -0,0 +1,38 @@
- block:
- name: copy unload_hdfs.sh to {{ deploy_dir }}/
template:
src: 'unload_hdfs.sh.j2'
dest: '{{ deploy_dir }}/unload_hdfs.sh'
force: true
mode: 0755
- name: unload hadoop
shell: cd {{ deploy_dir }} && sh unload_hdfs.sh
- name: Ansible delete {{ deploy_dir }}/unload_hdfs.sh
file:
path: "{{ deploy_dir }}/unload_hdfs.sh"
state: absent
- name: Checking ZooKeeper has Hadoop nodes
shell: docker exec zookeeper zkCli.sh ls / | grep -w "hadoop-ha" | wc -l
run_once: true
delegate_to: "{{ groups.zookeeper[0] }}"
register: has_zknode
- name: Delete Hadoop nodes in ZooKeeper
shell: "docker exec zookeeper zkCli.sh rmr /hadoop-ha"
run_once: true
delegate_to: "{{ groups.zookeeper[0] }}"
when: has_zknode.stdout >= '1'
- name: Check if the Hadoop service already exists
shell: source /etc/profile && jps -l | egrep "org.apache.hadoop.hdfs.qjournal.server.JournalNode|org.apache.hadoop.hdfs.tools.DFSZKFailoverController|org.apache.hadoop.hdfs.server.datanode.DataNode|org.apache.hadoop.hdfs.server.namenode.NameNode" | wc -l
register: check_out
- name: To terminate execution
fail:
msg: "卸载失败,组件可能非本安装部署,请手动卸载后继续安装"
run_once: true
delegate_to: 127.0.0.1
when: check_out.stdout >= '1'

View File

@@ -0,0 +1,67 @@
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://ns1</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>file:{{ hdfs_data_dir }}/tmp</value>
</property>
<property>
<name>io.file.buffer.size</name>
<value>131702</value>
</property>
<property>
<name>hadoop.proxyuser.root.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.root.groups</name>
<value>*</value>
</property>
<property>
<name>hadoop.logfile.size</name>
<value>10000000</value>
<description>The max size of each log file</description>
</property>
<property>
<name>hadoop.logfile.count</name>
<value>1</value>
<description>The max number of log files</description>
</property>
<property>
<name>ha.zookeeper.quorum</name>
{% for dev_info in groups.zookeeper -%}
{% if loop.last -%}
{{dev_info}}:2181</value>
{% elif loop.first %}
<value>{{dev_info}}:2181,
{%- else %}
{{dev_info}}:2181,
{%- endif %}
{%- endfor %}
</property>
<property>
<name>ipc.client.connect.timeout</name>
<value>90000</value>
</property>
</configuration>

View File

@@ -0,0 +1,42 @@
#!/bin/bash
source /etc/profile
BASE_DIR={{ deploy_dir }}
VERSION={{ hadoop_version }}
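# set_log <counter-file> <service-name>: bump the restart counter under logs/ and append an entry to restart.log.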
function set_log(){
RES_SUM_FILE=$BASE_DIR/$VERSION/logs
  if [ ! -d "$RES_SUM_FILE" ]
  then
    mkdir -p $RES_SUM_FILE
  fi
  if [ ! -f "$RES_SUM_FILE/$1" ];then
echo "0" > $RES_SUM_FILE/$1
fi
OLD_NUM=`cat $RES_SUM_FILE/$1`
RESTART_NUM=`expr $OLD_NUM + 1`
echo $RESTART_NUM > $RES_SUM_FILE/$1
  if [ $OLD_NUM -eq "0" ];then
    echo "`date "+%Y-%m-%d %H:%M:%S"` - Hadoop $2 service started for the first time" >> $BASE_DIR/$VERSION/logs/restart.log
  else
    echo "`date +%Y-%m-%d` `date +%H:%M:%S` - Hadoop $2 service went down - restart count -> $RESTART_NUM." >> $BASE_DIR/$VERSION/logs/restart.log
  fi
}
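# Watchdog loop: if no JournalNode process is running, start it again via hadoop-daemon.sh and record the restart.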
while true ; do
HAS_JN=`ps -ef | grep JournalNode | grep -v grep | wc -l`
if [ $HAS_JN -eq "0" ];then
yes | $BASE_DIR/$VERSION/sbin/hadoop-daemon.sh start journalnode > /dev/null
set_log jnRes_sum JournalNode
fi
sleep 60
done

View File

@@ -0,0 +1,53 @@
#!/bin/bash
source /etc/profile
BASE_DIR={{ deploy_dir }}
VERSION={{ hadoop_version }}
function set_log(){
RES_SUM_FILE=$BASE_DIR/$VERSION/logs
  if [ ! -d "$RES_SUM_FILE" ]
  then
    mkdir -p $RES_SUM_FILE
  fi
  if [ ! -f "$RES_SUM_FILE/$1" ];then
echo "0" > $RES_SUM_FILE/$1
fi
OLD_NUM=`cat $RES_SUM_FILE/$1`
RESTART_NUM=`expr $OLD_NUM + 1`
echo $RESTART_NUM > $RES_SUM_FILE/$1
  if [ $OLD_NUM -eq "0" ];then
    echo "`date "+%Y-%m-%d %H:%M:%S"` - Hadoop $2 service started for the first time" >> $BASE_DIR/$VERSION/logs/restart.log
  else
    echo "`date +%Y-%m-%d` `date +%H:%M:%S` - Hadoop $2 service went down - restart count -> $RESTART_NUM." >> $BASE_DIR/$VERSION/logs/restart.log
  fi
}
while true ; do
HAS_NN=`ps -ef | grep NameNode | grep -v grep | wc -l`
HAS_ZKFC=`ps -ef | grep DFSZKFailoverController | grep -v grep | wc -l`
#HAS_NM=`ps -ef | grep NodeManager | grep -v grep | wc -l`
if [ $HAS_NN -eq "0" ];then
yes | $BASE_DIR/$VERSION/sbin/hadoop-daemon.sh start namenode > /dev/null
set_log nnRes_sum NameNode
fi
if [ $HAS_ZKFC -eq "0" ];then
yes | $BASE_DIR/$VERSION/sbin/hadoop-daemon.sh start zkfc > /dev/null
set_log zkfcRes_sum DFSZKFailoverController
fi
#if [ $HAS_NM -eq "0" ];then
# $BASE_DIR/$VERSION/sbin/yarn-daemon.sh start nodemanager > /dev/null
# set_log nmRes_sum NodeManager
#fi
sleep 60
done

View File

@@ -0,0 +1,60 @@
#!/bin/bash
source /etc/profile
BASE_DIR={{ deploy_dir }}
VERSION={{ hadoop_version }}
function set_log(){
RES_SUM_FILE=$BASE_DIR/$VERSION/logs
  if [ ! -d "$RES_SUM_FILE" ]
  then
    mkdir -p $RES_SUM_FILE
  fi
  if [ ! -f "$RES_SUM_FILE/$1" ];then
echo "0" > $RES_SUM_FILE/$1
fi
OLD_NUM=`cat $RES_SUM_FILE/$1`
RESTART_NUM=`expr $OLD_NUM + 1`
echo $RESTART_NUM > $RES_SUM_FILE/$1
  if [ $OLD_NUM -eq "0" ];then
    echo "`date "+%Y-%m-%d %H:%M:%S"` - Hadoop $2 service started for the first time" >> $BASE_DIR/$VERSION/logs/restart.log
  else
    echo "`date +%Y-%m-%d` `date +%H:%M:%S` - Hadoop $2 service went down - restart count -> $RESTART_NUM." >> $BASE_DIR/$VERSION/logs/restart.log
  fi
}
while true ; do
HAS_NN=`ps -ef | grep NameNode | grep -v grep | wc -l`
HAS_ZKFC=`ps -ef | grep DFSZKFailoverController | grep -v grep | wc -l`
#HAS_NM=`ps -ef | grep NodeManager | grep -v grep | wc -l`
#HAS_RM=`ps -ef | grep ResourceManager | grep -v grep | wc -l`
if [ $HAS_NN -eq "0" ];then
yes | $BASE_DIR/$VERSION/sbin/hadoop-daemon.sh start namenode > /dev/null
set_log nnRes_sum NameNode
fi
if [ $HAS_ZKFC -eq "0" ];then
yes | $BASE_DIR/$VERSION/sbin/hadoop-daemon.sh start zkfc > /dev/null
set_log zkfcRes_sum DFSZKFailoverController
fi
#if [ $HAS_NM -eq "0" ];then
# $BASE_DIR/$VERSION/sbin/yarn-daemon.sh start nodemanager > /dev/null
# set_log nmRes_sum NodeManager
#fi
#if [ $HAS_RM -eq "0" ];then
# $BASE_DIR/$VERSION/sbin/yarn-daemon.sh start resourcemanager > /dev/null
# set_log RMRes_sum ResourceManager
#fi
sleep 60
done

View File

@@ -0,0 +1,47 @@
#!/bin/bash
source /etc/profile
BASE_DIR={{ deploy_dir }}
VERSION={{ hadoop_version }}
function set_log(){
RES_SUM_FILE=$BASE_DIR/$VERSION/logs
  if [ ! -d "$RES_SUM_FILE" ]
  then
    mkdir -p $RES_SUM_FILE
  fi
  if [ ! -f "$RES_SUM_FILE/$1" ];then
echo "0" > $RES_SUM_FILE/$1
fi
OLD_NUM=`cat $RES_SUM_FILE/$1`
RESTART_NUM=`expr $OLD_NUM + 1`
echo $RESTART_NUM > $RES_SUM_FILE/$1
  if [ $OLD_NUM -eq "0" ];then
    echo "`date "+%Y-%m-%d %H:%M:%S"` - Hadoop $2 service started for the first time" >> $BASE_DIR/$VERSION/logs/restart.log
  else
    echo "`date +%Y-%m-%d` `date +%H:%M:%S` - Hadoop $2 service went down - restart count -> $RESTART_NUM." >> $BASE_DIR/$VERSION/logs/restart.log
  fi
}
while true ; do
HAS_DN=`ps -ef | grep DataNode | grep -v grep | wc -l`
#HAS_NM=`ps -ef | grep NodeManager | grep -v grep | wc -l`
if [ $HAS_DN -eq "0" ];then
yes | $BASE_DIR/$VERSION/sbin/hadoop-daemon.sh start datanode > /dev/null
set_log dnRes_sum DataNode
fi
#if [ $HAS_NM -eq "0" ];then
# $BASE_DIR/$VERSION/sbin/yarn-daemon.sh start nodemanager > /dev/null
# set_log nmRes_sum NodeManager
#fi
sleep 60
done

View File

@@ -0,0 +1,47 @@
#!/bin/bash
#
# netconsole This loads the netconsole module with the configured parameters.
#
# chkconfig:123456 40 60
# description: keephdfsjournal
source /etc/profile
PRO_NAME=keephdfsjournal
INS_DIR={{ deploy_dir }}
#Version
VERSION={{ hadoop_version }}
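# SysV init wrapper: start launches the dae-hdfsjournal.sh watchdog, stop kills the watchdog and the JournalNode, status reports whether the JournalNode process is up.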
case $1 in
start)
journal=`ps -ef | grep dae-hdfsjournal.sh | grep -v grep | wc -l`
if [ $journal -lt 1 ];then
nohup $INS_DIR/$VERSION/sbin/dae-hdfsjournal.sh > /dev/null 2>&1 &
fi
;;
stop)
HAS_KEEP_SHELL=`ps -ef | grep dae-hdfsjournal.sh | grep -v grep | awk '{print $2}'`
if [ $HAS_KEEP_SHELL ];then
echo "守护进程PID$HAS_KEEP_SHELL"
kill -9 $HAS_KEEP_SHELL
fi
sh $INS_DIR/$VERSION/sbin/hadoop-daemon.sh stop journalnode > /dev/null
;;
status)
num=`ps -ef | grep JournalNode | grep -v grep | wc -l`
if [ "$num" -eq "1" ];then
echo "JournalNode进程已启动"
else
echo "JournalNode进程未启动"
fi
;;
* )
echo "use keephdfsjournal [start|stop|status]"
;;
esac

View File

@@ -0,0 +1,42 @@
#!/bin/bash
#
# netconsole This loads the netconsole module with the configured parameters.
#
# chkconfig:123456 40 60
# description: keephdfsmaster
source /etc/profile
PRO_NAME=keephdfsmaster
INS_DIR={{ deploy_dir }}
#Version
VERSION={{ hadoop_version }}
case $1 in
start)
master=`ps -ef | grep dae-hdfsmaster.sh | grep -v grep | wc -l`
if [ $master -lt 1 ];then
nohup $INS_DIR/$VERSION/sbin/dae-hdfsmaster.sh > /dev/null 2>&1 &
fi
;;
stop)
HAS_KEEP_SHELL=`ps -ef | grep dae-hdfsmaster.sh | grep -v grep | awk '{print $2}'`
if [ $HAS_KEEP_SHELL ];then
echo "守护进程PID$HAS_KEEP_SHELL"
kill -9 $HAS_KEEP_SHELL
fi
sh $INS_DIR/$VERSION/sbin/hadoop-daemon.sh stop namenode > /dev/null
sh $INS_DIR/$VERSION/sbin/hadoop-daemon.sh stop zkfc > /dev/null
;;
status)
hdfs haadmin -getServiceState nn1
hdfs dfsadmin -report
;;
* )
echo "use keephdfsmaster [start|stop|status]"
;;
esac

View File

@@ -0,0 +1,42 @@
#!/bin/bash
#
# netconsole This loads the netconsole module with the configured parameters.
#
# chkconfig:123456 40 60
# description: keephdfsslave
source /etc/profile
PRO_NAME=keephdfsslave
INS_DIR={{ deploy_dir }}
#Version
VERSION={{ hadoop_version }}
case $1 in
start)
slave=`ps -ef | grep dae-hdfsslave.sh | grep -v grep | wc -l`
if [ $slave -lt 1 ];then
nohup $INS_DIR/$VERSION/sbin/dae-hdfsslave.sh > /dev/null 2>&1 &
fi
;;
stop)
HAS_KEEP_SHELL=`ps -ef | grep dae-hdfsslave.sh | grep -v grep | awk '{print $2}'`
if [ $HAS_KEEP_SHELL ];then
echo "守护进程PID$HAS_KEEP_SHELL"
kill -9 $HAS_KEEP_SHELL
fi
sh $INS_DIR/$VERSION/sbin/hadoop-daemon.sh stop namenode > /dev/null
sh $INS_DIR/$VERSION/sbin/hadoop-daemon.sh stop zkfc > /dev/null
;;
status)
hdfs haadmin -getServiceState nn2
hdfs dfsadmin -report
;;
* )
echo "use keephdfsslave [start|stop|status]"
;;
esac

View File

@@ -0,0 +1,47 @@
#!/bin/bash
#
# netconsole This loads the netconsole module with the configured parameters.
#
# chkconfig:123456 40 60
# description: keephdfsworker
source /etc/profile
PRO_NAME=keephdfsworker
INS_DIR={{ deploy_dir }}
#Version
VERSION={{ hadoop_version }}
case $1 in
start)
worker=`ps -ef | grep dae-hdfsworker.sh | grep -v grep | wc -l`
if [ $worker -lt 1 ];then
nohup $INS_DIR/$VERSION/sbin/dae-hdfsworker.sh > /dev/null 2>&1 &
fi
;;
stop)
HAS_KEEP_SHELL=`ps -ef | grep dae-hdfsworker.sh | grep -v grep | awk '{print $2}'`
if [ $HAS_KEEP_SHELL ];then
echo "守护进程PID$HAS_KEEP_SHELL"
kill -9 $HAS_KEEP_SHELL
fi
sh $INS_DIR/$VERSION/sbin/hadoop-daemon.sh stop datanode > /dev/null
;;
status)
num=`ps -ef | grep DataNode | grep -v grep | wc -l`
if [ "$num" -eq "1" ];then
echo "DataNode进程已启动"
else
echo "DataNode进程未启动"
fi
;;
* )
echo "use keephdfsworker [start|stop|status]"
;;
esac

View File

@@ -0,0 +1,105 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Set Hadoop-specific environment variables here.
# The only required environment variable is JAVA_HOME. All others are
# optional. When running a distributed configuration it is best to
# set JAVA_HOME in this file, so that it is correctly defined on
# remote nodes.
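# Expose NameNode/DataNode metrics to Prometheus through the bundled jmx_exporter javaagent (ports 9905 and 9906).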
export HADOOP_NAMENODE_JMX_OPTS="-Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.local.only=false -javaagent:{{ deploy_dir }}/{{ hadoop_version }}/monitor/jmx_prometheus_javaagent-0.12.0.jar=9905:{{ deploy_dir }}/{{ hadoop_version }}/monitor/hdfs.yaml"
export HADOOP_DATANODE_JMX_OPTS="-Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.local.only=false -javaagent:{{ deploy_dir }}/{{ hadoop_version }}/monitor/jmx_prometheus_javaagent-0.12.0.jar=9906:{{ deploy_dir }}/{{ hadoop_version }}/monitor/hdfs.yaml"
# The java implementation to use.
#export HADOOP_HEAPSIZE=m
#export JAVA_HOME=/usr/local/jdk/jdk1.8.0_73
export JAVA_HOME=$JAVA_HOME
# The jsvc implementation to use. Jsvc is required to run secure datanodes
# that bind to privileged ports to provide authentication of data transfer
# protocol. Jsvc is not required if SASL is configured for authentication of
# data transfer protocol using non-privileged ports.
#export JSVC_HOME=${JSVC_HOME}
export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-"/etc/hadoop"}
# Extra Java CLASSPATH elements. Automatically insert capacity-scheduler.
for f in $HADOOP_HOME/contrib/capacity-scheduler/*.jar; do
if [ "$HADOOP_CLASSPATH" ]; then
export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:$f
else
export HADOOP_CLASSPATH=$f
fi
done
# The maximum amount of heap to use, in MB. Default is 1000.
#export HADOOP_HEAPSIZE=
#export HADOOP_NAMENODE_INIT_HEAPSIZE=""
# Extra Java runtime options. Empty by default.
export HADOOP_OPTS="$HADOOP_OPTS -Djava.net.preferIPv4Stack=true"
# Command specific options appended to HADOOP_OPTS when specified
export HADOOP_NAMENODE_OPTS="$HADOOP_NAMENODE_OPTS {{ hadoop.namenode.java_opt }} -Xss256k -XX:MetaspaceSize=128m -XX:MaxMetaspaceSize=256m -XX:SurvivorRatio=2 -XX:+UseConcMarkSweepGC -XX:+UseParNewGC -XX:+CMSParallelRemarkEnabled -XX:MaxTenuringThreshold=15 -XX:+UseCMSCompactAtFullCollection -XX:CMSFullGCsBeforeCompaction=1 -XX:+UseCMSInitiatingOccupancyOnly -XX:CMSInitiatingOccupancyFraction=70 -XX:-DisableExplicitGC -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Xloggc:{{ deploy_dir }}/{{ hadoop_version }}/logs/gc-namenode-%t.log -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=5 -XX:GCLogFileSize=100M -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath={{ deploy_dir }}/{{ hadoop_version }}/logs/ -Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender}"
export HADOOP_DATANODE_OPTS="$HADOOP_DATANODE_OPTS {{ hadoop.datanode.java_opt }} -Xss256k -XX:MetaspaceSize=128m -XX:MaxMetaspaceSize=256m -XX:SurvivorRatio=2 -XX:+UseConcMarkSweepGC -XX:+UseParNewGC -XX:+CMSParallelRemarkEnabled -XX:MaxTenuringThreshold=15 -XX:+UseCMSCompactAtFullCollection -XX:CMSFullGCsBeforeCompaction=1 -XX:+UseCMSInitiatingOccupancyOnly -XX:CMSInitiatingOccupancyFraction=70 -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Xloggc:{{ deploy_dir }}/{{ hadoop_version }}/logs/gc-datanode-%t.log -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=5 -XX:GCLogFileSize=100M -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath={{ deploy_dir }}/{{ hadoop_version }}/logs/ -Dhadoop.security.logger=ERROR,RFAS"
export HADOOP_JOURNALNODE_OPTS="$HADOOP_JOURNALNODE_OPTS {{ hadoop.journalnode.java_opt }}"
export HADOOP_ZKFC_OPTS="$HADOOP_ZKFC_OPTS {{ hadoop.zkfc.java_opt }}"
export HADOOP_SECONDARYNAMENODE_OPTS="-Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender} $HADOOP_SECONDARYNAMENODE_OPTS"
export HADOOP_NFS3_OPTS="$HADOOP_NFS3_OPTS"
export HADOOP_PORTMAP_OPTS="-Xmx512m $HADOOP_PORTMAP_OPTS"
# The following applies to multiple commands (fs, dfs, fsck, distcp etc)
export HADOOP_CLIENT_OPTS="-Xmx512m $HADOOP_CLIENT_OPTS"
#HADOOP_JAVA_PLATFORM_OPTS="-XX:-UsePerfData $HADOOP_JAVA_PLATFORM_OPTS"
# On secure datanodes, user to run the datanode as after dropping privileges.
# This **MUST** be uncommented to enable secure HDFS if using privileged ports
# to provide authentication of data transfer protocol. This **MUST NOT** be
# defined if SASL is configured for authentication of data transfer protocol
# using non-privileged ports.
export HADOOP_SECURE_DN_USER=${HADOOP_SECURE_DN_USER}
# Where log files are stored. $HADOOP_HOME/logs by default.
#export HADOOP_LOG_DIR=${HADOOP_LOG_DIR}/$USER
# Where log files are stored in the secure data environment.
export HADOOP_SECURE_DN_LOG_DIR=${HADOOP_LOG_DIR}/${HADOOP_HDFS_USER}
###
# HDFS Mover specific parameters
###
# Specify the JVM options to be used when starting the HDFS Mover.
# These options will be appended to the options specified as HADOOP_OPTS
# and therefore may override any similar flags set in HADOOP_OPTS
#
# export HADOOP_MOVER_OPTS=""
###
# Advanced Users Only!
###
# The directory where pid files are stored. /tmp by default.
# NOTE: this should be set to a directory that can only be written to by
# the user that will run the hadoop daemons. Otherwise there is the
# potential for a symlink attack.
export HADOOP_PID_DIR={{ deploy_dir }}/{{ hadoop_version }}/pids
export HADOOP_SECURE_DN_PID_DIR=${HADOOP_PID_DIR}
# A string representing this instance of hadoop. $USER by default.
export HADOOP_IDENT_STRING=$USER

View File

@@ -0,0 +1,142 @@
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
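<!-- HA layout rendered from the inventory: nameservice ns1 with NameNodes nn1/nn2 on the first two hdfs hosts and a JournalNode quorum on the first three. -->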
<property>
<name>dfs.namenode.name.dir</name>
<value>file:{{ hdfs_data_dir }}/dfs/name</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>file:{{ hdfs_data_dir }}/dfs/data</value>
</property>
<property>
<name>dfs.replication</name>
<value>2</value>
</property>
<property>
<name>dfs.webhdfs.enabled</name>
<value>true</value>
</property>
<property>
<name>dfs.permissions</name>
<value>false</value>
</property>
<property>
<name>dfs.permissions.enabled</name>
<value>false</value>
</property>
<property>
<name>dfs.nameservices</name>
<value>ns1</value>
</property>
<property>
<name>dfs.blocksize</name>
<value>134217728</value>
</property>
<property>
<name>dfs.ha.namenodes.ns1</name>
<value>nn1,nn2</value>
</property>
<!-- RPC address of nn1 (the host where nn1 runs) -->
<property>
<name>dfs.namenode.rpc-address.ns1.nn1</name>
<value>{{ groups.hdfs[0] }}:9000</value>
</property>
<!-- HTTP address of nn1 (external access) -->
<property>
<name>dfs.namenode.http-address.ns1.nn1</name>
<value>{{ groups.hdfs[0] }}:50070</value>
</property>
<!-- RPC address of nn2 (the host where nn2 runs) -->
<property>
<name>dfs.namenode.rpc-address.ns1.nn2</name>
<value>{{ groups.hdfs[1] }}:9000</value>
</property>
<!-- HTTP address of nn2 (external access) -->
<property>
<name>dfs.namenode.http-address.ns1.nn2</name>
<value>{{ groups.hdfs[1] }}:50070</value>
</property>
<!-- Shared edits location for the NameNode metadata on the JournalNodes (usually co-located with ZooKeeper) -->
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://{{groups.hdfs[0]}}:8485;{{groups.hdfs[1]}}:8485;{{groups.hdfs[2]}}:8485/ns1</value>
</property>
<!-- Local disk location where the JournalNodes keep their data -->
<property>
<name>dfs.journalnode.edits.dir</name>
<value>{{ hdfs_data_dir }}/journal</value>
</property>
<!-- Proxy provider the HDFS client uses to determine which NameNode is currently active -->
<property>
<name>dfs.client.failover.proxy.provider.ns1</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<!-- Fencing methods used during automatic failover: sshfence logs in to the old active NameNode and kills it, with shell(true) as a fallback -->
<property>
<name>dfs.ha.fencing.methods</name>
<value>sshfence
shell(true)</value>
</property>
<!-- Passwordless SSH private key, needed only for the sshfence method -->
<property>
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>/root/.ssh/id_rsa</value>
</property>
<!-- Timeout for the sshfence method; optional when a shell fencing method is used -->
<property>
<name>dfs.ha.fencing.ssh.connect-timeout</name>
<value>30000</value>
</property>
<!-- Enable automatic failover -->
<property>
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
</property>
<property>
<name>dfs.datanode.max.transfer.threads</name>
<value>8192</value>
</property>
<!-- Number of NameNode RPC handler threads; increasing it costs little extra memory -->
<property>
<name>dfs.namenode.handler.count</name>
<value>{{ hadoop.namenode['dfs.namenode.handler.count'] }}</value>
</property>
<!-- Number of DataNode RPC handler threads; increasing it uses more memory -->
<property>
<name>dfs.datanode.handler.count</name>
<value>{{ hadoop.datanode['dfs.datanode.handler.count'] }}</value>
</property>
<!-- Bandwidth the balancer may use -->
<property>
<name>dfs.balance.bandwidthPerSec</name>
<value>104857600</value>
</property>
<!-- Reserved disk space, in bytes, that HDFS will not use -->
<property>
<name>dfs.datanode.du.reserved</name>
<value>53687091200</value>
</property>
<!-- DataNode-NameNode connection timeout in milliseconds: 2 * heartbeat.recheck.interval + 30000 -->
<property>
<name>heartbeat.recheck.interval</name>
<value>100000</value>
</property>
</configuration>

View File

@@ -0,0 +1,46 @@
#!/bin/bash
MASTER_IP={{ groups.hdfs[0] }}
SLAVE1_IP={{ groups.hdfs[1] }}
BASE_DIR={{ deploy_dir }}
VERSION={{ hadoop_version }}
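# One-time initialization helpers: format the NameNode metadata directory or the HA znode in ZooKeeper; each prints yes/no so the playbook can verify the result.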
function ini_namenode() {
cd $BASE_DIR/$VERSION/bin
yes | ./hadoop namenode -format
if [ $? -eq "0" ];then
# scp -r $BASE_DIR/hadoop/ root@$SLAVE1_IP:$BASE_DIR/
echo yes
else
echo no
fi
}
function ini_zk() {
cd $BASE_DIR/$VERSION/bin
yes | ./hdfs zkfc -formatZK
if [ $? -eq "0" ];then
echo yes
else
echo no
fi
}
case $1 in
  namenode)
ini_namenode
;;
  zkfc)
ini_zk
;;
* )
echo "请输入已有的指令."
;;
esac

View File

@@ -0,0 +1,33 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.jobhistory.address</name>
<value>{{ groups.hdfs[0] }}:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>{{ groups.hdfs[0] }}:19888</value>
</property>
</configuration>

View File

@@ -0,0 +1,71 @@
#!/bin/bash
source /etc/profile
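# chkconfig) registers the keephdfs* init scripts and publishes HADOOP_HOME via /etc/profile.d/hadoop.sh; journal|master|slave|worker start the matching keep* service and its watchdog.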
function setChkconfig(){
echo -e "\n#hadoop\nexport HADOOP_HOME={{ deploy_dir }}/{{ hadoop_version }}\nexport PATH=\$HADOOP_HOME/sbin:\$PATH\nexport PATH=\$HADOOP_HOME/bin:\$PATH\nexport HADOOP_CLASSPATH=\`hadoop classpath\`" >> /etc/profile.d/hadoop.sh
chmod +x /etc/profile.d/hadoop.sh
if [ -x '/etc/init.d/keephdfsmaster' ];then
chkconfig --add keephdfsmaster
chkconfig keephdfsmaster on
fi
if [ -x '/etc/init.d/keephdfsslave' ];then
chkconfig --add keephdfsslave
chkconfig keephdfsslave on
fi
if [ -x '/etc/init.d/keephdfsworker' ];then
chkconfig --add keephdfsworker
chkconfig keephdfsworker on
fi
if [ -x '/etc/init.d/keephdfsjournal' ];then
chkconfig --add keephdfsjournal
chkconfig keephdfsjournal on
fi
}
case $1 in
journal)
if [ -x '/etc/init.d/keephdfsjournal' ];then
service keephdfsjournal start && sleep 5
journal_dae=`ps -ef | grep dae-hdfsjournal.sh | grep -v grep | wc -l`
if [ $journal_dae -lt 1 ];then
nohup {{ deploy_dir }}/{{ hadoop_version }}/sbin/dae-hdfsjournal.sh > /dev/null 2>&1 &
fi
fi
;;
master)
if [ -x '/etc/init.d/keephdfsmaster' ];then
service keephdfsmaster start && sleep 5
master_dae=`ps -ef | grep dae-hdfsmaster.sh | grep -v grep | wc -l`
if [ $master_dae -lt 1 ];then
nohup {{ deploy_dir }}/{{ hadoop_version }}/sbin/dae-hdfsmaster.sh > /dev/null 2>&1 &
fi
fi
;;
slave)
if [ -x '/etc/init.d/keephdfsslave' ];then
service keephdfsslave start && sleep 5
slave_dae=`ps -ef | grep dae-hdfsslave.sh | grep -v grep | wc -l`
if [ $slave_dae -lt 1 ];then
nohup {{ deploy_dir }}/{{ hadoop_version }}/sbin/dae-hdfsslave.sh > /dev/null 2>&1 &
fi
fi
;;
worker)
if [ -x '/etc/init.d/keephdfsworker' ];then
service keephdfsworker start && sleep 5
worker_dae=`ps -ef | grep dae-hdfsworker.sh | grep -v grep | wc -l`
if [ $worker_dae -lt 1 ];then
nohup {{ deploy_dir }}/{{ hadoop_version }}/sbin/dae-hdfsworker.sh > /dev/null 2>&1 &
fi
fi
;;
chkconfig)
setChkconfig;;
* )
;;
esac

View File

@@ -0,0 +1,4 @@
{% set combined_group = groups.hdfs %}
{% for dev_info in combined_group %}
{{dev_info}}
{% endfor %}

View File

@@ -0,0 +1,86 @@
#!/bin/bash
source /etc/profile
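# Uninstall helper: stop and deregister the keephdfs* services, kill any remaining HDFS JVMs, then remove the install and data directories and the profile snippet.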
function killService(){
keeppath='/etc/init.d/keephdfsjournal'
if [ -x $keeppath ];then
service keephdfsjournal stop
chkconfig keephdfsjournal off
systemctl daemon-reload
rm -rf /etc/init.d/keephdfsjournal
fi
keeppath='/etc/init.d/keephdfsmaster'
if [ -x $keeppath ];then
service keephdfsmaster stop
chkconfig keephdfsmaster off
systemctl daemon-reload
rm -rf /etc/init.d/keephdfsmaster
fi
keeppath='/etc/init.d/keephdfsslave'
if [ -x $keeppath ];then
service keephdfsslave stop
chkconfig keephdfsslave off
systemctl daemon-reload
rm -rf /etc/init.d/keephdfsslave
fi
keeppath='/etc/init.d/keephdfsworker'
if [ -x $keeppath ];then
service keephdfsworker stop
chkconfig keephdfsworker off
systemctl daemon-reload
rm -rf /etc/init.d/keephdfsworker
fi
}
function killPid(){
livenum=`jps -l | egrep -w "org.apache.hadoop.hdfs.qjournal.server.JournalNode" | grep -v grep |wc -l`
if [ $livenum -ne 0 ];then
keeppid=`jps -l |egrep -w "org.apache.hadoop.hdfs.qjournal.server.JournalNode" | awk '{print $1}'`
kill -9 $keeppid
fi
livenum=`jps -l | egrep -w "org.apache.hadoop.hdfs.tools.DFSZKFailoverController" | grep -v grep |wc -l`
if [ $livenum -ne 0 ];then
keeppid=`jps -l |egrep -w "org.apache.hadoop.hdfs.tools.DFSZKFailoverController" | awk '{print $1}'`
kill -9 $keeppid
fi
livenum=`jps -l | egrep -w "org.apache.hadoop.hdfs.server.datanode.DataNode" | grep -v grep |wc -l`
if [ $livenum -ne 0 ];then
keeppid=`jps -l |egrep -w "org.apache.hadoop.hdfs.server.datanode.DataNode" | awk '{print $1}'`
kill -9 $keeppid
fi
livenum=`jps -l | egrep -w "org.apache.hadoop.hdfs.server.namenode.NameNode" | grep -v grep |wc -l`
if [ $livenum -ne 0 ];then
keeppid=`jps -l |egrep -w "org.apache.hadoop.hdfs.server.namenode.NameNode" | awk '{print $1}'`
kill -9 $keeppid
fi
}
function drop_folder(){
FOLDER_NAME=$1
if [ -d "$FOLDER_NAME" ];then
rm -rf $FOLDER_NAME
fi
}
function drop_file(){
FILE_NAME=$1
if [ -f "$FILE_NAME" ];then
rm -rf $FILE_NAME
fi
}
killService
sleep 15
killPid
drop_folder {{ deploy_dir }}/{{ hadoop_version }}
drop_folder {{ data_dir }}/{{ hadoop_version }}
drop_file /etc/profile.d/hadoop.sh

View File

@@ -0,0 +1,8 @@
#Hadoop version
hadoop_version: hadoop-2.7.1
#Data directory
hdfs_data_dir: "{{ data_dir }}/{{ hadoop_version }}/data/hadoop"
#JDK version
java_version: 1.8.0_73

View File

@@ -0,0 +1,7 @@
[zookeeper]
192.168.45.102
[hdfs]
[yarn]
192.168.45.102

View File

@@ -0,0 +1,7 @@
- hosts: yarn
remote_user: root
roles:
- role
vars_files:
- role/vars/main.yml

View File

@@ -0,0 +1,56 @@
#The default installation location
deploy_dir: /data/olap
#The default data storage location, used for storing application data, logs and configuration files
data_dir: /data/olap
hadoop:
namenode:
#Running memory of the Hadoop Namenode.
java_opt: '-Xmx1024m -Xms1024m'
#The number of Namenode RPC server threads that listen to requests from clients.
dfs.namenode.handler.count: 30
datanode:
#Running memory of the Hadoop Datanode.
java_opt: '-Xmx1024m -Xms1024m'
#The number of server threads for the datanode.
dfs.datanode.handler.count: 40
journalnode:
#Running memory of the Hadoop JournalNode.
java_opt: '-Xmx1024m -Xms1024m'
zkfc:
#Running memory of the Hadoop DFSZKFailoverController.
java_opt: '-Xmx1024m -Xms1024m'
yarn:
resourcemanager:
#Running memory of the Hadoop ResourceManager.
java_opt: '-Xmx1024m -Xms1024m'
nodemanager:
#Running memory of the Hadoop NodeManager.
java_opt: '-Xmx1024m -Xms1024m'
#Amount of physical memory, in MB, that can be allocated for containers.
yarn.nodemanager.resource.memory-mb: 16384
#The maximum allocation for every container request at the RM in MBs.
yarn.scheduler.maximum-allocation-mb: 16384
#Number of vcores that can be allocated for containers. This is used by the RM scheduler when allocating resources for containers.
yarn.nodemanager.resource.cpu-vcores: 48
#The maximum allocation for every container request at the RM in terms of virtual CPU cores.
yarn.scheduler.maximum-allocation-vcores: 48
flink:
#Total Process Memory size for the JobManager.
jobmanager.memory.process.size: 1024M
#Total Process Memory size for the TaskExecutors.
taskmanager.memory.process.size: 2048M
#This is the size of off-heap memory managed for sorting, hash tables, caching of intermediate results and state backend.
taskmanager.memory.managed.size: 128M
#Framework Off-Heap Memory size for TaskExecutors. This is the size of off-heap memory reserved for TaskExecutor framework
taskmanager.memory.framework.off-heap.size: 128M
#JVM Metaspace Size for the TaskExecutors.
taskmanager.memory.jvm-metaspace.size: 256M
#Max Network Memory size for TaskExecutors. Network Memory is off-heap memory reserved for ShuffleEnvironment.
taskmanager.memory.network.max: 256M
#The number of parallel operator or user function instances that a single TaskManager can run.
#This value is typically proportional to the number of physical CPU cores that the TaskManager's machine has (e.g., equal to the number of cores, or half the number of cores).
taskmanager.numberOfTaskSlots: 1

View File

@@ -0,0 +1,194 @@
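# YARN cluster deployment: hosts without an existing HDFS install get the full Hadoop tree, HDFS-backed hosts only receive the YARN additions;
# ResourceManager and JobHistoryServer run on the first two yarn hosts, NodeManagers on the remaining hosts (on every host for small clusters below cluster_limit), and Flink is unpacked on top for yarn-session jobs.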
- name: Setting node_nums variable
set_fact: node_nums="{{groups.hdfs|length}}"
- name: To terminate execution
fail:
msg: "Fully Distributed Mode at least 3 nodes, please checking configurations/hosts -> hdfs"
when: node_nums < 3
- name: check Jdk version
shell: source /etc/profile && java -version 2>&1 | grep {{ java_version }} | wc -l
ignore_errors: false
register: jdk_out
- name: To terminate execution
fail:
msg: "JDK is not installed in the target cluster, please check!"
when: jdk_out.stdout != '2'
run_once: true
delegate_to: 127.0.0.1
- name: create hadoop package path:{{ deploy_dir }}
file:
state: directory
path: '{{ deploy_dir }}'
- block:
- name: unpack hadoop-2.7.1.tar.gz to {{ deploy_dir }}/
unarchive:
src: 'files/{{ hadoop_version }}.tar.gz'
dest: '{{ deploy_dir }}/'
- name: copying yarn master config files
template:
src: '{{ item.src }}'
dest: '{{ deploy_dir }}/{{ hadoop_version }}/{{ item.dest }}'
mode: '{{ item.mode }}'
backup: false
with_items:
- { src: 'yarn-site.xml.j2', dest: 'etc/hadoop/yarn-site.xml', mode: '0644' }
- { src: 'slaves.j2', dest: 'etc/hadoop/slaves', mode: '0644' }
- { src: 'set_yarn_env.sh.j2', dest: 'bin/set_yarn_env.sh', mode: '0755' }
- { src: 'core-site.xml.j2', dest: 'etc/hadoop/core-site.xml', mode: '0644' }
- { src: 'hdfs-site.xml.j2', dest: 'etc/hadoop/hdfs-site.xml', mode: '0644' }
- { src: 'mapred-site.xml.j2', dest: 'etc/hadoop/mapred-site.xml', mode: '0644' }
- { src: 'capacity-scheduler.xml.j2', dest: 'etc/hadoop/capacity-scheduler.xml', mode: '0644' }
- { src: 'yarn-env.sh.j2', dest: 'etc/hadoop/yarn-env.sh', mode: '0755' }
- { src: 'hadoop-env.sh.j2', dest: 'etc/hadoop/hadoop-env.sh', mode: '0755' }
when: inventory_hostname not in groups['hdfs']
- name: copying yarn config files to nodes already running HDFS
template:
src: '{{ item.src }}'
dest: '{{ deploy_dir }}/{{ hadoop_version }}/{{ item.dest }}'
mode: '{{ item.mode }}'
backup: false
with_items:
- { src: 'yarn-site.xml.j2', dest: 'etc/hadoop/yarn-site.xml', mode: '0644' }
- { src: 'slaves.j2', dest: 'etc/hadoop/slaves', mode: '0644' }
- { src: 'mapred-site.xml.j2', dest: 'etc/hadoop/mapred-site.xml', mode: '0644' }
- { src: 'yarn-env.sh.j2', dest: 'etc/hadoop/yarn-env.sh', mode: '0755' }
- { src: 'set_yarn_env.sh.j2', dest: 'bin/set_yarn_env.sh', mode: '0755' }
- { src: 'capacity-scheduler.xml.j2', dest: 'etc/hadoop/capacity-scheduler.xml', mode: '0644' }
when: inventory_hostname in groups['hdfs']
- block:
  - name: copying yarn master daemon scripts
template:
src: 'daemonscript/{{ item.src }}'
dest: '{{ item.dest }}'
mode: 0755
backup: yes
with_items:
- { src: 'dae-yarnhistory.sh.j2', dest: '{{ deploy_dir }}/{{ hadoop_version }}/sbin/dae-yarnhistory.sh' }
- { src: 'dae-yarnmaster.sh.j2', dest: '{{ deploy_dir }}/{{ hadoop_version }}/sbin/dae-yarnmaster.sh' }
- { src: 'keepyarnhistory.j2', dest: '/etc/init.d/keepyarnhistory' }
- { src: 'keepyarnmaster.j2', dest: '/etc/init.d/keepyarnmaster' }
when: inventory_hostname in groups['yarn'][0:2]
- block:
- name: Start ResourceManager and JobHistoryServer
    shell: cd {{ deploy_dir }}/{{ hadoop_version }}/bin/ && ./set_yarn_env.sh {{ item.operation }}
    with_items:
    - { operation: 'chkconfig' }
    - { operation: 'master' }
    - { operation: 'history' }
  - name: Waiting for the ResourceManager to start, sleep 60s
shell: sleep 60
- name: checking ResourceManager status
shell: source /etc/profile && jps -l | grep "org.apache.hadoop.yarn.server.resourcemanager.ResourceManager" | grep -v grep | wc -l
register: resourcemanager_check
- name: checking ResourceManager
fail:
msg: "ResourceManager节点启动异常请登陆{{ inventory_hostname }},保留日志反馈,路径:{{ deploy_dir }}/{{ hadoop_version }}/logs/"
when: resourcemanager_check.stdout != '1'
run_once: true
delegate_to: 127.0.0.1
- name: checking JobHistoryServer status
shell: source /etc/profile && jps -l | grep "org.apache.hadoop.mapreduce.v2.hs.JobHistoryServer" | grep -v grep | wc -l
register: history_check
- name: checking JobHistoryServer
fail:
msg: "JobHistoryServer节点启动异常请登陆{{ inventory_hostname }},保留日志反馈,路径:{{ deploy_dir }}/{{ hadoop_version }}/logs/"
when: history_check.stdout != '1'
run_once: true
delegate_to: 127.0.0.1
when: inventory_hostname in groups['yarn'][0:2]
- block:
- name: copying yarn worker
template:
src: 'daemonscript/{{ item.src }}'
dest: '{{ item.dest }}'
mode: 0755
backup: yes
with_items:
- { src: 'dae-yarnworker.sh.j2', dest: '{{ deploy_dir }}/{{ hadoop_version }}/sbin/dae-yarnworker.sh' }
- { src: 'keepyarnworker.j2', dest: '/etc/init.d/keepyarnworker' }
- name: Start NodeManager
    shell: cd {{ deploy_dir }}/{{ hadoop_version }}/bin/ && ./set_yarn_env.sh {{ item.operation }}
    with_items:
    - { operation: 'chkconfig' }
    - { operation: 'worker' }
  - name: Waiting for the NodeManager to start, sleep 60s
shell: sleep 60
- name: checking NodeManager status
shell: source /etc/profile && jps -l | grep "org.apache.hadoop.yarn.server.nodemanager.NodeManager" | grep -v grep | wc -l
register: datanode_status
- name: checking NodeManager
fail:
msg: "NodeManager未启动,请登陆[{{ inventory_hostname }}],保留日志反馈,路径:{{ deploy_dir }}/{{ hadoop_version }}/logs/"
when: datanode_status.stdout != '1'
run_once: true
delegate_to: 127.0.0.1
when: node_nums >= cluster_limit and inventory_hostname not in groups['yarn'][0:2]
- block:
- name: copying yarn worker
template:
src: 'daemonscript/{{ item.src }}'
dest: '{{ item.dest }}'
mode: 0755
backup: yes
with_items:
- { src: 'dae-yarnworker.sh.j2', dest: '{{ deploy_dir }}/{{ hadoop_version }}/sbin/dae-yarnworker.sh' }
- { src: 'keepyarnworker.j2', dest: '/etc/init.d/keepyarnworker' }
- name: Start NodeManager
    shell: cd {{ deploy_dir }}/{{ hadoop_version }}/bin/ && ./set_yarn_env.sh {{ item.operation }}
    with_items:
    - { operation: 'chkconfig' }
    - { operation: 'worker' }
  - name: Waiting for the NodeManager to start, sleep 60s
shell: sleep 60
- name: checking NodeManager status
shell: source /etc/profile && jps -l | grep "org.apache.hadoop.yarn.server.nodemanager.NodeManager" | grep -v grep | wc -l
register: datanode_status
- name: checking NodeManager
fail:
msg: "NodeManager未启动,请登陆[{{ inventory_hostname }}],保留日志反馈,路径:{{ deploy_dir }}/{{ hadoop_version }}/logs/"
when: datanode_status.stdout != '1'
run_once: true
delegate_to: 127.0.0.1
when: node_nums < cluster_limit
#--------------------------------------------Flink----------------------------------------------#
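# Flink on YARN: copy the Hadoop client configs into the Flink conf dir so Flink's YARN client can locate the cluster.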
- name: Copying Flink installation package
unarchive:
src: 'files/{{ flink_version }}.tgz'
dest: '{{ deploy_dir }}/{{ hadoop_version }}/'
- name: Config flink configuration
template:
src: '{{ item.src }}'
dest: '{{ item.dest }}'
mode: '{{ item.mode }}'
with_items:
- { src: 'flink/flink.sh.j2', dest: '/etc/profile.d/flink.sh', mode: '0755' }
- { src: 'flink/flink-conf.yaml.j2', dest: '{{ deploy_dir }}/{{ hadoop_version }}/{{ flink_version }}/conf/flink-conf.yaml', mode: '0644' }
- { src: 'yarn-site.xml.j2', dest: '{{ deploy_dir }}/{{ hadoop_version }}/{{ flink_version }}/conf/yarn-site.xml', mode: '0644' }
- { src: 'core-site.xml.j2', dest: '{{ deploy_dir }}/{{ hadoop_version }}/{{ flink_version }}/conf/core-site.xml', mode: '0644' }
- { src: 'hdfs-site.xml.j2', dest: '{{ deploy_dir }}/{{ hadoop_version }}/{{ flink_version }}/conf/hdfs-site.xml', mode: '0644' }

View File

@@ -0,0 +1,136 @@
- name: check Jdk version
shell: source /etc/profile && java -version 2>&1 | grep {{ java_version }} | wc -l
ignore_errors: false
register: jdk_out
- name: To terminate execution
fail:
msg: "JDK is not installed in the target cluster, please check!"
when: jdk_out.stdout != '2'
run_once: true
delegate_to: 127.0.0.1
- name: create hadoop package path:{{ deploy_dir }}
file:
state: directory
path: '{{ deploy_dir }}'
- name: unpack hadoop-2.7.1.tar.gz to {{ deploy_dir }}/
unarchive:
src: 'files/{{ hadoop_version }}.tar.gz'
dest: '{{ deploy_dir }}/'
- name: copying yarn master config files
template:
src: '{{ item.src }}'
dest: '{{ deploy_dir }}/{{ hadoop_version }}/{{ item.dest }}'
mode: '{{ item.mode }}'
backup: false
with_items:
- { src: 'standalone/yarn-site.xml.j2', dest: 'etc/hadoop/yarn-site.xml', mode: '0644' }
- { src: 'standalone/hdfs-site.xml.j2', dest: 'etc/hadoop/hdfs-site.xml', mode: '0644' }
- { src: 'standalone/core-site.xml.j2', dest: 'etc/hadoop/core-site.xml', mode: '0644' }
- { src: 'slaves.j2', dest: 'etc/hadoop/slaves', mode: '0644' }
- { src: 'set_yarn_env.sh.j2', dest: 'bin/set_yarn_env.sh', mode: '0755' }
- { src: 'mapred-site.xml.j2', dest: 'etc/hadoop/mapred-site.xml', mode: '0644' }
- { src: 'capacity-scheduler.xml.j2', dest: 'etc/hadoop/capacity-scheduler.xml', mode: '0644' }
- { src: 'yarn-env.sh.j2', dest: 'etc/hadoop/yarn-env.sh', mode: '0755' }
- { src: 'hadoop-env.sh.j2', dest: 'etc/hadoop/hadoop-env.sh', mode: '0755' }
- name: copying yarn master daemon scripts
template:
src: 'daemonscript/{{ item.src }}'
dest: '{{ item.dest }}'
mode: 0755
backup: yes
with_items:
- { src: 'dae-yarnhistory.sh.j2', dest: '{{ deploy_dir }}/{{ hadoop_version }}/sbin/dae-yarnhistory.sh' }
- { src: 'dae-yarnmaster.sh.j2', dest: '{{ deploy_dir }}/{{ hadoop_version }}/sbin/dae-yarnmaster.sh' }
- { src: 'keepyarnhistory.j2', dest: '/etc/init.d/keepyarnhistory' }
- { src: 'keepyarnmaster.j2', dest: '/etc/init.d/keepyarnmaster' }
- block:
- name: Start ResourceManager and JobHistoryServer
    shell: cd {{ deploy_dir }}/{{ hadoop_version }}/bin/ && ./set_yarn_env.sh {{ item.operation }}
    with_items:
    - { operation: 'chkconfig' }
    - { operation: 'master' }
    - { operation: 'history' }
  - name: Waiting for the ResourceManager to start, sleep 60s
shell: sleep 60
- name: checking ResourceManager status
shell: source /etc/profile && jps -l | grep "org.apache.hadoop.yarn.server.resourcemanager.ResourceManager" | grep -v grep | wc -l
register: resourcemanager_check
- name: checking ResourceManager
fail:
msg: "ResourceManager节点启动异常请登陆{{ inventory_hostname }},保留日志反馈,路径:{{ deploy_dir }}/{{ hadoop_version }}/logs/"
when: resourcemanager_check.stdout != '1'
run_once: true
delegate_to: 127.0.0.1
- name: checking JobHistoryServer status
shell: source /etc/profile && jps -l | grep "org.apache.hadoop.mapreduce.v2.hs.JobHistoryServer" | grep -v grep | wc -l
register: history_check
- name: checking JobHistoryServer
fail:
msg: "JobHistoryServer节点启动异常请登陆{{ inventory_hostname }},保留日志反馈,路径:{{ deploy_dir }}/{{ hadoop_version }}/logs/"
when: history_check.stdout != '1'
run_once: true
delegate_to: 127.0.0.1
when: inventory_hostname in groups['yarn'][0:2]
- block:
- name: copying yarn worker
template:
src: 'daemonscript/{{ item.src }}'
dest: '{{ item.dest }}'
mode: 0755
backup: yes
with_items:
- { src: 'dae-yarnworker.sh.j2', dest: '{{ deploy_dir }}/{{ hadoop_version }}/sbin/dae-yarnworker.sh' }
- { src: 'keepyarnworker.j2', dest: '/etc/init.d/keepyarnworker' }
- name: Start NodeManager
    shell: cd {{ deploy_dir }}/{{ hadoop_version }}/bin/ && ./set_yarn_env.sh {{ item.operation }}
    with_items:
    - { operation: 'chkconfig' }
    - { operation: 'worker' }
  - name: Waiting for the NodeManager to start, sleep 60s
shell: sleep 60
- name: checking NodeManager status
shell: source /etc/profile && jps -l | grep "org.apache.hadoop.yarn.server.nodemanager.NodeManager" | grep -v grep | wc -l
register: datanode_status
- name: checking NodeManager
fail:
msg: "NodeManager未启动,请登陆[{{ inventory_hostname }}],保留日志反馈,路径:{{ deploy_dir }}/{{ hadoop_version }}/logs/"
when: datanode_status.stdout != '1'
run_once: true
delegate_to: 127.0.0.1
#--------------------------------------------Flink----------------------------------------------#
- name: Copying Flink installation package
unarchive:
src: 'files/{{ flink_version }}.tgz'
dest: '{{ deploy_dir }}/{{ hadoop_version }}/'
- name: Config flink configuration
template:
src: '{{ item.src }}'
dest: '{{ item.dest }}'
mode: '{{ item.mode }}'
with_items:
- { src: 'flink/flink.sh.j2', dest: '/etc/profile.d/flink.sh', mode: '0755' }
- { src: 'flink/flink-conf.yaml.j2', dest: '{{ deploy_dir }}/{{ hadoop_version }}/{{ flink_version }}/conf/flink-conf.yaml', mode: '0644' }
- { src: 'standalone/yarn-site.xml.j2', dest: '{{ deploy_dir }}/{{ hadoop_version }}/{{ flink_version }}/conf/yarn-site.xml', mode: '0644' }
- { src: 'standalone/core-site.xml.j2', dest: '{{ deploy_dir }}/{{ hadoop_version }}/{{ flink_version }}/conf/core-site.xml', mode: '0644' }
- { src: 'standalone/hdfs-site.xml.j2', dest: '{{ deploy_dir }}/{{ hadoop_version }}/{{ flink_version }}/conf/hdfs-site.xml', mode: '0644' }
- name: Start flink session
shell: source /etc/profile && cd {{ deploy_dir }}/{{ hadoop_version }}/{{ flink_version }}/bin/ && ./yarn-session.sh -d

View File

@@ -0,0 +1,12 @@
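# Picks the cluster or standalone deployment play depending on how many hosts are in the yarn group.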
- block:
- include: uninstall.yml
- include: "{{ playbook_name }}"
vars:
playbook_name: "{{ 'deploy-cluster.yml' if groups.yarn | length > 1 else 'deploy-standalone.yml' }}"
- include: status-check.yml
when: (operation) == "install"
- block:
- include: uninstall.yml
when: (operation) == "uninstall"

View File

@@ -0,0 +1,57 @@
- name: Setting node_nums variable
set_fact: node_nums="{{groups.yarn|length}}"
- name: Waiting for YARN to start (sleep 30s)
shell: sleep 30
- block:
- name: checking ResourceManager status
shell: source /etc/profile && jps -l | grep "org.apache.hadoop.yarn.server.resourcemanager.ResourceManager" | grep -v grep | wc -l
register: resourcemanager_check
- name: checking ResourceManager
fail:
msg: "ResourceManager节点启动异常请登陆{{ inventory_hostname }},保留日志反馈,路径:{{ deploy_dir }}/{{ hadoop_version }}/logs/"
when: resourcemanager_check.stdout != '1'
run_once: true
delegate_to: 127.0.0.1
- name: checking JobHistoryServer status
shell: source /etc/profile && jps -l | grep "org.apache.hadoop.mapreduce.v2.hs.JobHistoryServer" | grep -v grep | wc -l
register: history_check
- name: checking JobHistoryServer
fail:
msg: "JobHistoryServer节点启动异常请登陆{{ inventory_hostname }},保留日志反馈,路径:{{ deploy_dir }}/{{ hadoop_version }}/logs/"
when: history_check.stdout != '1'
run_once: true
delegate_to: 127.0.0.1
when: inventory_hostname in groups['yarn'][0:2]
- block:
- name: checking NodeManager status
shell: source /etc/profile && jps -l | grep "org.apache.hadoop.yarn.server.nodemanager.NodeManager" | grep -v grep | wc -l
register: datanode_status
- name: checking NodeManager
fail:
msg: "NodeManager未启动,请登陆[{{ inventory_hostname }}],保留日志反馈,路径:{{ deploy_dir }}/{{ hadoop_version }}/logs/"
when: datanode_status.stdout != '1'
run_once: true
delegate_to: 127.0.0.1
when: node_nums | int >= cluster_limit | int and inventory_hostname not in groups['yarn'][0:2]
- block:
- name: checking NodeManager status
shell: source /etc/profile && jps -l | grep "org.apache.hadoop.yarn.server.nodemanager.NodeManager" | grep -v grep | wc -l
register: datanode_status
- name: checking NodeManager
fail:
msg: "NodeManager未启动,请登陆[{{ inventory_hostname }}],保留日志反馈,路径:{{ deploy_dir }}/{{ hadoop_version }}/logs/"
when: datanode_status.stdout != '1'
run_once: true
delegate_to: 127.0.0.1
when: node_nums | int < cluster_limit | int
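The same checks can be reproduced by hand on any node when a task in this file reports a failure; a minimal sketch:
source /etc/profile
# Each count should be 1 on the nodes where the corresponding daemon is expected
jps -l | grep -c "org.apache.hadoop.yarn.server.resourcemanager.ResourceManager"
jps -l | grep -c "org.apache.hadoop.mapreduce.v2.hs.JobHistoryServer"
jps -l | grep -c "org.apache.hadoop.yarn.server.nodemanager.NodeManager"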

View File

@@ -0,0 +1,55 @@
- block:
- name: copy unload_hadoop_yarn.sh to {{ deploy_dir }}/
template:
src: 'unload_hadoop_yarn.sh.j2'
dest: '{{ deploy_dir }}/unload_hadoop_yarn.sh'
force: true
mode: 0755
- name: unload hadoop
shell: cd {{ deploy_dir }} && sh unload_hadoop_yarn.sh
- name: Delete {{ deploy_dir }}/unload_hadoop_yarn.sh
file:
path: "{{ deploy_dir }}/unload_hadoop_yarn.sh"
state: absent
- name: Delete the old /etc/profile.d/flink.sh
file:
path: '/etc/profile.d/flink.sh'
state: absent
- name: Check whether the YARN rmstore znode exists in ZooKeeper
shell: "docker exec zookeeper zkCli.sh ls / | grep rmstore | wc -l"
run_once: true
delegate_to: "{{ groups.zookeeper[0] }}"
register: rmstore_zknode
- name: Delete the YARN rmstore znode from ZooKeeper
shell: "docker exec zookeeper zkCli.sh rmr /rmstore"
run_once: true
delegate_to: "{{ groups.zookeeper[0] }}"
when: rmstore_zknode.stdout | int >= 1
- name: Check whether the YARN leader-election znode exists in ZooKeeper
shell: docker exec zookeeper zkCli.sh ls / | grep "yarn-leader-election" | wc -l
run_once: true
delegate_to: "{{ groups.zookeeper[0] }}"
register: leader_zknode
- name: Delete the YARN leader-election znode from ZooKeeper
shell: "docker exec zookeeper zkCli.sh rmr /yarn-leader-election"
run_once: true
delegate_to: "{{ groups.zookeeper[0] }}"
when: leader_zknode.stdout | int >= 1
- name: Check if the Hadoop service already exists
shell: source /etc/profile && jps -l | egrep "org.apache.hadoop.yarn.server.resourcemanager.ResourceManager|org.apache.hadoop.yarn.server.nodemanager.NodeManager|org.apache.hadoop.mapreduce.v2.hs.JobHistoryServer" | wc -l
register: check_out
- name: Abort when Hadoop YARN processes are still running
fail:
msg: "Uninstall failed: the component may not have been deployed by this installer. Please uninstall it manually before continuing the installation."
run_once: true
delegate_to: 127.0.0.1
when: check_out.stdout | int >= 1
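If the ZooKeeper cleanup needs to be verified or repeated by hand, the equivalent commands look roughly like this; it assumes the same zookeeper container name used by the tasks above:
# List YARN-related znodes left behind by a previous deployment
docker exec zookeeper zkCli.sh ls /
# Remove them if still present (ZooKeeper 3.4.x zkCli uses rmr)
docker exec zookeeper zkCli.sh rmr /rmstore
docker exec zookeeper zkCli.sh rmr /yarn-leader-election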

View File

@@ -0,0 +1,134 @@
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<configuration>
<property>
<name>yarn.scheduler.capacity.maximum-applications</name>
<value>10000</value>
<description>
Maximum number of applications that can be pending and running.
</description>
</property>
<property>
<name>yarn.scheduler.capacity.maximum-am-resource-percent</name>
<value>0.5</value>
<description>
Maximum percent of resources in the cluster which can be used to run
application masters i.e. controls number of concurrent running
applications.
</description>
</property>
<property>
<name>yarn.scheduler.capacity.resource-calculator</name>
<value>org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator</value>
<description>
The ResourceCalculator implementation to be used to compare
Resources in the scheduler.
The default i.e. DefaultResourceCalculator only uses Memory while
DominantResourceCalculator uses dominant-resource to compare
multi-dimensional resources such as Memory, CPU etc.
</description>
</property>
<property>
<name>yarn.scheduler.capacity.root.queues</name>
<value>default</value>
<description>
The queues at this level (root is the root queue).
</description>
</property>
<property>
<name>yarn.scheduler.capacity.root.default.capacity</name>
<value>100</value>
<description>Default queue target capacity.</description>
</property>
<property>
<name>yarn.scheduler.capacity.root.default.user-limit-factor</name>
<value>1</value>
<description>
Default queue user limit a percentage from 0.0 to 1.0.
</description>
</property>
<property>
<name>yarn.scheduler.capacity.root.default.maximum-capacity</name>
<value>100</value>
<description>
The maximum capacity of the default queue.
</description>
</property>
<property>
<name>yarn.scheduler.capacity.root.default.state</name>
<value>RUNNING</value>
<description>
The state of the default queue. State can be one of RUNNING or STOPPED.
</description>
</property>
<property>
<name>yarn.scheduler.capacity.root.default.acl_submit_applications</name>
<value>*</value>
<description>
The ACL of who can submit jobs to the default queue.
</description>
</property>
<property>
<name>yarn.scheduler.capacity.root.default.acl_administer_queue</name>
<value>*</value>
<description>
The ACL of who can administer jobs on the default queue.
</description>
</property>
<property>
<name>yarn.scheduler.capacity.node-locality-delay</name>
<value>40</value>
<description>
Number of missed scheduling opportunities after which the CapacityScheduler
attempts to schedule rack-local containers.
Typically this should be set to the number of nodes in the cluster. By default it is set to
approximately the number of nodes in one rack, which is 40.
</description>
</property>
<property>
<name>yarn.scheduler.capacity.queue-mappings</name>
<value></value>
<description>
A list of mappings that will be used to assign jobs to queues
The syntax for this list is [u|g]:[name]:[queue_name][,next mapping]*
Typically this list will be used to map users to queues,
for example, u:%user:%user maps all users to queues with the same name
as the user.
</description>
</property>
<property>
<name>yarn.scheduler.capacity.queue-mappings-override.enable</name>
<value>false</value>
<description>
If a queue mapping is present, will it override the value specified
by the user? This can be used by administrators to place jobs in queues
that are different than the one specified by the user.
The default is false.
</description>
</property>
</configuration>

View File

@@ -0,0 +1,77 @@
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://ns1</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>file:{{ hdfs_data_dir }}/tmp</value>
</property>
<property>
<name>io.file.buffer.size</name>
<value>131702</value>
</property>
<property>
<name>hadoop.proxyuser.root.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.root.groups</name>
<value>*</value>
</property>
<property>
<name>hadoop.logfile.size</name>
<value>10000000</value>
<description>The max size of each log file</description>
</property>
<property>
<name>hadoop.logfile.count</name>
<value>1</value>
<description>The max number of log files</description>
</property>
<property>
<name>ha.zookeeper.quorum</name>
<value>{% for dev_info in groups.zookeeper %}{{ dev_info }}:2181{% if not loop.last %},{% endif %}{% endfor %}</value>
</property>
<property>
<name>ipc.client.connect.timeout</name>
<value>90000</value>
</property>
</configuration>
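After this template is rendered, the quorum value can be confirmed from any Hadoop node; a small sketch, assuming the hadoop binaries are on PATH:
# Should print something like zk1:2181,zk2:2181,zk3:2181 for a three-node zookeeper group
hdfs getconf -confKey ha.zookeeper.quorum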

View File

@@ -0,0 +1,41 @@
#!/bin/bash
source /etc/profile
BASE_DIR={{ deploy_dir }}
VERSION={{ hadoop_version }}
function set_log(){
# Directory holding the per-service restart counters and restart.log
RES_SUM_FILE=$BASE_DIR/$VERSION/logs
if [ ! -d "$RES_SUM_FILE" ]
then
mkdir -p $RES_SUM_FILE
fi
# Initialise the counter file on first use
if [ ! -f "$RES_SUM_FILE/$1" ];then
echo "0" > $RES_SUM_FILE/$1
fi
OLD_NUM=`cat $RES_SUM_FILE/$1`
RESTART_NUM=`expr $OLD_NUM + 1`
echo $RESTART_NUM > $RES_SUM_FILE/$1
if [ $OLD_NUM -eq "0" ];then
echo "`date "+%Y-%m-%d %H:%M:%S"` - Yarn $2 service started for the first time" >> $BASE_DIR/$VERSION/logs/restart.log
else
echo "`date +%Y-%m-%d` `date +%H:%M:%S` - Yarn $2 service was down - restart count -> $RESTART_NUM." >> $BASE_DIR/$VERSION/logs/restart.log
fi
}
while true ; do
HAS_HISTORY=`ps -ef | grep "org.apache.hadoop.mapreduce.v2.hs.JobHistoryServer" | grep -v grep | wc -l`
if [ $HAS_HISTORY -eq "0" ];then
$BASE_DIR/$VERSION/sbin/mr-jobhistory-daemon.sh start historyserver > /dev/null
set_log historyRes_sum JobHistoryServer
fi
sleep 60
done

View File

@@ -0,0 +1,41 @@
#!/bin/bash
source /etc/profile
BASE_DIR={{ deploy_dir }}
VERSION={{ hadoop_version }}
function set_log(){
# Directory holding the per-service restart counters and restart.log
RES_SUM_FILE=$BASE_DIR/$VERSION/logs
if [ ! -d "$RES_SUM_FILE" ]
then
mkdir -p $RES_SUM_FILE
fi
# Initialise the counter file on first use
if [ ! -f "$RES_SUM_FILE/$1" ];then
echo "0" > $RES_SUM_FILE/$1
fi
OLD_NUM=`cat $RES_SUM_FILE/$1`
RESTART_NUM=`expr $OLD_NUM + 1`
echo $RESTART_NUM > $RES_SUM_FILE/$1
if [ $OLD_NUM -eq "0" ];then
echo "`date "+%Y-%m-%d %H:%M:%S"` - Yarn $2 service started for the first time" >> $BASE_DIR/$VERSION/logs/restart.log
else
echo "`date +%Y-%m-%d` `date +%H:%M:%S` - Yarn $2 service was down - restart count -> $RESTART_NUM." >> $BASE_DIR/$VERSION/logs/restart.log
fi
}
while true ; do
HAS_RM=`ps -ef | grep "org.apache.hadoop.yarn.server.resourcemanager.ResourceManager" | grep -v grep | wc -l`
if [ $HAS_RM -eq "0" ];then
$BASE_DIR/$VERSION/sbin/yarn-daemon.sh start resourcemanager > /dev/null
set_log rmRes_sum ResourceManager
fi
sleep 60
done

View File

@@ -0,0 +1,41 @@
#!/bin/bash
source /etc/profile
BASE_DIR={{ deploy_dir }}
VERSION={{ hadoop_version }}
function set_log(){
# Directory holding the per-service restart counters and restart.log
RES_SUM_FILE=$BASE_DIR/$VERSION/logs
if [ ! -d "$RES_SUM_FILE" ]
then
mkdir -p $RES_SUM_FILE
fi
# Initialise the counter file on first use
if [ ! -f "$RES_SUM_FILE/$1" ];then
echo "0" > $RES_SUM_FILE/$1
fi
OLD_NUM=`cat $RES_SUM_FILE/$1`
RESTART_NUM=`expr $OLD_NUM + 1`
echo $RESTART_NUM > $RES_SUM_FILE/$1
if [ $OLD_NUM -eq "0" ];then
echo "`date "+%Y-%m-%d %H:%M:%S"` - Yarn $2 service started for the first time" >> $BASE_DIR/$VERSION/logs/restart.log
else
echo "`date +%Y-%m-%d` `date +%H:%M:%S` - Yarn $2 service was down - restart count -> $RESTART_NUM." >> $BASE_DIR/$VERSION/logs/restart.log
fi
}
while true ; do
HAS_NM=`ps -ef | grep "org.apache.hadoop.yarn.server.nodemanager.NodeManager" | grep -v grep | wc -l`
if [ $HAS_NM -eq "0" ];then
$BASE_DIR/$VERSION/sbin/yarn-daemon.sh start nodemanager > /dev/null
set_log nmRes_sum NodeManager
fi
sleep 60
done

View File

@@ -0,0 +1,46 @@
#!/bin/bash
#
# keepyarnhistory   Watchdog init script that keeps the YARN JobHistoryServer daemon running.
#
# chkconfig: 123456 40 60
# description: keepyarnhistory
source /etc/profile
PRO_NAME=keepyarnhistory
INS_DIR={{ deploy_dir }}
# Version
VERSION={{ hadoop_version }}
case $1 in
start)
master=`ps -ef | grep "dae-yarnhistory.sh" | grep -v grep | wc -l`
if [ $master -lt 1 ];then
nohup $INS_DIR/$VERSION/sbin/dae-yarnhistory.sh > /dev/null 2>&1 &
fi
;;
stop)
HAS_KEEP_SHELL=`ps -ef | grep "dae-yarnhistory.sh" | grep -v grep | awk '{print $2}'`
if [ $HAS_KEEP_SHELL ];then
echo "守护进程PID$HAS_KEEP_SHELL"
kill -9 $HAS_KEEP_SHELL
fi
sh $INS_DIR/$VERSION/sbin/mr-jobhistory-daemon.sh stop historyserver > /dev/null
;;
status)
HAS_HISTORY=`ps -ef | grep "org.apache.hadoop.mapreduce.v2.hs.JobHistoryServer" | grep -v grep | wc -l`
if [ $HAS_HISTORY -eq "0" ];then
echo "JobHistoryServer not running!"
else
echo "JobHistoryServer is running!"
fi
;;
* )
echo "use keepyarnhistory [start|stop|status]"
;;
esac

View File

@@ -0,0 +1,40 @@
#!/bin/bash
#
# keepyarnmaster    Watchdog init script that keeps the YARN ResourceManager daemon running.
#
# chkconfig: 123456 40 60
# description: keepyarnmaster
source /etc/profile
PRO_NAME=keepyarnmaster
INS_DIR={{ deploy_dir }}
# Version
VERSION={{ hadoop_version }}
case $1 in
start)
master=`ps -ef | grep "dae-yarnmaster.sh" | grep -v grep | wc -l`
if [ $master -lt 1 ];then
nohup $INS_DIR/$VERSION/sbin/dae-yarnmaster.sh > /dev/null 2>&1 &
fi
;;
stop)
HAS_KEEP_SHELL=`ps -ef | grep "dae-yarnmaster.sh" | grep -v grep | awk '{print $2}'`
if [ $HAS_KEEP_SHELL ];then
echo "守护进程PID$HAS_KEEP_SHELL"
kill -9 $HAS_KEEP_SHELL
fi
sh $INS_DIR/$VERSION/sbin/yarn-daemon.sh stop resourcemanager > /dev/null
;;
status)
yarn rmadmin -getServiceState rsm1
;;
* )
echo "use keepyarnmaster [start|stop|status]"
;;
esac

View File

@@ -0,0 +1,46 @@
#!/bin/bash
#
# keepyarnworker    Watchdog init script that keeps the YARN NodeManager daemon running.
#
# chkconfig: 123456 40 60
# description: keepyarnworker
source /etc/profile
PRO_NAME=keepyarnworker
INS_DIR={{ deploy_dir }}
# Version
VERSION={{ hadoop_version }}
case $1 in
start)
master=`ps -ef | grep "dae-yarnworker.sh" | grep -v grep | wc -l`
if [ $master -lt 1 ];then
nohup $INS_DIR/$VERSION/sbin/dae-yarnworker.sh > /dev/null 2>&1 &
fi
;;
stop)
HAS_KEEP_SHELL=`ps -ef | grep "dae-yarnworker.sh" | grep -v grep | awk '{print $2}'`
if [ $HAS_KEEP_SHELL ];then
echo "守护进程PID$HAS_KEEP_SHELL"
kill -9 $HAS_KEEP_SHELL
fi
sh $INS_DIR/$VERSION/sbin/yarn-daemon.sh stop nodemanager > /dev/null
;;
status)
HAS_NM=`ps -ef | grep "org.apache.hadoop.yarn.server.nodemanager.NodeManager" | grep -v grep | wc -l`
if [ $HAS_NM -eq "0" ];then
echo "NodeManager not running!"
else
echo "NodeManager is running!"
fi
;;
* )
echo "use keepyarnworker [start|stop|status]"
;;
esac
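These init scripts are registered through chkconfig by set_yarn_env.sh; a rough manual smoke test of one of them, run as root on a NodeManager host:
# Register and start the watchdog, then confirm both the watchdog and NodeManager are up
chkconfig --add keepyarnworker && chkconfig keepyarnworker on
service keepyarnworker start
service keepyarnworker status
ps -ef | grep dae-yarnworker.sh | grep -v grep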

View File

@@ -0,0 +1,198 @@
################################################################################
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################
#==============================================================================
# Common
#==============================================================================
# The external address of the host on which the JobManager runs and can be
# reached by the TaskManagers and any clients which want to connect. This setting
# is only used in Standalone mode and may be overwritten on the JobManager side
# by specifying the --host <hostname> parameter of the bin/jobmanager.sh executable.
# In high availability mode, if you use the bin/start-cluster.sh script and setup
# the conf/masters file, this will be taken care of automatically. Yarn/Mesos
# automatically configure the host name based on the hostname of the node where the
# JobManager runs.
jobmanager.rpc.address: {{ groups.yarn[0] }}
# JobManager RPC port
jobmanager.rpc.port: 6123
# Spread tasks evenly across all TaskManagers
cluster.evenly-spread-out-slots: true
# Do not fail the job with a metaspace OOM error; let the Flink JVM process exit instead
classloader.fail-on-metaspace-oom-error: false
# Work around classloader leaks caused by third-party libraries
classloader.check-leaked-classloader: false
# Prevent the TaskManager from being killed when a task cannot be cancelled in time
task.cancellation.timeout: 0
# Total Flink-related memory used by the JobManager process
jobmanager.memory.process.size: {{ flink['jobmanager.memory.process.size'] }}
# Total Flink-related memory used by the TaskManager process
taskmanager.memory.process.size: {{ flink['taskmanager.memory.process.size'] }}
# Size of the managed memory used by the TaskManager
taskmanager.memory.managed.size: 128M
# taskmanager.memory.off-heap defaults to false and controls whether Flink managed memory is allocated on-heap or off-heap.
# Heap is used by default; enabling off-heap allocation further reduces the remaining resources.
taskmanager.memory.off-heap: false
# Framework off-heap memory, allocated as direct memory
taskmanager.memory.framework.off-heap.size: {{ flink['taskmanager.memory.framework.off-heap.size'] }}
# TaskManager JVM metaspace size, 256M by default
taskmanager.memory.jvm-metaspace.size: {{ flink['taskmanager.memory.jvm-metaspace.size'] }}
# Minimum number of network buffers per sort-merge blocking result partition, 64 by default. For production use it is recommended to raise this to 2048 to improve the data compression ratio and reduce small network packets; raising it also requires more total network memory.
taskmanager.network.sort-shuffle.min-buffers: 64
# Memory used for reading shuffle data, currently only by sort-merge shuffle. It is taken from framework.off-heap.size (32M by default); when increasing this value, increase framework.off-heap.size accordingly.
taskmanager.memory.framework.off-heap.batch-shuffle.size: 8M
# Maximum number of buffers each channel may use, 10 by default. It speeds up checkpoint alignment by preventing excessive growth of in-flight buffered data under data skew or a high floating-buffer count.
taskmanager.network.memory.max-buffers-per-channel: 10
# The number of task slots that each TaskManager offers. Each slot runs one parallel pipeline.
taskmanager.numberOfTaskSlots: {{ flink['taskmanager.numberOfTaskSlots'] }}
# The parallelism used for programs that did not specify and other parallelism.
parallelism.default: 1
# The default file system scheme and authority.
#
# By default file paths without scheme are interpreted relative to the local
# root file system 'file:///'. Use this to override the default and interpret
# relative paths relative to a different file system,
# for example 'hdfs://mynamenode:12345'
#
# fs.default-scheme
#==============================================================================
# NetWork
#==============================================================================
# Number of floating network buffers per gate, 8 by default. Helps mitigate backpressure caused by uneven data distribution across subpartitions.
taskmanager.network.memory.floating-buffers-per-gate: 8
# Number of exclusive network buffers per input/output channel. Configure at least 2.
taskmanager.network.memory.buffers-per-channel: 2
# Network memory used for shuffles, broadcasts and other data transfer between TaskManagers and with external components
# Min
taskmanager.memory.network.min: 128M
# Max
taskmanager.memory.network.max: {{ flink['taskmanager.memory.network.max'] }}
#==============================================================================
# High Availability
#==============================================================================
# The high-availability mode. Possible options are 'NONE' or 'zookeeper'.
#
# high-availability: zookeeper
# The path where metadata for master recovery is persisted. While ZooKeeper stores
# the small ground truth for checkpoint and leader election, this location stores
# the larger objects, like persisted dataflow graphs.
#
# Must be a durable file system that is accessible from all nodes
# (like HDFS, S3, Ceph, nfs, ...)
#
# high-availability.storageDir: hdfs:///flink/ha/
# The list of ZooKeeper quorum peers that coordinate the high-availability
# setup. This must be a list of the form:
# "host1:clientPort,host2:clientPort,..." (default clientPort: 2181)
{% if groups.yarn | length > 1 %}
state.checkpoints.dir: hdfs:///flink/checkpoint/
{% elif groups.yarn | length == 1 %}
state.checkpoints.dir: file://{{ deploy_dir }}/{{ hadoop_version }}/{{ flink_version }}/checkpoint
{% endif %}
heartbeat.timeout: 180000
heartbeat.interval: 20000
akka.ask.timeout: 300 s
# ACL options are based on https://zookeeper.apache.org/doc/r3.1.2/zookeeperProgrammers.html#sc_BuiltinACLSchemes
# It can be either "creator" (ZOO_CREATE_ALL_ACL) or "open" (ZOO_OPEN_ACL_UNSAFE)
# The default value is "open" and it can be changed to "creator" if ZK security is enabled
#
# high-availability.zookeeper.client.acl: open
# The failover strategy, i.e., how the job computation recovers from task failures.
# Only restart tasks that may have been affected by the task failure, which typically includes
# downstream tasks and potentially upstream tasks if their produced data is no longer available for consumption.
jobmanager.execution.failover-strategy: region
restart-strategy: fixed-delay
restart-strategy.fixed-delay.attempts: 2147483647
yarn.application-attempts: 10000
restart-strategy.fixed-delay.delay: 5 s
web.submit.enable: false
#==============================================================================
# Advanced
#==============================================================================
# Override the directories for temporary files. If not specified, the
# system-specific Java temporary directory (java.io.tmpdir property) is taken.
#
# For framework setups on Yarn or Mesos, Flink will automatically pick up the
# containers' temp directories without any need for configuration.
#
# Add a delimited list for multiple directories, using the system directory
# delimiter (colon ':' on unix) or a comma, e.g.:
# /data1/tmp:/data2/tmp:/data3/tmp
#
# Note: Each directory entry is read from and written to by a different I/O
# thread. You can include the same directory multiple times in order to create
# multiple I/O threads against that directory. This is for example relevant for
# high-throughput RAIDs.
#
# io.tmp.dirs: /tmp
# The classloading resolve order. Possible values are 'child-first' (Flink's default)
# and 'parent-first' (Java's default).
#
# Child first classloading allows users to use different dependency/library
# versions in their application than those in the classpath. Switching back
# to 'parent-first' may help with debugging dependency issues.
#
# classloader.resolve-order: child-first
classloader.resolve-order: parent-first
metrics.reporter.promgateway.class: org.apache.flink.metrics.prometheus.PrometheusPushGatewayReporter
metrics.reporter.promgateway.randomJobNameSuffix: true
metrics.reporter.promgateway.deleteOnShutdown: true
metrics.reporter.promgateway.interval: 10 SECONDS
metrics.reporter.promgateway.host: 127.0.0.1
metrics.reporter.promgateway.port: 9091
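Once the YARN session is up, a quick way to confirm this configuration took effect; a sketch that assumes a Prometheus Pushgateway is actually listening on 127.0.0.1:9091, which this commit does not itself deploy:
source /etc/profile
# Jobs attached to the detached yarn-session
flink list
# Metrics pushed by the PrometheusPushGatewayReporter configured above
curl -s http://127.0.0.1:9091/metrics | grep -i flink | head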

View File

@@ -0,0 +1,4 @@
#flink
export FLINK_HOME={{ deploy_dir }}/{{ hadoop_version }}/{{ flink_version }}
export PATH=$FLINK_HOME/bin:$PATH

View File

@@ -0,0 +1,105 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Set Hadoop-specific environment variables here.
# The only required environment variable is JAVA_HOME. All others are
# optional. When running a distributed configuration it is best to
# set JAVA_HOME in this file, so that it is correctly defined on
# remote nodes.
export HADOOP_NAMENODE_JMX_OPTS="-Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.local.only=false -javaagent:{{ deploy_dir }}/{{ hadoop_version }}/monitor/jmx_prometheus_javaagent-0.12.0.jar=9905:{{ deploy_dir }}/{{ hadoop_version }}/monitor/hdfs.yaml"
export HADOOP_DATANODE_JMX_OPTS="-Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.local.only=false -javaagent:{{ deploy_dir }}/{{ hadoop_version }}/monitor/jmx_prometheus_javaagent-0.12.0.jar=9906:{{ deploy_dir }}/{{ hadoop_version }}/monitor/hdfs.yaml"
# The java implementation to use.
#export HADOOP_HEAPSIZE=m
#export JAVA_HOME=/usr/local/jdk/jdk1.8.0_73
export JAVA_HOME=$JAVA_HOME
# The jsvc implementation to use. Jsvc is required to run secure datanodes
# that bind to privileged ports to provide authentication of data transfer
# protocol. Jsvc is not required if SASL is configured for authentication of
# data transfer protocol using non-privileged ports.
#export JSVC_HOME=${JSVC_HOME}
export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-"/etc/hadoop"}
# Extra Java CLASSPATH elements. Automatically insert capacity-scheduler.
for f in $HADOOP_HOME/contrib/capacity-scheduler/*.jar; do
if [ "$HADOOP_CLASSPATH" ]; then
export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:$f
else
export HADOOP_CLASSPATH=$f
fi
done
# The maximum amount of heap to use, in MB. Default is 1000.
#export HADOOP_HEAPSIZE=
#export HADOOP_NAMENODE_INIT_HEAPSIZE=""
# Extra Java runtime options. Empty by default.
export HADOOP_OPTS="$HADOOP_OPTS -Djava.net.preferIPv4Stack=true"
# Command specific options appended to HADOOP_OPTS when specified
export HADOOP_NAMENODE_OPTS="$HADOOP_NAMENODE_OPTS {{ hadoop.namenode.java_opt }} -Xss256k -XX:MetaspaceSize=128m -XX:MaxMetaspaceSize=256m -XX:SurvivorRatio=2 -XX:+UseConcMarkSweepGC -XX:+UseParNewGC -XX:+CMSParallelRemarkEnabled -XX:MaxTenuringThreshold=15 -XX:+UseCMSCompactAtFullCollection -XX:CMSFullGCsBeforeCompaction=1 -XX:+UseCMSInitiatingOccupancyOnly -XX:CMSInitiatingOccupancyFraction=70 -XX:-DisableExplicitGC -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Xloggc:{{ deploy_dir }}/{{ hadoop_version }}/logs/gc-namenode-%t.log -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=5 -XX:GCLogFileSize=100M -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath={{ deploy_dir }}/{{ hadoop_version }}/logs/ -Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender}"
export HADOOP_DATANODE_OPTS="$HADOOP_DATANODE_OPTS {{ hadoop.datanode.java_opt }} -Xss256k -XX:MetaspaceSize=128m -XX:MaxMetaspaceSize=256m -XX:SurvivorRatio=2 -XX:+UseConcMarkSweepGC -XX:+UseParNewGC -XX:+CMSParallelRemarkEnabled -XX:MaxTenuringThreshold=15 -XX:+UseCMSCompactAtFullCollection -XX:CMSFullGCsBeforeCompaction=1 -XX:+UseCMSInitiatingOccupancyOnly -XX:CMSInitiatingOccupancyFraction=70 -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Xloggc:{{ deploy_dir }}/{{ hadoop_version }}/logs/gc-datanode-%t.log -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=5 -XX:GCLogFileSize=100M -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath={{ deploy_dir }}/{{ hadoop_version }}/logs/ -Dhadoop.security.logger=ERROR,RFAS"
export HADOOP_JOURNALNODE_OPTS="$HADOOP_JOURNALNODE_OPTS {{ hadoop.journalnode.java_opt }}"
export HADOOP_ZKFC_OPTS="$HADOOP_ZKFC_OPTS {{ hadoop.zkfc.java_opt }}"
export HADOOP_SECONDARYNAMENODE_OPTS="-Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender} $HADOOP_SECONDARYNAMENODE_OPTS"
export HADOOP_NFS3_OPTS="$HADOOP_NFS3_OPTS"
export HADOOP_PORTMAP_OPTS="-Xmx512m $HADOOP_PORTMAP_OPTS"
# The following applies to multiple commands (fs, dfs, fsck, distcp etc)
export HADOOP_CLIENT_OPTS="-Xmx512m $HADOOP_CLIENT_OPTS"
#HADOOP_JAVA_PLATFORM_OPTS="-XX:-UsePerfData $HADOOP_JAVA_PLATFORM_OPTS"
# On secure datanodes, user to run the datanode as after dropping privileges.
# This **MUST** be uncommented to enable secure HDFS if using privileged ports
# to provide authentication of data transfer protocol. This **MUST NOT** be
# defined if SASL is configured for authentication of data transfer protocol
# using non-privileged ports.
export HADOOP_SECURE_DN_USER=${HADOOP_SECURE_DN_USER}
# Where log files are stored. $HADOOP_HOME/logs by default.
#export HADOOP_LOG_DIR=${HADOOP_LOG_DIR}/$USER
# Where log files are stored in the secure data environment.
export HADOOP_SECURE_DN_LOG_DIR=${HADOOP_LOG_DIR}/${HADOOP_HDFS_USER}
###
# HDFS Mover specific parameters
###
# Specify the JVM options to be used when starting the HDFS Mover.
# These options will be appended to the options specified as HADOOP_OPTS
# and therefore may override any similar flags set in HADOOP_OPTS
#
# export HADOOP_MOVER_OPTS=""
###
# Advanced Users Only!
###
# The directory where pid files are stored. /tmp by default.
# NOTE: this should be set to a directory that can only be written to by
# the user that will run the hadoop daemons. Otherwise there is the
# potential for a symlink attack.
export HADOOP_PID_DIR={{ deploy_dir }}/{{ hadoop_version }}/pids
export HADOOP_SECURE_DN_PID_DIR=${HADOOP_PID_DIR}
# A string representing this instance of hadoop. $USER by default.
export HADOOP_IDENT_STRING=$USER
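The two JMX agent options above expose Prometheus endpoints on ports 9905 (NameNode) and 9906 (DataNode); a minimal check from the node itself:
# Only works on hosts where the corresponding HDFS daemon is running
curl -s http://localhost:9905/metrics | head
curl -s http://localhost:9906/metrics | head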

View File

@@ -0,0 +1,142 @@
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>dfs.namenode.name.dir</name>
<value>file:{{ hdfs_data_dir }}/dfs/name</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>file:{{ hdfs_data_dir }}/dfs/data</value>
</property>
<property>
<name>dfs.replication</name>
<value>2</value>
</property>
<property>
<name>dfs.webhdfs.enabled</name>
<value>true</value>
</property>
<property>
<name>dfs.permissions</name>
<value>false</value>
</property>
<property>
<name>dfs.permissions.enabled</name>
<value>false</value>
</property>
<property>
<name>dfs.nameservices</name>
<value>ns1</value>
</property>
<property>
<name>dfs.blocksize</name>
<value>134217728</value>
</property>
<property>
<name>dfs.ha.namenodes.ns1</name>
<value>nn1,nn2</value>
</property>
<!-- nn1的RPC通信地址nn1所在地址 -->
<property>
<name>dfs.namenode.rpc-address.ns1.nn1</name>
<value>{{ groups.hdfs[0] }}:9000</value>
</property>
<!-- nn1的http通信地址外部访问地址 -->
<property>
<name>dfs.namenode.http-address.ns1.nn1</name>
<value>{{ groups.hdfs[0] }}:50070</value>
</property>
<!-- nn2的RPC通信地址nn2所在地址 -->
<property>
<name>dfs.namenode.rpc-address.ns1.nn2</name>
<value>{{ groups.hdfs[1] }}:9000</value>
</property>
<!-- nn2的http通信地址外部访问地址 -->
<property>
<name>dfs.namenode.http-address.ns1.nn2</name>
<value>{{ groups.hdfs[1] }}:50070</value>
</property>
<!-- 指定NameNode的元数据在JournalNode日志上的存放位置(一般和zookeeper部署在一起) -->
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://{{groups.hdfs[0]}}:8485;{{groups.hdfs[1]}}:8485;{{groups.hdfs[2]}}:8485/ns1</value>
</property>
<!-- 指定JournalNode在本地磁盘存放数据的位置 -->
<property>
<name>dfs.journalnode.edits.dir</name>
<value>{{ hdfs_data_dir }}/journal</value>
</property>
<!--客户端通过代理访问namenode访问文件系统HDFS 客户端与Active 节点通信的Java 类使用其确定Active 节点是否活跃 -->
<property>
<name>dfs.client.failover.proxy.provider.ns1</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<!-- Fencing methods used during automatic failover: sshfence logs in over SSH and kills the stale NameNode, with shell(true) as a fallback. The methods must be listed in a single value, one per line. -->
<property>
<name>dfs.ha.fencing.methods</name>
<value>sshfence
shell(true)</value>
</property>
<!-- 这个是使用sshfence隔离机制时才需要配置ssh免登陆 -->
<property>
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>/root/.ssh/id_rsa</value>
</property>
<!-- 配置sshfence隔离机制超时时间这个属性同上如果你是用脚本的方法切换这个应该是可以不配置的 -->
<property>
<name>dfs.ha.fencing.ssh.connect-timeout</name>
<value>30000</value>
</property>
<!-- 这个是开启自动故障转移,如果你没有自动故障转移,这个可以先不配 -->
<property>
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
</property>
<property>
<name>dfs.datanode.max.transfer.threads</name>
<value>8192</value>
</property>
<!-- namenode处理RPC请求线程数增大该值资源占用不大 -->
<property>
<name>dfs.namenode.handler.count</name>
<value>{{ hadoop.namenode['dfs.namenode.handler.count'] }}</value>
</property>
<!-- datanode处理RPC请求线程数增大该值会占用更多内存 -->
<property>
<name>dfs.datanode.handler.count</name>
<value>{{ hadoop.datanode['dfs.datanode.handler.count'] }}</value>
</property>
<!-- balance时可占用的带宽 -->
<property>
<name>dfs.balance.bandwidthPerSec</name>
<value>104857600</value>
</property>
<!-- 磁盘预留空间该空间不会被hdfs占用单位字节-->
<property>
<name>dfs.datanode.du.reserved</name>
<value>53687091200</value>
</property>
<!-- datanode与namenode连接超时时间单位毫秒 2 * heartbeat.recheck.interval + 30000 -->
<property>
<name>heartbeat.recheck.interval</name>
<value>100000</value>
</property>
</configuration>
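With this HA configuration in place, failover health can be checked with the standard HDFS admin commands, for example:
# One NameNode should report active and the other standby
hdfs haadmin -getServiceState nn1
hdfs haadmin -getServiceState nn2
hdfs dfsadmin -report | head -n 20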

View File

@@ -0,0 +1,33 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.jobhistory.address</name>
<value>{{ groups.yarn[0] }}:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>{{ groups.yarn[0] }}:19888</value>
</property>
</configuration>

View File

@@ -0,0 +1,58 @@
#!/bin/bash
source /etc/profile
function setChkconfig(){
echo -e "\n#hadoop\nexport HADOOP_HOME={{ deploy_dir }}/{{ hadoop_version }}\nexport PATH=\$HADOOP_HOME/sbin:\$PATH\nexport PATH=\$HADOOP_HOME/bin:\$PATH\nexport HADOOP_CLASSPATH=\`hadoop classpath\`" >> /etc/profile.d/hadoop.sh
chmod +x /etc/profile.d/hadoop.sh
if [ -x '/etc/init.d/keepyarnhistory' ];then
chkconfig --add keepyarnhistory
chkconfig keepyarnhistory on
fi
if [ -x '/etc/init.d/keepyarnmaster' ];then
chkconfig --add keepyarnmaster
chkconfig keepyarnmaster on
fi
if [ -x '/etc/init.d/keepyarnworker' ];then
chkconfig --add keepyarnworker
chkconfig keepyarnworker on
fi
}
case $1 in
history)
if [ -x '/etc/init.d/keepyarnhistory' ];then
service keepyarnhistory start && sleep 5
history_dae=`ps -ef | grep "dae-yarnhistory.sh" | grep -v grep | wc -l`
if [ $history_dae -lt 1 ];then
nohup {{ deploy_dir }}/{{ hadoop_version }}/sbin/dae-yarnhistory.sh > /dev/null 2>&1 &
fi
fi
;;
master)
if [ -x '/etc/init.d/keepyarnmaster' ];then
service keepyarnmaster start && sleep 5
master_dae=`ps -ef | grep "dae-yarnmaster.sh" | grep -v grep | wc -l`
if [ $master_dae -lt 1 ];then
nohup {{ deploy_dir }}/{{ hadoop_version }}/sbin/dae-yarnmaster.sh > /dev/null 2>&1 &
fi
fi
;;
worker)
if [ -x '/etc/init.d/keepyarnworker' ];then
service keepyarnworker start && sleep 5
worker_dae=`ps -ef | grep dae-yarnworker.sh | grep -v grep | wc -l`
if [ $worker_dae -lt 1 ];then
nohup {{ deploy_dir }}/{{ hadoop_version }}/sbin/dae-yarnworker.sh > /dev/null 2>&1 &
fi
fi
;;
chkconfig)
setChkconfig;;
* )
;;
esac

View File

@@ -0,0 +1,4 @@
{% set combined_group = groups.yarn | union(groups.hdfs) %}
{% for dev_info in combined_group %}
{{dev_info}}
{% endfor %}

View File

@@ -0,0 +1,65 @@
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>hadoop.tmp.dir</name>
<value>file:{{ hdfs_data_dir }}/tmp</value>
</property>
<property>
<name>io.file.buffer.size</name>
<value>131702</value>
</property>
<property>
<name>hadoop.proxyuser.root.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.root.groups</name>
<value>*</value>
</property>
<property>
<name>hadoop.logfile.size</name>
<value>10000000</value>
<description>The max size of each log file</description>
</property>
<property>
<name>hadoop.logfile.count</name>
<value>1</value>
<description>The max number of log files</description>
</property>
<property>
<name>ha.zookeeper.quorum</name>
<value>{{inventory_hostname}}:2181</value>
</property>
<property>
<name>ipc.client.connect.timeout</name>
<value>90000</value>
</property>
</configuration>

View File

@@ -0,0 +1,22 @@
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
</configuration>

View File

@@ -0,0 +1,183 @@
<?xml version="1.0"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<configuration>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.resourcemanager.ha.enabled</name>
<value>false</value>
</property>
<!--声明两台resourcemanager的地址-->
<property>
<name>yarn.resourcemanager.cluster-id</name>
<value>rsmcluster</value>
</property>
<!-- 配置rm1-->
<!-- 配置rm1 hostname-->
<property>
<name>yarn.resourcemanager.hostname</name>
<value>{{ groups.yarn[0] }}</value>
</property>
<!-- 配置rm1 web application-->
<property>
<name>yarn.resourcemanager.webapp.address</name>
<value>{{ groups.yarn[0] }}:8080</value>
</property>
<!-- 配置rm1 调度端口默认8030-->
<property>
<name>yarn.resourcemanager.scheduler.address</name>
<value>{{ groups.yarn[0] }}:8030</value>
</property>
<!-- 默认端口8031-->
<property>
<name>yarn.resourcemanager.resource-tracker.address</name>
<value>{{ groups.yarn[0] }}:8031</value>
</property>
<!-- 配置rm1 应用程序管理器接口的地址端口默认8032-->
<property>
<name>yarn.resourcemanager.address</name>
<value>{{ groups.yarn[0] }}:8032</value>
</property>
<!-- 配置rm1 管理端口默认8033-->
<property>
<name>yarn.resourcemanager.admin.address</name>
<value>{{ groups.yarn[0] }}:8033</value>
</property>
<property>
<name>yarn.resourcemanager.ha.admin.address</name>
<value>{{ groups.yarn[0] }}:23142</value>
</property>
<!--指定zookeeper集群的地址-->
<property>
<name>yarn.resourcemanager.zk-address</name>
<value>{{inventory_hostname}}:2181</value>
</property>
<!--启用自动恢复当任务进行一半rm坏掉就要启动自动恢复默认是false-->
<property>
<name>yarn.resourcemanager.recovery.enabled</name>
<value>true</value>
</property>
<!--启用Nodemanager自动恢复默认是false-->
<property>
<name>yarn.nodemanager.recovery.enabled</name>
<value>true</value>
</property>
<!--配置NodeManager保存运行状态的本地文件系统目录路径 -->
<property>
<name>yarn.nodemanager.recovery.dir</name>
<value>{{ deploy_dir }}/{{ hadoop_version }}/yarn</value>
</property>
<property>
<name>yarn.resourcemanager.store.class</name>
<value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
</property>
<!--配置nm可用的RPC地址默认${yarn.nodemanager.hostname}:0为临时端口。集群重启后nm与rm连接的端口会变化这里指定端口保障nm restart功能 -->
<property>
<name>yarn.nodemanager.address</name>
<value>${yarn.nodemanager.hostname}:9923</value>
</property>
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
<property>
<name>yarn.nodemanager.log-aggregation.roll-monitoring-interval-seconds</name>
<value>3600</value>
</property>
<property>
<name>yarn.nodemanager.remote-app-log-dir</name>
<value>{{ deploy_dir }}/{{ hadoop_version }}/logs/app-logs/</value>
</property>
<!--NM可以为容器分配的物理内存量以MB为单位 默认8192-->
<property>
<name>yarn.nodemanager.resource.memory-mb</name>
<value>{{ hadoop.yarn.nodemanager['yarn.nodemanager.resource.memory-mb'] }}</value>
</property>
<!-- RM上每个容器请求的最小分配以mb为单位默认1024-->
<property>
<name>yarn.scheduler.minimum-allocation-mb</name>
<value>1024</value>
</property>
<!-- RM上每个容器请求的最大分配以mb为单位一般设置为 yarn.nodemanager.resource.memory-mb 一致默认8192-->
<property>
<name>yarn.scheduler.maximum-allocation-mb</name>
<value>{{ hadoop.yarn.nodemanager['yarn.scheduler.maximum-allocation-mb'] }}</value>
</property>
<!--可为容器分配的vcore数。RM调度器在为容器分配资源时使用它。这不是用来限制YARN容器使用的物理内核的数量默认8一般配置为服务器cpu总核数一致 -->
<property>
<name>yarn.nodemanager.resource.cpu-vcores</name>
<value>{{ hadoop.yarn.nodemanager['yarn.nodemanager.resource.cpu-vcores'] }}</value>
</property>
<!--RM上每个容器请求的最小分配(以虚拟CPU内核为单位) ,默认1-->
<property>
<name>yarn.scheduler.minimum-allocation-vcores</name>
<value>1</value>
</property>
<!--RM上每个容器请求的最大分配(以虚拟CPU内核为单位) ,默认32一般配置为略小于yarn.nodemanager.resource.cpu-vcores同时指定任务的slot不应超过该值-->
<property>
<name>yarn.scheduler.maximum-allocation-vcores</name>
<value>{{ hadoop.yarn.nodemanager['yarn.scheduler.maximum-allocation-vcores'] }}</value>
</property>
<property>
<name>yarn.nodemanager.vmem-check-enabled</name>
<value>false</value>
</property>
<property>
<name>yarn.nodemanager.pmem-check-enabled</name>
<value>false</value>
</property>
<!--ApplicationMaster重启次数配置HA后默认为2生产环境可增大该值-->
<property>
<name>yarn.resourcemanager.am.max-attempts</name>
<value>10000</value>
</property>
<property>
<name>yarn.log.server.url</name>
<value>http://{{ groups.yarn[0] }}:19888/jobhistory/logs</value>
</property>
</configuration>

View File

@@ -0,0 +1,79 @@
#!/bin/bash
source /etc/profile
function killService(){
keeppath='/etc/init.d/keepyarnhistory'
if [ -x $keeppath ];then
service keepyarnhistory stop
chkconfig keepyarnhistory off
systemctl daemon-reload
rm -rf /etc/init.d/keepyarnhistory
fi
keeppath='/etc/init.d/keepyarnmaster'
if [ -x $keeppath ];then
service keepyarnmaster stop
chkconfig keepyarnmaster off
systemctl daemon-reload
rm -rf /etc/init.d/keepyarnmaster
fi
keeppath='/etc/init.d/keepyarnworker'
if [ -x $keeppath ];then
service keepyarnworker stop
chkconfig keepyarnworker off
systemctl daemon-reload
rm -rf /etc/init.d/keepyarnworker
fi
}
function killPid(){
livenum=`jps -l | egrep -w "org.apache.hadoop.yarn.server.resourcemanager.ResourceManager" | grep -v grep |wc -l`
if [ $livenum -ne 0 ];then
keeppid=`jps -l |egrep -w "org.apache.hadoop.yarn.server.resourcemanager.ResourceManager" | awk '{print $1}'`
kill -9 $keeppid
fi
livenum=`jps -l | egrep -w "org.apache.hadoop.yarn.server.nodemanager.NodeManager" | grep -v grep |wc -l`
if [ $livenum -ne 0 ];then
keeppid=`jps -l |egrep -w "org.apache.hadoop.yarn.server.nodemanager.NodeManager" | awk '{print $1}'`
kill -9 $keeppid
fi
livenum=`jps -l | egrep -w "org.apache.hadoop.mapreduce.v2.hs.JobHistoryServer" | grep -v grep |wc -l`
if [ $livenum -ne 0 ];then
keeppid=`jps -l |egrep -w "org.apache.hadoop.mapreduce.v2.hs.JobHistoryServer" | awk '{print $1}'`
kill -9 $keeppid
fi
}
function drop_folder(){
FOLDER_NAME=$1
if [ -d "$FOLDER_NAME" ];then
rm -rf $FOLDER_NAME
fi
}
function drop_file(){
FILE_NAME=$1
if [ -f "$FILE_NAME" ];then
rm -rf $FILE_NAME
fi
}
killService
sleep 15
killPid
HAS_HDFS=`jps -l | egrep "org.apache.hadoop.hdfs.qjournal.server.JournalNode|org.apache.hadoop.hdfs.tools.DFSZKFailoverController|org.apache.hadoop.hdfs.server.datanode.DataNode|org.apache.hadoop.hdfs.server.namenode.NameNode" | wc -l`
if [ $HAS_HDFS -eq "0" ];then
drop_folder {{ deploy_dir }}/{{ hadoop_version }}
drop_folder {{ deploy_dir }}/hadoop
drop_folder {{ data_dir }}/hadoop
drop_file /etc/profile.d/hadoop.sh
fi

View File

@@ -0,0 +1,127 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
export YARN_RESOURCEMANAGER_JMX_OPTS="-Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.local.only=false -javaagent:{{ deploy_dir }}/{{ hadoop_version }}/monitor/jmx_prometheus_javaagent-0.12.0.jar=9909:{{ deploy_dir }}/{{ hadoop_version }}/monitor/yarn.yaml"
export YARN_NODEMANAGER_JMX_OPTS="-Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.local.only=false -javaagent:{{ deploy_dir }}/{{ hadoop_version }}/monitor/jmx_prometheus_javaagent-0.12.0.jar=9910:{{ deploy_dir }}/{{ hadoop_version }}/monitor/yarn.yaml"
# User for YARN daemons
export HADOOP_YARN_USER=${HADOOP_YARN_USER:-yarn}
# resolve links - $0 may be a softlink
export YARN_CONF_DIR="${YARN_CONF_DIR:-$HADOOP_YARN_HOME/conf}"
# some Java parameters
export JAVA_HOME=$JAVA_HOME
if [ "$JAVA_HOME" != "" ]; then
#echo "run java in $JAVA_HOME"
JAVA_HOME=$JAVA_HOME
fi
if [ "$JAVA_HOME" = "" ]; then
echo "Error: JAVA_HOME is not set."
exit 1
fi
JAVA=$JAVA_HOME/bin/java
JAVA_HEAP_MAX=-Xmx1000m
# For setting YARN specific HEAP sizes please use this
# Parameter and set appropriately
# YARN_HEAPSIZE=1000
# check envvars which might override default args
if [ "$YARN_HEAPSIZE" != "" ]; then
JAVA_HEAP_MAX="-Xmx""$YARN_HEAPSIZE""m"
fi
# Resource Manager specific parameters
# Specify the max Heapsize for the ResourceManager using a numerical value
# in the scale of MB. For example, to specify an jvm option of -Xmx1000m, set
# the value to 1000.
# This value will be overridden by an Xmx setting specified in either YARN_OPTS
# and/or YARN_RESOURCEMANAGER_OPTS.
# If not specified, the default value will be picked from either YARN_HEAPMAX
# or JAVA_HEAP_MAX with YARN_HEAPMAX as the preferred option of the two.
#export YARN_RESOURCEMANAGER_HEAPSIZE=1000
export YARN_RESOURCEMANAGER_OPTS="$YARN_RESOURCEMANAGER_OPTS {{ hadoop.yarn.resourcemanager.java_opt }}"
# Specify the max Heapsize for the timeline server using a numerical value
# in the scale of MB. For example, to specify an jvm option of -Xmx1000m, set
# the value to 1000.
# This value will be overridden by an Xmx setting specified in either YARN_OPTS
# and/or YARN_TIMELINESERVER_OPTS.
# If not specified, the default value will be picked from either YARN_HEAPMAX
# or JAVA_HEAP_MAX with YARN_HEAPMAX as the preferred option of the two.
#export YARN_TIMELINESERVER_HEAPSIZE=1000
# Specify the JVM options to be used when starting the ResourceManager.
# These options will be appended to the options specified as YARN_OPTS
# and therefore may override any similar flags set in YARN_OPTS
#export YARN_RESOURCEMANAGER_OPTS=
# Node Manager specific parameters
# Specify the max Heapsize for the NodeManager using a numerical value
# in the scale of MB. For example, to specify an jvm option of -Xmx1000m, set
# the value to 1000.
# This value will be overridden by an Xmx setting specified in either YARN_OPTS
# and/or YARN_NODEMANAGER_OPTS.
# If not specified, the default value will be picked from either YARN_HEAPMAX
# or JAVA_HEAP_MAX with YARN_HEAPMAX as the preferred option of the two.
#export YARN_NODEMANAGER_HEAPSIZE=1000
export YARN_NODEMANAGER_OPTS="$YARN_NODEMANAGER_OPTS {{ hadoop.yarn.nodemanager.java_opt }}"
# Specify the JVM options to be used when starting the NodeManager.
# These options will be appended to the options specified as YARN_OPTS
# and therefore may override any similar flags set in YARN_OPTS
#export YARN_NODEMANAGER_OPTS=
# so that filenames w/ spaces are handled correctly in loops below
IFS=
# default log directory & file
if [ "$YARN_LOG_DIR" = "" ]; then
YARN_LOG_DIR="$HADOOP_YARN_HOME/logs"
fi
if [ "$YARN_LOGFILE" = "" ]; then
YARN_LOGFILE='yarn.log'
fi
# default policy file for service-level authorization
if [ "$YARN_POLICYFILE" = "" ]; then
YARN_POLICYFILE="hadoop-policy.xml"
fi
# restore ordinary behaviour
unset IFS
YARN_OPTS="$YARN_OPTS -Dhadoop.log.dir=$YARN_LOG_DIR"
YARN_OPTS="$YARN_OPTS -Dyarn.log.dir=$YARN_LOG_DIR"
YARN_OPTS="$YARN_OPTS -Dhadoop.log.file=$YARN_LOGFILE"
YARN_OPTS="$YARN_OPTS -Dyarn.log.file=$YARN_LOGFILE"
YARN_OPTS="$YARN_OPTS -Dyarn.home.dir=$YARN_COMMON_HOME"
YARN_OPTS="$YARN_OPTS -Dyarn.id.str=$YARN_IDENT_STRING"
YARN_OPTS="$YARN_OPTS -Dhadoop.root.logger=${YARN_ROOT_LOGGER:-INFO,console}"
YARN_OPTS="$YARN_OPTS -Dyarn.root.logger=${YARN_ROOT_LOGGER:-INFO,console}"
if [ "x$JAVA_LIBRARY_PATH" != "x" ]; then
YARN_OPTS="$YARN_OPTS -Djava.library.path=$JAVA_LIBRARY_PATH"
fi
YARN_OPTS="$YARN_OPTS -Dyarn.policy.file=$YARN_POLICYFILE"

View File

@@ -0,0 +1,232 @@
<?xml version="1.0"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<configuration>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.resourcemanager.ha.enabled</name>
<value>true</value>
</property>
<!--声明两台resourcemanager的地址-->
<property>
<name>yarn.resourcemanager.cluster-id</name>
<value>rsmcluster</value>
</property>
<property>
<name>yarn.resourcemanager.ha.rm-ids</name>
<value>rsm1,rsm2</value>
</property>
<!-- 配置rm1-->
<!-- 配置rm1 hostname-->
<property>
<name>yarn.resourcemanager.hostname.rsm1</name>
<value>{{ groups.yarn[0] }}</value>
</property>
<!-- 配置rm1 web application-->
<property>
<name>yarn.resourcemanager.webapp.address.rsm1</name>
<value>{{ groups.yarn[0] }}:8080</value>
</property>
<!-- 配置rm1 调度端口默认8030-->
<property>
<name>yarn.resourcemanager.scheduler.address.rsm1</name>
<value>{{ groups.yarn[0] }}:8030</value>
</property>
<!-- 默认端口8031-->
<property>
<name>yarn.resourcemanager.resource-tracker.address.rsm1</name>
<value>{{ groups.yarn[0] }}:8031</value>
</property>
<!-- 配置rm1 应用程序管理器接口的地址端口默认8032-->
<property>
<name>yarn.resourcemanager.address.rsm1</name>
<value>{{ groups.yarn[0] }}:8032</value>
</property>
<!-- 配置rm1 管理端口默认8033-->
<property>
<name>yarn.resourcemanager.admin.address.rsm1</name>
<value>{{ groups.yarn[0] }}:8033</value>
</property>
<property>
<name>yarn.resourcemanager.ha.admin.address.rsm1</name>
<value>{{ groups.yarn[0] }}:23142</value>
</property>
<!-- 配置rm2-->
<property>
<name>yarn.resourcemanager.hostname.rsm2</name>
<value>{{ groups.yarn[1] }}</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address.rsm2</name>
<value>{{ groups.yarn[1] }}:8080</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address.rsm2</name>
<value>{{ groups.yarn[1] }}:8030</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address.rsm2</name>
<value>{{ groups.yarn[1] }}:8031</value>
</property>
<property>
<name>yarn.resourcemanager.address.rsm2</name>
<value>{{ groups.yarn[1] }}:8032</value>
</property>
<property>
<name>yarn.resourcemanager.admin.address.rsm2</name>
<value>{{ groups.yarn[1] }}:8033</value>
</property>
<property>
<name>yarn.resourcemanager.ha.admin.address.rsm2</name>
<value>{{ groups.yarn[1] }}:23142</value>
</property>
<!--指定zookeeper集群的地址-->
<property>
<name>yarn.resourcemanager.zk-address</name>
<value>{% for dev_info in groups.zookeeper %}{{ dev_info }}:2181{% if not loop.last %},{% endif %}{% endfor %}</value>
</property>
<!--启用自动恢复当任务进行一半rm坏掉就要启动自动恢复默认是false-->
<property>
<name>yarn.resourcemanager.recovery.enabled</name>
<value>true</value>
</property>
<!--启用Nodemanager自动恢复默认是false-->
<property>
<name>yarn.nodemanager.recovery.enabled</name>
<value>true</value>
</property>
<!--配置NodeManager保存运行状态的本地文件系统目录路径 -->
<property>
<name>yarn.nodemanager.recovery.dir</name>
<value>{{ deploy_dir }}/{{ hadoop_version }}/yarn</value>
</property>
<property>
<name>yarn.resourcemanager.store.class</name>
<value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
</property>
<!--配置nm可用的RPC地址默认${yarn.nodemanager.hostname}:0为临时端口。集群重启后nm与rm连接的端口会变化这里指定端口保障nm restart功能 -->
<property>
<name>yarn.nodemanager.address</name>
<value>${yarn.nodemanager.hostname}:9923</value>
</property>
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
<property>
<name>yarn.nodemanager.log-aggregation.roll-monitoring-interval-seconds</name>
<value>3600</value>
</property>
<property>
<name>yarn.nodemanager.remote-app-log-dir</name>
<value>{{ deploy_dir }}/{{ hadoop_version }}/logs/app-logs/</value>
</property>
<!--NM可以为容器分配的物理内存量以MB为单位 默认8192-->
<property>
<name>yarn.nodemanager.resource.memory-mb</name>
<value>{{ hadoop.yarn.nodemanager['yarn.nodemanager.resource.memory-mb'] }}</value>
</property>
<!-- RM上每个容器请求的最小分配以mb为单位默认1024-->
<property>
<name>yarn.scheduler.minimum-allocation-mb</name>
<value>1024</value>
</property>
<!-- RM上每个容器请求的最大分配以mb为单位一般设置为 yarn.nodemanager.resource.memory-mb 一致默认8192-->
<property>
<name>yarn.scheduler.maximum-allocation-mb</name>
<value>{{ hadoop.yarn.nodemanager['yarn.scheduler.maximum-allocation-mb'] }}</value>
</property>
<!--可为容器分配的vcore数。RM调度器在为容器分配资源时使用它。这不是用来限制YARN容器使用的物理内核的数量默认8一般配置为服务器cpu总核数一致 -->
<property>
<name>yarn.nodemanager.resource.cpu-vcores</name>
<value>{{ hadoop.yarn.nodemanager['yarn.nodemanager.resource.cpu-vcores'] }}</value>
</property>
<!--RM上每个容器请求的最小分配(以虚拟CPU内核为单位) ,默认1-->
<property>
<name>yarn.scheduler.minimum-allocation-vcores</name>
<value>1</value>
</property>
<!--RM上每个容器请求的最大分配(以虚拟CPU内核为单位) ,默认32一般配置为略小于yarn.nodemanager.resource.cpu-vcores同时指定任务的slot不应超过该值-->
<property>
<name>yarn.scheduler.maximum-allocation-vcores</name>
<value>{{ hadoop.yarn.nodemanager['yarn.scheduler.maximum-allocation-vcores'] }}</value>
</property>
<property>
<name>yarn.nodemanager.vmem-check-enabled</name>
<value>false</value>
</property>
<property>
<name>yarn.nodemanager.pmem-check-enabled</name>
<value>false</value>
</property>
<!--ApplicationMaster重启次数配置HA后默认为2生产环境可增大该值-->
<property>
<name>yarn.resourcemanager.am.max-attempts</name>
<value>10000</value>
</property>
<property>
<name>yarn.log.server.url</name>
<value>http://{{ groups.yarn[0] }}:19888/jobhistory/logs</value>
</property>
</configuration>
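A corresponding sanity check for the ResourceManager HA pair and the registered NodeManagers, run after the playbook finishes:
source /etc/profile
# rsm1 and rsm2 are the rm-ids declared above; one should be active, the other standby
yarn rmadmin -getServiceState rsm1
yarn rmadmin -getServiceState rsm2
# Every NodeManager host should appear as RUNNING
yarn node -list -all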

View File

@@ -0,0 +1,15 @@
# Hadoop version
hadoop_version: hadoop-2.7.1
# Flink version
flink_version: flink-1.13.1
# JDK version
java_version: 1.8.0_73
# Data directory
hdfs_data_dir: "{{ data_dir }}/{{ hadoop_version }}/data/hadoop"
# Clusters with 5 or more nodes: the first two hosts run ResourceManager, the rest run NodeManager
# Clusters with fewer than 5 nodes: the first two hosts run ResourceManager and every host also runs NodeManager
cluster_limit: "5"
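To make the cluster_limit rule concrete, a sketch of how a five-node yarn group maps onto the daemons; the host names are purely illustrative:
# node1, node2 -> ResourceManager + JobHistoryServer; node3..node5 -> NodeManager
# With fewer than 5 hosts, node1 and node2 would additionally run NodeManager
cat > hosts.example <<'EOF'
[yarn]
node1
node2
node3
node4
node5
EOF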

View File

@@ -0,0 +1,5 @@
[zookeeper]
192.168.45.102
[ignite]
192.168.45.102

View File

@@ -0,0 +1,7 @@
- hosts: ignite
remote_user: root
roles:
- role
vars_files:
- role/vars/main.yml

Some files were not shown because too many files have changed in this diff.