galaxy-deployment-ansible-d…/full_hosts

#==============================================================================
# Basic Components
#
# Orchestration & Coordinator & Configuration & Cold Storage
#==============================================================================

#Nodes on which MariaDB is deployed. Uses master-master replication mode; maximum 2 servers.
[mariadb]


#Apache Zookeeper is a centralized service for maintaining configuration information, naming, providing distributed synchronization, and providing group services.
#At least 3 servers; an odd number of servers is strongly recommended.
[zookeeper]


#Alibaba Nacos is an easy-to-use dynamic service discovery, configuration and service management platform.
#At least 3 servers; multi-node HA mode.
[nacos]


#Apache Hadoop HDFS(Hadoop Distributed File System)
#At least 3 servers. An HDFS cluster consists of two NameNodes and a certain number of DataNodes.
[hdfs]


#==============================================================================
# BigData Processing Components
#
# Big data is a term that refers to the massive volume, variety, and velocity of data that is generated from various sources and needs to be stored, processed, and analyzed efficiently.
# The Big Data processing component is used to provide a platform for fast and efficient processing
#==============================================================================

#Apache Kafka is a distributed event streaming platform used for high-performance data pipelines and streaming analytics.
#At least 3 servers. By default, CMAK (management tool) is installed on the first server.
[kafka]


#Apache Hadoop YARN, including the Flink runtime environment.
#YARN is the resource management and job scheduling technology in the open source Hadoop distributed processing framework.
#At least 3 servers. A YARN cluster consists of two ResourceManagers (RM) and a certain number of NodeManagers (NM).
[yarn]


#Groot Stream is a real-time data stream processing platform.
[grootstream]

#==============================================================================
# BigData Storage & Analytics Components
#
# Big data is a term that refers to the massive volume, variety, and velocity of data that is generated from various sources and needs to be stored, processed, and analyzed efficiently.
# The Big Data storage and analytics components provide the databases in which that data is stored and queried.
#==============================================================================

#Apache HBase hosts very large tables -- billions of rows X millions of columns -- atop clusters of commodity hardware.
#At least 3 servers. An HBase cluster consists of three HMasters and a certain number of HRegionServers.
[hbase]


#Apache Druid is a high performance, real-time analytics database that delivers sub-second queries on streaming and batch data at scale and under load.
#At least 3 servers. A Druid cluster consists of two master/query nodes and a certain number of worker nodes.
[druid]


#Yandex ClickHouse is the fastest and most resource efficient open-source database for real-time apps and analytics.
#At least 3 servers. A ClickHouse cluster consists of two query nodes and a certain number of data nodes.
[clickhouse]


#ArangoDB is a scalable graph database system to drive value from connected data, faster.
#Only single-server deployment is supported.
[arangodb]

#Redis is an open source (BSD licensed), in-memory data structure store, used as a database, cache, and message broker.
#Supports single-node and master-slave modes
[redis]

#Apache Ignite is a leading distributed database management system for high-performance computing with in-memory speed
#Supports single-node and cluster modes
[ignite]

#==============================================================================
# OLAP self-developed services
#
#==============================================================================
#The default load balancer includes keepalived/Galaxy-gateway-nginx.
[loadbalancer]


#Chproxy is an HTTP proxy and load balancer for ClickHouse
[chproxy]


#Galaxy-hos-service is a distributed object storage service.
#Included components: Keepalived/Nginx/galaxy-hos-service
#At least 2 servers. By default, the keepalived and nginx services are deployed on the first two nodes.
[galaxy_hos_service]


#The query gateway; provides a unified query entry point.
[galaxy_qgw_service]


#A lightweight distributed task scheduling framework.
#Included components: Galaxy-job-admin/Galaxy-job-executor
[galaxy_job_service]

#Saved query scheduler. TODO: describe this group's purpose and server requirements.
[saved_query_scheduler]


#==============================================================================
# Monitoring module
#
#==============================================================================

#Receives metrics pushed by programs; Pushgateway then exposes these metrics to Prometheus.
[pushgateway]
提交各组件部署Ansible剧本初版 2024-01-18 15:35:33 +08:00			`#==============================================================================`
			`# Basic Components`
			`#`
			`# Orchestration & Coordinator & Configuration & Cold Storage`
			`#==============================================================================`

			`#Configure Mariadb deployed nodes,Use master-master replication mode,maximum 2 servers.`
			`[mariadb]`


			`#Apache Zookeeper is a centralized service for maintaining configuration information, naming, providing distributed synchronization, and providing group services.`
			`#At least 3 servers,And it is strongly recommended that you have an odd number of servers.`
			`[zookeeper]`


			`#Alibaba Nacos an easy-to-use dynamic service discovery, configuration and service management platform`
			`#At least 3 servers,Multi-node HA mode.`
			`[nacos]`


			`#Apache Hadoop HDFS(Hadoop Distributed File System)`
			`#At least 3 servers,An HDFS cluster consists of two Namenodes and a certain number of Datanodes node.`
			`[hdfs]`


			`#==============================================================================`
			`# BigData Processing Components`
			`#`
			`# Big data is a term that refers to the massive volume, variety, and velocity of data that is generated from various sources and needs to be stored, processed, and analyzed efficiently.`
			`# The Big Data processing component is used to provide a platform for fast and efficient processing`
			`#==============================================================================`

			`#Apache Kafka is a distributed event streaming platform,used for high-performance data pipelines, streaming analytics.`
			`#At least 3 servers,By default install CMAK(Management tool) on the first server.`
			`[kafka]`


			`#Apache Hadoop Yarn,Includes the Flink runtime environment.`
			`#Yarn is the resource management and job scheduling technology in the open source Hadoop distributed processing framework.`
			`#At least 3 servers,A Yarn cluster consists of two ResourceManager (RM) and a certain number of NodeManager(NM) node.`
			`[yarn]`


			`#Groot Stream is a real-time data stream processing platform.`
			`[grootstream]`

			`#==============================================================================`
			`# BigData Processing Components`
			`#`
			`# Big data is a term that refers to the massive volume, variety, and velocity of data that is generated from various sources and needs to be stored, processed, and analyzed efficiently.`
			`# The Big Data processing component is used to provide a platform for fast and efficient processing`
			`#==============================================================================`

			`#Apache HBase is the hosting of very large tables -- billions of rows X millions of columns -- atop clusters of commodity hardware`
			`#At least 3 servers,A HBase cluster consists of three HMaster and a certain number of HRegionServer node.`
			`[hbase]`


			`#Apache Druid is a high performance, real-time analytics database that delivers sub-second queries on streaming and batch data at scale and under load.`
			`#At least 3 servers,A Druid cluster consists of two master/query and a certain number of worker node.`
			`[druid]`


			`#Yandex ClickHouse is the fastest and most resource efficient open-source database for real-time apps and analytics.`
			`#At least 3 servers,A Clickhouse cluster consists of two query and a certain number of data node.`
			`[clickhouse]`


			`#ArangoDB is a scalable graph database system to drive value from connected data, faster.`
			`#Only support single server deployment`
			`[arangodb]`

			`#Redis is an open source (BSD licensed), in-memory data structure store, used as a database, cache, and message broker.`
			`#Supports single-node and master-slave modes`
			`[redis]`

			`#Apache Ignite is a leading distributed database management system for high-performance computing with in-memory speed`
			`#Supports single-node and cluster modes`
			`[ignite]`

			`#==============================================================================`
			`# OLAP Self-research service`
			`#`
			`#==============================================================================`
			`#Default load balancer include keepalived/Galaxy-gateway-nginx`
			`[loadbalancer]`


			`#Chproxy is an HTTP proxy and load balancer for ClickHouse`
			`[chproxy]`


			`#Galaxy-hos-service is a distributed object storage service.`
			`#Include components:Keepalived/Nginx/galaxy-hos-service`
			`#At least 2 servers,keepalived and nginx services are deployed on the first two nodes by default.`
			`[galaxy_hos_service]`


			`#The query gateway,Provides a unified query entry`
			`[galaxy_qgw_service]`


			`#A lightweight distributed task scheduling framework.`
			`#Include components: Galaxy-job-admin/Galaxy-job-executor`
			`[galaxy_job_service]`

			`#`
			`[saved_query_scheduler]`


			`#==============================================================================`
			`# Monitoring module`
			`#`
			`#==============================================================================`

			`#Receive metrics pushed by the program. Pushgateway then exposed these metrics to Prometheus.`
			`[pushgateway]`