提交初版
This commit is contained in:
30
galaxy-data-platform/configuration/config.yml
Normal file
30
galaxy-data-platform/configuration/config.yml
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
#============================Galaxy OLAP Configuration===============================#
|
||||||
|
#The default installation location
|
||||||
|
deploy_dir: /data/hdd/olap
|
||||||
|
|
||||||
|
#The default data storage location, used for storing application data, logs and configuration files
|
||||||
|
data_dir: /data/sdd/olap
|
||||||
|
|
||||||
|
#Use commas (,) to separate the network segments that the firewall allows to access
|
||||||
|
allowed_ips: 192.168.44.0/24,192.168.45.0/24,192.168.58.0/24,192.168.41.0/24
|
||||||
|
|
||||||
|
#============================Keepalived Configuration===============================#
|
||||||
|
#Specify the virtual IP address of the interface. It is used to configure high availability for the OLAP query engine
|
||||||
|
#Only one IP address can be configured on the same network segment
|
||||||
|
#The virtual IP addresses configured below must be unique
|
||||||
|
vrrp_instance:
|
||||||
|
default:
|
||||||
|
virtual_ipaddress: 192.168.45.102
|
||||||
|
interface: eth0
|
||||||
|
virtual_router_id: 61
|
||||||
|
oss:
|
||||||
|
virtual_ipaddress: 192.168.45.102
|
||||||
|
interface: eth0
|
||||||
|
virtual_router_id: 62
|
||||||
|
|
||||||
|
#============================OSS Configuration===============================#
|
||||||
|
#HOS token
|
||||||
|
hos_server_token: fea0ee76be8147bd9b53ad995b7ef603
|
||||||
|
|
||||||
|
#HOS token (encrypted)
|
||||||
|
hos_servrt_token_encrypt: M+0IMYS1+XENltUe585ahhqwY+QZJmnvtxML1vXExB9aO+CPT3GfsP4rtbVuWXpf
|
||||||
143
galaxy-data-platform/configuration/galaxy-data-platform.yml
Normal file
143
galaxy-data-platform/configuration/galaxy-data-platform.yml
Normal file
@@ -0,0 +1,143 @@
|
|||||||
|
zookeeper:
|
||||||
|
#Running memory of the Zookeeper.
|
||||||
|
java_opts: -Xmx1024m -Xms1024m
|
||||||
|
|
||||||
|
mariadb:
|
||||||
|
#Used to cache data and index data from tables in the InnoDB storage engine.
|
||||||
|
innodb_buffer_pool_size: 2048
|
||||||
|
|
||||||
|
nacos:
|
||||||
|
#Running memory of the Nacos.
|
||||||
|
java_opt: '-Xmx1024m -Xms1024m -Xmn256m'
|
||||||
|
|
||||||
|
druid:
|
||||||
|
broker:
|
||||||
|
#Running memory of the Druid-Broker.
|
||||||
|
java_opts: -Xmx1024m -Xms1024m
|
||||||
|
#Worker tasks also use off-heap ("direct") memory. Set the amount of direct memory available (-XX:MaxDirectMemorySize) to at least (druid.processing.numThreads + 1) * druid.processing.buffer.sizeBytes
|
||||||
|
MaxDirectMemorySize: 512m
|
||||||
|
#This specifies a buffer size (less than 2GiB), for the storage of intermediate results
|
||||||
|
druid.processing.buffer.sizeBytes: 50000000
|
||||||
|
#The number of direct memory buffers available for merging query results.
|
||||||
|
druid.processing.numMergeBuffers: 4
|
||||||
|
#The number of processing threads to have available for parallel processing of segments.
|
||||||
|
druid.processing.numThreads: 5
|
||||||
|
coordinator:
|
||||||
|
#Running memory of the Druid-Coordinator.
|
||||||
|
java_opts: -Xmx1024m -Xms1024m
|
||||||
|
historical:
|
||||||
|
#Running memory of the Druid-Historical.
|
||||||
|
java_opts: -Xmx1024m -Xms1024m
|
||||||
|
#The size of the process's temporary cache data on disk
|
||||||
|
druid.segmentCache.locations: 300000000000
|
||||||
|
#Worker tasks also use off-heap ("direct") memory. Set the amount of direct memory available (-XX:MaxDirectMemorySize) to at least (druid.processing.numThreads + 1) * druid.processing.buffer.sizeBytes
|
||||||
|
MaxDirectMemorySize: 512m
|
||||||
|
#This specifies a buffer size (less than 2GiB), for the storage of intermediate results
|
||||||
|
druid.processing.buffer.sizeBytes: 50000000
|
||||||
|
#The number of direct memory buffers available for merging query results.
|
||||||
|
druid.processing.numMergeBuffers: 4
|
||||||
|
#The number of processing threads to have available for parallel processing of segments.
|
||||||
|
druid.processing.numThreads: 5
|
||||||
|
middlemanager:
|
||||||
|
#Running memory of the Druid-Middlemanager.
|
||||||
|
java_opts: -Xmx1024m -Xms1024m
|
||||||
|
druid.indexer.fork.property.druid.processing.numMergeBuffers: 2
|
||||||
|
druid.indexer.fork.property.druid.processing.buffer.sizeBytes: 20000000
|
||||||
|
druid.indexer.fork.property.druid.processing.numThreads: 1
|
||||||
|
|
||||||
|
hadoop:
|
||||||
|
namenode:
|
||||||
|
#Running memory of the Hadoop Namenode.
|
||||||
|
java_opt: '-Xmx1024m -Xms1024m'
|
||||||
|
#The number of Namenode RPC server threads that listen to requests from clients.
|
||||||
|
dfs.namenode.handler.count: 30
|
||||||
|
datanode:
|
||||||
|
#Running memory of the Hadoop Datanode.
|
||||||
|
java_opt: '-Xmx1024m -Xms1024m'
|
||||||
|
#The number of server threads for the datanode.
|
||||||
|
dfs.datanode.handler.count: 40
|
||||||
|
journalnode:
|
||||||
|
#Running memory of the Hadoop JournalNode.
|
||||||
|
java_opt: '-Xmx1024m -Xms1024m'
|
||||||
|
zkfc:
|
||||||
|
#Running memory of the Hadoop DFSZKFailoverController.
|
||||||
|
java_opt: '-Xmx1024m -Xms1024m'
|
||||||
|
yarn:
|
||||||
|
resourcemanager:
|
||||||
|
#Running memory of the Hadoop ResourceManager.
|
||||||
|
java_opt: '-Xmx1024m -Xms1024m'
|
||||||
|
nodemanager:
|
||||||
|
#Running memory of the Hadoop NodeManager.
|
||||||
|
java_opt: '-Xmx1024m -Xms1024m'
|
||||||
|
#Amount of physical memory, in MB, that can be allocated for containers.
|
||||||
|
yarn.nodemanager.resource.memory-mb: 16384
|
||||||
|
#The maximum allocation for every container request at the RM in MBs.
|
||||||
|
yarn.scheduler.maximum-allocation-mb: 16384
|
||||||
|
#Number of vcores that can be allocated for containers. This is used by the RM scheduler when allocating resources for containers.
|
||||||
|
yarn.nodemanager.resource.cpu-vcores: 48
|
||||||
|
#The maximum allocation for every container request at the RM in terms of virtual CPU cores.
|
||||||
|
yarn.scheduler.maximum-allocation-vcores: 48
|
||||||
|
|
||||||
|
flink:
|
||||||
|
#Total Process Memory size for the JobManager.
|
||||||
|
jobmanager.memory.process.size: 1024M
|
||||||
|
#Total Process Memory size for the TaskExecutors.
|
||||||
|
taskmanager.memory.process.size: 10240M
|
||||||
|
#This is the size of off-heap memory managed for sorting, hash tables, caching of intermediate results and state backend.
|
||||||
|
taskmanager.memory.managed.size: 512M
|
||||||
|
#Framework Off-Heap Memory size for TaskExecutors. This is the size of off-heap memory reserved for TaskExecutor framework
|
||||||
|
taskmanager.memory.framework.off-heap.size: 128M
|
||||||
|
#JVM Metaspace Size for the TaskExecutors.
|
||||||
|
taskmanager.memory.jvm-metaspace.size: 1024M
|
||||||
|
#Max Network Memory size for TaskExecutors. Network Memory is off-heap memory reserved for ShuffleEnvironment.
|
||||||
|
taskmanager.memory.network.max: 256M
|
||||||
|
#The number of parallel operator or user function instances that a single TaskManager can run.
|
||||||
|
#This value is typically proportional to the number of physical CPU cores that the TaskManager's machine has (e.g., equal to the number of cores, or half the number of cores).
|
||||||
|
taskmanager.numberOfTaskSlots: 1
|
||||||
|
|
||||||
|
hbase:
|
||||||
|
common:
|
||||||
|
#The HBase resource isolation function is used to group tables for storage.
|
||||||
|
enable_rsgroup: false
|
||||||
|
hmaster:
|
||||||
|
#Running memory of the HBase HMaster.
|
||||||
|
java_opt: '-Xmx1024m -Xms1024m'
|
||||||
|
regionserver:
|
||||||
|
#Running memory of the HBase HRegionserver.
|
||||||
|
java_opt: '-Xmx1024m -Xms1024m -Xmn128m'
|
||||||
|
#This defines the number of threads the region server keeps open to serve requests to tables. It should generally be set to (number of cores - 1)
|
||||||
|
hbase.regionserver.handler.count: 40
|
||||||
|
#If any one of a column families' HStoreFiles has grown to exceed this value, the hosting HRegion is split in two.
|
||||||
|
hbase.hregion.max.filesize: 10737418240
|
||||||
|
#Indicates the memory used by all read caches. The value can be the actual memory value, expressed in MB
|
||||||
|
hbase.bucketcache.size: 100
|
||||||
|
|
||||||
|
kafka:
|
||||||
|
#Running memory of the Kafka.
|
||||||
|
java_opt: '-Xmx1024m -Xms1024m'
|
||||||
|
#The minimum age of a log file to be eligible for deletion due to age
|
||||||
|
log.retention.hours: 168
|
||||||
|
#A size-based retention policy for logs, in bytes
|
||||||
|
log.retention.bytes: 10737418240
|
||||||
|
|
||||||
|
clickhouse:
|
||||||
|
#Limit on total memory usage. Zero means Unlimited.
|
||||||
|
max_server_memory_usage: 30000000000
|
||||||
|
#Sets the number of threads performing background merges and mutations for tables with MergeTree engines.
|
||||||
|
background_pool_size: 16
|
||||||
|
|
||||||
|
hos:
|
||||||
|
#Running memory of the HOS.
|
||||||
|
java_opt: '-Xmx1024m -Xms1024m -Xmn512m'
|
||||||
|
#Download files quickly. Used for HBase with memory larger than 20GB. open: 1, close: 0
|
||||||
|
isQuickDownloadFile: 0
|
||||||
|
#Whether to enable SSL. open: 1, close: 0
|
||||||
|
enable_ssl: 0
|
||||||
|
#The name of the Nacos namespace where the configuration is stored
|
||||||
|
nacos.config.namespace: prod
|
||||||
|
|
||||||
|
ignite:
|
||||||
|
#Running memory of the Ignite.
|
||||||
|
java_opt: '-Xmx1024m -Xms1024m'
|
||||||
|
#Setting region max size equal to physical RAM size(5 GB).
|
||||||
|
maxSize: '#{5L * 1024 * 1024 * 1024}'
|
||||||
111
galaxy-data-platform/configuration/hosts
Normal file
111
galaxy-data-platform/configuration/hosts
Normal file
@@ -0,0 +1,111 @@
|
|||||||
|
#==============================================================================
|
||||||
|
# Basic Components
|
||||||
|
#
|
||||||
|
# Orchestration & Coordinator & Configuration & Cold Storage
|
||||||
|
#==============================================================================
|
||||||
|
|
||||||
|
#The cluster uses master-master replication mode, with a maximum of 2 servers.
|
||||||
|
[mariadb]
|
||||||
|
|
||||||
|
#Apache Zookeeper is a centralized service for maintaining configuration information, naming, providing distributed synchronization, and providing group services.
|
||||||
|
#Cluster mode requires at least 3 servers. The number of nodes must be odd, e.g. 3 or 5 nodes.
|
||||||
|
[zookeeper]
|
||||||
|
|
||||||
|
|
||||||
|
#Alibaba Nacos is an easy-to-use dynamic service discovery, configuration and service management platform.
|
||||||
|
#Cluster mode requires at least 3 servers (multi-node HA mode).
|
||||||
|
[nacos]
|
||||||
|
|
||||||
|
|
||||||
|
#Apache Hadoop HDFS(Hadoop Distributed File System)
|
||||||
|
#HDFS is deployed only in cluster mode.
|
||||||
|
#At least 3 servers are required. An HDFS cluster consists of two NameNodes and a certain number of DataNodes.
|
||||||
|
[hdfs]
|
||||||
|
|
||||||
|
#==============================================================================
|
||||||
|
# BigData Processing Components
|
||||||
|
#
|
||||||
|
# Big data is a term that refers to the massive volume, variety, and velocity of data that is generated from various sources and needs to be stored, processed, and analyzed efficiently.
|
||||||
|
# The Big Data processing component is used to provide a platform for fast and efficient processing
|
||||||
|
#==============================================================================
|
||||||
|
|
||||||
|
#Apache Kafka is a distributed event streaming platform,used for high-performance data pipelines, streaming analytics.
|
||||||
|
#Cluster mode requires at least 3 servers. By default, CMAK (management tool) is installed on the first server.
|
||||||
|
[kafka]
|
||||||
|
|
||||||
|
|
||||||
|
#Apache Hadoop Yarn,Includes the Flink/Groot-stream runtime environment.
|
||||||
|
#Yarn is the resource management and job scheduling technology in the open source Hadoop distributed processing framework.
|
||||||
|
#Cluster mode requires at least 3 servers. A Yarn cluster consists of two ResourceManager (RM) nodes and a certain number of NodeManager (NM) nodes.
|
||||||
|
[yarn]
|
||||||
|
|
||||||
|
|
||||||
|
#==============================================================================
|
||||||
|
# Analytic Storage Components
|
||||||
|
#
|
||||||
|
# This is a data storage solution designed to support large-scale data analysis and data mining workloads.
|
||||||
|
# The analytic storage component offers high performance, scalability, and flexibility to meet the demands of processing vast amounts of structured and unstructured data.
|
||||||
|
#==============================================================================
|
||||||
|
|
||||||
|
#Apache HBase is the hosting of very large tables -- billions of rows X millions of columns -- atop clusters of commodity hardware
|
||||||
|
#Cluster mode requires at least 3 servers. An HBase cluster consists of three HMaster nodes and a certain number of HRegionServer nodes.
|
||||||
|
[hbase]
|
||||||
|
|
||||||
|
|
||||||
|
#Apache Druid is a high performance, real-time analytics database that delivers sub-second queries on streaming and batch data at scale and under load.
|
||||||
|
#Cluster mode requires at least 3 servers. A Druid cluster consists of two master/query nodes and a certain number of worker nodes.
|
||||||
|
[druid]
|
||||||
|
|
||||||
|
|
||||||
|
#Yandex ClickHouse is the fastest and most resource efficient open-source database for real-time apps and analytics.
|
||||||
|
#Cluster mode requires at least 3 servers. A ClickHouse cluster consists of two query nodes and a certain number of data nodes.
|
||||||
|
[clickhouse]
|
||||||
|
|
||||||
|
|
||||||
|
#ArangoDB is a scalable graph database system to drive value from connected data, faster.
|
||||||
|
#Only support single server deployment.
|
||||||
|
[arangodb]
|
||||||
|
|
||||||
|
|
||||||
|
#Redis is an open source (BSD licensed), in-memory data structure store, used as a database, cache, and message broker.
|
||||||
|
#Supports single-node and master-slave modes
|
||||||
|
[redis]
|
||||||
|
|
||||||
|
|
||||||
|
#Apache Ignite is a leading distributed database management system for high-performance computing with in-memory speed
|
||||||
|
#Supports single-node and cluster modes
|
||||||
|
[ignite]
|
||||||
|
|
||||||
|
|
||||||
|
#==============================================================================
|
||||||
|
# OLAP Self-research service
|
||||||
|
#
|
||||||
|
#==============================================================================
|
||||||
|
|
||||||
|
#The default proxy. Includes Nginx/Keepalived; in standalone mode, only Nginx.
|
||||||
|
#A maximum of two nodes.
|
||||||
|
[loadbalancer]
|
||||||
|
|
||||||
|
|
||||||
|
#The clickhouse query proxy,Usually deployed with loadbalancer.
|
||||||
|
[chproxy]
|
||||||
|
|
||||||
|
|
||||||
|
#Galaxy-hos-service is a distributed object storage service.
|
||||||
|
#Included components: Keepalived/Nginx/Galaxy-hos-service. In standalone mode, only Galaxy-hos-service/Nginx.
|
||||||
|
#Cluster mode requires at least 2 servers. Keepalived and Nginx services are deployed on the first two nodes by default.
|
||||||
|
[galaxy_hos_service]
|
||||||
|
|
||||||
|
|
||||||
|
#The query gateway,Provides a unified query entry
|
||||||
|
[galaxy_qgw_service]
|
||||||
|
|
||||||
|
|
||||||
|
#A lightweight distributed task scheduling framework.
|
||||||
|
#Include components: Galaxy-job-admin/Galaxy-job-executor
|
||||||
|
[galaxy_job_service]
|
||||||
|
|
||||||
|
|
||||||
|
#The report execution service.
|
||||||
|
[saved_query_scheduler]
|
||||||
|
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user