提交初版

This commit is contained in:
qidaijie
2024-02-22 10:53:59 +08:00
parent b368a1e16a
commit f8c919d9d0
416 changed files with 10167 additions and 0 deletions

View File

@@ -0,0 +1,30 @@
#============================Galaxy OLAP Configuration===============================#
#Default installation directory.
deploy_dir: /data/hdd/olap
#Default data storage directory; used for storing application data, logs and configuration files.
data_dir: /data/sdd/olap
#Network segments (CIDR notation) that the firewall allows access from; separate multiple segments with commas (,).
allowed_ips: 192.168.44.0/24,192.168.45.0/24,192.168.58.0/24,192.168.41.0/24
#============================Keepalived Configuration===============================#
#Virtual IP address bound to the given interface; it is used to provide high availability for the OLAP query engine.
#Only one IP address can be configured on the same network segment.
#The virtual IP addresses configured below must be unique.
#NOTE(review): "default" and "oss" currently share the same virtual_ipaddress (192.168.45.102),
#which contradicts the uniqueness requirement stated here - confirm and assign distinct addresses.
vrrp_instance:
default:
virtual_ipaddress: 192.168.45.102
interface: eth0
virtual_router_id: 61
oss:
virtual_ipaddress: 192.168.45.102
interface: eth0
virtual_router_id: 62
#============================OSS Configuration===============================#
#HOS token.
#NOTE(review): this credential is committed in plain text - consider sourcing it from a secret store or environment variable.
hos_server_token: fea0ee76be8147bd9b53ad995b7ef603
#Encrypted HOS token.
#NOTE(review): the key name is spelled "servrt" (presumably meant "server"); it is kept as-is here
#because consumers of this file may reference it by this exact name - rename everywhere together if fixed.
hos_servrt_token_encrypt: M+0IMYS1+XENltUe585ahhqwY+QZJmnvtxML1vXExB9aO+CPT3GfsP4rtbVuWXpf

View File

@@ -0,0 +1,143 @@
zookeeper:
#JVM heap options for Zookeeper (-Xmx: maximum heap size, -Xms: initial heap size).
java_opts: -Xmx1024m -Xms1024m
mariadb:
#Size of the InnoDB buffer pool, which caches table data and index data for the InnoDB storage engine.
#NOTE(review): no unit is given; presumably MB - confirm against the template that consumes this value.
innodb_buffer_pool_size: 2048
nacos:
#JVM memory options for Nacos (-Xmx: maximum heap size, -Xms: initial heap size, -Xmn: young generation size).
java_opt: '-Xmx1024m -Xms1024m -Xmn256m'
druid:
broker:
#JVM heap options for the Druid Broker.
java_opts: -Xmx1024m -Xms1024m
#The Broker also uses off-heap ("direct") memory. Set -XX:MaxDirectMemorySize to at least
#(druid.processing.numThreads + druid.processing.numMergeBuffers + 1) * druid.processing.buffer.sizeBytes.
#With the values below: (5 + 4 + 1) * 50000000 = 500000000 bytes, which fits within 512m.
MaxDirectMemorySize: 512m
#Size in bytes (must be less than 2GiB) of each off-heap buffer used for storing intermediate results.
druid.processing.buffer.sizeBytes: 50000000
#The number of direct memory buffers available for merging query results.
druid.processing.numMergeBuffers: 4
#The number of processing threads available for parallel processing of segments.
druid.processing.numThreads: 5
coordinator:
#JVM heap options for the Druid Coordinator.
java_opts: -Xmx1024m -Xms1024m
historical:
#JVM heap options for the Druid Historical.
java_opts: -Xmx1024m -Xms1024m
#Size of the segment cache kept on local disk by the Historical.
#NOTE(review): presumably a byte count (300000000000 is about 300 GB) - confirm against the consuming template.
druid.segmentCache.locations: 300000000000
#The Historical also uses off-heap ("direct") memory. Set -XX:MaxDirectMemorySize to at least
#(druid.processing.numThreads + druid.processing.numMergeBuffers + 1) * druid.processing.buffer.sizeBytes.
#With the values below: (5 + 4 + 1) * 50000000 = 500000000 bytes, which fits within 512m.
MaxDirectMemorySize: 512m
#Size in bytes (must be less than 2GiB) of each off-heap buffer used for storing intermediate results.
druid.processing.buffer.sizeBytes: 50000000
#The number of direct memory buffers available for merging query results.
druid.processing.numMergeBuffers: 4
#The number of processing threads available for parallel processing of segments.
druid.processing.numThreads: 5
middlemanager:
#JVM heap options for the Druid MiddleManager.
java_opts: -Xmx1024m -Xms1024m
#The druid.indexer.fork.property.* keys below carry the same processing options as the sections above
#(merge buffers, buffer size in bytes, processing threads); per the Druid documentation this prefix
#passes the property on to the task processes forked by the MiddleManager.
druid.indexer.fork.property.druid.processing.numMergeBuffers: 2
druid.indexer.fork.property.druid.processing.buffer.sizeBytes: 20000000
druid.indexer.fork.property.druid.processing.numThreads: 1
hadoop:
namenode:
#JVM heap options for the Hadoop NameNode.
java_opt: '-Xmx1024m -Xms1024m'
#The number of NameNode RPC server threads that listen to requests from clients.
dfs.namenode.handler.count: 30
datanode:
#JVM heap options for the Hadoop DataNode.
java_opt: '-Xmx1024m -Xms1024m'
#The number of server threads for the DataNode.
dfs.datanode.handler.count: 40
journalnode:
#JVM heap options for the Hadoop JournalNode.
java_opt: '-Xmx1024m -Xms1024m'
zkfc:
#JVM heap options for the Hadoop DFSZKFailoverController.
java_opt: '-Xmx1024m -Xms1024m'
yarn:
resourcemanager:
#JVM heap options for the Hadoop ResourceManager.
java_opt: '-Xmx1024m -Xms1024m'
nodemanager:
#JVM heap options for the Hadoop NodeManager.
java_opt: '-Xmx1024m -Xms1024m'
#Amount of physical memory, in MB, that can be allocated for containers.
yarn.nodemanager.resource.memory-mb: 16384
#The maximum allocation for every container request at the ResourceManager, in MB.
yarn.scheduler.maximum-allocation-mb: 16384
#Number of vcores that can be allocated for containers. This is used by the ResourceManager scheduler when allocating resources for containers.
yarn.nodemanager.resource.cpu-vcores: 48
#The maximum allocation for every container request at the ResourceManager, in virtual CPU cores.
yarn.scheduler.maximum-allocation-vcores: 48
flink:
#Total process memory size for the JobManager.
jobmanager.memory.process.size: 1024M
#Total process memory size for the TaskExecutors.
taskmanager.memory.process.size: 10240M
#Size of the off-heap memory managed for sorting, hash tables, caching of intermediate results and the state backend.
taskmanager.memory.managed.size: 512M
#Framework off-heap memory size for TaskExecutors, i.e. the off-heap memory reserved for the TaskExecutor framework.
taskmanager.memory.framework.off-heap.size: 128M
#JVM metaspace size for the TaskExecutors.
taskmanager.memory.jvm-metaspace.size: 1024M
#Maximum network memory size for TaskExecutors. Network memory is off-heap memory reserved for the ShuffleEnvironment.
taskmanager.memory.network.max: 256M
#The number of parallel operator or user function instances that a single TaskManager can run.
#This value is typically proportional to the number of physical CPU cores of the TaskManager's machine (e.g., equal to the number of cores, or half the number of cores).
taskmanager.numberOfTaskSlots: 1
hbase:
common:
#Enables the HBase resource isolation (rsgroup) feature, which is used to group tables for storage.
enable_rsgroup: false
hmaster:
#JVM heap options for the HBase HMaster.
java_opt: '-Xmx1024m -Xms1024m'
regionserver:
#JVM memory options for the HBase HRegionServer (-Xmn: young generation size).
java_opt: '-Xmx1024m -Xms1024m -Xmn128m'
#The number of threads the region server keeps open to serve requests to tables. It should generally be set to (number of cores - 1).
hbase.regionserver.handler.count: 40
#If any one of a column family's HStoreFiles grows beyond this value, the hosting HRegion is split in two.
hbase.hregion.max.filesize: 10737418240
#The memory used by all read caches. The value is the actual memory size, expressed in MB.
hbase.bucketcache.size: 100
kafka:
#JVM heap options for Kafka.
java_opt: '-Xmx1024m -Xms1024m'
#The minimum age, in hours, of a log file before it becomes eligible for deletion due to age.
log.retention.hours: 168
#Size-based retention policy for logs, in bytes.
log.retention.bytes: 10737418240
clickhouse:
#Limit on total server memory usage. Zero means unlimited.
max_server_memory_usage: 30000000000
#The number of threads performing background merges and mutations for tables with MergeTree engines.
background_pool_size: 16
hos:
#JVM memory options for the HOS service (-Xmn: young generation size).
java_opt: '-Xmx1024m -Xms1024m -Xmn512m'
#Quick file download; intended for use when HBase has more than 20GB of memory. Open: 1, close: 0.
isQuickDownloadFile: 0
#Whether to enable SSL. Open: 1, close: 0.
enable_ssl: 0
#Name of the Nacos namespace in which the configuration is stored.
nacos.config.namespace: prod
ignite:
#JVM heap options for Ignite.
java_opt: '-Xmx1024m -Xms1024m'
#Maximum size of the region; the expression evaluates to 5 * 1024 * 1024 * 1024 bytes (5 GiB).
#Adjust it to match the physical RAM available on the host.
maxSize: '#{5L * 1024 * 1024 * 1024}'

View File

@@ -0,0 +1,111 @@
#==============================================================================
# Basic Components
#
# Orchestration & Coordinator & Configuration & Cold Storage
#==============================================================================
#The cluster uses master-master replication mode; maximum 2 servers.
[mariadb]
#Apache Zookeeper is a centralized service for maintaining configuration information, naming, providing distributed synchronization, and providing group services.
#Cluster mode requires at least 3 servers; the number of nodes must be odd, e.g. 3 or 5 nodes.
[zookeeper]
#Alibaba Nacos is an easy-to-use dynamic service discovery, configuration and service management platform.
#Cluster mode requires at least 3 servers (multi-node HA mode).
[nacos]
#Apache Hadoop HDFS (Hadoop Distributed File System).
#HDFS is deployed only in cluster mode.
#At least 3 servers are required; an HDFS cluster consists of two NameNodes and a certain number of DataNodes.
[hdfs]
#==============================================================================
# BigData Processing Components
#
# Big data is a term that refers to the massive volume, variety, and velocity of data that is generated from various sources and needs to be stored, processed, and analyzed efficiently.
# The Big Data processing components provide a platform for fast and efficient processing.
#==============================================================================
#Apache Kafka is a distributed event streaming platform, used for high-performance data pipelines and streaming analytics.
#Cluster mode requires at least 3 servers; by default CMAK (management tool) is installed on the first server.
[kafka]
#Apache Hadoop Yarn; includes the Flink/Groot-stream runtime environment.
#Yarn is the resource management and job scheduling technology in the open source Hadoop distributed processing framework.
#Cluster mode requires at least 3 servers; a Yarn cluster consists of two ResourceManagers (RM) and a certain number of NodeManagers (NM).
[yarn]
#==============================================================================
# Analytic Storage Components
#
# This is a data storage solution designed to support large-scale data analysis and data mining workloads.
# The analytic storage components offer high performance, scalability, and flexibility to meet the demands of processing vast amounts of structured and unstructured data.
#==============================================================================
#Apache HBase is the hosting of very large tables -- billions of rows X millions of columns -- atop clusters of commodity hardware.
#Cluster mode requires at least 3 servers; an HBase cluster consists of three HMasters and a certain number of HRegionServers.
[hbase]
#Apache Druid is a high performance, real-time analytics database that delivers sub-second queries on streaming and batch data at scale and under load.
#Cluster mode requires at least 3 servers; a Druid cluster consists of two master/query nodes and a certain number of worker nodes.
[druid]
#Yandex ClickHouse is the fastest and most resource efficient open-source database for real-time apps and analytics.
#Cluster mode requires at least 3 servers; a ClickHouse cluster consists of two query nodes and a certain number of data nodes.
[clickhouse]
#ArangoDB is a scalable graph database system to drive value from connected data, faster.
#Only single-server deployment is supported.
[arangodb]
#Redis is an open source (BSD licensed), in-memory data structure store, used as a database, cache, and message broker.
#Supports single-node and master-slave modes.
[redis]
#Apache Ignite is a leading distributed database management system for high-performance computing with in-memory speed.
#Supports single-node and cluster modes.
[ignite]
#==============================================================================
# OLAP in-house services
#
#==============================================================================
#The default proxy; includes Nginx and Keepalived. In standalone mode, only Nginx is included.
#A maximum of two nodes.
[loadbalancer]
#The ClickHouse query proxy; usually deployed together with the loadbalancer.
[chproxy]
#Galaxy-hos-service is a distributed object storage service.
#Included components: Keepalived/Nginx/Galaxy-hos-service. In standalone mode, only Galaxy-hos-service/Nginx.
#Cluster mode requires at least 2 servers; the Keepalived and Nginx services are deployed on the first two nodes by default.
[galaxy_hos_service]
#The query gateway; provides a unified query entry point.
[galaxy_qgw_service]
#A lightweight distributed task scheduling framework.
#Included components: Galaxy-job-admin/Galaxy-job-executor.
[galaxy_job_service]
#The report execution service.
[saved_query_scheduler]

Some files were not shown because too many files have changed in this diff Show More