提交初版

2024-02-22 10:53:59 +08:00
parent b368a1e16a
commit f8c919d9d0
416 changed files with 10167 additions and 0 deletions
--- a/tsg-olap/configuration/config.yml
+++ b/tsg-olap/configuration/config.yml
@@ -0,0 +1,39 @@
+#============================Galaxy OLAP Configuration===============================#
+#The default installation location
+deploy_dir: /data/hdd/olap
+
+#The default data storage location, used for storing application data, logs and configuration files
+data_dir: /data/ssd/olap
+
+#Use commas (,) to separate the network segments that the firewall allows to access
+allowed_ips: 192.168.44.0/24,192.168.45.0/24,192.168.54.0/24,192.168.58.0/24,192.168.41.0/24
+
+#============================Keepalived Configuration===============================#
+#Specify the virtual IP address and its interface. It is used to configure high availability for the OLAP query engine
+#Only one IP address can be configured on the same network segment
+#The virtual IP addresses of the instances below must be unique
+vrrp_instance:
+  default:
+    virtual_ipaddress: 192.168.45.102
+    interface: eth0
+    virtual_router_id: 61
+  oss:
+    virtual_ipaddress: 192.168.45.102
+    interface: eth0
+    virtual_router_id: 62
+
+#============================OSS Configuration===============================#
+#HOS token
+hos_token: fea0ee76be8147bd9b53ad995b7ef603
+
+#HOS token (encrypted)
+encrypted_hos_token: M+0IMYS1+XENltUe585ahhqwY+QZJmnvtxML1vXExB9aO+CPT3GfsP4rtbVuWXpf
+
+#===========================Central Management Settings==============================#
+#Central Management Server IP, used by the DoS detection task to get policies.
+cm_api: 192.168.44.3
+
+#Central Management Server token; each environment has a different token.
+#Obtain it from the CM developers.
+cm_api_token: aa2bdec5518ad131f71944b13ce5c298&1&
+
--- a/tsg-olap/configuration/hosts
+++ b/tsg-olap/configuration/hosts
@@ -0,0 +1,105 @@
+#==============================================================================
+# Basic Components
+#
+# Orchestration & Coordinator & Configuration & Cold Storage
+#==============================================================================
+
+#The cluster uses master-master replication mode, with a maximum of 2 servers.
+[mariadb]
+192.168.45.102
+
+#Apache Zookeeper is a centralized service for maintaining configuration information, naming, providing distributed synchronization, and providing group services.
+#Cluster mode requires at least 3 servers. The number of nodes must be odd, e.g. 3 or 5 nodes.
+[zookeeper]
+192.168.45.102
+
+#Alibaba Nacos is an easy-to-use dynamic service discovery, configuration and service management platform.
+#Cluster mode requires at least 3 servers (multi-node HA mode).
+[nacos]
+192.168.45.102
+
+#Apache Hadoop HDFS(Hadoop Distributed File System)
+#HDFS is deployed only in cluster mode.
+#At least 3 servers. An HDFS cluster consists of two NameNodes and a number of DataNodes.
+[hdfs]
+192.168.45.102
+
+#==============================================================================
+# BigData Processing Components
+#
+# Big data is a term that refers to the massive volume, variety, and velocity of data that is generated from various sources and needs to be stored, processed, and analyzed efficiently. 
+# The Big Data processing component is used to provide a platform for fast and efficient processing
+#==============================================================================
+
+#Apache Kafka is a distributed event streaming platform, used for high-performance data pipelines and streaming analytics.
+#Cluster mode requires at least 3 servers. By default, CMAK (management tool) is installed on the first server.
+[kafka]
+192.168.45.102
+
+#Apache Hadoop Yarn. Includes the Flink/Groot-stream runtime environment.
+#Yarn is the resource management and job scheduling technology in the open source Hadoop distributed processing framework.
+#Cluster mode requires at least 3 servers. A Yarn cluster consists of two ResourceManagers (RM) and a number of NodeManager (NM) nodes.
+[yarn]
+192.168.45.102
+
+#==============================================================================
+# Analytic Storage Components
+#
+# This is a data storage solution designed to support large-scale data analysis and data mining workloads. 
+# The analytic storage component offers high performance, scalability, and flexibility to meet the demands of processing vast amounts of structured and unstructured data.
+#==============================================================================
+
+#Apache HBase hosts very large tables -- billions of rows X millions of columns -- atop clusters of commodity hardware.
+#Cluster mode requires at least 3 servers. An HBase cluster consists of three HMasters and a number of HRegionServer nodes.
+[hbase]
+192.168.45.102
+#Apache Druid is a high performance, real-time analytics database that delivers sub-second queries on streaming and batch data at scale and under load.
+#Cluster mode requires at least 3 servers. A Druid cluster consists of two master/query nodes and a number of worker nodes.
+[druid]
+192.168.45.102
+
+#Yandex ClickHouse is the fastest and most resource efficient open-source database for real-time apps and analytics.
+#Cluster mode requires at least 3 servers. A ClickHouse cluster consists of two query nodes and a number of data nodes.
+[clickhouse]
+192.168.45.102
+
+#ArangoDB is a scalable graph database system to drive value from connected data, faster.
+#Only single-server deployment is supported.
+[arangodb]
+192.168.45.102
+
+#==============================================================================
+# OLAP In-house Services
+#
+#==============================================================================
+
+#The default proxy. Includes Nginx and Keepalived; in standalone mode, only Nginx is included.
+#A maximum of two nodes is supported.
+[loadbalancer]
+192.168.45.102
+
+#The clickhouse query proxy,Usually deployed with loadbalancer.
+[chproxy]
+192.168.45.102
+
+#Galaxy-hos-service is a distributed object storage service.
+#Included components: Keepalived/Nginx/Galaxy-hos-service; in standalone mode, only Galaxy-hos-service/Nginx.
+#Cluster mode requires at least 2 servers. Keepalived and Nginx services are deployed on the first two nodes by default.
+[galaxy_hos_service]
+192.168.45.102
+
+
+#The query gateway. Provides a unified query entry point.
+[galaxy_qgw_service]
+192.168.45.102
+
+#A lightweight distributed task scheduling framework.
+#Included components: Galaxy-job-admin/Galaxy-job-executor
+[galaxy_job_service]
+192.168.45.102
+
+#The report execution service.
+[saved_query_scheduler]
+192.168.45.102
+
+
--- a/tsg-olap/configuration/tsg-olap.yml
+++ b/tsg-olap/configuration/tsg-olap.yml
@@ -0,0 +1,93 @@
+config_namespace: "prod"
+
+# Name of the data center
+data_center_name: xxg
+
+galaxy_qgw_service:
+  # Running memory of the Galaxy-qgw-service.
+  java_opts: "-Xms1024m -Xmx3120m -XX:+ExitOnOutOfMemoryError"
+
+galaxy_job_service:
+  # Running memory of the Galaxy-job-admin.
+  admin_java_opts: '-Xms512m -Xmx740m'
+  # Running memory of the Galaxy-job-executor.
+  executor_java_opts: '-Xms512m -Xmx1024m'
+
+saved_query_scheduler:
+  # Running memory of the saved-query-scheduler.
+  java_opts: "-Xms512m -Xmx1024m"
+  
+druid:
+  # Druid job parallelism
+  index_kafka_statistics_rule:
+    taskCount: 1  
+    druid.indexer.runner.javaOpts: "-server -Xms1g -Xmx2g -XX:MaxDirectMemorySize=1g"
+
+flink:
+  agg_app_protocol_traffic:
+    parallelism: 1
+    taskmanager.memory.process.size: 2048m
+    taskmanager.memory.jvm-metaspace.size: 256m
+    taskmanager.numberOfTaskSlots: 1
+    taskmanager.memory.framework.off-heap.size: 256m
+  detection_dos_attack:
+    parallelism: 1
+    taskmanager.memory.process.size: 2048m
+    taskmanager.memory.jvm-metaspace.size: 256m
+    taskmanager.numberOfTaskSlots: 1
+    taskmanager.memory.framework.off-heap.size: 256m
+  agg_traffic_file_chunk_combine:
+    parallelism: 1
+    taskmanager.memory.process.size: 2048m
+    taskmanager.memory.jvm-metaspace.size: 256m
+    taskmanager.numberOfTaskSlots: 1
+    taskmanager.memory.framework.off-heap.size: 256m
+  agg_troubleshooting_file_chunk_combine:
+    parallelism: 1
+    taskmanager.memory.process.size: 2048m
+    taskmanager.memory.jvm-metaspace.size: 256m
+    taskmanager.numberOfTaskSlots: 1
+    taskmanager.memory.framework.off-heap.size: 256m
+  correlation_sip_rtp_session:
+    parallelism: 1
+    taskmanager.memory.process.size: 2048m
+    taskmanager.memory.jvm-metaspace.size: 256m
+    taskmanager.numberOfTaskSlots: 1
+    taskmanager.memory.framework.off-heap.size: 256m
+  agg_session_record_topn:
+    parallelism: 1
+    taskmanager.memory.process.size: 2048m
+    taskmanager.memory.jvm-metaspace.size: 256m
+    taskmanager.numberOfTaskSlots: 1
+    taskmanager.memory.framework.off-heap.size: 256m
+  dos_event_kafka_to_clickhouse:
+    parallelism: 1
+    taskmanager.memory.process.size: 2048m
+    taskmanager.memory.jvm-metaspace.size: 128m
+    taskmanager.numberOfTaskSlots: 1
+    taskmanager.memory.framework.off-heap.size: 256m
+  etl_proxy_event_kafka_to_clickhouse:
+    parallelism: 1
+    taskmanager.memory.process.size: 2048m
+    taskmanager.memory.jvm-metaspace.size: 256m
+    taskmanager.numberOfTaskSlots: 1
+    taskmanager.memory.framework.off-heap.size: 256m
+  etl_session_record_kafka_to_clickhouse:
+    parallelism: 1
+    taskmanager.memory.process.size: 3072m
+    taskmanager.memory.jvm-metaspace.size: 256m
+    taskmanager.numberOfTaskSlots: 1
+    taskmanager.memory.framework.off-heap.size: 256m
+  etl_transaction_record_kafka_to_clickhouse:
+    parallelism: 1
+    taskmanager.memory.process.size: 3072m
+    taskmanager.memory.jvm-metaspace.size: 256m
+    taskmanager.numberOfTaskSlots: 1
+    taskmanager.memory.framework.off-heap.size: 256m
+  etl_voip_record_kafka_to_clickhouse:
+    parallelism: 1
+    taskmanager.memory.process.size: 2048m
+    taskmanager.memory.jvm-metaspace.size: 128m
+    taskmanager.numberOfTaskSlots: 1
+    taskmanager.memory.framework.off-heap.size: 256m
+