Modify the HOS bucket-creation statements; update the hos and file-chunk-combiner config files

houjinchuan
2024-07-18 11:03:40 +08:00
parent 4ea7577382
commit 323f9fbc5e
4 changed files with 210 additions and 0 deletions


@@ -0,0 +1,77 @@
flink.job.name=agg_traffic_file_chunk_combine
#Kafka source configuration
#Port 9092 = no authentication, 9095 = SSL, 9094 = SASL
source.kafka.broker=192.168.44.12:9092
source.kafka.topic=TRAFFIC-FILE-STREAM-RECORD
source.kafka.group.id=test
#earliest = consume from the beginning, latest = only new records
source.kafka.auto.offset.reset=latest
source.kafka.session.timeout.ms=60000
#Maximum number of records returned in a single poll
source.kafka.max.poll.records=1000
#Maximum number of bytes fetched from a single partition at a time
source.kafka.max.partition.fetch.bytes=31457280
source.kafka.enable.auto.commit=true
#Kafka SASL username
source.kafka.user=admin
#Kafka SASL and SSL password
source.kafka.pin=galaxy2019
#Required for SSL
source.kafka.tools.library=/opt/tsg/olap/topology/data/
map.filter.expression=FileChunk.offset <= 1073741824
#Window configuration
combiner.window.parallelism=1
#Window size, in seconds
combiner.window.size=30
#combiner.window.enable.last.chunk.trigger=true
#Sink parameters
sink.parallelism=1
#Options: hos, oss, hbase
sink.type=hos
sink.async=true
#sink.filter.expression=
#Rate limiting configuration
#sink.rate.limit.threshold=0
#sink.rate.limit.exclusion.expression=FileChunk.fileType == "eml"
#HOS sink configuration
#When going through nginx or a single HOS instance, set ip:port; for multiple HOS instances, set ip1:port,ip2:port,...
sink.hos.endpoint=192.168.44.12:8186
sink.hos.bucket=traffic_file_bucket
sink.hos.token=c21f969b5f03d33d43e04f8f136e7682
#sink.hos.batch.size=1048576
#sink.hos.batch.interval.ms=10000
#sink.hos.health.check.interval.ms=60000
#HTTP client configuration
sink.http.client.retries.number=3
sink.http.client.max.total=20
sink.http.client.max.per.route=10
sink.http.client.connect.timeout.ms=10000
sink.http.client.request.timeout.ms=10000
sink.http.client.socket.timeout.ms=60000
#HBase sink configuration
sink.hbase.zookeeper=192.168.44.12
sink.hbase.retries.number=10
sink.hbase.rpc.timeout.ms=600000
sink.hbase.client.write.buffer=10971520
sink.hbase.client.ipc.pool.size=3
sink.hbase.batch.size=1048576
sink.hbase.batch.interval.ms=10000
#OSS sink configuration
#Multiple addresses allowed: ip1:port,ip2:port,...
#sink.oss.endpoint=192.168.44.12:8186
#sink.oss.filter.expression=FileChunk.offset == 0 && FileChunk.lastChunkFlag == 1
#sink.oss.cache.time.ms=30000
#sink.oss.cache.size=1073741824
#Configuration for file-associated metadata
#source.kafka.file.meta.session.topic=SESSION-RECORD
#source.kafka.file.meta.proxy.topic=PROXY-EVENT
#source.kafka.file.meta.group.id=file_chunk_combine_1
#file.meta.filter.expression=FileChunk.meta.fileId.contains("_9")
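
Before launching the Flink job it is worth confirming that chunk records are actually arriving on the source topic. A minimal probe with the stock Kafka console consumer, assuming a standard Apache Kafka installation (the script name varies by distribution, and the console_probe group id is hypothetical, chosen so the job's own 'test' group offsets stay untouched):

#Read a handful of records from the source topic configured above
kafka-console-consumer.sh \
  --bootstrap-server 192.168.44.12:9092 \
  --topic TRAFFIC-FILE-STREAM-RECORD \
  --group console_probe \
  --max-messages 5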


@@ -0,0 +1,24 @@
alter 'traffic_file_bucket', {METADATA => {'REGION_MEMSTORE_REPLICATION' => 'false', 'hbase.region.replica.wait.for.primary.flush' => 'false', 'SPLIT_ENABLED' => 'false'}}
alter 'index_time_traffic_file_bucket', {METADATA => {'REGION_MEMSTORE_REPLICATION' => 'false', 'hbase.region.replica.wait.for.primary.flush' => 'false', 'SPLIT_ENABLED' => 'false'}}
alter 'index_filename_traffic_file_bucket', {METADATA => {'REGION_MEMSTORE_REPLICATION' => 'false', 'hbase.region.replica.wait.for.primary.flush' => 'false', 'SPLIT_ENABLED' => 'false'}}
alter 'index_partfile_traffic_file_bucket', {METADATA => {'REGION_MEMSTORE_REPLICATION' => 'false', 'hbase.region.replica.wait.for.primary.flush' => 'false', 'SPLIT_ENABLED' => 'false'}}
alter 'troubleshooting_file_bucket', {METADATA => {'REGION_MEMSTORE_REPLICATION' => 'false', 'hbase.region.replica.wait.for.primary.flush' => 'false', 'SPLIT_ENABLED' => 'false'}}
alter 'index_time_troubleshooting_file_bucket', {METADATA => {'REGION_MEMSTORE_REPLICATION' => 'false', 'hbase.region.replica.wait.for.primary.flush' => 'false', 'SPLIT_ENABLED' => 'false'}}
alter 'index_filename_troubleshooting_file_bucket', {METADATA => {'REGION_MEMSTORE_REPLICATION' => 'false', 'hbase.region.replica.wait.for.primary.flush' => 'false', 'SPLIT_ENABLED' => 'false'}}
alter 'index_partfile_troubleshooting_file_bucket', {METADATA => {'REGION_MEMSTORE_REPLICATION' => 'false', 'hbase.region.replica.wait.for.primary.flush' => 'false', 'SPLIT_ENABLED' => 'false'}}
alter 'knowledge_base_bucket', {DURABILITY => 'SYNC_WAL'}
alter 'index_time_knowledge_base_bucket', {DURABILITY => 'SYNC_WAL'}
alter 'index_filename_knowledge_base_bucket', {DURABILITY => 'SYNC_WAL'}
alter 'index_partfile_knowledge_base_bucket', {DURABILITY => 'SYNC_WAL'}
alter 'report_snapshot_bucket', {DURABILITY => 'SYNC_WAL'}
alter 'index_time_report_snapshot_bucket', {DURABILITY => 'SYNC_WAL'}
alter 'index_filename_report_snapshot_bucket', {DURABILITY => 'SYNC_WAL'}
alter 'index_partfile_report_snapshot_bucket', {DURABILITY => 'SYNC_WAL'}
alter 'assessment_file_bucket', {DURABILITY => 'SYNC_WAL'}
alter 'index_time_assessment_file_bucket', {DURABILITY => 'SYNC_WAL'}
alter 'index_filename_assessment_file_bucket', {DURABILITY => 'SYNC_WAL'}
alter 'index_partfile_assessment_file_bucket', {DURABILITY => 'SYNC_WAL'}
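
Each bucket is altered four times: once for its main table and once for each index table (the index_time_, index_filename_ and index_partfile_ prefixes match the hos config further down). A sketch that generates the SYNC_WAL statements in a loop and pipes them into the HBase shell, assuming the hbase client is on the PATH of the node where this runs:

#Emit the four alter statements per WAL-enabled bucket and feed them to hbase shell
for bucket in knowledge_base_bucket report_snapshot_bucket assessment_file_bucket; do
  for prefix in '' index_time_ index_filename_ index_partfile_; do
    echo "alter '${prefix}${bucket}', {DURABILITY => 'SYNC_WAL'}"
  done
done | hbase shell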


@@ -0,0 +1,5 @@
curl -X PUT http://192.168.44.67:9098/hos/traffic_file_bucket -H 'token:c21f969b5f03d33d43e04f8f136e7682' -H 'x-hos-region-count:<64 * number of HBase servers>'
curl -X PUT http://192.168.44.67:9098/hos/troubleshooting_file_bucket -H 'token:c21f969b5f03d33d43e04f8f136e7682' -H 'x-hos-region-count:16'
curl -X PUT http://192.168.44.67:9098/hos/knowledge_base_bucket -H 'token:c21f969b5f03d33d43e04f8f136e7682' -H 'x-hos-region-count:16' -H 'x-hos-wal:open'
curl -X PUT http://192.168.44.67:9098/hos/report_snapshot_bucket -H 'token:c21f969b5f03d33d43e04f8f136e7682' -H 'x-hos-region-count:16' -H 'x-hos-wal:open'
curl -X PUT http://192.168.44.67:9098/hos/assessment_file_bucket -H 'token:c21f969b5f03d33d43e04f8f136e7682' -H 'x-hos-region-count:16' -H 'x-hos-wal:open'
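
The three WAL-enabled buckets differ only in name, so the repeated curl lines can be collapsed into a loop; a sketch that uses only the endpoint, token and headers shown above:

#PUT creates each bucket; x-hos-wal:open enables the write-ahead log
HOS=http://192.168.44.67:9098/hos
TOKEN=c21f969b5f03d33d43e04f8f136e7682
for bucket in knowledge_base_bucket report_snapshot_bucket assessment_file_bucket; do
  curl -X PUT "$HOS/$bucket" -H "token:$TOKEN" \
    -H 'x-hos-region-count:16' -H 'x-hos-wal:open'
done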


@@ -0,0 +1,104 @@
#Server port
server:
  port: 8186
  max-http-header-size: 20MB
  tomcat:
    max-threads: 400
#Tomcat cache size in KB; the system default is 10MB (the value here is 1000000 KB, about 1GB)
tomcat:
  cacheMaxSize: 1000000
#HBase parameters
hbase:
  zookeeperQuorum: 192.168.44.11:2181,192.168.44.14:2181,192.168.44.15:2181
  zookeeperPort: 2181
  zookeeperNodeParent: /hbase
  clientRetriesNumber: 9
  rpcTimeout: 100000
  connectPool: 10
  clientWriteBuffer: 10485760
  clientKeyValueMaxsize: 1073741824
  mobThreshold: 10485760
  #Maximum number of parts
  maxParts: 100000
  #Number of parts fetched per request
  getPartBatch: 10
  #HBase index table prefixes; any table with one of these prefixes is an index table
  timeIndexTablePrefix: index_time_
  filenameIndexTablePrefix: index_filename_
  partFileIndexTablePrefix: index_partfile_
  systemBucketMeta: system:bucket_meta
  #Number of regions when creating a table
  regionCount: 16
  filenameHead: 0,1,2,3,4,5,6,7,8,9,a,b,c,d,e,f
  partHead: 0,1,2,3,4,5,6,7,8,9,a,b,c,d,e,f
  #Directory used when computing file sizes
  dataPath: /hbase
  #Hadoop NameNode hosts: a single IP for standalone, ip1,ip2 for a cluster
  hadoopNameNodes: 192.168.44.10,192.168.44.11
  #Replication factor: 1 for standalone, 2 for a cluster
  hadoopReplication: 2
  #Hadoop port
  hadoopPort: 9000
  hadoopUser: root
  hadoopNameServices: ns1
  hadoopNameNodesNs1: nn1,nn2
  #TTL parameters
  ttlScanBatch: 1000
  ttlDeleteBatch: 1000
  asyncPut: 0
#Whether authentication is enabled; 0 = enabled. When enabled, the service must be accessed with S3 credentials or a token
auth:
  open: 0
  #Token used for HTTP access
  token: ENC(vknRT6U4I739rLIha9CvojM+4uFyXZLEYpO2HZayLnRak1HPW0K2yZ3vnQBA2foo)
  #S3 authentication
  s3:
    accesskey: ENC(FUQDvVP+zqCiwHQhXcRvbw==)
    secretkey: ENC(FUQDvVP+zqCiwHQhXcRvbw==)
hos:
  #File size threshold
  maxFileSize: 5073741800
  #Large file threshold
  uploadThreshold: 104857600
  #Keep-alive connection timeout
  keepAliveTimeout: 60000
  #Maximum number of objects per bulk delete
  deleteMultipleNumber: 1000
  #Maximum result count for list-objects and similar operations
  maxResultLimit: 100000
  #Maximum number of parts in a multipart upload
  maxPartNumber: 10000
  #Maximum number of append uploads
  maxAppendNumber: 100000
  #Whether quick upload is enabled
  isQuickUpload: 0
  #Whether quick file download is enabled; 1 = on. Set to 0 on clusters with less than 20GB of HBase memory
  isQuickDownloadFile: 0
  #User whitelist; HBase namespaces used to read storage quotas
  users: default
  #Whether rate limiting is enabled; 0 = off, 1 = on
  openRateLimiter: 0
  #Rate limit in requests per second
  rateLimiterQps: 20000
  #TTL mode; defaults to 0
  ttlMode: 0
  #Number of threads that run TTL cleanup
  ttlThread: 10
#Maximum upload file size
spring:
  servlet:
    multipart:
      max-file-size: 5GB
      max-request-size: 5GB
  #Prometheus parameters
  application:
    name: HosServiceApplication
#Prometheus parameters
management:
  endpoints:
    web:
      exposure:
        include: '*'
  metrics:
    tags:
      application: ${spring.application.name}
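
With management.endpoints.web.exposure.include: '*' the Spring Boot actuator endpoints are exposed over HTTP. Assuming the Micrometer Prometheus registry is on the service's classpath (the /actuator/prometheus path is the Spring Boot default, and the host here is taken from sink.hos.endpoint above), the scrape target can be spot-checked with:

#Fetch the first lines of the Prometheus metrics exported by the HOS service
curl -s http://192.168.44.12:8186/actuator/prometheus | head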