代码优化,优化clickhousesink写入
This commit is contained in:
@@ -70,6 +70,13 @@ public enum LogMetadata {
|
|||||||
String[] placeholders = new String[fields.length];
|
String[] placeholders = new String[fields.length];
|
||||||
Arrays.fill(placeholders, "?");
|
Arrays.fill(placeholders, "?");
|
||||||
|
|
||||||
|
return StrUtil.concat(true, "INSERT INTO ", CK_DATABASE, ".", tableName,
|
||||||
|
"(", StrUtil.join(",", fields), ") VALUES (", StrUtil.join(",", placeholders), ")");
|
||||||
|
}
|
||||||
|
public static String preparedSql(String tableName, String[] fields) {
|
||||||
|
String[] placeholders = new String[fields.length];
|
||||||
|
Arrays.fill(placeholders, "?");
|
||||||
|
|
||||||
return StrUtil.concat(true, "INSERT INTO ", CK_DATABASE, ".", tableName,
|
return StrUtil.concat(true, "INSERT INTO ", CK_DATABASE, ".", tableName,
|
||||||
"(", StrUtil.join(",", fields), ") VALUES (", StrUtil.join(",", placeholders), ")");
|
"(", StrUtil.join(",", fields), ") VALUES (", StrUtil.join(",", placeholders), ")");
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -16,8 +16,7 @@ public class CKBatchWindow implements AllWindowFunction<Map<String, Object>, Lis
|
|||||||
Iterator<Map<String, Object>> iterator = iterable.iterator();
|
Iterator<Map<String, Object>> iterator = iterable.iterator();
|
||||||
List<Map<String, Object>> batchLog = new ArrayList<>();
|
List<Map<String, Object>> batchLog = new ArrayList<>();
|
||||||
while (iterator.hasNext()) {
|
while (iterator.hasNext()) {
|
||||||
Map<String, Object> next = iterator.next();
|
batchLog.add(iterator.next());
|
||||||
batchLog.add(next);
|
|
||||||
}
|
}
|
||||||
out.collect(batchLog);
|
out.collect(batchLog);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -21,9 +21,9 @@ public class ConnLogService {
|
|||||||
|
|
||||||
public static void connLogStream(StreamExecutionEnvironment env) throws Exception{
|
public static void connLogStream(StreamExecutionEnvironment env) throws Exception{
|
||||||
//connection
|
//connection
|
||||||
DataStream<Map<String, Object>> connSource = ConnLogService.getLogSource(env, SOURCE_KAFKA_TOPIC_CONNECTION);
|
DataStream<Map<String, Object>> connSource = getLogSource(env, SOURCE_KAFKA_TOPIC_CONNECTION);
|
||||||
//sketch
|
//sketch
|
||||||
DataStream<Map<String, Object>> sketchSource = ConnLogService.getLogSource(env, SOURCE_KAFKA_TOPIC_SKETCH);
|
DataStream<Map<String, Object>> sketchSource = getLogSource(env, SOURCE_KAFKA_TOPIC_SKETCH);
|
||||||
|
|
||||||
//写入CKsink,批量处理
|
//写入CKsink,批量处理
|
||||||
LogService.getLogCKSink(connSource, SINK_CK_TABLE_CONNECTION);
|
LogService.getLogCKSink(connSource, SINK_CK_TABLE_CONNECTION);
|
||||||
@@ -31,15 +31,15 @@ public class ConnLogService {
|
|||||||
LogService.getLogCKSink(sketchSource, SINK_CK_TABLE_SKETCH);
|
LogService.getLogCKSink(sketchSource, SINK_CK_TABLE_SKETCH);
|
||||||
|
|
||||||
//transform
|
//transform
|
||||||
DataStream<Map<String, Object>> connTransformStream = ConnLogService.getConnTransformStream(connSource);
|
DataStream<Map<String, Object>> connTransformStream = getConnTransformStream(connSource);
|
||||||
|
|
||||||
//写入ck通联relation表
|
//写入ck通联relation表
|
||||||
LogService.getLogCKSink(connTransformStream, SINK_CK_TABLE_RELATION_CONNECTION);
|
LogService.getLogCKSink(connTransformStream, SINK_CK_TABLE_RELATION_CONNECTION);
|
||||||
|
|
||||||
DataStream<Map<String, Object>> sketchTransformStream = ConnLogService.getSketchTransformStream(sketchSource);
|
DataStream<Map<String, Object>> sketchTransformStream = getSketchTransformStream(sketchSource);
|
||||||
|
|
||||||
//合并通联和通联sketch
|
//合并通联和通联sketch
|
||||||
DataStream<Map<String, Object>> ip2ipGraph = ConnLogService.getConnUnion(connTransformStream, sketchTransformStream);
|
DataStream<Map<String, Object>> ip2ipGraph = getConnUnion(connTransformStream, sketchTransformStream);
|
||||||
|
|
||||||
//写入arangodb
|
//写入arangodb
|
||||||
LogService.getLogArangoSink(ip2ipGraph, R_VISIT_IP2IP);
|
LogService.getLogArangoSink(ip2ipGraph, R_VISIT_IP2IP);
|
||||||
|
|||||||
@@ -19,14 +19,14 @@ import static com.zdjizhi.common.FlowWriteConfig.*;
|
|||||||
|
|
||||||
public class DnsLogService {
|
public class DnsLogService {
|
||||||
|
|
||||||
public static void dnsLogStream(StreamExecutionEnvironment env) throws Exception{
|
public static void dnsLogStream(StreamExecutionEnvironment env) throws Exception {
|
||||||
|
|
||||||
DataStream<Map<String, Object>> dnsSource = DnsLogService.getLogSource(env, SOURCE_KAFKA_TOPIC_DNS);
|
DataStream<Map<String, Object>> dnsSource = getLogSource(env, SOURCE_KAFKA_TOPIC_DNS);
|
||||||
|
|
||||||
//dns 原始日志 ck入库
|
//dns 原始日志 ck入库
|
||||||
LogService.getLogCKSink(dnsSource, SINK_CK_TABLE_DNS);
|
LogService.getLogCKSink(dnsSource, SINK_CK_TABLE_DNS);
|
||||||
|
|
||||||
DataStream<Map<String, Object>> dnsTransform = DnsLogService.getDnsTransformStream(dnsSource);
|
DataStream<Map<String, Object>> dnsTransform = getDnsTransformStream(dnsSource);
|
||||||
|
|
||||||
//dns 拆分后relation日志 ck入库
|
//dns 拆分后relation日志 ck入库
|
||||||
LogService.getLogCKSink(dnsTransform, SINK_CK_TABLE_RELATION_DNS);
|
LogService.getLogCKSink(dnsTransform, SINK_CK_TABLE_RELATION_DNS);
|
||||||
@@ -46,7 +46,7 @@ public class DnsLogService {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static DataStream<Map<String, Object>> getLogSource(StreamExecutionEnvironment env, String source) throws Exception{
|
private static DataStream<Map<String, Object>> getLogSource(StreamExecutionEnvironment env, String source) throws Exception {
|
||||||
|
|
||||||
DataStream<Map<String, Object>> dnsSource = env.addSource(KafkaConsumer.myDeserializationConsumer(source))
|
DataStream<Map<String, Object>> dnsSource = env.addSource(KafkaConsumer.myDeserializationConsumer(source))
|
||||||
.setParallelism(SOURCE_PARALLELISM)
|
.setParallelism(SOURCE_PARALLELISM)
|
||||||
@@ -58,7 +58,7 @@ public class DnsLogService {
|
|||||||
return dnsSource;
|
return dnsSource;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static DataStream<Map<String, Object>> getDnsTransformStream(DataStream<Map<String, Object>> dnsSource) throws Exception{
|
private static DataStream<Map<String, Object>> getDnsTransformStream(DataStream<Map<String, Object>> dnsSource) throws Exception {
|
||||||
DataStream<Map<String, Object>> dnsTransform = dnsSource.filter(x -> Objects.nonNull(x.get("response")))
|
DataStream<Map<String, Object>> dnsTransform = dnsSource.filter(x -> Objects.nonNull(x.get("response")))
|
||||||
.assignTimestampsAndWatermarks(WatermarkStrategy
|
.assignTimestampsAndWatermarks(WatermarkStrategy
|
||||||
.<Map<String, Object>>forBoundedOutOfOrderness(Duration.ofSeconds(FLINK_WATERMARK_MAX_DELAY_TIME))
|
.<Map<String, Object>>forBoundedOutOfOrderness(Duration.ofSeconds(FLINK_WATERMARK_MAX_DELAY_TIME))
|
||||||
|
|||||||
@@ -7,10 +7,10 @@ import com.zdjizhi.enums.LogMetadata;
|
|||||||
import org.apache.commons.lang3.time.StopWatch;
|
import org.apache.commons.lang3.time.StopWatch;
|
||||||
import org.apache.flink.configuration.Configuration;
|
import org.apache.flink.configuration.Configuration;
|
||||||
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
|
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
|
||||||
|
import ru.yandex.clickhouse.ClickHousePreparedStatement;
|
||||||
|
|
||||||
import java.sql.Connection;
|
import java.sql.Connection;
|
||||||
import java.sql.PreparedStatement;
|
import java.util.HashMap;
|
||||||
import java.sql.SQLException;
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
@@ -19,9 +19,19 @@ public class ClickhouseSink extends RichSinkFunction<List<Map<String, Object>>>
|
|||||||
private static final Log log = LogFactory.get();
|
private static final Log log = LogFactory.get();
|
||||||
|
|
||||||
private Connection connection;
|
private Connection connection;
|
||||||
private PreparedStatement preparedStatement;
|
private ClickHousePreparedStatement preparedStatement;
|
||||||
public String sink;
|
public String sink;
|
||||||
|
|
||||||
|
private static final Map<String, String[]> logMetadataFields = new HashMap<>();
|
||||||
|
private static final Map<String, String> logMetadataSql = new HashMap<>();
|
||||||
|
|
||||||
|
static {
|
||||||
|
for (LogMetadata value : LogMetadata.values()) {
|
||||||
|
logMetadataSql.put(value.getSink(), LogMetadata.preparedSql(value.getSink()));
|
||||||
|
logMetadataFields.put(value.getSink(), value.getFields());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public ClickhouseSink(String sink) {
|
public ClickhouseSink(String sink) {
|
||||||
this.sink = sink;
|
this.sink = sink;
|
||||||
}
|
}
|
||||||
@@ -55,35 +65,31 @@ public class ClickhouseSink extends RichSinkFunction<List<Map<String, Object>>>
|
|||||||
try {
|
try {
|
||||||
StopWatch stopWatch = new StopWatch();
|
StopWatch stopWatch = new StopWatch();
|
||||||
stopWatch.start();
|
stopWatch.start();
|
||||||
log.debug("开始写入ck数据 :{}", data.size());
|
log.info("开始写入ck数据 :{}", data.size());
|
||||||
|
|
||||||
boolean autoCommit = connection.getAutoCommit();
|
boolean autoCommit = connection.getAutoCommit();
|
||||||
connection.setAutoCommit(false);
|
connection.setAutoCommit(false);
|
||||||
|
|
||||||
batch(data, tableName);
|
String[] logFields = logMetadataFields.get(tableName);
|
||||||
|
String sql = logMetadataSql.get(tableName);
|
||||||
|
log.debug(sql);
|
||||||
|
preparedStatement = (ClickHousePreparedStatement) connection.prepareStatement(sql);
|
||||||
|
|
||||||
|
for (Map<String, Object> map : data) {
|
||||||
|
for (int i = 0; i < logFields.length; i++) {
|
||||||
|
preparedStatement.setObject(i + 1, map.get(logFields[i]));
|
||||||
|
}
|
||||||
|
preparedStatement.addBatch();
|
||||||
|
}
|
||||||
|
|
||||||
preparedStatement.executeBatch();
|
preparedStatement.executeBatch();
|
||||||
connection.commit();
|
connection.commit();
|
||||||
connection.setAutoCommit(autoCommit);
|
connection.setAutoCommit(autoCommit);
|
||||||
stopWatch.stop();
|
stopWatch.stop();
|
||||||
log.debug("总共花费时间 {}", stopWatch.getTime());
|
log.info("总共花费时间 {}", stopWatch.getTime());
|
||||||
} catch (Exception ex) {
|
} catch (Exception ex) {
|
||||||
log.error("ClickhouseSink插入报错", ex);
|
log.error("ClickhouseSink插入报错", ex);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void batch(List<Map<String, Object>> data, String tableName) throws SQLException {
|
|
||||||
String[] logFields = LogMetadata.getLogFields(tableName);
|
|
||||||
String sql = LogMetadata.preparedSql(tableName);
|
|
||||||
log.debug(sql);
|
|
||||||
preparedStatement = connection.prepareStatement(sql);
|
|
||||||
|
|
||||||
for (Map<String, Object> map : data) {
|
|
||||||
for (int i = 0; i < logFields.length; i++) {
|
|
||||||
preparedStatement.setObject(i + 1, map.get(logFields[i]));
|
|
||||||
}
|
|
||||||
preparedStatement.addBatch();
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
Reference in New Issue
Block a user