新增通用跨版本迁移数据步骤说明

This commit is contained in:
wangkuan
2024-04-25 14:29:49 +08:00
parent 2f8baa28d1
commit 8134cddf0a
11 changed files with 7436 additions and 0 deletions

View File

@@ -0,0 +1,11 @@
-- Row counts of the "_old" distributed tables in tsg_galaxy_tmp.
-- Each statement returns one row: the table label and its total row count.
SELECT
    'session_record_old' AS table_name,
    count(*) AS cnt
FROM tsg_galaxy_tmp.session_record_old;

SELECT
    'security_event_old' AS table_name,
    count(*) AS cnt
FROM tsg_galaxy_tmp.security_event_old;

SELECT
    'transaction_record_old' AS table_name,
    count(*) AS cnt
FROM tsg_galaxy_tmp.transaction_record_old;

SELECT
    'voip_record_old' AS table_name,
    count(*) AS cnt
FROM tsg_galaxy_tmp.voip_record_old;

SELECT
    'proxy_event_old' AS table_name,
    count(*) AS cnt
FROM tsg_galaxy_tmp.proxy_event_old;

SELECT
    'dos_event_old' AS table_name,
    count(*) AS cnt
FROM tsg_galaxy_tmp.dos_event_old;

View File

@@ -0,0 +1,20 @@
-- Column check for tsg_galaxy_tmp.assessment_event: every expected column is
-- named explicitly and recv_time is limited to one second on 2030-01-01.
-- NOTE(review): presumably that window holds no data, so the statement only
-- errors when a column is missing — confirm.
SELECT
    log_id,
    recv_time,
    vsys_id,
    assessment_date,
    lot_number,
    file_name,
    assessment_file,
    assessment_type,
    features,
    `size`,
    file_checksum_sha
FROM tsg_galaxy_tmp.assessment_event
WHERE recv_time >= toUnixTimestamp('2030-01-01 00:00:00')
  AND recv_time < toUnixTimestamp('2030-01-01 00:00:01');
-- Column check for tsg_galaxy_tmp.dos_event: every expected column is named
-- explicitly and recv_time is limited to one second on 2030-01-01.
-- NOTE(review): presumably that window holds no data, so the statement only
-- errors when a column is missing — confirm.
SELECT
    vsys_id,
    recv_time,
    log_id,
    profile_id,
    start_time,
    end_time,
    attack_type,
    severity,
    conditions,
    destination_ip,
    destination_country,
    source_ip_list,
    source_country_list,
    session_rate,
    packet_rate,
    bit_rate
FROM tsg_galaxy_tmp.dos_event
WHERE recv_time >= toUnixTimestamp('2030-01-01 00:00:00')
  AND recv_time < toUnixTimestamp('2030-01-01 00:00:01');
-- Column check for tsg_galaxy_tmp.monitor_event: names every expected column
-- explicitly and limits recv_time to one second on 2030-01-01.
-- NOTE(review): presumably that window holds no data, so the statement only
-- errors when a column is missing — confirm.
SELECT recv_time, log_id, decoded_as, session_id, start_timestamp_ms, end_timestamp_ms, duration_ms, tcp_handshake_latency_ms, ingestion_time, processing_time, insert_time, device_id, out_link_id, in_link_id, device_tag, data_center, device_group, sled_ip, address_type, vsys_id, t_vsys_id, flags, flags_identify_info, security_rule_list, security_action, monitor_rule_list, shaping_rule_list, proxy_rule_list, statistics_rule_list, sc_rule_list, sc_rsp_raw, sc_rsp_decrypted, proxy_action, proxy_pinning_status, proxy_intercept_status, proxy_passthrough_reason, proxy_client_side_latency_ms, proxy_server_side_latency_ms, proxy_client_side_version, proxy_server_side_version, proxy_cert_verify, proxy_intercept_error, monitor_mirrored_pkts, monitor_mirrored_bytes, client_ip, client_port, client_os_desc, client_geolocation, client_asn, subscriber_id, imei, imsi, phone_number, apn, server_ip, server_port, server_os_desc, server_geolocation, server_asn, server_fqdn, server_domain, app_transition, app, app_debug_info, app_content, app_extra_info, fqdn_category_list, ip_protocol, decoded_path, dns_message_id, dns_qr, dns_opcode, dns_aa, dns_tc, dns_rd, dns_ra, dns_rcode, dns_qdcount, dns_ancount, dns_nscount, dns_arcount, dns_qname, dns_qtype, dns_qclass, dns_cname, dns_sub, dns_rr, dns_response_latency_ms, http_url, http_host, http_request_line, http_response_line, http_request_body, http_response_body, http_proxy_flag, http_sequence, http_cookie, http_referer, http_user_agent, http_request_content_length, http_request_content_type, http_response_content_length, http_response_content_type, http_set_cookie, http_version, http_status_code, http_response_latency_ms, http_session_duration_ms, http_action_file_size, ssl_version, ssl_sni, ssl_san, ssl_cn, ssl_handshake_latency_ms, ssl_ja3_hash, ssl_ja3s_hash, ssl_cert_issuer, ssl_cert_subject, ssl_esni_flag, ssl_ech_flag, dtls_cookie, dtls_version, dtls_sni, dtls_san, dtls_cn, dtls_handshake_latency_ms, dtls_ja3_fingerprint, 
dtls_ja3_hash, dtls_cert_issuer, dtls_cert_subject, mail_protocol_type, mail_account, mail_from_cmd, mail_to_cmd, mail_from, mail_password, mail_to, mail_cc, mail_bcc, mail_subject, mail_subject_charset, mail_attachment_name, mail_attachment_name_charset, mail_starttls_flag, mail_eml_file, ftp_account, ftp_url, ftp_link_type, quic_version, quic_sni, quic_user_agent, rdp_cookie, rdp_security_protocol, rdp_client_channels, rdp_keyboard_layout, rdp_client_version, rdp_client_name, rdp_client_product_id, rdp_desktop_width, rdp_desktop_height, rdp_requested_color_depth, rdp_certificate_type, rdp_certificate_count, rdp_certificate_permanent, rdp_encryption_level, rdp_encryption_method, ssh_version, ssh_auth_success, ssh_client_version, ssh_server_version, ssh_cipher_alg, ssh_mac_alg, ssh_compression_alg, ssh_kex_alg, ssh_host_key_alg, ssh_host_key, ssh_hassh, sip_call_id, sip_originator_description, sip_responder_description, sip_user_agent, sip_server, sip_originator_sdp_connect_ip, sip_originator_sdp_media_port, sip_originator_sdp_media_type, sip_originator_sdp_content, sip_responder_sdp_connect_ip, sip_responder_sdp_media_port, sip_responder_sdp_media_type, sip_responder_sdp_content, sip_duration_s, sip_bye, rtp_payload_type_c2s, rtp_payload_type_s2c, rtp_pcap_path, rtp_originator_dir, stratum_cryptocurrency, stratum_mining_pools, stratum_mining_program, stratum_mining_subscribe, sent_pkts, received_pkts, sent_bytes, received_bytes, tcp_c2s_ip_fragments, tcp_s2c_ip_fragments, tcp_c2s_lost_bytes, tcp_s2c_lost_bytes, tcp_c2s_o3_pkts, tcp_s2c_o3_pkts, tcp_c2s_rtx_pkts, tcp_s2c_rtx_pkts, tcp_c2s_rtx_bytes, tcp_s2c_rtx_bytes, tcp_rtt_ms, tcp_client_isn, tcp_server_isn, packet_capture_file, in_src_mac, out_src_mac, in_dest_mac, out_dest_mac, encapsulation, dup_traffic_flag, tunnel_endpoint_a_desc, tunnel_endpoint_b_desc
FROM tsg_galaxy_tmp.monitor_event where recv_time >= toUnixTimestamp('2030-01-01 00:00:00') AND recv_time <toUnixTimestamp('2030-01-01 00:00:01');
-- Column check for tsg_galaxy_tmp.proxy_event: names every expected column
-- explicitly (including the doh_* columns) and limits recv_time to one second
-- on 2030-01-01.
-- NOTE(review): presumably that window holds no data, so the statement only
-- errors when a column is missing — confirm.
SELECT recv_time, log_id, decoded_as, session_id, start_timestamp_ms, end_timestamp_ms, duration_ms, tcp_handshake_latency_ms, ingestion_time, processing_time, insert_time, device_id, out_link_id, in_link_id, device_tag, data_center, device_group, sled_ip, address_type, vsys_id, t_vsys_id, flags, flags_identify_info, security_rule_list, security_action, monitor_rule_list, shaping_rule_list, proxy_rule_list, statistics_rule_list, sc_rule_list, sc_rsp_raw, sc_rsp_decrypted, proxy_action, proxy_pinning_status, proxy_intercept_status, proxy_passthrough_reason, proxy_client_side_latency_ms, proxy_server_side_latency_ms, proxy_client_side_version, proxy_server_side_version, proxy_cert_verify, proxy_intercept_error, monitor_mirrored_pkts, monitor_mirrored_bytes, client_ip, client_port, client_os_desc, client_geolocation, client_asn, subscriber_id, imei, imsi, phone_number, apn, server_ip, server_port, server_os_desc, server_geolocation, server_asn, server_fqdn, server_domain, app_transition, app, app_debug_info, app_content, app_extra_info, fqdn_category_list, ip_protocol, decoded_path, http_url, http_host, http_request_line, http_response_line, http_request_body, http_response_body, http_proxy_flag, http_sequence, http_cookie, http_referer, http_user_agent, http_request_content_length, http_request_content_type, http_response_content_length, http_response_content_type, http_set_cookie, http_version, http_status_code, http_response_latency_ms, http_session_duration_ms, http_action_file_size, doh_url, doh_host, doh_request_line, doh_response_line, doh_cookie, doh_referer, doh_user_agent, doh_content_length, doh_content_type, doh_set_cookie, doh_version, doh_message_id, doh_qr, doh_opcode, doh_aa, doh_tc, doh_rd, doh_ra, doh_rcode, doh_qdcount, doh_ancount, doh_nscount, doh_arcount, doh_qname, doh_qtype, doh_qclass, doh_cname, doh_sub, doh_rr, sent_pkts, received_pkts, sent_bytes, received_bytes, tcp_c2s_ip_fragments, tcp_s2c_ip_fragments, tcp_c2s_lost_bytes, 
tcp_s2c_lost_bytes, tcp_c2s_o3_pkts, tcp_s2c_o3_pkts, tcp_c2s_rtx_pkts, tcp_s2c_rtx_pkts, tcp_c2s_rtx_bytes, tcp_s2c_rtx_bytes, tcp_rtt_ms, tcp_client_isn, tcp_server_isn, packet_capture_file, in_src_mac, out_src_mac, in_dest_mac, out_dest_mac, encapsulation, dup_traffic_flag, tunnel_endpoint_a_desc, tunnel_endpoint_b_desc
FROM tsg_galaxy_tmp.proxy_event where recv_time >= toUnixTimestamp('2030-01-01 00:00:00') AND recv_time <toUnixTimestamp('2030-01-01 00:00:01');
-- Column check for tsg_galaxy_tmp.security_event: names every expected column
-- explicitly and limits recv_time to one second on 2030-01-01.
-- NOTE(review): presumably that window holds no data, so the statement only
-- errors when a column is missing — confirm.
SELECT recv_time, log_id, decoded_as, session_id, start_timestamp_ms, end_timestamp_ms, duration_ms, tcp_handshake_latency_ms, ingestion_time, processing_time, insert_time, device_id, out_link_id, in_link_id, device_tag, data_center, device_group, sled_ip, address_type, vsys_id, t_vsys_id, flags, flags_identify_info, security_rule_list, security_action, monitor_rule_list, sc_rule_list, sc_rsp_raw, sc_rsp_decrypted, shaping_rule_list, proxy_rule_list, statistics_rule_list, proxy_action, proxy_pinning_status, proxy_intercept_status, proxy_passthrough_reason, proxy_client_side_latency_ms, proxy_server_side_latency_ms, proxy_client_side_version, proxy_server_side_version, proxy_cert_verify, proxy_intercept_error, monitor_mirrored_pkts, monitor_mirrored_bytes, client_ip, client_port, client_os_desc, client_geolocation, client_asn, subscriber_id, imei, imsi, phone_number, apn, server_ip, server_port, server_os_desc, server_geolocation, server_asn, server_fqdn, server_domain, app_transition, app, app_debug_info, app_content, app_extra_info, fqdn_category_list, ip_protocol, decoded_path, dns_message_id, dns_qr, dns_opcode, dns_aa, dns_tc, dns_rd, dns_ra, dns_rcode, dns_qdcount, dns_ancount, dns_nscount, dns_arcount, dns_qname, dns_qtype, dns_qclass, dns_cname, dns_sub, dns_rr, dns_response_latency_ms, http_url, http_host, http_request_line, http_response_line, http_request_body, http_response_body, http_proxy_flag, http_sequence, http_cookie, http_referer, http_user_agent, http_request_content_length, http_request_content_type, http_response_content_length, http_response_content_type, http_set_cookie, http_version, http_status_code, http_response_latency_ms, http_session_duration_ms, http_action_file_size, ssl_version, ssl_sni, ssl_san, ssl_cn, ssl_handshake_latency_ms, ssl_ja3_hash, ssl_ja3s_hash, ssl_cert_issuer, ssl_cert_subject, ssl_esni_flag, ssl_ech_flag, dtls_cookie, dtls_version, dtls_sni, dtls_san, dtls_cn, dtls_handshake_latency_ms, dtls_ja3_fingerprint, 
dtls_ja3_hash, dtls_cert_issuer, dtls_cert_subject, mail_protocol_type, mail_account, mail_from_cmd, mail_to_cmd, mail_from, mail_password, mail_to, mail_cc, mail_bcc, mail_subject, mail_subject_charset, mail_attachment_name, mail_attachment_name_charset, mail_starttls_flag, mail_eml_file, ftp_account, ftp_url, ftp_link_type, quic_version, quic_sni, quic_user_agent, rdp_cookie, rdp_security_protocol, rdp_client_channels, rdp_keyboard_layout, rdp_client_version, rdp_client_name, rdp_client_product_id, rdp_desktop_width, rdp_desktop_height, rdp_requested_color_depth, rdp_certificate_type, rdp_certificate_count, rdp_certificate_permanent, rdp_encryption_level, rdp_encryption_method, ssh_version, ssh_auth_success, ssh_client_version, ssh_server_version, ssh_cipher_alg, ssh_mac_alg, ssh_compression_alg, ssh_kex_alg, ssh_host_key_alg, ssh_host_key, ssh_hassh, sip_call_id, sip_originator_description, sip_responder_description, sip_user_agent, sip_server, sip_originator_sdp_connect_ip, sip_originator_sdp_media_port, sip_originator_sdp_media_type, sip_originator_sdp_content, sip_responder_sdp_connect_ip, sip_responder_sdp_media_port, sip_responder_sdp_media_type, sip_responder_sdp_content, sip_duration_s, sip_bye, rtp_payload_type_c2s, rtp_payload_type_s2c, rtp_pcap_path, rtp_originator_dir, stratum_cryptocurrency, stratum_mining_pools, stratum_mining_program, stratum_mining_subscribe, sent_pkts, received_pkts, sent_bytes, received_bytes, tcp_c2s_ip_fragments, tcp_s2c_ip_fragments, tcp_c2s_lost_bytes, tcp_s2c_lost_bytes, tcp_c2s_o3_pkts, tcp_s2c_o3_pkts, tcp_c2s_rtx_pkts, tcp_s2c_rtx_pkts, tcp_c2s_rtx_bytes, tcp_s2c_rtx_bytes, tcp_rtt_ms, tcp_client_isn, tcp_server_isn, packet_capture_file, in_src_mac, out_src_mac, in_dest_mac, out_dest_mac, encapsulation, dup_traffic_flag, tunnel_endpoint_a_desc, tunnel_endpoint_b_desc
FROM tsg_galaxy_tmp.security_event where recv_time >= toUnixTimestamp('2030-01-01 00:00:00') AND recv_time <toUnixTimestamp('2030-01-01 00:00:01');
-- Column check for tsg_galaxy_tmp.session_record: names every expected column
-- explicitly and limits recv_time to one second on 2030-01-01.
-- NOTE(review): presumably that window holds no data, so the statement only
-- errors when a column is missing — confirm.
SELECT recv_time, log_id, decoded_as, session_id, start_timestamp_ms, end_timestamp_ms, duration_ms, tcp_handshake_latency_ms, ingestion_time, processing_time, insert_time, device_id, out_link_id, in_link_id, device_tag, data_center, device_group, sled_ip, address_type, vsys_id, t_vsys_id, flags, flags_identify_info, security_rule_list, security_action, monitor_rule_list, sc_rule_list, sc_rsp_raw, sc_rsp_decrypted, shaping_rule_list, proxy_rule_list, statistics_rule_list, proxy_action, proxy_pinning_status, proxy_intercept_status, proxy_passthrough_reason, proxy_client_side_latency_ms, proxy_server_side_latency_ms, proxy_client_side_version, proxy_server_side_version, proxy_cert_verify, proxy_intercept_error, monitor_mirrored_pkts, monitor_mirrored_bytes, client_ip, client_port, client_os_desc, client_geolocation, client_asn, subscriber_id, imei, imsi, phone_number, apn, server_ip, server_port, server_os_desc, server_geolocation, server_asn, server_fqdn, server_domain, app_transition, app, app_debug_info, app_content, app_extra_info, fqdn_category_list, ip_protocol, decoded_path, dns_message_id, dns_qr, dns_opcode, dns_aa, dns_tc, dns_rd, dns_ra, dns_rcode, dns_qdcount, dns_ancount, dns_nscount, dns_arcount, dns_qname, dns_qtype, dns_qclass, dns_cname, dns_sub, dns_rr, dns_response_latency_ms, http_url, http_host, http_request_line, http_response_line, http_request_body, http_response_body, http_proxy_flag, http_sequence, http_cookie, http_referer, http_user_agent, http_request_content_length, http_request_content_type, http_response_content_length, http_response_content_type, http_set_cookie, http_version, http_status_code, http_response_latency_ms, http_session_duration_ms, http_action_file_size, ssl_version, ssl_sni, ssl_san, ssl_cn, ssl_handshake_latency_ms, ssl_ja3_hash, ssl_ja3s_hash, ssl_cert_issuer, ssl_cert_subject, ssl_esni_flag, ssl_ech_flag, dtls_cookie, dtls_version, dtls_sni, dtls_san, dtls_cn, dtls_handshake_latency_ms, dtls_ja3_fingerprint, 
dtls_ja3_hash, dtls_cert_issuer, dtls_cert_subject, mail_protocol_type, mail_account, mail_from_cmd, mail_to_cmd, mail_from, mail_password, mail_to, mail_cc, mail_bcc, mail_subject, mail_subject_charset, mail_attachment_name, mail_attachment_name_charset, mail_starttls_flag, mail_eml_file, ftp_account, ftp_url, ftp_link_type, quic_version, quic_sni, quic_user_agent, rdp_cookie, rdp_security_protocol, rdp_client_channels, rdp_keyboard_layout, rdp_client_version, rdp_client_name, rdp_client_product_id, rdp_desktop_width, rdp_desktop_height, rdp_requested_color_depth, rdp_certificate_type, rdp_certificate_count, rdp_certificate_permanent, rdp_encryption_level, rdp_encryption_method, ssh_version, ssh_auth_success, ssh_client_version, ssh_server_version, ssh_cipher_alg, ssh_mac_alg, ssh_compression_alg, ssh_kex_alg, ssh_host_key_alg, ssh_host_key, ssh_hassh, sip_call_id, sip_originator_description, sip_responder_description, sip_user_agent, sip_server, sip_originator_sdp_connect_ip, sip_originator_sdp_media_port, sip_originator_sdp_media_type, sip_originator_sdp_content, sip_responder_sdp_connect_ip, sip_responder_sdp_media_port, sip_responder_sdp_media_type, sip_responder_sdp_content, sip_duration_s, sip_bye, rtp_payload_type_c2s, rtp_payload_type_s2c, rtp_pcap_path, rtp_originator_dir, stratum_cryptocurrency, stratum_mining_pools, stratum_mining_program, stratum_mining_subscribe, sent_pkts, received_pkts, sent_bytes, received_bytes, tcp_c2s_ip_fragments, tcp_s2c_ip_fragments, tcp_c2s_lost_bytes, tcp_s2c_lost_bytes, tcp_c2s_o3_pkts, tcp_s2c_o3_pkts, tcp_c2s_rtx_pkts, tcp_s2c_rtx_pkts, tcp_c2s_rtx_bytes, tcp_s2c_rtx_bytes, tcp_rtt_ms, tcp_client_isn, tcp_server_isn, packet_capture_file, in_src_mac, out_src_mac, in_dest_mac, out_dest_mac, encapsulation, dup_traffic_flag, tunnel_endpoint_a_desc, tunnel_endpoint_b_desc
FROM tsg_galaxy_tmp.session_record where recv_time >= toUnixTimestamp('2030-01-01 00:00:00') AND recv_time <toUnixTimestamp('2030-01-01 00:00:01');
-- Column check for tsg_galaxy_tmp.transaction_record: names every expected
-- column explicitly and limits recv_time to one second on 2030-01-01.
-- NOTE(review): presumably that window holds no data, so the statement only
-- errors when a column is missing — confirm.
SELECT recv_time, log_id, decoded_as, session_id, ingestion_time, processing_time, insert_time, address_type, vsys_id, client_ip, client_port, server_ip, server_port, sent_pkts, received_pkts, sent_bytes, received_bytes, dns_message_id, dns_qr, dns_opcode, dns_aa, dns_tc, dns_rd, dns_ra, dns_rcode, dns_qdcount, dns_ancount, dns_nscount, dns_arcount, dns_qname, dns_qtype, dns_qclass, dns_cname, dns_sub, dns_rr, dns_response_latency_ms, http_url, http_host, http_request_line, http_response_line, http_request_body, http_response_body, http_proxy_flag, http_sequence, http_cookie, http_referer, http_user_agent, http_request_content_length, http_request_content_type, http_response_content_length, http_response_content_type, http_set_cookie, http_version, http_status_code, http_response_latency_ms, http_session_duration_ms, http_action_file_size, mail_protocol_type, mail_account, mail_from_cmd, mail_to_cmd, mail_from, mail_password, mail_to, mail_cc, mail_bcc, mail_subject, mail_subject_charset, mail_attachment_name, mail_attachment_name_charset, mail_starttls_flag, mail_eml_file, sip_call_id, sip_originator_description, sip_responder_description, sip_user_agent, sip_server, sip_originator_sdp_connect_ip, sip_originator_sdp_media_port, sip_originator_sdp_media_type, sip_originator_sdp_content, sip_responder_sdp_connect_ip, sip_responder_sdp_media_port, sip_responder_sdp_media_type, sip_responder_sdp_content, sip_duration_s, sip_bye
FROM tsg_galaxy_tmp.transaction_record where recv_time >= toUnixTimestamp('2030-01-01 00:00:00') AND recv_time <toUnixTimestamp('2030-01-01 00:00:01');
-- Column check for tsg_galaxy_tmp.voip_record: names every expected column
-- explicitly and limits recv_time to one second on 2030-01-01.
-- NOTE(review): presumably that window holds no data, so the statement only
-- errors when a column is missing — confirm.
SELECT recv_time, log_id, decoded_as, session_id, start_timestamp_ms, end_timestamp_ms, duration_ms, tcp_handshake_latency_ms, ingestion_time, processing_time, insert_time, device_id, out_link_id, in_link_id, device_tag, data_center, device_group, sled_ip, address_type, vsys_id, t_vsys_id, flags, flags_identify_info, security_rule_list, security_action, monitor_rule_list, shaping_rule_list, proxy_rule_list, statistics_rule_list, sc_rule_list, sc_rsp_raw, sc_rsp_decrypted, proxy_action, proxy_pinning_status, proxy_intercept_status, proxy_passthrough_reason, proxy_client_side_latency_ms, proxy_server_side_latency_ms, proxy_client_side_version, proxy_server_side_version, proxy_cert_verify, proxy_intercept_error, monitor_mirrored_pkts, monitor_mirrored_bytes, client_ip, client_port, client_os_desc, client_geolocation, client_asn, subscriber_id, imei, imsi, phone_number, apn, server_ip, server_port, server_os_desc, server_geolocation, server_asn, server_fqdn, server_domain, app_transition, app, app_debug_info, app_content, app_extra_info, fqdn_category_list, ip_protocol, decoded_path, sip_call_id, sip_originator_description, sip_responder_description, sip_user_agent, sip_server, sip_originator_sdp_connect_ip, sip_originator_sdp_media_port, sip_originator_sdp_media_type, sip_originator_sdp_content, sip_responder_sdp_connect_ip, sip_responder_sdp_media_port, sip_responder_sdp_media_type, sip_responder_sdp_content, sip_duration_s, sip_bye, rtp_payload_type_c2s, rtp_payload_type_s2c, rtp_pcap_path, rtp_originator_dir, sent_pkts, received_pkts, sent_bytes, received_bytes, tcp_c2s_ip_fragments, tcp_s2c_ip_fragments, tcp_c2s_lost_bytes, tcp_s2c_lost_bytes, tcp_c2s_o3_pkts, tcp_s2c_o3_pkts, tcp_c2s_rtx_pkts, tcp_s2c_rtx_pkts, tcp_c2s_rtx_bytes, tcp_s2c_rtx_bytes, tcp_rtt_ms, tcp_client_isn, tcp_server_isn, packet_capture_file, in_src_mac, out_src_mac, in_dest_mac, out_dest_mac, encapsulation, dup_traffic_flag, tunnel_endpoint_a_desc, tunnel_endpoint_b_desc
FROM tsg_galaxy_tmp.voip_record where recv_time >= toUnixTimestamp('2030-01-01 00:00:00') AND recv_time <toUnixTimestamp('2030-01-01 00:00:01');

View File

@@ -0,0 +1,15 @@
#!/bin/bash
# Copy every *local_table_to_2402.sh script located next to this script to the
# same directory on each node listed in iplist.txt, then mark the remote
# scripts executable.
#
# Input:
#   iplist.txt  whitespace-separated node addresses; looked up in the current
#               directory first and, if absent there, next to this script.
# Exit status:
#   0 when every node succeeded; 1 when iplist.txt is missing or at least one
#   node could not be prepared.
home=$(cd "$(dirname "$0")" && pwd)

# Keep the original lookup (current directory) and fall back to the script
# directory so the script also works when started from elsewhere.
iplist="iplist.txt"
[ -f "$iplist" ] || iplist="$home/iplist.txt"
if [ ! -f "$iplist" ]; then
    echo "iplist.txt not found" >&2
    exit 1
fi

failed=0
# Visit each node in turn.
for ip in $(cat "$iplist")
do
    echo "${ip}节点"
    # mkdir -p succeeds for an existing directory, so the three steps can be
    # chained and any real failure (ssh, scp, chmod) is reported per node.
    if ! { ssh "$ip" "mkdir -p '$home'" \
        && scp -r "$home"/*local_table_to_2402.sh "$ip:$home/" \
        && ssh "$ip" "cd '$home' && chmod +x ./*.sh"; }; then
        echo "${ip}: failed to distribute migration scripts" >&2
        failed=1
    fi
    echo ""
done
exit "$failed"

View File

@@ -0,0 +1,84 @@
#!/bin/bash
# Validate the migration arguments, then run
# start_migrate_local_table_to_2402.sh with the same arguments on every node
# listed in iplist.txt over ssh.
# (Per the original header: the migration moves ClickHouse data slice by slice,
# newest slice first, for the TSG 24.01 log reorganisation. That logic is not
# in this script.)
#
# Arguments:
#   $1 table                  one of session_record, security_event,
#                             monitor_event, transaction_record, voip_record,
#                             proxy_event, dos_event
#   $2 data_start_time (UTC)  e.g. "2023-10-26 00:00:00"
#   $3 data_end_time (UTC)    e.g. "2023-10-28 00:00:00"
#   $4 slice_interval_minute  length of one migration batch in minutes, e.g. 240
# Exit status: 1 on any invalid argument or when iplist.txt cannot be found.
table=$1
data_start_time=$2
data_end_time=$3
slice_interval_minute=$4

# Validate the table argument.
case "$table" in
    session_record|security_event|monitor_event|transaction_record|voip_record|proxy_event|dos_event)
        echo "迁移表${table}, start:$data_start_time, end:$data_end_time, interval_minute:$slice_interval_minute"
        ;;
    *)
        echo "所迁移表${table}不在范围:session_record, security_event, monitor_event, transaction_record, voip_record, proxy_event, dos_event"
        exit 1
        ;;
esac

# Validate the time arguments. Empty values are rejected explicitly: GNU date
# accepts an empty --date string (it means the beginning of today), so the
# empty-output checks below would not catch a missing argument.
if [ -z "$data_start_time" ]; then
    echo "data_start_time fmt err"
    exit 1
fi
if [ -z "$data_end_time" ]; then
    echo "data_end_time fmt err"
    exit 1
fi
timestamp_start=$(date --utc --date="$data_start_time" +%s)
timestamp_end=$(date --utc --date="$data_end_time" +%s)
if [ -z "$timestamp_start" ]; then
    echo "data_start_time fmt err"
    exit 1
fi
if [ -z "$timestamp_end" ]; then
    echo "data_end_time fmt err"
    exit 1
fi
if [ "$timestamp_start" -ge "$timestamp_end" ]; then
    echo "date range err"
    exit 1
fi
# The batch length must be a positive integer without leading zeros.
if [[ ! "$slice_interval_minute" =~ ^[1-9][0-9]*$ ]]; then
    echo "slice_interval_minute参数必须是正确的分钟数"
    exit 1
fi

home=$(cd "$(dirname "$0")" && pwd)

# Keep the original lookup (current directory) and fall back to the script
# directory; a missing list is an error instead of a silent no-op.
iplist="iplist.txt"
[ -f "$iplist" ] || iplist="$home/iplist.txt"
if [ ! -f "$iplist" ]; then
    echo "iplist.txt not found" >&2
    exit 1
fi

# Start the migration on each node in turn.
for ip in $(cat "$iplist")
do
    echo "$ip 节点开始执行迁移"
    # The time arguments contain a space, so they are single-quoted for the
    # remote shell.
    ssh "$ip" "cd '$home' && chmod +x ./*.sh && ./start_migrate_local_table_to_2402.sh $table '$data_start_time' '$data_end_time' $slice_interval_minute"
    echo ""
done

View File

@@ -0,0 +1,61 @@
#!/bin/bash
# Poll every node listed in iplist.txt until the node's log_<table>.txt (in the
# same directory path as this script) contains both a migrate_table_start and
# a migrate_table_end line. Matching lines are printed once per node.
#
# Arguments:
#   $1 table  table name used to build the remote log file name log_<table>.txt
# Exit status: 1 when the table argument is missing or no node is listed.
home=$(cd "$(dirname "$0")" && pwd)
table=$1
if [ -z "$table" ]; then
    echo "缺少table参数"
    exit 1
fi

ips=($(cat iplist.txt))
ips_size=${#ips[@]}
# Without this guard an unreadable or empty iplist.txt would fall straight
# through to the "all nodes finished" message below.
if [ "$ips_size" -eq 0 ]; then
    echo "iplist.txt is missing or empty" >&2
    exit 1
fi

# Per-node flags: 0 = marker not seen yet, 1 = marker seen.
ip_starts=()
ip_ends=()
for ((i = 0; i < ips_size; i++)); do
    ip_starts[i]=0
    ip_ends[i]=0
done

while true; do
    # Check each node that has not reported both markers yet.
    for ((i = 0; i < ips_size; i++)); do
        ip=${ips[i]}
        if [ "${ip_starts[i]}" -eq 0 ]; then
            info=$(ssh "$ip" "grep migrate_table_start $home/log_$table.txt")
            if [ -n "$info" ]; then
                echo "${ip}迁移开始:${info}"
                ip_starts[i]=1
            fi
        fi
        # The end marker is only looked for once the start marker was seen.
        if [ "${ip_starts[i]}" -eq 1 ] && [ "${ip_ends[i]}" -eq 0 ]; then
            info=$(ssh "$ip" "grep migrate_table_end $home/log_$table.txt")
            if [ -n "$info" ]; then
                echo "${ip}迁移结束:${info}"
                ip_ends[i]=1
            fi
        fi
    done

    # Stop once every node has reported both markers.
    finish_cnt=0
    for ((i = 0; i < ips_size; i++)); do
        if [ "${ip_starts[i]}" -eq 1 ] && [ "${ip_ends[i]}" -eq 1 ]; then
            finish_cnt=$((finish_cnt + 1))
        fi
    done
    if [ "$finish_cnt" -ge "$ips_size" ]; then
        echo "所有节点迁移结束"
        break
    fi
    sleep 2
done

View File

@@ -0,0 +1,77 @@
#!/bin/bash
# Validate the migration arguments, then launch migrate_local_table_to_2402.sh
# for one table in the background with its output written to log_<table>.txt.
# (Per the original header: the migration moves ClickHouse data slice by slice,
# newest slice first, for the TSG 24.01 log reorganisation. That logic is not
# in this script.)
#
# Arguments:
#   $1 table                  one of session_record, security_event,
#                             monitor_event, transaction_record, voip_record,
#                             proxy_event, dos_event
#   $2 data_start_time (UTC)  e.g. "2023-10-26 00:00:00"
#   $3 data_end_time (UTC)    e.g. "2023-10-28 00:00:00"
#   $4 slice_interval_minute  length of one migration batch in minutes, e.g. 240
# Exit status: 1 on any invalid argument or when the migrate script is missing.
table=$1
data_start_time=$2
data_end_time=$3
slice_interval_minute=$4

# Validate the table argument.
case "$table" in
    session_record|security_event|monitor_event|transaction_record|voip_record|proxy_event|dos_event)
        echo "迁移表${table}, start:$data_start_time, end:$data_end_time, interval_minute:$slice_interval_minute"
        ;;
    *)
        echo "所迁移表${table}不在范围:session_record, security_event, monitor_event, transaction_record, voip_record, proxy_event, dos_event"
        exit 1
        ;;
esac

# Validate the time arguments. Empty values are rejected explicitly: GNU date
# accepts an empty --date string (it means the beginning of today), so the
# empty-output checks below would not catch a missing argument.
if [ -z "$data_start_time" ]; then
    echo "data_start_time fmt err"
    exit 1
fi
if [ -z "$data_end_time" ]; then
    echo "data_end_time fmt err"
    exit 1
fi
timestamp_start=$(date --utc --date="$data_start_time" +%s)
timestamp_end=$(date --utc --date="$data_end_time" +%s)
if [ -z "$timestamp_start" ]; then
    echo "data_start_time fmt err"
    exit 1
fi
if [ -z "$timestamp_end" ]; then
    echo "data_end_time fmt err"
    exit 1
fi
if [ "$timestamp_start" -ge "$timestamp_end" ]; then
    echo "date range err"
    exit 1
fi
# The batch length must be a positive integer without leading zeros.
if [[ ! "$slice_interval_minute" =~ ^[1-9][0-9]*$ ]]; then
    echo "slice_interval_minute参数必须是正确的分钟数"
    exit 1
fi

# Work from the script's own directory so the relative script path and the
# log file do not depend on the caller's current directory.
cd "$(dirname "$0")" || exit 1
# Fail here rather than after printing the "started" message, since a launch
# error of the background job would otherwise only show up in the log file.
if [ ! -x ./migrate_local_table_to_2402.sh ]; then
    echo "migrate_local_table_to_2402.sh not found or not executable in $(pwd)"
    exit 1
fi

# Run in the background; stdout and stderr both go to the per-table log file.
nohup ./migrate_local_table_to_2402.sh "$table" "$data_start_time" "$data_end_time" "$slice_interval_minute" > "log_$table.txt" 2>&1 &
echo "已启动迁移${table}表任务,时间范围[$data_start_time, $data_end_time], 每批迁移段分钟:$slice_interval_minute, 日志输出到:log_$table.txt。请查看日志文件确认每段数据迁移情况"

View File

@@ -0,0 +1,46 @@
SET distributed_ddl_task_timeout = 180;

-- NOTE(review): the original header mentions dropping the materialized views
-- that sync from the source tables into child tables, but this script
-- contains no DROP statement — confirm whether that is handled elsewhere.

-- Move each source local table from tsg_galaxy_v3 into tsg_galaxy_tmp under an
-- "_old" suffix, on every node of cluster ck_cluster.
RENAME TABLE tsg_galaxy_v3.session_record_local TO tsg_galaxy_tmp.session_record_local_old ON CLUSTER ck_cluster;
RENAME TABLE tsg_galaxy_v3.security_event_local TO tsg_galaxy_tmp.security_event_local_old ON CLUSTER ck_cluster;
RENAME TABLE tsg_galaxy_v3.transaction_record_local TO tsg_galaxy_tmp.transaction_record_local_old ON CLUSTER ck_cluster;
RENAME TABLE tsg_galaxy_v3.voip_record_local TO tsg_galaxy_tmp.voip_record_local_old ON CLUSTER ck_cluster;
RENAME TABLE tsg_galaxy_v3.proxy_event_local TO tsg_galaxy_tmp.proxy_event_local_old ON CLUSTER ck_cluster;
RENAME TABLE tsg_galaxy_v3.dos_event_local TO tsg_galaxy_tmp.dos_event_local_old ON CLUSTER ck_cluster;

-- Create the "_old" Distributed tables on cluster ck_query; each one reads the
-- matching renamed local table on ck_cluster. Only a few columns are declared.
CREATE TABLE IF NOT EXISTS tsg_galaxy_tmp.session_record_old ON CLUSTER ck_query
(
    common_recv_time Int64,
    common_log_id UInt64
)
ENGINE = Distributed(ck_cluster, tsg_galaxy_tmp, session_record_local_old, rand());

CREATE TABLE IF NOT EXISTS tsg_galaxy_tmp.security_event_old ON CLUSTER ck_query
(
    common_recv_time Int64,
    common_log_id UInt64
)
ENGINE = Distributed(ck_cluster, tsg_galaxy_tmp, security_event_local_old, rand());

CREATE TABLE IF NOT EXISTS tsg_galaxy_tmp.transaction_record_old ON CLUSTER ck_query
(
    common_recv_time Int64,
    common_log_id UInt64
)
ENGINE = Distributed(ck_cluster, tsg_galaxy_tmp, transaction_record_local_old, rand());

CREATE TABLE IF NOT EXISTS tsg_galaxy_tmp.voip_record_old ON CLUSTER ck_query
(
    common_recv_time Int64,
    common_log_id UInt64
)
ENGINE = Distributed(ck_cluster, tsg_galaxy_tmp, voip_record_local_old, rand());

CREATE TABLE IF NOT EXISTS tsg_galaxy_tmp.proxy_event_old ON CLUSTER ck_query
(
    common_recv_time Int64,
    common_log_id UInt64
)
ENGINE = Distributed(ck_cluster, tsg_galaxy_tmp, proxy_event_local_old, rand());

-- dos_event uses un-prefixed column names, unlike the tables above.
CREATE TABLE IF NOT EXISTS tsg_galaxy_tmp.dos_event_old ON CLUSTER ck_query
(
    log_id UInt64,
    profile_id UInt64,
    start_time Int64
)
ENGINE = Distributed(ck_cluster, tsg_galaxy_tmp, dos_event_local_old, rand());

View File

@@ -0,0 +1,307 @@
由于各环境当前使用的tsg版本与要升级到的版本均不相同,故在此提供通用步骤;因各版本主键与字段不同,旧版本统一先升级至23.07版本再进行处理。
具体步骤:
Step1:停止入库任务。
Step2:旧版本clickhouse库表升级到23.07版本。
Step3:在tsg_galaxy_tmp数据库新建24.02版本库表(将建表语句中的数据库名tsg_galaxy_v3改为tsg_galaxy_tmp)。
Step4:将23.07版本库表迁移至tsg_galaxy_tmp数据库。
Step5:在tsg_galaxy_v3数据库新建目标版本(如24.04)库表。
Step6:启动入库任务,写入tsg_galaxy_v3。
Step7:使用迁移脚本将数据从tsg_galaxy_tmp(23.07)迁移到tsg_galaxy_tmp(24.02)。
Step8:将tsg_galaxy_tmp(24.02)升级到目标版本(如24.04)。
Step9:使用命令将数据从tsg_galaxy_tmp(目标版本,如24.04)迁移到tsg_galaxy_v3(目标版本,如24.04),按partition手动迁移。
说明
请按步骤依次执行,执行脚本报错时联系研发处理后再执行之后的步骤。
所有ck步骤都需要在query节点执行
执行所有sql语句之前,需要停止日志留存调度任务,确保ck中没有分布式ddl语句正在执行,否则执行的sql会被阻塞,影响后续步骤执行。
验证sql需要在query节点执行
clickhouse-client -h 127.0.0.1 --port 9001 -m -u default --password ****** --query "select query from system.distributed_ddl_queue where status =0 limit 1"
若返回结果为空则可执行升级步骤,否则需要等待。
一、停止旧表ck入库任务
停止旧表ck入库任务
二、旧版本clickhouse库表升级到23.07版本,依次执行版本升级语句
三、临时库初始化24.02版本库表
1.执行2402版本初始化建表语句
clickhouse-client -h 127.0.0.1 --port 9001 -m -n -u default --password ****** --distributed_ddl_task_timeout 180 < init_tsg_galaxy_tmp_24_02_table.sql
2.校验表结构
clickhouse-client -h 127.0.0.1 --port 9001 -m -n -u default --password ****** --distributed_ddl_task_timeout 180 < check_tsg_galaxy_tmp_24_02_table.sql
无报错信息说明校验通过
四、23.07版本库表迁移至tsg_galaxy_tmp数据库
1.迁移sql
clickhouse-client -h 127.0.0.1 --port 9001 -m -n -u default --password ****** --distributed_ddl_task_timeout 180 < move_v3_2307_to_tmp_2307.sql
2.查看tmp库old表数据量
clickhouse-client -h 127.0.0.1 --port 9001 -m -n -u default --password ****** --distributed_ddl_task_timeout 180 < cat_tmp_old_table_row_count.sql
五、tsg_galaxy_v3数据库新建目标版本库表
clickhouse-client -h 127.0.0.1 --port 9001 -m -n -u default --password ****** --distributed_ddl_task_timeout 180 < 对应版本初始化sql
六、启动ck入库任务
1.启动目标版本ck入库任务
七、离线脚本同步历史数据至临时数据库2402版本库表
在query节点执行以下步骤(iplist.txt中为ck所有data节点的ip地址)。
步骤描述
1.进入migrate_table_2402文件夹,使脚本可执行
chmod +x ./*.sh
2.分发迁移脚本到data节点
./01_send_migrate_table_scripts.sh
3.选择要迁移的表,同步所需时间区间的数据。时间区间:[实时同步任务开始时间向前推n天, 实时同步任务开始时间),左闭右开,不包含结束时间点。
# 迁移security_event表
./02_start_migrate_table.sh security_event "2024-01-10 00:00:00" "2024-01-20 00:00:00" 60
4.监控data节点迁移情况。所有表迁移完成后,确认每个节点同步数据的成功/失败批次数;如有失败批次,确认是否需要处理。
# 监控security_event表迁移
./03_monitor_migrate_table.sh security_event
5.选择下一张需要迁移的表,重复3-4步骤。支持选择迁移的表有: security_event, monitor_event, session_record, transaction_record, voip_record, proxy_event, dos_event
迁移和监控各个表执行命令示例
# 迁移security_event表
./02_start_migrate_table.sh security_event "2024-01-10 00:00:00" "2024-01-20 00:00:00" 60
# 监控security_event表迁移
./03_monitor_migrate_table.sh security_event
# 迁移monitor_event表
./02_start_migrate_table.sh monitor_event "2024-01-10 00:00:00" "2024-01-20 00:00:00" 60
# 监控monitor_event表迁移
./03_monitor_migrate_table.sh monitor_event
# 迁移session_record表
./02_start_migrate_table.sh session_record "2024-01-10 00:00:00" "2024-01-20 00:00:00" 60
# 监控session_record表迁移
./03_monitor_migrate_table.sh session_record
# 迁移transaction_record表
./02_start_migrate_table.sh transaction_record "2024-01-10 00:00:00" "2024-01-20 00:00:00" 60
# 监控transaction_record表迁移
./03_monitor_migrate_table.sh transaction_record
# 迁移voip_record表
./02_start_migrate_table.sh voip_record "2024-01-10 00:00:00" "2024-01-20 00:00:00" 60
# 监控voip_record表迁移
./03_monitor_migrate_table.sh voip_record
# 迁移proxy_event表
./02_start_migrate_table.sh proxy_event "2024-01-10 00:00:00" "2024-01-20 00:00:00" 60
# 监控proxy_event表迁移
./03_monitor_migrate_table.sh proxy_event
# 迁移dos_event表
./02_start_migrate_table.sh dos_event "2024-01-10 00:00:00" "2024-01-20 00:00:00" 60
# 监控dos_event表迁移
./03_monitor_migrate_table.sh dos_event
迁移日志无报错,说明数据迁移完成。
如果有数据迁移失败的批次,查看新老表迁移数据量的对应情况(在ck每台data节点上执行):
-- security_event
-- Old side: daily row counts of security_event_local_old, limited to
-- common_action 16 or 96.
SELECT
    date_trunc('day', toDateTime(common_recv_time)) AS d,
    COUNT(1) AS cnt
FROM tsg_galaxy_tmp.security_event_local_old
WHERE common_recv_time >= toUnixTimestamp('2024-01-10 00:00:00')
  AND common_recv_time < toUnixTimestamp('2024-01-20 00:00:00')
  AND common_action IN (16, 96)
GROUP BY date_trunc('day', toDateTime(common_recv_time))
ORDER BY d
;
-- New side: daily row counts of security_event_local over the same range.
SELECT
    date_trunc('day', toDateTime(recv_time)) AS d,
    COUNT(1) AS cnt
FROM tsg_galaxy_tmp.security_event_local
WHERE recv_time >= toUnixTimestamp('2024-01-10 00:00:00')
  AND recv_time < toUnixTimestamp('2024-01-20 00:00:00')
GROUP BY date_trunc('day', toDateTime(recv_time))
ORDER BY d
;
-- monitor_event
-- Old side: daily row counts taken from security_event_local_old, limited to
-- common_action = 1.
SELECT
    date_trunc('day', toDateTime(common_recv_time)) AS d,
    COUNT(1) AS cnt
FROM tsg_galaxy_tmp.security_event_local_old
WHERE common_recv_time >= toUnixTimestamp('2024-01-10 00:00:00')
  AND common_recv_time < toUnixTimestamp('2024-01-20 00:00:00')
  AND common_action = 1
GROUP BY date_trunc('day', toDateTime(common_recv_time))
ORDER BY d
;
-- New side: daily row counts of monitor_event_local over the same range.
SELECT
    date_trunc('day', toDateTime(recv_time)) AS d,
    COUNT(1) AS cnt
FROM tsg_galaxy_tmp.monitor_event_local
WHERE recv_time >= toUnixTimestamp('2024-01-10 00:00:00')
  AND recv_time < toUnixTimestamp('2024-01-20 00:00:00')
GROUP BY date_trunc('day', toDateTime(recv_time))
ORDER BY d
;
-- session_record: rows per day from 2024-01-10 (inclusive) to 2024-01-20 (exclusive).
-- Old table, bucketed on common_recv_time.
SELECT
    date_trunc('day', toDateTime(common_recv_time)) AS d,
    count(*) AS cnt
FROM tsg_galaxy_tmp.session_record_local_old
WHERE common_recv_time >= toUnixTimestamp('2024-01-10 00:00:00')
  AND common_recv_time < toUnixTimestamp('2024-01-20 00:00:00')
GROUP BY d
ORDER BY d;

-- New table, same time window, bucketed on recv_time.
SELECT
    date_trunc('day', toDateTime(recv_time)) AS d,
    count(*) AS cnt
FROM tsg_galaxy_tmp.session_record_local
WHERE recv_time >= toUnixTimestamp('2024-01-10 00:00:00')
  AND recv_time < toUnixTimestamp('2024-01-20 00:00:00')
GROUP BY d
ORDER BY d;
-- transaction_record: rows per day from 2024-01-10 (inclusive) to 2024-01-20 (exclusive).
-- Old table, bucketed on common_recv_time.
SELECT
    date_trunc('day', toDateTime(common_recv_time)) AS d,
    count(*) AS cnt
FROM tsg_galaxy_tmp.transaction_record_local_old
WHERE common_recv_time >= toUnixTimestamp('2024-01-10 00:00:00')
  AND common_recv_time < toUnixTimestamp('2024-01-20 00:00:00')
GROUP BY d
ORDER BY d;

-- New table, same time window, bucketed on recv_time.
SELECT
    date_trunc('day', toDateTime(recv_time)) AS d,
    count(*) AS cnt
FROM tsg_galaxy_tmp.transaction_record_local
WHERE recv_time >= toUnixTimestamp('2024-01-10 00:00:00')
  AND recv_time < toUnixTimestamp('2024-01-20 00:00:00')
GROUP BY d
ORDER BY d;
-- voip_record: rows per day from 2024-01-10 (inclusive) to 2024-01-20 (exclusive).
-- Old table, bucketed on common_recv_time.
SELECT
    date_trunc('day', toDateTime(common_recv_time)) AS d,
    count(*) AS cnt
FROM tsg_galaxy_tmp.voip_record_local_old
WHERE common_recv_time >= toUnixTimestamp('2024-01-10 00:00:00')
  AND common_recv_time < toUnixTimestamp('2024-01-20 00:00:00')
GROUP BY d
ORDER BY d;

-- New table, same time window, bucketed on recv_time.
SELECT
    date_trunc('day', toDateTime(recv_time)) AS d,
    count(*) AS cnt
FROM tsg_galaxy_tmp.voip_record_local
WHERE recv_time >= toUnixTimestamp('2024-01-10 00:00:00')
  AND recv_time < toUnixTimestamp('2024-01-20 00:00:00')
GROUP BY d
ORDER BY d;
-- proxy_event: rows per day from 2024-01-10 (inclusive) to 2024-01-20 (exclusive).
-- Old table, bucketed on common_recv_time.
SELECT
    date_trunc('day', toDateTime(common_recv_time)) AS d,
    count(*) AS cnt
FROM tsg_galaxy_tmp.proxy_event_local_old
WHERE common_recv_time >= toUnixTimestamp('2024-01-10 00:00:00')
  AND common_recv_time < toUnixTimestamp('2024-01-20 00:00:00')
GROUP BY d
ORDER BY d;

-- New table, same time window, bucketed on recv_time.
SELECT
    date_trunc('day', toDateTime(recv_time)) AS d,
    count(*) AS cnt
FROM tsg_galaxy_tmp.proxy_event_local
WHERE recv_time >= toUnixTimestamp('2024-01-10 00:00:00')
  AND recv_time < toUnixTimestamp('2024-01-20 00:00:00')
GROUP BY d
ORDER BY d;
-- dos_event: rows per day from 2024-01-10 (inclusive) to 2024-01-20 (exclusive).
-- NOTE(review): unlike the other tables, both sides are filtered and bucketed on start_time
-- rather than a recv_time column - confirm this matches the column the migration script uses.
-- Old table.
SELECT
    date_trunc('day', toDateTime(start_time)) AS d,
    count(*) AS cnt
FROM tsg_galaxy_tmp.dos_event_local_old
WHERE start_time >= toUnixTimestamp('2024-01-10 00:00:00')
  AND start_time < toUnixTimestamp('2024-01-20 00:00:00')
GROUP BY d
ORDER BY d;

-- New table, same time window.
SELECT
    date_trunc('day', toDateTime(start_time)) AS d,
    count(*) AS cnt
FROM tsg_galaxy_tmp.dos_event_local
WHERE start_time >= toUnixTimestamp('2024-01-10 00:00:00')
  AND start_time < toUnixTimestamp('2024-01-20 00:00:00')
GROUP BY d
ORDER BY d;
升级tsg_galaxy_tmp(24.02)->tsg_galaxy_tmp(目标版本,如24.04),主要目的是使临时库与实际库表结构相同。
从24.03开始依次执行,直至目标版本。注意:所有版本的升级sql都需要将tsg_galaxy_v3替换为tsg_galaxy_tmp,包括校验sql。
九、命令迁移tsg_galaxy_tmp(目标版本,如24.04)->tsg_galaxy_v3(目标版本,如24.04),按照partition手动迁移
登录命令行之后,手动按需执行需要迁移的分区。如下示例迁移2024年4月1日的数据:
# Open a ClickHouse client session as user "default" against 127.0.0.1:9001.
# -m allows multi-line queries and -n allows several ';'-separated statements per input;
# replace the masked ****** with the real password.
clickhouse-client -h 127.0.0.1 --port 9001 -m -n -u default --password ******
-- Move one partition (20240401 in this example) of each table from the temporary database
-- tsg_galaxy_tmp into tsg_galaxy_v3 on every node of ck_cluster. Repeat with each partition
-- id that has to be migrated.
-- ClickHouse requires the destination to be written as "TO TABLE <db>.<table>"; a bare
-- "to <db>.<table>" does not match the documented MOVE PARTITION syntax.
-- Per the ClickHouse documentation, source and destination must share the same structure and
-- partition key, so the tsg_galaxy_tmp schema must already be at the target version.
-- NOTE(review): monitor_event_local is migrated into tsg_galaxy_tmp earlier in this guide but
-- is not in this list - confirm whether it also has to be moved.
ALTER TABLE tsg_galaxy_tmp.session_record_local ON CLUSTER ck_cluster MOVE PARTITION 20240401 TO TABLE tsg_galaxy_v3.session_record_local;
ALTER TABLE tsg_galaxy_tmp.security_event_local ON CLUSTER ck_cluster MOVE PARTITION 20240401 TO TABLE tsg_galaxy_v3.security_event_local;
ALTER TABLE tsg_galaxy_tmp.transaction_record_local ON CLUSTER ck_cluster MOVE PARTITION 20240401 TO TABLE tsg_galaxy_v3.transaction_record_local;
ALTER TABLE tsg_galaxy_tmp.voip_record_local ON CLUSTER ck_cluster MOVE PARTITION 20240401 TO TABLE tsg_galaxy_v3.voip_record_local;
ALTER TABLE tsg_galaxy_tmp.proxy_event_local ON CLUSTER ck_cluster MOVE PARTITION 20240401 TO TABLE tsg_galaxy_v3.proxy_event_local;
ALTER TABLE tsg_galaxy_tmp.dos_event_local ON CLUSTER ck_cluster MOVE PARTITION 20240401 TO TABLE tsg_galaxy_v3.dos_event_local;