2022-11-23 15:30:24 +08:00
package com.zdjizhi.source ;
import cn.hutool.core.io.FileUtil ;
import cn.hutool.core.io.IoUtil ;
2023-05-23 10:38:15 +08:00
import cn.hutool.core.io.file.FileReader ;
import cn.hutool.crypto.digest.DigestUtil ;
2022-11-23 15:30:24 +08:00
import cn.hutool.json.JSONObject ;
import com.alibaba.nacos.api.NacosFactory ;
import com.alibaba.nacos.api.PropertyKeyConst ;
import com.alibaba.nacos.api.config.ConfigService ;
import com.alibaba.nacos.api.config.listener.Listener ;
2023-06-08 16:57:19 +08:00
import com.alibaba.nacos.api.exception.NacosException ;
2022-11-23 15:30:24 +08:00
import com.fasterxml.jackson.databind.JavaType ;
import com.google.common.base.Joiner ;
import com.jayway.jsonpath.JsonPath ;
import com.zdjizhi.common.CommonConfig ;
import com.zdjizhi.common.CustomFile ;
import com.zdjizhi.common.KnowledgeLog ;
import com.zdjizhi.utils.* ;
import org.apache.commons.io.IOUtils ;
import org.apache.flink.configuration.Configuration ;
import org.apache.http.Header ;
import org.apache.http.message.BasicHeader ;
import org.slf4j.Logger ;
import org.slf4j.LoggerFactory ;
import java.io.* ;
import java.text.SimpleDateFormat ;
import java.util.* ;
import java.util.concurrent.Executor ;
2023-06-08 16:57:19 +08:00
public class HttpSource extends RichHttpSourceFunction < Map < String , String > > {
2022-11-23 15:30:24 +08:00
private static final Logger logger = LoggerFactory . getLogger ( HttpSource . class ) ;
private static final String EXPR = " $.[?(@.version=='latest' && @.name in ['ip_v4_built_in','ip_v6_built_in','ip_v4_user_defined','ip_v6_user_defined'])].['name','sha256','format','path'] " ;
2023-05-26 15:51:57 +08:00
private static Map < String , String > knowledgeMetaCache = new HashMap < > ( ) ;
2023-06-08 16:57:19 +08:00
private static HashMap < String , String > knowledgeUpdateCache ;
private static final int TRY_TIMES = 3 ;
2023-05-26 15:51:57 +08:00
private static HttpClientUtils2 httpClientUtils ;
2022-11-23 15:30:24 +08:00
//连接nacos的配置
private Properties nacosProperties ;
//nacos data id
private String NACOS_DATA_ID ;
//nacos group
private String NACOS_GROUP ;
//nacos 连接超时时间
private long NACOS_READ_TIMEOUT ;
private ConfigService configService ;
2023-05-23 10:38:15 +08:00
private static Header header ;
//运行状态cancel时置为false
private boolean isRunning = true ;
//是否下发,默认不发送
private boolean isSending = false ;
2023-05-26 15:51:57 +08:00
2023-06-08 16:57:19 +08:00
/**
 * Creates the source; no connection is opened here, the arguments are only stored on the instance.
 *
 * @param nacosProperties    properties later handed to NacosFactory.createConfigService
 * @param NACOS_DATA_ID      data id of the Nacos config that is read and listened to
 * @param NACOS_GROUP        group of that Nacos config
 * @param NACOS_READ_TIMEOUT timeout later passed to ConfigService.getConfig
 */
public HttpSource(Properties nacosProperties, String NACOS_DATA_ID, String NACOS_GROUP, long NACOS_READ_TIMEOUT) {
    // Plain field capture; the order of these independent assignments is irrelevant.
    this.NACOS_READ_TIMEOUT = NACOS_READ_TIMEOUT;
    this.NACOS_GROUP = NACOS_GROUP;
    this.NACOS_DATA_ID = NACOS_DATA_ID;
    this.nacosProperties = nacosProperties;
}
@Override
public void open ( Configuration parameters ) throws Exception {
super . open ( parameters ) ;
2023-05-23 10:38:15 +08:00
httpClientUtils = new HttpClientUtils2 ( ) ;
2022-11-23 15:30:24 +08:00
//初始化元数据缓存
2023-05-23 10:38:15 +08:00
knowledgeMetaCache = new HashMap < > ( 16 ) ;
2022-11-23 15:30:24 +08:00
//初始化定位库缓存
2023-06-08 16:57:19 +08:00
knowledgeUpdateCache = new HashMap < > ( 16 ) ;
2023-05-23 10:38:15 +08:00
header = new BasicHeader ( " token " , CommonConfig . HOS_TOKEN ) ;
2023-06-08 16:57:19 +08:00
//连接nacos配置
try {
configService = NacosFactory . createConfigService ( nacosProperties ) ;
} catch ( NacosException e ) {
logger . error ( " Get Schema config from Nacos error,The exception message is :{} " , e . getMessage ( ) ) ;
}
//初始化知识库
initKnowledge ( ) ;
2022-11-23 15:30:24 +08:00
logger . info ( " 连接nacos: " + nacosProperties . getProperty ( PropertyKeyConst . SERVER_ADDR ) ) ;
2023-06-08 16:57:19 +08:00
2022-11-23 15:30:24 +08:00
}
@Override
public void run ( SourceContext ctx ) throws Exception {
2023-06-08 16:57:19 +08:00
if ( ! knowledgeUpdateCache . isEmpty ( ) ) {
ctx . collect ( knowledgeUpdateCache ) ;
knowledgeUpdateCache . clear ( ) ;
2022-11-23 15:30:24 +08:00
}
2023-06-08 16:57:19 +08:00
// }
2022-11-23 15:30:24 +08:00
configService . addListener ( NACOS_DATA_ID , NACOS_GROUP , new Listener ( ) {
@Override
public Executor getExecutor ( ) {
return null ;
}
@Override
public void receiveConfigInfo ( String configMsg ) {
try {
logger . info ( " receive update config: " + configMsg ) ;
if ( StringUtil . isNotBlank ( configMsg ) ) {
ArrayList < Object > metaList = JsonPath . parse ( configMsg ) . read ( EXPR ) ;
2023-06-08 16:57:19 +08:00
if ( metaList . size ( ) > 0 ) {
2022-11-23 15:30:24 +08:00
for ( Object metadata : metaList ) {
JSONObject knowledgeJson = new JSONObject ( metadata , false , true ) ;
String fileName = Joiner . on ( CommonConfig . LOCATION_SEPARATOR ) . useForNull ( " " ) . join ( knowledgeJson . getStr ( " name " ) ,
knowledgeJson . getStr ( " format " ) ) ;
String sha256 = knowledgeJson . getStr ( " sha256 " ) ;
String filePath = knowledgeJson . getStr ( " path " ) ;
2023-05-23 10:38:15 +08:00
if ( ! sha256 . equals ( knowledgeMetaCache . get ( fileName ) ) ) {
knowledgeMetaCache . put ( fileName , sha256 ) ;
updateKnowledge ( fileName , filePath , sha256 ) ;
2022-11-23 15:30:24 +08:00
}
}
2023-06-08 16:57:19 +08:00
if ( ! knowledgeUpdateCache . isEmpty ( ) ) {
ctx . collect ( knowledgeUpdateCache ) ;
knowledgeUpdateCache . clear ( ) ;
2023-05-23 10:38:15 +08:00
}
2022-11-23 15:30:24 +08:00
}
}
} catch ( Exception e ) {
logger . error ( " 监听nacos配置失败 " , e ) ;
}
}
} ) ;
while ( isRunning ) {
2023-05-23 10:38:15 +08:00
try {
Thread . sleep ( 10000 ) ;
} catch ( InterruptedException e ) {
e . printStackTrace ( ) ;
}
2022-11-23 15:30:24 +08:00
}
}
2023-06-08 16:57:19 +08:00
private void initKnowledge ( ) {
String configMsg = " " ;
2022-11-23 15:30:24 +08:00
try {
2023-06-08 16:57:19 +08:00
configMsg = configService . getConfig ( NACOS_DATA_ID , NACOS_GROUP , NACOS_READ_TIMEOUT ) ;
} catch ( NacosException e ) {
logger . error ( " 从Nacos获取知识库元数据配置文件异常, 异常信息为:{} " , e . getMessage ( ) ) ;
}
if ( StringUtil . isNotBlank ( configMsg ) ) {
ArrayList < Object > metaList = JsonPath . parse ( configMsg ) . read ( EXPR ) ;
if ( metaList . size ( ) > 0 ) {
2022-11-23 15:30:24 +08:00
for ( Object metadata : metaList ) {
JSONObject knowledgeJson = new JSONObject ( metadata , false , true ) ;
String fileName = Joiner . on ( CommonConfig . LOCATION_SEPARATOR ) . useForNull ( " " ) . join ( knowledgeJson . getStr ( " name " ) ,
knowledgeJson . getStr ( " format " ) ) ;
String sha256 = knowledgeJson . getStr ( " sha256 " ) ;
String filePath = knowledgeJson . getStr ( " path " ) ;
2023-05-23 10:38:15 +08:00
byte [ ] localFileByte = getLocalFile ( fileName ) ;
String localFileSha256Hex = DigestUtil . sha256Hex ( localFileByte ) ;
if ( sha256 . equals ( localFileSha256Hex ) ) {
logger . info ( " 本地文件{}的sha256为:{} ,Nacos内记录为:{} ,sha256相等 " , fileName , localFileSha256Hex , sha256 ) ;
knowledgeMetaCache . put ( fileName , sha256 ) ;
} else {
logger . info ( " 本地文件{}的sha256为:{} ,Nacos内记录为:{} ,sha256不相等, 更新本地文件及缓存 " , fileName , localFileSha256Hex , sha256 ) ;
updateKnowledge ( fileName , filePath , sha256 ) ;
}
2022-11-23 15:30:24 +08:00
}
}
}
}
2023-05-23 10:38:15 +08:00
private void updateKnowledge ( String fileName , String filePath , String sha256 ) {
2022-11-23 15:30:24 +08:00
InputStream inputStream = null ;
2023-05-23 10:38:15 +08:00
int retryNum = 0 ;
2022-11-23 15:30:24 +08:00
try {
2023-05-23 10:38:15 +08:00
while ( retryNum < TRY_TIMES ) {
inputStream = httpClientUtils . httpGetInputStream ( filePath , 90000 , header ) ;
if ( inputStream ! = null ) {
byte [ ] downloadBytes = IOUtils . toByteArray ( inputStream ) ;
2023-06-08 16:57:19 +08:00
String downloadFileSha256Hex = DigestUtil . sha256Hex ( downloadBytes ) ;
if ( sha256 . equals ( downloadFileSha256Hex ) & & downloadBytes . length > 0 ) {
logger . info ( " 通过HOS下载{}的sha256为:{} ,Nacos内记录为:{} ,sha256相等 " , fileName , sha256 ) ;
boolean updateStatus = updateLocalFile ( fileName , downloadBytes ) ;
if ( updateStatus ) {
2023-05-23 10:38:15 +08:00
knowledgeMetaCache . put ( fileName , sha256 ) ;
2023-06-08 16:57:19 +08:00
knowledgeUpdateCache . put ( fileName , sha256 ) ;
2023-05-23 10:38:15 +08:00
retryNum = TRY_TIMES ;
2023-06-08 16:57:19 +08:00
} else {
2023-05-23 10:38:15 +08:00
retryNum + + ;
2023-06-08 16:57:19 +08:00
//避免频繁请求HOS
Thread . sleep ( 10000 ) ;
2023-05-23 10:38:15 +08:00
}
2023-06-08 16:57:19 +08:00
} else {
logger . error ( " 通过HOS下载{}的sha256为:{} ,Nacos内记录为:{} ,sha256不相等 开始第{}次重试下载文件 " , fileName , downloadFileSha256Hex , sha256 , retryNum ) ;
retryNum + + ;
//避免频繁请求HOS
Thread . sleep ( 10000 ) ;
2023-05-23 10:38:15 +08:00
}
}
}
2022-11-23 15:30:24 +08:00
} catch ( IOException ioException ) {
ioException . printStackTrace ( ) ;
2023-06-08 16:57:19 +08:00
} catch ( InterruptedException e ) {
e . printStackTrace ( ) ;
2022-11-23 15:30:24 +08:00
} finally {
IOUtils . closeQuietly ( inputStream ) ;
2023-05-23 10:38:15 +08:00
}
}
2023-06-08 16:57:19 +08:00
private boolean updateLocalFile ( String fileName , byte [ ] downloadBytes ) {
2023-05-23 10:38:15 +08:00
FileOutputStream outputStream = null ;
2023-06-08 16:57:19 +08:00
boolean updateStatus = false ;
2023-05-23 10:38:15 +08:00
try {
2023-06-08 16:57:19 +08:00
HdfsUtils . uploadFileByBytes ( CommonConfig . HDFS_PATH + fileName , downloadBytes ) ;
updateStatus = true ;
2023-05-23 10:38:15 +08:00
} catch ( IOException ioe ) {
logger . error ( " 更新本地文件{}时发生IO异常,异常信息为: " , fileName , ioe . getMessage ( ) ) ;
ioe . printStackTrace ( ) ;
} catch ( RuntimeException e ) {
logger . error ( " 更新本地文件{}时发生异常,异常信息为: " , fileName , e . getMessage ( ) ) ;
e . printStackTrace ( ) ;
} finally {
2022-11-23 15:30:24 +08:00
IOUtils . closeQuietly ( outputStream ) ;
}
2023-06-08 16:57:19 +08:00
return updateStatus ;
2022-11-23 15:30:24 +08:00
}
2023-05-23 10:38:15 +08:00
/**
 * Reads the file CommonConfig.HDFS_PATH + name through HdfsUtils.
 *
 * @param name file name below CommonConfig.HDFS_PATH
 * @return the file content, or null if reading threw an IOException or RuntimeException
 */
private static byte[] getLocalFile(String name) {
    try {
        return HdfsUtils.getFileBytes(CommonConfig.HDFS_PATH + name);
    } catch (RuntimeException | IOException e) {
        // Route the stack trace through the logger instead of printing it to stderr.
        logger.error(" IpLookupUtils download MMDB files error, message is: " + e.getMessage(), e);
        return null;
    }
}
2022-11-23 15:30:24 +08:00
/**
 * Requests shutdown by clearing the running flag that the wait loop in run() checks.
 */
@Override
public void cancel() {
    isRunning = false;
}
}