Fix incorrect regex matching in domain parsing
@@ -1,71 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->

<configuration>
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://ns1</value>
    </property>
    <property>
        <name>hadoop.tmp.dir</name>
        <value>file:/opt/hadoop/tmp</value>
    </property>
    <property>
        <name>io.file.buffer.size</name>
        <value>131702</value>
    </property>
    <property>
        <name>hadoop.proxyuser.root.hosts</name>
        <value>*</value>
    </property>
    <property>
        <name>hadoop.proxyuser.root.groups</name>
        <value>*</value>
    </property>
    <property>
        <name>hadoop.logfile.size</name>
        <value>10000000</value>
        <description>The max size of each log file</description>
    </property>

    <property>
        <name>hadoop.logfile.count</name>
        <value>1</value>
        <description>The max number of log files</description>
    </property>
    <property>
        <name>ha.zookeeper.quorum</name>
        <value>master:2181,slave1:2181,slave2:2181</value>
    </property>

    <property>
        <name>fs.hdfs.impl</name>
        <value>org.apache.hadoop.hdfs.DistributedFileSystem</value>
        <description>The FileSystem for hdfs: uris.</description>
    </property>

    <property>
        <name>io.compression.codecs</name>
        <value>com.hadoop.compression.lzo.LzoCodec,com.hadoop.compression.lzo.LzopCodec</value>
    </property>
    <property>
        <name>io.compression.codec.lzo.class</name>
        <value>com.hadoop.compression.lzo.LzoCodec</value>
    </property>
</configuration>
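Note that fs.defaultFS points at the logical nameservice hdfs://ns1, which is resolved through the HA settings in the hdfs-site.xml hunk further down. A minimal client sketch, assuming the Hadoop client jars and these two XML files are on the classpath (the class name is illustrative):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class Ns1Probe {
        public static void main(String[] args) throws Exception {
            // Loads core-site.xml/hdfs-site.xml from the classpath; "hdfs://ns1"
            // is resolved via the failover proxy provider configured below.
            Configuration conf = new Configuration();
            try (FileSystem fs = FileSystem.get(conf)) {
                System.out.println(fs.exists(new Path("/"))); // true if ns1 is reachable
            }
        }
    }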
@@ -1,77 +0,0 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
-->
<configuration>
    <property>
        <name>hbase.rootdir</name>
        <value>hdfs://ns1/hbase-1.4.9</value>
    </property>
    <property>
        <name>hbase.cluster.distributed</name>
        <value>true</value>
    </property>
    <property>
        <name>hbase.zookeeper.quorum</name>
        <value>192.168.40.119,192.168.40.122,192.168.40.123</value>
    </property>
    <property>
        <name>hbase.master.info.port</name>
        <value>60010</value>
    </property>
    <!-- Enable schema support, mapping Phoenix schemas to HBase namespaces -->
    <property>
        <name>phoenix.schema.isNamespaceMappingEnabled</name>
        <value>true</value>
    </property>
    <property>
        <name>phoenix.schema.mapSystemTablesToNamespace</name>
        <value>true</value>
    </property>
    <property>
        <name>hbase.client.keyvalue.maxsize</name>
        <value>99428800</value>
    </property>
    <property>
        <name>hbase.server.keyvalue.maxsize</name>
        <value>99428800</value>
    </property>
    <property>
        <name>hbase.regionserver.wal.codec</name>
        <value>org.apache.hadoop.hbase.regionserver.wal.IndexedWALEditCodec</value>
    </property>
    <property>
        <name>phoenix.query.timeoutMs</name>
        <value>1800000</value>
    </property>
    <property>
        <name>hbase.rpc.timeout</name>
        <value>1200000</value>
    </property>
    <property>
        <name>hbase.client.scanner.caching</name>
        <value>1000</value>
    </property>
    <property>
        <name>hbase.client.scanner.timeout.period</name>
        <value>1200000</value>
    </property>
</configuration>
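When phoenix.schema.isNamespaceMappingEnabled is set on the server side, clients must set the same flags, otherwise Phoenix reports inconsistent namespace-mapping properties. A hedged connection sketch (Phoenix JDBC driver assumed on the classpath; the class name is illustrative):

    import java.sql.Connection;
    import java.sql.DriverManager;
    import java.util.Properties;

    public class PhoenixConnect {
        public static void main(String[] args) throws Exception {
            Properties props = new Properties();
            // Must match the server-side hbase-site.xml above.
            props.setProperty("phoenix.schema.isNamespaceMappingEnabled", "true");
            props.setProperty("phoenix.schema.mapSystemTablesToNamespace", "true");
            // The hosts in the JDBC URL are the hbase.zookeeper.quorum above.
            try (Connection conn = DriverManager.getConnection(
                    "jdbc:phoenix:192.168.40.119,192.168.40.122,192.168.40.123", props)) {
                System.out.println(conn.isClosed()); // false once connected
            }
        }
    }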
@@ -1,116 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->

<configuration>
    <property>
        <name>dfs.namenode.name.dir</name>
        <value>file:/home/ceiec/hadoop/dfs/name</value>
    </property>
    <property>
        <name>dfs.datanode.data.dir</name>
        <value>file:/home/ceiec/hadoop/dfs/data</value>
    </property>
    <property>
        <name>dfs.replication</name>
        <value>2</value>
    </property>
    <property>
        <name>dfs.namenode.secondary.http-address</name>
        <value>192.168.40.119:9001</value>
    </property>
    <property>
        <name>dfs.webhdfs.enabled</name>
        <value>true</value>
    </property>
    <property>
        <name>dfs.permissions</name>
        <value>false</value>
    </property>
    <property>
        <name>dfs.permissions.enabled</name>
        <value>false</value>
    </property>
    <property>
        <name>dfs.nameservices</name>
        <value>ns1</value>
    </property>
    <property>
        <name>dfs.blocksize</name>
        <value>134217728</value>
    </property>
    <property>
        <name>dfs.ha.namenodes.ns1</name>
        <value>nn1,nn2</value>
    </property>
    <!-- RPC address of nn1 (the host where nn1 runs) -->
    <property>
        <name>dfs.namenode.rpc-address.ns1.nn1</name>
        <value>192.168.40.119:8020</value>
    </property>
    <!-- HTTP address of nn1, used for external access -->
    <property>
        <name>dfs.namenode.http-address.ns1.nn1</name>
        <value>192.168.40.119:50070</value>
    </property>
    <!-- RPC address of nn2 (the host where nn2 runs) -->
    <property>
        <name>dfs.namenode.rpc-address.ns1.nn2</name>
        <value>192.168.40.122:8020</value>
    </property>
    <!-- HTTP address of nn2, used for external access -->
    <property>
        <name>dfs.namenode.http-address.ns1.nn2</name>
        <value>192.168.40.122:50070</value>
    </property>
    <!-- Where the NameNode edit log is stored on the JournalNodes (usually co-located with ZooKeeper) -->
    <property>
        <name>dfs.namenode.shared.edits.dir</name>
        <value>qjournal://192.168.40.119:8485;192.168.40.122:8485;192.168.40.123:8485/ns1</value>
    </property>
    <!-- Local disk path where the JournalNodes store their data -->
    <property>
        <name>dfs.journalnode.edits.dir</name>
        <value>/home/ceiec/hadoop/journal</value>
    </property>
    <!-- Java class that HDFS clients use to reach the NameNode through a proxy; it determines which NameNode is currently active -->
    <property>
        <name>dfs.client.failover.proxy.provider.ns1</name>
        <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
    </property>
    <!-- Fencing method used during automatic failover; several methods exist (see the official docs). sshfence logs in to the old active NameNode and kills it -->
    <property>
        <name>dfs.ha.fencing.methods</name>
        <value>sshfence</value>
    </property>
    <!-- Passwordless-SSH key, required only when the sshfence fencing method is used -->
    <property>
        <name>dfs.ha.fencing.ssh.private-key-files</name>
        <value>/root/.ssh/id_rsa</value>
    </property>
    <!-- Timeout for the sshfence mechanism; as above, it can be omitted when a script-based fencing method is used -->
    <property>
        <name>dfs.ha.fencing.ssh.connect-timeout</name>
        <value>30000</value>
    </property>
    <!-- Enable automatic failover; can be left unset if automatic failover is not needed -->
    <property>
        <name>dfs.ha.automatic-failover.enabled</name>
        <value>true</value>
    </property>
</configuration>
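This nameservice wiring is what makes the earlier fs.defaultFS=hdfs://ns1 resolvable. For reference, a hedged sketch of the same ns1 client settings applied programmatically, for clients that cannot ship these XML files; all values are copied from the configs above:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;

    public class Ns1ClientConfig {
        public static void main(String[] args) throws Exception {
            Configuration conf = new Configuration();
            // Same ns1 HA wiring as the XML above, set in code.
            conf.set("fs.defaultFS", "hdfs://ns1");
            conf.set("dfs.nameservices", "ns1");
            conf.set("dfs.ha.namenodes.ns1", "nn1,nn2");
            conf.set("dfs.namenode.rpc-address.ns1.nn1", "192.168.40.119:8020");
            conf.set("dfs.namenode.rpc-address.ns1.nn2", "192.168.40.122:8020");
            conf.set("dfs.client.failover.proxy.provider.ns1",
                    "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider");
            try (FileSystem fs = FileSystem.get(conf)) {
                System.out.println(fs.getUri()); // hdfs://ns1
            }
        }
    }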
@@ -1,19 +0,0 @@
#***************** Jedis connection settings *********************
# Redis server IP
redis.ip=192.168.40.153
# Redis server port
redis.port=19000
# Connection timeout, in milliseconds
redis.timeout=3000
#************************ Jedis pool settings *******************
# Maximum number of active Jedis connections
redis.pool.maxActive=200
# Maximum number of idle Jedis connections
redis.pool.maxIdle=5
# Maximum time (in milliseconds) to wait for a connection when the pool has
# none available; the default -1 means wait forever. If the wait time is
# exceeded, a JedisConnectionException is thrown.
redis.pool.maxWait=-1
# Validate connections when borrowing from the pool
redis.pool.testOnBorrow=true
# Validate connections when returning to the pool
redis.pool.testOnReturn=true
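These property names track the Jedis pool API (maxActive corresponds to maxTotal under commons-pool2). A hedged sketch of the pool these settings describe, with the values hard-coded to match the file and property loading omitted:

    import redis.clients.jedis.Jedis;
    import redis.clients.jedis.JedisPool;
    import redis.clients.jedis.JedisPoolConfig;

    public class RedisPoolDemo {
        public static void main(String[] args) {
            JedisPoolConfig poolConfig = new JedisPoolConfig();
            poolConfig.setMaxTotal(200);       // redis.pool.maxActive
            poolConfig.setMaxIdle(5);          // redis.pool.maxIdle
            poolConfig.setMaxWaitMillis(-1);   // redis.pool.maxWait: -1 = wait forever
            poolConfig.setTestOnBorrow(true);  // redis.pool.testOnBorrow
            poolConfig.setTestOnReturn(true);  // redis.pool.testOnReturn
            JedisPool pool = new JedisPool(poolConfig, "192.168.40.153", 19000, 3000);
            try (Jedis jedis = pool.getResource()) {
                System.out.println(jedis.ping()); // "PONG" if the server is reachable
            }
        }
    }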
@@ -10,7 +10,7 @@ public class FlowWriteConfig {

    public static final String LOG_STRING_SPLITTER = "\t";
    public static final String SQL_STRING_SPLITTER = "#";
    public static final String SEGMENTATION = ",";
    public static final String DOMAIN_SPLITTER = ".";

    /**
     * System
@@ -12,6 +12,9 @@ import com.zdjizhi.utils.IpLookup;
 import com.zdjizhi.utils.StringUtil;
 import org.apache.log4j.Logger;
 
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Set;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
@@ -25,7 +28,10 @@ import java.util.regex.Pattern;
 
 public class TransFormUtils {
     private static Logger logger = Logger.getLogger(TransFormUtils.class);
-    private static Pattern WEB_PATTERN = Pattern.compile("[^\\\\.]+(\\.com\\.cn|\\.net\\.cn|\\.org\\.cn|\\.gov\\.cn|\\.com|\\.net|\\.cn|\\.org|\\.cc|\\.me|\\.tel|\\.mobi|\\.asia|\\.biz|\\.info|\\.name|\\.tv|\\.hk|\\.公司|\\.中国|\\.网络)");
+    private final static Set<String> PUBLIC_SUFFIX_SET = new HashSet<String>(
+            Arrays.asList("com|org|net|gov|edu|co|tv|mobi|info|asia|xxx|onion|cc|cn|com.cn|edu.cn|gov.cn|net.cn|org.cn|jp|kr|tw|com.hk|hk|com.hk|org.hk|se|com.se|org.se"
+                    .split("\\|")));
+    private static Pattern IP_PATTERN = Pattern.compile("(\\d{1,3}\\.){3}(\\d{1,3})");
     private static IpLookup ipLookup = new IpLookup.Builder(false)
             .loadDataFileV4(FlowWriteConfig.IP_LIBRARY + "Kazakhstan.mmdb")
             .loadDataFileV6(FlowWriteConfig.IP_LIBRARY + "Kazakhstan.mmdb")
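The removed WEB_PATTERN is the likely source of the matching error named in the commit title: the suffix alternation has no trailing boundary or anchor, so find() can match a suffix inside a longer label. A hedged illustration, using an abbreviated form of the pattern and a hypothetical hostname:

    import java.util.regex.Matcher;
    import java.util.regex.Pattern;

    public class WebPatternBug {
        public static void main(String[] args) {
            // Abbreviated form of the removed pattern; nothing follows the
            // suffix alternation, so ".com" can match inside ".competitor".
            Pattern webPattern = Pattern.compile(
                    "[^\\.]+(\\.com\\.cn|\\.net\\.cn|\\.com|\\.net|\\.cn|\\.org)");
            Matcher m = webPattern.matcher("evil.competitor.org");
            if (m.find()) {
                System.out.println(m.group()); // prints "evil.com", not "competitor.org"
            }
        }
    }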
@@ -156,10 +162,10 @@ public class TransFormUtils {
      * @return the top-level domain
      */
     private static String getTopDomain(String sni, String host) {
-        if (StringUtil.isNotBlank(sni)) {
-            return getDomain(sni);
-        } else if (StringUtil.isNotBlank(host)) {
-            return getDomain(host);
+        if (StringUtil.isNotBlank(host)) {
+            return getDomainName(host);
+        } else if (StringUtil.isNotBlank(sni)) {
+            return getDomainName(sni);
         } else {
             return "";
         }
@@ -169,19 +175,27 @@
     /**
      * Extract the top-level domain from a URL
      *
-     * @param url the site URL
+     * @param host the site host
      * @return the top-level domain
      */
-    private static String getDomain(String url) {
-        try {
-            Matcher matcher = WEB_PATTERN.matcher(url);
-            if (matcher.find()) {
-                return matcher.group();
-            }
-        } catch (Exception e) {
-            e.printStackTrace();
-        }
-        return "";
+    private static String getDomainName(String host) {
+        if (host.endsWith(FlowWriteConfig.DOMAIN_SPLITTER)) {
+            host = host.substring(0, host.length() - 1);
+        }
+        if (IP_PATTERN.matcher(host).matches()) {
+            return host;
+        }
+        int index = 0;
+        String candidate = host;
+        for (; index >= 0; ) {
+            index = candidate.indexOf(FlowWriteConfig.DOMAIN_SPLITTER);
+            String subCandidate = candidate.substring(index + 1);
+            if (PUBLIC_SUFFIX_SET.contains(subCandidate)) {
+                return candidate;
+            }
+            candidate = subCandidate;
+        }
+        return candidate;
     }
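A hedged walk-through of the new suffix-walk on sample inputs (hypothetical values; the behaviour follows directly from the code above). The loop strips labels from the left until the remainder, minus one more label, is a known public suffix, then returns that remainder:

    // Traced by hand against the method above:
    //   getDomainName("www.example.com.cn") -> "example.com.cn"  ("com.cn" is in PUBLIC_SUFFIX_SET)
    //   getDomainName("a.b.example.net")    -> "example.net"     ("net" is in PUBLIC_SUFFIX_SET)
    //   getDomainName("192.168.40.119")     -> "192.168.40.119"  (short-circuited by the IP_PATTERN guard)
    //   getDomainName("example.com.")       -> "example.com"     (trailing dot stripped first)

Unlike the removed regex, this handles bare IPs, trailing dots, and suffixes missing from the old alternation, and it cannot match inside a label because it only splits on whole dots.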
@@ -48,16 +48,16 @@ public class HBaseTest {
 
     @Test
     public void change() {
-        Long begin = System.currentTimeMillis();
-        System.gc();
-        Long start = Runtime.getRuntime().freeMemory();
-        System.out.println("memory at start: " + start);
-        getAll();
-        System.gc();
-        Long end = Runtime.getRuntime().freeMemory();
-        System.out.println("memory at end: " + end);
-        System.out.println("memory used by one HashMap object: " + (end - start));
-        System.out.println(System.currentTimeMillis() - begin);
+//        Long begin = System.currentTimeMillis();
+//        System.gc();
+//        Long start = Runtime.getRuntime().freeMemory();
+//        System.out.println("memory at start: " + start);
+//        getAll();
+//        System.gc();
+//        Long end = Runtime.getRuntime().freeMemory();
+//        System.out.println("memory at end: " + end);
+//        System.out.println("memory used by one HashMap object: " + (end - start));
+//        System.out.println(System.currentTimeMillis() - begin);
     }
 
     /**