This repository has been archived on 2025-09-14. You can view files and clone it, but cannot push or open issues or pull requests.
Files
zhuyujia-webhopper/Tools/domain_extract.py
little_stone bd2d50cf35 code update
2022-05-05 20:41:28 +08:00

34 lines
886 B
Python

import tldextract
class Extracter:
    """Extract host names from URLs and flag third-party resources.

    An instance is bound to one website URL. Resource URLs passed to
    ``isThirdParty`` are compared against that website's registered domain
    (``domain + "." + suffix`` as split by ``tldextract``).

    Input: a resource URL.
    Output: the resource host and an ``isThirdParty`` flag (0 False, 1 True).
    """

    def __init__(self, webiste):
        """Store the reference website and pre-compute its host and domain.

        Args:
            webiste: URL of the reference (first-party) website. The
                misspelled parameter name is kept so that existing keyword
                callers keep working.
        """
        self.website = webiste
        self.host, self.domain = self.extract(self.website)

    @staticmethod
    def extract(url):
        """Split *url* into its full host and its registered domain.

        Args:
            url: The URL (or bare host name) to split.

        Returns:
            A ``(host, domain)`` tuple of strings. ``host`` joins the
            non-empty subdomain, domain and suffix parts with dots;
            ``domain`` joins the non-empty domain and suffix parts.
        """
        # Attribute access instead of unpacking into three names: unpacking
        # breaks on tldextract releases whose result is not a 3-field tuple.
        parts = tldextract.extract(url)
        # Skip empty components so inputs without a public suffix (e.g.
        # "localhost" or an IP address) do not get a dangling "." attached.
        domain = ".".join(
            piece for piece in (parts.domain, parts.suffix) if piece
        )
        host = ".".join(
            piece
            for piece in (parts.subdomain, parts.domain, parts.suffix)
            if piece
        )
        return host, domain

    def isThirdParty(self, url):
        """Report whether *url* belongs to a different domain than the website.

        Args:
            url: The resource URL to classify.

        Returns:
            ``(host, flag)`` where ``host`` is the resource's full host name
            and ``flag`` is 1 when its registered domain differs from the
            website's, otherwise 0. Returns ``(None, None)`` when *url* is
            empty or ``None``.
        """
        if not url:
            return None, None
        host, domain = self.extract(url)
        # Keep the historical integer flag (0/1) rather than a bool.
        return host, int(domain != self.domain)