34 lines
886 B
Python
34 lines
886 B
Python
|
|
import tldextract
|
|
|
|
|
|
class Extracter:
    """Resolve resource hosts and flag them as first- or third-party.

    An instance is bound to one website. Every resource URL handed to
    ``isThirdParty`` is compared against that website's registered domain
    (domain plus public suffix, as split by ``tldextract``).

    Input:  a resource URL.
    Output: the resource host and a third-party flag (0 = False, 1 = True).
    """

    def __init__(self, webiste):
        """Remember the website and pre-compute its host and domain.

        Args:
            webiste: URL of the website whose resources will be classified.
                The misspelled parameter name is kept on purpose so that
                existing keyword callers keep working.
        """
        self.website = webiste
        self.host, self.domain = self.extract(self.website)

    @staticmethod
    def extract(url):
        """Split *url* into its full host and its registered domain.

        Args:
            url: URL (or bare host name) to analyse.

        Returns:
            A ``(host, domain)`` tuple of strings. ``host`` is
            ``subdomain.domain.suffix`` and ``domain`` is ``domain.suffix``;
            components that ``tldextract`` reports as empty are left out, so
            inputs without a public suffix (IP addresses, ``localhost``)
            do not end up with a leading or trailing dot.
        """
        parts = tldextract.extract(url)
        # Read the named attributes rather than unpacking the result: the
        # attributes are available regardless of whether the installed
        # tldextract returns a tuple-like or a plain (non-iterable) object.
        registered = ".".join(
            label for label in (parts.domain, parts.suffix) if label
        )
        host = ".".join(
            label for label in (parts.subdomain, registered) if label
        )
        return host, registered

    def isThirdParty(self, url):
        """Classify a resource URL relative to this instance's website.

        Args:
            url: URL of the resource; may be empty or ``None``.

        Returns:
            ``(None, None)`` when *url* is falsy; otherwise ``(host, flag)``
            where ``host`` is the resource host and ``flag`` is ``1`` if the
            resource's registered domain differs from the website's, else
            ``0``.
        """
        if not url:
            return None, None
        host, domain = self.extract(url)
        # Callers receive an integer flag (0/1), not a bool.
        return host, int(domain != self.domain)
|
|
|