import tldextract


class Extracter:
    """Classify resource URLs as first- or third-party relative to a website.

    The website given at construction time is reduced to its host and its
    registrable domain (domain + public suffix). A resource URL is considered
    third-party when its registrable domain differs from the website's.

    Attributes:
        website: The website URL exactly as passed to the constructor.
        host: Full host of the website (subdomain, domain and suffix).
        domain: Registrable domain of the website (domain and suffix).
    """

    def __init__(self, webiste):
        """Store the website and pre-compute its host and registrable domain.

        Args:
            webiste: URL (or bare host name) of the reference website. The
                parameter name is a historical misspelling of "website" and
                is kept so existing keyword callers keep working.
        """
        self.website = webiste
        self.host, self.domain = self.extract(self.website)

    @staticmethod
    def extract(url):
        """Split *url* into its full host and its registrable domain.

        Args:
            url: URL or host name to analyse.

        Returns:
            A ``(host, domain)`` tuple of strings. ``host`` is the dot-joined
            subdomain, domain and suffix; ``domain`` is the dot-joined domain
            and suffix. Empty components are skipped, so inputs without a
            public suffix (IP addresses, ``localhost``) do not get a stray
            leading or trailing dot.
        """
        parts = tldextract.extract(url)
        # Read the fields by name instead of unpacking the result: recent
        # tldextract releases no longer return a plain 3-tuple, so
        # ``a, b, c = tldextract.extract(url)`` raises there.
        subdomain, domain, suffix = parts.subdomain, parts.domain, parts.suffix

        # Join only the non-empty labels to avoid results like "localhost.".
        registered = ".".join(label for label in (domain, suffix) if label)
        host = ".".join(label for label in (subdomain, registered) if label)
        return host, registered

    def isThirdParty(self, url):
        """Tell whether *url* belongs to a different domain than the website.

        Args:
            url: Resource URL to classify.

        Returns:
            ``(host, flag)`` where ``host`` is the resource's full host and
            ``flag`` is ``1`` when the resource's registrable domain differs
            from the website's, ``0`` otherwise. Returns ``(None, None)``
            when *url* is empty or ``None``.
        """
        if not url:
            return None, None
        host, domain = self.extract(url)
        # Callers expect an integer flag (0/1), not a bool.
        return host, int(domain != self.domain)