"""
1. Read a local list of URLs.
2. Visit each URL in turn.
3. Check whether the URL serves one of the target files (e.g. index.php).
4. If it does, append the full URL to a local results file; otherwise skip it.
"""
import requests
import multiprocessing


class Check_file(object):
    """Producer/consumer probe: one process feeds URLs into a queue, another
    requests each URL + suffix and records hits to ../file/file.txt."""

    def __init__(self):
        # Browser-like User-Agent so target servers do not reject the probe.
        self.headers = {
            'User-Agent': "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36",
        }

    # Read the local URL list and feed it into the queue (producer).
    def read_local_file(self, q):
        """Read ../url/url.txt and put each line (one URL) onto queue *q*."""
        with open('../url/url.txt', 'r', encoding='utf-8') as f:
            for url in f:
                q.put(url)
        print('数据读取完毕')

    # Request each URL and test whether the target file exists (consumer).
    def request_url(self, q):
        """Consume URLs from queue *q*; for each, probe every suffix and
        append matching URLs to ../file/file.txt.

        Fixes vs. the original:
        - q.get() returns ONE url string; the original iterated over its
          characters via ``for url in url_data``.
        - urls already starting with 'http://' previously left ``url_one``
          unassigned (stale value or UnboundLocalError).
        - https urls are now stripped and normalised with a trailing '/'.
        """
        suffixs = ['index.php']
        while True:
            url = q.get().strip()
            # Normalise to a scheme-qualified base URL ending with '/'.
            if url.startswith('https://') or url.startswith('http://'):
                url_one = url.rstrip('/') + '/'
            else:
                url_one = 'http://' + url + '/'
            for suffix in suffixs:
                re_url = url_one + suffix
                try:
                    resp = requests.get(url=re_url, headers=self.headers, timeout=1)
                    # 200 plus a body containing 'define' counts as a hit
                    # (heuristic for a PHP config/entry file).
                    if resp.status_code == 200 and 'define' in resp.content.decode():
                        print('[*]' + '\t' + re_url)
                        with open('../file/file.txt', 'a', encoding='utf-8') as f:
                            f.write(re_url + '\n')
                except Exception as e:
                    # Best-effort probe: log the failure and keep going.
                    print(e)
            # NOTE(review): empty() is racy (producer may still be filling the
            # queue) — kept from the original; a sentinel value would be safer.
            if q.empty():
                break

    # Wire the producer and consumer together.
    def main(self):
        """Start the reader and requester processes and wait for both to
        finish (the original never joined, leaking live processes across
        the repeated calls in __main__)."""
        q = multiprocessing.Queue()
        p1 = multiprocessing.Process(target=self.read_local_file, args=(q,))
        p2 = multiprocessing.Process(target=self.request_url, args=(q,))
        p1.start()
        p2.start()
        p1.join()
        p2.join()


if __name__ == '__main__':
    c = Check_file()
    for i in range(10):
        c.main()