求知
探索知识边界,点亮智慧之光
多线程百度收录批量查询工具,python
多线程查询百度是否收录。使用前记得替换 cookie;脚本很久没有更新了,查询过程中可能会出现验证码,更换 cookie 即可,不保证仍然可用。
import requests
from queue import Queue
from threading import Thread
class shoulu(Thread):
    """Worker thread that checks whether queued URLs are indexed by Baidu.

    Each worker repeatedly takes a URL from ``k_queue``, fetches the Baidu
    result page for it, classifies the page by looking for marker strings and
    stores the outcome in the class-level ``result`` dict.  ``task_done()`` is
    always called for every item taken, so ``k_queue.join()`` in the caller
    cannot hang on a failed request.
    """

    # URLs that have been queued; filled by the script entry point.
    seen = set()
    # Maps each processed URL to its status string; shared by all workers.
    result = {}

    # Search endpoint; the queried URL is appended as the ``wd`` parameter.
    SEARCH_URL = 'http://ipv6.baidu.com/s?ie=utf-8&f=8&rsv_bp=1&rsv_idx=1&tn=baidu'
    USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36'
    # NOTE(security): hard-coded browser session cookie.  Replace it with your
    # own before running; an expired cookie makes Baidu answer with a captcha.
    COOKIE = (
        'PSTM=1636077267; '
        'BIDUPSID=6D048EF8EF78EC012B99FCDD1F25E02E; '
        'BAIDUID=E88303C13DB2F9BEE9834DF179D2F017:FG=1; '
        'BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; '
        'BDSFRCVID=nW8OJeC62ZhFDlrHgQwNJTVRpe3IPzOTH6aoTDPTyF4Ms1bCCtgWEG0PKM8g0Ku-S2L-ogKK0eOTHkCF_2uxOjjg8UtVJeC6EG0Ptf8g0f5; '
        'H_BDCLCKID_SF=JJkO_D_atKvDqTrP-trf5DCShUFstqvdB2Q-XPoO3KJCDpOOyhOjeJ_p3bjNbPQiW5cpoMbgylRp8P3y0bb2DUA1y4vpK-ogQgTxoUJ2fnRJEUcGqj5Ah--ebPRiJPQ9QgbW5hQ7tt5W8ncFbT7l5hKpbt-q0x-jLTnhVn0MBCK0hDvPKITD-tFO5eT22-usKerd2hcHMPoosItm04Txyh0SqqrZLp5N-Tnr5D_btMbUoqRHXnJi0btQDPvxBf7pWDTm_q5TtUJMqqOx-JOrqfLn5MOyKMnitIT9-pno0hQrh459XP68bTkA5bjZKxtq3mkjbPbDfn028DKuDjRDKICV-frb-C62aKDsLnbnBhcqJ-ovQT0M04C7ybO2eR3ZQNcNQPb10D_5hUbeWfvpXn-R0hbjJM7xWeJpaJ5nJq5nhMJmKfb2-J0mqto7-P3y523ion5vQpnOEpQ3DRoWXPIqbN7P-p5Z5mAqKl0MLPbtbb0xXj_0DjPVKgTa54cbb4o2WbCQyqjM8pcN2b5oQTO30xTnBp32HCQZ-UQ4-PbdEC_RDpOUWfAkXpJvQnJjt2JxaqRCWJ5TMl5jDh3MKftQ-qOdexQ7bIny0hvctn5cShncLUjrDRLbXU6BK5vPbNcZ0l8K3l02V-bIe-t2XjQhDHRabK6aKC5bL6rJabC3MpOcXU6q2bDeQN0JQt6nWNn2sxQFQUQCq-bsL6K5Dp0vWtv4WbbvLT7johRTWqR4OR5JjxonDh83Ktbj5R3dHmT7LnbO5hvvhb5O3M7OLUKmDloOW-TB5bbPLUQF5l8-sq0x0bOte-bQXH_EJ6tOtRKJ_Kv55RrOfjrP-trf5DCShUFs2q-OB2Q-5KL-yJnPsbR4y-JG5UCp3bjNbPLeWGRE2MbdJJjoShbM3brPjMuyDf5m3b3MQ2TxoUJcBCnJhhvGqq-KXKuebPRiJPQ9QgbW5hQ7tt5W8ncFbT7l5hKpbt-q0x-jLTnhVn0M5DK0HPonHj82DTo33J; '
        'SIGNIN_UC=70a2711cf1d3d9b1a82d2f87d633bd8a03926704966bhwWmyeOdCL0QjlCuOVvMOxjyrVy4GxZIZIErQWbjcI64aIkE7BGUkWEA%2FWPFwWeA%2FKT0b%2BqUa2wadhpSpKuLKQ%2BBVlWyMZ%2FYgfO5UTDIvy%2Fw%2FdyecSVZI9UToftXBg3MBBw5onaHIrLSajCMUJ17noCz2HFy%2Ba0spwEnZtVqQseGx%2Fes6EzWgCGXigbCIofHCh%2ByAjDGefFcsIs1b40LixDu4UnA6xvH1ojVkQ9dyKeXzoDdAVup6RxeSlKsinCy097%2Bxs6kLX3wJ8QNYATgyhqJU0%2FHiw7Lq%2FGLWkqdHTHrXevYNkbh5JG2VYkl4IxWo%2FbrieowtAaVpnE7TFuzw%3D%3D64136060045377610966832433932427; '
        'H_PS_PSSID=35411_35104_31253_35489_34584_35491_35584_34813_35685_35316_26350_35751_22157; '
        'delPer=0; '
        'PSINO=5; '
        'H_WISE_SIDS=107311_110085_114550_127969_184716_186635_186743_186840_186844_188841_189034_189253_189755_190624_190803_191068_191245_191287_191370_192206_193246_193283_193559_194085_194519_195329_195343_195631_196045_196427_196590_197241_197350_197512_197711_197782_197958_198033_198089_198271_198513_198650_199083_199466_199578_199753_199777_199796_199974_200029_200128_200158_200193_200274_200450_200560_200576_200735_200743_201054_201098_201178_201328_201359_201539_201553_201598_201706_201819_201867_201978_8000073_8000104_8000122_8000137_8000150_8000155_8000157_8000173_8000178_8000186; '
        'SE_LAUNCH=5%3A27377659; '
        'rsv_i=aac9nOCV2qvoxyieGLmVG796qLlT3IQrYSHKt8IluOYm%2FdWUUtCyPvMEO5U8Tt6j56my3RpcXdaGGg%2BWgEsqMu%2Fl0cXPZT8; '
        'BD_HOME=1; '
        'BD_UPN=12314753; '
        'BD_CK_SAM=1; '
        'H_PS_645EC=9554Uj3V46WGW%2BlKlqPmBxueMnNiMXk7DGrn7B0QPiYpS6z08f94hLKxBcQ; '
        'BA_HECTOR=0g2h2g840kag802g9h1guhvpg0q; '
        'BDSVRTM=130; '
        'channel=baidusearch; '
        'baikeVisitId=200c57ee-4a42-40bc-829c-8774d10ea51c'
    )

    # Seconds to wait for Baidu before giving up on a single request.
    request_timeout = 10
    # Seconds a worker pauses after Baidu answers with a captcha page.
    captcha_pause = 100
    # When true, the fetched page is printed before it is classified.
    dump_html = True

    def __init__(self, k_queue):
        """Create a worker that consumes URLs from *k_queue*."""
        super().__init__()
        self.k_queue = k_queue

    def run(self):
        """Process URLs from the queue forever, recording one status per URL."""
        # Imported here because the module's import block does not import
        # ``time``; the captcha branch needs it for the back-off sleep.
        import time

        while True:
            url = self.k_queue.get()
            try:
                try:
                    html = self._fetch(url)
                except requests.RequestException:
                    # A failed request is reported like an unusable page.
                    html = ''
                if self.dump_html and html:
                    print(html)

                status = self._classify(html)
                if status == '查询过程出现验证码':
                    print("查询过程出现验证码")
                else:
                    print(url, status)
                self.filter(url, status)

                if status == '查询过程出现验证码':
                    # Back off so the captcha is not triggered again at once.
                    time.sleep(self.captcha_pause)
            finally:
                # Always mark the item done so Queue.join() can return.
                self.k_queue.task_done()

    def _fetch(self, url):
        """Return the Baidu result page for *url*, decoded as UTF-8 text.

        Raises ``requests.RequestException`` on network errors or timeout.
        """
        headers = {'User-Agent': self.USER_AGENT, 'Cookie': self.COOKIE}
        response = requests.get(
            self.SEARCH_URL,
            # Passing the URL via ``params`` gets it percent-encoded properly.
            params={'wd': url},
            headers=headers,
            timeout=self.request_timeout,
        )
        # Decode the raw bytes explicitly so the Chinese marker strings match
        # even when the response headers do not declare a charset.
        return response.content.decode('utf-8', errors='replace')

    @staticmethod
    def _classify(html):
        """Map the text of a result page to one of the four status strings."""
        if "请检查您的输入是否正确" in html:
            return '未收录'
        if "百度为您找到相关结果约" in html:
            return '已收录'
        if "http://verify.baidu.com" in html:
            return '查询过程出现验证码'
        return '未获取源码'

    def filter(self, url, sl):
        """Record status *sl* for *url* in the shared ``result`` mapping."""
        shoulu.result[url] = sl
if __name__ == "__main__":
    # Script entry point: read URLs from domains.txt, check them with five
    # worker threads and write "url<TAB>status" lines to baidumobilekey.txt.
    k_queue = Queue()
    with open('domains.txt', encoding='utf-8') as urls:
        for url in urls:
            url = url.strip()
            # Skip blank lines and URLs that were already queued.
            if not url or url in shoulu.seen:
                continue
            shoulu.seen.add(url)
            k_queue.put(url)

    for _ in range(5):
        bds = shoulu(k_queue)
        # Daemon threads let the process exit although run() never returns.
        bds.daemon = True
        bds.start()

    # Block until every queued URL has been marked done by a worker.
    k_queue.join()

    # The context manager closes the file even if a write fails.
    with open('baidumobilekey.txt', 'w', encoding='utf-8') as save:
        save.writelines(
            '%s\t%s\n' % (checked_url, status)
            for checked_url, status in shoulu.result.items()
        )
    print('done,完成查询')