多线程百度收录批量查询工具,python

多线程批量查询网址是否已被百度收录。使用前请把代码中的 Cookie 替换为你自己的;脚本已较久未更新,查询时可能出现验证码,此时更换 Cookie 即可(不保证目前仍然可用)。


import time
from queue import Queue
from threading import Thread

import requests

class shoulu(Thread):
    """Worker thread that checks whether queued URLs are indexed by Baidu.

    Each worker repeatedly takes a URL from ``k_queue``, searches Baidu for
    it, classifies the returned page by looking for known marker strings and
    stores the outcome in the class-level ``result`` dict.  ``run`` never
    returns, so workers are meant to be started as daemon threads and the
    caller waits for completion with ``k_queue.join()``.
    """

    # URLs that have been handed to the queue (filled by the launcher code).
    seen = set()
    # Maps every processed URL to its status string; shared by all workers.
    result = {}

    # Status strings; they are written verbatim to the output file.
    NOT_INDEXED = '未收录'
    INDEXED = '已收录'
    CAPTCHA = '查询过程出现验证码'
    NO_SOURCE = '未获取源码'

    SEARCH_URL = 'http://ipv6.baidu.com/s'
    # Seconds to wait for Baidu before giving up on a single query.
    REQUEST_TIMEOUT = 10
    # Seconds a worker pauses after it has been served a captcha page.
    CAPTCHA_PAUSE = 100

    # Request headers, built once instead of on every loop iteration.
    # NOTE: the cookie is a captured browser session and expires; replace it
    # with your own when Baidu starts answering with captcha pages.
    HEADERS = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36',
        'Cookie': (
            'PSTM=1636077267; '
            'BIDUPSID=6D048EF8EF78EC012B99FCDD1F25E02E; '
            'BAIDUID=E88303C13DB2F9BEE9834DF179D2F017:FG=1; '
            'BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; '
            'BDSFRCVID=nW8OJeC62ZhFDlrHgQwNJTVRpe3IPzOTH6aoTDPTyF4Ms1bCCtgWEG0PKM8g0Ku-S2L-ogKK0eOTHkCF_2uxOjjg8UtVJeC6EG0Ptf8g0f5; '
            'H_BDCLCKID_SF=JJkO_D_atKvDqTrP-trf5DCShUFstqvdB2Q-XPoO3KJCDpOOyhOjeJ_p3bjNbPQiW5cpoMbgylRp8P3y0bb2DUA1y4vpK-ogQgTxoUJ2fnRJEUcGqj5Ah--ebPRiJPQ9QgbW5hQ7tt5W8ncFbT7l5hKpbt-q0x-jLTnhVn0MBCK0hDvPKITD-tFO5eT22-usKerd2hcHMPoosItm04Txyh0SqqrZLp5N-Tnr5D_btMbUoqRHXnJi0btQDPvxBf7pWDTm_q5TtUJMqqOx-JOrqfLn5MOyKMnitIT9-pno0hQrh459XP68bTkA5bjZKxtq3mkjbPbDfn028DKuDjRDKICV-frb-C62aKDsLnbnBhcqJ-ovQT0M04C7ybO2eR3ZQNcNQPb10D_5hUbeWfvpXn-R0hbjJM7xWeJpaJ5nJq5nhMJmKfb2-J0mqto7-P3y523ion5vQpnOEpQ3DRoWXPIqbN7P-p5Z5mAqKl0MLPbtbb0xXj_0DjPVKgTa54cbb4o2WbCQyqjM8pcN2b5oQTO30xTnBp32HCQZ-UQ4-PbdEC_RDpOUWfAkXpJvQnJjt2JxaqRCWJ5TMl5jDh3MKftQ-qOdexQ7bIny0hvctn5cShncLUjrDRLbXU6BK5vPbNcZ0l8K3l02V-bIe-t2XjQhDHRabK6aKC5bL6rJabC3MpOcXU6q2bDeQN0JQt6nWNn2sxQFQUQCq-bsL6K5Dp0vWtv4WbbvLT7johRTWqR4OR5JjxonDh83Ktbj5R3dHmT7LnbO5hvvhb5O3M7OLUKmDloOW-TB5bbPLUQF5l8-sq0x0bOte-bQXH_EJ6tOtRKJ_Kv55RrOfjrP-trf5DCShUFs2q-OB2Q-5KL-yJnPsbR4y-JG5UCp3bjNbPLeWGRE2MbdJJjoShbM3brPjMuyDf5m3b3MQ2TxoUJcBCnJhhvGqq-KXKuebPRiJPQ9QgbW5hQ7tt5W8ncFbT7l5hKpbt-q0x-jLTnhVn0M5DK0HPonHj82DTo33J; '
            'SIGNIN_UC=70a2711cf1d3d9b1a82d2f87d633bd8a03926704966bhwWmyeOdCL0QjlCuOVvMOxjyrVy4GxZIZIErQWbjcI64aIkE7BGUkWEA%2FWPFwWeA%2FKT0b%2BqUa2wadhpSpKuLKQ%2BBVlWyMZ%2FYgfO5UTDIvy%2Fw%2FdyecSVZI9UToftXBg3MBBw5onaHIrLSajCMUJ17noCz2HFy%2Ba0spwEnZtVqQseGx%2Fes6EzWgCGXigbCIofHCh%2ByAjDGefFcsIs1b40LixDu4UnA6xvH1ojVkQ9dyKeXzoDdAVup6RxeSlKsinCy097%2Bxs6kLX3wJ8QNYATgyhqJU0%2FHiw7Lq%2FGLWkqdHTHrXevYNkbh5JG2VYkl4IxWo%2FbrieowtAaVpnE7TFuzw%3D%3D64136060045377610966832433932427; '
            'H_PS_PSSID=35411_35104_31253_35489_34584_35491_35584_34813_35685_35316_26350_35751_22157; '
            'delPer=0; '
            'PSINO=5; '
            'H_WISE_SIDS=107311_110085_114550_127969_184716_186635_186743_186840_186844_188841_189034_189253_189755_190624_190803_191068_191245_191287_191370_192206_193246_193283_193559_194085_194519_195329_195343_195631_196045_196427_196590_197241_197350_197512_197711_197782_197958_198033_198089_198271_198513_198650_199083_199466_199578_199753_199777_199796_199974_200029_200128_200158_200193_200274_200450_200560_200576_200735_200743_201054_201098_201178_201328_201359_201539_201553_201598_201706_201819_201867_201978_8000073_8000104_8000122_8000137_8000150_8000155_8000157_8000173_8000178_8000186; '
            'SE_LAUNCH=5%3A27377659; '
            'rsv_i=aac9nOCV2qvoxyieGLmVG796qLlT3IQrYSHKt8IluOYm%2FdWUUtCyPvMEO5U8Tt6j56my3RpcXdaGGg%2BWgEsqMu%2Fl0cXPZT8; '
            'BD_HOME=1; '
            'BD_UPN=12314753; '
            'BD_CK_SAM=1; '
            'H_PS_645EC=9554Uj3V46WGW%2BlKlqPmBxueMnNiMXk7DGrn7B0QPiYpS6z08f94hLKxBcQ; '
            'BA_HECTOR=0g2h2g840kag802g9h1guhvpg0q; '
            'BDSVRTM=130; '
            'channel=baidusearch; '
            'baikeVisitId=200c57ee-4a42-40bc-829c-8774d10ea51c'
        ),
    }

    def __init__(self, k_queue):
        """Store the queue this worker pulls URLs from.

        Args:
            k_queue: a ``queue.Queue`` of URL strings to look up.
        """
        super().__init__()
        self.k_queue = k_queue

    def run(self):
        """Process queued URLs forever, recording one status per URL."""
        while True:
            url = self.k_queue.get()
            # Everything after get() is inside try/finally: a task that is
            # never marked done would make Queue.join() block forever.
            try:
                status = self._lookup(url)
                if status == self.CAPTCHA:
                    print("查询过程出现验证码")
                else:
                    print(url, status)
                self.filter(url, status)
                if status == self.CAPTCHA:
                    # Back off before this worker sends another query.
                    time.sleep(self.CAPTCHA_PAUSE)
            finally:
                self.k_queue.task_done()

    def _lookup(self, url):
        """Search Baidu for ``url`` and return the matching status string.

        A failed or timed-out request is reported and mapped to
        ``NO_SOURCE`` rather than raised, so one bad request cannot kill
        the worker thread.
        """
        # Passing the term through ``params`` lets requests percent-encode
        # it, so characters such as '&' or '#' cannot corrupt the query.
        params = {
            'ie': 'utf-8',
            'f': '8',
            'rsv_bp': '1',
            'rsv_idx': '1',
            'tn': 'baidu',
            'wd': url,
        }
        try:
            response = requests.get(
                self.SEARCH_URL,
                params=params,
                headers=self.HEADERS,
                timeout=self.REQUEST_TIMEOUT,
            )
        except requests.RequestException as exc:
            print(url, 'request failed:', exc)
            return self.NO_SOURCE
        # The query asks for UTF-8 (ie=utf-8); decode explicitly so the
        # Chinese marker strings match whatever charset requests guessed.
        response.encoding = 'utf-8'
        return self._classify(response.text)

    @staticmethod
    def _classify(html):
        """Map the text of a Baidu result page to a status string.

        The markers are checked in a fixed order and the first one found
        decides the result; a page with none of them is ``未获取源码``.
        """
        if "请检查您的输入是否正确" in html:
            return '未收录'
        if "百度为您找到相关结果约" in html:
            return '已收录'
        if "http://verify.baidu.com" in html:
            return '查询过程出现验证码'
        return '未获取源码'

    def filter(self, url, sl):
        """Record status ``sl`` for ``url`` in the shared ``result`` dict."""
        shoulu.result[url] = sl

if __name__=="__main__":
    # Read the URLs to check, one per line, from domains.txt.
    k_queue = Queue()
    with open('domains.txt',encoding='utf-8') as urls:
        for url in urls:
            # strip() also removes '\r' and stray spaces, which would
            # otherwise end up inside the search query.
            url = url.strip()
            # Skip blank lines and URLs already queued: results are keyed
            # by URL, so a repeated query could only overwrite an entry.
            if not url or url in shoulu.seen:
                continue
            shoulu.seen.add(url)
            k_queue.put(url)

    # Five daemon workers: their run() loops never return, so they must not
    # keep the interpreter alive once the queue has been drained.
    for _ in range(5):
        bds = shoulu(k_queue)
        bds.daemon = True
        bds.start()

    # Block until every queued URL has been marked done.
    k_queue.join()

    # Write one "url<TAB>status" line per URL; the with-block closes the
    # file even if a write fails.
    with open('baidumobilekey.txt', 'w', encoding='utf-8') as save:
        save.writelines(
            '%s\t%s\n' % (url, status)
            for url, status in shoulu.result.items()
        )
    print('done,完成查询')

转载请说明出处 内容投诉内容投诉
趣谈网 » 多线程百度收录批量查询工具,python
您需要 登录账户 后才能发表评论

发表评论

欢迎 访客 发表评论