几乎一样的代码和配置, aiohttp 和 requests 获取到的页面内容不一样, 这个问题可能出在哪?
时间: 2020-08-18来源:V2EX
前情提要
# Checked Stack Overflow; the problem might be SSL-related, but changing that
# did not help.
#
# ROOT CAUSE (review): the aiohttp call passed `headers=data["headers"]`, but
# no name `data` is defined anywhere in this script, so the coroutine raises
# NameError (or, if a stale `data` existed in an interactive session, sent the
# wrong headers). Without the browser-like headers Amazon serves a different
# anti-bot page, which is why aiohttp and requests saw different content.
# Fixed below: pass the module-level `headers` dict directly.

import asyncio
import json
import random
import time

import aiohttp
import requests

# Optional project-local proxy helper; its only use below is commented out,
# so tolerate its absence instead of crashing on import.
try:
    from ori_async import get_proxy
except ImportError:
    get_proxy = None

list_url = 'https://www.amazon.co.uk/s?k=dress&ref=nb_sb_noss'
product_url = 'https://www.amazon.co.uk/AUSELILY-Womens-Sleeve-Pleated-Pockets/dp/B082W811L3/ref=sr_1_1_sspa?dchild=1&keywords=dress&qid=1596264150&s=clothing&sr=1-1-spons&psc=1&spLa=ZW5jcnlwdGVkUXVhbGlmaWVyPUEzTEpRR0NLRlhQMFFDJmVuY3J5cHRlZElkPUEwMDY5Nzg5MkZTUllZWTM3VFVIQiZlbmNyeXB0ZWRBZElkPUEwOTU0NzQ1MTE0QzhFV0w0SjJOMCZ3aWRnZXROYW1lPXNwX2F0ZiZhY3Rpb249Y2xpY2tSZWRpcmVjdCZkb05vdExvZ0NsaWNrPXRydWU='
baidu_url = 'https://www.baidu.com'

# Browser-captured request headers (copied from Chrome dev tools); the cookie
# and user-agent are what make Amazon return the normal page instead of the
# anti-bot one.
headers = {
    "authority": "www.amazon.co.uk",
    "method": "GET",
    "path": "/s?k=dress&i=clothing&ref=nb_sb_noss",
    "scheme": "https",
    "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
    "accept-encoding": "gzip, deflate, br",
    "accept-language": "zh-CN,zh;q=0.9,en;q=0.8,en-US;q=0.7,ja;q=0.6,zh-TW;q=0.5",
    "cache-control": "max-age=0",
    "cookie": "session-id=258-7467381-0898317; i18n-prefs=GBP; ubid-acbuk=262-6045121-6110346; x-wl-uid=13CgqXYwZCFN3okL9HYQm7Iyt8Md2S5Dj4uA/dwKZrgtM0V8Ii0/OTO/AbtTOSRVzmlpbyCfIvZw=; session-token=\"K1UgAkfjQLKORhX6CN0AXXcRycoGecW5zqC3Nk8y/0rr7ZHQPpm5kzjT0YPAY/M8vVOWYNXoWCDYhSbrXyHP/bnNILWcLd2I+04dfUASNtpMHQkTh2YNffN748Rd9HxMQ6wFjVkDsfhgbm/YgEQ5uy8H+qWM1i9z+8uePKVL1BfVQtQpOEFxLQK+1GRQHBfdt7urN81Bkg0WjHz3pOVR31pILIjf3aM1nhncWG1P/A93yPpUOBTFeMh5/6V4RrddfHGn4uifWdg=\"; session-id-time=2082758401l; csm-hit=tb:3GP38DER2C68WEEPMTK8+s-KCAYBB85E78WBDWP92X7|1595834228498&t:1595834228498&adb:adblk_yes",
    "downlink": "9.75",
    "ect": "4g",
    "rtt": "200",
    "sec-fetch-dest": "document",
    "sec-fetch-mode": "navigate",
    "sec-fetch-site": "same-origin",
    "sec-fetch-user": "?1",
    "upgrade-insecure-requests": "1",
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4181.9 Safari/537.36"
}

url = list_url
# proxies = get_proxy()


async def main():
    """Fetch `url` with aiohttp (certificate verification disabled, matching
    requests' verify=False below) and print the status and content length."""
    connector = aiohttp.TCPConnector(ssl=False)
    async with aiohttp.ClientSession(connector=connector) as session:
        # FIX: was `headers=data["headers"]` — `data` is undefined (NameError).
        # Use the same `headers` dict that the requests call uses, so both
        # clients send identical headers and fetch comparable pages.
        async with session.get(url, headers=headers) as resp:
            print(resp.status)
            content = await resp.text()
            print(len(content))


if __name__ == '__main__':
    start = time.time()
    loop = asyncio.get_event_loop()
    loop.run_until_complete(main())
    end = time.time()
    print('spend time is {}'.format(end - start))

    # response = requests.get(url, headers=headers, proxies=proxies, timeout=8, verify=False)
    response = requests.get(url, headers=headers, timeout=8, verify=False)
    print("length is {}".format(len(response.text)))

科技资讯:

科技学院:

科技百科:

科技书籍:

网站大全:

软件大全:

热门排行