汇总一下爬虫常见的问题(坑):
import requests
# Fetch the page and decode its body with the most plausible charset.
#
# The URL must be a bare address: the previous literal was wrapped in "<...>"
# (a Markdown autolink artifact), which requests rejects as an invalid URL.
# A timeout is passed so a stalled server cannot hang the script forever.
req = requests.get("http://news.sina.com.cn/", timeout=10)

# requests reports 'ISO-8859-1' when a text/* response carries no charset in
# its Content-Type header, and None when it cannot infer one at all. Neither
# value is trustworthy for non-Latin pages, so in both cases look for a
# charset declared inside the document itself and, failing that, fall back to
# statistical detection.
# NOTE: get_encodings_from_content is deprecated upstream (slated for removal
# in requests 3.0) but is still available in requests 2.x.
if req.encoding is None or req.encoding == 'ISO-8859-1':
    encodings = requests.utils.get_encodings_from_content(req.text)
    if encodings:
        encoding = encodings[0]
    else:
        encoding = req.apparent_encoding
else:
    # The server declared a real charset: honour it. Previously this path
    # left `encoding` unassigned and the decode below raised NameError.
    encoding = req.encoding

# Detection may yield nothing (e.g. for an empty body); use UTF-8 then.
if not encoding:
    encoding = 'utf-8'

try:
    # errors='replace' substitutes U+FFFD for undecodable bytes instead of
    # raising UnicodeDecodeError.
    encode_content = req.content.decode(encoding, 'replace')
except LookupError:
    # The charset name sniffed from the page is not a codec Python knows.
    encode_content = req.content.decode('utf-8', 'replace')