
from urllib import request
import requests
from io import BytesIO
import gzip
import re
# Douyu category page whose room-info blocks are extracted.
url = 'https://www.douyu.com/g_DOTA2'

# Matches one whole "DyListCover-info" <div>, non-greedily, up to the first
# closing </div>. [\s\S] is used instead of "." so the match can span newlines.
# NOTE (translated from the original comment): "if a group is used here, the
# 'dev' in the middle of the block cannot be extracted" -- presumably "div";
# i.e. the pattern deliberately has no capture group so that re.findall
# returns the full matched markup.
root_pattern = r'<div class="DyListCover-info">[\s\S]*?</div>'

# First two bytes of every gzip stream (RFC 1952 magic number).
GZIP_MAGIC = b'\x1f\x8b'


def fetch_raw(page_url, timeout=10.0):
    """Download *page_url* and return the undecoded response body as bytes.

    The response is closed as soon as the body has been read.
    Raises urllib.error.URLError (or a subclass) on network/HTTP failure.
    """
    with request.urlopen(page_url, timeout=timeout) as response:
        return response.read()


def decode_body(raw):
    """Return *raw* as UTF-8 text, gunzipping it first when it is gzip data.

    The payload is only decompressed when it starts with the gzip magic
    number, so an uncompressed response no longer crashes the script.
    Raises UnicodeDecodeError if the (decompressed) bytes are not UTF-8.
    """
    if raw[:2] == GZIP_MAGIC:
        with gzip.GzipFile(fileobj=BytesIO(raw)) as gz:
            raw = gz.read()
    return raw.decode('utf-8')


def extract_blocks(html, pattern=root_pattern):
    """Return every substring of *html* matched by *pattern*, in page order.

    Returns an empty list when nothing matches.
    """
    return re.findall(pattern, html)


def main():
    """Fetch the page at ``url`` and print each extracted info block."""
    htmls = decode_body(fetch_raw(url))
    root_html = extract_blocks(htmls)
    for html in root_html:
        print(html)


if __name__ == '__main__':
    main()