def get_detail_html(queue, delay=2):
    """Worker loop that simulates crawling article detail pages.

    Runs forever: takes one URL at a time from ``queue``, simulates the
    download by sleeping, and then marks that URL as done. It is meant to
    run in a daemon thread so that it is killed when the main thread exits.

    Args:
        queue: Queue-like object providing blocking ``get()`` and
            ``task_done()``; it holds the detail-page URLs to crawl.
        delay: Seconds to sleep per URL to simulate the download.
    """
    while True:
        # Blocks until a URL is available; a real crawler would fetch it here.
        queue.get()
        try:
            print("get detail html started")
            time.sleep(delay)
        finally:
            # Exactly one task_done() per get(). Collecting a fixed batch of
            # items before acknowledging any of them can strand several
            # workers with partially filled batches, in which case
            # queue.join() never returns.
            queue.task_done()
        print("get detail html end")
def get_detail_url(queue):
    """Simulate crawling the article list page.

    Puts the detail-page URLs for article ids 0 through 19 onto ``queue``,
    in ascending id order.

    Args:
        queue: Queue-like object providing ``put()``.
    """
    detail_urls = (
        "http://projectsedu.com/{id}".format(id=article_id)
        for article_id in range(20)
    )
    for detail_url in detail_urls:
        queue.put(detail_url)
# 1. Thread communication method: a shared variable (here, a Queue shared by all threads)
if __name__ == "__main__":
    # Demo: one producer thread enqueues detail-page URLs while ten daemon
    # worker threads consume them; the total elapsed time is printed.
    detail_url_queue = Queue(maxsize=1000)
    thread_detail_url = threading.Thread(
        target=get_detail_url, args=(detail_url_queue,)
    )
    for _ in range(10):
        html_thread = threading.Thread(
            target=get_detail_html, args=(detail_url_queue,)
        )
        # Daemon threads are killed when the main thread exits, which is how
        # the endless worker loops are stopped. The ``daemon`` attribute
        # replaces the deprecated setDaemon().
        html_thread.daemon = True
        html_thread.start()
    start_time = time.time()
    thread_detail_url.start()
    # Wait for the producer first: Queue.join() returns immediately while the
    # queue has no unfinished tasks, so joining the queue before anything has
    # been enqueued could end the program before any work is done.
    thread_detail_url.join()
    # Block until every enqueued URL has been acknowledged with task_done().
    detail_url_queue.join()
    print("last time: {}".format(time.time() - start_time))