wkhtmltopdf [软件]:这个是必须提前安装好的,否则这个案例无法实现。
# Step 1: request the data.
import os
import re

import parsel  # HTML parsing; used below but never imported in the original snippet
import requests  # HTTP client

# Article-list page to crawl.
url = 'https://blog.csdn.net/fei347795790/article/list/1'

# Request headers: send a browser user-agent so the server does not
# identify the client as a Python script.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.88 Safari/537.36'
}

# Send the request with requests' GET method.
response = requests.get(url=url, headers=headers)
print(response.text)
# Printing `response` itself would show <Response [200]>; 200 means success.

# Step 2: parse the data and extract the content.
# More resources, source code and Q&A: 832157862

# Minimal HTML page that wraps the extracted article body; the loop below
# fills the `{article}` placeholder via str.format.
# NOTE(review): the original snippet uses `html_str` without defining it;
# this template is a reconstruction -- adjust if a different wrapper is needed.
html_str = """
<!doctype html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Document</title>
</head>
<body>
{article}
</body>
</html>
"""

# Links to the individual articles on the list page.
# NOTE(review): the original snippet iterates over `href` without defining
# it; the CSS selector below is an assumption about the list page's markup
# -- confirm against the live page.
selector = parsel.Selector(response.text)
href = selector.css('.article-list a::attr(href)').getall()

for index in href:
    html_data = requests.get(url=index, headers=headers).text
    selector_1 = parsel.Selector(html_data)
    title = selector_1.css('#articleContentId::text').get()
    content = selector_1.css('#content_views').get()
    article_content = html_str.format(article=content)
    print(title)
    print(article_content)
    # Stop after the first article (kept from the original snippet).
    break

# Step 3: save the data.
# The output directory must exist before open() can create a file in it.
os.makedirs('html', exist_ok=True)
# The title comes from the web page; replace characters that are not allowed
# in Windows file names (and path separators) before using it in a path.
safe_title = re.sub(r'[\\/:*?"<>|]', '_', title)
html_path = os.path.join('html', safe_title + '.html')
with open(html_path, mode='w', encoding='utf-8') as f:
    f.write(article_content)
print(title, '保存成功')
image.png
image.png
# Save the article as HTML, then convert that HTML file to PDF.
# `title` and `article_content` are produced by the parsing step earlier in
# the script.
import os
import re

import pdfkit  # Python wrapper around the wkhtmltopdf executable

# Both output directories must exist before files can be written into them.
os.makedirs('html', exist_ok=True)
os.makedirs('pdf', exist_ok=True)

# The title comes from the web page; replace characters that are not allowed
# in Windows file names (and path separators) before using it in a path.
safe_title = re.sub(r'[\\/:*?"<>|]', '_', title)
html_path = os.path.join('html', safe_title + '.html')
pdf_path = os.path.join('pdf', safe_title + '.pdf')

with open(html_path, mode='w', encoding='utf-8') as f:
    f.write(article_content)

# Point pdfkit at the locally installed wkhtmltopdf binary.
config = pdfkit.configuration(wkhtmltopdf=r'C:\01-Software-installation\wkhtmltopdf\bin\wkhtmltopdf.exe')
# Fixed: the original called the misspelled name `ppdfkit`.
pdfkit.from_file(html_path, pdf_path, configuration=config)
print(title, '保存成功')
# More resources, source code and Q&A: 832157862
image.png
联系客服