import requests import parsel # 解析数据 import os # python学习交流 1群 815624229 快满了加2裙 # python学习交流 2群 279199867 for page in range(1, 12): response = requests.get(f'https://file.kole8.com/zhuanti/file_upload/20251/page/{page}') # <response [200]>: 请求成功 # 静态页面的网页源代码 # 把静态页面的字符串变成可选的对象 selector = parsel.selector(response.text) # 3. 数据解析(筛选过滤咱们需要的数据) # re: 可以直接提取网页字符串数据 # css / xpath: 通过html标签进行提取 lis = selector.xpath("//ul[@class='b2_gap ']/li") for li in lis

# Analysis of, and suggested improvements to, the crawler from the question.
"""Download the images of every gallery listed on file.kole8.com.

The script walks listing pages 1-11, opens each gallery linked from a
listing, and stores every image found in the gallery body under
``img/<gallery title>/``.  Any single failure (network error, bad HTTP
status, unusable title, unwritable file) is reported and skipped so that
one bad item never aborts the whole crawl.
"""

import os
import re
from urllib.parse import urljoin, urlsplit

BASE_URL = 'https://file.kole8.com/zhuanti/file_upload/20251/page/{page}'
PAGE_RANGE = range(1, 12)
IMG_ROOT = 'img'
# Without a timeout a single stalled connection would block the crawl forever.
REQUEST_TIMEOUT = 15  # seconds

# Characters that are path separators or are rejected by common file systems.
_UNSAFE_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]')


def sanitize_name(name):
    """Return *name* as one safe path component, or ``''`` if unusable.

    Unsafe characters are replaced with ``_``; surrounding whitespace and
    leading/trailing dots are stripped so the result can never be ``.``,
    ``..`` or a name with a trailing dot/space.  ``None`` and empty input
    yield ``''``.
    """
    if not name:
        return ''
    return _UNSAFE_CHARS.sub('_', name).strip().strip('. ')


def image_filename(url):
    """Return the file name to save *url* under, or ``''`` if it has none.

    Only the last segment of the URL path is used, so query strings and
    fragments never end up in the file name.
    """
    last_segment = urlsplit(url).path.rsplit('/', 1)[-1]
    return sanitize_name(last_segment)


def _get(url):
    """GET *url*; return the response, or ``None`` after reporting a failure.

    Connection errors, timeouts and non-2xx statuses are all treated as
    failures.
    """
    # Third-party imports are local so the pure helpers above stay importable
    # without the scraping dependencies installed.
    import requests

    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
    except requests.RequestException as e:
        print(f'请求 {url} 时出错: {e}')
        return None
    return response


def _parse(html):
    """Wrap *html* in a ``parsel.Selector`` for XPath queries."""
    import parsel

    return parsel.Selector(html)


def _download_gallery(li, page_url):
    """Download all images of the gallery linked from listing item *li*.

    *page_url* is the listing page the item came from; it is used to resolve
    a relative gallery link.
    """
    title = li.xpath('.//h2/a/text()').get()
    href = li.xpath('.//h2/a/@href').get()
    folder = sanitize_name(title)
    # .get() returns None when the XPath matches nothing; skip such items
    # instead of crashing on string concatenation or requesting None.
    if not folder or not href:
        print(f'跳过无效条目: title={title!r}, href={href!r}')
        return

    print(f'-------------正在爬取{title}-------------')
    target_dir = os.path.join(IMG_ROOT, folder)
    try:
        os.makedirs(target_dir, exist_ok=True)
    except OSError as e:
        print(f"创建文件夹 {title} 时出错: {e}")
        return

    detail_url = urljoin(page_url, href)
    detail = _get(detail_url)
    if detail is None:
        return

    sources = _parse(detail.text).xpath(
        '//div[@class="entry-content"]/p//img/@src'
    ).getall()
    for src in sources:
        img_url = urljoin(detail_url, src)
        img_name = image_filename(img_url)
        if not img_name:
            continue
        img = _get(img_url)
        if img is None:
            continue
        # os.path.join keeps the file inside the directory created above on
        # every platform (a hard-coded backslash path only works on Windows).
        try:
            with open(os.path.join(target_dir, img_name), mode='wb') as f:
                f.write(img.content)
        except OSError as e:
            print(f'保存 {img_name} 时出错: {e}')
            continue
        print('下载完成:', img_name)


def main():
    """Crawl every listing page and download each gallery it links to."""
    for page in PAGE_RANGE:
        page_url = BASE_URL.format(page=page)
        listing = _get(page_url)
        if listing is None:
            continue
        for li in _parse(listing.text).xpath("//ul[@class='b2_gap ']/li"):
            _download_gallery(li, page_url)


if __name__ == '__main__':
    main()

# Additional points to keep in mind:
# 1. Follow the site's terms of use and applicable laws; make sure the crawl
#    is legal and compliant.
# 2. Consider adding a delay between requests so the target site is not put
#    under excessive load.
# 3. A crawl can produce a lot of data; make sure there is enough disk space
#    for the downloaded images.
#
# [Answer generated 2025-01-05 12:50:26 by an AI code-writing assistant;
#  cost: 530 points.]

人生没有白走的路，每一步都算数！Ctrl + D 收藏本站，开发、写文章快人一步~

全部AI / 热门 / AI写代码神器

服务商

更多选项

快捷项

其他AI