from selenium import webdriver
from selenium.webdriver.edge.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
# Adjust the WebDriver path to match your own environment.
webdriver_path = 'C:/Windows/System32/msedgedriver.exe'
# Point Selenium at the Edge WebDriver executable through a Service object.
s = Service(executable_path=webdriver_path)
driver = webdriver.Edge(service=s)
# Open the entry URL of the section (category) listing page.
driver.get('https://book.sciencereading.cn/shop/book/Booksimple/list.do?showQueryModel.dp1Value=75e48243889111e7a2df00163e2ed6f9')
def get_page_html(page_number):
    """Return the HTML source of the listing after switching to a page.

    Waits for the pagination bar to be present, then, for any page number
    other than 1, calls the page's own ``setPageNumber`` JavaScript function
    and waits for the pagination bar again before reading
    ``driver.page_source``. For page number 1 no navigation is performed and
    the source of the currently loaded page is returned.

    Args:
        page_number: Page number to switch to; it is handed to
            ``setPageNumber`` as a string.

    Returns:
        The page source as a string, or ``None`` if any step raised (the
        error is printed instead of propagated).
    """
    try:
        # Wait for the pagination navigation to appear.
        WebDriverWait(driver, 30).until(
            EC.presence_of_element_located((By.CLASS_NAME, 'pagination'))
        )
        # Anything other than the first page requires switching pages.
        if page_number != 1:
            # Pass the value as a script argument instead of splicing it
            # into the JavaScript source, so no quoting is needed.
            driver.execute_script(
                "setPageNumber(arguments[0])", str(page_number)
            )
            # Wait for the new page to load.
            # NOTE(review): the pagination element already exists before the
            # switch, so this condition may be satisfied immediately; adjust
            # the locator to something unique to the target page.
            WebDriverWait(driver, 30).until(
                EC.presence_of_all_elements_located((By.CLASS_NAME, 'pagination'))
            )
        # Return the page HTML.
        return driver.page_source
    except Exception as e:
        # Best-effort: report the failure and signal it with None.
        print(f"An error occurred: {e}")
        return None
try:
    # Fetch page 1 first, then the HTML of page 3.
    page_1_html = get_page_html(1)
    page_3_html = get_page_html(3)
finally:
    # Close the WebDriver even if the run is interrupted, so the browser
    # and driver processes are not left behind.
    driver.quit()
print(page_3_html)