# wait = WebDriverWait(browser, 30, 0.5)
# wait.until(lambda driver: browser.find_element(By.XPATH, '/html/body/div[2]/div[2]/div[3]/div/div[2]/ul/li[1]/span/a'))

# Print the href of every link in the target list.  find_elements returns an
# empty list when nothing matches, so there is no need to probe li[1]..li[99]
# one index at a time and treat the resulting exception as the loop
# terminator — the original broad `except Exception: break` also hid genuine
# WebDriver failures (stale session, bad XPath, ...).
for link in browser.find_elements(
        By.XPATH, '/html/body/div[2]/div[2]/div[3]/div/div[2]/ul/li/span/a'):
    print(link.get_attribute("href"))

# quit() (not close()) shuts down the driver process as well as the window,
# so no geckodriver instance is leaked.
browser.quit()
for i in range(1,100): try: print(dict(tree.xpath("/html/body/div[2]/div[2]/div[3]/div/div[2]/ul/li[{}]/span/a".format(i))[0].attrib)["href"]) except Exception as e: break break print("") print("")
最终效果同selenium
最终代码
import time

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait

import urllib3
import requests
from lxml import etree

# Silence the InsecureRequestWarning spam urllib3 emits for unverified HTTPS.
urllib3.disable_warnings()

# Absolute XPath of the target link list, shared by both modes.  Leaving the
# `li` step un-indexed matches every list item in one query, replacing the
# original li[1]..li[99] probe loops whose broad `except Exception: break`
# also hid genuine WebDriver/parse errors.
LINKS_XPATH = "/html/body/div[2]/div[2]/div[3]/div/div[2]/ul/li/span/a"

# Interactive entry point: ask for a mode, scrape the page's link list with
# either selenium (mode 1) or requests+lxml (mode 2), then exit; any other
# input re-prompts.
while True:
    pattern = input("请输入模式(1代表使用selenium,2代表使用requests):")
    print("")
    if pattern == "1":
        browser = webdriver.Firefox()
        websiteURL = input("请输入要爬取的网址:")
        browser.get(websiteURL)
        # wait = WebDriverWait(browser, 30, 0.5)
        # wait.until(lambda driver: browser.find_element(By.XPATH, LINKS_XPATH))
        for link in browser.find_elements(By.XPATH, LINKS_XPATH):
            print(link.get_attribute("href"))
        # quit() (not close()) also terminates the driver process, so no
        # geckodriver instance is leaked.
        browser.quit()
        break
    elif pattern == "2":
        session = requests.session()
        websiteURL = input("请输入要爬取的网址:")
        print("")
        print("")
        res = session.get(websiteURL)
        # The original built etree.HTMLParser(encoding="utf-8") but never
        # passed it to etree.HTML, so the intended UTF-8 decoding was
        # silently lost; setting Response.encoding realizes that intent
        # before .text decodes the body.
        res.encoding = "utf-8"
        tree = etree.HTML(res.text)
        print("结果如下:")
        print("")
        for anchor in tree.xpath(LINKS_XPATH):
            print(anchor.get("href"))
        break
        # NOTE(review): the two print("") calls that followed this break in
        # the original were unreachable dead code and have been removed.
        # `time` and WebDriverWait are currently unused (the wait is
        # commented out) but are kept imported per the original file.
    else:
        print("输入错误,请重新输入")