爬虫-获取数据xpath
-
安装lxml
pip3 install lxml
-
基本用法
# Basic lxml usage: download a page with requests, parse it, and run
# several XPath queries against the parsed tree.
import requests
from lxml import etree

url = 'xxx'  # placeholder: replace with the address of the page to scrape
res = requests.get(url).text
html = etree.HTML(res)

# Select every <div> element in the document.
xpath = '//div'
print(html.xpath(xpath))

# Select <span class="yy"> elements located under the <div id="xx">.
xpath = '//div[@id="xx"]//span[@class="yy"]'
print(html.xpath(xpath))

# Select the text nodes inside those spans.
xpath = '//div[@id="xx"]//span[@class="yy"]//text()'
print(html.xpath(xpath))

# Select the href attribute values found on or under those spans.
xpath = '//div[@id="xx"]//span[@class="yy"]//@href'
print(html.xpath(xpath))