一、Jsoup查找DOM元素的方法
getElementById(String id):根据 id 查询 DOM 元素
getElementsByTag(String tagName):根据标签(tag)名称查询 DOM 元素
getElementsByClass(String className):根据样式(class)名称查询 DOM 元素
getElementsByAttribute(String key):根据属性名查询 DOM 元素
getElementsByAttributeValue(String key, String value):根据属性名和属性值查询 DOM 元素
二、代码实现
???public static void main(String[] args) throws Exception{ ???????// 创建httpClient实例 ???????CloseableHttpClient httpClient = HttpClients.createDefault(); ???????// 创建httpGet实例 ???????HttpGet httpGet = new HttpGet("http://www.cnblogs.com"); ???????httpGet.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:45.0) Gecko/20100101 Firefox/45.0"); ???????CloseableHttpResponse response = httpClient.execute(httpGet); ???????String content = null; ???????if(response != null){ ???????????HttpEntity entity = response.getEntity(); ??????????????content = EntityUtils.toString(entity, "UTF-8"); ?// 获取网页内容 ???????????????????????Document document = Jsoup.parse(content); ?// 解析网页,得到文档对象 ???????????????????????/** ????????????* 1.根据tag获取元素 ????????????*/ ???????????Elements elements = document.getElementsByTag("title"); // 获取 tag为 title的DOM元素 ???????????Element element = elements.get(0); // 获取第一个DOM元素 ???????????String title = element.text(); ?// 返回元素的文本 ???????????System.out.println("博客园的标题:" + title); ???????????????????????/** ????????????* 2.根据 id获取元素 ????????????*/ ???????????Element element2 = document.getElementById("site_nav_top"); ???????????String navTop = element2.text(); ???????????System.out.println("座右铭:" + navTop); ???????????????????????/** ????????????* 3.根据样式获取元素 ????????????*/ ???????????Elements elements3 = document.getElementsByClass("post_item"); ???????????System.out.println("============根据样式获取元素============="); ???????????for(Element e : elements3){ ???????????????System.out.println(e.html()); ???????????????System.out.println("------------------------------"); ???????????} ???????????????????????/** ????????????* 4.根据属性名称来查询DOM ????????????*/ ???????????Elements elements4 = document.getElementsByAttribute("width"); ???????????System.out.println("============根据属性名称来查询DOM============="); ???????????for(Element e : elements4){ ???????????????System.out.println(e.toString()); ???????????????System.out.println("------------------------------"); 
???????????} ???????????????????????/** ????????????* 5.根据属性名和属性值来查询DOM ????????????*/ ???????????Elements elements5 = document.getElementsByAttributeValue("target", "_blank"); ???????????System.out.println("============ 根据属性名和属性值来查询DOM============="); ???????????for(Element e : elements5){ ???????????????System.out.println(e.toString()); ???????????????System.out.println("------------------------------"); ???????????} ???????} ???????if(response != null){ ???????????response.close(); ???????} ???????if(httpClient != null){ ???????????httpClient.close(); ???????} ???}
Jsoup(二)-- Jsoup查找DOM元素
原文地址:http://www.cnblogs.com/xbq8080/p/7523836.html