package com.zdf.jsoup;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.select.Elements;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.ResponseBody;

import com.alibaba.fastjson.JSON;
import com.zdf.jsoup.domain.Ticket;

/**
 * Spring MVC controller that fetches a remote HTML page with jsoup, reads the
 * rows of one table on it, and exposes the first two cells of every row as a
 * list of {@link Ticket} objects.
 */
@Controller
public class JsoupTest {

    /** Address of the page that is downloaded and parsed. */
    private static final String BOARD_URL = "http://dynamic.12306.cn/otn/board/boardMore";

    /** Value of the {@code id} attribute of the table whose rows are read. */
    private static final String BOARD_TABLE_ID = "table_board_more";

    /**
     * Downloads the board page and converts each table row into a {@link Ticket}.
     *
     * <p>For every {@code tr} element inside the table, the text of the first
     * child becomes the ticket's start place and the text of the second child
     * becomes its end place. Rows with fewer than two child elements are skipped.
     *
     * @return the tickets that were read; an empty list (never {@code null}) when
     *         the page cannot be fetched or the table is not present in the page
     */
    @RequestMapping("/jsoupTest")
    public @ResponseBody List<Ticket> jsoupTest() {
        List<Ticket> tickets = new ArrayList<Ticket>();
        Connection connect = Jsoup.connect(BOARD_URL);
        try {
            Document document = connect.get();
            Element table = document.getElementById(BOARD_TABLE_ID);
            if (table == null) {
                // The page did not contain the expected table; there is nothing to read.
                return tickets;
            }

            // Collect every <tr> inside the table.
            // NOTE(review): rows made of <th> cells are turned into tickets as well,
            // exactly as before — confirm whether header rows should be filtered out.
            Elements rows = table.getElementsByTag("tr");
            for (Element row : rows) {
                Elements cells = row.children();
                if (cells.size() < 2) {
                    // Both columns are required; indexing a shorter row would throw.
                    continue;
                }
                Ticket ticket = new Ticket();
                ticket.setStartPlace(cells.get(0).text()); // departure place
                ticket.setEndPlace(cells.get(1).text());   // destination
                System.out.println(ticket);
                tickets.add(ticket);
            }
        } catch (IOException e) {
            // Fetching the page failed: keep the stack trace on stderr and fall
            // through so the caller receives an empty list instead of null.
            e.printStackTrace();
        }
        return tickets;
    }

    /**
     * Handles requests for the root path.
     *
     * @return the string {@code "velocity/ticket"}; presumably resolved by the
     *         application's view resolver as the page to render — confirm
     *         against the view configuration
     */
    @RequestMapping("/")
    public String toVm() {
        return "velocity/ticket";
    }
}
另外解释一下:html/text和val的区别:
html() 用于读取或设置带有标签的 HTML 内容;text() 用于获取纯文本;val() 只有在元素带有 value 属性时才能使用,比如:<input type="text" value="XX"></input>,这个时候使用 val()。
<span>您好!</span>这个时候使用text
还有 child 和 children 的区别:child(index) 需要传入下标,返回该下标处的单个子元素(Element);children() 返回该元素的所有子元素,返回值类型为 Elements。
Jsoup爬虫最终修订版
原文地址:https://www.cnblogs.com/df1151112630/p/8448616.html