参考文档:https://www.cnblogs.com/asxinyu/p/CSharp_HtmlAgilityPack_XPath_Weather_Data.html#_label0
HtmlAgilityPack是一个开源的解析HTML元素的类库,最大的特点是可以通过XPath来解析HTML
下载地址如下:http://htmlagilitypack.codeplex.com/
XPath教程:http://www.w3school.com.cn/xpath/index.asp
XPath获取方法:
官方API:http://html-agility-pack.net/parser
/// <summary>
/// Downloads the weather-history page at the hard-coded URL, extracts one
/// <c>WeatherReport</c> per data row of the table under the element with
/// id <c>content</c>, and passes the resulting list to the view.
/// </summary>
/// <returns>
/// The default view with a <c>List&lt;WeatherReport&gt;</c> model. The list is
/// empty when the table, or its rows, cannot be found in the downloaded page.
/// </returns>
public ActionResult Index()
{
    // Scrape the weather page.
    HtmlWeb htmlWeb = new HtmlWeb();
    string url = "http://www.tianqihoubao.com/lishi/taizhou/month/201712.html";
    htmlWeb.OverrideEncoding = Encoding.GetEncoding("gb2312"); // fixes garbled (mis-decoded) text
    HtmlAgilityPack.HtmlDocument document = htmlWeb.Load(url);

    List<WeatherReport> weatherReports = new List<WeatherReport>();

    // XPath string literals must be delimited by ASCII quotes; typographic
    // quotes (‘ ’) do not form a valid literal and the query would not match.
    var table = document.DocumentNode.SelectSingleNode(@"//*[@id='content']/table");
    if (table == null)
    {
        return View(weatherReports);
    }

    // SelectNodes yields null (not an empty collection) when nothing matches,
    // so it has to be checked before it is indexed.
    var rows = table.SelectNodes(@"tr");
    if (rows == null)
    {
        return View(weatherReports);
    }

    // Start at index 1: the first row is the table header.
    for (int rowIndex = 1; rowIndex < rows.Count; rowIndex++)
    {
        var cells = rows[rowIndex].SelectNodes(@"td");

        // Expected columns: date - weather - temperature - wind.
        // Rows without exactly four <td> cells are skipped.
        if (cells == null || cells.Count != 4)
        {
            continue;
        }

        var date1 = CleanCellText(cells[0]); // date of the current row
        var tq = CleanCellText(cells[1]);    // weather condition
        var qw = CleanCellText(cells[2]);    // temperature
        var fx = CleanCellText(cells[3]);    // wind force / direction

        // Output the parsed row.
        Console.WriteLine("{0}:{1},{2},{3}", date1, tq, qw, fx);

        WeatherReport weatherReport = new WeatherReport
        {
            Date = date1,
            State = tq,
            Temperature = qw,
            Wind = fx
        };
        weatherReports.Add(weatherReport);
    }

    return View(weatherReports);
}

/// <summary>
/// Returns the inner text of a table cell with CR/LF pairs and spaces removed
/// and any remaining leading/trailing whitespace trimmed.
/// </summary>
/// <param name="cell">The cell node whose text is read.</param>
/// <returns>The cleaned cell text.</returns>
private static string CleanCellText(HtmlAgilityPack.HtmlNode cell)
{
    return cell.InnerText.Replace("\r\n", "").Replace(" ", "").Trim();
}
HtmlAgilityPack解析html
原文地址:http://www.cnblogs.com/liandy0906/p/8085558.html