using System;
using System.Collections.Generic;
using System.Linq;
using System.Web;
using System.Web.UI;
using System.Web.UI.WebControls;
using HtmlAgilityPack;//引用爬虫DLL
using System.Text;
using DotNet;
using System.Net;
using System.IO;
/// <summary>
/// Code-behind for the default page. On every load it scrapes a remote page
/// with HtmlAgilityPack and writes the extracted link data into the response.
/// </summary>
public partial class _Default : System.Web.UI.Page
{
    /// <summary>
    /// Page entry point: runs the knowledge-base crawler on each request.
    /// </summary>
    /// <param name="sender">Event source (unused).</param>
    /// <param name="e">Event arguments (unused).</param>
    protected void Page_Load(object sender, EventArgs e)
    {
        kwldg_Reptile();
    }

    /// <summary>
    /// Crawler for the Tencent Home front page (hot products). For each matched
    /// anchor it writes the link text, the href and the image source to the
    /// response, and downloads the image into <c>~/upload/link/</c>.
    /// </summary>
    /// <remarks>
    /// Network and file errors raised by the download are not caught here and
    /// propagate to the caller.
    /// </remarks>
    private void TencentHome_HotProdcut_Reptile()
    {
        // Address to scrape.
        string url = "http://hm.jia360.com/";
        Uri pageUri = new Uri(url);

        // All <a> nodes that sit under an <li> inside the tab box.
        HtmlNodeCollection anchors = LoadNodes(url, "//*[starts-with(@class,'tab_box ')]//li/a");
        if (anchors == null || anchors.Count == 0)
        {
            // Nothing matched (layout changed or empty page): nothing to output.
            return;
        }

        // Make sure the target directory exists before any download is attempted.
        string saveDirectory = Server.MapPath("~/upload/link/");
        Directory.CreateDirectory(saveDirectory);

        // One client for the whole run, disposed deterministically.
        using (WebClient client = new WebClient())
        {
            foreach (HtmlNode anchor in anchors)
            {
                string title = anchor.InnerText;
                string href = anchor.GetAttributeValue("href", string.Empty);

                // Direct child <img> that actually carries a src attribute; may be absent.
                HtmlNode image = anchor.SelectSingleNode("./img[@src]");
                string imgpath = image == null
                    ? string.Empty
                    : HttpUtility.HtmlDecode(image.GetAttributeValue("src", string.Empty));

                // Resolve relative / protocol-relative sources against the page URL
                // and refuse anything that is not plain http(s).
                Uri imageUri;
                if (TryResolveHttpUri(pageUri, imgpath, out imageUri))
                {
                    // LocalPath excludes the query string, so "?v=1" never ends up in the file name.
                    string fileName = Path.GetFileName(imageUri.LocalPath);
                    if (!string.IsNullOrEmpty(fileName))
                    {
                        client.DownloadFile(imageUri, Path.Combine(saveDirectory, fileName));
                    }
                }

                WriteEncodedLine(title);
                WriteEncodedLine(href);
                if (!string.IsNullOrEmpty(imgpath))
                {
                    WriteEncodedLine(imgpath);
                }
            }
        }
    }

    /// <summary>
    /// Crawler for the 98 Studio knowledge-base list page. Writes the text and
    /// href of every matched article link to the response.
    /// </summary>
    private void kwldg_Reptile()
    {
        // Address to scrape.
        string url = "http://98keji.com/article/article_list.aspx?pn=1";

        // All <a> nodes that sit under an <li> inside the article list.
        HtmlNodeCollection anchors = LoadNodes(url, "//*[starts-with(@class,'article_list ')]//li/a");
        if (anchors == null || anchors.Count == 0)
        {
            // Nothing matched: nothing to output.
            return;
        }

        foreach (HtmlNode anchor in anchors)
        {
            WriteEncodedLine(anchor.InnerText);
            WriteEncodedLine(anchor.GetAttributeValue("href", string.Empty));
        }
    }

    /// <summary>
    /// Loads <paramref name="url"/> with HtmlAgilityPack and evaluates
    /// <paramref name="xpath"/> against the document root.
    /// </summary>
    /// <param name="url">Absolute address of the page to fetch.</param>
    /// <param name="xpath">XPath expression selecting the wanted nodes.</param>
    /// <returns>The matched nodes; callers must handle a null or empty result.</returns>
    private static HtmlNodeCollection LoadNodes(string url, string xpath)
    {
        HtmlWeb web = new HtmlWeb();
        HtmlDocument document = web.Load(url);
        return document.DocumentNode.SelectNodes(xpath);
    }

    /// <summary>
    /// Resolves <paramref name="reference"/> against <paramref name="baseUri"/>
    /// and accepts the result only when its scheme is http or https.
    /// </summary>
    /// <param name="baseUri">Address of the page the reference was found on.</param>
    /// <param name="reference">Absolute or relative URL taken from the page.</param>
    /// <param name="resolved">The absolute URI on success; otherwise null.</param>
    /// <returns>True when an absolute http(s) URI could be produced.</returns>
    private static bool TryResolveHttpUri(Uri baseUri, string reference, out Uri resolved)
    {
        resolved = null;
        if (string.IsNullOrWhiteSpace(reference))
        {
            return false;
        }

        Uri candidate;
        if (!Uri.TryCreate(baseUri, reference.Trim(), out candidate))
        {
            return false;
        }

        // Scraped markup is untrusted: never let it point the downloader at file://, ftp://, etc.
        if (candidate.Scheme != Uri.UriSchemeHttp && candidate.Scheme != Uri.UriSchemeHttps)
        {
            return false;
        }

        resolved = candidate;
        return true;
    }

    /// <summary>
    /// Writes one line of scraped text to the response, HTML-encoded and
    /// followed by a line break.
    /// </summary>
    /// <param name="text">Raw text taken from the remote page; null is treated as empty.</param>
    private void WriteEncodedLine(string text)
    {
        // Decode first so entities already present in the source are not double-encoded,
        // then encode so remote markup cannot inject HTML or script into this page.
        string safe = HttpUtility.HtmlEncode(HttpUtility.HtmlDecode(text ?? string.Empty));
        Response.Write(safe + "<br/>");
    }
}
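// ASP.NET crawler example using HtmlAgilityPack
// (page navigation residue: "You may also like")
// Reposted from blog.csdn.net/qq_33285360/article/details/109219395
// (page navigation residue: "Today's recommendations")
// (page navigation residue: "Weekly ranking")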