闲来无聊,爬了一下全国省、市、区(县)、乡镇(街道)、居委会的信息,并存入了数据库。
以后做地址联动选择的时候可能用得着,这次可以精确到居委会
数据来源:国家统计局《2016年统计用区划代码和城乡划分代码》(截至2016年07月31日)
http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2016/index.html
具体代码如下,写得比较随意:
using AngleSharp.Parser.Html;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Net.Http;
using System.Text;
using System.Threading.Tasks;

namespace CrawlerArea
{
    /// <summary>
    /// Console crawler that downloads the 2016 administrative-division pages
    /// published under <see cref="BaseUrl"/> and stores every entry as an
    /// <c>AreaInfo</c> row. The crawl was run one level at a time; only the last
    /// level (committees) is active, the earlier levels are kept as commented-out
    /// reference code.
    /// </summary>
    class Program
    {
        /// <summary>Root folder of the 2016 division-code pages.</summary>
        private const string BaseUrl = "http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2016/";

        /// <summary>
        /// Length of a street/town level code. The committee pages are addressed by
        /// these 9-digit codes; the committee rows stored by this stage carry longer codes.
        /// </summary>
        private const int StreetCodeLength = 9;

        /// <summary>
        /// Entry point: walks every stored street/town row, downloads its committee
        /// page and stores the committees found on it.
        /// </summary>
        /// <param name="args">Unused command-line arguments.</param>
        static void Main(string[] args)
        {
            Console.WriteLine(DateTime.Now);

            // ----------------------------------------------------------------
            // Earlier crawl stages, kept for reference. They were written when
            // AreaInfo.Code was an int and must be adapted before re-enabling.
            // ----------------------------------------------------------------

            // Provinces
            //f("http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2016/index.html");

            // Cities
            //AreaDBEntities areaDBEntities = new AreaDBEntities();
            //var data = areaDBEntities.AreaInfoes.ToList();
            //foreach (var item in data)
            //{
            //    string url = string.Format("http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2016/{0}.html", item.Code);
            //    getCityInfo(url, item.Code);
            //    System.Threading.Thread.Sleep(50);
            //}
            //Console.WriteLine(DateTime.Now);

            // Districts / counties
            //AreaDBEntities areaDBEntities = new AreaDBEntities();
            //var data = areaDBEntities.AreaInfoes.ToList();
            //foreach (var item in data)
            //{
            //    string url = string.Format("http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2016/{0}/{1}.html", item.PedarId, item.Code);
            //    getCountyInfo(url, item.PedarId, item.Code);
            //    System.Threading.Thread.Sleep(50);
            //}
            //Console.WriteLine(DateTime.Now);

            // Streets / towns
            //AreaDBEntities areaDBEntities = new AreaDBEntities();
            //var data = areaDBEntities.AreaInfoes.Where(t => t.PedarId >= 1000).ToList();
            //foreach (var item in data)
            //{
            //    string temp = item.Code.ToString();
            //    string url = string.Format("http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2016/{0}/{1}/{2}.html", temp.Substring(0, 2), temp.Substring(2, 2), item.Code);
            //    getStreetInfo(url, item.Code);
            //    System.Threading.Thread.Sleep(50);
            //}
            //Console.WriteLine(DateTime.Now);

            // Village / neighborhood committees (the active stage).
            List<AreaInfo> streets;
            using (AreaDBEntities areaDBEntities = new AreaDBEntities())
            {
                // Only 9-digit street codes map to a committee page. Matching the
                // exact length keeps already-inserted committee rows (longer codes)
                // from being crawled again if the program is re-run.
                streets = areaDBEntities.AreaInfoes
                    .Where(t => t.Code.Length == StreetCodeLength)
                    .ToList();
            }

            foreach (var item in streets)
            {
                string temp = item.Code.ToString();

                // Page path is {province}/{city}/{county}/{streetCode}.html, where the
                // first three segments are successive 2-digit slices of the street code.
                string url = string.Format(
                    BaseUrl + "{0}/{1}/{2}/{3}.html",
                    temp.Substring(0, 2),
                    temp.Substring(2, 2),
                    temp.Substring(4, 2),
                    item.Code);

                getCommitteeInfo(url, item.Code);
                Console.WriteLine(item.Code + "----" + item.Name);

                // Throttle requests between pages.
                System.Threading.Thread.Sleep(200);
            }

            Console.WriteLine(DateTime.Now);
            Console.WriteLine("OK");
            Console.ReadKey();
        }

        /// <summary>
        /// Downloads one committee page and stores every committee listed on it as a
        /// child of the given street. A page that cannot be downloaded, or a street
        /// code that is not a valid integer, is reported on stderr and skipped so the
        /// rest of the crawl can continue.
        /// </summary>
        /// <param name="url">Absolute URL of the committee page.</param>
        /// <param name="code">Code of the parent street; stored as <c>PedarId</c> on each new row.</param>
        private static void getCommitteeInfo(string url, string code)
        {
            int parentId;
            if (!int.TryParse(code, out parentId))
            {
                Console.Error.WriteLine("Skipping " + url + ": parent code '" + code + "' is not a valid integer.");
                return;
            }

            var htmlString = HttpGet(url);
            if (string.IsNullOrEmpty(htmlString))
            {
                // HttpGet returns null on failure; parsing null would throw.
                Console.Error.WriteLine("Skipping " + url + ": page could not be downloaded.");
                return;
            }

            List<Node> list = ParseCommitteeRows(htmlString);
            if (list.Count == 0)
            {
                return;
            }

            using (AreaDBEntities areaDBEntities = new AreaDBEntities())
            {
                foreach (var item in list)
                {
                    AreaInfo areaInfo = new AreaInfo();
                    areaInfo.Code = item.code;
                    areaInfo.Name = item.area;
                    areaInfo.PedarId = parentId;
                    areaDBEntities.AreaInfoes.Add(areaInfo);
                }

                // One round-trip per page rather than one per row.
                areaDBEntities.SaveChanges();
            }
        }

        /// <summary>
        /// Extracts (code, name) pairs from the <c>.villagetr</c> rows of a committee page.
        /// </summary>
        /// <param name="html">Raw HTML of the committee page; must not be null.</param>
        /// <returns>One <see cref="Node"/> per row that has at least three cells.</returns>
        private static List<Node> ParseCommitteeRows(string html)
        {
            HtmlParser htmlParser = new HtmlParser();
            var rows = htmlParser.Parse(html).QuerySelectorAll(".villagetr");

            List<Node> list = new List<Node>();
            foreach (var row in rows)
            {
                var cells = row.Children.ToList();

                // The code is read from the first cell and the name from the third;
                // rows with fewer cells cannot be interpreted and are ignored.
                if (cells.Count < 3)
                {
                    continue;
                }

                Node node = new Node();
                node.code = cells[0].InnerHtml;
                node.area = cells[2].InnerHtml;
                list.Add(node);
            }

            return list;
        }

        // --------------------------------------------------------------------
        // Earlier per-level helpers, kept for reference (see note in Main).
        // --------------------------------------------------------------------

        // Streets / towns
        //private static void getStreetInfo(string url, int? code)
        //{
        //    var htmlString = HttpGet(url);
        //    HtmlParser htmlParser = new HtmlParser();
        //    var data = htmlParser.Parse(htmlString)
        //        .QuerySelectorAll(".towntr")
        //        .Select(t => t)
        //        .ToList();
        //    List<Node> list = new List<Node>();
        //    foreach (var item in data)
        //    {
        //        var area = htmlParser.Parse(item.InnerHtml)
        //            .QuerySelectorAll("a")
        //            .Select(t => t).ToList();
        //        foreach (var td in area)
        //        {
        //            Node node = new Node();
        //            node.code = td.GetAttribute("href");
        //            node.area = td.TextContent;
        //            list.Add(node);
        //        }
        //    }
        //    AreaDBEntities areaDBEntities = new AreaDBEntities();
        //    int k = 0;
        //    foreach (var item in list)
        //    {
        //        if (k % 2 != 0)
        //        {
        //            AreaInfo areaInfo = new AreaInfo();
        //            Console.WriteLine(item.code + "----" + item.area);
        //            string code1 = item.code.Substring(item.code.IndexOf("/") + 1, 9);
        //            areaInfo.Code = int.Parse(code1);
        //            areaInfo.Name = item.area;
        //            areaInfo.PedarId = code;
        //            areaDBEntities.AreaInfoes.Add(areaInfo);
        //        }
        //        k++;
        //    }
        //    areaDBEntities.SaveChanges();
        //    Console.WriteLine();
        //}

        // Districts / counties
        //private static void getCountyInfo(string url, int? PedarId, int? code)
        //{
        //    if (PedarId == null) return;
        //    var htmlString = HttpGet(url);
        //    HtmlParser htmlParser = new HtmlParser();
        //    var data = htmlParser.Parse(htmlString)
        //        .QuerySelectorAll(".countytr")
        //        .Select(t => t)
        //        .ToList();
        //    List<Node> list = new List<Node>();
        //    foreach (var item in data)
        //    {
        //        var area = htmlParser.Parse(item.InnerHtml)
        //            .QuerySelectorAll("a")
        //            .Select(t => t).ToList();
        //        foreach (var td in area)
        //        {
        //            Node node = new Node();
        //            node.code = td.GetAttribute("href");
        //            node.area = td.TextContent;
        //            list.Add(node);
        //        }
        //    }
        //    AreaDBEntities areaDBEntities = new AreaDBEntities();
        //    int k = 0;
        //    foreach (var item in list)
        //    {
        //        if (k % 2 != 0)
        //        {
        //            AreaInfo areaInfo = new AreaInfo();
        //            Console.WriteLine(item.code + "----" + item.area);
        //            string code1 = item.code.Substring(item.code.IndexOf("/") + 1, 6);
        //            areaInfo.Code = int.Parse(code1);
        //            areaInfo.Name = item.area;
        //            areaInfo.PedarId = code;
        //            areaDBEntities.AreaInfoes.Add(areaInfo);
        //        }
        //        k++;
        //    }
        //    areaDBEntities.SaveChanges();
        //    Console.WriteLine();
        //}

        // Cities
        //private static void getCityInfo(string url, int? PedarId)
        //{
        //    var htmlString = HttpGet(url);
        //    HtmlParser htmlParser = new HtmlParser();
        //    var data = htmlParser.Parse(htmlString)
        //        .QuerySelectorAll(".citytr")
        //        .Select(t => t)
        //        .ToList();
        //    List<Node> list = new List<Node>();
        //    foreach (var item in data)
        //    {
        //        var area = htmlParser.Parse(item.InnerHtml)
        //            .QuerySelectorAll("a")
        //            .Select(t => t).ToList();
        //        foreach (var td in area)
        //        {
        //            Node node = new Node();
        //            node.code = td.GetAttribute("href");
        //            node.area = td.TextContent;
        //            list.Add(node);
        //        }
        //    }
        //    AreaDBEntities areaDBEntities = new AreaDBEntities();
        //    int k = 0;
        //    foreach (var item in list)
        //    {
        //        if (k % 2 != 0)
        //        {
        //            AreaInfo areaInfo = new AreaInfo();
        //            Console.WriteLine(item.code + "----" + item.area);
        //            string code = item.code.Substring(item.code.IndexOf("/") + 1, 4);
        //            areaInfo.Code = int.Parse(code);
        //            areaInfo.Name = item.area;
        //            areaInfo.PedarId = PedarId;
        //            areaDBEntities.AreaInfoes.Add(areaInfo);
        //        }
        //        k++;
        //    }
        //    areaDBEntities.SaveChanges();
        //    Console.WriteLine();
        //}

        /// <summary>
        /// Performs a blocking HTTP GET and returns the response body as text.
        /// </summary>
        /// <param name="url">Absolute URL to fetch.</param>
        /// <returns>
        /// The response body, or <c>null</c> when the request fails for any reason
        /// (the failure is written to stderr).
        /// </returns>
        public static string HttpGet(string url)
        {
            try
            {
                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
                request.Method = "GET";
                request.Accept = "text/html, application/xhtml+xml, */*";
                // A GET carries no body, so this header is informational only; it is
                // kept so the request stays identical to what the crawler always sent.
                request.ContentType = "application/json";

                // Dispose the response (and its stream) so the underlying connection
                // is released after every page instead of lingering until finalization.
                using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                using (Stream stream = response.GetResponseStream())
                // Encoding.Default is the machine's ANSI code page.
                // NOTE(review): this decodes the pages correctly only where that code
                // page matches the site's encoding (presumably GB2312/GBK) - confirm
                // before running on a machine with a different locale.
                using (StreamReader reader = new StreamReader(stream, Encoding.Default))
                {
                    return reader.ReadToEnd();
                }
            }
            catch (Exception ex)
            {
                // Best-effort download: report the failure and let the caller skip the page.
                Console.Error.WriteLine("HttpGet failed for " + url + ": " + ex.Message);
                return null;
            }
        }

        // Provinces
        //static void f(string url)
        //{
        //    var htmlString = HttpGet(url);
        //    HtmlParser htmlParser = new HtmlParser();
        //    var data = htmlParser.Parse(htmlString)
        //        .QuerySelectorAll(".provincetr")
        //        .Select(t => t)
        //        .ToList();
        //    List<Node> list = new List<Node>();
        //    foreach (var item in data)
        //    {
        //        var area = htmlParser.Parse(item.InnerHtml)
        //            .QuerySelectorAll("a")
        //            .Select(t => t).ToList();
        //        foreach (var td in area)
        //        {
        //            Node node = new Node();
        //            node.code = td.GetAttribute("href");
        //            node.area = td.TextContent;
        //            list.Add(node);
        //        }
        //    }
        //    AreaDBEntities areaDBEntities = new AreaDBEntities();
        //    foreach (var item in list)
        //    {
        //        AreaInfo areaInfo = new AreaInfo();
        //        Console.WriteLine(item.code + "----" + item.area);
        //        areaInfo.Code = int.Parse(item.code.Substring(0, item.code.IndexOf(".")));
        //        areaInfo.Name = item.area;
        //        areaInfo.PedarId = null;
        //        areaDBEntities.AreaInfoes.Add(areaInfo);
        //    }
        //    areaDBEntities.SaveChanges();
        //    Console.WriteLine();
        //}
    }

    /// <summary>
    /// Intermediate (code, name) pair scraped from a page before it is turned into
    /// an <c>AreaInfo</c> row.
    /// </summary>
    class Node
    {
        /// <summary>Scraped code text (a cell's inner HTML, or a link's href in the earlier stages).</summary>
        public string code { get; set; }

        /// <summary>Scraped display name of the area.</summary>
        public string area { get; set; }
    }

    /// <summary>Single-string holder; not referenced by the code in this file.</summary>
    class td
    {
        public string td1 { get; set; }
    }
}
稍后会将生成的数据库脚本分享出来,有需要的可以私聊我。