package com.boonya;

import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Locale;
import java.util.Scanner;

/**
 * Small utility that downloads the content served at an HTTP URL as text.
 */
public class WebCapturer {

    /**
     * User-Agent header sent with every request. The original author flagged
     * this header as "very important"; presumably some servers reject the
     * default Java agent (not verifiable from this file).
     */
    private static final String USER_AGENT =
            "Mozilla/5.0 (X11; U; Linux i686; zh-CN; rv:1.9.1.2) "
            + " Gecko/20090803 Fedora/3.5.2-2.fc11 Firefox/3.5.2";

    /**
     * Fetches the page at the given address with an HTTP GET and returns its
     * body as a single string.
     *
     * <p>The response is read line by line and the lines are concatenated
     * without any separator, so line terminators of the page are not kept.
     * The text is decoded with the charset announced in the response's
     * {@code Content-Type} header, or UTF-8 when none is announced.
     *
     * <p>Failures are reported with {@code printStackTrace()} and never
     * thrown: a malformed URL yields an empty string, and an I/O error yields
     * whatever was read before the error (possibly an empty string).
     *
     * @param myURL address of the page, e.g.
     *              {@code "http://www.google.com.hk/search?q=biao"}
     * @return the page content with line terminators removed, or an empty
     *         string when nothing could be read
     */
    public static String getHtmlByURL(String myURL) {
        URL url;
        try {
            url = new URL(myURL);
        } catch (MalformedURLException e) {
            e.printStackTrace();
            return "";
        }

        // StringBuilder avoids the quadratic cost of String += in the read loop.
        StringBuilder html = new StringBuilder();
        HttpURLConnection conn = null;
        try {
            conn = (HttpURLConnection) url.openConnection();
            conn.setRequestProperty("User-Agent", USER_AGENT);
            conn.setRequestMethod("GET");

            // try-with-resources closes both the scanner and the response stream.
            try (InputStream in = conn.getInputStream();
                 Scanner scanner = new Scanner(in, responseCharset(conn.getContentType()).name())) {
                while (scanner.hasNextLine()) {
                    html.append(scanner.nextLine());
                }
                // Scanner hides read errors behind hasNextLine() == false;
                // surface them so a truncated download is not silent.
                IOException readError = scanner.ioException();
                if (readError != null) {
                    throw readError;
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (conn != null) {
                conn.disconnect();
            }
        }
        return html.toString();
    }

    /**
     * Extracts the charset from a {@code Content-Type} header value such as
     * {@code "text/html; charset=GBK"}.
     *
     * @param contentType header value, may be {@code null}
     * @return the announced charset, or UTF-8 when the header is absent, has
     *         no {@code charset} parameter, or names an unsupported charset
     */
    private static Charset responseCharset(String contentType) {
        if (contentType != null) {
            for (String part : contentType.split(";")) {
                String param = part.trim();
                if (param.toLowerCase(Locale.ROOT).startsWith("charset=")) {
                    String name = param.substring("charset=".length()).trim().replace("\"", "");
                    try {
                        return Charset.forName(name);
                    } catch (IllegalArgumentException ignored) {
                        // Illegal or unsupported charset name: use the fallback below.
                        break;
                    }
                }
            }
        }
        return StandardCharsets.UTF_8;
    }

    /**
     * Demo entry point: prints the content of a sample search page.
     *
     * @param args ignored
     * @throws Exception declared by the original signature
     */
    public static void main(String[] args) throws Exception {
        System.out.println(WebCapturer.getHtmlByURL("http://www.google.com.hk/search?q=biao"));
    }
}
Java抓取URL指定的HTML内容
通过Java获取到URL指定的内容,如果有目标数据可以再写代码解析获取到的内容
猜你喜欢
转载自boonya.iteye.com/blog/1870520
今日推荐
周排行