版权声明:如若转载,请联系作者。 https://blog.csdn.net/liu16659/article/details/84367427
Java使用Socket技术获取网页
1.代码
- 接口
package com.lawson.crawler.inface;
/**
 * Contract for a component that retrieves a web resource.
 *
 * <p>The interface itself places no constraint on what an implementation does
 * with the retrieved content or on how the argument is interpreted.
 */
public interface Crawler {

    /**
     * Retrieves the resource identified by the given address.
     *
     * @param url address of the resource to retrieve; the exact format
     *            accepted (bare host name, full URL, ...) is defined by the
     *            implementation
     */
    void crawler(String url);
}
- 实现类
package com.lawson.crawler.impl;
import com.lawson.crawler.inface.Crawler;
import java.io.*;
import java.net.Socket;
public class CrawlerImpl implements Crawler {
public void crawler(String url) {
BufferedWriter bw =null;
BufferedReader br = null;
try {
// Creates a stream socket and connects it to the specified port number on the named host.
Socket socket = new Socket(url,80);//build a socket
//bw = new BufferedWriter(new OutputStreamWriter(socket.getOutputStream()));
//The abbreviation of following code
OutputStream outputStream = socket.getOutputStream();//Returns an output stream for this socket.
OutputStreamWriter oSWriter = new OutputStreamWriter(outputStream);//Creates an OutputStreamWriter that uses the default character encoding.
bw = new BufferedWriter(oSWriter);//Creates a buffered character-output stream that uses a default-sized output buffer.
//be care of the pattern of request
bw.write("GET /"+url+ " HTTP/1.1\r\n");
bw.write("HOST:" + url + "\r\n");
bw.write("\r\n");//HTTP head end!
bw.flush();
br = new BufferedReader(new InputStreamReader(socket.getInputStream()));
String line ;
while((line = br.readLine())!=null){
System.out.println(line);
}
} catch (IOException e) {
e.printStackTrace();
}finally {
if(bw != null){
try {
bw.close();
} catch (IOException e) {
e.printStackTrace();
}
}
if (br != null) {
try {
br.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
}
public static void main(String[] args) {
Crawler crawler = new CrawlerImpl();
String url = "www.baidu.com";
crawler.crawler(url);
}
}
2. 运行结果
到这里,就表明通过 Socket 获取网页已经成功。【但是仔细观察可以发现,返回的结果指向的是 "https://www.baidu.com/search/error.html"】
但是有时候一不小心,也会得到如下的结果:
可以看到这是一个请求错误,其含义是:请求报文中存在语法错误。