前言:
文件上传已经用得很多了,Java Web 中大概会用到如下几种方式:
- Struts
- Spring MVC CommonsMultipartResolver
- Commons-fileupload
Struts 和 Spring MVC 的文件上传实现都是基于 Commons-fileupload 的,但其背后的原理,大多数人估计没有关注过。最近阅读一些开源源码时也发现,基础才是最重要的,万变不离其宗;否则在 IT 领域很容易被漫天的新技术冲昏了头,不知所措。下面开始。
HTTP:
1. 表单 form 类似
<form action="/file/upload" method="post" enctype="multipart/form-data"> <input type="text" name="name"><br> <input type="file" name="file1"><br> <input type="file" name="file2"><br> <input type="submit" value="提交"> </form>
2. 用浏览器追踪表单提交,会发现如下
Accept:text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8 Content-Type:multipart/form-data; boundary=----WebKitFormBoundary4PCP0w0H0qxg16VB Origin:http://localhost:8080 Referer:http://localhost:8080/sys/template/tem/create Upgrade-Insecure-Requests:1 User-Agent:Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.71 Safari/537.36 ------WebKitFormBoundary4PCP0w0H0qxg16VB Content-Disposition: form-data; name="id" ------WebKitFormBoundary4PCP0w0H0qxg16VB Content-Disposition: form-data; name="name" 测试 ------WebKitFormBoundary4PCP0w0H0qxg16VB Content-Disposition: form-data; name="type" INDEX ------WebKitFormBoundary4PCP0w0H0qxg16VB Content-Disposition: form-data; name="layoutFile"; filename="confirm-btn.png" Content-Type: image/png ------WebKitFormBoundary4PCP0w0H0qxg16VB Content-Disposition: form-data; name="temFile"; filename="login.html" Content-Type: text/html ------WebKitFormBoundary4PCP0w0H0qxg16VB--
重要部位已用红色标注。表单提交时,HTTP 头部的 Content-Type 会带有一个 boundary 分隔符,分隔符会分割表单提交的每项内容(也就是每个 input 域)。如果是文件,则 Content-Disposition 中会出现 filename,同时带上 Content-Type 描述文件类型;否则没有。大体的解析格式如下(为便于阅读刻意换行显示,实际数据中并没有这些额外的换行):
------------分隔符\r\n Content-Disposition: form-data; name="XX"\r\n \r\n 具体内容\r\n ------------分隔符\r\n Content-Disposition: form-data; name="XX"; filename="XX"\r\n Content-Type: image/png\r\n \r\n 具体内容\r\n ------------分隔符\r\n Content-Disposition: form-data; name="XX"; filename="XX"\r\n Content-Type: image/png\r\n \r\n 具体内容\r\n ------------分隔符--\r\n
注:最后一行的末尾会多出 --,例如 ------------分隔符--\r\n;同时,请求体中的分隔行会比 Content-Type 头里 boundary= 的值在开头多出两个 -(即 --)。总体可以理解为:请求体是以 --boundary 进行分割的。
JAVA Servlet 实现:
@WebServlet(urlPatterns="/file/upload") public class FileServlet extends HttpServlet{ private static final long serialVersionUID = 1L; @Override protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { String contentType = request.getContentType(); //文件上传(类似:Content-Type:multipart/form-data; boundary=----WebKitFormBoundary4PCP0w0H0qxg16VB) if(contentType != null && contentType.startsWith("multipart/form-data")){ try { List<FileItem> fileItems = FileItemParse.parseForm(request); System.out.println(fileItems); } catch (Exception e) { e.printStackTrace(); } } } @Override protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { request.getRequestDispatcher("/WEB-INF/views/file/upload.jsp") .forward(request, response); } }
public class FileItemParse { //获取边界值 public static String getBoundary(HttpServletRequest request) { String rtnStr = null; String tmpType = request.getContentType(); if (null != tmpType) { rtnStr = tmpType.contains("boundary=") ? tmpType.split("boundary=")[1] : null; } return "--".concat(rtnStr); //此处应该是规范,比ContentType中多2个- } //解析表单 public static List<FileItem> parseForm(HttpServletRequest request) throws Exception{ List<FileItem> fileItems = new ArrayList<FileItem>(); byte[] boundaryBytes = getBoundary(request).getBytes(); int boundaryBytesLen = boundaryBytes.length; BufferedInputStream input = null; ByteArrayOutputStream out = new ByteArrayOutputStream(); try { input = new BufferedInputStream(request.getInputStream()); int tmpI = -1; int tmpL = -1; FileItem item = null; //跳过分界线 input.skip(boundaryBytesLen); while ((tmpI = input.read()) != -1) { if (tmpI == 13) { tmpL = (input.read()); if (tmpL == 10) { if (out.size() == 0) { //跳过空行分隔符 continue; } String bufferStr = out.toString("UTF-8"); //Content-Disposition if(bufferStr.contains("Content-Disposition:")){ item = new FileItem(); String[] tmpStr = bufferStr.split(";"); String nameV = tmpStr[1].split("=")[1]; item.setParamName(nameV.substring(1, nameV.length() - 1)); //去除" if(bufferStr.contains("filename")){//文件表单域 String filenameV = tmpStr[2].split("=")[1]; item.setFileName(filenameV.substring(1, filenameV.length() - 1)); //去除" }else{//普通表单域 fetchContent(item, input, boundaryBytes); fileItems.add(item); } out.reset(); continue; } //Content-Type if(bufferStr.contains("Content-Type:")){ item.setMimeType(bufferStr.split(":")[1].trim()); fetchContent(item, input, boundaryBytes); fileItems.add(item); //文件存储 out.reset(); continue; } } out.write(tmpI); out.write(tmpL); } out.write(tmpI); } } catch (IOException ioe) { ioe.printStackTrace(); } finally { if (null != input) { try { out.close(); input.close(); } catch (IOException e) { e.printStackTrace(); } } } return fileItems; } //内容提取 private static void 
fetchContent(FileItem item, BufferedInputStream input, byte[] boundaryBytes) throws IOException{ input.skip(2); //跳过空行分隔符 int i = -1; int l = -1; ByteArrayOutputStream tempOut = new ByteArrayOutputStream(); byte[] tempByte = new byte[boundaryBytes.length]; while((i = input.read()) != -1){ if (13 == i) { l = input.read(); if (10 == l && isBoundary(input, boundaryBytes, tempByte)) { break; } else { tempOut.write(i); tempOut.write(l); if (10 == l) { //如不是分解符,则写入存储 tempOut.write(tempByte); } continue; } } tempOut.write(i); } if(item.getMimeType() != null){ //文件 //此处测试环境,故直接写入本地文件,正式应写入系统java.io.temp目录 String url = "d:/temp/" + item.getFileName(); File file = new File(url); if(!file.getParentFile().exists()){ file.getParentFile().mkdirs(); } FileOutputStream out = new FileOutputStream(file); out.write(tempOut.toByteArray()); out.flush(); out.close(); item.setSimpleField(false); item.setFilePath(url); } else{ item.setParamValue(new String(tempOut.toByteArray(), "UTF-8")); item.setSimpleField(true); } } private static boolean isBoundary(BufferedInputStream input, byte[] sourceBoundaryBytes, byte[] temp) throws IOException{ int count = input.read(temp); for (int i = 0; i < count; i++) { if (sourceBoundaryBytes[i] != temp[i]) { return false; } } return true; } }
/**
 * One parsed part of a {@code multipart/form-data} request: either a plain
 * form field (name and value) or an uploaded file (name, file name, MIME type
 * and storage path).
 */
public class FileItem {

    // ---- file fields ----

    /** MIME type declared by the part's Content-Type header. */
    private String mimeType;

    /** Path the uploaded file was stored at. */
    private String filePath;

    /** File name as sent by the client. */
    private String fileName;

    /** {@code true}: plain (non-file) form field; {@code false}: file form field. */
    private boolean isSimpleField;

    // ---- common / plain fields ----

    /** Name of the form field. */
    private String paramName;

    /** Value of a plain form field. */
    private String paramValue;

    public String getMimeType() {
        return mimeType;
    }

    public void setMimeType(String mimeType) {
        this.mimeType = mimeType;
    }

    public String getFilePath() {
        return filePath;
    }

    public void setFilePath(String filePath) {
        this.filePath = filePath;
    }

    public String getFileName() {
        return fileName;
    }

    public void setFileName(String fileName) {
        this.fileName = fileName;
    }

    public boolean isSimpleField() {
        return isSimpleField;
    }

    public void setSimpleField(boolean isSimpleField) {
        this.isSimpleField = isSimpleField;
    }

    public String getParamName() {
        return paramName;
    }

    public void setParamName(String paramName) {
        this.paramName = paramName;
    }

    public String getParamValue() {
        return paramValue;
    }

    public void setParamValue(String paramValue) {
        this.paramValue = paramValue;
    }

    /** Returns a readable dump of all fields, useful when printing parsed items. */
    @Override
    public String toString() {
        return "FileItem{paramName=" + paramName
                + ", paramValue=" + paramValue
                + ", isSimpleField=" + isSimpleField
                + ", fileName=" + fileName
                + ", mimeType=" + mimeType
                + ", filePath=" + filePath
                + "}";
    }
}
以上只是一个简单的不完全实现,主要是针对HTTP 文件上传数据协议的一个解析过程,更多的可以去看Commons-fileupload源码,里面有更进一步的数据封装(例如进度条)。
参考文献:
http://www.ietf.org:80/rfc/rfc1867.txt
http://www.ietf.org:80/rfc/rfc2045.txt
http://blog.csdn.net/ybygjy/article/details/5869158