版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/Knight_Key/article/details/86677360
pom.xml
<!-- Apache PDFBox: provides the PDDocument and PDFTextStripper classes used by the code below. -->
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>2.0.4</version>
</dependency>
<!-- Apache FontBox, declared explicitly.
     NOTE(review): this version (2.0.8) differs from the pdfbox version (2.0.4) above;
     confirm the mismatch is intentional, since the two artifacts are normally kept on the same release. -->
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>fontbox</artifactId>
<version>2.0.8</version>
</dependency>
代码
public void pdfTest() {
try {
// 是否排序
boolean sort = false;
// 开始提取页数
int startPage = 1;
// 结束提取页数
int endPage = Integer.MAX_VALUE;
String content = null;
PrintWriter writer = null;
//pdf文本路径
String path = "C:\Users\Administrator\Desktop\123.pdf";
//输出txt文本路径
String target="C:\Users\Administrator\Desktop\123.txt";
PDDocument document = PDDocument.load(new File(path));
PDFTextStripper pts = new PDFTextStripper();
endPage = document.getNumberOfPages();
System.out.println("Total Page: " + endPage);
pts.setStartPage(startPage);
pts.setEndPage(endPage);
try {
//content就是从pdf中解析出来的文本
content = pts.getText(document);
writer = new PrintWriter(new FileOutputStream(target));
writer.write(content);// 写入文件内容
writer.flush();
writer.close();
} catch (Exception e) {
throw e;
}finally {
if (null != document)
document.close();
}
System.out.println("Get PDF Content ...");
} catch (Exception e) {
e.printStackTrace();
}
}
svn checkout http://svn.apache.org/repos/asf/pdfbox/trunk/examples