pom.xml配置
<!-- Apache PDFBox: used to read PDF files -->
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>2.0.4</version>
</dependency>
Java代码:
/**
 * Extracts the text of pages 1 through 10 of a PDF file.
 *
 * @param file the PDF file to read
 * @return the extracted text, or {@code null} if loading or text extraction failed
 */
private static String pdfRead(File file) {
    return pdfRead(file, 1, 10);
}

/**
 * Extracts the text of a page range of a PDF file.
 *
 * <p>This is a best-effort helper: any failure is reported on standard error via
 * {@code printStackTrace()} and signalled to the caller by a {@code null} return
 * value rather than by an exception.
 *
 * @param file      the PDF file to read
 * @param startPage first page to extract, passed to {@code PDFTextStripper.setStartPage}
 * @param endPage   last page to extract, passed to {@code PDFTextStripper.setEndPage}
 * @return the extracted text, or {@code null} if loading or text extraction failed
 */
private static String pdfRead(File file, int startPage, int endPage) {
    String content = null;
    // try-with-resources closes the document on every path, replacing the
    // manual null-checked close() in a finally block.
    try (PDDocument document = PDDocument.load(file)) {
        PDFTextStripper stripper = new PDFTextStripper();
        stripper.setStartPage(startPage);
        stripper.setEndPage(endPage);
        // Assign instead of returning here, so that a failure while closing the
        // document does not discard text that was already extracted.
        content = stripper.getText(document);
    } catch (Exception e) {
        // Deliberately broad: callers rely on getting null instead of an exception.
        e.printStackTrace();
    }
    return content;
}
将PDF的内容读取为String类型的文本,只读取第1页到第10页;读取失败时返回null。