Maven 依赖:
<!-- PDF text-extraction dependencies: start -->
<!-- Apache PDFBox: supplies PDDocument / PDFTextStripper used by the pdfBox(...) demo below -->
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>2.0.11</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.bouncycastle/bcprov-jdk16 -->
<!-- Bouncy Castle crypto provider: not referenced directly by the demo code below; -->
<!-- presumably included for encrypted-PDF support - TODO confirm it is still required -->
<dependency>
<groupId>org.bouncycastle</groupId>
<artifactId>bcprov-jdk16</artifactId>
<version>1.46</version>
</dependency>
<!-- https://mvnrepository.com/artifact/com.itextpdf/itextpdf -->
<!-- iText 5: supplies PdfReader / PdfReaderContentParser used by the itextPdf(...) demo below -->
<dependency>
<groupId>com.itextpdf</groupId>
<artifactId>itextpdf</artifactId>
<version>5.5.13</version>
</dependency>
demo:
/**
 * Extracts the plain text of every page of a PDF using iText.
 *
 * <p>This is a best-effort helper: it never throws. Any failure (null or empty
 * input, malformed or encrypted PDF, I/O error while parsing) results in an
 * empty string.
 *
 * @param bytes the raw bytes of the PDF document
 * @return the text of all pages concatenated in page order, or {@code ""} if
 *         the document could not be read
 */
private static String itextPdf(byte[] bytes) {
    if (bytes == null || bytes.length == 0) {
        return "";
    }
    try {
        PdfReader reader = new PdfReader(bytes);
        try {
            PdfReaderContentParser parser = new PdfReaderContentParser(reader);
            int pageCount = reader.getNumberOfPages();
            StringBuilder text = new StringBuilder();
            // iText page numbers are 1-based, so the last page index is pageCount
            // itself; the bound must be inclusive or the final page is skipped.
            for (int page = 1; page <= pageCount; page++) {
                SimpleTextExtractionStrategy strategy =
                        parser.processContent(page, new SimpleTextExtractionStrategy());
                text.append(strategy.getResultantText());
            }
            return text.toString();
        } finally {
            // Release the reader's underlying resources even when parsing fails.
            reader.close();
        }
    } catch (Exception ignored) {
        // Deliberate best-effort contract: callers treat "" as "no text available".
        return "";
    }
}
/**
 * Extracts the plain text of every page of a PDF using Apache PDFBox.
 *
 * <p>This is a best-effort helper: it never throws. Any failure (null input,
 * malformed or encrypted PDF, I/O error while parsing or closing) results in
 * an empty string.
 *
 * @param bytes the raw bytes of the PDF document
 * @return the extracted text of pages 1 through the last page, or {@code ""}
 *         if the document could not be read
 */
private static String pdfBox(byte[] bytes) {
    if (bytes == null) {
        return "";
    }
    // try-with-resources closes the document on every path; the original
    // never closed it, leaking the resources held by the loaded document.
    try (PDDocument document = PDDocument.load(bytes)) {
        PDFTextStripper stripper = new PDFTextStripper();
        // Page range is 1-based and inclusive on both ends.
        stripper.setStartPage(1);
        stripper.setEndPage(document.getNumberOfPages());
        return stripper.getText(document);
    } catch (Exception ignored) {
        // Deliberate best-effort contract: callers treat "" as "no text available".
        return "";
    }
}