/*
* Copyright 2018 Smyfinancial.com All right reserved. This software is the
* confidential and proprietary information of Smyfinancial.com ("Confidential
* Information"). You shall not disclose such Confidential Information and shall
* use it only in accordance with the terms of the license agreement you entered
* into with Smyfinancial.com.
*/
package com.ying.common;
import java.awt.image.BufferedImage;
import java.io.*;
import java.util.List;
import java.util.ListIterator;
import com.google.common.collect.Lists;
import com.lowagie.text.pdf.PdfReader;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.pdfbox.io.MemoryUsageSetting;
import org.apache.pdfbox.multipdf.PDFMergerUtility;
import org.apache.pdfbox.multipdf.Splitter;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageContentStream;
import org.apache.pdfbox.pdmodel.PDPageTree;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.encryption.InvalidPasswordException;
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
import org.apache.pdfbox.rendering.PDFRenderer;
import org.apache.pdfbox.text.PDFTextStripper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.smy.ass.filesearch.IndexItem;
import com.smy.ass.filesearch.Indexer;
import com.smy.ass.filesearch.Searcher;
import javax.imageio.ImageIO;
/**
 * PDF utility class: page counting, text extraction and search, splitting, merging,
 * and image/PDF conversion helpers.
 *
 * @author CAIYONGFENG 2018-03-08 18:10:15
 */
public class PdfUtil {
/** Class-wide logger; declared final so it cannot be reassigned. */
private static final Logger logger = LoggerFactory.getLogger(PdfUtil.class);
// Directory in which the Lucene index files are stored.
// NOTE(review): this used to be read from Property.getProperty("file.index.dir"); it is
// currently hard-coded to the empty string - confirm the intended location.
private static final String INDEX_DIR = "";
/** Result-size argument passed to the searcher when looking up content. */
private static final int DEFAULT_RESULT_SIZE = 100;
/**
 * Returns the number of pages in a PDF file.
 *
 * <p>The file name must end with {@code .pdf} (compared case-insensitively). Every failure -
 * a {@code null} file, a wrong extension, or a document that cannot be loaded - is logged
 * and reported as {@code null} instead of being thrown.
 *
 * @param pdfFile the PDF file to inspect
 * @return the page count, or {@code null} if it could not be determined
 */
public static Integer getPageCount(File pdfFile) {
    try {
        if (pdfFile == null) {
            throw new IllegalArgumentException("文件为空,请检查!");
        }
        String fileName = pdfFile.getName();
        // Locale-independent, case-insensitive suffix check; also accepts e.g. ".Pdf".
        if (!fileName.regionMatches(true, fileName.length() - 4, ".pdf", 0, 4)) {
            throw new IllegalArgumentException("文件非pdf格式,请检查!");
        }
        // try-with-resources guarantees the document is closed on every path.
        try (PDDocument document = PDDocument.load(pdfFile)) {
            return document.getNumberOfPages();
        }
    } catch (Exception e) {
        // The previous message claimed a default of 1 page; the method actually returns null.
        logger.error("获取Pdf文件页数异常(返回null),原因:", e);
        return null;
    }
}
/**
 * Returns the number of pages in a PDF supplied as a byte array.
 *
 * <p>Every failure - a {@code null} array or content that cannot be loaded as a PDF - is
 * logged and reported as {@code null} instead of being thrown.
 *
 * @param bytes the raw bytes of the PDF document
 * @return the page count, or {@code null} if it could not be determined
 */
public static Integer getPageCountByByte(byte[] bytes) {
    try {
        if (bytes == null) {
            throw new IllegalArgumentException("文件为空,请检查!");
        }
        // try-with-resources guarantees the document is closed on every path.
        try (PDDocument document = PDDocument.load(bytes)) {
            return document.getNumberOfPages();
        }
    } catch (Exception e) {
        // The previous message claimed a default of 1 page; the method actually returns null.
        logger.error("获取Pdf文件页数异常(返回null),原因:", e);
        return null;
    }
}
/**
 * Returns the number of pages of the PDF file located at the given path.
 *
 * <p>A blank path is logged as an error and yields {@code null}; otherwise the work is
 * delegated to {@link #getPageCount(File)}.
 *
 * @param pdfFilePath path of the PDF file
 * @return the page count, or {@code null} if it could not be determined
 */
public static Integer getPageCount(String pdfFilePath) {
    // Guard clause: reject blank paths up front, logging exactly as a thrown exception would.
    if (StringUtils.isBlank(pdfFilePath)) {
        logger.error("获取pdf文件的页数异常,原因:", new Exception("文件路径为空"));
        return null;
    }
    try {
        return getPageCount(new File(pdfFilePath));
    } catch (Exception e) {
        logger.error("获取pdf文件的页数异常,原因:", e);
        return null;
    }
}
/**
 * Checks whether a PDF file contains the given string by indexing the file's text into
 * {@code INDEX_DIR} and then querying that index.
 *
 * @param pdfFile the PDF file whose text is indexed
 * @param str the content to search for
 * @return {@code true} only when the searcher reports exactly {@code 1} for the query
 *         (NOTE(review): presumably the hit count - confirm against {@code Searcher})
 * @throws RuntimeException wrapping any failure during indexing or searching
 * @deprecated marked deprecated in the original source; no replacement is named there
 */
@Deprecated
public static boolean isContainStr(File pdfFile, String str) {
    try {
        long start = System.currentTimeMillis();
        IndexItem pdfIndexItem = index(pdfFile);
        // Build the index; close the indexer even when indexing fails.
        Indexer indexer = new Indexer(INDEX_DIR);
        try {
            indexer.index(pdfIndexItem);
        } finally {
            indexer.close();
        }
        // Query the index that was just written.
        // NOTE(review): Searcher is never closed here - check whether it holds resources.
        Searcher searcher = new Searcher(INDEX_DIR);
        int result = searcher.findByContent(str, DEFAULT_RESULT_SIZE);
        logger.info("cost:{} ms", System.currentTimeMillis() - start);
        return result == 1;
    } catch (Exception e) {
        logger.error("判断pdf文件是否包含指定字符串失败,原因:", e);
        throw new RuntimeException(e);
    }
}
/**
 * Builds the index entry for a PDF file: its id is the hash code of the file name, its
 * title is the file name, and its content is the text extracted from the document.
 *
 * @param file the PDF file to read
 * @return the index entry describing the file
 * @throws IOException if the document cannot be loaded or its text cannot be extracted
 */
private static IndexItem index(File file) throws IOException {
    // try-with-resources closes the document even if text extraction fails.
    try (PDDocument doc = PDDocument.load(file)) {
        String content = new PDFTextStripper().getText(doc);
        return new IndexItem((long) file.getName().hashCode(), file.getName(), content);
    }
}
/**
 * Extracts the text of every page of a PDF and writes it, UTF-8 encoded, to the given file.
 *
 * <p>Only the text produced by {@code PDFTextStripper} is written. Failures (including a
 * password-protected source) are logged and not rethrown.
 *
 * @param pdfFile the source PDF file
 * @param routerDirFullPath full path of the output file
 */
public static void pdf2doc(File pdfFile, String routerDirFullPath) {
    // The document is loaded first so that an unreadable PDF does not create an output file;
    // all three resources are closed automatically, in reverse order, on every path.
    try (PDDocument doc = PDDocument.load(pdfFile);
         FileOutputStream fos = new FileOutputStream(routerDirFullPath);
         Writer writer = new OutputStreamWriter(fos, "UTF-8")) {
        PDFTextStripper stripper = new PDFTextStripper();
        // Emit text in positional order rather than content-stream order.
        stripper.setSortByPosition(true);
        // Disabled in the original: stripper.setWordSeparator("") was suggested (by a tutorial)
        // to drop the spaces PDFBox inserts between Chinese characters.
        stripper.setStartPage(1);
        stripper.setEndPage(doc.getNumberOfPages());
        stripper.writeText(doc, writer);
    } catch (InvalidPasswordException e) {
        logger.error("pdf2doc error 密码错误!reason:", e);
    } catch (Exception e) {
        logger.error("pdf2doc error!reason:", e);
    }
}
/** Output path used by the two-argument {@code imageToPdf}; kept for backward compatibility. */
private static final String DEFAULT_ID_CARD_PDF_PATH = "D:\\test\\testPdf.pdf";
/** Length that the longer side of each image is scaled to when placed on the page. */
private static final float ID_CARD_MAX_SIDE = 320f;
/**
 * Places two images on a single A4 page and writes the PDF to the default output path
 * ({@code D:\test\testPdf.pdf}).
 *
 * @param idCardBackUrl file path of the image drawn in the lower half of the page
 * @param idCardFrontUrl file path of the image drawn in the upper half of the page
 */
public static void imageToPdf(String idCardBackUrl, String idCardFrontUrl) {
    imageToPdf(idCardBackUrl, idCardFrontUrl, DEFAULT_ID_CARD_PDF_PATH);
}
/**
 * Places two images on a single A4 page and writes the resulting PDF to {@code destPdfPath}.
 *
 * <p>Each image is scaled proportionally so that its longer side is 320 units, then centred
 * horizontally; the back image is centred in the lower half of the page and the front image
 * in the upper half. Failures are logged and not rethrown.
 *
 * @param idCardBackUrl file path of the image drawn in the lower half of the page
 * @param idCardFrontUrl file path of the image drawn in the upper half of the page
 * @param destPdfPath path of the PDF file to write; missing parent directories are created
 */
public static void imageToPdf(String idCardBackUrl, String idCardFrontUrl, String destPdfPath) {
    try (PDDocument document = new PDDocument()) {
        // One blank A4 page that receives both images.
        PDPage page = new PDPage(PDRectangle.A4);
        document.addPage(page);
        PDImageXObject backImg = PDImageXObject.createFromFile(idCardBackUrl, document);
        PDImageXObject frontImg = PDImageXObject.createFromFile(idCardFrontUrl, document);
        float[] backSize = fitLongestSide(backImg.getWidth(), backImg.getHeight(), ID_CARD_MAX_SIDE);
        float[] frontSize = fitLongestSide(frontImg.getWidth(), frontImg.getHeight(), ID_CARD_MAX_SIDE);
        float pageWidth = page.getCropBox().getWidth();
        float pageHeight = page.getCropBox().getHeight();
        // The content stream must be closed before saving; try-with-resources also closes it
        // when drawing fails.
        try (PDPageContentStream content = new PDPageContentStream(document, page)) {
            content.drawImage(backImg, (pageWidth - backSize[0]) / 2,
                    (pageHeight / 2 - backSize[1]) / 2, backSize[0], backSize[1]);
            content.drawImage(frontImg, (pageWidth - frontSize[0]) / 2,
                    (pageHeight / 2 - frontSize[1]) / 2 + pageHeight / 2, frontSize[0], frontSize[1]);
        }
        // Serialize in memory first so a failed save never leaves a partial file behind.
        ByteArrayOutputStream pdfBytes = new ByteArrayOutputStream();
        document.save(pdfBytes);
        FileUtils.writeByteArrayToFile(new File(destPdfPath), pdfBytes.toByteArray());
    } catch (Exception e) {
        logger.error("图片转pdf失败,原因:", e);
    }
}
/** Scales (width, height) proportionally so the longer side equals maxSide; returns {w, h}. */
private static float[] fitLongestSide(int width, int height, float maxSide) {
    if (width >= height) {
        return new float[] {maxSide, maxSide / width * height};
    }
    return new float[] {maxSide / height * width, maxSide};
}
/**
 * Concatenates several PDF files, in the order given, into one destination file.
 *
 * <p>Merging is configured to buffer in temporary files only. Failures are logged and not
 * rethrown.
 *
 * @param pdfFullPaths full paths of the PDF files to merge
 * @param desPdfPath full path of the merged PDF to create
 */
public static void mergePdf(String[] pdfFullPaths, String desPdfPath) {
    try {
        PDFMergerUtility merger = new PDFMergerUtility();
        for (String sourcePath : pdfFullPaths) {
            merger.addSource(sourcePath);
        }
        merger.setDestinationFileName(desPdfPath);
        merger.mergeDocuments(MemoryUsageSetting.setupTempFileOnly());
    } catch (FileNotFoundException e) {
        logger.error("文件不存在,原因:", e);
    } catch (Exception e) {
        logger.error("合并pdf文件异常,原因:", e);
    }
}
/**
 * Proportional scaling: returns the height that keeps the aspect ratio of an image when
 * its width is changed to {@code zoomWidth}.
 *
 * @param widthImg original image width
 * @param heightImg original image height
 * @param zoomWidth target width
 * @return {@code zoomWidth * heightImg / widthImg}
 */
private static float getZoomSize(float widthImg, float heightImg, float zoomWidth) {
    // Multiply before dividing, exactly as the formula above is written.
    float scaledArea = zoomWidth * heightImg;
    return scaledArea / widthImg;
}
/**
 * Extracts the full text content of a PDF file.
 *
 * @param file the PDF file to read
 * @return the extracted text, or {@code null} if loading or extraction fails with an
 *         {@link IOException} (the failure is logged)
 */
public static String getPdfText(File file) {
    // try-with-resources closes the document on success and on failure.
    try (PDDocument doc = PDDocument.load(file)) {
        return new PDFTextStripper().getText(doc);
    } catch (IOException e) {
        logger.error("pdf获取文字内容出错", e);
        return null;
    }
}
/**
 * Extracts the given pages from a PDF and merges them, in the order given, into a new PDF.
 *
 * <p>Each requested page is first written to a temporary single-page file via
 * {@link #splitPdf(int, String, String, String)} (named {@code mergedFileName + "_" + page});
 * those files are merged and then deleted. All page numbers are validated before any file
 * is written, and temporary files and streams are cleaned up on failure as well as success.
 *
 * @param pdfPath full path of the source PDF
 * @param mergedFileName file name of the merged PDF
 * @param savePath directory in which the merged PDF is stored
 * @param pageNumbers 1-based page numbers to extract
 * @return the path of the merged PDF, or {@code null} if anything fails (the failure is logged)
 */
public static String mergerPages(String pdfPath, String mergedFileName, String savePath, int... pageNumbers) {
    List<String> pageFilePaths = Lists.newArrayList();
    List<InputStream> pageStreams = Lists.newArrayList();
    try {
        Integer pageCount = getPageCount(new File(pdfPath));
        if (pageCount == null) {
            // getPageCount already logged the cause; avoid an unboxing NullPointerException.
            throw new IOException("无法获取PDF页数:" + pdfPath);
        }
        // Validate every page number before creating any temporary file.
        for (int pageNumber : pageNumbers) {
            if (pageNumber < 1 || pageNumber > pageCount) {
                throw new IllegalArgumentException("指定页码不存在!");
            }
        }
        // Write one temporary single-page file per requested page.
        for (int pageNumber : pageNumbers) {
            String splitFileName = mergedFileName.concat("_").concat(String.valueOf(pageNumber));
            String pageFilePath = splitPdf(pageNumber, pdfPath, splitFileName, savePath);
            if (pageFilePath == null) {
                throw new IOException("分割PDF页面失败,页码:" + pageNumber);
            }
            pageFilePaths.add(pageFilePath);
        }
        // Merge the temporary page files into the destination PDF.
        for (String pageFilePath : pageFilePaths) {
            pageStreams.add(new FileInputStream(pageFilePath));
        }
        PDFMergerUtility pdfMergerUtility = new PDFMergerUtility();
        pdfMergerUtility.addSources(pageStreams);
        String realFilePath = FilePathUtil.getRealFilePath(savePath.concat(File.separator).concat(mergedFileName));
        pdfMergerUtility.setDestinationFileName(realFilePath);
        pdfMergerUtility.mergeDocuments(MemoryUsageSetting.setupMainMemoryOnly());
        return realFilePath;
    } catch (Exception e) {
        logger.error("合并PDF页面失败,原因:", e);
        return null;
    } finally {
        // Streams must be closed before the temporary files can be deleted on every platform.
        for (InputStream pageStream : pageStreams) {
            IOUtils.closeQuietly(pageStream);
        }
        for (String pageFilePath : pageFilePaths) {
            new File(pageFilePath).delete();
        }
    }
}
/**
 * Extracts a single page of a PDF and saves it as a new PDF file.
 *
 * <p>The output file is {@code outPath + fileName}; no separator is inserted between the
 * two, so {@code outPath} must already end with one. The output directory is created if
 * missing and an existing output file is replaced. Failures are logged and not rethrown.
 *
 * @param pageNum 1-based number of the page to extract
 * @param filePath full path of the source PDF
 * @param fileName name of the new file
 * @param outPath directory in which the new file is stored
 * @return the path of the new file, or {@code null} if the page does not exist or an
 *         error occurs
 */
public static String splitPdf(int pageNum, String filePath, String fileName, String outPath) {
    // The source document must stay open until the extracted page has been saved;
    // try-with-resources then closes it on every path, including the successful return.
    try (PDDocument document = PDDocument.load(new File(filePath))) {
        Splitter splitter = new Splitter();
        splitter.setStartPage(pageNum);
        splitter.setEndPage(pageNum);
        List<PDDocument> pages = splitter.split(document);
        try {
            if (pages.isEmpty()) {
                return null;
            }
            File outDir = new File(outPath);
            if (!outDir.exists()) {
                outDir.mkdirs();
            }
            File newFile = new File(outPath + fileName);
            if (newFile.exists()) {
                newFile.delete();
            }
            pages.get(0).save(newFile);
            return newFile.exists() ? newFile.getPath() : null;
        } finally {
            // Close every document produced by the splitter, even if saving failed.
            for (PDDocument page : pages) {
                IOUtils.closeQuietly(page);
            }
        }
    } catch (IOException e) {
        logger.error("分割PDF文件失败,原因:",e);
    } catch (Exception e) {
        logger.error("分割PDF文件异常,原因:",e);
    }
    return null;
}
/**
 * Finds the first page of a PDF whose text contains the given keywords.
 *
 * <p>Pages are scanned one at a time using the 1-based page numbers that
 * {@code PDFTextStripper} expects, from page 1 through the last page inclusive.
 *
 * @param pdfPath full path of the PDF file
 * @param keyWords the text to look for
 * @return the 1-based number of the first matching page, or {@code -1} if no page matches
 * @throws RuntimeException wrapping any failure while loading or reading the document
 */
public static int findInPdf(String pdfPath, String keyWords) {
    // try-with-resources closes the document on a match, on a miss, and on failure.
    try (PDDocument doc = PDDocument.load(new File(pdfPath))) {
        PDFTextStripper stripper = new PDFTextStripper();
        int pageCount = doc.getNumberOfPages();
        // Starting at 1 (not 0) is the fix: a 0..n-1 loop extracts nothing for "page 0"
        // and never reaches the last page.
        for (int pageNo = 1; pageNo <= pageCount; pageNo++) {
            stripper.setStartPage(pageNo);
            stripper.setEndPage(pageNo);
            if (stripper.getText(doc).contains(keyWords)) {
                return pageNo;
            }
        }
        return -1;
    } catch (Exception e) {
        logger.error("查找文本异常,原因:",e);
        throw new RuntimeException(e);
    }
}
/**
 * Renders the first page of a PDF to a JPEG image at 144 DPI.
 *
 * <p>Only the first page is rendered; a document without pages produces no image.
 *
 * @param pdfFilePath full path of the source PDF
 * @param imageFilePath full path of the image file to write
 * @param rotation page rotation in degrees, applied before rendering only when greater
 *        than zero
 * @throws RuntimeException wrapping any {@link IOException} raised while loading,
 *         rendering or writing
 */
public static void pdf2Image(String pdfFilePath, String imageFilePath, int rotation) {
    // try-with-resources closes the document on every path. The page count comes from the
    // already-loaded document, so the file is no longer parsed a second time just to count.
    try (PDDocument pdDocument = PDDocument.load(new File(pdfFilePath))) {
        if (pdDocument.getNumberOfPages() == 0) {
            return;
        }
        long createImgStartMills = System.currentTimeMillis();
        File dstFile = new File(imageFilePath);
        // Fails fast with an IOException when the target cannot be created.
        if (!dstFile.exists()) {
            dstFile.createNewFile();
        }
        if (rotation > 0) {
            pdDocument.getPage(0).setRotation(rotation);
        }
        // A higher DPI gives a sharper image at the cost of slower conversion.
        BufferedImage image = new PDFRenderer(pdDocument).renderImageWithDPI(0, 144);
        // ImageIO.write signals "no suitable writer" by returning false, not by throwing.
        if (!ImageIO.write(image, "jpg", dstFile)) {
            throw new IOException("No ImageIO writer available for format jpg");
        }
        logger.info("生成图片:{} time:{}ms.", imageFilePath, System.currentTimeMillis()-createImgStartMills);
    } catch (IOException e) {
        logger.error("pdf2img fail pdfFilePath:{}", pdfFilePath, e);
        throw new RuntimeException(e);
    }
}
/**
 * Manual smoke test: counts the pages of a sample PDF on the developer's machine.
 *
 * <p>Earlier manual experiments with {@code getPdfText}, {@code findInPdf} and
 * {@code mergerPages} were kept here only as commented-out code.
 *
 * @param args unused
 */
public static void main(String[] args) {
    final String samplePdfPath = "C:\\Users\\shan\\Desktop\\诉讼四期优化\\实时代偿3期-201710.pdf";
    // The result is intentionally discarded, as before; failures are logged by getPageCount.
    PdfUtil.getPageCount(new File(samplePdfPath));
}
}
// pdf util
// You may also like
// Reposted from blog.csdn.net/yingcly003/article/details/96864325
// Today's recommendations
// Weekly ranking