使用 iText 和 jxl 实现 PDF 转 TXT、TXT 转 Excel
pom.xml配置
<!-- Version properties referenced by the dependency declarations below. -->
<properties>
<com.itextpdf.version>5.3.2</com.itextpdf.version>
<org.bouncycastle.version>1.52</org.bouncycastle.version>
<jxl.version>1.0</jxl.version>
</properties>
<dependencies>
<!-- iText: supplies the com.itextpdf.text.pdf.* classes used by ReadPdfByiText. -->
<dependency>
<groupId>com.itextpdf</groupId>
<artifactId>itextpdf</artifactId>
<version>${com.itextpdf.version}</version>
<scope>compile</scope>
</dependency>
<!-- NOTE(review): neither class in this document imports Bouncy Castle directly;
     presumably this is here for iText at runtime. Confirm that bcpg-jdk15on is the
     artifact (and version) actually required. -->
<dependency>
<groupId>org.bouncycastle</groupId>
<artifactId>bcpg-jdk15on</artifactId>
<version>${org.bouncycastle.version}</version>
</dependency>
<!-- jxl: supplies the jxl.* and jxl.write.* classes used by TextToExcel.
     NOTE(review): confirm these coordinates resolve in the repository you build against. -->
<dependency>
<groupId>jxl</groupId>
<artifactId>jxl</artifactId>
<version>${jxl.version}</version>
</dependency>
</dependencies>
使用 iText 读取 PDF 并输出为 TXT
package itext;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintWriter;
import com.itextpdf.text.Rectangle;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.parser.FilteredTextRenderListener;
import com.itextpdf.text.pdf.parser.LocationTextExtractionStrategy;
import com.itextpdf.text.pdf.parser.PdfTextExtractor;
import com.itextpdf.text.pdf.parser.RegionTextRenderFilter;
import com.itextpdf.text.pdf.parser.RenderFilter;
import com.itextpdf.text.pdf.parser.TextExtractionStrategy;
/**
 * Extracts the text of a PDF document with iText and writes it to a text file.
 *
 * <p>The output file is written as UTF-8 so that it can be read back by
 * {@code TextToExcel}, which decodes its input as UTF-8.
 */
public class ReadPdfByiText {

    /** Charset name used for the generated text file. */
    private static final String OUTPUT_ENCODING = "UTF-8";

    /**
     * Converts a fixed sample PDF into a text file.
     *
     * @param args ignored
     * @throws IOException if the output file cannot be created
     */
    public static void main(String[] args) throws IOException {
        String outputPath = "D:\\developcodespace\\PdfContent_1.txt";
        String fileName = "D:\\developcodespace\\20190323175137823782.pdf";
        // Use an explicit charset: the platform default (often GBK on Chinese-locale
        // Windows) would not match the UTF-8 decoding done by the next step.
        PrintWriter writer = new PrintWriter(outputPath, OUTPUT_ENCODING);
        readPdf(writer, fileName);
    }

    /**
     * Writes the text of every page of the PDF to {@code writer}.
     *
     * <p>Errors are printed to standard error and not rethrown. The writer is always
     * closed before this method returns.
     *
     * @param writer   destination for the extracted text; closed by this method
     * @param fileName path of the PDF file to read
     */
    public static void readPdf(PrintWriter writer, String fileName) {
        extract(writer, fileName, null);
    }

    /**
     * Writes only the text found inside a fixed rectangle (90, 0, 450, 40) of every
     * page of the PDF to {@code writer}.
     *
     * <p>Errors are printed to standard error and not rethrown. The writer is always
     * closed before this method returns.
     *
     * @param writer   destination for the extracted text; closed by this method
     * @param fileName path of the PDF file to read
     */
    public static void readPdf_filter(PrintWriter writer, String fileName) {
        Rectangle rect = new Rectangle(90, 0, 450, 40);
        RenderFilter filter = new RegionTextRenderFilter(rect);
        extract(writer, fileName, filter);
    }

    /**
     * Shared extraction loop: collects the text of all pages, then writes it in one go.
     *
     * @param writer   destination for the extracted text; closed by this method
     * @param fileName path of the PDF file to read
     * @param filter   region filter applied to each page, or {@code null} for the whole page
     */
    private static void extract(PrintWriter writer, String fileName, RenderFilter filter) {
        PdfReader reader = null;
        try {
            reader = new PdfReader(fileName);
            int pageCount = reader.getNumberOfPages();
            StringBuilder content = new StringBuilder();
            for (int page = 1; page <= pageCount; page++) {
                String text;
                if (filter == null) {
                    text = PdfTextExtractor.getTextFromPage(reader, page);
                } else {
                    // A fresh strategy per page: the strategy accumulates the page's text.
                    TextExtractionStrategy strategy =
                            new FilteredTextRenderListener(new LocationTextExtractionStrategy(), filter);
                    text = PdfTextExtractor.getTextFromPage(reader, page, strategy);
                }
                content.append(text);
                // Keep page boundaries on line boundaries; otherwise the last line of one
                // page is glued to the first line of the next and line-based consumers
                // lose a record.
                if (text.length() > 0 && text.charAt(text.length() - 1) != '\n') {
                    content.append('\n');
                }
            }
            // Written only after every page was read, so a failure leaves the output empty
            // rather than partially filled.
            writer.write(content.toString());
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            if (reader != null) {
                reader.close();
            }
            writer.close();
        }
    }
}
使用 jxl 读取 TXT 并写入 Excel
package itext;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import jxl.Workbook;
import jxl.write.Label;
import jxl.write.WritableSheet;
import jxl.write.WritableWorkbook;
import jxl.write.WriteException;
import jxl.write.biff.RowsExceededException;
/**
 * Converts the text file produced by the PDF extraction step into an Excel workbook
 * using jxl.
 *
 * <p>Only lines starting with {@code "014"} are exported. Each such line is split on
 * spaces and tabs, and every non-blank token becomes one cell of a new row.
 */
public class TextToExcel {

    /** Prefix a line must start with to be exported as a row. */
    private static final String RECORD_PREFIX = "014";

    /** Column titles written to the first row of the sheet. */
    private static final String[] HEADERS = {"公司名称", "岗位", "薪资", "状态"};

    /**
     * Converts a fixed sample text file into a fixed {@code .xls} file.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        File source = new File("D:\\developcodespace\\PdfContent_1.txt");
        File target = new File("D:\\developcodespace\\work.xls");
        if (!source.exists() || !source.isFile()) {
            System.out.println("file is not exists or not a file");
            System.exit(0);
            return;
        }
        convert(source, target);
        System.out.println("over!");
        System.exit(0);
    }

    /**
     * Reads {@code source} as UTF-8 text and writes the matching lines to a workbook
     * stored in {@code target}. Errors are printed to standard error and not rethrown.
     * Whatever was added to the workbook before a failure is still written out.
     */
    private static void convert(File source, File target) {
        BufferedReader input = null;
        WritableWorkbook wbook = null;
        try {
            input = new BufferedReader(new InputStreamReader(new FileInputStream(source), "utf-8"));
            wbook = Workbook.createWorkbook(target);
            WritableSheet sheet = wbook.createSheet("first", 0);
            writeHeader(sheet);
            writeRows(input, sheet);
        } catch (IOException e) {
            e.printStackTrace();
        } catch (WriteException e) {
            e.printStackTrace();
        } finally {
            // Each resource is released independently and only if it was actually opened,
            // so a failure while opening one no longer causes a NullPointerException here
            // and a failure while closing one no longer leaks the other.
            finishWorkbook(wbook);
            closeQuietly(input);
        }
    }

    /** Writes the column titles into row 0 of the sheet. */
    private static void writeHeader(WritableSheet sheet) throws WriteException {
        for (int column = 0; column < HEADERS.length; column++) {
            sheet.addCell(new Label(column, 0, HEADERS[column]));
        }
    }

    /** Writes one sheet row, starting at row 1, for every input line with the record prefix. */
    private static void writeRows(BufferedReader input, WritableSheet sheet)
            throws IOException, WriteException {
        int row = 1;
        String line;
        while ((line = input.readLine()) != null) {
            if (!line.startsWith(RECORD_PREFIX)) {
                continue;
            }
            String[] words = line.split("[ \t]");
            int column = 0;
            for (int i = 0; i < words.length; i++) {
                String value = words[i].trim();
                // Consecutive separators yield empty tokens; they must not consume a column.
                if (value.length() > 0) {
                    sheet.addCell(new Label(column, row, value));
                    column++;
                }
            }
            row++;
        }
    }

    /** Flushes the workbook to disk and closes it; does nothing when it was never created. */
    private static void finishWorkbook(WritableWorkbook wbook) {
        if (wbook == null) {
            return;
        }
        try {
            try {
                wbook.write();
            } finally {
                // Close even when write() fails so the output file handle is released.
                wbook.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        } catch (WriteException e) {
            e.printStackTrace();
        }
    }

    /** Closes the reader (and the streams it wraps); does nothing when it was never opened. */
    private static void closeQuietly(BufferedReader input) {
        if (input == null) {
            return;
        }
        try {
            input.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}