整个过程分为两部分：1. Word 转 HTML；2. HTML 转 PDF。
其中第一步生成的 HTML 并不规范（例如 <meta>、<img> 等标签没有闭合），后续处理时会出现格式不规范的提示，因此在写出文件之前需要先对 HTML 做修正。
import java.io.BufferedWriter;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.util.List;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.jsoup.Jsoup;
import org.w3c.dom.Document;
public class html {
public static void main(String argv[]) {
try {
convert2Html("F:\\1495206705005.doc","F:\\test.html");
} catch (Exception e) {
e.printStackTrace();
}
}
public static void writeFile(String content, String path) {
//字体转换,如果不需要 ,则可以省略
System.out.println(content);
content.toString();
content = content.replaceAll("font-family:黑体;", "font-family:MS Mincho;");
System.out.println(content);
content = content.replaceAll("font-family:Calibri;", "font-family:MS Mincho;");
content = content.replaceAll("font-family:宋体;", "font-family:MS Mincho;");
FileOutputStream fos = null;
BufferedWriter bw = null;
org.jsoup.nodes.Document doc = Jsoup.parse(content);
content=doc.html();
//html格式修改
content = content.replace("<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\">", "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\"></meta>");
content = content.replace("<meta content=\"Administrator\" name=\"author\">", "<meta content=\"Administrator\" name=\"author\"></meta>");
content = content.replace("<img src=\"0.png\" style=\"width:6.1680555in;height:4.3722224in;vertical-align:text-bottom;\">",
"<img src=\"0.png\" style=\"width:6.1680555in;height:4.3722224in;vertical-align:text-bottom;\"></img>");
System.out.println(content);
try {
File file = new File(path);
fos = new FileOutputStream(file);
bw = new BufferedWriter(new OutputStreamWriter(fos,"UTF-8"));
bw.write(content);
} catch (FileNotFoundException fnfe) {
fnfe.printStackTrace();
} catch (IOException ioe) {
ioe.printStackTrace();
} finally {
try {
if (bw != null)
bw.close();
if (fos != null)
fos.close();
} catch (IOException ie) {
}
}
}
//word 转 html
public static void convert2Html(String fileName, String outPutFile)
throws TransformerException, IOException,
ParserConfigurationException {
HWPFDocument wordDocument = new HWPFDocument(new FileInputStream(fileName));//WordToHtmlUtils.loadDoc(new FileInputStream(inputFile));
// XSSFWorkbook xssfwork=new XSSFWorkbook(new FileInputStream(fileName));
WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
DocumentBuilderFactory.newInstance().newDocumentBuilder()
.newDocument());
wordToHtmlConverter.setPicturesManager( new PicturesManager()
{
public String savePicture( byte[] content,
PictureType pictureType, String suggestedName,
float widthInches, float heightInches )
{
return suggestedName;
}
} );
wordToHtmlConverter.processDocument(wordDocument);
//save pictures
List pics=wordDocument.getPicturesTable().getAllPictures();
if(pics!=null){
for(int i=0;i<pics.size();i++){
Picture pic = (Picture)pics.get(i);
System.out.println();
try {
pic.writeImageContent(new FileOutputStream("F:/"
+ pic.suggestFullFileName()));
} catch (FileNotFoundException e) {
e.printStackTrace();
}
}
}
Document htmlDocument = wordToHtmlConverter.getDocument();
ByteArrayOutputStream out = new ByteArrayOutputStream();
DOMSource domSource = new DOMSource(htmlDocument);
StreamResult streamResult = new StreamResult(out);
TransformerFactory tf = TransformerFactory.newInstance();
Transformer serializer = tf.newTransformer();
serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
serializer.setOutputProperty(OutputKeys.INDENT, "yes");
serializer.setOutputProperty(OutputKeys.METHOD, "HTML");
serializer.transform(domSource, streamResult);
out.close();
writeFile(new String(out.toByteArray()), outPutFile);
}
}
import com.itextpdf.text.BaseColor;
import com.itextpdf.text.Document;
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.Font;
import com.itextpdf.text.pdf.BaseFont;
import com.itextpdf.text.pdf.PdfWriter;
import com.itextpdf.tool.xml.XMLWorkerFontProvider;
import com.itextpdf.tool.xml.XMLWorkerHelper;
import org.xhtmlrenderer.pdf.ITextFontResolver;
import org.xhtmlrenderer.pdf.ITextRenderer;
import javax.imageio.ImageIO;
import java.awt.*;
import java.io.*;
import java.nio.charset.Charset;
/**
* Created by Administrator on 2017/5/19.
*/
public class testforpdf {
public static void main(String[] args) throws IOException, DocumentException, Exception{
String outFile = "F:\\testqijia.pdf";
File file = new File(outFile);
file.getParentFile().mkdirs();
testforpdf.createPdf(outFile,"F:\\test.html");
}
/**s
* html转pdf
* @param file 需要保持的路径
* @param htmlURl 需要转pdf的html
* @throws IOException
* @throws DocumentException
*/
public static void createPdf(String file,String htmlURl) throws IOException, DocumentException {
// step 1
Document document = new Document();
// step 2
PdfWriter writer = PdfWriter.getInstance(document, new FileOutputStream(file));
writer.setViewerPreferences(PdfWriter.HideToolbar);
//设置图片是否精确放置,选择是(true),否(false)
writer.setStrictImageSequence(true);
File f = new File("F:\\0.png");
Image i = ImageIO.read(f);
//使用Paragraph来添加图片
com.itextpdf.text.Paragraph paragraph=new com.itextpdf.text.Paragraph();
//图片的物理路径
paragraph.add("F:\\0.png");
// step 3
document.open();
// step 4
XMLWorkerHelper.getInstance().parseXHtml(writer, document, new FileInputStream(htmlURl), null,Charset.forName("UTF-8"), new AsianFontProvider());
// step 5
document.close();
}
/**
* 解决下载pdf的中文
* @author Administrator
*
*/
public static class AsianFontProvider extends XMLWorkerFontProvider {
public Font getFont(final String fontname, final String encoding,
final boolean embedded, final float size, final int style,
final BaseColor color) {
BaseFont bf = null;
try {
bf = BaseFont.createFont("STSong-Light", "UniGB-UCS2-H", BaseFont.NOT_EMBEDDED);
} catch (Exception e) {
e.printStackTrace();
}
Font font = new Font(bf, size, style, color);
font.setColor(color);
return font;
}
}
}