利用iText将HTML文件转化为PDF文件

一:IText组件介绍

iText是用于生成PDF文档的一个java类库。通过iText不仅可以生成PDF或rtf的文档,而且可以将XML、Html文件转化为PDF文件。

将报告数据生成HTML文档,然后通过控制CSS样式单,调用iText组件,将html打印成为A4规格的静态pdf文件。

该方案的优势是:

  1. Html文件是文本文件,便于模块化
  2. 彻底摆脱旧版报告系统严重依赖word模板和windows服务器
  3. 纯粹的java技术组件,兼容性好

二:IText的引入使用

1.maven包导入


<properties>
    <!-- Single version shared by every dependency below that references ${itext.version} -->
    <itext.version>7.1.11</itext.version>
</properties>
<dependencies>
    <!-- jsoup: fetch the HTML and replace content in it -->
    <dependency>
      <groupId>org.jsoup</groupId>
      <artifactId>jsoup</artifactId>
      <version>1.6.3</version>
    </dependency>
    <!-- iText 7 modules, all pinned to ${itext.version} -->
    <dependency>
        <groupId>com.itextpdf</groupId>
        <artifactId>kernel</artifactId>
        <version>${itext.version}</version>
    </dependency>
    <dependency>
        <groupId>com.itextpdf</groupId>
        <artifactId>io</artifactId>
        <version>${itext.version}</version>
    </dependency>
    <dependency>
        <groupId>com.itextpdf</groupId>
        <artifactId>layout</artifactId>
        <version>${itext.version}</version>
    </dependency>
    <dependency>
        <groupId>com.itextpdf</groupId>
        <artifactId>forms</artifactId>
        <version>${itext.version}</version>
    </dependency>
    <dependency>
        <groupId>com.itextpdf</groupId>
        <artifactId>pdfa</artifactId>
        <version>${itext.version}</version>
    </dependency>
    <!-- NOTE(review): pdftest looks like a test-only helper; confirm whether it should be limited to test scope -->
    <dependency>
        <groupId>com.itextpdf</groupId>
        <artifactId>pdftest</artifactId>
        <version>${itext.version}</version>
    </dependency>
    <dependency>
        <groupId>com.itextpdf</groupId>
        <artifactId>font-asian</artifactId>
        <version>${itext.version}</version>
    </dependency>
    <!-- SLF4J logging binding -->
    <dependency>
        <groupId>org.slf4j</groupId>
        <artifactId>slf4j-log4j12</artifactId>
        <version>1.7.18</version>
    </dependency>

    <!-- Package used by iText 7 for HTML-to-PDF conversion -->
    <!-- NOTE(review): html2pdf carries its own version, not ${itext.version}; confirm 3.0.0 is the release that pairs with the iText version above -->
    <dependency>
        <groupId>com.itextpdf</groupId>
        <artifactId>html2pdf</artifactId>
        <version>3.0.0</version>
    </dependency>
</dependencies>

2.获取HTML字符串,并转化为PDF示例:

String fileName = "PDF文件名";
// "inline" asks the browser to display the PDF in place; fileName is the name offered when downloading.
// The name's UTF-8 bytes are re-read as ISO-8859-1 (a common workaround for non-ASCII header values).
// Both charsets are stated explicitly so the result does not depend on the platform default charset.
response.setHeader("Content-Disposition", "inline;fileName="
        + new String(fileName.getBytes(java.nio.charset.StandardCharsets.UTF_8),
                java.nio.charset.StandardCharsets.ISO_8859_1)
        + ".pdf");
response.setContentType("application/pdf;charset=UTF-8");

// Address of the HTML page (placeholder: fill in a real URL)
String url = "";
// Fetch the HTML document from the network.
// jsoup's Document is fully qualified because the simple name Document is used below for
// iText's layout Document; the two classes cannot both be imported under the same simple name.
org.jsoup.nodes.Document html = Jsoup.connect(url).get();
// Replace content in the HTML here if needed, then serialize it back to text
String htmlStr = html.html();

// Register the font used for rendering
FontProvider font = new FontProvider();
font.addFont("/com/common/font/simsun.ttf");

ConverterProperties c = new ConverterProperties();
c.setFontProvider(font);
c.setCharset("utf-8");

PdfDocument pd = new PdfDocument(new PdfWriter(response.getOutputStream()));
// Set the PDF document-info title to fileName (per the original author, this is the title shown in the browser)
pd.getDocumentInfo().setTitle(fileName);

// try-with-resources closes the layout document (and with it the PDF) even if conversion fails
try (Document document = new Document(pd, PageSize.A3)) {
    // Margins must be set before any content is added, otherwise they have no effect
    document.setMargins(20, 0, 20, 0);
    List<IElement> list = HtmlConverter.convertToElements(htmlStr, c);
    for (IElement ie : list) {
        document.add((IBlockElement) ie);
    }
}

 3.下面展示6种基础的将HTML转化为PDF的形式

1:最简单的html用String方式表示的形式

package com.ctbri.test;
 
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
 
import com.itextpdf.html2pdf.HtmlConverter;
 
/**
 * Converts a simple Hello World HTML String to a PDF document.
 */
public class C01E01_HelloWorld {
 
    /** 
     * The HTML-html原文件路径
     * The target —— 结果的输出所在的文件夹
     * DesT —— pdf输出的具体路径
     */
    public static final String HTML = "<h1>Test</h1><p>Hello World</p>";
    public static final String TARGET = "target/results/ch01/";
    public static final String DEST = String.format("%stest-01.pdf", TARGET);
    
    public static void main(String[] args) throws IOException {
        File file = new File(TARGET);
        file.mkdirs();
        new C01E01_HelloWorld().createPdf(HTML, DEST);
        System.out.println("ok");
    }
    
    /**
     * Creates the PDF file.
     */
    public void createPdf(String html, String dest) throws IOException {
        HtmlConverter.convertToPdf(html, new FileOutputStream(dest));
    }
}

2、需要引入图片、css文件

对于这部分需要引入几个特定的属性:

  1. BASEURI 用于表示装载有css、image、以及html等文件的文件夹
  2. SRC    html文件所在的路径
  3. TARGET PDF文件所在的父路径
  4. DEST 生成的pdf文件的路径

比如:

package com.ctbri.test;
 
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
 
import com.itextpdf.html2pdf.ConverterProperties;
import com.itextpdf.html2pdf.HtmlConverter;
 
/**
 * Converts an HTML String that references an image to a PDF document,
 * passing a base URI to the converter through {@link ConverterProperties}.
 */
public class C01E02_HelloWorld {

    /** The folder passed to the converter as base URI. */
    public static final String BASEURI = "src/main/resources/html/";
    /** The HTML markup to convert; its image source is a relative path. */
    public static final String HTML = "<h1>Test</h1><p>Hello World</p><img src=\"img/logo.png\">";
    /** The folder that receives the generated result. */
    public static final String TARGET = "target/results/ch01/";
    /** The full path of the PDF file that is written. */
    public static final String DEST = String.format("%stest-02.pdf", TARGET);

    /**
     * Creates the target folder if needed and converts {@link #HTML} to {@link #DEST}.
     *
     * @param args command-line arguments (not used)
     * @throws IOException if the PDF file cannot be written
     */
    public static void main(String[] args) throws IOException {
        File file = new File(TARGET);
        file.mkdirs();
        new C01E02_HelloWorld().createPdf(BASEURI, HTML, DEST);
    }

    /**
     * Creates the PDF file.
     *
     * @param baseUri the base URI set on the converter properties
     * @param html the HTML markup to convert
     * @param dest the path of the resulting PDF
     * @throws IOException if the destination cannot be opened or written
     */
    public void createPdf(String baseUri, String html, String dest) throws IOException {
        ConverterProperties properties = new ConverterProperties();
        properties.setBaseUri(baseUri);
        // This method opens the stream, so it also guarantees the stream is closed,
        // including when the conversion throws part-way through.
        try (FileOutputStream out = new FileOutputStream(dest)) {
            HtmlConverter.convertToPdf(html, out, properties);
        }
    }
}

 3、html和pdf都用File对象表示

package com.ctbri.test;
 
import java.io.File;
import java.io.IOException;
 
import com.itextpdf.html2pdf.HtmlConverter;
 
/**
 * Example that turns an HTML file on disk into a PDF file, handing both
 * to the converter as {@link File} objects.
 */
public class C01E03_HelloWorld {

    /** Folder that contains the source HTML file. */
    public static final String BASEURI = "src/main/resources/html/";
    /** Path of the source HTML file. */
    public static final String SRC = String.format("%shello.html", BASEURI);
    /** Folder that receives the generated result. */
    public static final String TARGET = "target/results/ch01/";
    /** Path of the PDF file that is written. */
    public static final String DEST = String.format("%stest-03.pdf", TARGET);

    /**
     * Makes sure the output folder exists, then runs the conversion.
     *
     * @param args command-line arguments (not used)
     * @throws IOException if the conversion fails on I/O
     */
    public static void main(String[] args) throws IOException {
        File outputFolder = new File(TARGET);
        outputFolder.mkdirs();
        C01E03_HelloWorld example = new C01E03_HelloWorld();
        example.createPdf(BASEURI, SRC, DEST);
    }

    /**
     * Writes the PDF by passing source and destination to the converter as files.
     *
     * @param baseUri the base URI; accepted for a signature parallel to the other
     *                examples, but not referenced by this method
     * @param src the path to the source HTML file
     * @param dest the path to the resulting PDF
     * @throws IOException if reading the source or writing the PDF fails
     */
    public void createPdf(String baseUri, String src, String dest) throws IOException {
        File htmlFile = new File(src);
        File pdfFile = new File(dest);
        HtmlConverter.convertToPdf(htmlFile, pdfFile);
    }
}

4、html用FileInputStream、pdf用FileOutputStream对象表示

package com.ctbri.test;
 
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
 
import com.itextpdf.html2pdf.ConverterProperties;
import com.itextpdf.html2pdf.HtmlConverter;
 
/**
 * Converts a simple HTML file to PDF using an InputStream and an OutputStream
 */
public class C01E04_HelloWorld {

    /** The folder passed to the converter as base URI; also contains the source HTML. */
    public static final String BASEURI = "src/main/resources/html/";
    /** The path of the source HTML file. */
    public static final String SRC = String.format("%shello.html", BASEURI);
    /** The folder that receives the generated result. */
    public static final String TARGET = "target/results/ch01/";
    /** The full path of the PDF file that is written. */
    public static final String DEST = String.format("%stest-04.pdf", TARGET);

    /**
     * The main method of this example: creates the target folder if needed
     * and converts {@link #SRC} to {@link #DEST}.
     *
     * @param args command-line arguments (not used)
     * @throws IOException if reading the source or writing the PDF fails
     */
    public static void main(String[] args) throws IOException {
        File file = new File(TARGET);
        file.mkdirs();
        new C01E04_HelloWorld().createPdf(BASEURI, SRC, DEST);
    }

    /**
     * Creates the PDF file.
     *
     * @param baseUri the base URI set on the converter properties
     * @param src the path to the source HTML file
     * @param dest the path to the resulting PDF
     * @throws IOException if a stream cannot be opened, read or written
     */
    public void createPdf(String baseUri, String src, String dest) throws IOException {
        ConverterProperties properties = new ConverterProperties();
        properties.setBaseUri(baseUri);
        // Both streams are opened here, so both are closed here; previously the input
        // stream was never closed and the output stream leaked if conversion threw.
        try (FileInputStream in = new FileInputStream(src);
             FileOutputStream out = new FileOutputStream(dest)) {
            HtmlConverter.convertToPdf(in, out, properties);
        }
    }
}
5、html用FileInputStream、pdf用PdfWriter

利用pdfWriter代替outputStream ,前者可以更好的配合我们设置输出文件的属性。

package com.ctbri.test;
 
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
 
import com.itextpdf.html2pdf.ConverterProperties;
import com.itextpdf.html2pdf.HtmlConverter;
import com.itextpdf.kernel.pdf.PdfWriter;
import com.itextpdf.kernel.pdf.WriterProperties;
 
/**
 * Converts a simple HTML file to PDF using an InputStream and a PdfWriter
 */
public class C01E05_HelloWorld {

    /** The folder passed to the converter as base URI; also contains the source HTML. */
    public static final String BASEURI = "src/main/resources/html/";
    /** The path of the source HTML file. */
    public static final String SRC = String.format("%shello.html", BASEURI);
    /** The folder that receives the generated result. */
    public static final String TARGET = "target/results/ch01/";
    /** The full path of the PDF file that is written. */
    public static final String DEST = String.format("%stest-05.pdf", TARGET);

    /**
     * Creates the target folder if needed and converts {@link #SRC} to {@link #DEST}.
     *
     * @param args command-line arguments (not used)
     * @throws IOException if reading the source or writing the PDF fails
     */
    public static void main(String[] args) throws IOException {
        File file = new File(TARGET);
        file.mkdirs();
        new C01E05_HelloWorld().createPdf(BASEURI, SRC, DEST);
    }

    /**
     * Creates the PDF file. output using PdfWriter
     *
     * @param baseUri the base URI set on the converter properties
     * @param src the path to the source HTML file
     * @param dest the path to the resulting PDF
     * @throws IOException if the source cannot be read or the PDF cannot be written
     */
    public void createPdf(String baseUri, String src, String dest) throws IOException {
        ConverterProperties properties = new ConverterProperties();
        properties.setBaseUri(baseUri);
        // The input is opened first so that a missing source file fails before the
        // destination is created; both resources are closed even if conversion throws.
        try (FileInputStream in = new FileInputStream(src);
             PdfWriter writer = new PdfWriter(dest, new WriterProperties().setFullCompressionMode(true))) {
            HtmlConverter.convertToPdf(in, writer, properties);
        }
    }
}

6、html用FileInputStream、pdf用PdfDocument

这一层和上一层不同之处在于,输出的时候用一层pdfDocument包裹在pdfWriter外面。

并且引入了pdf.setTagged() 方便我们加入侧边的语义结构。

package com.ctbri.test;
 
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
 
import com.itextpdf.html2pdf.ConverterProperties;
import com.itextpdf.html2pdf.HtmlConverter;
import com.itextpdf.kernel.pdf.PdfDocument;
import com.itextpdf.kernel.pdf.PdfWriter;
 
/**
 * Converts a simple HTML file to PDF using an InputStream and a PdfDocument
 */
public class C01E06_HelloWorld {

    /** The folder passed to the converter as base URI; also contains the source HTML. */
    public static final String BASEURI = "src/main/resources/html/";
    /** The path of the source HTML file. */
    public static final String SRC = String.format("%shello.html", BASEURI);
    /** The folder that receives the generated result. */
    public static final String TARGET = "target/results/ch01/";
    /** The full path of the PDF file that is written. */
    public static final String DEST = String.format("%stest-06.pdf", TARGET);

    /**
     * Creates the target folder if needed and converts {@link #SRC} to {@link #DEST}.
     *
     * @param args command-line arguments (not used)
     * @throws IOException if reading the source or writing the PDF fails
     */
    public static void main(String[] args) throws IOException {
        File file = new File(TARGET);
        file.mkdirs();
        new C01E06_HelloWorld().createPdf(BASEURI, SRC, DEST);
    }

    /**
     * Creates the PDF file.
     *
     * @param baseUri the base URI set on the converter properties
     * @param src the path to the source HTML file
     * @param dest the path to the resulting PDF
     * @throws IOException if the source cannot be read or the PDF cannot be written
     */
    public void createPdf(String baseUri, String src, String dest) throws IOException {
        ConverterProperties properties = new ConverterProperties();
        properties.setBaseUri(baseUri);
        // The input is opened first so that a missing source file fails before the
        // destination is created; both resources are closed even if conversion throws.
        try (FileInputStream in = new FileInputStream(src);
             PdfWriter writer = new PdfWriter(dest)) {
            PdfDocument pdf = new PdfDocument(writer);
            // Marks the PDF as tagged, i.e. carrying a logical/semantic structure;
            // this is not a table of contents.
            pdf.setTagged();
            HtmlConverter.convertToPdf(in, pdf, properties);
        }
    }
}

总结:上面介绍了6种基础的将html转为pdf文件的方式,总体没有什么区别,都是对不同源文件或者输出文件处理的几种方式。

包括:File、OutputStream、PdfWriter、PdfDocument,基本上都能实现目的,只是对于不同的输出方式采用不同的方式。

附上一篇对IText7的详解,包括样式格式等等的设置:http://www.hellojava.com/a/91454.html 

猜你喜欢

转载自blog.csdn.net/happyAliceYu/article/details/108278208