PD4ML将HTML文件转换成PDF文件

网上有好几种方法可以将HTML文件转换成PDF文件，但是有些方法对HTML文件格式要求比较严格，稍微错了一些就不能生成我们所要的PDF文件。这里我推荐PD4ML，它可以解决HTML文件格式不正确的问题，可以生成一个比较好的PDF文件，其处理速度快，而且对CSS文件兼容得非常好。下面是最基本的
PD4ML编程：

/**
 * 
 */
package samples;
 
import java.awt.Insets;
import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.security.InvalidParameterException;
 
import org.zefer.pd4ml.PD4Constants;
import org.zefer.pd4ml.PD4ML;
 
/**
 * Minimal PD4ML sample: renders the HTML document found at a URL into a PDF file.
 *
 * The page margins and the width of the virtual browser frame are held in
 * protected fields so that a subclass can adjust them before calling
 * {@link #doConversion(String, String)}.
 */
public class GettingStarted1 {
	// Page margins handed to PD4ML.setPageInsetsMM() (millimetres, going by the method name).
	protected int topValue = 10;
	protected int leftValue = 20;
	protected int rightValue = 10;
	protected int bottomValue = 10;
	// Frame width of the "virtual web browser" used for HTML layout.
	protected int userSpaceWidth = 1300;

	/**
	 * Converts the PD4ML sample page to c:/pd4ml.pdf and prints any failure to stderr.
	 *
	 * @param args ignored
	 */
	public static void main(String[] args) {
		try {
			GettingStarted1 jt = new GettingStarted1();
			jt.doConversion("http://pd4ml.com/sample.htm", "c:/pd4ml.pdf");
		} catch (Exception e) {
			e.printStackTrace();
		}
	}

	/**
	 * Renders the HTML document at {@code url} into a PDF written to {@code outputPath}.
	 *
	 * @param url        address of the source HTML document
	 * @param outputPath path of the PDF file to create (an existing file is overwritten)
	 * @throws InvalidParameterException declared for the PD4ML calls
	 * @throws MalformedURLException     if {@code url} cannot be parsed
	 * @throws IOException               if the output file cannot be opened, written or closed
	 */
	public void doConversion( String url, String outputPath ) 
				throws InvalidParameterException, MalformedURLException, IOException {
		// Parse the URL before touching the file system, so that a malformed
		// URL does not leave an empty output file behind.
		URL source = new URL(url);

		PD4ML pd4ml = new PD4ML();

		pd4ml.setHtmlWidth(userSpaceWidth); // set frame width of "virtual web browser" 

		// choose target paper format and "rotate" it to landscape orientation
		pd4ml.setPageSize(pd4ml.changePageOrientation(PD4Constants.A4)); 

		// define PDF page margins; java.awt.Insets takes (top, left, bottom, right)
		pd4ml.setPageInsetsMM(new Insets(topValue, leftValue, bottomValue, rightValue)); 

		// source HTML document also may have margins, could be suppressed this way 
		// (PD4ML *Pro* feature):
		pd4ml.addStyle("BODY {margin: 0}", true);

		// If built-in basic PDF fonts are not sufficient or 
		// if you need to output non-Latin texts,
		// TTF embedding feature should help (PD4ML *Pro*)
		pd4ml.useTTF("c:/windows/fonts", true);

		// Open the output only once everything else is set up, and always close
		// it, even when render() fails; otherwise the file handle leaks.
		File output = new File(outputPath);
		java.io.FileOutputStream fos = new java.io.FileOutputStream(output);
		try {
			pd4ml.render(source, fos); // actual document conversion from URL to file
		} finally {
			fos.close();
		}

		System.out.println( outputPath + "\ndone." );
	}
}

下面的Java类稍微改变了上面的示例。现在它将源HTML预先读入一个字符串，并将其包装成StringReader后传递给render()方法。它首先将PDF字节写入ByteArrayOutputStream，这使得测量结果文档的大小成为可能。该方法的一个缺点是内存（RAM）占用较大。

package samples;
 
import java.awt.Insets;
import java.io.BufferedInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.StringReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.security.InvalidParameterException;
 
import org.zefer.pd4ml.PD4Constants;
import org.zefer.pd4ml.PD4ML;
 
/**
 * PD4ML sample that converts an HTML document held in a String into a PDF file.
 *
 * The PDF is rendered into memory first, so its size is known before anything
 * is written to disk; the price is that the whole PDF is held in RAM.
 */
public class GettingStarted2 {
	// Page margins handed to PD4ML.setPageInsetsMM() (millimetres, going by the method name).
	protected int topValue = 10;
	protected int leftValue = 20;
	protected int rightValue = 10;
	protected int bottomValue = 10;
	// Frame width of the "virtual web browser" used for HTML layout.
	protected int userSpaceWidth = 1300;

	/**
	 * Reads c:/sample.htm as UTF-8, converts it to c:/pd4ml.pdf and prints any
	 * failure to stderr.
	 *
	 * @param args ignored
	 */
	public static void main(String[] args) {
		try {
			GettingStarted2 jt = new GettingStarted2();
			String html = readFile("c:/sample.htm", "UTF-8");
			jt.doConversion2(html, "c:/pd4ml.pdf");
		} catch (Exception e) {
			e.printStackTrace();
		}
	}

	/**
	 * Renders {@code htmlDocument} into a PDF and writes it to {@code outputPath}.
	 *
	 * @param htmlDocument the HTML source text
	 * @param outputPath   path of the PDF file to create (an existing file is overwritten)
	 * @throws InvalidParameterException declared for the PD4ML calls
	 * @throws MalformedURLException     declared for the PD4ML calls
	 * @throws IOException               if rendering or writing the output file fails
	 */
	public void doConversion2( String htmlDocument, String outputPath ) 
				throws InvalidParameterException, MalformedURLException, IOException {

		PD4ML pd4ml = new PD4ML();

		pd4ml.setHtmlWidth(userSpaceWidth); // set frame width of "virtual web browser" 

		// choose target paper format (A4 with its orientation changed)
		pd4ml.setPageSize(pd4ml.changePageOrientation(PD4Constants.A4)); 

		// define PDF page margins; java.awt.Insets takes (top, left, bottom, right)
		pd4ml.setPageInsetsMM(new Insets(topValue, leftValue, bottomValue, rightValue)); 

		// source HTML document also may have margins, could be suppressed this way 
		// (PD4ML *Pro* feature):
		pd4ml.addStyle("BODY {margin: 0}", true);

		// If built-in basic PDF fonts are not sufficient or 
		// if you need to output non-Latin texts, TTF embedding feature should help 
		// (PD4ML *Pro*)
		pd4ml.useTTF("c:/windows/fonts", true);

		// ByteArrayOutputStream holds no system resource, so it needs no close().
		ByteArrayOutputStream baos = new ByteArrayOutputStream();
		// actual document conversion from HTML string to byte array
		pd4ml.render(new StringReader(htmlDocument), baos); 
		// if the HTML has relative references to images etc, 
		// use render() method with baseDirectory parameter instead

		System.out.println( "resulting PDF size: " + baos.size() + " bytes" );
		// in Web scenarios it is a good idea to send the size with 
		// "Content-length" HTTP header

		File output = new File(outputPath);
		FileOutputStream fos = new FileOutputStream(output);
		try {
			// writeTo() streams the internal buffer directly, avoiding the
			// extra copy that toByteArray() would allocate.
			baos.writeTo(fos);
		} finally {
			// Always release the file handle, even when the write fails.
			fos.close();
		}

		System.out.println( outputPath + "\ndone." );
	}

	/**
	 * Reads a whole file into a String.
	 *
	 * @param path     path of the file to read
	 * @param encoding name of the charset used to decode the file's bytes
	 * @return the decoded file contents
	 * @throws IOException if the file cannot be opened or read, or if
	 *                     {@code encoding} is not a supported charset name
	 */
	private static final String readFile( String path, String encoding ) throws IOException {

		ByteArrayOutputStream contents = new ByteArrayOutputStream();
		byte[] buffer = new byte[2048];

		BufferedInputStream bis = new BufferedInputStream(new FileInputStream(new File(path)));
		try {
			// Read through the buffered wrapper; reading the raw FileInputStream
			// directly would bypass the buffer entirely.
			int read;
			while ((read = bis.read(buffer, 0, buffer.length)) != -1) {
				contents.write(buffer, 0, read);
			}
		} finally {
			// Closing the wrapper also closes the underlying FileInputStream.
			bis.close();
		}

		return contents.toString(encoding);
	}
}
涉及格式转换用到的包，可以通过以下网址进行下载：

KirioYoLo

发布了36 篇原创文章 · 获赞 45 · 访问量 9万+

私信关注

PD4ML将HTML文件转换成PDF文件

猜你喜欢