???????package com.css.java.learning.massbag; ???????import java.awt.image.BufferedImage; ???????import java.io.File; ???????import java.io.FileOutputStream; ???????import java.util.ArrayList; ???????import java.util.Iterator; ???????import java.util.List; ???????import javax.imageio.IIOImage; ???????import javax.imageio.ImageIO; ???????import javax.imageio.ImageWriter; ???????import javax.imageio.stream.ImageOutputStream; ???????import org.apache.pdfbox.pdmodel.PDDocument; ???????import org.apache.pdfbox.pdmodel.PDPage; ???????/** ????????*PDF 转html ?????????* @author Red_Ant ????????* 20180925 ????????*/ ???????public class PdfToHtml { ???????????@SuppressWarnings("deprecation") ???????????private static void pdfToHtml(String sourcePath, String outPath){ ???????????????List<String> imgList = new ArrayList<String>(); ???????????????try { ???????????????????PDDocument doc = PDDocument.load(sourcePath); ???????????????????int pageCount = doc.getPageCount(); ????????????????????System.out.println("总共多少页?" 
+ pageCount); ????????????????????List pages = doc.getDocumentCatalog().getAllPages(); ????????????????????for(int i=0;i<pages.size();i++){ ???????????????????????????PDPage page = (PDPage)pages.get(i); ????????????????????????????BufferedImage image = page.convertToImage(); ????????????????????????????Iterator iter = ImageIO.getImageWritersBySuffix("jpg"); ????????????????????????????ImageWriter writer = (ImageWriter)iter.next(); ????????????????????????????String imgName = File.separator + "20180925_pdf" + File.separator +i+".jpg"; ???????????????????????????File folder = new File(outPath + File.separator + "20180925_pdf"); ?//先创建文件夹 ???????????????????????????folder.mkdirs(); ???????????????????????????File outFile = new File(outPath + imgName); //再创建文件 ???????????????????????????imgList.add("20180925_pdf" + File.separator +i+".jpg"); ???????????????????????????outFile.createNewFile(); ???????????????????????????FileOutputStream out = new FileOutputStream(outFile); ????????????????????????????ImageOutputStream outImage = ImageIO.createImageOutputStream(out); ????????????????????????????writer.setOutput(outImage); ????????????????????????????writer.write(new IIOImage(image,null,null)); ????????????????????} ???????????????????doc.close(); ???????????????} catch (Exception e) { ???????????????????e.printStackTrace(); ???????????????} ????????????????PptToHtml.createPPTHtml(outPath, imgList, sourcePath); ???????????} ???????????public static void main(String[] args) { ???????????????pdfToHtml("D:\\red_ant_file\\20180925\\20180925_pdf\\西点烘焙百科全书电子书.pdf", ????????????????????????"D:\\red_ant_file\\20180925\\20180925_pdf"); ???????????} ???????}
// Result: PDF pages converted to images, and then to HTML.
// Original article: http://blog.51cto.com/13479739/2285600