分享web开发知识

注册/登录|最近发布|今日推荐

主页 IT知识网页技术软件开发前端开发代码编程运营维护技术分享教程案例
当前位置:首页 > 软件开发

word导出html实现在线预览

发布时间:2023-09-06 01:45责任编辑:赖小花关键词:word
需要的maven依赖(已经过编译验证,版本必须配合一致):

<!-- https://mvnrepository.com/artifact/fr.opensagres.xdocreport/org.apache.poi.xwpf.converter.core -->
<dependency>
    <groupId>fr.opensagres.xdocreport</groupId>
    <artifactId>org.apache.poi.xwpf.converter.core</artifactId>
    <version>1.0.5</version>
</dependency>
<!-- https://mvnrepository.com/artifact/fr.opensagres.xdocreport/org.apache.poi.xwpf.converter.xhtml -->
<dependency>
    <groupId>fr.opensagres.xdocreport</groupId>
    <artifactId>org.apache.poi.xwpf.converter.xhtml</artifactId>
    <version>1.0.5</version>
</dependency>
<!-- 版本与上面一致 -->
<!-- https://mvnrepository.com/artifact/org.apache.commons/commons-io -->
<dependency>
    <groupId>org.apache.commons</groupId>
    <artifactId>commons-io</artifactId>
    <version>1.3.2</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-scratchpad</artifactId>
    <version>3.17</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.commons/commons-collections4 -->
<dependency>
    <groupId>org.apache.commons</groupId>
    <artifactId>commons-collections4</artifactId>
    <version>4.0</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>3.17</version>
</dependency>
<!--
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml-schemas</artifactId>
    <version>3.16</version>
</dependency>
-->
<!-- https://mvnrepository.com/artifact/org.apache.xmlbeans/xmlbeans -->
<dependency>
    <groupId>org.apache.xmlbeans</groupId>
    <artifactId>xmlbeans</artifactId>
    <version>2.6.0</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>3.14</version>
</dependency>
<!-- poi引用的包要保持版本号一致,不然也会出现 ClassNotFoundException: org.apache.poi.wp.usermodel.Paragraph 这个错误 -->
<!-- https://mvnrepository.com/artifact/org.apache.commons/commons-lang3 -->
<dependency>
    <groupId>org.apache.commons</groupId>
    <artifactId>commons-lang3</artifactId>
    <version>3.4</version>
</dependency>

package com.zyhao.openec.excel.utils;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.util.List;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.apache.commons.io.output.ByteArrayOutputStream;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.apache.poi.xwpf.converter.core.FileImageExtractor;
import org.apache.poi.xwpf.converter.core.IURIResolver;
import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.w3c.dom.Document;

public class WordToHtml {
public static void main(String[] args) {
try {
wordToHtml("E:\me\2.docx", "E:\me\", "123.html");
// ?????????wordToHtml("E:\me\2.doc", "E:\me\", "12.html");
} catch (Exception e) {
e.printStackTrace();
}
}

public static void wordToHtml(String wordPath,String htmlPath,String newFilename) throws Exception { ???????????convert2Html(wordPath, htmlPath, newFilename);} ???public static void writeFile(String content, String path) throws Exception { ???????FileOutputStream fos = null; ???????BufferedWriter bw = null; ???try { ???????????File file = new File(path); ???????fos = new FileOutputStream(file); ???????????bw = new BufferedWriter(new OutputStreamWriter(fos)); ???????????bw.write(content); ?????} catch (FileNotFoundException fnfe) { ???????????fnfe.printStackTrace(); ???????} catch (IOException ioe) { ???????????ioe.printStackTrace(); ???????} finally { ???????????try { ???????????????if (bw != null) ???????????????????bw.close(); ???????????????if (fos != null) ???????????????????fos.close(); ???????????} catch (IOException ie) { ???????????} ???????} ???} ???/** * 将word转换成html * 支持 .doc and .docx * @param fileName word文件名 * @param outPutFilePath html存储路径 * @param newFileName html名 * @throws Exception ?*/public static void convert2Html(String fileName, String outPutFilePath,String newFileName) ???????????throws Exception { ???String substring = fileName.substring(fileName.lastIndexOf(".")+1); ???ByteArrayOutputStream out = new ByteArrayOutputStream(); ???/** ????* word2007和word2003的构建方式不同, ????* 前者的构建方式是xml,后者的构建方式是dom树。 ????* 文件的后缀也不同,前者后缀为.docx,后者后缀为.doc ????* 相应的,apache.poi提供了不同的实现类。 ????*/ ???if("docx".equals(substring)){

// ?????????writeFile(new String("<html><head> ?<meta http-equiv=\"content-type\" content=\"text/html\" charset=\"utf-8\"/></head>对不起,.docx格式的word文档,暂时不能生成预览</html>".getBytes("utf-8")), outPutFilePath+newFileName);

 ???????//step 1 : load DOCX into XWPFDocument ???????InputStream inputStream = new FileInputStream(new File(fileName)); ???????XWPFDocument document = new XWPFDocument(inputStream); ???????//step 2 : prepare XHTML options ???????final String imageUrl = ""; ???????XHTMLOptions options = XHTMLOptions.create(); ???????options.setExtractor(new FileImageExtractor(new File(outPutFilePath + imageUrl))); ???????options.setIgnoreStylesIfUnused(false); ???????options.setFragment(true); ???????options.URIResolver(new IURIResolver() {

// ?????????????@Override 重写的方法,加上这个报错,你看看是啥问题
public String resolve(String uri) {
return imageUrl + uri;
}
});

 ???????//step 3 : convert XWPFDocument to XHTML ???????XHTMLConverter.getInstance().convert(document, out, options); ???}else{ ???????HWPFDocument wordDocument = new HWPFDocument(new FileInputStream(fileName));//WordToHtmlUtils.loadDoc(new FileInputStream(inputFile)); ???????????WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter( ???????????????????DocumentBuilderFactory.newInstance().newDocumentBuilder() ???????????????????????????.newDocument()); ????????wordToHtmlConverter.setPicturesManager( new PicturesManager() ????????????{ ????????????????public String savePicture( byte[] content, ????????????????????????PictureType pictureType, String suggestedName, ????????????????????????float widthInches, float heightInches ) ????????????????{ ????????????????????return suggestedName; ????????????????} ????????????} ); ???????????wordToHtmlConverter.processDocument(wordDocument); ???????????//save pictures ???????????List pics=wordDocument.getPicturesTable().getAllPictures(); ?????????if(pics!=null&&!pics.isEmpty()){ ???????????for(int i=0;i<pics.size();i++){ ???????????????????Picture pic = (Picture)pics.get(i); ???????????????????System.out.println(); ???????????????????try { ???????????????????????pic.writeImageContent(new FileOutputStream(outPutFilePath ???????????????????????????????+ pic.suggestFullFileName())); ???????????????????} catch (FileNotFoundException e) { ???????????????????????e.printStackTrace(); ???????????????????} ?????????????????} ???????????} ???????????Document htmlDocument = wordToHtmlConverter.getDocument(); ???????????DOMSource domSource = new DOMSource(htmlDocument); ???????????StreamResult streamResult = new StreamResult(out); ???????????TransformerFactory tf = TransformerFactory.newInstance(); ???//这个应该是转换成xml的 ???????Transformer serializer = tf.newTransformer(); ???????????serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8"); ???????????serializer.setOutputProperty(OutputKeys.INDENT, "yes"); 
???????????serializer.setOutputProperty(OutputKeys.METHOD, "html"); ???????????serializer.transform(domSource, streamResult); ???????} ???????out.close(); ???????writeFile(new String(out.toByteArray()), outPutFilePath+newFileName); ???}

}

word导出html实现在线预览

原文地址:http://blog.51cto.com/17099933344/2087457

知识推荐

我的编程学习网——分享web前端后端开发技术知识。 垃圾信息处理邮箱 tousu563@163.com 网站地图
icp备案号 闽ICP备2023006418号-8 不良信息举报平台 互联网安全管理备案 Copyright 2023 www.wodecom.cn All Rights Reserved