java实现word文件转html(图片用base64转化)
1.添加需要的jar包:
<dependency>
<groupId>fr.opensagres.xdocreport</groupId>
<artifactId>fr.opensagres.xdocreport.document</artifactId>
<version>2.0.1</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>3.15</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-scratchpad</artifactId>
<version>3.15</version>
</dependency>
<dependency>
<groupId>fr.opensagres.xdocreport</groupId>
<artifactId>fr.opensagres.poi.xwpf.converter.xhtml</artifactId>
<version>2.0.1</version>
</dependency>
2.来一个小demo吧。
对于该demo,描述几个我觉得需要注意的点:
2.2:word文档的后缀有.doc和.docx,两种格式的转换方法并不相同。所以,对于不同的文档,我们需要先知道其文档后缀是什么,才能进行下一步操作;
2.3:此demo,我选择通过接口直接返回动态的html,当然,如果想生成一个静态的html,可以自己修改输出方式;
2.4:对于文档中涉及到图片如何转化的问题,暂时选择用base64转码到html中;
2.5:最后:此demo中测试转化的文档,目前只测试了简单的文本加图片,所以可能有别的问题待发现并解决。
/**
 * HTTP endpoint that converts a Word document to HTML and streams it back in the response.
 *
 * @param id                  request parameter identifying the document; the code that resolves
 *                            it to a file is omitted from this demo
 * @param httpServletResponse response whose output stream receives the generated HTML
 * @throws Exception if the response output stream cannot be obtained
 */
@ApiOperation(value = "将word转成html")
@GetMapping(value = "/convertWordToHtml")
public void convertWordToHtml(@RequestParam(required = true) String id, HttpServletResponse httpServletResponse) throws Exception {
    // Omitted in the demo: the only requirement is to open the document to convert as an InputStream.
    InputStream inputStream = null;
    // The HTML is written directly into the HTTP response body.
    OutputStream outputStream = httpServletResponse.getOutputStream();
    // NOTE(review): the step that picks convertDocxFileToHtml or convertDocFileToHtml based on
    // the file extension is not shown in the original listing - restore it here.
}
/**
* 将 docx 转成 html
*
* @param outputStream 输出流
* @throws Exception
*/
public static void convertDocxFileToHtml(OutputStream outputStream) throws Exception {
//创建操作word的对象
XWPFDocument document = new XWPFDocument(inputStream);
XHTMLOptions options = ate();
options.setIgnoreStylesIfUnused(false);
options.setFragment(true);
//图⽚⽤base64转化
options.setImageManager(new Base64EmbedImgManager());
pdf转html //转化成HTML
Instance().convert(document, outputStream, options);
outputStream.flush();
outputStream.close();
inputStream.close();
}
/**
 * Converts a legacy .doc document to HTML and writes the result to {@code outputStream}.
 *
 * <p>Each picture is replaced by a Base64 {@code data:} URI whose MIME type is taken from the
 * picture's own type, so no image files are written.
 *
 * <p>The source {@code inputStream} and the given {@code outputStream} are both closed before
 * this method returns, matching {@code convertDocxFileToHtml}.
 *
 * <p>NOTE(review): {@code inputStream} is neither a parameter nor a local of this method; it has
 * to be provided by the enclosing scope - confirm where it is declared.
 *
 * @param outputStream destination for the generated HTML; closed by this method
 * @throws Exception if the document cannot be loaded, converted or serialized
 */
public static void convertDocFileToHtml(OutputStream outputStream) throws Exception {
    // If inputStream is non-null but loading still fails, check that the file was really
    // saved by Microsoft Office Word.
    try (InputStream in = inputStream;
         OutputStream out = outputStream;
         HWPFDocument wordDocument = (HWPFDocument) WordToHtmlUtils.loadDoc(in)) {
        WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
                DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());

        // Inline every picture as a Base64 data URI, labelled with its actual MIME type.
        PicturesManager pictureRunMapper = (bytes, pictureType, suggestedName, widthInches, heightInches) ->
                "data:" + pictureType.getMime() + ";base64,"
                        + java.util.Base64.getEncoder().encodeToString(bytes);
        wordToHtmlConverter.setPicturesManager(pictureRunMapper);

        // Walk the Word document and build the HTML DOM.
        wordToHtmlConverter.processDocument(wordDocument);
        Document htmlDocument = wordToHtmlConverter.getDocument();

        // Serialize the DOM to the output stream as indented UTF-8 HTML.
        DOMSource domSource = new DOMSource(htmlDocument);
        StreamResult streamResult = new StreamResult(out);
        TransformerFactory factory = TransformerFactory.newInstance();
        Transformer serializer = factory.newTransformer();
        serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.setOutputProperty(OutputKeys.METHOD, "html");
        serializer.transform(domSource, streamResult);
        out.flush();
    }
}
发布评论