java实现word文件转html(图片用base64转化)
1.添加需要的jar包:
<dependency>
<groupId>fr.opensagres.xdocreport</groupId>
<artifactId>fr.opensagres.xdocreport.document</artifactId>
<version>2.0.1</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>3.15</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-scratchpad</artifactId>
<version>3.15</version>
</dependency>
<dependency>
<groupId>fr.opensagres.xdocreport</groupId>
<artifactId>fr.opensagres.poi.xwpf.converter.xhtml</artifactId>
<version>2.0.1</version>
</dependency>
2.来一个小demo吧。
对于该demo,描述几个我觉得需要注意的点:
2.1:不知道有没有小伙伴发生了jar包冲突的现象呢,如果有,可以考虑修改一下jar包版本号哦,基本上应该没什么问题呢;
2.2:word文档的后缀有.doc和.docx,两者的转换方法是不一样的。所以,对于不同的文档,我们需要先知道其文档后缀是什么,才能进行下一步操作;
2.3:此demo,我选择通过接口直接返回动态的html,当然,如果想生成一个静态的html,可以自己修改输出方式;
2.4:对于文档中涉及到图片如何转化的问题,暂时选择用base64转码到html中;
2.5:最后:此demo中测试转化的文档,目前只测试了简单的文本加图片,所以可能有别的问题待发现并解决。
/**
* 将word转成html
*
* @param id
* @return
* @throws Exception
*/
@ApiOperation(value = "将word转成html")
@GetMapping(value = "/convertWordToHtml")
public void convertWordToHtml(@RequestParam(required = true) String id, HttpServletResponse httpServletResponse) throws Exception {
}
  //此处省略部分不重要的代码哈,只需将需要转化的⽂档转成inputStream。
  InputStream inputStream = null;
  OutputStream outputStream = OutputStream();
   /**
   * 将 docx 转成 html
   *
   * @param outputStream 输出流
    * @throws Exception
    */
   public static void convertDocxFileToHtml(OutputStream outputStream) throws Exception {
  //创建操作word的对象
 XWPFDocument document = new XWPFDocument(inputStream);
 XHTMLOptions options = ate();
 options.setIgnoreStylesIfUnused(false);
 options.setFragment(true);
//图⽚⽤base64转化
options.setImageManager(new Base64EmbedImgManager());
pdf转html //转化成HTML
  Instance().convert(document, outputStream, options);
outputStream.flush();
outputStream.close();
inputStream.close();
   }
   /**
   * 将 doc 转成 html
   *
   * @param outputStream 输出流
    * @throws Exception
    */
   public static void convertDocFileToHtml(OutputStream outputStream) throws Exception {
      //ps:当inputStream!=null,⽽⽣成wordDocument报错,请检查⽂档是否⽤office word保存的
      HWPFDocument wordDocument = (HWPFDocument) WordToHtmlUtils.loadDoc(inputStream);
      WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
    wInstance().newDocumentBuilder().newDocument()
      );
      //将图⽚转成base64的格式
      PicturesManager pictureRunMapper = (bytes, pictureType, s, v, v1) -> "data:image/png;base64," + deBase64String(bytes);      wordToHtmlConverter.setPicturesManager(pictureRunMapper);
      //解析word⽂档
      wordToHtmlConverter.processDocument(wordDocument);
      Document htmlDocument = Document();
      DOMSource domSource = new DOMSource(htmlDocument);
      StreamResult streamResult = new StreamResult(outputStream);
      TransformerFactory factory = wInstance();
      Transformer serializer = wTransformer();
      serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
      serializer.setOutputProperty(OutputKeys.INDENT, "yes");
      serializer.setOutputProperty(OutputKeys.METHOD, "html");
      ansform(domSource, streamResult);
}