1.导入Maven包
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>3.14</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-scratchpad</artifactId>
<version>3.14</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>3.14</version>
</dependency>
<dependency>
<groupId>fr.opensagres.xdocreport</groupId>
<artifactId>xdocreport</artifactId>
<version>1.0.6</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml-schemas</artifactId>
<version>3.14</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>ooxml-schemas</artifactId>
<version>1.3</version>
</dependency>
2.转换代码
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.xwpf.converter.core.BasicURIResolver;
import org.apache.poi.xwpf.converter.core.FileImageExtractor;
import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.w3c.dom.Document;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.OutputStreamWriter;
public class Test {
// doc转换为html
void docToHtml() throws Exception {
String sourceFileName = "C:\\doc\\test.doc";
String targetFileName = "C:\\html\\test.html";
String imagePathStr = "C:\\html\\image\\";
HWPFDocument wordDocument = new HWPFDocument(new FileInputStream(sourceFileName));
Document document = wInstance().newDocumentBuilder().newDocument();
WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(document);
// 保存图⽚,并返回图⽚的相对路径
wordToHtmlConverter.setPicturesManager((content, pictureType, name, width, height) -> {
try(FileOutputStream out = new FileOutputStream(imagePathStr + name)){
out.write(content);
} catch (Exception e) {
e.printStackTrace();
}
return "image/" + name;
});
wordToHtmlConverter.processDocument(wordDocument);
Document htmlDocument = Document();
DOMSource domSource = new DOMSource(htmlDocument);
StreamResult streamResult = new StreamResult(new File(targetFileName));
TransformerFactory tf = wInstance();
Transformer serializer = tf.newTransformer();
serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
serializer.setOutputProperty(OutputKeys.INDENT, "yes");
serializer.setOutputProperty(OutputKeys.METHOD, "html");
}
// docx转换为html
public void docxToHtml() throws Exception {
String sourceFileName = "D:\\ac\\00.docx";
String targetFileName = "D:\\ac\\test.html";
String imagePathStr = "D:\\ac\\image\\";
OutputStreamWriter outputStreamWriter = null;
try {
XWPFDocument document = new XWPFDocument(new FileInputStream(sourceFileName));
XHTMLOptions options = ate();
// 存放图⽚的⽂件夹
options.setExtractor(new FileImageExtractor(new File(imagePathStr)));
// html中图⽚的路径
options.URIResolver(new BasicURIResolver("image"));
outputStreamWriter = new OutputStreamWriter(new FileOutputStream(targetFileName), "utf-8"); XHTMLConverter xhtmlConverter = (XHTMLConverter) Instance();
} finally {
if (outputStreamWriter != null) {
outputStreamWriter.close();
}
}
}
发布评论