java使用POI实现html和word相互转换

java使用POI实现html和word相互转换

项目后端使用了springboot和maven，前端使用了ckeditor富文本编辑器。目前从html转换得到的word为doc格式，而图片处理只支持docx格式，所以需要手动把doc另存为docx，然后才可以进行图片替换。

一.添加maven依赖

主要使用了以下和poi相关的依赖，为了便于获取html的图片元素，还使用了jsoup：

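<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>3.14</version>
</dependency>

<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-scratchpad</artifactId>
    <!-- all org.apache.poi artifacts must share the same version -->
    <version>3.14</version>
</dependency>

<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>3.14</version>
</dependency>

<dependency>
    <groupId>fr.opensagres.xdocreport</groupId>
    <artifactId>xdocreport</artifactId>
    <!-- version not shown in the source listing; choose one compatible with POI 3.14 -->
</dependency>

<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml-schemas</artifactId>
    <version>3.14</version>
</dependency>

<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>ooxml-schemas</artifactId>
    <!-- version not shown in the source listing; choose the release matching POI 3.14 -->
</dependency>

<dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <!-- version not shown in the source listing -->
</dependency>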

二.word转换为html

在springboot项目的resources目录下新建static文件夹，将需要转换的word文件（下面代码中使用的是test.doc和test.docx）粘贴进去。由于static是springboot默认的静态资源目录，所以不需要在配置文件里面另行配置；如果改成其他名字，则需要在application.yml中进行相应配置。

doc格式转换为html：

public static String docToHtml() throws Exception {

File path = new URL("classpath:").getPath());

String imagePathStr = AbsolutePath() + "\\static\\image\\";

String sourceFileName = AbsolutePath() + "\\static\\test.doc";

String targetFileName = AbsolutePath() + "\\static\\test2.html";

File file = new File(imagePathStr);

if(!ists()) {

file.mkdirs();

}

HWPFDocument wordDocument = new HWPFDocument(new FileInputStream(sourceFileName));

org.w3c.dom.Document document = wInstance().newDocumentBuilder().newDocument();

WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(document);

//保存图⽚，并返回图⽚的相对路径

wordToHtmlConverter.setPicturesManager((content, pictureType, name, width, height) -> {

try (FileOutputStream out = new FileOutputStream(imagePathStr + name)) {

out.write(content);

} catch (Exception e) {

e.printStackTrace();

}

return "image/" + name;

});

wordToHtmlConverter.processDocument(wordDocument);

org.w3c.dom.Document htmlDocument = Document();

DOMSource domSource = new DOMSource(htmlDocument);

StreamResult streamResult = new StreamResult(new File(targetFileName));

TransformerFactory tf = wInstance();

Transformer serializer = tf.newTransformer();

serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");

serializer.setOutputProperty(OutputKeys.INDENT, "yes");

serializer.setOutputProperty(OutputKeys.METHOD, "html");

return targetFileName;

}

docx格式转换为html

public static String docxToHtml() throws Exception {

File path = new URL("classpath:").getPath());

String imagePath = AbsolutePath() + "\\static\\image";

String sourceFileName = AbsolutePath() + "\\static\\test.docx";

String targetFileName = AbsolutePath() + "\\static\\test.html";

OutputStreamWriter outputStreamWriter = null;

try {

XWPFDocument document = new XWPFDocument(new FileInputStream(sourceFileName));

XHTMLOptions options = ate();

// 存放图⽚的⽂件夹

options.setExtractor(new FileImageExtractor(new File(imagePath)));

/ html中图⽚的路径

options.URIResolver(new BasicURIResolver("image"));

outputStreamWriter = new OutputStreamWriter(new FileOutputStream(targetFileName), "utf-8");

XHTMLConverter xhtmlConverter = (XHTMLConverter) Instance();

} finally {

if (outputStreamWriter != null) {

outputStreamWriter.close();

}

return targetFileName;

}

转换成功后会⽣成对应的html⽂件，如果想在前端展⽰，直接读取⽂件转换为String返回给前端即可。public static String readfile(String filePath) {

File file = new File(filePath);

InputStream input = null;

try {

input = new FileInputStream(file);

} catch (FileNotFoundException e) {

e.printStackTrace();

}

StringBuffer buffer = new StringBuffer();

byte[] bytes = new byte[1024];

try {

for (int n; (n = ad(bytes)) != -1;) {

buffer.append(new String(bytes, 0, n, "utf8"));

}

} catch (IOException e) {

e.printStackTrace();

}

String();

}

在富文本编辑器ckeditor中的显示效果：

三.html转换为word

实现思路就是先把html中的所有图片元素提取出来，统一替换为变量字符“${imgReplace}”，如果有多张图片，可以按顺序编号（${imgReplace1}、${imgReplace2}……），之后生成对应的doc文件（之前试过直接生成docx文件发现打不开，这个问题尚未找到好的解决方法），我们将其另存为docx文件，之后就可以把变量替换为图片了：

public static String writeWordFile(String content) {

String path = "D:/wordFile";

Map<String, Object> param = new HashMap<String, Object>();

if (!"".equals(path)) {

File fileDir = new File(path);

if (!ists()) {

fileDir.mkdirs();

}

content = HtmlUtils.htmlUnescape(content);

List<HashMap<String, String>> imgs = getImgStr(content);

int count = 0;

for (HashMap<String, String> img : imgs) {

count++;

//处理替换以“/>”结尾的img标签

content = ("img"), "${imgReplace" + count + "}");

//处理替换以“>”结尾的img标签

content = ("img1"), "${imgReplace" + count + "}");

Map<String, Object> header = new HashMap<String, Object>();

try {

File filePath = new URL("classpath:").getPath());

String imagePath = AbsolutePath() + "\\static\\";

imagePath += ("src").replaceAll("/", "\\\\");

//如果没有宽⾼属性，默认设置为400*300

("width") == null || ("height") == null) {

header.put("width", 400);

header.put("height", 300);

}else {

header.put("width", (int) (Double.("width"))));

header.put("height", (int) (Double.("height"))));

}

header.put("type", "jpg");

header.put("content", OfficeUtil.inputStream2ByteArray(new FileInputStream(imagePath), true));

} catch (FileNotFoundException e) {

e.printStackTrace();

}

param.put("${imgReplace" + count + "}", header);

}

try {

// ⽣成doc格式的word⽂档，需要⼿动改为docx

byte by[] = Bytes("UTF-8");

ByteArrayInputStream bais = new ByteArrayInputStream(by);

POIFSFileSystem poifs = new POIFSFileSystem();

DirectoryEntry directory = Root();

DocumentEntry documentEntry = ateDocument("WordDocument", bais);

FileOutputStream ostream = new FileOutputStream("D:\\wordFile\\temp.doc");

poifs.writeFilesystem(ostream);

bais.close();

ostream.close();

// 临时⽂件（⼿动改好的docx⽂件）

CustomXWPFDocument doc = ateWord(param, "D:\\wordFile\\temp.docx");

//最终⽣成的带图⽚的word⽂件

FileOutputStream fopts = new FileOutputStream("D:\\wordFile\\final.docx");

doc.write(fopts);

fopts.close();

} catch (Exception e) {

e.printStackTrace();

}

return "D:/wordFile/final.docx";

}

/**
 * Collects information about every {@code <img>} element in an HTML string.
 *
 * <p>Each returned map contains:
 * <ul>
 *   <li>{@code "img1"} – the element's markup as rendered by Jsoup;</li>
 *   <li>{@code "img"} – the same markup with the final character replaced by {@code "/>"},
 *       i.e. the self-closing spelling of the tag;</li>
 *   <li>{@code "src"} – the value of the {@code src} attribute;</li>
 *   <li>{@code "width"} / {@code "height"} – present only when the attribute is non-empty,
 *       with a trailing {@code px} unit removed.</li>
 * </ul>
 *
 * @param htmlStr HTML markup to scan
 * @return one map per image, in document order; empty when there are no images
 */
public static List<HashMap<String, String>> getImgStr(String htmlStr) {
    List<HashMap<String, String>> pics = new ArrayList<HashMap<String, String>>();
    Document doc = Jsoup.parse(htmlStr);
    Elements imgs = doc.select("img");
    for (Element img : imgs) {
        HashMap<String, String> map = new HashMap<String, String>();

        String width = img.attr("width");
        if (!"".equals(width)) {
            map.put("width", stripPxSuffix(width));
        }
        String height = img.attr("height");
        if (!"".equals(height)) {
            map.put("height", stripPxSuffix(height));
        }

        String markup = img.toString();
        map.put("img", markup.substring(0, markup.length() - 1) + "/>");
        map.put("img1", markup);
        map.put("src", img.attr("src"));
        pics.add(map);
    }
    return pics;
}

/**
 * Removes a trailing {@code px} unit (case-insensitive) from a dimension value.
 *
 * <p>Values without the unit are returned unchanged apart from trimming, so {@code "400"}
 * is no longer truncated to {@code "4"} and a one-character value no longer throws.
 */
private static String stripPxSuffix(String dimension) {
    String trimmed = dimension.trim();
    // regionMatches returns false for a negative offset, which covers values shorter than 2.
    if (trimmed.regionMatches(true, trimmed.length() - 2, "px", 0, 2)) {
        return trimmed.substring(0, trimmed.length() - 2).trim();
    }
    return trimmed;
}

OfficeUtil工具类。之前发现网上的写法只支持一张图片的修改，多张图片就会报错：因为添加图片后，processParagraphs方法中runs的大小改变了，继续遍历时会抛出ConcurrentModificationException，就和我们在循环list的过程中删除元素会报异常是一个道理。解决方法就是复制一个新的ArrayList进行循环即可：

ample.demo.util;

import java.io.ByteArrayInputStream;

import java.io.FileInputStream;

import java.io.IOException;

import java.io.InputStream;

import java.util.ArrayList;

import java.util.Iterator;

import java.util.List;

import java.util.Map;

import java.util.Map.Entry;

import org.apache.poi.POIXMLDocument;

import org.apache.actor.WordExtractor;

import org.apache.poi.openxml4j.opc.OPCPackage;

import org.apache.poi.xwpf.usermodel.XWPFParagraph;

import org.apache.poi.xwpf.usermodel.XWPFRun;

import org.apache.poi.xwpf.usermodel.XWPFTable;

import org.apache.poi.xwpf.usermodel.XWPFTableCell;

import org.apache.poi.xwpf.usermodel.XWPFTableRow;

/**
 * Utilities for Word 2007 (.docx) documents.
 */

public class OfficeUtil {

/**

* 根据指定的参数值、模板，⽣成 word ⽂档

* @param param 需要替换的变量

* @param template 模板

public static CustomXWPFDocument generateWord(Map<String, Object> param, String template) {

CustomXWPFDocument doc = null;

try {

OPCPackage pack = POIXMLDocument.openPackage(template);

doc = new CustomXWPFDocument(pack);

if (param != null && param.size() > 0) {

//处理段落

List<XWPFParagraph> paragraphList = Paragraphs();

processParagraphs(paragraphList, param, doc);

//处理表格

Iterator<XWPFTable> it = TablesIterator();

while (it.hasNext()) {

XWPFTable table = it.next();

List<XWPFTableRow> rows = Rows();

for (XWPFTableRow row : rows) {

List<XWPFTableCell> cells = TableCells();

for (XWPFTableCell cell : cells) {

List<XWPFParagraph> paragraphListTable = Paragraphs();

processParagraphs(paragraphListTable, param, doc);

}

} catch (Exception e) {

e.printStackTrace();

}

return doc;

}

/**

* 处理段落

* @param paragraphList

public static void processParagraphs(List<XWPFParagraph> paragraphList,Map<String, Object> param,CustomXWPFDocument doc){ if(paragraphList != null && paragraphList.size() > 0){

for(XWPFParagraph paragraph:paragraphList){

//poi转换过来的⾏间距过⼤，需要⼿动调整

SpacingBefore() >= 1000 || SpacingAfter() > 1000) {

paragraph.setSpacingBefore(0);

paragraph.setSpacingAfter(0);

}

//设置word中左右间距

paragraph.setIndentationLeft(0);

paragraph.setIndentationRight(0);

List<XWPFRun> runs = Runs();

//加了图⽚，修改了paragraph的runs的size，所以循环不能使⽤runs

List<XWPFRun> allRuns = new ArrayList<XWPFRun>(runs);

for (XWPFRun run : allRuns) {

String text = Text(0);

if(text != null){

boolean isSetText = false;

for (Entry<String, Object> entry : Set()) {

String key = Key();

if(text.indexOf(key) != -1){

isSetText = true;

Object value = Value();

if (value instanceof String) {//⽂本替换

text = place(key, String());

} else if (value instanceof Map) {//图⽚替换

text = place(key, "");

Map pic = (Map)value;

int width = Integer.("width").toString());

int height = Integer.("height").toString());

int picType = ("type").toString());

byte[] byteArray = (byte[]) ("content");

ByteArrayInputStream byteInputStream = new ByteArrayInputStream(byteArray);

try {

String blipId = doc.addPictureData(byteInputStream,picType);

java使用POI实现html和word相互转换

发布评论取消回复

最近发表

热门文章

标签列表