当前位置: 首页 > news > 正文

java实现word转html(支持docx及doc文件)

private final static String tempPath = "C:\\Users\\xxx\\Desktop\\Word2Html\\src\\test\\";//图片及相关文件保存的路径public static void main(String argv[]) {try {JFileChooser fileChooser = new JFileChooser();fileChooser.setDialogTitle("Select a Word Document");fileChooser.setAcceptAllFileFilterUsed(false);fileChooser.addChoosableFileFilter(new javax.swing.filechooser.FileNameExtensionFilter("Word Documents", "doc", "docx"));int returnValue = fileChooser.showOpenDialog(null);if (returnValue == JFileChooser.APPROVE_OPTION) {File inputFile = fileChooser.getSelectedFile();String fileName = inputFile.getAbsolutePath();String defaultOutputDir = System.getProperty("user.home") + "\\Desktop\\";String outputFileName = defaultOutputDir + inputFile.getName().replaceFirst("[.][^.]+$", "") + ".html";if (fileName.endsWith(".doc")) {doc2Html(fileName, outputFileName);} else if (fileName.endsWith(".docx")) {docx2Html(fileName, outputFileName);}}} catch (Exception e) {e.printStackTrace();}}/*** doc转换为html** @param fileName* @param outPutFile* @throws TransformerException* @throws IOException* @throws ParserConfigurationException*/public static void doc2Html(String fileName, String outPutFile) throws TransformerException, IOException, ParserConfigurationException {long startTime = System.currentTimeMillis();HWPFDocument wordDocument = new HWPFDocument(new FileInputStream(fileName));WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());// 图片保存路径设置wordToHtmlConverter.setPicturesManager(new PicturesManager() {public String savePicture(byte[] content, PictureType pictureType, String suggestedName, float widthInches, float heightInches) {String picturePath = "images" + File.separator + suggestedName;// 检查并创建图片文件夹File imageFolder = new File(tempPath + "images");if (!imageFolder.exists()) {boolean created = imageFolder.mkdirs(); // 创建文件夹if (created) {System.out.println("Images folder created at: " + 
imageFolder.getAbsolutePath());} else {System.out.println("Failed to create images folder.");}}// 写入图片数据,确保每次写入try {File pictureFile = new File(tempPath + picturePath);try (FileOutputStream fos = new FileOutputStream(pictureFile)) {fos.write(content);  // 写入图片数据System.out.println("Image saved to: " + pictureFile.getAbsolutePath());}} catch (IOException e) {e.printStackTrace();}return picturePath; // 返回相对路径}});wordToHtmlConverter.processDocument(wordDocument);Document htmlDocument = wordToHtmlConverter.getDocument();ByteArrayOutputStream out = new ByteArrayOutputStream();DOMSource domSource = new DOMSource(htmlDocument);StreamResult streamResult = new StreamResult(out);TransformerFactory tf = TransformerFactory.newInstance();Transformer serializer = tf.newTransformer();serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");serializer.setOutputProperty(OutputKeys.INDENT, "yes");serializer.setOutputProperty(OutputKeys.METHOD, "html");serializer.transform(domSource, streamResult);out.close();String htmlContent = new String(out.toByteArray());htmlContent = htmlContent.replaceAll("TOC \\\\o \"1-3\" \\\\h \\\\z \\\\u", "");writeFile(htmlContent, outPutFile);System.out.println("Generate " + outPutFile + " with " + (System.currentTimeMillis() - startTime) + " ms.");}/*** 写文件** @param content* @param path*/public static void writeFile(String content, String path) {FileOutputStream fos = null;BufferedWriter bw = null;try {File file = new File(path);fos = new FileOutputStream(file);bw = new BufferedWriter(new OutputStreamWriter(fos, "utf-8"));bw.write(content);} catch (FileNotFoundException fnfe) {fnfe.printStackTrace();} catch (IOException ioe) {ioe.printStackTrace();} finally {try {if (bw != null) bw.close();if (fos != null) fos.close();} catch (IOException e) {}}}/*** docx格式word转换为html** @param fileName* @param outPutFile* @throws TransformerException* @throws IOException* @throws ParserConfigurationException*/public static void docx2Html(String fileName, String 
outPutFile) throws TransformerException, IOException, ParserConfigurationException {long startTime = System.currentTimeMillis();XWPFDocument document = new XWPFDocument(new FileInputStream(fileName));// 提取目录StringBuilder toc = new StringBuilder();toc.append("<div id='toc'>\n<h2>Table of Contents</h2>\n<ul>\n");// 遍历文档中的段落,查找标题并构建目录List<XWPFParagraph> paragraphs = document.getParagraphs();for (XWPFParagraph paragraph : paragraphs) {String style = paragraph.getStyle();  // 获取段落样式if (style != null && (style.equals("Heading 1") || style.equals("Heading 2") || style.equals("Heading 3"))) {String text = paragraph.getText();// 根据标题级别构建目录项toc.append("<li><a href='#" + text.hashCode() + "'>" + text + "</a></li>\n");}}toc.append("</ul>\n</div>\n");// 设置XHTMLOptionsXHTMLOptions options = XHTMLOptions.create().indent(4);File imageFolder = new File(tempPath);options.setExtractor(new FileImageExtractor(imageFolder));options.URIResolver(new FileURIResolver(imageFolder));File outFile = new File(outPutFile);outFile.getParentFile().mkdirs();OutputStream out = new FileOutputStream(outFile);// Convert docx to XHTMLXHTMLConverter.getInstance().convert(document, out, options);System.out.println("Generate " + outPutFile + " with " + (System.currentTimeMillis() - startTime) + " ms.");// 获取转换后的HTML内容String htmlContent = new String(((ByteArrayOutputStream) out).toByteArray(), "UTF-8");// 将TOC插入到HTML的开头htmlContent = toc + htmlContent;// 手动添加表格样式(边框)htmlContent = htmlContent.replaceAll("<table>", "<table style='border: 1px solid black; border-collapse: collapse;'>");htmlContent = htmlContent.replaceAll("<td>", "<td style='border: 1px solid black; padding: 5px;'>");htmlContent = htmlContent.replaceAll("<th>", "<th style='border: 1px solid black; padding: 5px;'>");// 写入到输出文件writeFile(htmlContent, outPutFile);}

pom文件

<!-- Maven build descriptor for the Word2Html converter. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>fxma</groupId>
    <artifactId>Word2Html</artifactId>
    <version>0.0.1-SNAPSHOT</version>
    <packaging>jar</packaging>

    <name>Word2Html</name>
    <url>http://maven.apache.org</url>

    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    </properties>

    <dependencies>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>3.8.1</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>commons-io</groupId>
            <artifactId>commons-io</artifactId>
            <version>2.4</version>
        </dependency>
        <!-- Apache POI: core, OOXML (.docx) and scratchpad (.doc / HWPF) modules. -->
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi</artifactId>
            <version>3.8</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-ooxml</artifactId>
            <version>3.8</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-scratchpad</artifactId>
            <version>3.8</version>
        </dependency>
        <dependency>
            <groupId>fr.opensagres.xdocreport</groupId>
            <artifactId>xdocreport</artifactId>
            <version>1.0.4</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>ooxml-schemas</artifactId>
            <version>1.1</version>
        </dependency>
    </dependencies>
</project>

 

 

http://www.lryc.cn/news/522803.html

相关文章:

  • 搜维尔科技:Xsens人形机器人解决方案的优势
  • 【王树森搜索引擎技术】概要01:搜索引擎的基本概念
  • 《Java核心技术II》可中断套接字
  • 基于 Python 的深度学习的车辆特征分析系统,附源码
  • C#读写ini配置文件保存设置参数
  • SwanLab环境变量列表
  • 深度学习入门-CNN
  • 微服务网关,如何选择?
  • SpringBoot集成Mqtt服务实现消费发布和接收消费
  • 在Mac mini上实现本地化部署AI和知识库
  • 一个方法被多个线程同时调用,确保同样参数的调用只能有一个线程执行,不同参数的调用则可以多个线程同时执行
  • 3. MySQL事务并发的问题与解决方法
  • 25/1/15 嵌入式笔记 初学STM32F108
  • MySQL的不同SQL模式导致行为不同?
  • Flink 使用 Kafka 作为数据源时遇到了偏移量提交失败的问题
  • 【日志篇】(7.6) ❀ 01. 在macOS下刷新FortiAnalyzer固件 ❀ FortiAnalyzer 日志分析
  • LSA更新、撤销
  • DevUI 2024 年度运营报告:开源生态的成长足迹与未来蓝图
  • centos 7 Mysql服务
  • React 表单处理与网络请求封装详解
  • C++ 的 CTAD 与推断指示(Deduction Guides)
  • 【Rust自学】13.2. 闭包 Pt.2:闭包的类型推断和标注
  • 如何将原来使用cmakelist编译的qt工程转换为可使用Visual Studio编译的项目
  • 微软确认Win10停更不碍Microsoft 365使用!未来是否更新成谜
  • Ubuntu、Windows系统网络设置(ping通内外网)
  • 华为OD机试E卷 ---最大值
  • UllnnovationHub,一个开源的WPF控件库
  • Fabric区块链网络搭建:保姆级图文详解
  • Kubernetes (K8s) 权限管理指南
  • IM聊天学习资源