libreoffice容器word转pdf

发布于:2025-05-25 ⋅ 阅读:(16) ⋅ 点赞:(0)

先说结论:市面上不花钱、简单且效果好的,就是这种方式(LibreOffice 容器)。实测下来,Online(在线接口)方式的转换效果不如命令行转换的效果好。AsposeWords 和 SpireDoc 效果都不错,但如果只是为了 Word 转 PDF 这一个功能,花 3-5 万感觉不划算。
下载容器路径 https://docker.aityp.com/i/search?search=libreoffice
部署LibreOffice容器
使用Docker运行LibreOffice的无头模式(headless),提供文档转换服务:

# Mount the input/output directories and the host font directory into the container.
# Every continued line must end with a backslash, otherwise the shell stops the
# command early and the port mapping and image name are never passed to docker.
docker run -d \
  --name libreoffice1 \
  -v /opt/libreoffice1/input:/app/input \
  -v /opt/libreoffice1/output:/app/output \
  -v /usr/share/fonts/:/usr/share/fonts/ \
  -p 3000:3000 \
  linuxserver/libreoffice:latest

# LibreOffice Online variant. Per the author's note, one setting in the container's
# configuration file has to be changed, otherwise plain-HTTP access does not work
# (the exact setting is not stated here).
docker run -t -d -p 9980:9980 -e "username=admin" -e "password=123456" --restart always --cap-add SYS_ADMIN libreofficeonline:telecom

上面第一条命令启动一个LibreOffice容器,对外映射3000端口(第二条Online版本的命令映射9980端口),并将宿主机目录挂载到容器内以便文件交换。

Java调用REST API转换文档
若容器提供REST API(如libreserver/office-api),可通过Java的HTTP客户端发送请求:

package cn.zjtele.pubinfo.demo.api.controller;

import org.apache.http.HttpEntity;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.conn.ssl.NoopHostnameVerifier;
import org.apache.http.entity.ContentType;
import org.apache.http.entity.mime.MultipartEntityBuilder;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
import org.apache.http.ssl.SSLContexts;
import org.apache.http.util.EntityUtils;
import org.slf4j.MDC;

import javax.net.ssl.SSLContext;
import java.io.File;
import java.io.FileOutputStream;
import java.nio.charset.StandardCharsets;
import java.security.KeyManagementException;
import java.security.KeyStoreException;
import java.security.NoSuchAlgorithmException;
import java.util.HashMap;
import java.util.Map;
import java.util.UUID;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.Future;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

import static com.sun.javafx.runtime.async.BackgroundExecutor.getExecutor;

/**
 * Converts office documents to PDF by POSTing them as multipart form data to a
 * LibreOffice Online {@code convert-to} endpoint.
 *
 * <p>All requests go through one shared, pooled HTTP client so that connections
 * are reused across calls instead of a new client being built and torn down for
 * every conversion.
 */
public class LibreOfficeOnlineMasterConverter {

    /** Conversion endpoint; adjust host, port or path to match the server configuration. */
    private static final String LOOL_CONVERT_URL = "http://localhost:9980/lool/convert-to/pdf";

    /** MDC key under which a short per-conversion trace id is stored. */
    private static final String TRACE_ID_KEY = "traceId";

    /** Default source document used by {@link #main} when no argument is given. */
    private static final String DEFAULT_INPUT_FILE = "C:\\Users\\sheng\\Desktop\\chongqing.docx";

    /** Sentinel returned by {@link #postForPdf} when the server answered 200 without a body. */
    private static final int STATUS_NO_BODY = -1;

    /**
     * SSL context that accepts every certificate chain.
     *
     * <p>NOTE(review): together with {@link NoopHostnameVerifier} this disables TLS
     * verification entirely. It only matters if the endpoint is switched to HTTPS and
     * should be limited to trusted, internal servers.
     */
    static SSLContext sslContext;

    static {
        try {
            sslContext = SSLContexts.custom()
                    .loadTrustMaterial((chain, authType) -> true)
                    .build();
        } catch (NoSuchAlgorithmException | KeyManagementException | KeyStoreException e) {
            throw new RuntimeException(e);
        }
    }

    /** Timeouts applied to every request: 5 s to connect, 30 s of socket inactivity. */
    private static final RequestConfig REQUEST_CONFIG = RequestConfig.custom()
            .setConnectTimeout(5000)
            .setSocketTimeout(30000)
            .build();

    /** Shared client, created once at class initialization and reused by all conversions. */
    private static final CloseableHttpClient sharedHttpClient = HttpClients.custom()
            .setSSLContext(sslContext)
            .setSSLHostnameVerifier(NoopHostnameVerifier.INSTANCE)
            .setDefaultRequestConfig(REQUEST_CONFIG)
            .setMaxConnTotal(100)   // maximum connections overall
            .setMaxConnPerRoute(20) // maximum connections per route
            .build();

    /**
     * Prints the active-thread count and queue length of the executor that runs the
     * asynchronous conversions.
     *
     * <p>The figures are only available when that executor is a
     * {@link ThreadPoolExecutor}; otherwise the executor itself is printed.
     */
    public static void printPoolStatus() {
        Object executor = ConverterThreadPool.getExecutor();
        if (executor instanceof ThreadPoolExecutor) {
            ThreadPoolExecutor pool = (ThreadPoolExecutor) executor;
            System.out.println("活跃线程: " + pool.getActiveCount() +
                    " / 队列任务: " + pool.getQueue().size());
        } else {
            System.out.println("线程池状态不可用: " + executor);
        }
    }

    /**
     * Converts {@code inputFile} to PDF and writes the result to {@code outputFile}.
     *
     * <p>The document is first uploaded under the multipart field name {@code "file"}.
     * If the server rejects that with HTTP 400, the upload is retried once under the
     * field name {@code "data"}.
     *
     * @param inputFile  path of the document to convert
     * @param outputFile path the PDF is written to
     * @return {@code true} if a PDF was received and written, {@code false} on any failure
     *         (failures are reported on standard error, not thrown)
     * @throws NoSuchAlgorithmException never thrown by this implementation; kept so that
     *         existing callers continue to compile
     * @throws KeyStoreException        see above
     * @throws KeyManagementException   see above
     */
    public static boolean convertToPdf(String inputFile, String outputFile)
            throws NoSuchAlgorithmException, KeyStoreException, KeyManagementException {
        MDC.put(TRACE_ID_KEY, UUID.randomUUID().toString().substring(0, 8));
        try {
            System.out.println("开始处理文件: " + inputFile);
            if (!new File(inputFile).isFile()) {
                System.err.println("输入文件不存在: " + inputFile);
                return false;
            }

            int status = postForPdf("file", inputFile, outputFile);
            if (status == 200) {
                return true;
            }
            // The first response is already closed here, so the retry does not
            // hold two pooled connections at once.
            if (status == 400) {
                System.out.println("尝试使用'data'作为字段名重试...");
                return retryWithDataField(inputFile, outputFile);
            }
        } catch (Exception e) {
            System.err.println("转换过程中发生错误: " + e.getMessage());
            e.printStackTrace();
        } finally {
            // Worker threads are reused; do not let the trace id outlive this call.
            MDC.remove(TRACE_ID_KEY);
        }
        return false;
    }

    /**
     * Repeats the upload using {@code "data"} as the multipart field name.
     *
     * @return {@code true} if the retry produced a PDF, {@code false} otherwise
     */
    private static boolean retryWithDataField(String inputFile, String outputFile) {
        try {
            return postForPdf("data", inputFile, outputFile) == 200;
        } catch (Exception e) {
            System.err.println("重试失败: " + e.getMessage());
            return false;
        }
    }

    /**
     * Sends one conversion request and, on success, writes the PDF to {@code outputFile}.
     *
     * @param fieldName  multipart field name the document is uploaded under
     * @param inputFile  path of the document to upload
     * @param outputFile path the returned PDF is written to
     * @return 200 if the PDF was written; {@link #STATUS_NO_BODY} if the server answered
     *         200 without a body; otherwise the HTTP status code of the failed request
     * @throws java.io.IOException if the request or writing the output file fails
     */
    private static int postForPdf(String fieldName, String inputFile, String outputFile)
            throws java.io.IOException {
        File source = new File(inputFile);
        HttpPost httpPost = new HttpPost(LOOL_CONVERT_URL);

        HttpEntity requestBody = MultipartEntityBuilder.create()
                .addBinaryBody(fieldName, source, getContentType(inputFile), source.getName())
                .build();

        // Headers the original author found necessary for the "master" branch server.
        httpPost.setHeader("X-WOPI-Override", "CONVERT_TO");
        httpPost.setHeader("X-WOPI-FileExtension", getFileExtension(inputFile));
        httpPost.setHeader("X-WOPI-SuggestedTarget", getOutputFilename(outputFile));
        httpPost.setHeader("X-LOOL-WOPI-ConvertTo", "pdf");
        httpPost.setHeader("Accept", "application/pdf");
        httpPost.setHeader("User-Agent", "Java LibreOffice Converter");
        httpPost.setHeader("Cache-Control", "no-cache");
        httpPost.setEntity(requestBody);

        System.out.println("发送请求到: " + LOOL_CONVERT_URL);
        // Arrays.toString: concatenating the array directly would print its identity hash.
        System.out.println("使用头信息: " + java.util.Arrays.toString(httpPost.getAllHeaders()));

        try (CloseableHttpResponse response = sharedHttpClient.execute(httpPost)) {
            int statusCode = response.getStatusLine().getStatusCode();
            HttpEntity entity = response.getEntity();

            System.out.println("响应状态: " + response.getStatusLine());
            System.out.println("响应头: " + java.util.Arrays.toString(response.getAllHeaders()));

            if (statusCode == 200 && entity != null) {
                try (FileOutputStream fos = new FileOutputStream(outputFile)) {
                    entity.writeTo(fos);
                }
                return statusCode;
            }

            String responseBody = entity != null
                    ? EntityUtils.toString(entity, StandardCharsets.UTF_8)
                    : "无响应体";
            System.err.println("转换失败. 状态码: " + statusCode);
            System.err.println("响应体: " + responseBody);
            return statusCode == 200 ? STATUS_NO_BODY : statusCode;
        }
    }

    /**
     * Submits a conversion to the converter thread pool.
     *
     * @return a future that yields the result of {@link #convertToPdf}, or {@code false}
     *         if the task failed with an exception
     */
    public static Future<Boolean> convertToPdfAsync(String inputFile, String outputFile) {
        return ConverterThreadPool.getExecutor().submit(() -> {
            try {
                return convertToPdf(inputFile, outputFile);
            } catch (Exception e) {
                System.err.println("异步任务执行异常: " + e.getMessage());
                return false;
            }
        });
    }

    /**
     * Starts one asynchronous conversion per entry of {@code filePairs}.
     *
     * @param filePairs map from input path to output path
     * @return map from input path to the future of its conversion result
     */
    public static Map<String, Future<Boolean>> batchConvert(Map<String, String> filePairs) {
        Map<String, Future<Boolean>> results = new ConcurrentHashMap<>();
        filePairs.forEach((input, output) ->
                results.put(input, convertToPdfAsync(input, output))
        );
        return results;
    }

    /**
     * Maps the file extension to the MIME type sent for the uploaded part;
     * unknown extensions are sent as {@code application/octet-stream}.
     */
    private static ContentType getContentType(String filePath) {
        // Locale.ROOT keeps the lower-casing independent of the default locale.
        String ext = getFileExtension(filePath).toLowerCase(java.util.Locale.ROOT);
        switch (ext) {
            case "docx": return ContentType.create("application/vnd.openxmlformats-officedocument.wordprocessingml.document");
            case "doc": return ContentType.create("application/msword");
            case "odt": return ContentType.create("application/vnd.oasis.opendocument.text");
            default: return ContentType.APPLICATION_OCTET_STREAM;
        }
    }

    /**
     * Returns the text after the last dot of the file name, or {@code ""} if the name
     * has no dot or only a leading one. Dots in directory names are ignored; both
     * {@code /} and {@code \} are treated as separators.
     */
    private static String getFileExtension(String filePath) {
        int lastSeparator = Math.max(filePath.lastIndexOf('/'), filePath.lastIndexOf('\\'));
        int lastDot = filePath.lastIndexOf('.');
        return lastDot > lastSeparator + 1 ? filePath.substring(lastDot + 1) : "";
    }

    /** Returns the last path component of {@code filePath}. */
    private static String getOutputFilename(String filePath) {
        return new File(filePath).getName();
    }

    /**
     * Command-line entry point.
     *
     * <p>Usage: {@code [inputFile [outputFile]]}. Without arguments the built-in sample
     * path is converted; without an output path the PDF is written next to the input,
     * with its extension replaced by {@code .pdf}.
     */
    public static void main(String[] args) throws NoSuchAlgorithmException, KeyStoreException, KeyManagementException {
        String inputFile = args.length > 0 ? args[0] : DEFAULT_INPUT_FILE;

        String outputFile;
        if (args.length > 1) {
            outputFile = args[1];
        } else {
            String ext = getFileExtension(inputFile);
            String stem = ext.isEmpty()
                    ? inputFile
                    : inputFile.substring(0, inputFile.length() - ext.length() - 1);
            outputFile = stem + ".pdf";
        }

        System.out.println("开始转换: " + inputFile + " → " + outputFile);
        boolean converted = convertToPdf(inputFile, outputFile);
        System.out.println("转换结果: " + converted);
    }
}

 

通过命令行调用容器内工具
若容器仅包含LibreOffice命令行工具,可通过Java执行Docker命令完成转换:

package cn.zjtele.pubinfo.demo.wordtopdf;

import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;

/**
 * Converts a document to PDF by running LibreOffice in headless mode inside an
 * already-running Docker container via {@code docker exec}.
 *
 * <p>The code assumes that {@link #INPUT_DIR} and {@link #OUTPUT_DIR} are the host
 * directories mounted into the container as {@code /app/input} and
 * {@code /app/output} — confirm against the container's volume configuration.
 */
public class LibreOfficeConverter {

    private static final String INPUT_DIR = "D:/docker/input";   // local input directory
    private static final String OUTPUT_DIR = "D:/docker/output"; // local output directory

    /** Name of the running LibreOffice container that {@code docker exec} targets. */
    private static final String CONTAINER_NAME = "libreoffice767";

    /** File converted when no file name is passed on the command line. */
    private static final String DEFAULT_FILE_NAME = "11.docx";

    /**
     * Command-line entry point.
     *
     * <p>Usage: {@code [fileName]}, where {@code fileName} is looked up inside
     * {@link #INPUT_DIR}. Without an argument {@value #DEFAULT_FILE_NAME} is converted.
     * Progress and failures are printed; nothing is thrown to the caller.
     */
    public static void main(String[] args) {
        long startedAt = System.currentTimeMillis();
        String fileName = args.length > 0 ? args[0] : DEFAULT_FILE_NAME;
        Path inputFilePath = Paths.get(INPUT_DIR, fileName);

        if (!Files.isRegularFile(inputFilePath)) {
            System.out.println("文件不存在:" + inputFilePath);
            return;
        }

        try {
            // Make sure the output directory exists.
            Files.createDirectories(Paths.get(OUTPUT_DIR));

            // LibreOffice names the result after the input's base name.
            String outputFileName = toPdfFileName(inputFilePath.getFileName().toString());
            Path outputFilePath = Paths.get(OUTPUT_DIR, outputFileName);

            convertFileUsingLibreOffice(inputFilePath.toAbsolutePath().toString(), outputFilePath.toString());

            System.out.println("文件转换成功!PDF文件已保存到:" + outputFilePath);
            System.out.println("转换耗时:" + (System.currentTimeMillis() - startedAt) + "ms");
        } catch (InterruptedException e) {
            // Restore the interrupt flag so callers further up can still observe it.
            Thread.currentThread().interrupt();
            e.printStackTrace();
            System.out.println("文件转换失败!");
        } catch (Exception e) {
            e.printStackTrace();
            System.out.println("文件转换失败!");
        }
    }

    /**
     * Replaces the last extension of {@code fileName} with {@code .pdf}; a name without
     * an extension (or with only a leading dot) gets {@code .pdf} appended.
     */
    static String toPdfFileName(String fileName) {
        int lastDot = fileName.lastIndexOf('.');
        String stem = lastDot > 0 ? fileName.substring(0, lastDot) : fileName;
        return stem + ".pdf";
    }

    /**
     * Runs the conversion inside the container and verifies that the PDF was produced.
     *
     * @param inputFilePath  host path of the source document; only its file name is used,
     *                       resolved against {@code /app/input} inside the container
     * @param outputFilePath host path where the generated PDF is expected to appear
     * @throws IOException          if the process cannot be started or no PDF was produced
     * @throws InterruptedException if the thread is interrupted while waiting for the process
     * @throws RuntimeException     if the command exits with a non-zero code
     */
    private static void convertFileUsingLibreOffice(String inputFilePath, String outputFilePath) throws IOException, InterruptedException {
        String containerInputPath = "/app/input/" + new File(inputFilePath).getName();

        // An explicit argument list avoids whitespace tokenization of the command line,
        // so file names containing spaces reach LibreOffice as a single argument.
        ProcessBuilder builder = new ProcessBuilder(
                "docker", "exec", "-i", CONTAINER_NAME,
                "libreoffice", "--headless", "--convert-to", "pdf",
                "--outdir", "/app/output", containerInputPath);

        // Forward the child's output to this process's stdout: an undrained pipe can
        // fill up and block the child indefinitely.
        builder.redirectErrorStream(true);
        builder.redirectOutput(ProcessBuilder.Redirect.INHERIT);

        Process process = builder.start();
        int exitCode = process.waitFor();

        if (exitCode != 0) {
            throw new RuntimeException("LibreOffice 转换失败,退出码:" + exitCode);
        }
        // A zero exit code alone does not prove a PDF was written.
        if (!Files.isRegularFile(Paths.get(outputFilePath))) {
            throw new IOException("LibreOffice 未生成输出文件:" + outputFilePath);
        }
    }
}

文件路径处理注意事项
确保Java应用有权限访问宿主机和容器的挂载目录。
传给容器内命令的输入/输出路径需使用容器内的映射路径(如上文挂载的/app/input、/app/output),而不是宿主机路径。
转换完成后从挂载目录提取PDF文件。


网站公告

今日签到

点亮在社区的每一天
去签到