Java读取SpringBoot工程内所有汉字

发布于:2025-07-09 ⋅ 阅读:(19) ⋅ 点赞:(0)

使用 Java 扫描 Spring Boot 工程内的所有汉字(先去除代码注释),并将文件路径、文件类型、汉字内容及其上下文输出到 Excel 文件。

package com.bims;

import org.apache.poi.ss.usermodel.*;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;

import java.io.*;
import java.nio.charset.StandardCharsets;
import java.nio.file.*;
import java.util.*;
import java.util.regex.*;
import java.util.stream.Collectors;
import java.util.stream.Stream;


/**
 * Command-line tool that scans a project directory for Chinese text.
 *
 * <p>For every file whose extension is registered in {@link #COMMENT_PATTERNS}
 * the tool reads the file as UTF-8, strips comments with regular expressions,
 * finds every run of characters in the range U+4E00..U+9FA5 and records the
 * file path, file type, matched text and surrounding context. The collected
 * rows are written to an .xlsx workbook.
 *
 * <p>Limitation: comments are removed with plain regular expressions, not a
 * parser, so comment markers that appear inside string literals (for example
 * the {@code //} in a URL) are treated as comments as well.
 */
public class 读取工程内所有文字 {

    /**
     * Maximum number of data rows per sheet (the header row is extra); stays
     * below the 1,048,576-row limit of the .xlsx format.
     */
    private static final int MAX_ROWS_PER_SHEET = 1000000;

    /** Number of characters captured on each side of a match as context. */
    private static final int CONTEXT_SIZE = 200;

    /** Upper bound on matches recorded for a single file. */
    private static final int MAX_MATCHES_PER_FILE = 100000;

    /** Longest text an Excel cell can hold; POI rejects longer values. */
    private static final int MAX_CELL_TEXT_LENGTH = 32767;

    /** Comment patterns keyed by lower-case file extension (including the dot). */
    private static final Map<String, CommentPattern> COMMENT_PATTERNS = new HashMap<>();
    static {
        COMMENT_PATTERNS.put(".java", new CommentPattern("//.*", "/\\*.*?\\*/", Pattern.DOTALL));
        COMMENT_PATTERNS.put(".js", new CommentPattern("//.*", "/\\*.*?\\*/", Pattern.DOTALL));
        COMMENT_PATTERNS.put(".html", new CommentPattern(null, "<!--.*?-->", Pattern.DOTALL));
        COMMENT_PATTERNS.put(".xml", new CommentPattern(null, "<!--.*?-->", Pattern.DOTALL));
        COMMENT_PATTERNS.put(".css", new CommentPattern(null, "/\\*.*?\\*/", Pattern.DOTALL));
    }

    /** Matches one or more consecutive characters in U+4E00..U+9FA5. */
    private static final Pattern CHINESE_PATTERN = Pattern.compile("[\\u4e00-\\u9fa5]+");

    /**
     * Prompts for the project directory and the output file name on standard
     * input, runs the scan and writes the Excel report.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String projectDir;
        String outputFile;
        try (Scanner scanner = new Scanner(System.in)) {
            System.out.print("请输入Java工程路径: ");
            // Trim so that stray whitespace around a pasted path does not break the lookup.
            projectDir = scanner.nextLine().trim();
            System.out.print("请输入输出Excel文件名(默认:chinese_results.xlsx): ");
            outputFile = scanner.nextLine().trim();
        }
        if (outputFile.isEmpty()) {
            outputFile = "chinese_results.xlsx";
        }

        try {
            List<ChineseOccurrence> results = scanProject(projectDir);
            saveToExcel(results, outputFile);
            System.out.println("扫描完成! 找到 " + results.size() + " 处中文内容");
            System.out.println("结果保存到: " + new File(outputFile).getAbsolutePath());
        } catch (IOException e) {
            System.err.println("处理过程中出错: " + e.getMessage());
            e.printStackTrace();
        } catch (Exception e) {
            System.err.println("发生错误: " + e.getMessage());
            e.printStackTrace();
        }
    }

    /**
     * Walks {@code projectDir} recursively and collects the Chinese text found
     * in every supported file.
     *
     * @param projectDir root directory of the project to scan
     * @return all occurrences, in the order the files were encountered
     * @throws IOException if the directory does not exist or cannot be walked
     */
    private static List<ChineseOccurrence> scanProject(String projectDir) throws IOException {
        long startTime = System.currentTimeMillis();

        Path startPath = Paths.get(projectDir);
        if (!Files.exists(startPath)) {
            throw new IOException("路径不存在: " + projectDir);
        }

        List<ChineseOccurrence> results;
        // Files.walk holds open directory handles, so the stream must be closed.
        try (Stream<Path> paths = Files.walk(startPath)) {
            // Each file yields its own list; collect() merges them in encounter
            // order, so no shared mutable list or locking is needed.
            results = paths
                    .parallel()
                    .filter(Files::isRegularFile)
                    .filter(path -> COMMENT_PATTERNS.containsKey(
                            extensionOf(path).toLowerCase(Locale.ROOT)))
                    .map(path -> scanFile(path))
                    .flatMap(List::stream)
                    .collect(Collectors.toList());
        }

        long duration = System.currentTimeMillis() - startTime;
        System.out.println("扫描耗时: " + duration + "ms, 找到 " + results.size() + " 个结果");
        return results;
    }

    /**
     * Returns the extension of the file name, including the leading dot and in
     * its original case, or an empty string when the name has no dot.
     */
    private static String extensionOf(Path path) {
        Path name = path.getFileName();
        if (name == null) {
            return "";
        }
        String fileName = name.toString();
        int dotIndex = fileName.lastIndexOf('.');
        return dotIndex == -1 ? "" : fileName.substring(dotIndex);
    }

    /**
     * Reads one file, strips its comments and returns the Chinese text found.
     * Failures are reported on standard error and yield an empty list so that
     * one unreadable file does not abort the whole scan.
     */
    private static List<ChineseOccurrence> scanFile(Path path) {
        try {
            String fileType = extensionOf(path);
            CommentPattern pattern = COMMENT_PATTERNS.get(fileType.toLowerCase(Locale.ROOT));
            if (pattern == null) {
                return Collections.emptyList();
            }

            String content = new String(Files.readAllBytes(path), StandardCharsets.UTF_8);
            String cleanContent = removeComments(content, pattern);
            return findChineseInContent(cleanContent, path.toString(), fileType);
        } catch (IOException e) {
            System.err.println("处理文件出错: " + path + " - " + e.getMessage());
        } catch (Exception e) {
            System.err.println("处理文件时发生错误: " + path + " - " + e.getMessage());
        }
        return Collections.emptyList();
    }

    /**
     * Removes block comments first and then single-line comments, using
     * whichever of the two patterns is defined for the file type.
     */
    private static String removeComments(String content, CommentPattern pattern) {
        String result = content;
        if (pattern.multiPattern != null) {
            result = pattern.multiPattern.matcher(result).replaceAll("");
        }
        if (pattern.singlePattern != null) {
            result = pattern.singlePattern.matcher(result).replaceAll("");
        }
        return result;
    }

    /**
     * Finds every run of Chinese characters in {@code content} together with up
     * to {@link #CONTEXT_SIZE} characters of context on each side.
     *
     * @param content  file content with comments already removed
     * @param filePath path recorded with each occurrence
     * @param fileType extension recorded with each occurrence
     * @return at most {@link #MAX_MATCHES_PER_FILE} occurrences, in file order
     */
    private static List<ChineseOccurrence> findChineseInContent(String content, String filePath,
                                                                String fileType) {
        List<ChineseOccurrence> found = new ArrayList<>();
        Matcher matcher = CHINESE_PATTERN.matcher(content);

        while (found.size() < MAX_MATCHES_PER_FILE && matcher.find()) {
            int contextStart = Math.max(0, matcher.start() - CONTEXT_SIZE);
            int contextEnd = Math.min(content.length(), matcher.end() + CONTEXT_SIZE);
            String context = content.substring(contextStart, contextEnd);
            found.add(new ChineseOccurrence(filePath, fileType, matcher.group(), context));
        }

        // Warn only when matches were actually left unrecorded.
        if (found.size() >= MAX_MATCHES_PER_FILE && matcher.find()) {
            System.err.println("警告: 文件 " + filePath + " 超过最大匹配限制 (" + MAX_MATCHES_PER_FILE + ")");
        }
        return found;
    }

    /**
     * Writes the occurrences to an .xlsx workbook, splitting them over several
     * sheets of at most {@link #MAX_ROWS_PER_SHEET} data rows each. A single
     * header-only sheet is written when there are no results, because a
     * workbook without any sheet is not a usable Excel file.
     *
     * @param results    occurrences to export
     * @param outputFile path of the workbook to create or overwrite
     * @throws IOException if the workbook cannot be written
     */
    private static void saveToExcel(List<ChineseOccurrence> results, String outputFile) throws IOException {
        try (Workbook workbook = new XSSFWorkbook()) {
            int totalResults = results.size();
            int sheetCount = Math.max(1,
                    (int) Math.ceil((double) totalResults / MAX_ROWS_PER_SHEET));

            System.out.println("总结果数: " + totalResults);
            System.out.println("需要创建 " + sheetCount + " 个Sheet页");

            // Styles are created once and shared by all cells.
            CellStyle headerStyle = createHeaderStyle(workbook);
            CellStyle wrapTextStyle = createWrapTextStyle(workbook);
            CellStyle fileTypeStyle = createFileTypeStyle(workbook);

            for (int sheetIndex = 0; sheetIndex < sheetCount; sheetIndex++) {
                // Range of results that belongs on this sheet.
                int startIndex = sheetIndex * MAX_ROWS_PER_SHEET;
                int endIndex = Math.min(startIndex + MAX_ROWS_PER_SHEET, totalResults);
                int rowsInSheet = endIndex - startIndex;

                String sheetName = "结果";
                if (sheetCount > 1) {
                    sheetName += "_" + (sheetIndex + 1);
                }
                Sheet sheet = workbook.createSheet(sheetName);

                createHeaderRow(sheet, headerStyle);

                for (int i = 0; i < rowsInSheet; i++) {
                    // Row 0 is the header, so data starts at row 1.
                    Row row = sheet.createRow(i + 1);
                    fillDataRow(row, results.get(startIndex + i), fileTypeStyle, wrapTextStyle);
                }

                // Column widths: path and text are auto-sized, type and context are fixed.
                sheet.autoSizeColumn(0);
                sheet.setColumnWidth(1, 10 * 256);
                sheet.autoSizeColumn(2);
                sheet.setColumnWidth(3, 150 * 256);

                System.out.println("Sheet " + (sheetIndex + 1) + " 完成: " + rowsInSheet + " 行");
            }

            try (FileOutputStream fos = new FileOutputStream(outputFile)) {
                workbook.write(fos);
            }
        }
    }

    /** Fills the four data cells (path, type, text, context) of one row. */
    private static void fillDataRow(Row row, ChineseOccurrence occ,
                                    CellStyle fileTypeStyle, CellStyle wrapTextStyle) {
        Cell fileCell = row.createCell(0);
        fileCell.setCellValue(truncateForCell(occ.filePath));

        Cell typeCell = row.createCell(1);
        typeCell.setCellValue(occ.fileType);
        typeCell.setCellStyle(fileTypeStyle);

        Cell chineseCell = row.createCell(2);
        chineseCell.setCellValue(truncateForCell(occ.chinese));

        Cell contextCell = row.createCell(3);
        contextCell.setCellValue(truncateForCell(occ.context));
        contextCell.setCellStyle(wrapTextStyle);
    }

    /**
     * Cuts {@code text} down to the maximum cell length so that one oversized
     * value cannot make the whole export fail.
     */
    private static String truncateForCell(String text) {
        if (text == null || text.length() <= MAX_CELL_TEXT_LENGTH) {
            return text;
        }
        return text.substring(0, MAX_CELL_TEXT_LENGTH);
    }

    /** Creates the header style: bold white text, centered, on dark blue with thin borders. */
    private static CellStyle createHeaderStyle(Workbook workbook) {
        CellStyle headerStyle = workbook.createCellStyle();
        Font headerFont = workbook.createFont();
        headerFont.setBold(true);
        headerFont.setColor(IndexedColors.WHITE.getIndex());
        headerStyle.setFont(headerFont);
        headerStyle.setFillForegroundColor(IndexedColors.DARK_BLUE.getIndex());
        headerStyle.setFillPattern(FillPatternType.SOLID_FOREGROUND);
        headerStyle.setAlignment(HorizontalAlignment.CENTER);
        applyThinBorders(headerStyle);
        return headerStyle;
    }

    /** Creates the style for context cells: wrapped text with thin borders. */
    private static CellStyle createWrapTextStyle(Workbook workbook) {
        CellStyle style = workbook.createCellStyle();
        style.setWrapText(true);
        applyThinBorders(style);
        return style;
    }

    /** Creates the style for file-type cells: centered on light yellow with thin borders. */
    private static CellStyle createFileTypeStyle(Workbook workbook) {
        CellStyle style = workbook.createCellStyle();
        style.setAlignment(HorizontalAlignment.CENTER);
        style.setFillForegroundColor(IndexedColors.LIGHT_YELLOW.getIndex());
        style.setFillPattern(FillPatternType.SOLID_FOREGROUND);
        applyThinBorders(style);
        return style;
    }

    /** Sets a thin border on all four sides of the style. */
    private static void applyThinBorders(CellStyle style) {
        style.setBorderBottom(BorderStyle.THIN);
        style.setBorderTop(BorderStyle.THIN);
        style.setBorderLeft(BorderStyle.THIN);
        style.setBorderRight(BorderStyle.THIN);
    }

    /** Writes the four column titles into row 0 of the sheet. */
    private static void createHeaderRow(Sheet sheet, CellStyle headerStyle) {
        String[] titles = {"文件路径", "文件类型", "中文字符", "上下文代码"};
        Row headerRow = sheet.createRow(0);
        for (int column = 0; column < titles.length; column++) {
            Cell cell = headerRow.createCell(column);
            cell.setCellValue(titles[column]);
            cell.setCellStyle(headerStyle);
        }
    }

    /** One run of Chinese text found in a file, with its surrounding context. */
    private static class ChineseOccurrence {
        final String filePath;
        final String fileType;
        final String chinese;
        final String context;

        ChineseOccurrence(String filePath, String fileType, String chinese, String context) {
            this.filePath = filePath;
            this.fileType = fileType;
            this.chinese = chinese;
            this.context = context;
        }
    }

    /** Compiled comment patterns for one file type; either pattern may be null. */
    private static class CommentPattern {
        final Pattern singlePattern;
        final Pattern multiPattern;

        /**
         * @param singleLineRegex regex for single-line comments, or null if the type has none
         * @param multiLineRegex  regex for block comments, or null if the type has none
         * @param flags           {@link Pattern} flags for the block-comment regex
         */
        CommentPattern(String singleLineRegex, String multiLineRegex, int flags) {
            // A single-line comment must stop at the line break. With DOTALL
            // the '.' would also match newlines and the pattern would swallow
            // everything from the first "//" to the end of the file.
            this.singlePattern = singleLineRegex != null ?
                    Pattern.compile(singleLineRegex, flags & ~Pattern.DOTALL) : null;
            this.multiPattern = multiLineRegex != null ?
                    Pattern.compile(multiLineRegex, flags) : null;
        }
    }
}


网站公告

今日签到

点亮在社区的每一天
去签到