XML高效处理类 - 专为Office文档XML处理优化

发布于:2025-07-23 ⋅ 阅读:(15) ⋅ 点赞:(0)

/**
 *
 * 提供XML读取、写入、修改、查询等高级功能,支持命名空间和复杂XML结构
 * 
 * 主要功能:
 * 1. 复杂路径解析(支持属性筛选、索引、通配符)
 * 2. 完整节点类型支持(元素、文本、CDATA、注释、PI)
 * 3. 高效元素/属性操作(增删改查、复制、移动)
 * 4. 流式处理(低内存占用,适合大型XML)
 */

<?php
namespace BTWord\Processor;

use BTWord\Exceptions\DocxProcessingException;
use XMLReader;
use XMLWriter;
use function count;
use function explode;
use function implode;
use function in_array;
use function preg_match;
use function strpos;
/**
 * XML高效处理类 - 专为Office文档XML处理优化
 * 提供XML读取、写入、修改、查询等高级功能,支持命名空间和复杂XML结构
 * 
 * 主要功能:
 * 1. 复杂路径解析(支持属性筛选、索引、通配符)
 * 2. 完整节点类型支持(元素、文本、CDATA、注释、PI)
 * 3. 高效元素/属性操作(增删改查、复制、移动)
 * 4. 流式处理(低内存占用,适合大型XML)
 */
class XmlProcessor
{
    /** Pull parser shared by every read, query and modify operation of this class. */
    private XMLReader $reader;
    /** Writer that accumulates the output of the document-producing operations. */
    private XMLWriter $writer;
    /** Registered namespaces keyed prefix => URI (filled by addNamespace()). */
    private array $namespaces = [];
    /** Reverse namespace map keyed URI => prefix (filled by addNamespace()). */
    private array $namespaceUris = [];
// XML node type constants (aliases of the corresponding XMLReader constants)
    private const NODE_ELEMENT = XMLReader::ELEMENT;
    private const NODE_END_ELEMENT = XMLReader::END_ELEMENT;
    private const NODE_TEXT = XMLReader::TEXT;
    private const NODE_CDATA = XMLReader::CDATA;
    private const NODE_COMMENT = XMLReader::COMMENT;
    private const NODE_PI = XMLReader::PI;
    // Aliases SIGNIFICANT_WHITESPACE only; XMLReader::WHITESPACE has no alias in this class.
    private const NODE_WHITESPACE = XMLReader::SIGNIFICANT_WHITESPACE;

    /**
     * Creates the reader/writer pair used by all operations.
     *
     * The writer is opened on an in-memory buffer before it is configured:
     * XMLWriter rejects setIndent()/setIndentString() on a writer that has not
     * been opened (PHP 8 throws an Error, PHP 7 emits a warning and ignores
     * the call).
     *
     * NOTE(review): the public operations call openMemory() again before
     * writing, which starts a new buffer; confirm whether the indentation
     * settings are expected to carry over to those buffers.
     */
    public function __construct()
    {
        $this->reader = new XMLReader();
        $this->writer = new XMLWriter();
        // The writer must be opened before any setting can be applied to it
        $this->writer->openMemory();
        $this->writer->setIndent(true);
        $this->writer->setIndentString('  ');
    }
  /**
     * Registers a namespace prefix/URI pair in both lookup tables.
     *
     * A later registration that reuses a prefix or a URI overwrites the
     * corresponding earlier entry.
     *
     * @param string $prefix Namespace prefix
     * @param string $uri    Namespace URI
     */
    public function addNamespace(string $prefix, string $uri): void
    {
        // URI => prefix (reverse lookup)
        $this->namespaceUris[$uri] = $prefix;
        // prefix => URI (forward lookup used when writing elements/attributes)
        $this->namespaces[$prefix] = $uri;
    }
    /**
     * Parses XML into a nested array while streaming through the document.
     *
     * Result shape:
     *  - every element is stored under its qualified name in its parent's
     *    child map; a name that occurs more than once becomes a list of
     *    element arrays;
     *  - an element array holds its attributes under '@name' keys (only when
     *    $preserveAttributes is true) and, when it has content, a '#children'
     *    map built by the same rules;
     *  - text/CDATA is stored as '#text' when it is the first entry of a child
     *    map, otherwise appended as ['#text' => value];
     *  - comments are stored under '#comment' and processing instructions
     *    under '#pi_<target>' (a later one overwrites an earlier one).
     *
     * The previous implementation kept references in a flat stack and indexed
     * the stack with a popped array, which fails for any document containing
     * an element; this version keeps one frame per open element instead.
     *
     * @param string $xmlContent         XML content
     * @param bool   $preserveAttributes Whether attributes are kept (keys prefixed with '@')
     * @return array Parsed data
     * @throws DocxProcessingException When the XML cannot be loaded
     */
    public function parseToArray(string $xmlContent, bool $preserveAttributes = true): array
    {
        // Frame 0 is a synthetic document frame; its children are the result
        $frames = [['name' => null, 'attributes' => [], 'children' => []]];

        // Stores $element under $name, promoting repeated names to a list
        $attach = static function (array &$siblings, string $name, array $element): void {
            if (!array_key_exists($name, $siblings)) {
                $siblings[$name] = $element;
                return;
            }
            // Element arrays only use '@…' and '#children' keys, so an
            // existing index 0 means the entry already is a list
            if (!isset($siblings[$name][0])) {
                $siblings[$name] = [$siblings[$name]];
            }
            $siblings[$name][] = $element;
        };

        $this->processXmlContent($xmlContent, function () use (&$frames, $attach, $preserveAttributes) {
            $nodeType = $this->reader->nodeType;
            $top = count($frames) - 1;

            // Start tag: collect attributes, then either attach (self-closing) or open a frame
            if ($nodeType === self::NODE_ELEMENT) {
                $name = $this->reader->name;
                $isEmpty = $this->reader->isEmptyElement;

                $attributes = [];
                if ($preserveAttributes && $this->reader->hasAttributes) {
                    while ($this->reader->moveToNextAttribute()) {
                        $attributes['@' . $this->reader->name] = $this->reader->value;
                    }
                    $this->reader->moveToElement();
                }

                if ($isEmpty) {
                    // No end tag will follow, so the element is complete already
                    $attach($frames[$top]['children'], $name, $attributes);
                } else {
                    $frames[] = ['name' => $name, 'attributes' => $attributes, 'children' => []];
                }
                return true;
            }

            // End tag: close the frame and hang the finished element on its parent
            if ($nodeType === self::NODE_END_ELEMENT) {
                if ($top === 0) {
                    return true; // defensive: nothing left to close
                }
                $frame = array_pop($frames);
                $element = $frame['attributes'];
                if ($frame['children'] !== []) {
                    $element['#children'] = $frame['children'];
                }
                $attach($frames[$top - 1]['children'], $frame['name'], $element);
                return true;
            }

            $children = &$frames[$top]['children'];

            if ($nodeType === self::NODE_TEXT || $nodeType === self::NODE_CDATA) {
                if ($children === []) {
                    $children['#text'] = $this->reader->value;
                } else {
                    $children[] = ['#text' => $this->reader->value];
                }
            } elseif ($nodeType === self::NODE_COMMENT) {
                $children['#comment'] = $this->reader->value;
            } elseif ($nodeType === self::NODE_PI) {
                $children['#pi_' . $this->reader->name] = $this->reader->value;
            }

            return true;
        });

        return $frames[0]['children'];
    }
   /**
     * Builds a new XML document that consists of a single root element.
     *
     * @param string $rootElement Root element name; a "prefix:name" form is
     *                            written with its namespace when the prefix
     *                            was registered through addNamespace()
     * @param array  $attributes  Root element attributes (name => value)
     * @param string $version     XML version for the declaration
     * @param string $encoding    Encoding for the declaration
     * @return string The generated XML
     */
    public function createDocument(
        string $rootElement,
        array $attributes = [],
        string $version = '1.0',
        string $encoding = 'UTF-8'
    ): string {
        $xmlWriter = $this->writer;

        $xmlWriter->openMemory();
        $xmlWriter->startDocument($version, $encoding);

        // Root element (namespace-aware) and its attributes
        $this->startElement($rootElement);
        $this->writeAttributes($attributes);
        $xmlWriter->endElement();

        $xmlWriter->endDocument();

        return $xmlWriter->outputMemory();
    }
    /**
     * Reads an XML file and returns its content re-serialised by the writer.
     *
     * XMLReader::open() accepts (uri, encoding, flags) only; the stream
     * context that used to be passed as a fourth argument is not part of its
     * signature and made the call fail with an ArgumentCountError on PHP 8.
     *
     * @param string $filePath Path of the XML file
     * @param string $encoding Expected encoding (default UTF-8)
     * @return string XML content
     * @throws DocxProcessingException When the file cannot be opened
     */
    public function readFile(string $filePath, string $encoding = 'UTF-8'): string
    {
        // An empty path can never be opened; report it through the documented exception
        if ($filePath === '' || !$this->reader->open($filePath, $encoding, LIBXML_NONET)) {
            throw new DocxProcessingException('Failed to open XML file: ' . $filePath);
        }

        try {
            $this->writer->openMemory();
            $this->processXml();
            return $this->writer->outputMemory();
        } finally {
            // Release the file handle even when copying fails half-way
            $this->reader->close();
        }
    }
    /**
     * Adds a child element to the first element that matches $parentPath.
     *
     * With $prepend = true the child becomes the first child of the parent,
     * otherwise it is written immediately before the parent's end tag. A
     * self-closing parent is expanded so that it can hold the child.
     *
     * Fixes over the previous version: $prepend was ignored, the child was
     * written before the parent's start tag (outside the parent), and the
     * end-tag branch compared a path that no longer contained the parent.
     *
     * @param string $xmlString  XML content
     * @param string $parentPath Parent element path (attribute filters allowed: parent/child[@attr="val"])
     * @param string $childName  Child element name, optionally with a namespace prefix;
     *                           an empty name makes writeElement() emit $childValue as raw XML
     * @param string $childValue Child text value (a 'cdata:' prefix wraps the value in CDATA)
     * @param array  $attributes Child attributes (name => value)
     * @param bool   $prepend    Insert as first child instead of last child
     * @return string Updated XML
     * @throws DocxProcessingException When the XML cannot be loaded
     */
    public function addElement(
        string $xmlString,
        string $parentPath,
        string $childName,
        string $childValue = '',
        array $attributes = [],
        bool $prepend = false
    ): string {
        $pathParser = $this->createPathParser($parentPath);
        $inserted = false;     // only the first matching parent receives the child
        $pendingDepth = null;  // reader depth of a matched parent waiting for its end tag

        return $this->modifyXml($xmlString, function ($writer, $currentPath, $depth) use (
            $pathParser,
            $childName,
            $childValue,
            $attributes,
            $prepend,
            &$inserted,
            &$pendingDepth
        ) {
            if ($inserted) {
                return false;
            }

            if ($this->isElementNode()) {
                // Elements below an already matched parent are copied untouched
                if ($pendingDepth !== null) {
                    return false;
                }
                if (!$pathParser->matches(implode('/', $currentPath), $this->reader)) {
                    return false;
                }

                $isEmpty = $this->reader->isEmptyElement;
                if (!$prepend && !$isEmpty) {
                    // Append mode: let the parent be copied and wait for its end tag
                    $pendingDepth = $this->reader->depth;
                    return false;
                }

                // Write the parent's start tag here so the child ends up inside it
                $this->startElementWithNamespace();
                $this->writeAttributesFromReader();
                $this->writeElement($childName, $childValue, $attributes);
                if ($isEmpty) {
                    // A self-closing parent has no end-tag node; close it now
                    $this->writer->endElement();
                }
                $inserted = true;
                return true; // start tag already written, skip default handling
            }

            if ($pendingDepth !== null
                && $this->isEndElementNode()
                && $this->reader->depth === $pendingDepth
            ) {
                // Append mode: emit the child, the default handler then closes the parent
                $this->writeElement($childName, $childValue, $attributes);
                $inserted = true;
                $pendingDepth = null;
            }

            return false;
        });
    }
   /**
     * Replaces the text of the elements that match $elementPath.
     *
     * Matching is done on the element's start tag, so attribute filters and
     * indexes are evaluated against the element itself (the previous version
     * evaluated them on the text node, where no attribute is available). The
     * first direct text/CDATA node of a matched element is replaced by
     * $newValue and any further direct text nodes of that element are
     * dropped. Elements without a text node are left unchanged.
     *
     * @param string $xmlString   XML content
     * @param string $elementPath Element path (wildcards and attribute filters allowed)
     * @param string $newValue    New value; a 'cdata:' prefix writes the rest as CDATA
     * @param int    $maxUpdates  Maximum number of updated elements (values <= 0 mean no limit)
     * @return string Updated XML
     * @throws DocxProcessingException When the XML cannot be loaded
     */
    public function updateValue(
        string $xmlString,
        string $elementPath,
        string $newValue,
        int $maxUpdates = -1
    ): string {
        $pathParser = $this->createPathParser($elementPath);
        $updatedCount = 0;
        $targetDepth = null; // reader depth of the matched element currently being processed
        $written = false;    // whether the replacement was already emitted for that element

        return $this->modifyXml($xmlString, function ($writer, $currentPath, $depth) use (
            $pathParser,
            $newValue,
            $maxUpdates,
            &$updatedCount,
            &$targetDepth,
            &$written
        ) {
            if ($this->isElementNode()) {
                // Descendants of the current target are copied as they are
                if ($targetDepth !== null && $this->reader->depth > $targetDepth) {
                    return false;
                }

                $targetDepth = null;
                $limitReached = $maxUpdates > 0 && $updatedCount >= $maxUpdates;
                $matched = !$limitReached
                    && $pathParser->matches(implode('/', $currentPath), $this->reader);

                // A self-closing element has no text node and no end tag to wait for
                if ($matched && !$this->reader->isEmptyElement) {
                    $targetDepth = $this->reader->depth;
                    $written = false;
                }
                return false;
            }

            if ($targetDepth === null) {
                return false;
            }

            if ($this->isEndElementNode()) {
                if ($this->reader->depth === $targetDepth) {
                    $targetDepth = null;
                }
                return false;
            }

            // Only text that is a direct child of the matched element is replaced
            if ($this->isTextNode() && $this->reader->depth === $targetDepth + 1) {
                if (!$written) {
                    if (strpos($newValue, 'cdata:') === 0) {
                        // 'cdata:' is six characters long
                        $this->writer->writeCData(substr($newValue, 6));
                    } else {
                        $this->writer->text($newValue);
                    }
                    $written = true;
                    $updatedCount++;
                }
                return true; // drop the original text
            }

            return false;
        });
    }
    /**
     * Removes the elements that match $elementPath together with their content.
     *
     * The end of a removed element is detected through the reader's own depth.
     * Self-closing matches are removed on the spot; previously they left the
     * method in skip mode, which swallowed the following siblings and the
     * parent's end tag.
     *
     * @param string $xmlString    XML content
     * @param string $elementPath  Element path (wildcards and attribute filters allowed)
     * @param int    $maxDeletions Maximum number of removed elements (values <= 0 mean no limit)
     * @return string Updated XML
     * @throws DocxProcessingException When the XML cannot be loaded
     */
    public function removeElement(string $xmlString, string $elementPath, int $maxDeletions = -1): string
    {
        $pathParser = $this->createPathParser($elementPath);
        $skipDepth = null; // reader depth of the element currently being dropped
        $deleted = 0;

        return $this->modifyXml($xmlString, function ($writer, $currentPath, $depth) use (
            $pathParser,
            $maxDeletions,
            &$skipDepth,
            &$deleted
        ) {
            // Inside a removed element: drop everything up to and including its end tag
            if ($skipDepth !== null) {
                if ($this->isEndElementNode() && $this->reader->depth === $skipDepth) {
                    $skipDepth = null;
                }
                return true;
            }

            if (!$this->isElementNode()) {
                return false;
            }
            if ($maxDeletions > 0 && $deleted >= $maxDeletions) {
                return false;
            }
            if (!$pathParser->matches(implode('/', $currentPath), $this->reader)) {
                return false;
            }

            $deleted++;
            if (!$this->reader->isEmptyElement) {
                // Only elements with an end tag need a skip phase
                $skipDepth = $this->reader->depth;
            }
            return true; // drop the start tag itself
        });
    }
    /**
     * Copies the first element matching $sourcePath into the first element
     * matching $targetParentPath (as its last child).
     *
     * When $newName is given, only the name in the fragment's outermost start
     * and end tag is replaced; attributes and content are kept. The previous
     * regular expressions discarded the attributes, did not match
     * single-character names and broke self-closing fragments.
     *
     * NOTE(review): with $keepSource = false the first match of $sourcePath
     * is removed from the result; if the copy ends up earlier in the document
     * under the same path, the copy is the one removed - verify against callers.
     *
     * @param string      $xmlString        XML content
     * @param string      $sourcePath       Source element path
     * @param string      $targetParentPath Target parent element path
     * @param string|null $newName          New element name (null or '' keeps the original name)
     * @param bool        $keepSource       Whether the source element is kept (default: yes)
     * @return string Updated XML
     * @throws DocxProcessingException When the source element does not exist
     */
    public function copyElement(
        string $xmlString,
        string $sourcePath,
        string $targetParentPath,
        ?string $newName = null,
        bool $keepSource = true
    ): string {
        // Extract the serialised source element
        $sourceXml = $this->getOuterXml($xmlString, $sourcePath);
        if ($sourceXml === null) {
            throw new DocxProcessingException("Source element not found: {$sourcePath}");
        }

        // Rename the outermost element if requested
        if ($newName !== null && $newName !== ''
            && preg_match('/^<([^\s\/>]+)/', $sourceXml, $nameMatch) === 1
        ) {
            $oldName = $nameMatch[1];
            // Swap the name in the start tag, keeping everything after it
            $sourceXml = '<' . $newName . substr($sourceXml, strlen($oldName) + 1);

            // Swap the name in the end tag; a self-closing fragment has none
            $oldClosing = '</' . $oldName . '>';
            if (substr($sourceXml, -strlen($oldClosing)) === $oldClosing) {
                $sourceXml = substr($sourceXml, 0, -strlen($oldClosing)) . '</' . $newName . '>';
            }
        }

        // An empty child name makes addElement() write the fragment as raw XML
        $result = $this->addElement(
            $xmlString,
            $targetParentPath,
            '',
            $sourceXml,
            [],
            false
        );

        // Remove the source afterwards when this is a move
        return $keepSource ? $result : $this->removeElement($result, $sourcePath, 1);
    }

    /**
     * Moves an element below another parent.
     *
     * Delegates to copyElement() with the original name kept and the source
     * element removed afterwards.
     *
     * @param string $xmlString        XML content
     * @param string $sourcePath       Source element path
     * @param string $targetParentPath Target parent element path
     * @return string Updated XML
     */
    public function moveElement(string $xmlString, string $sourcePath, string $targetParentPath): string
    {
        $keepOriginalName = null;
        $keepSource = false;

        return $this->copyElement(
            $xmlString,
            $sourcePath,
            $targetParentPath,
            $keepOriginalName,
            $keepSource
        );
    }

/**
     * Returns the serialised form (outer XML) of the first element matching
     * $elementPath, or null when no element matches.
     *
     * A self-closing match is returned immediately; previously it left the
     * method in capture mode, so unrelated following nodes were appended to
     * the fragment.
     *
     * @param string $xmlString   XML content
     * @param string $elementPath Element path
     * @return string|null Serialised element, or null when not found
     * @throws DocxProcessingException When the XML cannot be loaded
     */
    public function getOuterXml(string $xmlString, string $elementPath): ?string
    {
        $pathParser = $this->createPathParser($elementPath);
        $fragment = null;
        $captureDepth = null; // reader depth of the element being captured
        $captureWriter = new XMLWriter();
        $captureWriter->openMemory();

        $this->processXmlContent($xmlString, function () use (
            $pathParser,
            $captureWriter,
            &$fragment,
            &$captureDepth
        ) {
            if ($captureDepth !== null) {
                // Copy every node below the match, including its own end tag
                $this->copyNodeToWriter($this->reader, $captureWriter);
                if ($this->isEndElementNode() && $this->reader->depth === $captureDepth) {
                    $fragment = $captureWriter->outputMemory();
                    return false; // first match is complete, stop parsing
                }
                return true;
            }

            if ($this->isElementNode()) {
                $currentPath = $this->buildCurrentPath();
                if ($pathParser->matches($currentPath, $this->reader)) {
                    // copyNodeToWriter() also closes the element when it is self-closing
                    $this->copyNodeToWriter($this->reader, $captureWriter);
                    if ($this->reader->isEmptyElement) {
                        $fragment = $captureWriter->outputMemory();
                        return false;
                    }
                    $captureDepth = $this->reader->depth;
                }
            }

            return true;
        });

        return $fragment;
    }

/**
     * Tells whether at least one element matches $elementPath.
     *
     * Parsing stops at the first match.
     *
     * @param string $xmlString   XML content
     * @param string $elementPath Element path
     * @return bool True when a matching element was found
     * @throws DocxProcessingException When the XML cannot be loaded
     */
    public function exists(string $xmlString, string $elementPath): bool
    {
        $found = false;
        $pathParser = $this->createPathParser($elementPath);

        $visitor = function () use ($pathParser, &$found) {
            // Only start tags can match a path
            if (!$this->isElementNode()) {
                return true;
            }
            if (!$pathParser->matches($this->buildCurrentPath(), $this->reader)) {
                return true;
            }
            $found = true;
            return false; // stop at the first hit
        };

        $this->processXmlContent($xmlString, $visitor);

        return $found;
    }

    /**
     * Finds all elements matching $elementPath.
     *
     * Each result is an array with:
     *  - value:      concatenated direct text/CDATA children
     *  - attributes: attribute map (name => value)
     *  - outer_xml:  serialised element
     *  - path:       element path
     *  - depth:      reader depth of the element
     *
     * Results are addressed by index while they are being filled. The previous
     * version appended a reference to one shared variable, so finishing a match
     * (or starting the next one) overwrote the entries collected before.
     * Self-closing matches are completed immediately.
     *
     * @param string $xmlString   XML content
     * @param string $elementPath Element path (wildcards, attribute filters and index allowed)
     * @return array List of matches
     * @throws DocxProcessingException When the XML cannot be loaded
     */
    public function query(string $xmlString, string $elementPath): array
    {
        $pathParser = $this->createPathParser($elementPath);
        $results = [];
        $activeIndex = null; // index in $results of the match being collected
        $activeDepth = 0;    // reader depth of that match
        $fragmentWriter = new XMLWriter();

        $this->processXmlContent($xmlString, function () use (
            $pathParser,
            &$results,
            &$activeIndex,
            &$activeDepth,
            $fragmentWriter
        ) {
            if ($this->isElementNode()) {
                // Called for every start tag so that the path stays in sync
                $currentPath = $this->buildCurrentPath();
                if ($pathParser->matches($currentPath, $this->reader)) {
                    $entry = [
                        'value' => '',
                        'attributes' => $this->getAllAttributes(),
                        'path' => $currentPath,
                        'depth' => $this->reader->depth,
                        'outer_xml' => ''
                    ];
                    $fragmentWriter->openMemory();
                    $this->copyNodeToWriter($this->reader, $fragmentWriter);

                    if ($this->reader->isEmptyElement) {
                        // No end tag will follow; the fragment is complete
                        $entry['outer_xml'] = $fragmentWriter->outputMemory();
                        $results[] = $entry;
                        $activeIndex = null;
                    } else {
                        $results[] = $entry;
                        $activeIndex = count($results) - 1;
                        $activeDepth = $this->reader->depth;
                    }
                    return true;
                }
            }

            if ($activeIndex === null) {
                return true;
            }

            $nodeDepth = $this->reader->depth;

            // Direct text children make up the element's value
            if ($this->isTextNode() && $nodeDepth === $activeDepth + 1) {
                $results[$activeIndex]['value'] .= $this->reader->value;
            }

            // Everything inside the match, including its end tag, belongs to outer_xml
            if ($nodeDepth >= $activeDepth) {
                $this->copyNodeToWriter($this->reader, $fragmentWriter);
                if ($this->isEndElementNode() && $nodeDepth === $activeDepth) {
                    $results[$activeIndex]['outer_xml'] = $fragmentWriter->outputMemory();
                    $activeIndex = null;
                }
            }

            return true;
        });

        return $results;
    }

 /**
     * Sets an attribute on every element that matches $elementPath.
     *
     * The matched start tag is rewritten with the element's existing
     * attributes; $attributeName is overwritten when present and added when
     * missing if $addIfMissing is true.
     *
     * @param string $xmlString     XML content
     * @param string $elementPath   Element path
     * @param string $attributeName Attribute name
     * @param string $newValue      New attribute value
     * @param bool   $addIfMissing  Add the attribute when the element does not have it
     * @return string Updated XML
     * @throws DocxProcessingException When the XML cannot be loaded
     */
    public function updateAttribute(
        string $xmlString,
        string $elementPath,
        string $attributeName,
        string $newValue,
        bool $addIfMissing = true
    ): string {
        $pathParser = $this->createPathParser($elementPath);

        $rewriteStartTag = function ($writer, $currentPath, $depth) use (
            $pathParser,
            $attributeName,
            $newValue,
            $addIfMissing
        ) {
            if (!$this->isElementNode()) {
                return false;
            }
            if (!$pathParser->matches(implode('/', $currentPath), $this->reader)) {
                return false;
            }

            // Start tag first, then the (possibly amended) attribute set
            $this->startElementWithNamespace();

            $attributes = $this->getAllAttributes();
            if ($addIfMissing || isset($attributes[$attributeName])) {
                $attributes[$attributeName] = $newValue;
            }
            $this->writeAttributes($attributes);

            // A self-closing element gets no end-tag node, so close it here
            if ($this->reader->isEmptyElement) {
                $this->writer->endElement();
            }

            return true; // start tag handled, skip the default copy
        };

        return $this->modifyXml($xmlString, $rewriteStartTag);
    }

    /**
     * Replaces every element matching $elementPath, including its children.
     *
     * The replacement keeps the element name, carries $newAttributes only and
     * contains $newValue as text (no content when $newValue is ''). The
     * original attributes and content are dropped.
     *
     * The end of the replaced element is found through the reader's depth; the
     * previous comparison against modifyXml()'s depth counter never matched the
     * element's own end tag, and self-closing matches swallowed the rest of the
     * document.
     *
     * @param string $xmlString     XML content
     * @param string $elementPath   Element path
     * @param string $newValue      New text value
     * @param array  $newAttributes New attributes (name => value)
     * @return string Updated XML
     */
    public function replaceElement(
        string $xmlString,
        string $elementPath,
        string $newValue = '',
        array $newAttributes = []
    ): string {
        $pathParser = $this->createPathParser($elementPath);
        $skipDepth = null; // reader depth of the element whose old content is being dropped

        return $this->modifyXml($xmlString, function ($writer, $currentPath, $depth) use (
            $pathParser,
            $newValue,
            $newAttributes,
            &$skipDepth
        ) {
            // Drop the old content up to and including the old end tag
            if ($skipDepth !== null) {
                if ($this->isEndElementNode() && $this->reader->depth === $skipDepth) {
                    $skipDepth = null;
                }
                return true;
            }

            if (!$this->isElementNode()
                || !$pathParser->matches(implode('/', $currentPath), $this->reader)
            ) {
                return false;
            }

            // Write the complete replacement right away
            $this->startElementWithNamespace();
            $this->writeAttributes($newAttributes);
            if ($newValue !== '') {
                $this->writer->text($newValue);
            }
            $this->writer->endElement();

            if (!$this->reader->isEmptyElement) {
                $skipDepth = $this->reader->depth;
            }
            return true;
        });
    }

    /**
     * Rewrites every element matching $elementPath through a callback.
     *
     * For each match the callback receives the concatenated direct text and
     * the attribute map and returns [newValue, newAttributes]. Child nodes that
     * are not direct text are buffered and written back unchanged in front of
     * the new text.
     *
     * Changes over the previous version: the callback is invoked once per
     * element with the real value (it used to be invoked a second time with
     * an empty value, and the attributes of the final call were discarded),
     * direct text is recognised through the reader's depth, and self-closing
     * matches are handled.
     *
     * @param string   $xmlString   XML content
     * @param string   $elementPath Element path
     * @param callable $updater     function(string $value, array $attrs): array returning [value, attrs]
     * @return string Updated XML
     */
    public function batchUpdateElements(
        string $xmlString,
        string $elementPath,
        callable $updater
    ): string {
        $pathParser = $this->createPathParser($elementPath);
        $buffer = new XMLWriter(); // holds the non-text children of the current match
        $activeDepth = null;       // reader depth of the current match
        $collectedText = '';
        $originalAttrs = [];

        // Writes the updated element; the reader must be on the element's start or end tag
        $emit = function (string $value, array $attrs, string $innerXml) use ($updater): void {
            $updated = $updater($value, $attrs);
            // Fall back to the original data when the callback omits a part
            $newValue = (string) ($updated[0] ?? $value);
            $newAttrs = (array) ($updated[1] ?? $attrs);

            $this->startElementWithNamespace();
            $this->writeAttributes($newAttrs);
            if ($innerXml !== '') {
                $this->writer->writeRaw($innerXml);
            }
            if ($newValue !== '') {
                $this->writer->text($newValue);
            }
            $this->writer->endElement();
        };

        return $this->modifyXml($xmlString, function ($writer, $currentPath, $depth) use (
            $pathParser,
            $emit,
            $buffer,
            &$activeDepth,
            &$collectedText,
            &$originalAttrs
        ) {
            if ($activeDepth !== null) {
                // End tag of the match: everything is known, write the element
                if ($this->isEndElementNode() && $this->reader->depth === $activeDepth) {
                    $emit($collectedText, $originalAttrs, $buffer->outputMemory());
                    $activeDepth = null;
                    return true;
                }
                // Direct text is handed to the callback instead of being copied
                if ($this->isTextNode() && $this->reader->depth === $activeDepth + 1) {
                    $collectedText .= $this->reader->value;
                    return true;
                }
                // Any other node inside the match is preserved verbatim
                $this->copyNodeToWriter($this->reader, $buffer);
                return true;
            }

            if (!$this->isElementNode()
                || !$pathParser->matches(implode('/', $currentPath), $this->reader)
            ) {
                return false;
            }

            $attrs = $this->getAllAttributes();
            if ($this->reader->isEmptyElement) {
                // No content and no end tag: update immediately
                $emit('', $attrs, '');
                return true;
            }

            $activeDepth = $this->reader->depth;
            $collectedText = '';
            $originalAttrs = $attrs;
            $buffer->openMemory();
            return true; // the start tag is written later by $emit
        });
    }

    // Internal helper methods follow
    /**
     * Builds a matcher for a slash-separated element path.
     *
     * Supported syntax per segment: an element name or '*' (any name),
     * followed by any number of predicates of the form [n] (1-based index)
     * or [@attr="value"] / [@attr='value'].
     *
     * Limitations kept from the original design: all attribute predicates are
     * checked against the element currently under the reader (i.e. the last
     * segment), whichever segment they were written on, and an index selects
     * the n-th match in document order rather than the n-th child of a parent.
     *
     * Changes: several predicates on one segment are parsed, empty attribute
     * values are accepted, and leading/trailing slashes are ignored.
     *
     * @param string $path Path string (e.g. parent/child[@id="1"][2], root/*)
     * @return object Matcher exposing matches(string $currentPath, XMLReader $reader): bool
     */
    private function createPathParser(string $path): object
    {
        $segments = [];
        $filters = [];
        $index = null;

        // One trailing predicate: [digits] or [@name="value"] / [@name='value']
        $predicatePattern = '/^(.*)\[\s*(\d+|@[^=\]]+=\s*(?:"[^"]*"|\'[^\']*\'))\s*\]$/s';

        foreach (explode('/', trim($path, '/')) as $segment) {
            // Peel predicates off the end of the segment until none is left
            while (preg_match($predicatePattern, $segment, $m) === 1) {
                $segment = $m[1];
                $predicate = $m[2];

                if (preg_match('/^\d+$/', $predicate) === 1) {
                    $index = (int) $predicate - 1; // convert to a 0-based index
                    continue;
                }
                if (preg_match('/^@([^=]+)=\s*(["\'])(.*)\2$/s', $predicate, $p) === 1) {
                    // Predicates are read right to left; unshift keeps the written order
                    array_unshift($filters, [
                        'attr' => trim($p[1]),
                        'value' => trim($p[3])
                    ]);
                }
            }
            $segments[] = $segment;
        }

        return new class($segments, $filters, $index) {
            private $segments;
            private $filters;
            private $index;
            private $matchCount = 0;

            public function __construct($segments, $filters, $index)
            {
                $this->segments = $segments;
                $this->filters = $filters;
                $this->index = $index;
            }

            public function matches(string $currentPath, XMLReader $reader): bool
            {
                $currentSegments = explode('/', $currentPath);

                // Paths of different length can never match
                if (count($currentSegments) !== count($this->segments)) {
                    return false;
                }

                // Compare segment by segment; '*' accepts any name
                foreach ($this->segments as $i => $segment) {
                    if ($segment !== '*' && $currentSegments[$i] !== $segment) {
                        return false;
                    }
                }

                // Attribute predicates are evaluated on the reader's current element
                foreach ($this->filters as $filter) {
                    if ($reader->getAttribute($filter['attr']) !== $filter['value']) {
                        return false;
                    }
                }

                // With an index only the n-th successful match counts
                if ($this->index !== null) {
                    $this->matchCount++;
                    return $this->matchCount - 1 === $this->index;
                }

                return true;
            }
        };
    }
    /**
     * Returns the slash-separated path of the node under the reader.
     *
     * The remembered ancestor chain is cut back to the reader's depth on every
     * call. This keeps the path correct when the method is called for start
     * tags only (as the query methods do) and for self-closing elements, which
     * produce no end-tag node. The previous version only shortened the path on
     * end tags, so sibling elements accumulated in the path.
     *
     * For a start tag the path ends with the element itself; for every other
     * node it is the chain of ancestors recorded so far.
     *
     * NOTE(review): the chain lives in a function-level static, so it is shared
     * by all instances of this class; confirm that parses are never interleaved.
     *
     * @return string Path string (e.g. root/parent/child)
     */
    private function buildCurrentPath(): string
    {
        static $pathStack = [];

        // Entries at or below the current depth belong to already closed elements
        $pathStack = array_slice($pathStack, 0, $this->reader->depth);

        if ($this->isElementNode()) {
            $pathStack[] = $this->reader->name;
        }

        return implode('/', $pathStack);
    }

     /**
     * Copies the node under $reader to $writer.
     *
     * Handles elements (with attributes), end tags, text, CDATA, comments,
     * processing instructions and significant whitespace; other node types
     * are ignored.
     *
     * @param XMLReader $reader Source reader
     * @param XMLWriter $writer Target writer
     */
    private function copyNodeToWriter(XMLReader $reader, XMLWriter $writer): void
    {
        $type = $reader->nodeType;

        if ($type === self::NODE_ELEMENT) {
            $writer->startElement($reader->name);
            if ($reader->hasAttributes) {
                // Walk the attribute list, then return to the owning element
                for ($more = $reader->moveToFirstAttribute(); $more; $more = $reader->moveToNextAttribute()) {
                    $writer->writeAttribute($reader->name, $reader->value);
                }
                $reader->moveToElement();
            }
            // A self-closing element gets no end-tag node; close it right away
            if ($reader->isEmptyElement) {
                $writer->endElement();
            }
            return;
        }

        if ($type === self::NODE_END_ELEMENT) {
            $writer->endElement();
        } elseif ($type === self::NODE_TEXT || $type === self::NODE_WHITESPACE) {
            $writer->text($reader->value);
        } elseif ($type === self::NODE_CDATA) {
            $writer->writeCData($reader->value);
        } elseif ($type === self::NODE_COMMENT) {
            $writer->writeComment($reader->value);
        } elseif ($type === self::NODE_PI) {
            $writer->writePI($reader->name, $reader->value);
        }
    }

    /**
     * Collects the attributes of the element under the reader.
     *
     * The reader is moved back onto the element before returning.
     *
     * @return array Attribute map (attribute name => value)
     */
    private function getAllAttributes(): array
    {
        $collected = [];

        if (!$this->reader->hasAttributes) {
            return $collected;
        }

        $reader = $this->reader;
        for ($more = $reader->moveToFirstAttribute(); $more; $more = $reader->moveToNextAttribute()) {
            $collected[$reader->name] = $reader->value;
        }
        $reader->moveToElement();

        return $collected;
    }
     // ------------------------------ Basic methods ------------------------------
    /**
     * Opens an element on the writer.
     *
     * A "prefix:name" whose prefix was registered through addNamespace() is
     * written with startElementNS(); every other name is written as given.
     *
     * @param string $name Element name
     */
    private function startElement(string $name): void
    {
        $hasPrefix = strpos($name, ':') !== false;
        if (!$hasPrefix) {
            $this->writer->startElement($name);
            return;
        }

        [$prefix, $localName] = explode(':', $name, 2);
        $uri = $this->namespaces[$prefix] ?? null;

        if ($uri === null) {
            // Unknown prefix: keep the qualified name untouched
            $this->writer->startElement($name);
            return;
        }

        $this->writer->startElementNS($prefix, $localName, $uri);
    }
       /**
     * Opens an element on the writer, named after the node currently under
     * the reader (namespace handling is delegated to startElement()).
     */
    private function startElementWithNamespace(): void
    {
        $qualifiedName = $this->reader->name;
        $this->startElement($qualifiedName);
    }
    /**
     * Writes a complete element (start tag, attributes, value, end tag).
     *
     * When $name is empty, $value is treated as a ready-made XML fragment and
     * written verbatim; with both empty nothing is written. A value that
     * starts with 'cdata:' is written as a CDATA section without that prefix
     * (the prefix is six characters long; the previous offset of five left
     * the colon in the output).
     *
     * @param string $name       Element name, optionally with a namespace prefix
     * @param string $value      Element value ('cdata:' prefix wraps it in CDATA)
     * @param array  $attributes Attributes (name => value)
     */
    private function writeElement(string $name, string $value = '', array $attributes = []): void
    {
        if ($name === '') {
            // Raw fragment mode; an element without a name cannot be started
            if ($value !== '') {
                $this->writer->writeRaw($value);
            }
            return;
        }

        $this->startElement($name);
        $this->writeAttributes($attributes);

        if (strpos($value, 'cdata:') === 0) {
            $this->writer->writeCData(substr($value, 6));
        } elseif ($value !== '') {
            $this->writer->text($value);
        }

        $this->writer->endElement();
    }
    /**
     * Writes a set of attributes onto the element that is currently open.
     *
     * A "prefix:name" attribute whose prefix was registered through
     * addNamespace() is written with writeAttributeNS(); all others are
     * written as given.
     *
     * @param array $attributes Attributes [name => value]
     */
    private function writeAttributes(array $attributes): void
    {
        foreach ($attributes as $name => $value) {
            $uri = null;

            if (strpos($name, ':') !== false) {
                [$prefix, $local] = explode(':', $name, 2);
                $uri = $this->namespaces[$prefix] ?? null;
            }

            if ($uri !== null) {
                $this->writer->writeAttributeNS($prefix, $local, $uri, $value);
                continue;
            }

            // No prefix, or a prefix that was never registered
            $this->writer->writeAttribute($name, $value);
        }
    }
    /**
     * Copies all attributes of the element under the reader to the writer.
     */
    private function writeAttributesFromReader(): void
    {
        $attributes = $this->getAllAttributes();
        $this->writeAttributes($attributes);
    }
/**
     * Default handling of the node under the reader: copies it to the writer.
     *
     * Covers elements (namespace-aware, with attributes), end tags, text,
     * CDATA, comments, processing instructions and significant whitespace;
     * other node types are ignored.
     */
    private function handleNode(): void
    {
        $type = $this->reader->nodeType;

        if ($type === self::NODE_ELEMENT) {
            $this->startElementWithNamespace();
            $this->writeAttributesFromReader();
            // Self-closing elements produce no end-tag node
            if ($this->reader->isEmptyElement) {
                $this->writer->endElement();
            }
            return;
        }

        if ($type === self::NODE_END_ELEMENT) {
            $this->writer->endElement();
            return;
        }

        if ($type === self::NODE_TEXT || $type === self::NODE_WHITESPACE) {
            $this->writer->text($this->reader->value);
            return;
        }

        if ($type === self::NODE_CDATA) {
            $this->writer->writeCData($this->reader->value);
            return;
        }

        if ($type === self::NODE_COMMENT) {
            $this->writer->writeComment($this->reader->value);
            return;
        }

        if ($type === self::NODE_PI) {
            $this->writer->writePI($this->reader->name, $this->reader->value);
        }
    }
       /**
     * Copies every remaining node of the opened reader to the writer.
     */
    private function processXml(): void
    {
        for (;;) {
            if (!$this->reader->read()) {
                break;
            }
            $this->handleNode();
        }
    }
       /**
     * Streams through an XML string and invokes $processor for every node.
     *
     * The callback is called without arguments after each successful read();
     * returning false stops the traversal. The reader is closed in all cases,
     * including when the callback throws. Empty input is rejected up front
     * because XMLReader::XML() raises a ValueError for it on PHP 8 instead of
     * returning false.
     *
     * @param string   $xmlString XML content
     * @param callable $processor Node callback (return false to stop parsing)
     * @throws DocxProcessingException When the XML cannot be loaded
     */
    private function processXmlContent(string $xmlString, callable $processor): void
    {
        if ($xmlString === '' || !$this->reader->XML($xmlString)) {
            throw new DocxProcessingException('Failed to parse XML content');
        }

        try {
            while ($this->reader->read() && $processor() !== false) {
                // The callback drives the traversal
            }
        } finally {
            $this->reader->close();
        }
    }
   /**
     * Streams through an XML string, letting $modifier intercept each node,
     * and returns the rewritten document.
     *
     * Callback contract (unchanged): $modifier($writer, $currentPath, $depth)
     *  - $currentPath: element names from the root; on a start tag it ends
     *    with the element itself, on every other node (end tags included) it
     *    holds the node's ancestors only;
     *  - $depth: number of open elements, counting the element itself on a
     *    start tag and no longer counting it on its end tag;
     *  - returning a truthy value suppresses the default copy of the node.
     *
     * Path and depth are now derived from the reader's depth on every node.
     * The previous counters were advanced on start tags and only reset on
     * end tags, so every self-closing element left a stale entry behind and
     * shifted path and depth for the remainder of the document.
     *
     * @param string   $xmlString XML content
     * @param callable $modifier  Node callback (truthy return skips the default handling)
     * @return string Modified XML
     * @throws DocxProcessingException When the XML cannot be loaded
     */
    private function modifyXml(string $xmlString, callable $modifier): string
    {
        // XMLReader::XML() raises a ValueError for empty input on PHP 8
        if ($xmlString === '' || !$this->reader->XML($xmlString)) {
            throw new DocxProcessingException('Failed to parse XML content');
        }

        try {
            $this->writer->openMemory();
            $currentPath = [];

            while ($this->reader->read()) {
                $readerDepth = $this->reader->depth;
                $isElement = $this->isElementNode();

                // Drop entries of elements that are already closed (or were self-closing)
                if (count($currentPath) > $readerDepth) {
                    $currentPath = array_slice($currentPath, 0, $readerDepth);
                }
                if ($isElement) {
                    $currentPath[] = $this->reader->name;
                }
                $depth = $readerDepth + ($isElement ? 1 : 0);

                // Let the modifier decide whether the default copy is skipped
                if ($modifier($this->writer, $currentPath, $depth)) {
                    continue;
                }

                $this->handleNode();
            }

            return $this->writer->outputMemory();
        } finally {
            // Always release the reader, even when the modifier throws
            $this->reader->close();
        }
    }
    /**
     * Tells whether the reader is positioned on an element start tag.
     */
    private function isElementNode(): bool
    {
        $type = $this->reader->nodeType;

        return $type === self::NODE_ELEMENT;
    }
        /**
     * Tells whether the reader is positioned on an element end tag.
     */
    private function isEndElementNode(): bool
    {
        $type = $this->reader->nodeType;

        return $type === self::NODE_END_ELEMENT;
    }
  /**
     * Tells whether the reader is positioned on a text or CDATA node.
     */
    private function isTextNode(): bool
    {
        $type = $this->reader->nodeType;

        return $type === self::NODE_TEXT || $type === self::NODE_CDATA;
    }

    /**
     * Closes the reader on destruction when it is still positioned on a node.
     */
    public function __destruct()
    {
        if (!isset($this->reader)) {
            return;
        }
        if ($this->reader->nodeType === XMLReader::NONE) {
            return; // nothing is being read
        }
        $this->reader->close();
    }
}


网站公告

今日签到

点亮在社区的每一天
去签到