这次与AI交互的次数比较多,主要改动是改用逐字符解析,以应对不含换行符的xml文件,同时重写了标签和属性的处理;修改后的main函数支持命令行参数。
限制:范围仅支持单字母的列(即A-Z),并且要求xml文件不使用共享字符串。
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
// Size of the chunk buffer handed to fgets() when reading the file
#define MAX_LINE_LENGTH 4096
// Size of a stored cell value, including the terminating NUL
#define MAX_CELL_CONTENT 1024
#define MAX_SHEET_ROWS 1048576 // Maximum number of rows in an Excel worksheet
/*
 * Rectangle of cells requested by the user.
 * Both the row bounds and the column bounds are inclusive.
 */
typedef struct {
    int start_row;   /* first row of the rectangle */
    int end_row;     /* last row of the rectangle */
    char start_col;  /* first column letter, e.g. 'A' */
    char end_col;    /* last column letter, e.g. 'Z' */
} ParseRange;
/*
 * One extracted cell.
 * `value` always holds a NUL-terminated string of at most
 * MAX_CELL_CONTENT - 1 characters.
 */
typedef struct {
    int row;                       /* row number of the cell */
    char col;                      /* column letter of the cell */
    char value[MAX_CELL_CONTENT];  /* text captured for the cell */
    int is_empty;                  /* non-zero when the cell carries no value */
} CellData;
// Global result storage shared by the parser and the output routines
CellData *results = NULL; // heap array of collected cells, grown on demand
int result_count = 0; // number of entries currently stored in results
int result_capacity = 0; // number of entries results has room for
// Range of the parse in progress; add_cell_result() filters against it
ParseRange current_parse_range = {0}; // holds the current parse range
// Forward declarations
int binary_search_start(FILE *file, ParseRange range);
// NOTE(review): no definition of find_row_by_binary_search is visible in this
// file; confirm it exists elsewhere or drop the declaration.
int find_row_by_binary_search(FILE *file, int target_row, long *start_pos, long *end_pos);
void parse_row_data(FILE *file, ParseRange range, long start_pos, long end_pos);
void add_cell_result(int row, char col, const char *value, int is_empty);
int is_cell_in_range(int row, char col, ParseRange range);
int compare_row_col(int row1, char col1, int row2, char col2);
void free_results();
#include <time.h> // 需要包含头文件
/**
 * Report the time elapsed since the first call to this function.
 *
 * The first call records the reference point and returns 0.0; later calls
 * return the difference to that point. Time is taken from clock(), so the
 * value is the processor time used by the program, not wall-clock time.
 *
 * @return seconds since the first call (0.0 on the first call)
 */
double get_runtime_seconds(void) {
    static clock_t start_t;
    static int started = 0;
    const clock_t now = clock();

    /* A separate flag is required: clock() may legitimately return 0, so the
     * timestamp itself cannot double as the "not yet initialised" marker. */
    if (!started) {
        start_t = now;
        started = 1;
        return 0.0;
    }
    return (double)(now - start_t) / CLOCKS_PER_SEC;
}
/**
 * Build a log line of the form "[MM:SS.mmm] <prefix>" from the current
 * value of get_runtime_seconds().
 *
 * The text lives in a static buffer: it is overwritten by the next call and
 * must not be freed. Output longer than the buffer is truncated by snprintf.
 *
 * @param prefix text appended after the timestamp
 * @return pointer to the static, NUL-terminated message
 */
const char* get_timestamped_msg(const char *prefix) {
    static char stamp[64];
    const double elapsed = get_runtime_seconds();
    const int whole_seconds = (int)elapsed;
    const double fraction = elapsed - whole_seconds;

    snprintf(stamp, sizeof(stamp), "[%02d:%02d.%03d] %s",
             whole_seconds / 60,
             whole_seconds % 60,
             (int)(fraction * 1000),
             prefix);
    return stamp;
}
/**
 * Parse a worksheet XML file and collect the cells that fall inside `range`.
 *
 * The file is opened, binary_search_start() positions the stream on a <row>
 * element from which scanning can begin, and parse_row_data() scans from
 * there to the end of the file. Matching cells are stored in the global
 * `results` array (result_count entries); release them with free_results().
 *
 * @param filename path of the worksheet XML file
 * @param range    cell range to extract; also saved in current_parse_range
 * @return 0 when the file was processed (including the case where no start
 *         row was found and the result set stays empty); -1 when the file
 *         cannot be opened, the result array cannot be allocated, or the
 *         stream position cannot be determined
 */
int parse_sheet_xml(const char *filename, ParseRange range) {
    current_parse_range = range;

    /* Binary mode: the code seeks to computed byte offsets, which the C
     * standard only supports on binary streams. */
    FILE *file = fopen(filename, "rb");
    if (!file) {
        perror("无法打开文件");
        return -1;
    }

    /* Start from a fresh result array; drop one left over from an earlier call. */
    free(results);
    result_count = 0;
    result_capacity = 1024;
    results = (CellData *)malloc(result_capacity * sizeof(CellData));
    if (!results) {
        result_capacity = 0;
        fclose(file);
        return -1;
    }

    if (binary_search_start(file, range)) {
        long row_start_pos = ftell(file);
        if (row_start_pos < 0) {
            fclose(file);
            return -1;
        }

        /* Walk backwards to the nearest "<row" tag. The stream normally
         * already sits on one, so the first probe succeeds. */
        for (long pos = row_start_pos; pos >= 0; pos--) {
            if (fseek(file, pos, SEEK_SET) != 0) {
                break;
            }
            if (fgetc(file) == '<' && fgetc(file) == 'r' &&
                fgetc(file) == 'o' && fgetc(file) == 'w') {
                /* Require a delimiter so that e.g. "<rowBreaks" is not taken
                 * for a row element. */
                int next = fgetc(file);
                if (next == ' ' || next == '>' || next == '/' ||
                    next == '\t' || next == '\r' || next == '\n') {
                    row_start_pos = pos;
                    break;
                }
            }
        }

        /* The end of the file is the upper bound of the scan. */
        long file_size;
        fseek(file, 0, SEEK_END);
        file_size = ftell(file);

        fseek(file, row_start_pos, SEEK_SET);
        printf("%s 二分查找\n", get_timestamped_msg(""));
        printf("二分查找到row_start_pos=%ld\n", row_start_pos);

        parse_row_data(file, range, row_start_pos, file_size);
    }

    fclose(file);
    return 0;
}
/**
* 二分查找定位起始行
* @param file 文件指针
* @param range 解析范围
* @return 是否找到起始行
*/
int binary_search_start(FILE *file, ParseRange range) {
long file_size = 0;
long low, high, mid;
// 获取文件大小
fseek(file, 0, SEEK_END);
file_size = ftell(file);
fseek(file, 0, SEEK_SET);
low = 0;
high = file_size;
int last_found_row = -1;
long last_found_pos = -1;
while (low <= high) {
mid = (low + high) / 2;
fseek(file, mid, SEEK_SET);
// 向前查找最近的<row标签
char buffer[MAX_LINE_LENGTH];
long row_start_pos = -1;
int row_num = -1;
// 从mid位置向前扫描,找到前一个<row标签
for (long pos = mid; pos >= low && pos >= 0; pos--) {
fseek(file, pos, SEEK_SET);
if (fgetc(file) == '<') {
if (pos + 4 <= file_size && fgetc(file) == 'r' &&
fgetc(file) == 'o' && fgetc(file) == 'w') {
row_start_pos = pos;
break;
}
}
}
// 如果向前没找到,从mid向后找
if (row_start_pos == -1) {
for (long pos = mid; pos <= high && pos < file_size - 4; pos++) {
fseek(file, pos, SEEK_SET);
if (fgetc(file) == '<') {
if (pos + 4 <= file_size && fgetc(file) == 'r' &&
fgetc(file) == 'o' && fgetc(file) == 'w') {
row_start_pos = pos;
break;
}
}
}
}
if (row_start_pos == -1) {
// 没有找到<row标签
if (mid == low) break;
high = mid - 1;
continue;
}
// 解析行号
fseek(file, row_start_pos, SEEK_SET);
while (fgets(buffer, MAX_LINE_LENGTH, file)) {
if (strstr(buffer, "<row")) {
char *row_attr = strstr(buffer, "r=\"");
if (row_attr) {
row_attr += 3; // 跳过r="
row_num = atoi(row_attr);
break;
}
}
}
if (row_num == -1) {
// 解析行号失败,调整搜索范围
if (row_start_pos < range.start_row) low = mid + 1;
else high = mid - 1;
continue;
}
if (row_num == range.start_row) {
// 找到精确匹配
last_found_row = row_num;
last_found_pos = row_start_pos;
break;
} else if (row_num < range.start_row) {
// 当前行小于目标行
if (row_num > last_found_row) {
last_found_row = row_num;
last_found_pos = row_start_pos;
}
low = mid + 1;
} else {
// 当前行大于目标行
high = mid - 1;
}
}
// 如果找到了合适的起始位置
if (last_found_row != -1) {
//printf(" last_found_pos=%d\n", last_found_pos);
fseek(file, last_found_pos, SEEK_SET);
return 1;
}
return 0;
}
/**
* 添加单元格结果到结果数组
*/
void add_cell_result(int row, char col, const char *value, int is_empty) {
// 关键修复:只保存用户指定范围内的单元格
if (row < current_parse_range.start_row || row > current_parse_range.end_row ||
col < current_parse_range.start_col || col > current_parse_range.end_col) {
return; // 直接返回,不保存范围外的数据
}
// 扩展结果数组
if (result_count >= result_capacity) {
result_capacity *= 2;
results = (CellData *)realloc(results, result_capacity * sizeof(CellData));
if (!results) {
fprintf(stderr, "内存分配失败\n");
return;
}
}
results[result_count].row = row;
results[result_count].col = col;
strncpy(results[result_count].value, value, MAX_CELL_CONTENT - 1);
results[result_count].value[MAX_CELL_CONTENT - 1] = '\0';
results[result_count].is_empty = is_empty;
result_count++;
//printf("row=%d,col=%c ",row,col);
}
/**
 * Tell whether a cell lies inside a range (all bounds inclusive).
 *
 * @param row   row number of the cell
 * @param col   column letter of the cell
 * @param range range to test against
 * @return 1 when the cell is inside the range, 0 otherwise
 */
int is_cell_in_range(int row, char col, ParseRange range) {
    const int row_inside = (row >= range.start_row) && (row <= range.end_row);
    const int col_inside = (col >= range.start_col) && (col <= range.end_col);
    return row_inside && col_inside;
}
/**
 * Order two cell coordinates by row first, then by column.
 *
 * @return -1 when (row1, col1) sorts before (row2, col2), 0 when both are
 *         equal, 1 when it sorts after
 */
int compare_row_col(int row1, char col1, int row2, char col2) {
    int order = (row1 > row2) - (row1 < row2);
    if (order == 0) {
        order = (col1 > col2) - (col1 < col2);
    }
    return order;
}
/**
 * Release the global result array and reset its bookkeeping to the empty
 * state. Safe to call when nothing has been allocated.
 */
void free_results() {
    free(results);  /* free(NULL) is a no-op, so no guard is needed */
    results = NULL;
    result_capacity = 0;
    result_count = 0;
}
/**
 * Print every collected cell to stdout, one per line, in storage order.
 * Cells flagged as empty are shown with a placeholder instead of a value.
 */
void print_results() {
    printf("解析结果:\n");

    int idx = 0;
    while (idx < result_count) {
        if (!results[idx].is_empty) {
            printf("单元格 %c%d: %s\n", results[idx].col, results[idx].row, results[idx].value);
        } else {
            printf("单元格 %c%d: (空)\n", results[idx].col, results[idx].row);
        }
        idx++;
    }
}
/**
 * Print a range to stdout in Excel A1 notation, e.g. A1:H7.
 *
 * @param range range to print
 */
void print_parse_range(ParseRange range) {
    const char first_col = range.start_col;
    const int first_row = range.start_row;
    const char last_col = range.end_col;
    const int last_row = range.end_row;

    printf("解析范围: %c%d:%c%d\n", first_col, first_row, last_col, last_row);
}
/**
 * Parse a range written in Excel A1 notation, such as "A1:H5".
 *
 * Only single-letter columns are supported. Column letters are converted to
 * upper case, whitespace is allowed before the range and around the colon,
 * and text after the second cell reference is ignored. Row numbers must lie
 * between 1 and 1048576. A range given in reverse order (e.g. "H5:A1") is
 * normalised so that start <= end for both rows and columns.
 *
 * @param input text to parse
 * @param range receives the parsed range; left untouched on failure
 * @return 0 on success, -1 when an argument is NULL or the text is malformed
 */
int parse_excel_range(const char *input, ParseRange *range) {
    enum { EXCEL_MAX_ROW = 1048576 };

    if (!input || !range) return -1;

    char cols[2] = {'\0', '\0'};
    int rows[2] = {0, 0};
    const char *p = input;

    /* part 0 is the cell before the colon, part 1 the cell after it */
    for (int part = 0; part < 2; part++) {
        /* <ctype.h> functions need unsigned char values; plain char may be negative. */
        while (isspace((unsigned char)*p)) p++;
        if (part == 1) {
            if (*p != ':') return -1;
            p++;
            while (isspace((unsigned char)*p)) p++;
        }

        if (!isalpha((unsigned char)*p)) return -1;
        cols[part] = (char)toupper((unsigned char)*p);
        p++;

        char *end_ptr;
        long value = strtol(p, &end_ptr, 10);
        if (end_ptr == p) return -1;  /* no digits: also rejects "AA1" */
        /* Reject out-of-range values before narrowing the long to int. */
        if (value < 1 || value > EXCEL_MAX_ROW) return -1;
        rows[part] = (int)value;
        p = end_ptr;
    }

    int start_row = rows[0];
    int end_row = rows[1];
    char start_col = cols[0];
    char end_col = cols[1];
    if (start_row > end_row) {
        int swap = start_row;
        start_row = end_row;
        end_row = swap;
    }
    if (start_col > end_col) {
        char swap = start_col;
        start_col = end_col;
        end_col = swap;
    }

    range->start_row = start_row;
    range->end_row = end_row;
    range->start_col = start_col;
    range->end_col = end_col;
    printf("start_row=%d,end_row=%d",start_row,end_row);
    return 0;
}
/*
 * qsort comparator over indexes into the global results array: orders by row
 * number, then by position in the array, so the earliest entry recorded for
 * a cell stays first within its row.
 */
static int compare_result_index(const void *lhs, const void *rhs) {
    const int a = *(const int *)lhs;
    const int b = *(const int *)rhs;

    if (results[a].row != results[b].row) {
        return (results[a].row < results[b].row) ? -1 : 1;
    }
    return (a > b) - (a < b);
}

/*
 * Write one CSV field. Fields containing a comma, quote, CR or LF are wrapped
 * in double quotes and embedded quotes are doubled.
 */
static void write_csv_field(FILE *csv, const char *value) {
    if (!strpbrk(value, ",\"\r\n")) {
        fputs(value, csv);
        return;
    }
    fputc('"', csv);
    for (const char *p = value; *p; p++) {
        if (*p == '"') {
            fputc('"', csv);
        }
        fputc(*p, csv);
    }
    fputc('"', csv);
}

/**
 * Write the collected cells to a CSV file.
 *
 * The first line is a header ("Row" followed by one column letter per
 * column). Each following line holds a row number and the values of that
 * row. Columns span from the lowest to the highest column letter present in
 * the results; rows are written in ascending order and rows without any
 * non-empty cell are skipped. When a cell was recorded more than once, the
 * first recorded entry is written.
 *
 * @param filename path of the CSV file to create
 * @return 0 on success; -1 when there is nothing to write, memory cannot be
 *         allocated, or the file cannot be created or written
 */
int save_results_to_csv(const char *filename) {
    if (!filename || !results || result_count <= 0) return -1;
    printf("result_count=%d\n",result_count);

    int *order = (int *)malloc((size_t)result_count * sizeof *order);
    if (!order) {
        fprintf(stderr, "内存分配失败\n");
        return -1;
    }

    /* Column span actually present in the results. */
    int start_col = results[0].col;
    int end_col = results[0].col;
    for (int i = 0; i < result_count; i++) {
        order[i] = i;
        if (results[i].col < start_col) start_col = results[i].col;
        if (results[i].col > end_col) end_col = results[i].col;
    }
    int col_count = end_col - start_col + 1;

    /* Per-row lookup table: result index for each column, -1 when absent. */
    int *first_in_col = (int *)malloc((size_t)col_count * sizeof *first_in_col);
    if (!first_in_col) {
        fprintf(stderr, "内存分配失败\n");
        free(order);
        return -1;
    }

    FILE *csv = fopen(filename, "w");
    if (!csv) {
        perror("无法创建CSV文件");
        free(first_in_col);
        free(order);
        return -1;
    }

    /* Sorting once groups each row's cells together, replacing a full scan
     * of the results for every (row, column) pair. */
    qsort(order, (size_t)result_count, sizeof *order, compare_result_index);

    /* Header line */
    fprintf(csv, "Row,");
    for (int c = 0; c < col_count; c++) {
        fprintf(csv, "%c", start_col + c);
        if (c < col_count - 1) fprintf(csv, ",");
    }
    fprintf(csv, "\n");

    int group_start = 0;
    while (group_start < result_count) {
        const int current_row = results[order[group_start]].row;
        int has_data = 0;

        for (int c = 0; c < col_count; c++) {
            first_in_col[c] = -1;
        }

        /* Consume every result belonging to this row. */
        int group_end = group_start;
        while (group_end < result_count &&
               results[order[group_end]].row == current_row) {
            const int idx = order[group_end];
            const int slot = results[idx].col - start_col;
            if (first_in_col[slot] == -1) {
                first_in_col[slot] = idx;
            }
            if (!results[idx].is_empty) {
                has_data = 1;
            }
            group_end++;
        }
        group_start = group_end;

        if (!has_data) continue;  /* skip rows holding only empty cells */

        fprintf(csv, "%d,", current_row);
        for (int c = 0; c < col_count; c++) {
            const int idx = first_in_col[c];
            if (idx != -1 && !results[idx].is_empty && results[idx].value[0] != '\0') {
                write_csv_field(csv, results[idx].value);
            }
            if (c < col_count - 1) {
                fputc(',', csv);
            }
        }
        fputc('\n', csv);
    }

    free(first_in_col);
    free(order);

    /* fclose() flushes buffered data, so its result matters for a written file. */
    int write_failed = ferror(csv);
    if (fclose(csv) != 0) {
        write_failed = 1;
    }
    if (write_failed) {
        return -1;
    }

    printf("结果已保存到: %s\n", filename);
    return 0;
}
/**
 * Scan a NUL-terminated chunk of worksheet XML and record the cells it holds.
 *
 * Recognises <row>, </row>, <c ...> and </c>. For each cell inside a row the
 * column letter is taken from the r="..." attribute of the cell's own start
 * tag and the value from the <v>...</v> element inside that cell. Cells in
 * range are passed to add_cell_result(). Scanning stops at the </row> of a
 * row whose number is at or beyond range.end_row. The buffer is not modified.
 *
 * @param buffer           XML text to scan
 * @param range            requested cell range
 * @param in_row           in/out: non-zero while inside a <row> element
 * @param current_row      in/out: number of the row being scanned, -1 if none
 * @param current_cell_col in/out: column letter of the cell being scanned
 * @param temp_value       scratch buffer of at least MAX_CELL_CONTENT bytes
 */
void process_xml_buffer(char *buffer, ParseRange range, int *in_row, int *current_row,
                        char *current_cell_col, char *temp_value) {
    char *pos = buffer;

    while (*pos) {
        if (strncmp(pos, "<row", 4) == 0) {
            char *row_attr = strstr(pos, "r=\"");
            if (row_attr) {
                *current_row = atoi(row_attr + 3);
            }
            *in_row = 1;
            pos += 4;
        }
        else if (strncmp(pos, "</row>", 6) == 0) {
            if (*current_row >= range.end_row) {
                /* Last requested row is complete: stop scanning. */
                return;
            }
            *in_row = 0;
            *current_row = -1;
            pos += 6;
        }
        else if (*in_row && strncmp(pos, "<c ", 3) == 0) {
            char *buf_end = pos + strlen(pos);
            char *tag_end = strchr(pos, '>');  /* end of the cell's start tag */
            char *tag_limit = tag_end ? tag_end : buf_end;
            char *col_attr = strstr(pos, "r=\"");

            /* Accept the attribute only when it belongs to this start tag. */
            if (col_attr && col_attr < tag_limit) {
                int is_empty = 1;
                int is_self_closing = (tag_end != NULL && tag_end[-1] == '/');

                *current_cell_col = col_attr[3];
                temp_value[0] = '\0';

                if (tag_end && !is_self_closing) {
                    /* Confine the value search to this cell so a later cell's
                     * <v> is not picked up. */
                    char *cell_end = strstr(tag_end, "</c>");
                    if (!cell_end) {
                        cell_end = buf_end;
                    }
                    char *v_tag = strstr(tag_end, "<v>");
                    if (v_tag && v_tag < cell_end) {
                        char *value_start = v_tag + 3;
                        char *v_end = strstr(value_start, "</v>");
                        if (v_end && v_end < cell_end) {
                            size_t len = (size_t)(v_end - value_start);
                            if (len > MAX_CELL_CONTENT - 1) {
                                len = MAX_CELL_CONTENT - 1;
                            }
                            /* Copy by length instead of writing a NUL into the
                             * buffer, which would cut off all following cells. */
                            memcpy(temp_value, value_start, len);
                            temp_value[len] = '\0';
                            is_empty = 0;
                        }
                    }
                }

                if (is_cell_in_range(*current_row, *current_cell_col, range)) {
                    add_cell_result(*current_row, *current_cell_col, temp_value, is_empty);
                }
            }
            pos += 3;
        }
        else if (strncmp(pos, "</c>", 4) == 0) {
            *current_cell_col = '\0';
            pos += 4;
        }
        else {
            pos++;
        }
    }
}
/**
 * React to one tag reported by the character-level parser.
 *
 * Row tags update the row state unconditionally: a start tag sets *in_row,
 * an end tag clears it and resets *current_row to -1. All other tags are
 * acted on only while the current position is "tracked", i.e. *current_row
 * is between 1 and range.end_row and *current_cell_col lies inside the
 * requested columns:
 *   - </c> clears the cell state; a self-closing <c/> records an empty cell
 *   - <v> and <t> start capturing text; </v> and </t> store the captured
 *     text as the cell's value when it is non-empty and the cell is in range
 *
 * @param tag_name         tag name, with a leading '/' for end tags
 * @param attr_value       unused
 * @param attr_count       unused
 * @param range            requested cell range
 * @param in_row           in/out: non-zero while inside a <row> element
 * @param current_row      in/out: number of the row being parsed, -1 if none
 * @param current_cell_col in/out: column letter of the cell being parsed
 * @param temp_value       buffer holding the text captured so far
 * @param is_self_closing  non-zero when the tag ended with "/>"
 * @param value_started    in/out: non-zero while cell text is being captured
 * @param value_len        in/out: number of characters held in temp_value
 */
void process_tag(const char *tag_name, const char *attr_value, int attr_count,
                 ParseRange range, int *in_row, int *current_row,
                 char *current_cell_col, char *temp_value, int *is_self_closing,
                 int *value_started, int *value_len) {
    (void)attr_value;
    (void)attr_count;

    const int is_end_tag = (tag_name[0] == '/');
    const char *tag = is_end_tag ? tag_name + 1 : tag_name;

    if (strcmp(tag, "row") == 0) {
        /* `tag` has already lost its leading slash, so the end tag is
         * identified through is_end_tag rather than by comparing to "/row". */
        if (is_end_tag) {
            *in_row = 0;
            *current_row = -1;
        } else {
            *in_row = 1;
        }
        return;
    }

    const int tracked =
        *current_row >= 1 && *current_row <= range.end_row &&
        *current_cell_col >= range.start_col && *current_cell_col <= range.end_col;
    if (!tracked) {
        return;
    }

    if (strcmp(tag, "c") == 0) {
        if (is_end_tag) {
            *current_cell_col = '\0';
            *value_started = 0;
            *value_len = 0;
        } else if (*is_self_closing) {
            /* A self-closing cell has no content: record it as empty. */
            if (is_cell_in_range(*current_row, *current_cell_col, range)) {
                add_cell_result(*current_row, *current_cell_col, "", 1);
            }
            *current_cell_col = '\0';
        }
        return;
    }

    /* <v> holds a cell value, <t> holds inline string text; both are
     * captured the same way. */
    if (strcmp(tag, "v") == 0 || strcmp(tag, "t") == 0) {
        if (is_end_tag) {
            temp_value[*value_len] = '\0';
            if (*value_len > 0 && is_cell_in_range(*current_row, *current_cell_col, range)) {
                add_cell_result(*current_row, *current_cell_col, temp_value, 0);
            }
            *value_started = 0;
            *value_len = 0;
        } else {
            *value_started = 1;
            *value_len = 0;
            temp_value[0] = '\0';
        }
    }
}
/**
 * React to one attribute reported by the character-level parser.
 *
 * - r on <row>: stores the row number in *current_row.
 * - r on <c>:   stores the column letter in *current_cell_col. Only
 *               single-letter columns are supported; a reference with two or
 *               more leading letters (e.g. "AA5") sets the column to '\0' so
 *               the cell is skipped rather than attributed to column 'A'.
 * - t:          remembers the cell type text (currently not read anywhere).
 * Other attributes are ignored.
 *
 * @param tag_name         name of the tag owning the attribute
 * @param attr_name        attribute name
 * @param attr_value       attribute value
 * @param range            unused
 * @param in_row           unused
 * @param current_row      out: row number for r on <row>
 * @param current_cell_col out: column letter for r on <c>
 * @param temp_value       unused
 * @param value_started    unused
 * @param value_len        unused
 */
void process_attribute(const char *tag_name, const char *attr_name, const char *attr_value,
                       ParseRange range, int *in_row, int *current_row,
                       char *current_cell_col, char *temp_value,
                       int *value_started, int *value_len) {
    (void)range;
    (void)in_row;
    (void)temp_value;
    (void)value_started;
    (void)value_len;

    const char *tag = (tag_name[0] == '/') ? tag_name + 1 : tag_name;
    /* Type of the most recent cell that carried a t attribute. */
    static char cell_type[16] = {0};

    if (strcmp(attr_name, "r") == 0) {
        if (strcmp(tag, "row") == 0) {
            *current_row = atoi(attr_value);
        } else if (strcmp(tag, "c") == 0) {
            if (isalpha((unsigned char)attr_value[0]) &&
                isalpha((unsigned char)attr_value[1])) {
                /* Multi-letter column: not representable in one char. */
                *current_cell_col = '\0';
            } else {
                *current_cell_col = attr_value[0];
            }
        }
    }
    else if (strcmp(attr_name, "t") == 0) {
        snprintf(cell_type, sizeof(cell_type), "%s", attr_value);
    }
}
/* Return 1 for the characters XML treats as whitespace inside a tag. */
static int is_tag_space(int c) {
    return c == ' ' || c == '\t' || c == '\r' || c == '\n';
}

/**
 * Scan the file one character at a time from start_pos and collect the
 * cells inside `range`.
 *
 * A small state machine splits the stream into tags, attributes and text:
 *   state 0  text between tags (captured while a value is being read)
 *   state 1  just after '<'
 *   state 2  inside a tag name
 *   state 3  inside a tag, between attributes or reading an attribute name
 *   state 4  after '=', waiting for the quoted attribute value
 * process_tag() is called when a tag name ends and again when a tag that has
 * attributes closes; process_attribute() is called for every attribute.
 * Scanning stops at end_pos, at end of file, or as soon as a row number
 * beyond range.end_row has been read.
 *
 * @param file      open, seekable stream
 * @param range     requested cell range
 * @param start_pos byte offset at which to start
 * @param end_pos   byte offset at which to stop
 */
void parse_row_data(FILE *file, ParseRange range, long start_pos, long end_pos) {
    char temp_value[MAX_CELL_CONTENT];
    int in_row = 0;
    int current_row = -1;
    char current_cell_col = '\0';
    int value_started = 0;
    int value_len = 0;
    int is_self_closing = 0;
    int state = 0;
    char tag_name[32] = {0};
    char attr_name[16] = {0};
    char attr_value[256] = {0};
    size_t tag_len = 0;
    size_t attr_len = 0;

    temp_value[0] = '\0';
    if (fseek(file, start_pos, SEEK_SET) != 0) {
        return;
    }

    int c;
    while ((c = fgetc(file)) != EOF && ftell(file) <= end_pos) {
        switch (state) {
        case 0: /* text */
            if (c == '<') {
                state = 1;
                tag_len = 0;
                attr_len = 0;
                tag_name[0] = '\0';
                is_self_closing = 0;
            } else if (value_started && value_len < MAX_CELL_CONTENT - 1) {
                temp_value[value_len++] = (char)c;
                temp_value[value_len] = '\0';
            }
            break;

        case 1: /* just after '<' */
            if (c == '/') {
                /* End tag: keep the slash as the first character of the name
                 * and read the rest of the name in state 2. */
                tag_name[tag_len++] = (char)c;
                state = 2;
            } else if (c == '>' || is_tag_space(c)) {
                tag_name[tag_len] = '\0';
                process_tag(tag_name, NULL, 0, range, &in_row, &current_row,
                            &current_cell_col, temp_value, &is_self_closing,
                            &value_started, &value_len);
                state = (c == '>') ? 0 : 3;
            } else if (c == '?' || c == '!') {
                /* <?xml ...?> and <!-- ... -->: treat the remainder as text */
                state = 0;
            } else if (tag_len < sizeof(tag_name) - 1) {
                tag_name[tag_len++] = (char)c;
                state = 2;
            }
            break;

        case 2: /* tag name */
            if (c == '>' || is_tag_space(c)) {
                tag_name[tag_len] = '\0';
                process_tag(tag_name, NULL, 0, range, &in_row, &current_row,
                            &current_cell_col, temp_value, &is_self_closing,
                            &value_started, &value_len);
                state = (c == '>') ? 0 : 3;
            } else {
                if (c == '/') {
                    c = fgetc(file);
                    if (c == '>') {
                        /* self-closing tag without attributes: <tag/> */
                        tag_name[tag_len] = '\0';
                        is_self_closing = 1;
                        process_tag(tag_name, NULL, 0, range, &in_row, &current_row,
                                    &current_cell_col, temp_value, &is_self_closing,
                                    &value_started, &value_len);
                        state = 0;
                        break;
                    }
                    if (c == EOF) {
                        break;
                    }
                }
                if (tag_len < sizeof(tag_name) - 1) {
                    tag_name[tag_len++] = (char)c;
                }
            }
            break;

        case 3: /* between attributes / attribute name */
            if (c == '=') {
                attr_name[attr_len] = '\0';
                state = 4;
                attr_len = 0;
            } else if (c == '>') {
                process_tag(tag_name, NULL, 0, range, &in_row, &current_row,
                            &current_cell_col, temp_value, &is_self_closing,
                            &value_started, &value_len);
                state = 0;
            } else if (c == '/') {
                c = fgetc(file);
                if (c == '>') {
                    /* self-closing tag with attributes: <tag a="b"/> */
                    is_self_closing = 1;
                    process_tag(tag_name, NULL, 0, range, &in_row, &current_row,
                                &current_cell_col, temp_value, &is_self_closing,
                                &value_started, &value_len);
                    state = 0;
                } else if (c != EOF && !is_tag_space(c) &&
                           attr_len < sizeof(attr_name) - 1) {
                    attr_name[attr_len++] = (char)c;
                }
            } else if (is_tag_space(c)) {
                /* Whitespace separates attributes and is not part of the
                 * name; keeping it would turn "t" into " t". */
            } else if (attr_len < sizeof(attr_name) - 1) {
                attr_name[attr_len++] = (char)c;
            }
            break;

        case 4: /* attribute value */
            if (c == '"' || c == '\'') {
                const int quote_char = c;
                size_t val_len = 0;
                /* Always read up to the closing quote; characters that do not
                 * fit are dropped so the parser stays in sync with the input. */
                while ((c = fgetc(file)) != EOF && c != quote_char) {
                    if (val_len < sizeof(attr_value) - 1) {
                        attr_value[val_len++] = (char)c;
                    }
                }
                attr_value[val_len] = '\0';
                process_attribute(tag_name, attr_name, attr_value, range, &in_row,
                                  &current_row, &current_cell_col, temp_value,
                                  &value_started, &value_len);
                state = 3;
            }
            break;

        default:
            break;
        }

        /* Rows past the requested range cannot contribute anything. */
        if (current_row > range.end_row) return;
    }
}
/* Copy src into dst (capacity `size`); returns -1 instead of truncating. */
static int copy_bounded(char *dst, size_t size, const char *src) {
    int written = snprintf(dst, size, "%s", src);
    return (written < 0 || (size_t)written >= size) ? -1 : 0;
}

/*
 * Append ".csv" to `name` unless the name already contains ".csv".
 * Returns 0 on success, -1 when the extension does not fit into `size` bytes.
 */
static int ensure_csv_extension(char *name, size_t size) {
    if (strstr(name, ".csv")) {
        return 0;
    }
    size_t len = strlen(name);
    if (len + sizeof(".csv") > size) {
        return -1;
    }
    memcpy(name + len, ".csv", sizeof(".csv"));
    return 0;
}

/**
 * Program entry point.
 *
 * With three arguments (xml file, range, csv file) the program runs
 * non-interactively; with no arguments it prompts for the same three values.
 * Any other argument count prints usage. The XML file is parsed for the
 * requested range and the result is written as CSV.
 *
 * @return 0 on success, -1 on any failure
 */
int main(int argc, char *argv[]) {
    char filename[1024] = {0};
    char csv_filename[1024] = {0};
    char range_input[64] = {0};
    ParseRange range;
    int interactive_mode = 0;

    printf("%s 程序启动\n", get_timestamped_msg(""));

    if (argc == 4) {
        /* command-line mode: program xml_file range csv_file */
        if (copy_bounded(filename, sizeof(filename), argv[1]) != 0 ||
            copy_bounded(range_input, sizeof(range_input), argv[2]) != 0 ||
            copy_bounded(csv_filename, sizeof(csv_filename), argv[3]) != 0) {
            printf("失败: 参数过长\n");
            return -1;
        }
    } else if (argc == 1) {
        interactive_mode = 1;
        printf("=== Excel XML解析器 (交互模式) ===\n");

        printf("请输入XML文件路径: ");
        if (!fgets(filename, sizeof(filename), stdin)) {
            printf("错误: 无法读取文件名\n");
            return -1;
        }
        filename[strcspn(filename, "\r\n")] = 0;

        printf("请输入解析范围 (格式如 A1:H5): ");
        if (!fgets(range_input, sizeof(range_input), stdin)) {
            printf("错误: 无法读取范围\n");
            return -1;
        }
        range_input[strcspn(range_input, "\r\n")] = 0;

        printf("请输入CSV输出文件名 (默认: output.csv): ");
        if (!fgets(csv_filename, sizeof(csv_filename), stdin)) {
            csv_filename[0] = '\0';
        }
        csv_filename[strcspn(csv_filename, "\r\n")] = 0;
        if (csv_filename[0] == '\0') {
            strcpy(csv_filename, "output.csv");
        }
    } else {
        printf("用法:\n");
        printf(" %s <xml文件路径> <范围(A1:H5)> <csv输出文件名>\n", argv[0]);
        printf(" %s (进入交互模式)\n", argv[0]);
        printf("示例:\n");
        printf(" %s sheet.xml B3:H5 result.csv\n", argv[0]);
        return -1;
    }

    /* Make sure the output name carries a .csv extension without writing
     * past the end of the buffer. */
    if (ensure_csv_extension(csv_filename, sizeof(csv_filename)) != 0) {
        printf("失败: CSV文件名过长\n");
        return -1;
    }

    if (parse_excel_range(range_input, &range) != 0) {
        printf("失败: 范围格式错误,请使用格式如 A1:H5\n");
        return -1;
    }
    print_parse_range(range);

    int status = -1;
    if (parse_sheet_xml(filename, range) != 0) {
        printf("失败: 解析XML文件失败\n");
    } else {
        /* Detailed cell output is shown in interactive mode only. */
        if (interactive_mode) {
            printf("\n解析成功!\n");
            print_results();
            printf("\n正在保存到CSV...\n");
        } else {
            printf("成功: 解析完成\n");
        }
        printf("%s XML解析完成\n", get_timestamped_msg(""));

        if (save_results_to_csv(csv_filename) == 0) {
            printf("%s CSV保存完成\n", get_timestamped_msg(""));
            if (interactive_mode) {
                printf("完成!\n");
            }
            status = 0;
        } else {
            printf("失败: 无法保存CSV文件\n");
        }
    }

    free_results();
    return status;
}
编译和运行
gcc bsxml13.c -o bsxml -O3
time ./bsxml /shujv/par/dknyc/xl/worksheets/sheet1.xml A210000:Z211000 obig32.csv
[00:00.000] 程序启动
start_row=210000,end_row=211000解析范围: A210000:Z211000
[01:26.800] 二分查找
二分查找到row_start_pos=358064889
成功: 解析完成
[01:26.881] XML解析完成
result_count=28161
结果已保存到: obig32.csv
[01:30.019] CSV保存完成
real 1m39.296s
user 0m40.100s
sys 0m49.920s
可见,当目标范围位于行数很多的文件的靠后位置时,"二分查找"阶段非常慢(从上面的日志看,这一阶段耗时约1分26秒,而随后的解析不到0.1秒);张泽鹏先生已经决定和AI PK一下,拭目以待。