1.javascript
张泽鹏先生创作
// Sort the lines of a text file and print the result to stdout.
// Usage: node main.js <filename>
const fs = require('fs');
const process = require('process');

// Exactly one positional argument (the input file) is accepted.
const cliArgs = process.argv.slice(2);
if (cliArgs.length !== 1) {
  console.error(`Usage: ${process.argv[1]} <filename>`);
  process.exit(1);
}

const [inputPath] = cliArgs;
const sortedLines = fs.readFileSync(inputPath, 'utf8').split('\n');

// A file ending in '\n' produces one trailing empty element; discard it so
// it is not treated as a line of its own.
if (sortedLines.at(-1) === '') {
  sortedLines.pop();
}

// Default Array.prototype.sort ordering (by UTF-16 code units).
sortedLines.sort();
process.stdout.write(`${sortedLines.join('\n')}\n`);
执行
time node-v24.4.1-linux-x64/bin/node main.js varchar.txt >qsort.txt
real 0m4.455s
user 0m1.416s
sys 0m0.495s
2.rust
张泽鹏先生创作
main.rs
use std::env;
use std::fs::File;
use std::io::{self, BufRead, BufReader, BufWriter, Write};
use std::process;
/// Program entry point.
///
/// Expects exactly one command-line argument (the file to sort). Prints a
/// usage message and exits with status 1 when the argument count is wrong,
/// and prints the error and exits with status 1 when sorting fails.
fn main() {
    let args: Vec<String> = env::args().collect();

    // args[0] is the program name, so a valid invocation has two entries.
    let filename = match args.as_slice() {
        [_, path] => path,
        _ => {
            eprintln!("Usage: {} <filename>", args[0]);
            process::exit(1);
        }
    };

    match sort_file_lines(filename) {
        Ok(()) => {}
        Err(e) => {
            eprintln!("Error: {}", e);
            process::exit(1);
        }
    }
}
/// Reads every line of `filename`, sorts the lines, and writes them to stdout,
/// each followed by a newline.
///
/// Returns the first I/O error hit while opening, reading, or writing.
fn sort_file_lines(filename: &str) -> io::Result<()> {
    let input = BufReader::with_capacity(10 * 1024, File::open(filename)?);

    // Gather all lines up front; the first read error aborts the whole call.
    let mut sorted: Vec<String> = Vec::new();
    for entry in input.lines() {
        sorted.push(entry?);
    }
    sorted.sort_unstable();

    // Hold the stdout lock for the whole write and batch output in a 64 KiB buffer.
    let stdout = io::stdout();
    let mut out = BufWriter::with_capacity(64 * 1024, stdout.lock());
    for text in &sorted {
        out.write_all(text.as_bytes())?;
        out.write_all(b"\n")?;
    }
    out.flush()
}
cargo.toml
# Package manifest for the Rust line-sorting benchmark binary.
[package]
name = "rust-sort"
version = "0.1.0"
edition = "2024"
# Settings applied to `cargo build --release`, the build that is timed.
[profile.release]
# Enable link-time optimization.
lto = true
# Strip symbols from the produced binary.
strip = true
# Intentionally empty: the program uses only the standard library.
[dependencies]
编译执行
cargo build --release
time rust-sort/target/release/rust-sort varchar.txt >qsort.txt
real 0m2.333s
user 0m0.596s
sys 0m0.193s
3.张泽鹏先生重写的zig语言
const std = @import("std");
const ArrayList = std.ArrayList;
/// Reads the file named by the first command-line argument, sorts its lines
/// in byte-wise order, and writes them to stdout, one per line.
///
/// Exits with status 1 when the filename is missing or the file cannot be
/// opened; other I/O and allocation errors are returned to the caller.
pub fn main() !void {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa.deinit();
    const allocator = gpa.allocator();

    // Buffer stdout so each line does not cost a separate write.
    const output = std.io.getStdOut().writer();
    var stream = std.io.bufferedWriter(output);
    const stdout = stream.writer();

    // 1. Get filename from command line arguments
    var args = std.process.args();
    _ = args.next(); // Skip program name
    const filename = args.next() orelse {
        std.debug.print("Usage: program <filename>\n", .{});
        // Report failure to the shell instead of returning success.
        std.process.exit(1);
    };

    // 2. Open and read the file
    const file = std.fs.cwd().openFile(filename, .{}) catch |err| {
        std.debug.print("Open file '{s}' failed: {}\n", .{ filename, err });
        std.process.exit(1);
    };
    defer file.close();

    // 3. Read file lines
    const content = try file.readToEndAlloc(allocator, std.math.maxInt(usize));
    defer allocator.free(content);

    // The slices stored here point into `content`; nothing is copied.
    var lines = ArrayList([]const u8).init(allocator);
    defer lines.deinit();

    var iterator = std.mem.splitScalar(u8, content, '\n');
    while (iterator.next()) |line| {
        try lines.append(line);
    }

    // splitScalar yields a final empty segment when the input ends with '\n'
    // (and a single empty segment for an empty file). That segment is not a
    // real line; drop it so no spurious blank line is sorted to the top.
    if (lines.items.len > 0 and lines.items[lines.items.len - 1].len == 0) {
        _ = lines.pop();
    }

    // 4. Sort
    std.mem.sort([]const u8, lines.items, {}, compare);

    for (lines.items) |line| {
        try stdout.print("{s}\n", .{line});
    }
    try stream.flush();
}
/// Ordering predicate for std.mem.sort: returns true when `a` comes strictly
/// before `b` in byte-wise lexicographic order. The context argument is unused.
fn compare(_: void, a: []const u8, b: []const u8) bool {
    return std.mem.lessThan(u8, a, b);
}
关键修改是用std.mem.splitScalar代替了mem.tokenizeSequence,提升很大
编译执行
zig build-exe main.zig -O ReleaseFast
time ./main varchar.txt > zsort.txt
real 0m3.748s
user 0m0.522s
sys 0m0.252s
4.deepseek重写的c语言
提示词:
编写c语言程序,将文件内容读入buf,然后遍历buf,遇到\n就记录偏移地址到一个100万元素的数组a,然后将\n位置填写\0,遍历完成后,对a执行排序,只修改a的元素存放的buf地址,然后浏览a,按地址输出每个字符串+\n到stdout
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#define MAX_LINES 1000000 // 最多100万行
/*
 * qsort() comparator for an array of `char *` line pointers.
 * Each argument points at one array element; the strings they refer to are
 * compared with strcmp(), whose result is returned unchanged.
 */
int compare_offsets(const void *a, const void *b) {
    const char *const *lhs = a;
    const char *const *rhs = b;
    return strcmp(*lhs, *rhs);
}
/*
 * Sort the lines of a text file and print them to stdout.
 *
 * Usage: <program> <filename>
 *
 * The whole file is read into one heap buffer, every '\n' is overwritten with
 * '\0' so each line becomes a C string, and only an array of pointers into
 * that buffer is sorted (byte-wise, via compare_offsets). The pointer array is
 * heap-allocated and sized from the actual newline count, so there is no fixed
 * line limit and no large stack frame; the MAX_LINES cap is not needed here.
 *
 * An empty file produces no output. A line containing an embedded NUL byte is
 * compared and printed only up to that byte.
 *
 * Returns 0 on success and 1 on a usage, I/O, or allocation error.
 */
int main(int argc, char *argv[]) {
    if (argc != 2) {
        fprintf(stderr, "Usage: %s <filename>\n", argv[0]);
        return 1;
    }

    /* Open the file and query its size. */
    int fd = open(argv[1], O_RDONLY);
    if (fd == -1) {
        perror("open");
        return 1;
    }
    struct stat st;
    if (fstat(fd, &st) == -1) {
        perror("fstat");
        close(fd);
        return 1;
    }
    size_t file_size = (size_t)st.st_size;

    /* One extra byte so the buffer is always NUL-terminated. */
    char *buf = malloc(file_size + 1);
    if (!buf) {
        perror("malloc");
        close(fd);
        return 1;
    }

    /* read() may return fewer bytes than requested, so keep reading until the
     * expected size is reached or end-of-file is hit. */
    size_t total = 0;
    while (total < file_size) {
        ssize_t got = read(fd, buf + total, file_size - total);
        if (got < 0) {
            perror("read");
            free(buf);
            close(fd);
            return 1;
        }
        if (got == 0) {
            break; /* file is shorter than fstat() reported */
        }
        total += (size_t)got;
    }
    close(fd);
    file_size = total;
    buf[file_size] = '\0';
    char *end = buf + file_size;

    /* Upper bound on the number of lines: one per newline, plus a possible
     * final line without a trailing newline. */
    size_t capacity = 1;
    for (const char *p = buf; p < end; p++) {
        if (*p == '\n') {
            capacity++;
        }
    }
    if (capacity > (size_t)-1 / sizeof(char *)) {
        fprintf(stderr, "too many lines\n");
        free(buf);
        return 1;
    }
    char **lines = malloc(capacity * sizeof *lines);
    if (!lines) {
        perror("malloc");
        free(buf);
        return 1;
    }

    /* Record the start of every line and turn each '\n' into '\0'. */
    size_t line_count = 0;
    if (file_size > 0) {
        lines[line_count++] = buf;
    }
    for (char *p = buf; p < end; p++) {
        if (*p == '\n') {
            *p = '\0';
            if (p + 1 < end) { /* a newline at end-of-file starts no new line */
                lines[line_count++] = p + 1;
            }
        }
    }

    /* Sort the pointers only; the text itself never moves. */
    qsort(lines, line_count, sizeof *lines, compare_offsets);

    for (size_t i = 0; i < line_count; i++) {
        fputs(lines[i], stdout);
        putc('\n', stdout);
    }

    /* Surface write failures (e.g. disk full on a redirected stdout). */
    int status = 0;
    if (fflush(stdout) == EOF || ferror(stdout)) {
        perror("write");
        status = 1;
    }

    free(lines);
    free(buf);
    return status;
}
编译执行
gcc sort_lines.c -o ds_sort -O3
time ./ds_sort varchar.txt > csort.txt
real 0m3.656s
user 0m0.421s
sys 0m0.261s
因为换了amd的机器,我把varchar.txt每行长度扩大了一倍,与上文的计时不可比,所以同时给出amd的机器Linux的sort命令的结果如下:
time sort varchar.txt > xsort.txt
real 0m3.434s
user 0m0.528s
sys 0m0.369s
总结,这几种语言的效率都差不多,修改后的zig跑到了同一起跑线。rust出奇地快,比sort命令还快。我都怀疑它对系统time作修改了,但张泽鹏先生说rust应该没这么无聊。
另外,在windows下编译c程序,需要扩大栈大小,否则执行出错。如下所示:
gcc sort_lines.c -o mingw_sort -O3 -Wl,--stack=68435456
timer64 mingw_sort varchar.txt > msort.txt
Kernel Time = 0.171 = 18%
User Time = 0.609 = 64%
Process Time = 0.781 = 83% Virtual Memory = 110 MB
Global Time = 0.939 = 100% Physical Memory = 113 MB
其中timer64来自7-benchmark, 它的结果被输送到重定向的文件。
测试时间说明,c语言本身并不慢,但是在wsl+docker环境中比较慢。