MIT XV6 - 1.4 Lab: Xv6 and Unix utilities - find

发布于:2025-05-11 ⋅ 阅读:(15) ⋅ 点赞:(0)

接上文 MIT XV6 - 1.3 Lab: Xv6 and Unix utilities - primes

find

继续实验,实验介绍和要求如下 (原文链接 译文链接) :

Write a simple version of the UNIX find program for xv6: find all the files in a directory tree with a specific name. Your solution should be in the file user/find.c.

Some hints:

  • Look at user/ls.c to see how to read directories.
  • Use recursion to allow find to descend into sub-directories.
  • Don’t recurse into “.” and “…”.
  • Changes to the file system persist across runs of qemu; to get a clean file system run make clean and then make qemu.
  • You’ll need to use C strings. Have a look at K&R (the C book), for example Section 5.5.
  • Note that == does not compare strings like in Python. Use strcmp() instead.
  • Add the program to UPROGS in Makefile.

Your solution is correct if produces the following output (when the file system contains the files b, a/b and a/aa/b):

$ make qemu
   ...
   init: starting sh
   $ echo > b
   $ mkdir a
   $ echo > a/b
   $ mkdir a/aa
   $ echo > a/aa/b
   $ find . b
   ./b
   ./a/b
   ./a/aa/b
   $

意思就是参考 ls.c 中对文件遍历的实现写一个find工具,可以指定目录和文件名进行查找,且注意 对 ... 两个节点的过滤,另外也提到了C语言教材的5.5节

/**
 * find.c - A file search utility for xv6 operating system
 * 
 * This program implements a recursive file search functionality similar to the Unix 'find' command.
 * It searches for a specific filename within a given directory path and its subdirectories.
 * 
 * Algorithm Overview:
 * 
 * 1. Start at the given path
 * 2. For each entry in the current directory:
 *    ├── If it's a file:
 *    │   └── Compare with target filename
 *    │       ├── Match → Print full path
 *    │       └── No match → Continue
 *    │
 *    └── If it's a directory:
 *        ├── Skip "." and ".."
 *        └── Recursively search subdirectory
 * 
 * The algorithm uses depth-first search (DFS) to traverse the directory tree.
 */

#include "kernel/types.h"
#include "kernel/fcntl.h"
#include "kernel/fs.h"
#include "kernel/stat.h"
#include "user/user.h"

/**
 * Extracts the filename from a given path
 * @param path The full path string
 * @return Pointer to the filename portion of the path
 * 
 * Example:
 * Input: "/usr/bin/file.txt"
 * Output: "file.txt"
 */
const char *get_path_name(const char *path) {
  const char *p;

  // Find first character after last slash.
  for (p = path + strlen(path); p >= path && *p != '/'; p--)
    ;
  p++;

  return p;
}

/**
 * Recursively searches for a file in a directory and its subdirectories
 * @param path The directory path to search in
 * @param filename The name of the file to find
 */
void find(const char *path, const char *filename) {
  // Buffer for constructing full paths
  char buf[512], *p;
  int fd;
  struct dirent de;
  struct stat st;

  // Open the directory
  if ((fd = open(path, O_RDONLY)) < 0) {
    fprintf(2, "find: cannot open %s\n", path);
    return;
  }

  // Get file status
  if (fstat(fd, &st) < 0) {
    fprintf(2, "find: cannot stat %s\n", path);
    close(fd);
    return;
  }

  switch (st.type) {
  case T_DEVICE:
  case T_FILE:
    // If it's a file, check if it matches the target filename
    if (strcmp(get_path_name(path), filename) == 0) {
      fprintf(1, "%s\n", path);
    }
    break;
  case T_DIR:
    // If it's a directory, check path length and prepare for recursion
    if (strlen(path) + 1 + DIRSIZ + 1 > sizeof buf) {
      fprintf(2, "find: path too long\n");
      break;
    }
    // Construct the base path for subdirectories
    strcpy(buf, path);
    p = buf + strlen(buf);
    *p++ = '/';
    
    // Read directory entries
    while (read(fd, &de, sizeof(de)) == sizeof(de)) {
      if (de.inum == 0)
        continue;
      // Construct full path for current entry
      memmove(p, de.name, DIRSIZ);
      p[DIRSIZ] = 0;
      if (stat(buf, &st) < 0) {
        fprintf(2, "find: cannot stat %s\n", buf);
        continue;
      }

      const char *name = get_path_name(buf);

      // Skip "." and ".." directories to avoid infinite recursion
      if (st.type == T_DIR && strcmp(name, ".") != 0 &&
          strcmp(name, "..") != 0) {
        find(buf, filename);  // Recursive call for subdirectories
      } else if (st.type == T_FILE && strcmp(name, filename) == 0) {
        fprintf(1, "%s\n", buf);  // Found a matching file
      }
    }
    break;
  }
  close(fd);
}

/**
 * Main function to handle command line arguments and start the search
 * @param argc Number of command line arguments
 * @param argv Array of command line arguments
 * @return Exit status
 */
int main(int argc, char *argv[]) {
  if (argc != 3) {
    fprintf(2, "Usage: find <path> <filename>\n");
    exit(1);
  }

  const char *path = argv[1];
  const char *filename = argv[2];

  find(path, filename);

  exit(0);
}

没什么好说的就是一个循环加遍历,实验结果

make qemu
riscv64-unknown-elf-gcc -Wall -Werror -O -fno-omit-frame-pointer -ggdb -gdwarf-2 -DSOL_UTIL -DLAB_UTIL -MD -mcmodel=medany -ffreestanding -fno-common -nostdlib -mno-relax -I. -fno-stack-protector -fno-pie -no-pie -I/opt/homebrew/opt/openjdk@17/include  -c -o user/find.o user/find.c
riscv64-unknown-elf-ld -z max-page-size=4096 -T user/user.ld -o user/_find user/find.o user/ulib.o user/usys.o user/printf.o user/umalloc.o
riscv64-unknown-elf-objdump -S user/_find > user/find.asm
riscv64-unknown-elf-objdump -t user/_find | sed '1,/SYMBOL TABLE/d; s/ .* / /; /^$/d' > user/find.sym
mkfs/mkfs fs.img README  user/xargstest.sh user/_cat user/_echo user/_forktest user/_grep user/_init user/_kill user/_ln user/_ls user/_mkdir user/_rm user/_sh user/_stressfs user/_usertests user/_grind user/_wc user/_zombie user/_sleep user/_pingpong user/_primes user/_find 
nmeta 46 (boot, super, log blocks 30 inode blocks 13, bitmap blocks 1) blocks 1954 total 2000
balloc: first 899 blocks have been allocated
balloc: write bitmap block at sector 45
qemu-system-riscv64 -machine virt -bios none -kernel kernel/kernel -m 128M -smp 3 -nographic -global virtio-mmio.force-legacy=false -drive file=fs.img,if=none,format=raw,id=x0 -device virtio-blk-device,drive=x0,bus=virtio-mmio-bus.0

xv6 kernel is booting

hart 2 starting
hart 1 starting
init: starting sh
$ echo > b
$ mkdir a
$ echo > a/b
$ mkdir a/aa
$ echo > a/aa/b
$ find . b
./b
./a/b
./a/aa/b
$ 

比较有意思的是,我把对...过滤去掉,会得到一个异常

find . b
usertrap(): unexpected scause 0x000000000000000f pid=3
            sepc=0x0000000000000888 stval=0x0000000000002f98

我尝试gdb debug了,但是。。。还不知道怎么去debug具体进程,留个坑

AI对此的解释是

这个错误信息包含几个关键部分:
scause = 0x000000000000000f:
在 RISC-V 中,scause 寄存器用于指示异常的原因
0xf (15) 表示这是一个 “Store/AMO page fault”(存储/原子操作页面错误)
这通常意味着程序试图写入一个无效的内存地址或没有写入权限的内存区域
sepc = 0x0000000000000888:
这是发生异常时的程序计数器(PC)值
表示异常发生在这条指令的地址
stval = 0x0000000000002f98:
这是导致异常的虚拟地址
程序试图访问/写入这个地址时发生了错误
pid=3:
发生异常的进程 ID 是 3
这个错误很可能是因为我们刚才修改的代码中移除了对 “.” 和 “…” 目录的检查

让我解释一下为什么这个检查是必要的:
在文件系统中,“.” 表示当前目录,“…” 表示父目录
如果不跳过这些特殊目录:
当处理 “.” 时,程序会再次进入当前目录,导致无限递归
当处理 “…” 时,程序会进入父目录,然后再次进入当前目录,也会导致无限递归
这种无限递归最终会导致:
栈空间耗尽(因为每个递归调用都会占用栈空间)
或者访问无效的内存地址(当栈溢出时)
这就是为什么我们看到了 “Store/AMO page fault” 错误 - 程序试图在无效的内存地址上执行操作,这通常是由于栈溢出导致的。