6.S081 Lab - Xv6 and Unix Utilities

开新坑刷 MIT 6.S081 Operating System Engineering (Fall 2024)。

实验指导:Lab - Xv6 and Unix Utilities

第一个实验要求实现几个用户程序来熟悉 xv6 和 xv6 的系统调用。

实验任务

sleep

使用 sleep 系统调用实现一个用户空间的 sleep 程序。

1
2
3
4
5
6
7
8
9
10
11
12
13
#include "kernel/types.h"
#include "user/user.h"

int main(int argc, char *argv[]) {
if (argc == 1) {
fprintf(2, "usage: sleep ticks\n");
exit(1);
}

int ticks = atoi(argv[1]);
sleep(ticks);
exit(0);
}

pingpong

这个任务应该是为了理解 fork 和 pipe 设计的。这个实验要求:父进程给子进程发送一个 byte,子进程收到 byte 后打印 “(pid): received ping”;接着子进程给父进程发送一个 byte,父进程收到 byte 后打印 “(pid): received pong”。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
#include "kernel/types.h"
#include "user/user.h"

int main(void) {
int pipe_a[2]; // R: child, W: parent
int pipe_b[2]; // R: parent, W: child

pipe(pipe_a);
pipe(pipe_b);

const char byte = 'b';
char buf_byte = 0;

if (fork() != 0) {
// parent process

close(pipe_a[0]);
close(pipe_b[1]);

write(pipe_a[1], &byte, 1);
close(pipe_a[1]);

read(pipe_b[0], &buf_byte, 1);
close(pipe_b[0]);

printf("%d: received pong\n", getpid());
} else {
// child process

close(pipe_a[1]);
close(pipe_b[0]);

read(pipe_a[0], &buf_byte, 1);
close(pipe_a[0]);

printf("%d: received ping\n", getpid());

write(pipe_b[1], &byte, 1);
close(pipe_b[1]);
}

exit(0);
}

primes

这个实验要用多进程来模拟埃氏筛求素数:每发现一个素数就新建一个进程,放在原进程的右边(如图所示),然后用这个进程来筛掉其他的合数。在这个实验中会有一系列进程,由 pipe 穿起来形成一条 pipeline。

Doug McIlroy's Pipeline

这个实验需要注意的是要及时回收 fd,不然可分配的 fd 会在第一个进程发送 280 之前被用光。在 2024 年之前的 lab 是要求筛 2 到 35 之间的素数,但这 2024 年实验改版,要求筛选 2 到 280 之间的素数。这一点还是对及时 close fd 的要求挺高的,我在 2-35 的那个版本中实现的程序到 2024 版运行就出现 fd 分配失败的情况了。

我在这个程序里做了一个优化,如果 \(x > \sqrt{280}\) 并且在 \([2, \sqrt{280}]\) 之间没有因数,那么它一定是一个素数,我们可以直接 print,但是不创建新的进程。有个很有趣的现象是,在不加这个优化的情况下 Ubuntu 上的 qemu 是没问题的,但是在 Mac OS 上的 qemu 中运行就会出现一个 usertrap,加上优化之后就都可以通过,留个坑。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
#include "kernel/types.h"
#include "user/user.h"

const int P_READ = 0;
const int P_WRITE = 1;

const int max_n = 280;

// The prime of current process.
// Initialized with `max_n + 1` for `max_n % current != 0`.
int current = max_n + 1;

// The fd to read data from the left process.
int r_fd = -1;

// The fd to write data to the left process.
int w_fd = -1;

void handle(int x);

void log_prime(int x) {
printf("prime %d\n", x);
}

void handle_until_close() {
int x;
while (read(r_fd, &x, sizeof(x)) != 0) {
handle(x);
}

if (w_fd != -1) {
close(w_fd);
}

close(r_fd);
wait(0);
exit(0);
}

void init_r_proc(int cur_prime) {
// Prepare a new pipe.
int pipe_fds[2];
if (pipe(pipe_fds) < 0) {
fprintf(2, "Failed to create pipe\n");
exit(-1);
}

// Fork out a child process.
int pid = fork();
if (pid < 0) {
fprintf(2, "Failed to fork!\n");
close(pipe_fds[P_READ]);
close(pipe_fds[P_WRITE]);
return;
}

// Parent process.
if (pid != 0) {
w_fd = pipe_fds[P_WRITE];
close(pipe_fds[P_READ]);
return;
}

// Child process.
if (pid == 0) {
if (r_fd != -1) {
close(r_fd);
r_fd = -1;
}

if (w_fd != -1) {
close(w_fd);
w_fd = -1;
}

r_fd = pipe_fds[P_READ];
close(pipe_fds[P_WRITE]);

current = cur_prime;
log_prime(current);
handle_until_close();
return;
}
}

void handle(int x) {
if (x % current == 0) {
return;
}

// When x does not have a factor between [2, sqrt(x)],
// it can be determined to be a prime.
// Adding this optimization here to avoid too many processes are forked out.
if (current <= max_n && current * current > max_n) {
log_prime(x);
return;
}

if (w_fd != -1) {
write(w_fd, &x, sizeof(x));
return;
} else {
init_r_proc(x);
return;
}
}

int main(void) {
for (int i = 2; i <= max_n; ++i) {
handle(i);
}

close(w_fd);
wait(0);
exit(0);
}

find

这个任务要求实现 find 程序:find <root> <pattern> 要求打印制定的 root 下文件名为 pattern 的文件路径。DFS 即可。需要注意的是每次 read 一个 path 下的文件或者子目录的时候,会包含 ...,不需要对这两个 entry 进行递归求解。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#include "kernel/types.h"
#include "kernel/fs.h"
#include "kernel/fcntl.h"
#include "kernel/stat.h"
#include "user/user.h"

char g_full_path[512];

int matches(const char *file_name, const char *pattern) {
return strcmp(pattern, "*") == 0 || strcmp(file_name, pattern) == 0;
}

void find(const char *path, const char *pattern) {
int fd;
struct dirent de;
struct stat st;

// Backup the current end and push the path to g_full_path.
int end = strlen(g_full_path);
int end_backup = end;

if (end > 0 && g_full_path[end - 1] != '/') {
g_full_path[end++] = '/';
}

strcpy(g_full_path + end, path);
end += strlen(path);
g_full_path[end] = '\0';

// Open the fd.
if ((fd = open(g_full_path, O_RDONLY)) < 0) {
fprintf(2, "Cannot open %s\n", g_full_path);
goto clean_up_without_closing_fd;
}

// Grab the stat.
if (fstat(fd, &st) < 0) {
fprintf(2, "Cannot stat %s\n", g_full_path);
goto clean_up;
}

// Handle T_FILE.
if (st.type == T_FILE) {
if (matches(path, pattern)) {
printf("%s\n", g_full_path);
}

goto clean_up;
}

// Handle T_DIR.
if (st.type == T_DIR) {
while (read(fd, &de, sizeof(de)) == sizeof(de)) {
if (de.inum == 0) {
continue;
}

if (strcmp(de.name, ".") == 0 || strcmp(de.name, "..") == 0) {
continue;
}

find(de.name, pattern);
}

goto clean_up;
}

clean_up:
close(fd);

clean_up_without_closing_fd:
// Restore the g_full_path for backtracing.
g_full_path[end_backup] = '\0';
}

int main(int argc, char *argv[]) {
if (argc != 3) {
fprintf(2, "Usage: find <root> <pattern>\n");
exit(1);
}

char *root = argv[1];
char *patten = argv[2];
find(root, patten);
exit(0);
}

xargs

这个任务要实现 xargs。为了简化实验,只要求实现 -n 参数为 1 的时候的功能。从 stdin 按行读,读完之后 fork + exec 运行即可。这里需要注意的是 exec 的参数,第一个参数是待执行的程序的路径,第二个参数 argv 实际上是包含第一个参数的。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
#include "kernel/types.h"
#include "kernel/param.h"
#include "user/user.h"

char* g_argv[MAXARG];
int g_argc = 0;

int getline(char** line) {
static char buf[512];

int offset = 0;
char c;
while (1) {
if (read(0, &c, sizeof(c)) == 0) {
return -1;
}

if (c == '\n') {
buf[offset] = '\0';
*line = buf;
return 0;
}

buf[offset++] = c;
}
}

void execute(char* line) {
int g_argc_backup = g_argc;

int n = strlen(line);
for (int i = 0; i < n; ) {
if (i == ' ') {
line[i++] = '\0';
continue;
}

g_argv[g_argc++] = &line[i];
while (i < n && line[i] != ' ') {
++i;
}
}

g_argv[g_argc++] = 0;

if (fork() != 0) {
wait(0);
} else {
exec(g_argv[0], g_argv);
}

g_argc = g_argc_backup;
}

int main(int argc, char* argv[]) {
if (argc < 2) {
fprintf(2, "usage: xargs <command and args>.\n");
exit(-1);
}

for (int i = 1; i < argc; ++i) {
g_argv[g_argc++] = argv[i];
}

char* line;
while (getline(&line) != -1) {
execute(line);
}

exit(0);
}

实验结果

搞定!

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
➜  xv6-labs-2024 git:(dev/util) ✗ ./grade-lab-util
make: `kernel/kernel' is up to date.
== Test sleep, no arguments == sleep, no arguments: OK (0.6s)
== Test sleep, returns == sleep, returns: OK (0.9s)
== Test sleep, makes syscall == sleep, makes syscall: OK (1.0s)
== Test pingpong == pingpong: OK (1.0s)
== Test primes == primes: OK (1.2s)
== Test find, in current directory == find, in current directory: OK (1.0s)
== Test find, in sub-directory == find, in sub-directory: OK (0.9s)
== Test find, recursive == find, recursive: OK (1.2s)
== Test xargs == xargs: OK (1.0s)
== Test xargs, multi-line echo == xargs, multi-line echo: OK (0.7s)
== Test time ==
time: OK
Score: 110/110