Путеводитель по Руководству Linux

  User  |  Syst  |  Libr  |  Device  |  Files  |  Other  |  Admin  |  Head  |



   seccomp    ( 2 )

работать с состоянием безопасных вычислений процесса (operate on Secure Computing state of the process)

  Name  |  Synopsis  |  Description  |  Return value  |  Error  |  Versions  |  Conforming to  |  Note  |    Examples    |  See also  |

Примеры (Examples)

The program below accepts four or more arguments.  The first
       three arguments are a system call number, a numeric architecture
       identifier, and an error number.  The program uses these values
       to construct a BPF filter that is used at run time to perform the
       following checks:

[1] If the program is not running on the specified architecture, the BPF filter kills the process as soon as it makes a system call (the filter returns SECCOMP_RET_KILL_PROCESS).

[2] If the program attempts to execute the system call with the specified number, the BPF filter causes the system call to fail, with errno being set to the specified error number.

The remaining command-line arguments specify the pathname and additional arguments of a program that the example program should attempt to execute using execv(3) (a library function that employs the execve(2) system call). Some example runs of the program are shown below.

First, we display the architecture that we are running on (x86-64) and then construct a shell function that looks up system call numbers on this architecture:

    $ uname -m
    x86_64
    $ syscall_nr() {
        cat /usr/src/linux/arch/x86/syscalls/syscall_64.tbl | \
        awk '$2 != "x32" && $3 == "'$1'" { print $1 }'
    }

When the BPF filter rejects a system call (case [2] above), it causes the system call to fail with the error number specified on the command line. In the experiments shown here, we'll use error number 99:

    $ errno 99
    EADDRNOTAVAIL 99 Cannot assign requested address

In the following example, we attempt to run the command whoami(1), but the BPF filter rejects the execve(2) system call, so that the command is not even executed:

    $ syscall_nr execve
    59
    $ ./a.out
    Usage: ./a.out <syscall_nr> <arch> <errno> <prog> [<args>]
    Hint for <arch>: AUDIT_ARCH_I386: 0x40000003
     AUDIT_ARCH_X86_64: 0xC000003E
    $ ./a.out 59 0xC000003E 99 /bin/whoami
    execv: Cannot assign requested address

In the next example, the BPF filter rejects the write(2) system call, so that, although it is successfully started, the whoami(1) command is not able to write output:

    $ syscall_nr write
    1
    $ ./a.out 1 0xC000003E 99 /bin/whoami

In the final example, the BPF filter rejects a system call that is not used by the whoami(1) command, so it is able to successfully execute and produce output:

    $ syscall_nr preadv
    295
    $ ./a.out 295 0xC000003E 99 /bin/whoami
    cecilia

Program source

#include <errno.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <linux/audit.h>
#include <linux/filter.h>
#include <linux/seccomp.h>
#include <sys/prctl.h>
#include <sys/syscall.h>

/* Bit 30 of a system call number; system calls made through the x32 ABI
   have it set, so their numbers are >= X32_SYSCALL_BIT. */
#define X32_SYSCALL_BIT 0x40000000

/* Number of elements in 'arr'.  'arr' must be a real array, not a pointer. */
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(*(arr)))

/*
 * Build a BPF filter from the three values below and install it with
 * SECCOMP_SET_MODE_FILTER.
 *
 * syscall_nr  Number of the system call the filter rejects.
 * t_arch      AUDIT_ARCH_* value the filter expects; a system call made
 *             under any other architecture kills the process.
 * f_errno     Error number reported for the rejected system call.
 *
 * Returns 0 on success.  On failure, prints a diagnostic with perror()
 * and returns 1.
 */
static int
install_filter(int syscall_nr, int t_arch, int f_errno)
{
    unsigned int upper_nr_limit = 0xffffffff;

    /* Assume that AUDIT_ARCH_X86_64 means the normal x86-64 ABI
       (in the x32 ABI, all system calls have bit 30 set in the
       'nr' field, meaning the numbers are >= X32_SYSCALL_BIT).
       The cast spells out the unsigned comparison that the usual
       arithmetic conversions would perform anyway. */
    if ((unsigned int) t_arch == AUDIT_ARCH_X86_64) {
        upper_nr_limit = X32_SYSCALL_BIT - 1;
    }

    struct sock_filter filter[] = {
        /* [0] Load architecture from 'seccomp_data' buffer into
               accumulator. */
        BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
                 (offsetof(struct seccomp_data, arch))),

        /* [1] Jump forward 5 instructions if architecture does not
               match 't_arch'. */
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, t_arch, 0, 5),

        /* [2] Load system call number from 'seccomp_data' buffer into
               accumulator. */
        BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
                 (offsetof(struct seccomp_data, nr))),

        /* [3] Check ABI - only needed for x86-64 in deny-list use
               cases.  Use BPF_JGT instead of checking against the bit
               mask to avoid having to reload the syscall number. */
        BPF_JUMP(BPF_JMP | BPF_JGT | BPF_K, upper_nr_limit, 3, 0),

        /* [4] Jump forward 1 instruction if system call number
               does not match 'syscall_nr'. */
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, syscall_nr, 0, 1),

        /* [5] Matching architecture and system call: don't execute
               the system call, and return 'f_errno' in 'errno'. */
        BPF_STMT(BPF_RET | BPF_K,
                 SECCOMP_RET_ERRNO | (f_errno & SECCOMP_RET_DATA)),

        /* [6] Destination of system call number mismatch: allow other
               system calls. */
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),

        /* [7] Destination of architecture mismatch: kill process. */
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL_PROCESS),
    };

    struct sock_fprog prog = {
        .len = ARRAY_SIZE(filter),
        .filter = filter,
    };

    /* glibc provides no seccomp() wrapper function, so the system call
       is made through syscall(2). */
    if (syscall(SYS_seccomp, SECCOMP_SET_MODE_FILTER, 0, &prog)) {
        perror("seccomp");
        return 1;
    }

    return 0;
}

/*
 * Command line: <syscall_nr> <arch> <errno> <prog> [<args>]
 *
 * Sets the no_new_privs attribute, installs a filter built from the first
 * three arguments, and then tries to execute <prog> with execv().  Exits
 * with EXIT_FAILURE if there are too few arguments or if any step fails;
 * on success, execv() does not return.
 */
int
main(int argc, char *argv[])
{
    if (argc < 5) {
        fprintf(stderr, "Usage: "
                "%s <syscall_nr> <arch> <errno> <prog> [<args>]\n"
                "Hint for <arch>: AUDIT_ARCH_I386: 0x%X\n"
                " AUDIT_ARCH_X86_64: 0x%X\n"
                "\n", argv[0], AUDIT_ARCH_I386, AUDIT_ARCH_X86_64);
        exit(EXIT_FAILURE);
    }

    if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
        perror("prctl");
        exit(EXIT_FAILURE);
    }

    int syscall_nr = (int) strtol(argv[1], NULL, 0);

    /* Architecture identifiers such as AUDIT_ARCH_X86_64 (0xC000003E)
       have the top bit set, which puts them above LONG_MAX where 'long'
       is 32 bits wide.  strtol() would clamp such a value to LONG_MAX;
       strtoul() parses it exactly. */
    int t_arch = (int) strtoul(argv[2], NULL, 0);

    int f_errno = (int) strtol(argv[3], NULL, 0);

    if (install_filter(syscall_nr, t_arch, f_errno)) {
        exit(EXIT_FAILURE);
    }

    /* execv() returns only on failure. */
    execv(argv[4], &argv[4]);
    perror("execv");
    exit(EXIT_FAILURE);
}