// SPDX-License-Identifier: GPL-2.0
/*
 * linux/kernel/seccomp.c
 *
 * Copyright 2004-2005 Andrea Arcangeli <andrea@cpushare.com>
 *
 * Copyright (C) 2012 Google, Inc.
 * Will Drewry <wad@chromium.org>
 *
 * This defines a simple but solid secure-computing facility.
 *
 * Mode 1 uses a fixed list of allowed system calls.
 * Mode 2 allows user-defined system call filters in the form
 *        of Berkeley Packet Filters/Linux Socket Filters.
 */
#define pr_fmt(fmt) "seccomp: " fmt

#include <linux/refcount.h>
#include <linux/audit.h>
#include <linux/compat.h>
#include <linux/coredump.h>
#include <linux/kmemleak.h>
#include <linux/nospec.h>
#include <linux/prctl.h>
#include <linux/sched.h>
#include <linux/sched/task_stack.h>
#include <linux/seccomp.h>
#include <linux/slab.h>
#include <linux/syscalls.h>
#include <linux/sysctl.h>

/* Not exposed in headers: strictly internal use only. */
#define SECCOMP_MODE_DEAD	(SECCOMP_MODE_FILTER + 1)

#ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER
#include <asm/syscall.h>
#endif

#ifdef CONFIG_SECCOMP_FILTER
#include <linux/file.h>
#include <linux/filter.h>
#include <linux/pid.h>
#include <linux/ptrace.h>
#include <linux/capability.h>
#include <linux/uaccess.h>
#include <linux/anon_inodes.h>
#include <linux/lockdep.h>

/*
 * When SECCOMP_IOCTL_NOTIF_ID_VALID was first introduced, it had the
 * wrong direction flag in the ioctl number. This is the broken one,
 * which the kernel needs to keep supporting until all userspaces stop
 * using the wrong command number.
 */
#define SECCOMP_IOCTL_NOTIF_ID_VALID_WRONG_DIR	SECCOMP_IOR(2, __u64)

enum notify_state {
	SECCOMP_NOTIFY_INIT,
	SECCOMP_NOTIFY_SENT,
	SECCOMP_NOTIFY_REPLIED,
};

struct seccomp_knotif {
	/* The struct pid of the task whose filter triggered the notification */
	struct task_struct *task;

	/* The "cookie" for this request; this is unique for this filter. */
	u64 id;

	/*
	 * The seccomp data. This pointer is valid the entire time this
	 * notification is active, since it comes from __seccomp_filter which
	 * eclipses the entire lifecycle here.
	 */
	const struct seccomp_data *data;

	/*
	 * Notification states. When SECCOMP_RET_USER_NOTIF is returned, a
	 * struct seccomp_knotif is created and starts out in INIT. Once the
	 * handler reads the notification off of an FD, it transitions to SENT.
	 * If a signal is received the state transitions back to INIT and
	 * another message is sent. When the userspace handler replies, state
	 * transitions to REPLIED.
	 */
	enum notify_state state;

	/* The return values, only valid when in SECCOMP_NOTIFY_REPLIED */
	int error;
	long val;
	u32 flags;

	/*
	 * Signals when this has changed states, such as the listener
	 * dying, a new seccomp addfd message, or changing to REPLIED
	 */
	struct completion ready;

	struct list_head list;

	/* outstanding addfd requests */
	struct list_head addfd;
};

/**
 * struct seccomp_kaddfd - container for seccomp_addfd ioctl messages
 *
 * @file: A reference to the file to install in the other task
 * @fd: The fd number to install it at. If the fd number is -1, it means the
 *      installing process should allocate the fd as normal.
 * @flags: The flags for the new file descriptor. At the moment, only O_CLOEXEC
 *         is allowed.
 * @ioctl_flags: The flags used for the seccomp_addfd ioctl.
 * @setfd: whether or not SECCOMP_ADDFD_FLAG_SETFD was set during notify_addfd
 * @ret: The return value of the installing process. It is set to the fd num
 *       upon success (>= 0).
 * @completion: Indicates that the installing process has completed fd
 *              installation, or gone away (either due to successful
 *              reply, or signal)
 * @list: list_head for chaining seccomp_kaddfd together.
 *
 */
struct seccomp_kaddfd {
	struct file *file;
	int fd;
	unsigned int flags;
	__u32 ioctl_flags;

	union {
		bool setfd;
		/* To only be set on reply */
		int ret;
	};
	struct completion completion;
	struct list_head list;
};

/**
 * struct notification - container for seccomp userspace notifications. Since
 * most seccomp filters will not have notification listeners attached and this
 * structure is fairly large, we store the notification-specific stuff in a
 * separate structure.
 *
 * @requests: A count of pending requests that users of this notification can
 *            wait on for changes. Actual reads and writes are still
 *            controlled with filter->notify_lock.
 * @flags: A set of SECCOMP_USER_NOTIF_FD_* flags.
 * @next_id: The id of the next request.
 * @notifications: A list of struct seccomp_knotif elements.
 */

struct notification {
	atomic_t requests;
	u32 flags;
	u64 next_id;
	struct list_head notifications;
};
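/*
 * Illustrative sketch (editor's addition, not part of the kernel build):
 * the userspace side of the notify_state machine above, as seen by a
 * supervisor holding a listener fd. The ioctls and structs come from
 * <linux/seccomp.h>:
 *
 *	struct seccomp_notif req = {};
 *	struct seccomp_notif_resp resp = {};
 *
 *	ioctl(listener_fd, SECCOMP_IOCTL_NOTIF_RECV, &req);	// INIT -> SENT
 *	resp.id = req.id;					// echo the cookie
 *	resp.error = -EPERM;					// deny the syscall
 *	ioctl(listener_fd, SECCOMP_IOCTL_NOTIF_SEND, &resp);	// SENT -> REPLIED
 */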
#ifdef SECCOMP_ARCH_NATIVE
/**
 * struct action_cache - per-filter cache of seccomp actions per
 * arch/syscall pair
 *
 * @allow_native: A bitmap where each bit represents whether the
 *		  filter will always allow the syscall, for the
 *		  native architecture.
 * @allow_compat: A bitmap where each bit represents whether the
 *		  filter will always allow the syscall, for the
 *		  compat architecture.
 */
struct action_cache {
	DECLARE_BITMAP(allow_native, SECCOMP_ARCH_NATIVE_NR);
#ifdef SECCOMP_ARCH_COMPAT
	DECLARE_BITMAP(allow_compat, SECCOMP_ARCH_COMPAT_NR);
#endif
};
#else
struct action_cache { };

static inline bool seccomp_cache_check_allow(const struct seccomp_filter *sfilter,
					     const struct seccomp_data *sd)
{
	return false;
}

static inline void seccomp_cache_prepare(struct seccomp_filter *sfilter)
{
}
#endif /* SECCOMP_ARCH_NATIVE */

/**
 * struct seccomp_filter - container for seccomp BPF programs
 *
 * @refs: Reference count to manage the object lifetime.
 *	  A filter's reference count is incremented for each directly
 *	  attached task, once for the dependent filter, and if
 *	  requested for the user notifier. When @refs reaches zero,
 *	  the filter can be freed.
 * @users: A filter's @users count is incremented for each directly
 *	   attached task (filter installation, fork(), thread_sync),
 *	   and once for the dependent filter (tracked in filter->prev).
 *	   When it reaches zero it indicates that no direct or indirect
 *	   users of that filter exist. No new tasks can get associated with
 *	   this filter after reaching 0. The @users count is always smaller
 *	   or equal to @refs. Hence, reaching 0 for @users does not mean
 *	   the filter can be freed.
 * @cache: cache of arch/syscall mappings to actions
 * @log: true if all actions except for SECCOMP_RET_ALLOW should be logged
 * @wait_killable_recv: Put notifying process in killable state once the
 *			notification is received by the userspace listener.
 * @prev: points to a previously installed, or inherited, filter
 * @prog: the BPF program to evaluate
 * @notif: the struct that holds all notification related information
 * @notify_lock: A lock for all notification-related accesses.
 * @wqh: A wait queue for poll if a notifier is in use.
 *
 * seccomp_filter objects are organized in a tree linked via the @prev
 * pointer.  For any task, it appears to be a singly-linked list starting
 * with current->seccomp.filter, the most recently attached or inherited filter.
 * However, multiple filters may share a @prev node, by way of fork(), which
 * results in a unidirectional tree existing in memory.  This is similar to
 * how namespaces work.
 *
 * seccomp_filter objects should never be modified after being attached
 * to a task_struct (other than @refs).
 */
struct seccomp_filter {
	refcount_t refs;
	refcount_t users;
	bool log;
	bool wait_killable_recv;
	struct action_cache cache;
	struct seccomp_filter *prev;
	struct bpf_prog *prog;
	struct notification *notif;
	struct mutex notify_lock;
	wait_queue_head_t wqh;
};
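/*
 * Illustrative sketch (editor's addition): the @prev tree described above.
 * If a task with filter A forks, and parent and child each attach their
 * own filter, every task still sees a singly-linked list:
 *
 *	parent: B -> A		child: C -> A
 *
 * but B->prev == C->prev == A, so memory holds a tree rooted at A, and
 * A's @refs covers both branches.
 */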
/* Limit any path through the tree to 256KB worth of instructions. */
#define MAX_INSNS_PER_PATH ((1 << 18) / sizeof(struct sock_filter))

/*
 * Endianness is explicitly ignored and left for BPF program authors to manage
 * as per the specific architecture.
 */
static void populate_seccomp_data(struct seccomp_data *sd)
{
	/*
	 * Instead of using current_pt_regs(), we're already doing the work
	 * to safely fetch "current", so just use "task" everywhere below.
	 */
	struct task_struct *task = current;
	struct pt_regs *regs = task_pt_regs(task);
	unsigned long args[6];

	sd->nr = syscall_get_nr(task, regs);
	sd->arch = syscall_get_arch(task);
	syscall_get_arguments(task, regs, args);
	sd->args[0] = args[0];
	sd->args[1] = args[1];
	sd->args[2] = args[2];
	sd->args[3] = args[3];
	sd->args[4] = args[4];
	sd->args[5] = args[5];
	sd->instruction_pointer = KSTK_EIP(task);
}
/**
 * seccomp_check_filter - verify seccomp filter code
 * @filter: filter to verify
 * @flen: length of filter
 *
 * Takes a previously checked filter (by bpf_check_classic) and
 * redirects all filter code that loads struct sk_buff data
 * and related data through seccomp_bpf_load.  It also
 * enforces length and alignment checking of those loads.
 *
 * Returns 0 if the rule set is legal or -EINVAL if not.
 */
static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen)
{
	int pc;

	for (pc = 0; pc < flen; pc++) {
		struct sock_filter *ftest = &filter[pc];
		u16 code = ftest->code;
		u32 k = ftest->k;

		switch (code) {
		case BPF_LD | BPF_W | BPF_ABS:
			ftest->code = BPF_LDX | BPF_W | BPF_ABS;
			/* 32-bit aligned and not out of bounds. */
			if (k >= sizeof(struct seccomp_data) || k & 3)
				return -EINVAL;
			continue;
		case BPF_LD | BPF_W | BPF_LEN:
			ftest->code = BPF_LD | BPF_IMM;
			ftest->k = sizeof(struct seccomp_data);
			continue;
		case BPF_LDX | BPF_W | BPF_LEN:
			ftest->code = BPF_LDX | BPF_IMM;
			ftest->k = sizeof(struct seccomp_data);
			continue;
		/* Explicitly include allowed calls. */
		case BPF_RET | BPF_K:
		case BPF_RET | BPF_A:
		case BPF_ALU | BPF_ADD | BPF_K:
		case BPF_ALU | BPF_ADD | BPF_X:
		case BPF_ALU | BPF_SUB | BPF_K:
		case BPF_ALU | BPF_SUB | BPF_X:
		case BPF_ALU | BPF_MUL | BPF_K:
		case BPF_ALU | BPF_MUL | BPF_X:
		case BPF_ALU | BPF_DIV | BPF_K:
		case BPF_ALU | BPF_DIV | BPF_X:
		case BPF_ALU | BPF_AND | BPF_K:
		case BPF_ALU | BPF_AND | BPF_X:
		case BPF_ALU | BPF_OR | BPF_K:
		case BPF_ALU | BPF_OR | BPF_X:
		case BPF_ALU | BPF_XOR | BPF_K:
		case BPF_ALU | BPF_XOR | BPF_X:
		case BPF_ALU | BPF_LSH | BPF_K:
		case BPF_ALU | BPF_LSH | BPF_X:
		case BPF_ALU | BPF_RSH | BPF_K:
		case BPF_ALU | BPF_RSH | BPF_X:
		case BPF_ALU | BPF_NEG:
		case BPF_LD | BPF_IMM:
		case BPF_LDX | BPF_IMM:
		case BPF_MISC | BPF_TAX:
		case BPF_MISC | BPF_TXA:
		case BPF_LD | BPF_MEM:
		case BPF_LDX | BPF_MEM:
		case BPF_ST:
		case BPF_STX:
		case BPF_JMP | BPF_JA:
		case BPF_JMP | BPF_JEQ | BPF_K:
		case BPF_JMP | BPF_JEQ | BPF_X:
		case BPF_JMP | BPF_JGE | BPF_K:
		case BPF_JMP | BPF_JGE | BPF_X:
		case BPF_JMP | BPF_JGT | BPF_K:
		case BPF_JMP | BPF_JGT | BPF_X:
		case BPF_JMP | BPF_JSET | BPF_K:
		case BPF_JMP | BPF_JSET | BPF_X:
			continue;
		default:
			return -EINVAL;
		}
	}
	return 0;
}
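/*
 * Illustrative sketch (editor's addition): a minimal classic-BPF program
 * that seccomp_check_filter() accepts. The BPF_LD|BPF_W|BPF_ABS load of
 * the syscall number is exactly the opcode rewritten above; a real filter
 * should also check data->arch before trusting the nr:
 *
 *	struct sock_filter insns[] = {
 *		BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
 *			 offsetof(struct seccomp_data, nr)),
 *		BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_getpid, 0, 1),
 *		BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ERRNO | EPERM),
 *		BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
 *	};
 *	struct sock_fprog prog = {
 *		.len = ARRAY_SIZE(insns),
 *		.filter = insns,
 *	};
 */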
#ifdef SECCOMP_ARCH_NATIVE
static inline bool seccomp_cache_check_allow_bitmap(const void *bitmap,
						    size_t bitmap_size,
						    int syscall_nr)
{
	if (unlikely(syscall_nr < 0 || syscall_nr >= bitmap_size))
		return false;
	syscall_nr = array_index_nospec(syscall_nr, bitmap_size);

	return test_bit(syscall_nr, bitmap);
}

/**
 * seccomp_cache_check_allow - lookup seccomp cache
 * @sfilter: The seccomp filter
 * @sd: The seccomp data to lookup the cache with
 *
 * Returns true if the seccomp_data is cached and allowed.
 */
static inline bool seccomp_cache_check_allow(const struct seccomp_filter *sfilter,
					     const struct seccomp_data *sd)
{
	int syscall_nr = sd->nr;
	const struct action_cache *cache = &sfilter->cache;

#ifndef SECCOMP_ARCH_COMPAT
	/* A native-only architecture doesn't need to check sd->arch. */
	return seccomp_cache_check_allow_bitmap(cache->allow_native,
						SECCOMP_ARCH_NATIVE_NR,
						syscall_nr);
#else
	if (likely(sd->arch == SECCOMP_ARCH_NATIVE))
		return seccomp_cache_check_allow_bitmap(cache->allow_native,
							SECCOMP_ARCH_NATIVE_NR,
							syscall_nr);

	if (likely(sd->arch == SECCOMP_ARCH_COMPAT))
		return seccomp_cache_check_allow_bitmap(cache->allow_compat,
							SECCOMP_ARCH_COMPAT_NR,
							syscall_nr);
#endif /* SECCOMP_ARCH_COMPAT */

	WARN_ON_ONCE(true);
	return false;
}
#endif /* SECCOMP_ARCH_NATIVE */

#define ACTION_ONLY(ret) ((s32)((ret) & (SECCOMP_RET_ACTION_FULL)))
/**
 * seccomp_run_filters - evaluates all seccomp filters against @sd
 * @sd: optional seccomp data to be passed to filters
 * @match: stores struct seccomp_filter that resulted in the return value,
 *         unless filter returned SECCOMP_RET_ALLOW, in which case it will
 *         be unchanged.
 *
 * Returns valid seccomp BPF response codes.
 */
static u32 seccomp_run_filters(const struct seccomp_data *sd,
			       struct seccomp_filter **match)
{
	u32 ret = SECCOMP_RET_ALLOW;
	/* Make sure cross-thread synced filter points somewhere sane. */
	struct seccomp_filter *f =
			READ_ONCE(current->seccomp.filter);

	/* Ensure unexpected behavior doesn't result in failing open. */
	if (WARN_ON(f == NULL))
		return SECCOMP_RET_KILL_PROCESS;

	if (seccomp_cache_check_allow(f, sd))
		return SECCOMP_RET_ALLOW;

	/*
	 * All filters in the list are evaluated and the lowest BPF return
	 * value always takes priority (ignoring the DATA).
	 */
	for (; f; f = f->prev) {
		u32 cur_ret = bpf_prog_run_pin_on_cpu(f->prog, sd);

		if (ACTION_ONLY(cur_ret) < ACTION_ONLY(ret)) {
			ret = cur_ret;
			*match = f;
		}
	}
	return ret;
}
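/*
 * Worked example (editor's addition) of the precedence rule in
 * seccomp_run_filters(): ACTION_ONLY() masks off the 16 DATA bits and
 * compares actions as s32, so SECCOMP_RET_KILL_PROCESS (0x80000000U,
 * negative as s32) orders below SECCOMP_RET_ERRNO (0x00050000U), which
 * orders below SECCOMP_RET_ALLOW (0x7fff0000U). If one filter in the
 * list returns ERRNO|n while an older one returns ALLOW, the ERRNO
 * verdict wins and @match points at the filter that returned it.
 */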
#endif /* CONFIG_SECCOMP_FILTER */

static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode)
{
	assert_spin_locked(&current->sighand->siglock);

	if (current->seccomp.mode && current->seccomp.mode != seccomp_mode)
		return false;

	return true;
}

void __weak arch_seccomp_spec_mitigate(struct task_struct *task) { }

static inline void seccomp_assign_mode(struct task_struct *task,
				       unsigned long seccomp_mode,
				       unsigned long flags)
{
	assert_spin_locked(&task->sighand->siglock);

	task->seccomp.mode = seccomp_mode;
	/*
	 * Make sure SYSCALL_WORK_SECCOMP cannot be set before the mode (and
	 * filter) is set.
	 */
	smp_mb__before_atomic();
	/* Assume default seccomp processes want spec flaw mitigation. */
	if ((flags & SECCOMP_FILTER_FLAG_SPEC_ALLOW) == 0)
		arch_seccomp_spec_mitigate(task);
	set_task_syscall_work(task, SECCOMP);
}

#ifdef CONFIG_SECCOMP_FILTER
/* Returns 1 if the parent is an ancestor of the child. */
static int is_ancestor(struct seccomp_filter *parent,
		       struct seccomp_filter *child)
{
	/* NULL is the root ancestor. */
	if (parent == NULL)
		return 1;
	for (; child; child = child->prev)
		if (child == parent)
			return 1;
	return 0;
}
/**
 * seccomp_can_sync_threads: checks if all threads can be synchronized
 *
 * Expects sighand and cred_guard_mutex locks to be held.
 *
 * Returns 0 on success, -ve on error, or the pid of a thread which was
 * either not in the correct seccomp mode or did not have an ancestral
 * seccomp filter.
 */
static inline pid_t seccomp_can_sync_threads(void)
{
	struct task_struct *thread, *caller;

	BUG_ON(!mutex_is_locked(&current->signal->cred_guard_mutex));
	assert_spin_locked(&current->sighand->siglock);

	/* Validate all threads being eligible for synchronization. */
	caller = current;
	for_each_thread(caller, thread) {
		pid_t failed;

		/* Skip current, since it is initiating the sync. */
		if (thread == caller)
			continue;
		/* Skip exited threads. */
		if (thread->flags & PF_EXITING)
			continue;

		if (thread->seccomp.mode == SECCOMP_MODE_DISABLED ||
		    (thread->seccomp.mode == SECCOMP_MODE_FILTER &&
		     is_ancestor(thread->seccomp.filter,
				 caller->seccomp.filter)))
			continue;

		/* Return the first thread that cannot be synchronized. */
		failed = task_pid_vnr(thread);
		/* If the pid cannot be resolved, then return -ESRCH */
		if (WARN_ON(failed == 0))
			failed = -ESRCH;
		return failed;
	}

	return 0;
}

static inline void seccomp_filter_free(struct seccomp_filter *filter)
{
	if (filter) {
		bpf_prog_destroy(filter->prog);
		kfree(filter);
	}
}

static void __seccomp_filter_orphan(struct seccomp_filter *orig)
{
	while (orig && refcount_dec_and_test(&orig->users)) {
		if (waitqueue_active(&orig->wqh))
			wake_up_poll(&orig->wqh, EPOLLHUP);
		orig = orig->prev;
	}
}

static void __put_seccomp_filter(struct seccomp_filter *orig)
{
	/* Clean up single-reference branches iteratively. */
	while (orig && refcount_dec_and_test(&orig->refs)) {
		struct seccomp_filter *freeme = orig;

		orig = orig->prev;
		seccomp_filter_free(freeme);
	}
}

static void __seccomp_filter_release(struct seccomp_filter *orig)
{
	/* Notify about any unused filters in the task's former filter tree. */
	__seccomp_filter_orphan(orig);
	/* Finally drop all references to the task's former tree. */
	__put_seccomp_filter(orig);
}

/**
 * seccomp_filter_release - Detach the task from its filter tree,
 *			    drop its reference count, and notify
 *			    about unused filters
 *
 * @tsk: task the filter should be released from.
 *
 * This function should only be called when the task is exiting as
 * it detaches it from its filter tree. PF_EXITING has to be set
 * for the task.
 */
void seccomp_filter_release(struct task_struct *tsk)
{
	struct seccomp_filter *orig;

	if (WARN_ON((tsk->flags & PF_EXITING) == 0))
		return;
	spin_lock_irq(&tsk->sighand->siglock);
	orig = tsk->seccomp.filter;
	/* Detach task from its filter tree. */
	tsk->seccomp.filter = NULL;
	spin_unlock_irq(&tsk->sighand->siglock);
	__seccomp_filter_release(orig);
}

/**
 * seccomp_sync_threads: sets all threads to use current's filter
 *
 * @flags: SECCOMP_FILTER_FLAG_* flags to set during sync.
 *
 * Expects sighand and cred_guard_mutex locks to be held, and for
 * seccomp_can_sync_threads() to have returned success already
 * without dropping the locks.
 *
 */
static inline void seccomp_sync_threads(unsigned long flags)
{
	struct task_struct *thread, *caller;

	BUG_ON(!mutex_is_locked(&current->signal->cred_guard_mutex));
	assert_spin_locked(&current->sighand->siglock);

	/* Synchronize all threads. */
	caller = current;
	for_each_thread(caller, thread) {
		/* Skip current, since it needs no changes. */
		if (thread == caller)
			continue;

		/*
		 * Skip exited threads. seccomp_filter_release could have
		 * been already called for this task.
		 */
		if (thread->flags & PF_EXITING)
			continue;

		/* Get a task reference for the new leaf node. */
		get_seccomp_filter(caller);

		/*
		 * Drop the task reference to the shared ancestor since
		 * current's path will hold a reference.  (This also
		 * allows a put before the assignment.)
		 */
		__seccomp_filter_release(thread->seccomp.filter);

		/* Make our new filter tree visible. */
		smp_store_release(&thread->seccomp.filter,
				  caller->seccomp.filter);
		atomic_set(&thread->seccomp.filter_count,
			   atomic_read(&caller->seccomp.filter_count));

		/*
		 * Don't let an unprivileged task work around
		 * the no_new_privs restriction by creating
		 * a thread that sets it up, enters seccomp,
		 * then dies.
		 */
		if (task_no_new_privs(caller))
			task_set_no_new_privs(thread);

		/*
		 * Opt the other thread into seccomp if needed.
		 * As threads are considered to be trust-realm
		 * equivalent (see ptrace_may_access), it is safe to
		 * allow one thread to transition the other.
		 */
		if (thread->seccomp.mode == SECCOMP_MODE_DISABLED)
			seccomp_assign_mode(thread, SECCOMP_MODE_FILTER,
					    flags);
	}
}
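/*
 * Illustrative sketch (editor's addition): TSYNC as seen from userspace.
 * One thread installs a filter on every thread atomically; on failure
 * the return value is the pid of the first thread that could not be
 * synchronized (see seccomp_can_sync_threads() above):
 *
 *	long ret = syscall(__NR_seccomp, SECCOMP_SET_MODE_FILTER,
 *			   SECCOMP_FILTER_FLAG_TSYNC, &prog);
 *	if (ret > 0)
 *		;	// ret is the tid of an unsynchronizable thread
 */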
/**
 * seccomp_prepare_filter: Prepares a seccomp filter for use.
 * @fprog: BPF program to install
 *
 * Returns filter on success or an ERR_PTR on failure.
 */
static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog)
{
	struct seccomp_filter *sfilter;
	int ret;
	const bool save_orig =
#if defined(CONFIG_CHECKPOINT_RESTORE) || defined(SECCOMP_ARCH_NATIVE)
		true;
#else
		false;
#endif

	if (fprog->len == 0 || fprog->len > BPF_MAXINSNS)
		return ERR_PTR(-EINVAL);

	BUG_ON(INT_MAX / fprog->len < sizeof(struct sock_filter));

	/*
	 * Installing a seccomp filter requires that the task has
	 * CAP_SYS_ADMIN in its namespace or be running with no_new_privs.
	 * This avoids scenarios where unprivileged tasks can affect the
	 * behavior of privileged children.
	 */
	if (!task_no_new_privs(current) &&
	    !ns_capable_noaudit(current_user_ns(), CAP_SYS_ADMIN))
		return ERR_PTR(-EACCES);

	/* Allocate a new seccomp_filter */
	sfilter = kzalloc(sizeof(*sfilter), GFP_KERNEL | __GFP_NOWARN);
	if (!sfilter)
		return ERR_PTR(-ENOMEM);

	mutex_init(&sfilter->notify_lock);
	ret = bpf_prog_create_from_user(&sfilter->prog, fprog,
					seccomp_check_filter, save_orig);
	if (ret < 0) {
		kfree(sfilter);
		return ERR_PTR(ret);
	}

	refcount_set(&sfilter->refs, 1);
	refcount_set(&sfilter->users, 1);
	init_waitqueue_head(&sfilter->wqh);

	return sfilter;
}
/**
 * seccomp_prepare_user_filter - prepares a user-supplied sock_fprog
 * @user_filter: pointer to the user data containing a sock_fprog.
 *
 * Returns the prepared filter on success or an ERR_PTR on failure.
 */
static struct seccomp_filter *
seccomp_prepare_user_filter(const char __user *user_filter)
{
	struct sock_fprog fprog;
	struct seccomp_filter *filter = ERR_PTR(-EFAULT);

#ifdef CONFIG_COMPAT
	if (in_compat_syscall()) {
		struct compat_sock_fprog fprog32;
		if (copy_from_user(&fprog32, user_filter, sizeof(fprog32)))
			goto out;
		fprog.len = fprog32.len;
		fprog.filter = compat_ptr(fprog32.filter);
	} else /* falls through to the if below. */
#endif
	if (copy_from_user(&fprog, user_filter, sizeof(fprog)))
		goto out;
	filter = seccomp_prepare_filter(&fprog);
out:
	return filter;
}

#ifdef SECCOMP_ARCH_NATIVE
/**
 * seccomp_is_const_allow - check if filter is constant allow with given data
 * @fprog: The BPF programs
 * @sd: The seccomp data to check against, only the syscall number and arch
 *      number are considered constant.
 */
static bool seccomp_is_const_allow(struct sock_fprog_kern *fprog,
				   struct seccomp_data *sd)
{
	unsigned int reg_value = 0;
	unsigned int pc;
	bool op_res;

	if (WARN_ON_ONCE(!fprog))
		return false;

	for (pc = 0; pc < fprog->len; pc++) {
		struct sock_filter *insn = &fprog->filter[pc];
		u16 code = insn->code;
		u32 k = insn->k;

		switch (code) {
		case BPF_LD | BPF_W | BPF_ABS:
			switch (k) {
			case offsetof(struct seccomp_data, nr):
				reg_value = sd->nr;
				break;
			case offsetof(struct seccomp_data, arch):
				reg_value = sd->arch;
				break;
			default:
				/* can't optimize (non-constant value load) */
				return false;
			}
			break;
		case BPF_RET | BPF_K:
			/* reached return with constant values only, check allow */
			return k == SECCOMP_RET_ALLOW;
		case BPF_JMP | BPF_JA:
			pc += insn->k;
			break;
		case BPF_JMP | BPF_JEQ | BPF_K:
		case BPF_JMP | BPF_JGE | BPF_K:
		case BPF_JMP | BPF_JGT | BPF_K:
		case BPF_JMP | BPF_JSET | BPF_K:
			switch (BPF_OP(code)) {
			case BPF_JEQ:
				op_res = reg_value == k;
				break;
			case BPF_JGE:
				op_res = reg_value >= k;
				break;
			case BPF_JGT:
				op_res = reg_value > k;
				break;
			case BPF_JSET:
				op_res = !!(reg_value & k);
				break;
			default:
				/* can't optimize (unknown jump) */
				return false;
			}

			pc += op_res ? insn->jt : insn->jf;
			break;
		case BPF_ALU | BPF_AND | BPF_K:
			reg_value &= k;
			break;
		default:
			/* can't optimize (unknown insn) */
			return false;
		}
	}

	/* ran off the end of the filter?! */
	WARN_ON(1);
	return false;
}
static void seccomp_cache_prepare_bitmap(struct seccomp_filter *sfilter,
					 void *bitmap, const void *bitmap_prev,
					 size_t bitmap_size, int arch)
{
	struct sock_fprog_kern *fprog = sfilter->prog->orig_prog;
	struct seccomp_data sd;
	int nr;

	if (bitmap_prev) {
		/* The new filter must be as restrictive as the last. */
		bitmap_copy(bitmap, bitmap_prev, bitmap_size);
	} else {
		/* Before any filters, all syscalls are always allowed. */
		bitmap_fill(bitmap, bitmap_size);
	}

	for (nr = 0; nr < bitmap_size; nr++) {
		/* No bitmap change: not a cacheable action. */
		if (!test_bit(nr, bitmap))
			continue;

		sd.nr = nr;
		sd.arch = arch;

		/* No bitmap change: continue to always allow. */
		if (seccomp_is_const_allow(fprog, &sd))
			continue;

		/*
		 * Not a cacheable action: always run filters.
		 * atomic clear_bit() not needed, filter not visible yet.
		 */
		__clear_bit(nr, bitmap);
	}
}

/**
 * seccomp_cache_prepare - emulate the filter to find cacheable syscalls
 * @sfilter: The seccomp filter
 */
static void seccomp_cache_prepare(struct seccomp_filter *sfilter)
{
	struct action_cache *cache = &sfilter->cache;
	const struct action_cache *cache_prev =
		sfilter->prev ? &sfilter->prev->cache : NULL;

	seccomp_cache_prepare_bitmap(sfilter, cache->allow_native,
				     cache_prev ? cache_prev->allow_native : NULL,
				     SECCOMP_ARCH_NATIVE_NR,
				     SECCOMP_ARCH_NATIVE);

#ifdef SECCOMP_ARCH_COMPAT
	seccomp_cache_prepare_bitmap(sfilter, cache->allow_compat,
				     cache_prev ? cache_prev->allow_compat : NULL,
				     SECCOMP_ARCH_COMPAT_NR,
				     SECCOMP_ARCH_COMPAT);
#endif /* SECCOMP_ARCH_COMPAT */
}
#endif /* SECCOMP_ARCH_NATIVE */
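/*
 * Worked example (editor's addition) of the emulation above: for the
 * one-instruction filter
 *
 *	BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW)
 *
 * seccomp_is_const_allow() returns true for every nr, every bit stays
 * set, and all syscalls take the SECCOMP_RET_ALLOW fast path in
 * seccomp_cache_check_allow() without running BPF again. A filter whose
 * verdict depends on syscall arguments is not constant for the affected
 * nrs, so those bits are cleared and those syscalls always run filters.
 */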
/**
 * seccomp_attach_filter: validate and attach filter
 * @flags:  flags to change filter behavior
 * @filter: seccomp filter to add to the current process
 *
 * Caller must be holding current->sighand->siglock lock.
 *
 * Returns 0 on success, -ve on error, or
 *   - in TSYNC mode: the pid of a thread which was either not in the correct
 *     seccomp mode or did not have an ancestral seccomp filter
 *   - in NEW_LISTENER mode: the fd of the new listener
 */
static long seccomp_attach_filter(unsigned int flags,
				  struct seccomp_filter *filter)
{
	unsigned long total_insns;
	struct seccomp_filter *walker;

	assert_spin_locked(&current->sighand->siglock);

	/* Validate resulting filter length. */
	total_insns = filter->prog->len;
	for (walker = current->seccomp.filter; walker; walker = walker->prev)
		total_insns += walker->prog->len + 4;  /* 4 instr penalty */
	if (total_insns > MAX_INSNS_PER_PATH)
		return -ENOMEM;

	/* If thread sync has been requested, check that it is possible. */
	if (flags & SECCOMP_FILTER_FLAG_TSYNC) {
		int ret;

		ret = seccomp_can_sync_threads();
		if (ret) {
			if (flags & SECCOMP_FILTER_FLAG_TSYNC_ESRCH)
				return -ESRCH;
			else
				return ret;
		}
	}

	/* Set log flag, if present. */
	if (flags & SECCOMP_FILTER_FLAG_LOG)
		filter->log = true;

	/* Set wait killable flag, if present. */
	if (flags & SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV)
		filter->wait_killable_recv = true;

	/*
	 * If there is an existing filter, make it the prev and don't drop its
	 * task reference.
	 */
	filter->prev = current->seccomp.filter;
	seccomp_cache_prepare(filter);
	current->seccomp.filter = filter;
	atomic_inc(&current->seccomp.filter_count);

	/* Now that the new filter is in place, synchronize to all threads. */
	if (flags & SECCOMP_FILTER_FLAG_TSYNC)
		seccomp_sync_threads(flags);

	return 0;
}

static void __get_seccomp_filter(struct seccomp_filter *filter)
{
	refcount_inc(&filter->refs);
}

/* get_seccomp_filter - increments the reference count of the filter on @tsk */
void get_seccomp_filter(struct task_struct *tsk)
{
	struct seccomp_filter *orig = tsk->seccomp.filter;
	if (!orig)
		return;
	__get_seccomp_filter(orig);
	refcount_inc(&orig->users);
}
#endif	/* CONFIG_SECCOMP_FILTER */

/* For use with seccomp_actions_logged */
#define SECCOMP_LOG_KILL_PROCESS	(1 << 0)
#define SECCOMP_LOG_KILL_THREAD		(1 << 1)
#define SECCOMP_LOG_TRAP		(1 << 2)
#define SECCOMP_LOG_ERRNO		(1 << 3)
#define SECCOMP_LOG_TRACE		(1 << 4)
#define SECCOMP_LOG_LOG			(1 << 5)
#define SECCOMP_LOG_ALLOW		(1 << 6)
#define SECCOMP_LOG_USER_NOTIF		(1 << 7)

static u32 seccomp_actions_logged = SECCOMP_LOG_KILL_PROCESS |
				    SECCOMP_LOG_KILL_THREAD  |
				    SECCOMP_LOG_TRAP  |
				    SECCOMP_LOG_ERRNO |
				    SECCOMP_LOG_USER_NOTIF |
				    SECCOMP_LOG_TRACE |
				    SECCOMP_LOG_LOG;

static inline void seccomp_log(unsigned long syscall, long signr, u32 action,
			       bool requested)
{
	bool log = false;

	switch (action) {
	case SECCOMP_RET_ALLOW:
		break;
	case SECCOMP_RET_TRAP:
		log = requested && seccomp_actions_logged & SECCOMP_LOG_TRAP;
		break;
	case SECCOMP_RET_ERRNO:
		log = requested && seccomp_actions_logged & SECCOMP_LOG_ERRNO;
		break;
	case SECCOMP_RET_TRACE:
		log = requested && seccomp_actions_logged & SECCOMP_LOG_TRACE;
		break;
	case SECCOMP_RET_USER_NOTIF:
		log = requested &&
			seccomp_actions_logged & SECCOMP_LOG_USER_NOTIF;
		break;
	case SECCOMP_RET_LOG:
		log = seccomp_actions_logged & SECCOMP_LOG_LOG;
		break;
	case SECCOMP_RET_KILL_THREAD:
		log = seccomp_actions_logged & SECCOMP_LOG_KILL_THREAD;
		break;
	case SECCOMP_RET_KILL_PROCESS:
	default:
		log = seccomp_actions_logged & SECCOMP_LOG_KILL_PROCESS;
	}

	/*
	 * Emit an audit message when the action is RET_KILL_*, RET_LOG, or the
	 * FILTER_FLAG_LOG bit was set. The admin has the ability to silence
	 * any action from being logged by removing the action name from the
	 * seccomp_actions_logged sysctl.
	 */
	if (!log)
		return;

	audit_seccomp(syscall, signr, action);
}
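/*
 * Illustrative sketch (editor's addition): the admin-facing knob backed
 * by seccomp_actions_logged above; removing an action name from the
 * list silences that action (the sysctl plumbing lives later in this
 * file):
 *
 *	# cat /proc/sys/kernel/seccomp/actions_logged
 *	kill_process kill_thread trap errno user_notif trace log
 *	# echo "kill_process kill_thread" > \
 *		/proc/sys/kernel/seccomp/actions_logged
 */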
/*
 * Secure computing mode 1 allows only read/write/exit/sigreturn.
 * To be fully secure this must be combined with rlimit
 * to limit the stack allocations too.
 */
static const int mode1_syscalls[] = {
	__NR_seccomp_read, __NR_seccomp_write, __NR_seccomp_exit, __NR_seccomp_sigreturn,
	-1, /* negative terminated */
};

static void __secure_computing_strict(int this_syscall)
{
	const int *allowed_syscalls = mode1_syscalls;
#ifdef CONFIG_COMPAT
	if (in_compat_syscall())
		allowed_syscalls = get_compat_mode1_syscalls();
#endif
	do {
		if (*allowed_syscalls == this_syscall)
			return;
	} while (*++allowed_syscalls != -1);

#ifdef SECCOMP_DEBUG
	dump_stack();
#endif
	current->seccomp.mode = SECCOMP_MODE_DEAD;
	seccomp_log(this_syscall, SIGKILL, SECCOMP_RET_KILL_THREAD, true);
	do_exit(SIGKILL);
}
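/*
 * Illustrative sketch (editor's addition): entering mode 1 from
 * userspace. After this call only the syscalls in mode1_syscalls (plus
 * the compat list) are permitted; anything else kills the thread:
 *
 *	prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT);
 */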
#ifndef CONFIG_HAVE_ARCH_SECCOMP_FILTER
void secure_computing_strict(int this_syscall)
{
	int mode = current->seccomp.mode;

	if (IS_ENABLED(CONFIG_CHECKPOINT_RESTORE) &&
	    unlikely(current->ptrace & PT_SUSPEND_SECCOMP))
		return;

	if (mode == SECCOMP_MODE_DISABLED)
		return;
	else if (mode == SECCOMP_MODE_STRICT)
		__secure_computing_strict(this_syscall);
	else
		BUG();
}
#else

#ifdef CONFIG_SECCOMP_FILTER
static u64 seccomp_next_notify_id(struct seccomp_filter *filter)
{
	/*
	 * Note: overflow is ok here, the id just needs to be unique per
	 * filter.
	 */
	lockdep_assert_held(&filter->notify_lock);
	return filter->notif->next_id++;
}

static void seccomp_handle_addfd(struct seccomp_kaddfd *addfd,
				 struct seccomp_knotif *n)
{
	int fd;

	/*
	 * Remove the notification, and reset the list pointers, indicating
	 * that it has been handled.
	 */
	list_del_init(&addfd->list);
	if (!addfd->setfd)
		fd = receive_fd(addfd->file, addfd->flags);
	else
		fd = receive_fd_replace(addfd->fd, addfd->file, addfd->flags);
	addfd->ret = fd;

	if (addfd->ioctl_flags & SECCOMP_ADDFD_FLAG_SEND) {
		/* If we fail reset and return an error to the notifier */
		if (fd < 0) {
			n->state = SECCOMP_NOTIFY_SENT;
		} else {
			/* Return the FD we just added */
			n->flags = 0;
			n->error = 0;
			n->val = fd;
		}
	}

	/*
	 * Mark the notification as completed. From this point, addfd mem
	 * might be invalidated and we can't safely read it anymore.
	 */
	complete(&addfd->completion);
}
static bool should_sleep_killable(struct seccomp_filter *match,
				  struct seccomp_knotif *n)
{
	return match->wait_killable_recv && n->state == SECCOMP_NOTIFY_SENT;
}

static int seccomp_do_user_notification(int this_syscall,
					struct seccomp_filter *match,
					const struct seccomp_data *sd)
{
	int err;
	u32 flags = 0;
	long ret = 0;
	struct seccomp_knotif n = {};
	struct seccomp_kaddfd *addfd, *tmp;

	mutex_lock(&match->notify_lock);
	err = -ENOSYS;
	if (!match->notif)
		goto out;

	n.task = current;
	n.state = SECCOMP_NOTIFY_INIT;
	n.data = sd;
	n.id = seccomp_next_notify_id(match);
	init_completion(&n.ready);
	list_add_tail(&n.list, &match->notif->notifications);
	INIT_LIST_HEAD(&n.addfd);

	atomic_inc(&match->notif->requests);
	if (match->notif->flags & SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP)
		wake_up_poll_on_current_cpu(&match->wqh, EPOLLIN | EPOLLRDNORM);
	else
		wake_up_poll(&match->wqh, EPOLLIN | EPOLLRDNORM);

	/*
	 * This is where we wait for a reply from userspace.
	 */
	do {
		bool wait_killable = should_sleep_killable(match, &n);

		mutex_unlock(&match->notify_lock);
		if (wait_killable)
			err = wait_for_completion_killable(&n.ready);
		else
			err = wait_for_completion_interruptible(&n.ready);
		mutex_lock(&match->notify_lock);

		if (err != 0) {
			/*
			 * Check to see if the notification got picked up and
			 * whether we should switch to wait killable.
			 */
			if (!wait_killable && should_sleep_killable(match, &n))
				continue;

			goto interrupted;
		}

		addfd = list_first_entry_or_null(&n.addfd,
						 struct seccomp_kaddfd, list);
		/* Check if we were woken up by an addfd message */
		if (addfd)
			seccomp_handle_addfd(addfd, &n);

	} while (n.state != SECCOMP_NOTIFY_REPLIED);

	ret = n.val;
	err = n.error;
	flags = n.flags;

interrupted:
	/* If there were any pending addfd calls, clear them out */
	list_for_each_entry_safe(addfd, tmp, &n.addfd, list) {
		/* The process went away before we got a chance to handle it */
		addfd->ret = -ESRCH;
		list_del_init(&addfd->list);
		complete(&addfd->completion);
	}

	/*
	 * Note that it's possible the listener died in between the time when
	 * we were notified of a response (or a signal) and when we were able to
	 * re-acquire the lock, so only delete from the list if the
	 * notification actually exists.
	 *
	 * Also note that this test is only valid because there's no way to
	 * *reattach* to a notifier right now. If one is added, we'll need to
	 * keep track of the notif itself and make sure they match here.
	 */
	if (match->notif)
		list_del(&n.list);
out:
	mutex_unlock(&match->notify_lock);

	/* Userspace requests to continue the syscall. */
	if (flags & SECCOMP_USER_NOTIF_FLAG_CONTINUE)
		return 0;

	syscall_set_return_value(current, current_pt_regs(),
				 err, ret);
	return -1;
}
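/*
 * Illustrative sketch (editor's addition): the two reply shapes a
 * supervisor can send to the wait loop above. Letting the target's
 * syscall proceed:
 *
 *	resp.id = req.id;
 *	resp.flags = SECCOMP_USER_NOTIF_FLAG_CONTINUE;
 *	resp.error = 0;
 *	resp.val = 0;
 *	ioctl(listener_fd, SECCOMP_IOCTL_NOTIF_SEND, &resp);
 *
 * or injecting a file descriptor into the target, which runs
 * seccomp_handle_addfd() above in the target's context:
 *
 *	struct seccomp_notif_addfd addfd = {
 *		.id = req.id,
 *		.srcfd = fd_in_supervisor,
 *		.flags = SECCOMP_ADDFD_FLAG_SEND,	// also answers the notif
 *	};
 *	ioctl(listener_fd, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd);
 */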
static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
			    const bool recheck_after_trace)
{
	u32 filter_ret, action;
	struct seccomp_filter *match = NULL;
	int data;
	struct seccomp_data sd_local;

	/*
	 * Make sure that any changes to mode from another thread have
	 * been seen after SYSCALL_WORK_SECCOMP was seen.
	 */
	smp_rmb();

	if (!sd) {
		populate_seccomp_data(&sd_local);
		sd = &sd_local;
	}

	filter_ret = seccomp_run_filters(sd, &match);
	data = filter_ret & SECCOMP_RET_DATA;
	action = filter_ret & SECCOMP_RET_ACTION_FULL;

	switch (action) {
	case SECCOMP_RET_ERRNO:
		/* Set low-order bits as an errno, capped at MAX_ERRNO. */
		if (data > MAX_ERRNO)
			data = MAX_ERRNO;
		syscall_set_return_value(current, current_pt_regs(),
					 -data, 0);
		goto skip;

	case SECCOMP_RET_TRAP:
		/* Show the handler the original registers. */
		syscall_rollback(current, current_pt_regs());
		/* Let the filter pass back 16 bits of data. */
		force_sig_seccomp(this_syscall, data, false);
		goto skip;

	case SECCOMP_RET_TRACE:
		/* We've been put in this state by the ptracer already. */
		if (recheck_after_trace)
			return 0;

		/* ENOSYS these calls if there is no tracer attached. */
		if (!ptrace_event_enabled(current, PTRACE_EVENT_SECCOMP)) {
			syscall_set_return_value(current,
						 current_pt_regs(),
						 -ENOSYS, 0);
			goto skip;
		}

		/* Allow the BPF to provide the event message */
		ptrace_event(PTRACE_EVENT_SECCOMP, data);
		/*
		 * The delivery of a fatal signal during event
		 * notification may silently skip tracer notification,
		 * which could leave us with a potentially unmodified
		 * syscall that the tracer would have liked to have
		 * changed. Since the process is about to die, we just
		 * force the syscall to be skipped and let the signal
		 * kill the process and correctly handle any tracer exit
		 * notifications.
		 */
		if (fatal_signal_pending(current))
			goto skip;
		/* Check if the tracer forced the syscall to be skipped. */
		this_syscall = syscall_get_nr(current, current_pt_regs());
		if (this_syscall < 0)
			goto skip;

		/*
		 * Recheck the syscall, since it may have changed. This
		 * intentionally uses a NULL struct seccomp_data to force
		 * a reload of all registers. This does not goto skip since
		 * a skip would have already been reported.
		 */
		if (__seccomp_filter(this_syscall, NULL, true))
			return -1;

		return 0;
*/ 1261 if (!ptrace_event_enabled(cur 832 if (!ptrace_event_enabled(current, PTRACE_EVENT_SECCOMP)) { 1262 syscall_set_return_va 833 syscall_set_return_value(current, 1263 !! 834 task_pt_regs(current), 1264 835 -ENOSYS, 0); 1265 goto skip; 836 goto skip; 1266 } 837 } 1267 838 1268 /* Allow the BPF to provide t 839 /* Allow the BPF to provide the event message */ 1269 ptrace_event(PTRACE_EVENT_SEC 840 ptrace_event(PTRACE_EVENT_SECCOMP, data); 1270 /* 841 /* 1271 * The delivery of a fatal si 842 * The delivery of a fatal signal during event 1272 * notification may silently 843 * notification may silently skip tracer notification, 1273 * which could leave us with 844 * which could leave us with a potentially unmodified 1274 * syscall that the tracer wo 845 * syscall that the tracer would have liked to have 1275 * changed. Since the process 846 * changed. Since the process is about to die, we just 1276 * force the syscall to be sk 847 * force the syscall to be skipped and let the signal 1277 * kill the process and corre 848 * kill the process and correctly handle any tracer exit 1278 * notifications. 849 * notifications. 1279 */ 850 */ 1280 if (fatal_signal_pending(curr 851 if (fatal_signal_pending(current)) 1281 goto skip; 852 goto skip; 1282 /* Check if the tracer forced 853 /* Check if the tracer forced the syscall to be skipped. */ 1283 this_syscall = syscall_get_nr !! 854 this_syscall = syscall_get_nr(current, task_pt_regs(current)); 1284 if (this_syscall < 0) 855 if (this_syscall < 0) 1285 goto skip; 856 goto skip; 1286 857 1287 /* 858 /* 1288 * Recheck the syscall, since 859 * Recheck the syscall, since it may have changed. This 1289 * intentionally uses a NULL 860 * intentionally uses a NULL struct seccomp_data to force 1290 * a reload of all registers. 861 * a reload of all registers. This does not goto skip since 1291 * a skip would have already 862 * a skip would have already been reported. 1292 */ 863 */ 1293 if (__seccomp_filter(this_sys 864 if (__seccomp_filter(this_syscall, NULL, true)) 1294 return -1; 865 return -1; 1295 866 1296 return 0; 867 return 0; 1297 868 1298 case SECCOMP_RET_USER_NOTIF: 869 case SECCOMP_RET_USER_NOTIF: 1299 if (seccomp_do_user_notificat !! 870 seccomp_do_user_notification(this_syscall, match, sd); 1300 goto skip; !! 871 goto skip; 1301 << 1302 return 0; << 1303 872 1304 case SECCOMP_RET_LOG: 873 case SECCOMP_RET_LOG: 1305 seccomp_log(this_syscall, 0, 874 seccomp_log(this_syscall, 0, action, true); 1306 return 0; 875 return 0; 1307 876 1308 case SECCOMP_RET_ALLOW: 877 case SECCOMP_RET_ALLOW: 1309 /* 878 /* 1310 * Note that the "match" filt 879 * Note that the "match" filter will always be NULL for 1311 * this action since SECCOMP_ 880 * this action since SECCOMP_RET_ALLOW is the starting 1312 * state in seccomp_run_filte 881 * state in seccomp_run_filters(). 1313 */ 882 */ 1314 return 0; 883 return 0; 1315 884 1316 case SECCOMP_RET_KILL_THREAD: 885 case SECCOMP_RET_KILL_THREAD: 1317 case SECCOMP_RET_KILL_PROCESS: 886 case SECCOMP_RET_KILL_PROCESS: 1318 default: 887 default: 1319 current->seccomp.mode = SECCO << 1320 seccomp_log(this_syscall, SIG 888 seccomp_log(this_syscall, SIGSYS, action, true); 1321 /* Dump core only if this is 889 /* Dump core only if this is the last remaining thread. */ 1322 if (action != SECCOMP_RET_KIL !! 890 if (action == SECCOMP_RET_KILL_PROCESS || 1323 (atomic_read(¤t->si !! 891 get_nr_threads(current) == 1) { >> 892 kernel_siginfo_t info; >> 893 1324 /* Show the original 894 /* Show the original registers in the dump. 
*/ 1325 syscall_rollback(curr !! 895 syscall_rollback(current, task_pt_regs(current)); 1326 /* Trigger a coredump !! 896 /* Trigger a manual coredump since do_exit skips it. */ 1327 force_sig_seccomp(thi !! 897 seccomp_init_siginfo(&info, this_syscall, data); 1328 } else { !! 898 do_coredump(&info); 1329 do_exit(SIGSYS); << 1330 } 899 } 1331 return -1; /* skip the syscal !! 900 if (action == SECCOMP_RET_KILL_PROCESS) >> 901 do_group_exit(SIGSYS); >> 902 else >> 903 do_exit(SIGSYS); 1332 } 904 } 1333 905 1334 unreachable(); 906 unreachable(); 1335 907 1336 skip: 908 skip: 1337 seccomp_log(this_syscall, 0, action, 909 seccomp_log(this_syscall, 0, action, match ? match->log : false); 1338 return -1; 910 return -1; 1339 } 911 } 1340 #else 912 #else 1341 static int __seccomp_filter(int this_syscall, 913 static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd, 1342 const bool rechec 914 const bool recheck_after_trace) 1343 { 915 { 1344 BUG(); 916 BUG(); 1345 << 1346 return -1; << 1347 } 917 } 1348 #endif 918 #endif 1349 919 1350 int __secure_computing(const struct seccomp_d 920 int __secure_computing(const struct seccomp_data *sd) 1351 { 921 { 1352 int mode = current->seccomp.mode; 922 int mode = current->seccomp.mode; 1353 int this_syscall; 923 int this_syscall; 1354 924 1355 if (IS_ENABLED(CONFIG_CHECKPOINT_REST 925 if (IS_ENABLED(CONFIG_CHECKPOINT_RESTORE) && 1356 unlikely(current->ptrace & PT_SUS 926 unlikely(current->ptrace & PT_SUSPEND_SECCOMP)) 1357 return 0; 927 return 0; 1358 928 1359 this_syscall = sd ? sd->nr : 929 this_syscall = sd ? sd->nr : 1360 syscall_get_nr(current, curre !! 930 syscall_get_nr(current, task_pt_regs(current)); 1361 931 1362 switch (mode) { 932 switch (mode) { 1363 case SECCOMP_MODE_STRICT: 933 case SECCOMP_MODE_STRICT: 1364 __secure_computing_strict(thi 934 __secure_computing_strict(this_syscall); /* may call do_exit */ 1365 return 0; 935 return 0; 1366 case SECCOMP_MODE_FILTER: 936 case SECCOMP_MODE_FILTER: 1367 return __seccomp_filter(this_ 937 return __seccomp_filter(this_syscall, sd, false); 1368 /* Surviving SECCOMP_RET_KILL_* must << 1369 case SECCOMP_MODE_DEAD: << 1370 WARN_ON_ONCE(1); << 1371 do_exit(SIGKILL); << 1372 return -1; << 1373 default: 938 default: 1374 BUG(); 939 BUG(); 1375 } 940 } 1376 } 941 } 1377 #endif /* CONFIG_HAVE_ARCH_SECCOMP_FILTER */ 942 #endif /* CONFIG_HAVE_ARCH_SECCOMP_FILTER */ 1378 943 1379 long prctl_get_seccomp(void) 944 long prctl_get_seccomp(void) 1380 { 945 { 1381 return current->seccomp.mode; 946 return current->seccomp.mode; 1382 } 947 } 1383 948 1384 /** 949 /** 1385 * seccomp_set_mode_strict: internal function 950 * seccomp_set_mode_strict: internal function for setting strict seccomp 1386 * 951 * 1387 * Once current->seccomp.mode is non-zero, it 952 * Once current->seccomp.mode is non-zero, it may not be changed. 1388 * 953 * 1389 * Returns 0 on success or -EINVAL on failure 954 * Returns 0 on success or -EINVAL on failure. 
1390 */ 955 */ 1391 static long seccomp_set_mode_strict(void) 956 static long seccomp_set_mode_strict(void) 1392 { 957 { 1393 const unsigned long seccomp_mode = SE 958 const unsigned long seccomp_mode = SECCOMP_MODE_STRICT; 1394 long ret = -EINVAL; 959 long ret = -EINVAL; 1395 960 1396 spin_lock_irq(¤t->sighand->sigl 961 spin_lock_irq(¤t->sighand->siglock); 1397 962 1398 if (!seccomp_may_assign_mode(seccomp_ 963 if (!seccomp_may_assign_mode(seccomp_mode)) 1399 goto out; 964 goto out; 1400 965 1401 #ifdef TIF_NOTSC 966 #ifdef TIF_NOTSC 1402 disable_TSC(); 967 disable_TSC(); 1403 #endif 968 #endif 1404 seccomp_assign_mode(current, seccomp_ 969 seccomp_assign_mode(current, seccomp_mode, 0); 1405 ret = 0; 970 ret = 0; 1406 971 1407 out: 972 out: 1408 spin_unlock_irq(¤t->sighand->si 973 spin_unlock_irq(¤t->sighand->siglock); 1409 974 1410 return ret; 975 return ret; 1411 } 976 } 1412 977 1413 #ifdef CONFIG_SECCOMP_FILTER 978 #ifdef CONFIG_SECCOMP_FILTER 1414 static void seccomp_notify_free(struct seccom !! 979 static int seccomp_notify_release(struct inode *inode, struct file *file) 1415 { << 1416 kfree(filter->notif); << 1417 filter->notif = NULL; << 1418 } << 1419 << 1420 static void seccomp_notify_detach(struct secc << 1421 { 980 { >> 981 struct seccomp_filter *filter = file->private_data; 1422 struct seccomp_knotif *knotif; 982 struct seccomp_knotif *knotif; 1423 983 1424 if (!filter) 984 if (!filter) 1425 return; !! 985 return 0; 1426 986 1427 mutex_lock(&filter->notify_lock); 987 mutex_lock(&filter->notify_lock); 1428 988 1429 /* 989 /* 1430 * If this file is being closed becau 990 * If this file is being closed because e.g. the task who owned it 1431 * died, let's wake everyone up who w 991 * died, let's wake everyone up who was waiting on us. 1432 */ 992 */ 1433 list_for_each_entry(knotif, &filter-> 993 list_for_each_entry(knotif, &filter->notif->notifications, list) { 1434 if (knotif->state == SECCOMP_ 994 if (knotif->state == SECCOMP_NOTIFY_REPLIED) 1435 continue; 995 continue; 1436 996 1437 knotif->state = SECCOMP_NOTIF 997 knotif->state = SECCOMP_NOTIFY_REPLIED; 1438 knotif->error = -ENOSYS; 998 knotif->error = -ENOSYS; 1439 knotif->val = 0; 999 knotif->val = 0; 1440 1000 1441 /* << 1442 * We do not need to wake up << 1443 * the notifier will do that << 1444 * like a standard reply. << 1445 */ << 1446 complete(&knotif->ready); 1001 complete(&knotif->ready); 1447 } 1002 } 1448 1003 1449 seccomp_notify_free(filter); !! 
1004 kfree(filter->notif); >> 1005 filter->notif = NULL; 1450 mutex_unlock(&filter->notify_lock); 1006 mutex_unlock(&filter->notify_lock); 1451 } << 1452 << 1453 static int seccomp_notify_release(struct inod << 1454 { << 1455 struct seccomp_filter *filter = file- << 1456 << 1457 seccomp_notify_detach(filter); << 1458 __put_seccomp_filter(filter); 1007 __put_seccomp_filter(filter); 1459 return 0; 1008 return 0; 1460 } 1009 } 1461 1010 1462 /* must be called with notif_lock held */ << 1463 static inline struct seccomp_knotif * << 1464 find_notification(struct seccomp_filter *filt << 1465 { << 1466 struct seccomp_knotif *cur; << 1467 << 1468 lockdep_assert_held(&filter->notify_l << 1469 << 1470 list_for_each_entry(cur, &filter->not << 1471 if (cur->id == id) << 1472 return cur; << 1473 } << 1474 << 1475 return NULL; << 1476 } << 1477 << 1478 static int recv_wake_function(wait_queue_entr << 1479 void *key) << 1480 { << 1481 /* Avoid a wakeup if event not intere << 1482 if (key && !(key_to_poll(key) & (EPOL << 1483 return 0; << 1484 return autoremove_wake_function(wait, << 1485 } << 1486 << 1487 static int recv_wait_event(struct seccomp_fil << 1488 { << 1489 DEFINE_WAIT_FUNC(wait, recv_wake_func << 1490 int ret; << 1491 << 1492 if (refcount_read(&filter->users) == << 1493 return 0; << 1494 << 1495 if (atomic_dec_if_positive(&filter->n << 1496 return 0; << 1497 << 1498 for (;;) { << 1499 ret = prepare_to_wait_event(& << 1500 << 1501 if (atomic_dec_if_positive(&f << 1502 break; << 1503 if (refcount_read(&filter->us << 1504 break; << 1505 << 1506 if (ret) << 1507 return ret; << 1508 << 1509 schedule(); << 1510 } << 1511 finish_wait(&filter->wqh, &wait); << 1512 return 0; << 1513 } << 1514 << 1515 static long seccomp_notify_recv(struct seccom 1011 static long seccomp_notify_recv(struct seccomp_filter *filter, 1516 void __user * 1012 void __user *buf) 1517 { 1013 { 1518 struct seccomp_knotif *knotif = NULL, 1014 struct seccomp_knotif *knotif = NULL, *cur; 1519 struct seccomp_notif unotif; 1015 struct seccomp_notif unotif; 1520 ssize_t ret; 1016 ssize_t ret; 1521 1017 1522 /* Verify that we're not given garbag << 1523 ret = check_zeroed_user(buf, sizeof(u << 1524 if (ret < 0) << 1525 return ret; << 1526 if (!ret) << 1527 return -EINVAL; << 1528 << 1529 memset(&unotif, 0, sizeof(unotif)); 1018 memset(&unotif, 0, sizeof(unotif)); 1530 1019 1531 ret = recv_wait_event(filter); !! 1020 ret = down_interruptible(&filter->notif->request); 1532 if (ret < 0) 1021 if (ret < 0) 1533 return ret; 1022 return ret; 1534 1023 1535 mutex_lock(&filter->notify_lock); 1024 mutex_lock(&filter->notify_lock); 1536 list_for_each_entry(cur, &filter->not 1025 list_for_each_entry(cur, &filter->notif->notifications, list) { 1537 if (cur->state == SECCOMP_NOT 1026 if (cur->state == SECCOMP_NOTIFY_INIT) { 1538 knotif = cur; 1027 knotif = cur; 1539 break; 1028 break; 1540 } 1029 } 1541 } 1030 } 1542 1031 1543 /* 1032 /* 1544 * If we didn't find a notification, 1033 * If we didn't find a notification, it could be that the task was 1545 * interrupted by a fatal signal betw 1034 * interrupted by a fatal signal between the time we were woken and 1546 * when we were able to acquire the r 1035 * when we were able to acquire the rw lock. 
1547 */ 1036 */ 1548 if (!knotif) { 1037 if (!knotif) { 1549 ret = -ENOENT; 1038 ret = -ENOENT; 1550 goto out; 1039 goto out; 1551 } 1040 } 1552 1041 1553 unotif.id = knotif->id; 1042 unotif.id = knotif->id; 1554 unotif.pid = task_pid_vnr(knotif->tas 1043 unotif.pid = task_pid_vnr(knotif->task); 1555 unotif.data = *(knotif->data); 1044 unotif.data = *(knotif->data); 1556 1045 1557 knotif->state = SECCOMP_NOTIFY_SENT; 1046 knotif->state = SECCOMP_NOTIFY_SENT; 1558 wake_up_poll(&filter->wqh, EPOLLOUT | !! 1047 wake_up_poll(&filter->notif->wqh, EPOLLOUT | EPOLLWRNORM); 1559 ret = 0; 1048 ret = 0; 1560 out: 1049 out: 1561 mutex_unlock(&filter->notify_lock); 1050 mutex_unlock(&filter->notify_lock); 1562 1051 1563 if (ret == 0 && copy_to_user(buf, &un 1052 if (ret == 0 && copy_to_user(buf, &unotif, sizeof(unotif))) { 1564 ret = -EFAULT; 1053 ret = -EFAULT; 1565 1054 1566 /* 1055 /* 1567 * Userspace screwed up. To m 1056 * Userspace screwed up. To make sure that we keep this 1568 * notification alive, let's 1057 * notification alive, let's reset it back to INIT. It 1569 * may have died when we rele 1058 * may have died when we released the lock, so we need to make 1570 * sure it's still around. 1059 * sure it's still around. 1571 */ 1060 */ >> 1061 knotif = NULL; 1572 mutex_lock(&filter->notify_lo 1062 mutex_lock(&filter->notify_lock); 1573 knotif = find_notification(fi !! 1063 list_for_each_entry(cur, &filter->notif->notifications, list) { >> 1064 if (cur->id == unotif.id) { >> 1065 knotif = cur; >> 1066 break; >> 1067 } >> 1068 } >> 1069 1574 if (knotif) { 1070 if (knotif) { 1575 /* Reset the process << 1576 if (should_sleep_kill << 1577 complete(&kno << 1578 knotif->state = SECCO 1071 knotif->state = SECCOMP_NOTIFY_INIT; 1579 atomic_inc(&filter->n !! 1072 up(&filter->notif->request); 1580 wake_up_poll(&filter- << 1581 } 1073 } 1582 mutex_unlock(&filter->notify_ 1074 mutex_unlock(&filter->notify_lock); 1583 } 1075 } 1584 1076 1585 return ret; 1077 return ret; 1586 } 1078 } 1587 1079 1588 static long seccomp_notify_send(struct seccom 1080 static long seccomp_notify_send(struct seccomp_filter *filter, 1589 void __user * 1081 void __user *buf) 1590 { 1082 { 1591 struct seccomp_notif_resp resp = {}; 1083 struct seccomp_notif_resp resp = {}; 1592 struct seccomp_knotif *knotif; !! 1084 struct seccomp_knotif *knotif = NULL, *cur; 1593 long ret; 1085 long ret; 1594 1086 1595 if (copy_from_user(&resp, buf, sizeof 1087 if (copy_from_user(&resp, buf, sizeof(resp))) 1596 return -EFAULT; 1088 return -EFAULT; 1597 1089 1598 if (resp.flags & ~SECCOMP_USER_NOTIF_ !! 1090 if (resp.flags) 1599 return -EINVAL; << 1600 << 1601 if ((resp.flags & SECCOMP_USER_NOTIF_ << 1602 (resp.error || resp.val)) << 1603 return -EINVAL; 1091 return -EINVAL; 1604 1092 1605 ret = mutex_lock_interruptible(&filte 1093 ret = mutex_lock_interruptible(&filter->notify_lock); 1606 if (ret < 0) 1094 if (ret < 0) 1607 return ret; 1095 return ret; 1608 1096 1609 knotif = find_notification(filter, re !! 1097 list_for_each_entry(cur, &filter->notif->notifications, list) { >> 1098 if (cur->id == resp.id) { >> 1099 knotif = cur; >> 1100 break; >> 1101 } >> 1102 } >> 1103 1610 if (!knotif) { 1104 if (!knotif) { 1611 ret = -ENOENT; 1105 ret = -ENOENT; 1612 goto out; 1106 goto out; 1613 } 1107 } 1614 1108 1615 /* Allow exactly one reply. */ 1109 /* Allow exactly one reply. 
*/ 1616 if (knotif->state != SECCOMP_NOTIFY_S 1110 if (knotif->state != SECCOMP_NOTIFY_SENT) { 1617 ret = -EINPROGRESS; 1111 ret = -EINPROGRESS; 1618 goto out; 1112 goto out; 1619 } 1113 } 1620 1114 1621 ret = 0; 1115 ret = 0; 1622 knotif->state = SECCOMP_NOTIFY_REPLIE 1116 knotif->state = SECCOMP_NOTIFY_REPLIED; 1623 knotif->error = resp.error; 1117 knotif->error = resp.error; 1624 knotif->val = resp.val; 1118 knotif->val = resp.val; 1625 knotif->flags = resp.flags; !! 1119 complete(&knotif->ready); 1626 if (filter->notif->flags & SECCOMP_US << 1627 complete_on_current_cpu(&knot << 1628 else << 1629 complete(&knotif->ready); << 1630 out: 1120 out: 1631 mutex_unlock(&filter->notify_lock); 1121 mutex_unlock(&filter->notify_lock); 1632 return ret; 1122 return ret; 1633 } 1123 } 1634 1124 1635 static long seccomp_notify_id_valid(struct se 1125 static long seccomp_notify_id_valid(struct seccomp_filter *filter, 1636 void __us 1126 void __user *buf) 1637 { 1127 { 1638 struct seccomp_knotif *knotif; !! 1128 struct seccomp_knotif *knotif = NULL; 1639 u64 id; 1129 u64 id; 1640 long ret; 1130 long ret; 1641 1131 1642 if (copy_from_user(&id, buf, sizeof(i 1132 if (copy_from_user(&id, buf, sizeof(id))) 1643 return -EFAULT; 1133 return -EFAULT; 1644 1134 1645 ret = mutex_lock_interruptible(&filte 1135 ret = mutex_lock_interruptible(&filter->notify_lock); 1646 if (ret < 0) 1136 if (ret < 0) 1647 return ret; 1137 return ret; 1648 1138 1649 knotif = find_notification(filter, id !! 1139 ret = -ENOENT; 1650 if (knotif && knotif->state == SECCOM !! 1140 list_for_each_entry(knotif, &filter->notif->notifications, list) { 1651 ret = 0; !! 1141 if (knotif->id == id) { 1652 else !! 1142 if (knotif->state == SECCOMP_NOTIFY_SENT) 1653 ret = -ENOENT; !! 1143 ret = 0; 1654 !! 
1144 goto out; 1655 mutex_unlock(&filter->notify_lock); << 1656 return ret; << 1657 } << 1658 << 1659 static long seccomp_notify_set_flags(struct s << 1660 unsigned << 1661 { << 1662 long ret; << 1663 << 1664 if (flags & ~SECCOMP_USER_NOTIF_FD_SY << 1665 return -EINVAL; << 1666 << 1667 ret = mutex_lock_interruptible(&filte << 1668 if (ret < 0) << 1669 return ret; << 1670 filter->notif->flags = flags; << 1671 mutex_unlock(&filter->notify_lock); << 1672 return 0; << 1673 } << 1674 << 1675 static long seccomp_notify_addfd(struct secco << 1676 struct secco << 1677 unsigned int << 1678 { << 1679 struct seccomp_notif_addfd addfd; << 1680 struct seccomp_knotif *knotif; << 1681 struct seccomp_kaddfd kaddfd; << 1682 int ret; << 1683 << 1684 BUILD_BUG_ON(sizeof(addfd) < SECCOMP_ << 1685 BUILD_BUG_ON(sizeof(addfd) != SECCOMP << 1686 << 1687 if (size < SECCOMP_NOTIFY_ADDFD_SIZE_ << 1688 return -EINVAL; << 1689 << 1690 ret = copy_struct_from_user(&addfd, s << 1691 if (ret) << 1692 return ret; << 1693 << 1694 if (addfd.newfd_flags & ~O_CLOEXEC) << 1695 return -EINVAL; << 1696 << 1697 if (addfd.flags & ~(SECCOMP_ADDFD_FLA << 1698 return -EINVAL; << 1699 << 1700 if (addfd.newfd && !(addfd.flags & SE << 1701 return -EINVAL; << 1702 << 1703 kaddfd.file = fget(addfd.srcfd); << 1704 if (!kaddfd.file) << 1705 return -EBADF; << 1706 << 1707 kaddfd.ioctl_flags = addfd.flags; << 1708 kaddfd.flags = addfd.newfd_flags; << 1709 kaddfd.setfd = addfd.flags & SECCOMP_ << 1710 kaddfd.fd = addfd.newfd; << 1711 init_completion(&kaddfd.completion); << 1712 << 1713 ret = mutex_lock_interruptible(&filte << 1714 if (ret < 0) << 1715 goto out; << 1716 << 1717 knotif = find_notification(filter, ad << 1718 if (!knotif) { << 1719 ret = -ENOENT; << 1720 goto out_unlock; << 1721 } << 1722 << 1723 /* << 1724 * We do not want to allow for FD inj << 1725 * notification has been picked up by << 1726 * the notification has been replied << 1727 */ << 1728 if (knotif->state != SECCOMP_NOTIFY_S << 1729 ret = -EINPROGRESS; << 1730 goto out_unlock; << 1731 } << 1732 << 1733 if (addfd.flags & SECCOMP_ADDFD_FLAG_ << 1734 /* << 1735 * Disallow queuing an atomic << 1736 * some addfd requests still << 1737 * << 1738 * There is no clear reason t << 1739 * the loop on the other side << 1740 */ << 1741 if (!list_empty(&knotif->addf << 1742 ret = -EBUSY; << 1743 goto out_unlock; << 1744 } 1145 } 1745 << 1746 /* Allow exactly only one rep << 1747 knotif->state = SECCOMP_NOTIF << 1748 } << 1749 << 1750 list_add(&kaddfd.list, &knotif->addfd << 1751 complete(&knotif->ready); << 1752 mutex_unlock(&filter->notify_lock); << 1753 << 1754 /* Now we wait for it to be processed << 1755 ret = wait_for_completion_interruptib << 1756 if (ret == 0) { << 1757 /* << 1758 * We had a successful comple << 1759 * removed us from the addfd << 1760 * wait_for_completion_interr << 1761 * success that lets us read << 1762 * locking. << 1763 */ << 1764 ret = kaddfd.ret; << 1765 goto out; << 1766 } 1146 } 1767 1147 1768 mutex_lock(&filter->notify_lock); << 1769 /* << 1770 * Even though we were woken up by a << 1771 * completion, a completion may have << 1772 * << 1773 * We need to check again if the addf << 1774 * and if not, we will remove it from << 1775 */ << 1776 if (list_empty(&kaddfd.list)) << 1777 ret = kaddfd.ret; << 1778 else << 1779 list_del(&kaddfd.list); << 1780 << 1781 out_unlock: << 1782 mutex_unlock(&filter->notify_lock); << 1783 out: 1148 out: 1784 fput(kaddfd.file); !! 
1149 mutex_unlock(&filter->notify_lock); 1785 << 1786 return ret; 1150 return ret; 1787 } 1151 } 1788 1152 1789 static long seccomp_notify_ioctl(struct file 1153 static long seccomp_notify_ioctl(struct file *file, unsigned int cmd, 1790 unsigned lon 1154 unsigned long arg) 1791 { 1155 { 1792 struct seccomp_filter *filter = file- 1156 struct seccomp_filter *filter = file->private_data; 1793 void __user *buf = (void __user *)arg 1157 void __user *buf = (void __user *)arg; 1794 1158 1795 /* Fixed-size ioctls */ << 1796 switch (cmd) { 1159 switch (cmd) { 1797 case SECCOMP_IOCTL_NOTIF_RECV: 1160 case SECCOMP_IOCTL_NOTIF_RECV: 1798 return seccomp_notify_recv(fi 1161 return seccomp_notify_recv(filter, buf); 1799 case SECCOMP_IOCTL_NOTIF_SEND: 1162 case SECCOMP_IOCTL_NOTIF_SEND: 1800 return seccomp_notify_send(fi 1163 return seccomp_notify_send(filter, buf); 1801 case SECCOMP_IOCTL_NOTIF_ID_VALID_WRO << 1802 case SECCOMP_IOCTL_NOTIF_ID_VALID: 1164 case SECCOMP_IOCTL_NOTIF_ID_VALID: 1803 return seccomp_notify_id_vali 1165 return seccomp_notify_id_valid(filter, buf); 1804 case SECCOMP_IOCTL_NOTIF_SET_FLAGS: << 1805 return seccomp_notify_set_fla << 1806 } << 1807 << 1808 /* Extensible Argument ioctls */ << 1809 #define EA_IOCTL(cmd) ((cmd) & ~(IOC_INOUT << 1810 switch (EA_IOCTL(cmd)) { << 1811 case EA_IOCTL(SECCOMP_IOCTL_NOTIF_ADD << 1812 return seccomp_notify_addfd(f << 1813 default: 1166 default: 1814 return -EINVAL; 1167 return -EINVAL; 1815 } 1168 } 1816 } 1169 } 1817 1170 1818 static __poll_t seccomp_notify_poll(struct fi 1171 static __poll_t seccomp_notify_poll(struct file *file, 1819 struct po 1172 struct poll_table_struct *poll_tab) 1820 { 1173 { 1821 struct seccomp_filter *filter = file- 1174 struct seccomp_filter *filter = file->private_data; 1822 __poll_t ret = 0; 1175 __poll_t ret = 0; 1823 struct seccomp_knotif *cur; 1176 struct seccomp_knotif *cur; 1824 1177 1825 poll_wait(file, &filter->wqh, poll_ta !! 1178 poll_wait(file, &filter->notif->wqh, poll_tab); 1826 1179 1827 if (mutex_lock_interruptible(&filter- 1180 if (mutex_lock_interruptible(&filter->notify_lock) < 0) 1828 return EPOLLERR; 1181 return EPOLLERR; 1829 1182 1830 list_for_each_entry(cur, &filter->not 1183 list_for_each_entry(cur, &filter->notif->notifications, list) { 1831 if (cur->state == SECCOMP_NOT 1184 if (cur->state == SECCOMP_NOTIFY_INIT) 1832 ret |= EPOLLIN | EPOL 1185 ret |= EPOLLIN | EPOLLRDNORM; 1833 if (cur->state == SECCOMP_NOT 1186 if (cur->state == SECCOMP_NOTIFY_SENT) 1834 ret |= EPOLLOUT | EPO 1187 ret |= EPOLLOUT | EPOLLWRNORM; 1835 if ((ret & EPOLLIN) && (ret & 1188 if ((ret & EPOLLIN) && (ret & EPOLLOUT)) 1836 break; 1189 break; 1837 } 1190 } 1838 1191 1839 mutex_unlock(&filter->notify_lock); 1192 mutex_unlock(&filter->notify_lock); 1840 1193 1841 if (refcount_read(&filter->users) == << 1842 ret |= EPOLLHUP; << 1843 << 1844 return ret; 1194 return ret; 1845 } 1195 } 1846 1196 1847 static const struct file_operations seccomp_n 1197 static const struct file_operations seccomp_notify_ops = { 1848 .poll = seccomp_notify_poll, 1198 .poll = seccomp_notify_poll, 1849 .release = seccomp_notify_release, 1199 .release = seccomp_notify_release, 1850 .unlocked_ioctl = seccomp_notify_ioct 1200 .unlocked_ioctl = seccomp_notify_ioctl, 1851 .compat_ioctl = seccomp_notify_ioctl, << 1852 }; 1201 }; 1853 1202 1854 static struct file *init_listener(struct secc 1203 static struct file *init_listener(struct seccomp_filter *filter) 1855 { 1204 { 1856 struct file *ret; !! 
1205 struct file *ret = ERR_PTR(-EBUSY); >> 1206 struct seccomp_filter *cur; >> 1207 >> 1208 for (cur = current->seccomp.filter; cur; cur = cur->prev) { >> 1209 if (cur->notif) >> 1210 goto out; >> 1211 } 1857 1212 1858 ret = ERR_PTR(-ENOMEM); 1213 ret = ERR_PTR(-ENOMEM); 1859 filter->notif = kzalloc(sizeof(*(filt 1214 filter->notif = kzalloc(sizeof(*(filter->notif)), GFP_KERNEL); 1860 if (!filter->notif) 1215 if (!filter->notif) 1861 goto out; 1216 goto out; 1862 1217 >> 1218 sema_init(&filter->notif->request, 0); 1863 filter->notif->next_id = get_random_u 1219 filter->notif->next_id = get_random_u64(); 1864 INIT_LIST_HEAD(&filter->notif->notifi 1220 INIT_LIST_HEAD(&filter->notif->notifications); >> 1221 init_waitqueue_head(&filter->notif->wqh); 1865 1222 1866 ret = anon_inode_getfile("seccomp not 1223 ret = anon_inode_getfile("seccomp notify", &seccomp_notify_ops, 1867 filter, O_RD 1224 filter, O_RDWR); 1868 if (IS_ERR(ret)) 1225 if (IS_ERR(ret)) 1869 goto out_notif; 1226 goto out_notif; 1870 1227 1871 /* The file has a reference to it now 1228 /* The file has a reference to it now */ 1872 __get_seccomp_filter(filter); 1229 __get_seccomp_filter(filter); 1873 1230 1874 out_notif: 1231 out_notif: 1875 if (IS_ERR(ret)) 1232 if (IS_ERR(ret)) 1876 seccomp_notify_free(filter); !! 1233 kfree(filter->notif); 1877 out: 1234 out: 1878 return ret; 1235 return ret; 1879 } 1236 } 1880 1237 1881 /* << 1882 * Does @new_child have a listener while an a << 1883 * If so, we'll want to reject this filter. << 1884 * This only has to be tested for the current << 1885 * because TSYNC installs @child with the sam << 1886 * Note that @new_child is not hooked up to i << 1887 * we use current->seccomp.filter. << 1888 */ << 1889 static bool has_duplicate_listener(struct sec << 1890 { << 1891 struct seccomp_filter *cur; << 1892 << 1893 /* must be protected against concurre << 1894 lockdep_assert_held(¤t->sighand << 1895 << 1896 if (!new_child->notif) << 1897 return false; << 1898 for (cur = current->seccomp.filter; c << 1899 if (cur->notif) << 1900 return true; << 1901 } << 1902 << 1903 return false; << 1904 } << 1905 << 1906 /** 1238 /** 1907 * seccomp_set_mode_filter: internal function 1239 * seccomp_set_mode_filter: internal function for setting seccomp filter 1908 * @flags: flags to change filter behavior 1240 * @flags: flags to change filter behavior 1909 * @filter: struct sock_fprog containing filt 1241 * @filter: struct sock_fprog containing filter 1910 * 1242 * 1911 * This function may be called repeatedly to 1243 * This function may be called repeatedly to install additional filters. 1912 * Every filter successfully installed will b 1244 * Every filter successfully installed will be evaluated (in reverse order) 1913 * for each system call the task makes. 1245 * for each system call the task makes. 1914 * 1246 * 1915 * Once current->seccomp.mode is non-zero, it 1247 * Once current->seccomp.mode is non-zero, it may not be changed. 1916 * 1248 * 1917 * Returns 0 on success or -EINVAL on failure 1249 * Returns 0 on success or -EINVAL on failure. 
1918 */ 1250 */ 1919 static long seccomp_set_mode_filter(unsigned 1251 static long seccomp_set_mode_filter(unsigned int flags, 1920 const cha 1252 const char __user *filter) 1921 { 1253 { 1922 const unsigned long seccomp_mode = SE 1254 const unsigned long seccomp_mode = SECCOMP_MODE_FILTER; 1923 struct seccomp_filter *prepared = NUL 1255 struct seccomp_filter *prepared = NULL; 1924 long ret = -EINVAL; 1256 long ret = -EINVAL; 1925 int listener = -1; 1257 int listener = -1; 1926 struct file *listener_f = NULL; 1258 struct file *listener_f = NULL; 1927 1259 1928 /* Validate flags. */ 1260 /* Validate flags. */ 1929 if (flags & ~SECCOMP_FILTER_FLAG_MASK 1261 if (flags & ~SECCOMP_FILTER_FLAG_MASK) 1930 return -EINVAL; 1262 return -EINVAL; 1931 1263 1932 /* 1264 /* 1933 * In the successful case, NEW_LISTEN 1265 * In the successful case, NEW_LISTENER returns the new listener fd. 1934 * But in the failure case, TSYNC ret 1266 * But in the failure case, TSYNC returns the thread that died. If you 1935 * combine these two flags, there's n 1267 * combine these two flags, there's no way to tell whether something 1936 * succeeded or failed. So, let's dis !! 1268 * succeeded or failed. So, let's disallow this combination. 1937 * has not explicitly requested no er << 1938 */ 1269 */ 1939 if ((flags & SECCOMP_FILTER_FLAG_TSYN 1270 if ((flags & SECCOMP_FILTER_FLAG_TSYNC) && 1940 (flags & SECCOMP_FILTER_FLAG_NEW_ !! 1271 (flags & SECCOMP_FILTER_FLAG_NEW_LISTENER)) 1941 ((flags & SECCOMP_FILTER_FLAG_TSY << 1942 return -EINVAL; << 1943 << 1944 /* << 1945 * The SECCOMP_FILTER_FLAG_WAIT_KILLA << 1946 * without the SECCOMP_FILTER_FLAG_NE << 1947 */ << 1948 if ((flags & SECCOMP_FILTER_FLAG_WAIT << 1949 ((flags & SECCOMP_FILTER_FLAG_NEW << 1950 return -EINVAL; 1272 return -EINVAL; 1951 1273 1952 /* Prepare the new filter before hold 1274 /* Prepare the new filter before holding any locks. */ 1953 prepared = seccomp_prepare_user_filte 1275 prepared = seccomp_prepare_user_filter(filter); 1954 if (IS_ERR(prepared)) 1276 if (IS_ERR(prepared)) 1955 return PTR_ERR(prepared); 1277 return PTR_ERR(prepared); 1956 1278 1957 if (flags & SECCOMP_FILTER_FLAG_NEW_L 1279 if (flags & SECCOMP_FILTER_FLAG_NEW_LISTENER) { 1958 listener = get_unused_fd_flag 1280 listener = get_unused_fd_flags(O_CLOEXEC); 1959 if (listener < 0) { 1281 if (listener < 0) { 1960 ret = listener; 1282 ret = listener; 1961 goto out_free; 1283 goto out_free; 1962 } 1284 } 1963 1285 1964 listener_f = init_listener(pr 1286 listener_f = init_listener(prepared); 1965 if (IS_ERR(listener_f)) { 1287 if (IS_ERR(listener_f)) { 1966 put_unused_fd(listene 1288 put_unused_fd(listener); 1967 ret = PTR_ERR(listene 1289 ret = PTR_ERR(listener_f); 1968 goto out_free; 1290 goto out_free; 1969 } 1291 } 1970 } 1292 } 1971 1293 1972 /* 1294 /* 1973 * Make sure we cannot change seccomp 1295 * Make sure we cannot change seccomp or nnp state via TSYNC 1974 * while another thread is in the mid 1296 * while another thread is in the middle of calling exec. 
1975 */ 1297 */ 1976 if (flags & SECCOMP_FILTER_FLAG_TSYNC 1298 if (flags & SECCOMP_FILTER_FLAG_TSYNC && 1977 mutex_lock_killable(¤t->sig 1299 mutex_lock_killable(¤t->signal->cred_guard_mutex)) 1978 goto out_put_fd; 1300 goto out_put_fd; 1979 1301 1980 spin_lock_irq(¤t->sighand->sigl 1302 spin_lock_irq(¤t->sighand->siglock); 1981 1303 1982 if (!seccomp_may_assign_mode(seccomp_ 1304 if (!seccomp_may_assign_mode(seccomp_mode)) 1983 goto out; 1305 goto out; 1984 1306 1985 if (has_duplicate_listener(prepared)) << 1986 ret = -EBUSY; << 1987 goto out; << 1988 } << 1989 << 1990 ret = seccomp_attach_filter(flags, pr 1307 ret = seccomp_attach_filter(flags, prepared); 1991 if (ret) 1308 if (ret) 1992 goto out; 1309 goto out; 1993 /* Do not free the successfully attac 1310 /* Do not free the successfully attached filter. */ 1994 prepared = NULL; 1311 prepared = NULL; 1995 1312 1996 seccomp_assign_mode(current, seccomp_ 1313 seccomp_assign_mode(current, seccomp_mode, flags); 1997 out: 1314 out: 1998 spin_unlock_irq(¤t->sighand->si 1315 spin_unlock_irq(¤t->sighand->siglock); 1999 if (flags & SECCOMP_FILTER_FLAG_TSYNC 1316 if (flags & SECCOMP_FILTER_FLAG_TSYNC) 2000 mutex_unlock(¤t->signal 1317 mutex_unlock(¤t->signal->cred_guard_mutex); 2001 out_put_fd: 1318 out_put_fd: 2002 if (flags & SECCOMP_FILTER_FLAG_NEW_L 1319 if (flags & SECCOMP_FILTER_FLAG_NEW_LISTENER) { 2003 if (ret) { 1320 if (ret) { 2004 listener_f->private_d 1321 listener_f->private_data = NULL; 2005 fput(listener_f); 1322 fput(listener_f); 2006 put_unused_fd(listene 1323 put_unused_fd(listener); 2007 seccomp_notify_detach << 2008 } else { 1324 } else { 2009 fd_install(listener, 1325 fd_install(listener, listener_f); 2010 ret = listener; 1326 ret = listener; 2011 } 1327 } 2012 } 1328 } 2013 out_free: 1329 out_free: 2014 seccomp_filter_free(prepared); 1330 seccomp_filter_free(prepared); 2015 return ret; 1331 return ret; 2016 } 1332 } 2017 #else 1333 #else 2018 static inline long seccomp_set_mode_filter(un 1334 static inline long seccomp_set_mode_filter(unsigned int flags, 2019 co 1335 const char __user *filter) 2020 { 1336 { 2021 return -EINVAL; 1337 return -EINVAL; 2022 } 1338 } 2023 #endif 1339 #endif 2024 1340 2025 static long seccomp_get_action_avail(const ch 1341 static long seccomp_get_action_avail(const char __user *uaction) 2026 { 1342 { 2027 u32 action; 1343 u32 action; 2028 1344 2029 if (copy_from_user(&action, uaction, 1345 if (copy_from_user(&action, uaction, sizeof(action))) 2030 return -EFAULT; 1346 return -EFAULT; 2031 1347 2032 switch (action) { 1348 switch (action) { 2033 case SECCOMP_RET_KILL_PROCESS: 1349 case SECCOMP_RET_KILL_PROCESS: 2034 case SECCOMP_RET_KILL_THREAD: 1350 case SECCOMP_RET_KILL_THREAD: 2035 case SECCOMP_RET_TRAP: 1351 case SECCOMP_RET_TRAP: 2036 case SECCOMP_RET_ERRNO: 1352 case SECCOMP_RET_ERRNO: 2037 case SECCOMP_RET_USER_NOTIF: 1353 case SECCOMP_RET_USER_NOTIF: 2038 case SECCOMP_RET_TRACE: 1354 case SECCOMP_RET_TRACE: 2039 case SECCOMP_RET_LOG: 1355 case SECCOMP_RET_LOG: 2040 case SECCOMP_RET_ALLOW: 1356 case SECCOMP_RET_ALLOW: 2041 break; 1357 break; 2042 default: 1358 default: 2043 return -EOPNOTSUPP; 1359 return -EOPNOTSUPP; 2044 } 1360 } 2045 1361 2046 return 0; 1362 return 0; 2047 } 1363 } 2048 1364 2049 static long seccomp_get_notif_sizes(void __us 1365 static long seccomp_get_notif_sizes(void __user *usizes) 2050 { 1366 { 2051 struct seccomp_notif_sizes sizes = { 1367 struct seccomp_notif_sizes sizes = { 2052 .seccomp_notif = sizeof(struc 1368 .seccomp_notif = sizeof(struct seccomp_notif), 
2053 .seccomp_notif_resp = sizeof( 1369 .seccomp_notif_resp = sizeof(struct seccomp_notif_resp), 2054 .seccomp_data = sizeof(struct 1370 .seccomp_data = sizeof(struct seccomp_data), 2055 }; 1371 }; 2056 1372 2057 if (copy_to_user(usizes, &sizes, size 1373 if (copy_to_user(usizes, &sizes, sizeof(sizes))) 2058 return -EFAULT; 1374 return -EFAULT; 2059 1375 2060 return 0; 1376 return 0; 2061 } 1377 } 2062 1378 2063 /* Common entry point for both prctl and sysc 1379 /* Common entry point for both prctl and syscall. */ 2064 static long do_seccomp(unsigned int op, unsig 1380 static long do_seccomp(unsigned int op, unsigned int flags, 2065 void __user *uargs) 1381 void __user *uargs) 2066 { 1382 { 2067 switch (op) { 1383 switch (op) { 2068 case SECCOMP_SET_MODE_STRICT: 1384 case SECCOMP_SET_MODE_STRICT: 2069 if (flags != 0 || uargs != NU 1385 if (flags != 0 || uargs != NULL) 2070 return -EINVAL; 1386 return -EINVAL; 2071 return seccomp_set_mode_stric 1387 return seccomp_set_mode_strict(); 2072 case SECCOMP_SET_MODE_FILTER: 1388 case SECCOMP_SET_MODE_FILTER: 2073 return seccomp_set_mode_filte 1389 return seccomp_set_mode_filter(flags, uargs); 2074 case SECCOMP_GET_ACTION_AVAIL: 1390 case SECCOMP_GET_ACTION_AVAIL: 2075 if (flags != 0) 1391 if (flags != 0) 2076 return -EINVAL; 1392 return -EINVAL; 2077 1393 2078 return seccomp_get_action_ava 1394 return seccomp_get_action_avail(uargs); 2079 case SECCOMP_GET_NOTIF_SIZES: 1395 case SECCOMP_GET_NOTIF_SIZES: 2080 if (flags != 0) 1396 if (flags != 0) 2081 return -EINVAL; 1397 return -EINVAL; 2082 1398 2083 return seccomp_get_notif_size 1399 return seccomp_get_notif_sizes(uargs); 2084 default: 1400 default: 2085 return -EINVAL; 1401 return -EINVAL; 2086 } 1402 } 2087 } 1403 } 2088 1404 2089 SYSCALL_DEFINE3(seccomp, unsigned int, op, un 1405 SYSCALL_DEFINE3(seccomp, unsigned int, op, unsigned int, flags, 2090 void __user *, uargs 1406 void __user *, uargs) 2091 { 1407 { 2092 return do_seccomp(op, flags, uargs); 1408 return do_seccomp(op, flags, uargs); 2093 } 1409 } 2094 1410 2095 /** 1411 /** 2096 * prctl_set_seccomp: configures current->sec 1412 * prctl_set_seccomp: configures current->seccomp.mode 2097 * @seccomp_mode: requested mode to use 1413 * @seccomp_mode: requested mode to use 2098 * @filter: optional struct sock_fprog for us 1414 * @filter: optional struct sock_fprog for use with SECCOMP_MODE_FILTER 2099 * 1415 * 2100 * Returns 0 on success or -EINVAL on failure 1416 * Returns 0 on success or -EINVAL on failure. 2101 */ 1417 */ 2102 long prctl_set_seccomp(unsigned long seccomp_ 1418 long prctl_set_seccomp(unsigned long seccomp_mode, void __user *filter) 2103 { 1419 { 2104 unsigned int op; 1420 unsigned int op; 2105 void __user *uargs; 1421 void __user *uargs; 2106 1422 2107 switch (seccomp_mode) { 1423 switch (seccomp_mode) { 2108 case SECCOMP_MODE_STRICT: 1424 case SECCOMP_MODE_STRICT: 2109 op = SECCOMP_SET_MODE_STRICT; 1425 op = SECCOMP_SET_MODE_STRICT; 2110 /* 1426 /* 2111 * Setting strict mode throug 1427 * Setting strict mode through prctl always ignored filter, 2112 * so make sure it is always 1428 * so make sure it is always NULL here to pass the internal 2113 * check in do_seccomp(). 1429 * check in do_seccomp(). 
2114 */ 1430 */ 2115 uargs = NULL; 1431 uargs = NULL; 2116 break; 1432 break; 2117 case SECCOMP_MODE_FILTER: 1433 case SECCOMP_MODE_FILTER: 2118 op = SECCOMP_SET_MODE_FILTER; 1434 op = SECCOMP_SET_MODE_FILTER; 2119 uargs = filter; 1435 uargs = filter; 2120 break; 1436 break; 2121 default: 1437 default: 2122 return -EINVAL; 1438 return -EINVAL; 2123 } 1439 } 2124 1440 2125 /* prctl interface doesn't have flags 1441 /* prctl interface doesn't have flags, so they are always zero. */ 2126 return do_seccomp(op, 0, uargs); 1442 return do_seccomp(op, 0, uargs); 2127 } 1443 } 2128 1444 2129 #if defined(CONFIG_SECCOMP_FILTER) && defined 1445 #if defined(CONFIG_SECCOMP_FILTER) && defined(CONFIG_CHECKPOINT_RESTORE) 2130 static struct seccomp_filter *get_nth_filter( 1446 static struct seccomp_filter *get_nth_filter(struct task_struct *task, 2131 1447 unsigned long filter_off) 2132 { 1448 { 2133 struct seccomp_filter *orig, *filter; 1449 struct seccomp_filter *orig, *filter; 2134 unsigned long count; 1450 unsigned long count; 2135 1451 2136 /* 1452 /* 2137 * Note: this is only correct because 1453 * Note: this is only correct because the caller should be the (ptrace) 2138 * tracer of the task, otherwise lock 1454 * tracer of the task, otherwise lock_task_sighand is needed. 2139 */ 1455 */ 2140 spin_lock_irq(&task->sighand->siglock 1456 spin_lock_irq(&task->sighand->siglock); 2141 1457 2142 if (task->seccomp.mode != SECCOMP_MOD 1458 if (task->seccomp.mode != SECCOMP_MODE_FILTER) { 2143 spin_unlock_irq(&task->sighan 1459 spin_unlock_irq(&task->sighand->siglock); 2144 return ERR_PTR(-EINVAL); 1460 return ERR_PTR(-EINVAL); 2145 } 1461 } 2146 1462 2147 orig = task->seccomp.filter; 1463 orig = task->seccomp.filter; 2148 __get_seccomp_filter(orig); 1464 __get_seccomp_filter(orig); 2149 spin_unlock_irq(&task->sighand->siglo 1465 spin_unlock_irq(&task->sighand->siglock); 2150 1466 2151 count = 0; 1467 count = 0; 2152 for (filter = orig; filter; filter = 1468 for (filter = orig; filter; filter = filter->prev) 2153 count++; 1469 count++; 2154 1470 2155 if (filter_off >= count) { 1471 if (filter_off >= count) { 2156 filter = ERR_PTR(-ENOENT); 1472 filter = ERR_PTR(-ENOENT); 2157 goto out; 1473 goto out; 2158 } 1474 } 2159 1475 2160 count -= filter_off; 1476 count -= filter_off; 2161 for (filter = orig; filter && count > 1477 for (filter = orig; filter && count > 1; filter = filter->prev) 2162 count--; 1478 count--; 2163 1479 2164 if (WARN_ON(count != 1 || !filter)) { 1480 if (WARN_ON(count != 1 || !filter)) { 2165 filter = ERR_PTR(-ENOENT); 1481 filter = ERR_PTR(-ENOENT); 2166 goto out; 1482 goto out; 2167 } 1483 } 2168 1484 2169 __get_seccomp_filter(filter); 1485 __get_seccomp_filter(filter); 2170 1486 2171 out: 1487 out: 2172 __put_seccomp_filter(orig); 1488 __put_seccomp_filter(orig); 2173 return filter; 1489 return filter; 2174 } 1490 } 2175 1491 2176 long seccomp_get_filter(struct task_struct *t 1492 long seccomp_get_filter(struct task_struct *task, unsigned long filter_off, 2177 void __user *data) 1493 void __user *data) 2178 { 1494 { 2179 struct seccomp_filter *filter; 1495 struct seccomp_filter *filter; 2180 struct sock_fprog_kern *fprog; 1496 struct sock_fprog_kern *fprog; 2181 long ret; 1497 long ret; 2182 1498 2183 if (!capable(CAP_SYS_ADMIN) || 1499 if (!capable(CAP_SYS_ADMIN) || 2184 current->seccomp.mode != SECCOMP_ 1500 current->seccomp.mode != SECCOMP_MODE_DISABLED) { 2185 return -EACCES; 1501 return -EACCES; 2186 } 1502 } 2187 1503 2188 filter = get_nth_filter(task, filter_ 1504 filter = 
get_nth_filter(task, filter_off); 2189 if (IS_ERR(filter)) 1505 if (IS_ERR(filter)) 2190 return PTR_ERR(filter); 1506 return PTR_ERR(filter); 2191 1507 2192 fprog = filter->prog->orig_prog; 1508 fprog = filter->prog->orig_prog; 2193 if (!fprog) { 1509 if (!fprog) { 2194 /* This must be a new non-cBP 1510 /* This must be a new non-cBPF filter, since we save 2195 * every cBPF filter's orig_p 1511 * every cBPF filter's orig_prog above when 2196 * CONFIG_CHECKPOINT_RESTORE 1512 * CONFIG_CHECKPOINT_RESTORE is enabled. 2197 */ 1513 */ 2198 ret = -EMEDIUMTYPE; 1514 ret = -EMEDIUMTYPE; 2199 goto out; 1515 goto out; 2200 } 1516 } 2201 1517 2202 ret = fprog->len; 1518 ret = fprog->len; 2203 if (!data) 1519 if (!data) 2204 goto out; 1520 goto out; 2205 1521 2206 if (copy_to_user(data, fprog->filter, 1522 if (copy_to_user(data, fprog->filter, bpf_classic_proglen(fprog))) 2207 ret = -EFAULT; 1523 ret = -EFAULT; 2208 1524 2209 out: 1525 out: 2210 __put_seccomp_filter(filter); 1526 __put_seccomp_filter(filter); 2211 return ret; 1527 return ret; 2212 } 1528 } 2213 1529 2214 long seccomp_get_metadata(struct task_struct 1530 long seccomp_get_metadata(struct task_struct *task, 2215 unsigned long size, 1531 unsigned long size, void __user *data) 2216 { 1532 { 2217 long ret; 1533 long ret; 2218 struct seccomp_filter *filter; 1534 struct seccomp_filter *filter; 2219 struct seccomp_metadata kmd = {}; 1535 struct seccomp_metadata kmd = {}; 2220 1536 2221 if (!capable(CAP_SYS_ADMIN) || 1537 if (!capable(CAP_SYS_ADMIN) || 2222 current->seccomp.mode != SECCOMP_ 1538 current->seccomp.mode != SECCOMP_MODE_DISABLED) { 2223 return -EACCES; 1539 return -EACCES; 2224 } 1540 } 2225 1541 2226 size = min_t(unsigned long, size, siz 1542 size = min_t(unsigned long, size, sizeof(kmd)); 2227 1543 2228 if (size < sizeof(kmd.filter_off)) 1544 if (size < sizeof(kmd.filter_off)) 2229 return -EINVAL; 1545 return -EINVAL; 2230 1546 2231 if (copy_from_user(&kmd.filter_off, d 1547 if (copy_from_user(&kmd.filter_off, data, sizeof(kmd.filter_off))) 2232 return -EFAULT; 1548 return -EFAULT; 2233 1549 2234 filter = get_nth_filter(task, kmd.fil 1550 filter = get_nth_filter(task, kmd.filter_off); 2235 if (IS_ERR(filter)) 1551 if (IS_ERR(filter)) 2236 return PTR_ERR(filter); 1552 return PTR_ERR(filter); 2237 1553 2238 if (filter->log) 1554 if (filter->log) 2239 kmd.flags |= SECCOMP_FILTER_F 1555 kmd.flags |= SECCOMP_FILTER_FLAG_LOG; 2240 1556 2241 ret = size; 1557 ret = size; 2242 if (copy_to_user(data, &kmd, size)) 1558 if (copy_to_user(data, &kmd, size)) 2243 ret = -EFAULT; 1559 ret = -EFAULT; 2244 1560 2245 __put_seccomp_filter(filter); 1561 __put_seccomp_filter(filter); 2246 return ret; 1562 return ret; 2247 } 1563 } 2248 #endif 1564 #endif 2249 1565 2250 #ifdef CONFIG_SYSCTL 1566 #ifdef CONFIG_SYSCTL 2251 1567 2252 /* Human readable action names for friendly s 1568 /* Human readable action names for friendly sysctl interaction */ 2253 #define SECCOMP_RET_KILL_PROCESS_NAME "kill 1569 #define SECCOMP_RET_KILL_PROCESS_NAME "kill_process" 2254 #define SECCOMP_RET_KILL_THREAD_NAME "kill 1570 #define SECCOMP_RET_KILL_THREAD_NAME "kill_thread" 2255 #define SECCOMP_RET_TRAP_NAME "trap 1571 #define SECCOMP_RET_TRAP_NAME "trap" 2256 #define SECCOMP_RET_ERRNO_NAME "errn 1572 #define SECCOMP_RET_ERRNO_NAME "errno" 2257 #define SECCOMP_RET_USER_NOTIF_NAME "user 1573 #define SECCOMP_RET_USER_NOTIF_NAME "user_notif" 2258 #define SECCOMP_RET_TRACE_NAME "trac 1574 #define SECCOMP_RET_TRACE_NAME "trace" 2259 #define SECCOMP_RET_LOG_NAME "log" 1575 #define 
SECCOMP_RET_LOG_NAME "log" 2260 #define SECCOMP_RET_ALLOW_NAME "allo 1576 #define SECCOMP_RET_ALLOW_NAME "allow" 2261 1577 2262 static const char seccomp_actions_avail[] = 1578 static const char seccomp_actions_avail[] = 2263 SECCOMP_RET_K 1579 SECCOMP_RET_KILL_PROCESS_NAME " " 2264 SECCOMP_RET_K 1580 SECCOMP_RET_KILL_THREAD_NAME " " 2265 SECCOMP_RET_T 1581 SECCOMP_RET_TRAP_NAME " " 2266 SECCOMP_RET_E 1582 SECCOMP_RET_ERRNO_NAME " " 2267 SECCOMP_RET_U 1583 SECCOMP_RET_USER_NOTIF_NAME " " 2268 SECCOMP_RET_T 1584 SECCOMP_RET_TRACE_NAME " " 2269 SECCOMP_RET_L 1585 SECCOMP_RET_LOG_NAME " " 2270 SECCOMP_RET_A 1586 SECCOMP_RET_ALLOW_NAME; 2271 1587 2272 struct seccomp_log_name { 1588 struct seccomp_log_name { 2273 u32 log; 1589 u32 log; 2274 const char *name; 1590 const char *name; 2275 }; 1591 }; 2276 1592 2277 static const struct seccomp_log_name seccomp_ 1593 static const struct seccomp_log_name seccomp_log_names[] = { 2278 { SECCOMP_LOG_KILL_PROCESS, SECCOMP_R 1594 { SECCOMP_LOG_KILL_PROCESS, SECCOMP_RET_KILL_PROCESS_NAME }, 2279 { SECCOMP_LOG_KILL_THREAD, SECCOMP_RE 1595 { SECCOMP_LOG_KILL_THREAD, SECCOMP_RET_KILL_THREAD_NAME }, 2280 { SECCOMP_LOG_TRAP, SECCOMP_RET_TRAP_ 1596 { SECCOMP_LOG_TRAP, SECCOMP_RET_TRAP_NAME }, 2281 { SECCOMP_LOG_ERRNO, SECCOMP_RET_ERRN 1597 { SECCOMP_LOG_ERRNO, SECCOMP_RET_ERRNO_NAME }, 2282 { SECCOMP_LOG_USER_NOTIF, SECCOMP_RET 1598 { SECCOMP_LOG_USER_NOTIF, SECCOMP_RET_USER_NOTIF_NAME }, 2283 { SECCOMP_LOG_TRACE, SECCOMP_RET_TRAC 1599 { SECCOMP_LOG_TRACE, SECCOMP_RET_TRACE_NAME }, 2284 { SECCOMP_LOG_LOG, SECCOMP_RET_LOG_NA 1600 { SECCOMP_LOG_LOG, SECCOMP_RET_LOG_NAME }, 2285 { SECCOMP_LOG_ALLOW, SECCOMP_RET_ALLO 1601 { SECCOMP_LOG_ALLOW, SECCOMP_RET_ALLOW_NAME }, 2286 { } 1602 { } 2287 }; 1603 }; 2288 1604 2289 static bool seccomp_names_from_actions_logged 1605 static bool seccomp_names_from_actions_logged(char *names, size_t size, 2290 1606 u32 actions_logged, 2291 1607 const char *sep) 2292 { 1608 { 2293 const struct seccomp_log_name *cur; 1609 const struct seccomp_log_name *cur; 2294 bool append_sep = false; 1610 bool append_sep = false; 2295 1611 2296 for (cur = seccomp_log_names; cur->na 1612 for (cur = seccomp_log_names; cur->name && size; cur++) { 2297 ssize_t ret; 1613 ssize_t ret; 2298 1614 2299 if (!(actions_logged & cur->l 1615 if (!(actions_logged & cur->log)) 2300 continue; 1616 continue; 2301 1617 2302 if (append_sep) { 1618 if (append_sep) { 2303 ret = strscpy(names, 1619 ret = strscpy(names, sep, size); 2304 if (ret < 0) 1620 if (ret < 0) 2305 return false; 1621 return false; 2306 1622 2307 names += ret; 1623 names += ret; 2308 size -= ret; 1624 size -= ret; 2309 } else 1625 } else 2310 append_sep = true; 1626 append_sep = true; 2311 1627 2312 ret = strscpy(names, cur->nam 1628 ret = strscpy(names, cur->name, size); 2313 if (ret < 0) 1629 if (ret < 0) 2314 return false; 1630 return false; 2315 1631 2316 names += ret; 1632 names += ret; 2317 size -= ret; 1633 size -= ret; 2318 } 1634 } 2319 1635 2320 return true; 1636 return true; 2321 } 1637 } 2322 1638 2323 static bool seccomp_action_logged_from_name(u 1639 static bool seccomp_action_logged_from_name(u32 *action_logged, 2324 c 1640 const char *name) 2325 { 1641 { 2326 const struct seccomp_log_name *cur; 1642 const struct seccomp_log_name *cur; 2327 1643 2328 for (cur = seccomp_log_names; cur->na 1644 for (cur = seccomp_log_names; cur->name; cur++) { 2329 if (!strcmp(cur->name, name)) 1645 if (!strcmp(cur->name, name)) { 2330 *action_logged = cur- 1646 *action_logged = cur->log; 2331 return true; 
1647 return true; 2332 } 1648 } 2333 } 1649 } 2334 1650 2335 return false; 1651 return false; 2336 } 1652 } 2337 1653 2338 static bool seccomp_actions_logged_from_names 1654 static bool seccomp_actions_logged_from_names(u32 *actions_logged, char *names) 2339 { 1655 { 2340 char *name; 1656 char *name; 2341 1657 2342 *actions_logged = 0; 1658 *actions_logged = 0; 2343 while ((name = strsep(&names, " ")) & 1659 while ((name = strsep(&names, " ")) && *name) { 2344 u32 action_logged = 0; 1660 u32 action_logged = 0; 2345 1661 2346 if (!seccomp_action_logged_fr 1662 if (!seccomp_action_logged_from_name(&action_logged, name)) 2347 return false; 1663 return false; 2348 1664 2349 *actions_logged |= action_log 1665 *actions_logged |= action_logged; 2350 } 1666 } 2351 1667 2352 return true; 1668 return true; 2353 } 1669 } 2354 1670 2355 static int read_actions_logged(const struct c !! 1671 static int read_actions_logged(struct ctl_table *ro_table, void __user *buffer, 2356 size_t *lenp, 1672 size_t *lenp, loff_t *ppos) 2357 { 1673 { 2358 char names[sizeof(seccomp_actions_ava 1674 char names[sizeof(seccomp_actions_avail)]; 2359 struct ctl_table table; 1675 struct ctl_table table; 2360 1676 2361 memset(names, 0, sizeof(names)); 1677 memset(names, 0, sizeof(names)); 2362 1678 2363 if (!seccomp_names_from_actions_logge 1679 if (!seccomp_names_from_actions_logged(names, sizeof(names), 2364 1680 seccomp_actions_logged, " ")) 2365 return -EINVAL; 1681 return -EINVAL; 2366 1682 2367 table = *ro_table; 1683 table = *ro_table; 2368 table.data = names; 1684 table.data = names; 2369 table.maxlen = sizeof(names); 1685 table.maxlen = sizeof(names); 2370 return proc_dostring(&table, 0, buffe 1686 return proc_dostring(&table, 0, buffer, lenp, ppos); 2371 } 1687 } 2372 1688 2373 static int write_actions_logged(const struct !! 
1689 static int write_actions_logged(struct ctl_table *ro_table, void __user *buffer, 2374 size_t *lenp, 1690 size_t *lenp, loff_t *ppos, u32 *actions_logged) 2375 { 1691 { 2376 char names[sizeof(seccomp_actions_ava 1692 char names[sizeof(seccomp_actions_avail)]; 2377 struct ctl_table table; 1693 struct ctl_table table; 2378 int ret; 1694 int ret; 2379 1695 2380 if (!capable(CAP_SYS_ADMIN)) 1696 if (!capable(CAP_SYS_ADMIN)) 2381 return -EPERM; 1697 return -EPERM; 2382 1698 2383 memset(names, 0, sizeof(names)); 1699 memset(names, 0, sizeof(names)); 2384 1700 2385 table = *ro_table; 1701 table = *ro_table; 2386 table.data = names; 1702 table.data = names; 2387 table.maxlen = sizeof(names); 1703 table.maxlen = sizeof(names); 2388 ret = proc_dostring(&table, 1, buffer 1704 ret = proc_dostring(&table, 1, buffer, lenp, ppos); 2389 if (ret) 1705 if (ret) 2390 return ret; 1706 return ret; 2391 1707 2392 if (!seccomp_actions_logged_from_name 1708 if (!seccomp_actions_logged_from_names(actions_logged, table.data)) 2393 return -EINVAL; 1709 return -EINVAL; 2394 1710 2395 if (*actions_logged & SECCOMP_LOG_ALL 1711 if (*actions_logged & SECCOMP_LOG_ALLOW) 2396 return -EINVAL; 1712 return -EINVAL; 2397 1713 2398 seccomp_actions_logged = *actions_log 1714 seccomp_actions_logged = *actions_logged; 2399 return 0; 1715 return 0; 2400 } 1716 } 2401 1717 2402 static void audit_actions_logged(u32 actions_ 1718 static void audit_actions_logged(u32 actions_logged, u32 old_actions_logged, 2403 int ret) 1719 int ret) 2404 { 1720 { 2405 char names[sizeof(seccomp_actions_ava 1721 char names[sizeof(seccomp_actions_avail)]; 2406 char old_names[sizeof(seccomp_actions 1722 char old_names[sizeof(seccomp_actions_avail)]; 2407 const char *new = names; 1723 const char *new = names; 2408 const char *old = old_names; 1724 const char *old = old_names; 2409 1725 2410 if (!audit_enabled) 1726 if (!audit_enabled) 2411 return; 1727 return; 2412 1728 2413 memset(names, 0, sizeof(names)); 1729 memset(names, 0, sizeof(names)); 2414 memset(old_names, 0, sizeof(old_names 1730 memset(old_names, 0, sizeof(old_names)); 2415 1731 2416 if (ret) 1732 if (ret) 2417 new = "?"; 1733 new = "?"; 2418 else if (!actions_logged) 1734 else if (!actions_logged) 2419 new = "(none)"; 1735 new = "(none)"; 2420 else if (!seccomp_names_from_actions_ 1736 else if (!seccomp_names_from_actions_logged(names, sizeof(names), 2421 1737 actions_logged, ",")) 2422 new = "?"; 1738 new = "?"; 2423 1739 2424 if (!old_actions_logged) 1740 if (!old_actions_logged) 2425 old = "(none)"; 1741 old = "(none)"; 2426 else if (!seccomp_names_from_actions_ 1742 else if (!seccomp_names_from_actions_logged(old_names, 2427 1743 sizeof(old_names), 2428 1744 old_actions_logged, ",")) 2429 old = "?"; 1745 old = "?"; 2430 1746 2431 return audit_seccomp_actions_logged(n 1747 return audit_seccomp_actions_logged(new, old, !ret); 2432 } 1748 } 2433 1749 2434 static int seccomp_actions_logged_handler(con !! 1750 static int seccomp_actions_logged_handler(struct ctl_table *ro_table, int write, 2435 voi !! 
1751 void __user *buffer, size_t *lenp, 2436 lof 1752 loff_t *ppos) 2437 { 1753 { 2438 int ret; 1754 int ret; 2439 1755 2440 if (write) { 1756 if (write) { 2441 u32 actions_logged = 0; 1757 u32 actions_logged = 0; 2442 u32 old_actions_logged = secc 1758 u32 old_actions_logged = seccomp_actions_logged; 2443 1759 2444 ret = write_actions_logged(ro 1760 ret = write_actions_logged(ro_table, buffer, lenp, ppos, 2445 &a 1761 &actions_logged); 2446 audit_actions_logged(actions_ 1762 audit_actions_logged(actions_logged, old_actions_logged, ret); 2447 } else 1763 } else 2448 ret = read_actions_logged(ro_ 1764 ret = read_actions_logged(ro_table, buffer, lenp, ppos); 2449 1765 2450 return ret; 1766 return ret; 2451 } 1767 } 2452 1768 >> 1769 static struct ctl_path seccomp_sysctl_path[] = { >> 1770 { .procname = "kernel", }, >> 1771 { .procname = "seccomp", }, >> 1772 { } >> 1773 }; >> 1774 2453 static struct ctl_table seccomp_sysctl_table[ 1775 static struct ctl_table seccomp_sysctl_table[] = { 2454 { 1776 { 2455 .procname = "actions_av 1777 .procname = "actions_avail", 2456 .data = (void *) &s 1778 .data = (void *) &seccomp_actions_avail, 2457 .maxlen = sizeof(secc 1779 .maxlen = sizeof(seccomp_actions_avail), 2458 .mode = 0444, 1780 .mode = 0444, 2459 .proc_handler = proc_dostri 1781 .proc_handler = proc_dostring, 2460 }, 1782 }, 2461 { 1783 { 2462 .procname = "actions_lo 1784 .procname = "actions_logged", 2463 .mode = 0644, 1785 .mode = 0644, 2464 .proc_handler = seccomp_act 1786 .proc_handler = seccomp_actions_logged_handler, 2465 }, 1787 }, >> 1788 { } 2466 }; 1789 }; 2467 1790 2468 static int __init seccomp_sysctl_init(void) 1791 static int __init seccomp_sysctl_init(void) 2469 { 1792 { 2470 register_sysctl_init("kernel/seccomp" !! 1793 struct ctl_table_header *hdr; >> 1794 >> 1795 hdr = register_sysctl_paths(seccomp_sysctl_path, seccomp_sysctl_table); >> 1796 if (!hdr) >> 1797 pr_warn("seccomp: sysctl registration failed\n"); >> 1798 else >> 1799 kmemleak_not_leak(hdr); >> 1800 2471 return 0; 1801 return 0; 2472 } 1802 } 2473 1803 2474 device_initcall(seccomp_sysctl_init) 1804 device_initcall(seccomp_sysctl_init) 2475 1805 2476 #endif /* CONFIG_SYSCTL */ 1806 #endif /* CONFIG_SYSCTL */ 2477 << 2478 #ifdef CONFIG_SECCOMP_CACHE_DEBUG << 2479 /* Currently CONFIG_SECCOMP_CACHE_DEBUG impli << 2480 static void proc_pid_seccomp_cache_arch(struc << 2481 const << 2482 { << 2483 int nr; << 2484 << 2485 for (nr = 0; nr < bitmap_size; nr++) << 2486 bool cached = test_bit(nr, bi << 2487 char *status = cached ? "ALLO << 2488 << 2489 seq_printf(m, "%s %d %s\n", n << 2490 } << 2491 } << 2492 << 2493 int proc_pid_seccomp_cache(struct seq_file *m << 2494 struct pid *pid, s << 2495 { << 2496 struct seccomp_filter *f; << 2497 unsigned long flags; << 2498 << 2499 /* << 2500 * We don't want some sandboxed proce << 2501 * filters consist of. << 2502 */ << 2503 if (!file_ns_capable(m->file, &init_u << 2504 return -EACCES; << 2505 << 2506 if (!lock_task_sighand(task, &flags)) << 2507 return -ESRCH; << 2508 << 2509 f = READ_ONCE(task->seccomp.filter); << 2510 if (!f) { << 2511 unlock_task_sighand(task, &fl << 2512 return 0; << 2513 } << 2514 << 2515 /* prevent filter from being freed wh << 2516 __get_seccomp_filter(f); << 2517 unlock_task_sighand(task, &flags); << 2518 << 2519 proc_pid_seccomp_cache_arch(m, SECCOM << 2520 f->cache. << 2521 SECCOMP_A << 2522 << 2523 #ifdef SECCOMP_ARCH_COMPAT << 2524 proc_pid_seccomp_cache_arch(m, SECCOM << 2525 f->cache. 
<< 2526 SECCOMP_ARCH_COMPAT_NR); << 2527 #endif /* SECCOMP_ARCH_COMPAT */ << 2528 << 2529 __put_seccomp_filter(f); << 2530 return 0; << 2531 } << 2532 #endif /* CONFIG_SECCOMP_CACHE_DEBUG */ << 2533 1807
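From user space, the attach path above (seccomp_set_mode_filter() with SECCOMP_FILTER_FLAG_NEW_LISTENER, whose fd comes from init_listener()) looks roughly like the sketch below. It assumes UAPI headers recent enough to define SECCOMP_RET_USER_NOTIF and an architecture that provides __NR_mkdir; the check of seccomp_data.arch that any real filter needs is omitted for brevity, and install_notify_filter() is only an illustrative name.

#include <stddef.h>
#include <unistd.h>
#include <sys/prctl.h>
#include <sys/syscall.h>
#include <linux/filter.h>
#include <linux/seccomp.h>

/* Send mkdir(2) to a user-space supervisor, allow everything else. */
static int install_notify_filter(void)
{
	struct sock_filter insns[] = {
		/* A <- seccomp_data.nr */
		BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
			 offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_mkdir, 0, 1),
		BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_USER_NOTIF),
		BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.len = sizeof(insns) / sizeof(insns[0]),
		.filter = insns,
	};

	/* Required unless the caller has CAP_SYS_ADMIN in its user ns. */
	if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0))
		return -1;

	/* On success the return value is the new listener fd. */
	return syscall(__NR_seccomp, SECCOMP_SET_MODE_FILTER,
		       SECCOMP_FILTER_FLAG_NEW_LISTENER, &prog);
}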
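The listener side then drives the SECCOMP_IOCTL_NOTIF_RECV/ID_VALID/SEND ioctls implemented by seccomp_notify_recv(), seccomp_notify_id_valid() and seccomp_notify_send() above. A hedged sketch, where supervise() is an illustrative name; note the request buffer must be zeroed, since the newer code verifies it with check_zeroed_user():

#include <errno.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/seccomp.h>

static int supervise(int notify_fd)
{
	struct seccomp_notif req;
	struct seccomp_notif_resp resp;

	for (;;) {
		memset(&req, 0, sizeof(req));	/* kernel rejects garbage */
		if (ioctl(notify_fd, SECCOMP_IOCTL_NOTIF_RECV, &req) < 0) {
			if (errno == EINTR)
				continue;
			return -1;
		}

		/*
		 * req.pid can be recycled; re-validate the per-filter
		 * cookie before trusting anything about the blocked task.
		 */
		if (ioctl(notify_fd, SECCOMP_IOCTL_NOTIF_ID_VALID, &req.id))
			continue;	/* target died; request is stale */

		memset(&resp, 0, sizeof(resp));
		resp.id = req.id;
		resp.error = -EPERM;	/* deny: target sees mkdir() == -EPERM */
		if (ioctl(notify_fd, SECCOMP_IOCTL_NOTIF_SEND, &resp) < 0 &&
		    errno != ENOENT)	/* ENOENT: task killed meanwhile */
			return -1;
	}
}

poll(2) on the fd reports EPOLLIN while a request is in SECCOMP_NOTIFY_INIT and EPOLLOUT while one is in SECCOMP_NOTIFY_SENT, exactly as seccomp_notify_poll() computes above; the newer code additionally raises EPOLLHUP once the filter's users are gone.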
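When the supervisor wants the target's original syscall to run instead of supplying a result, seccomp_notify_send() above accepts SECCOMP_USER_NOTIF_FLAG_CONTINUE (and rejects the reply if error or val is non-zero), after which seccomp_do_user_notification() returns 0 so the syscall proceeds. Continuing the previous sketch's variables:

		memset(&resp, 0, sizeof(resp));
		resp.id = req.id;
		/* val and error must stay zero with this flag set. */
		resp.flags = SECCOMP_USER_NOTIF_FLAG_CONTINUE;
		ioctl(notify_fd, SECCOMP_IOCTL_NOTIF_SEND, &resp);

As the UAPI documentation stresses, this is only safe for syscalls whose arguments cannot have been raced between the filter matching and the reply.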
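seccomp_notify_addfd() above implements SECCOMP_IOCTL_NOTIF_ADDFD, which lets the supervisor install a file descriptor in the blocked task, e.g. to emulate its openat(2). A sketch, assuming a kernel new enough to support SECCOMP_ADDFD_FLAG_SEND; inject_fd() is an illustrative name:

#include <fcntl.h>
#include <sys/ioctl.h>
#include <linux/seccomp.h>

/* Returns the fd number allocated in the target on success. */
static int inject_fd(int notify_fd, __u64 id, int srcfd)
{
	struct seccomp_notif_addfd addfd = {
		.id = id,
		.srcfd = srcfd,		/* supervisor-side fd to copy */
		.newfd = 0,		/* ignored without SECCOMP_ADDFD_FLAG_SETFD */
		.newfd_flags = O_CLOEXEC,
		/* Atomically install the fd and complete the notification
		 * with its number, so no separate NOTIF_SEND is needed. */
		.flags = SECCOMP_ADDFD_FLAG_SEND,
	};

	return ioctl(notify_fd, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd);
}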
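Because the notification structures may grow across kernel releases, seccomp_get_notif_sizes() above exists so user space can size its buffers before using the fd. Something like:

#include <unistd.h>
#include <sys/syscall.h>
#include <linux/seccomp.h>

/* Non-zero if the compiled structs match the running kernel's. */
static int notif_sizes_match(void)
{
	struct seccomp_notif_sizes sizes;

	if (syscall(__NR_seccomp, SECCOMP_GET_NOTIF_SIZES, 0, &sizes) < 0)
		return 0;

	/* Larger kernel sizes mean the headers are older than the
	 * kernel and buffers must be allocated accordingly. */
	return sizes.seccomp_notif == sizeof(struct seccomp_notif) &&
	       sizes.seccomp_notif_resp == sizeof(struct seccomp_notif_resp);
}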
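The mode-1 path (seccomp_set_mode_strict(), reached through prctl_set_seccomp() above) needs no BPF at all and is entered with a bare prctl(2):

#include <sys/prctl.h>
#include <linux/seccomp.h>

/*
 * After this, only read(2) and write(2) on already-open descriptors,
 * _exit(2) and sigreturn(2) succeed; any other syscall delivers an
 * uncatchable SIGKILL. Fails with EINVAL if a mode is already set.
 */
static int enter_strict_mode(void)
{
	return prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT);
}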
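seccomp_get_action_avail() above gives programs a forward-compatibility probe for return actions; a sketch, with action_supported() an illustrative name:

#include <unistd.h>
#include <sys/syscall.h>
#include <linux/seccomp.h>

/* 0 if the running kernel implements @action, -1/EOPNOTSUPP if not. */
static int action_supported(__u32 action)
{
	return syscall(__NR_seccomp, SECCOMP_GET_ACTION_AVAIL, 0, &action);
}

For example, calling action_supported(SECCOMP_RET_USER_NOTIF) before installing a filter that relies on it; the same list is readable from /proc/sys/kernel/seccomp/actions_avail, backed by the sysctl table above.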
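seccomp_get_filter() and get_nth_filter() above back the PTRACE_SECCOMP_GET_FILTER request. A sketch of dumping a tracee's classic-BPF program, assuming the caller is already attached, has CAP_SYS_ADMIN, and is not itself confined by seccomp (the code above returns -EACCES otherwise); dump_filter() is an illustrative name:

#include <stdlib.h>
#include <sys/types.h>
#include <sys/ptrace.h>
#include <linux/filter.h>

/* Dump tracee @pid's @n-th filter (0 = most recently installed). */
static struct sock_filter *dump_filter(pid_t pid, unsigned long n, long *len)
{
	struct sock_filter *insns;

	/* With a NULL buffer the call only reports the program length. */
	*len = ptrace(PTRACE_SECCOMP_GET_FILTER, pid, n, NULL);
	if (*len < 0)
		return NULL;	/* e.g. EMEDIUMTYPE for a non-cBPF filter */

	insns = calloc(*len, sizeof(*insns));
	if (insns && ptrace(PTRACE_SECCOMP_GET_FILTER, pid, n, insns) < 0) {
		free(insns);
		insns = NULL;
	}
	return insns;
}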
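Finally, the actions_logged handler above is driven through procfs. A sketch; it requires CAP_SYS_ADMIN, and write_actions_logged() above rejects "allow" outright:

#include <stdio.h>

/* Restrict seccomp audit logging to the fatal actions only. */
static int log_only_kills(void)
{
	FILE *f = fopen("/proc/sys/kernel/seccomp/actions_logged", "w");

	if (!f)
		return -1;
	/* Space-separated names, same vocabulary as actions_avail. */
	fputs("kill_process kill_thread", f);
	return fclose(f);
}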