1 // SPDX-License-Identifier: GPL-2.0-only 1 // SPDX-License-Identifier: GPL-2.0-only 2 2 3 #include <linux/stat.h> 3 #include <linux/stat.h> 4 #include <linux/sysctl.h> 4 #include <linux/sysctl.h> 5 #include <linux/slab.h> 5 #include <linux/slab.h> 6 #include <linux/cred.h> 6 #include <linux/cred.h> 7 #include <linux/hash.h> 7 #include <linux/hash.h> 8 #include <linux/kmemleak.h> 8 #include <linux/kmemleak.h> 9 #include <linux/user_namespace.h> 9 #include <linux/user_namespace.h> 10 10 11 struct ucounts init_ucounts = { 11 struct ucounts init_ucounts = { 12 .ns = &init_user_ns, 12 .ns = &init_user_ns, 13 .uid = GLOBAL_ROOT_UID, 13 .uid = GLOBAL_ROOT_UID, 14 .count = ATOMIC_INIT(1), !! 14 .count = 1, 15 }; 15 }; 16 16 17 #define UCOUNTS_HASHTABLE_BITS 10 17 #define UCOUNTS_HASHTABLE_BITS 10 18 static struct hlist_head ucounts_hashtable[(1 18 static struct hlist_head ucounts_hashtable[(1 << UCOUNTS_HASHTABLE_BITS)]; 19 static DEFINE_SPINLOCK(ucounts_lock); 19 static DEFINE_SPINLOCK(ucounts_lock); 20 20 21 #define ucounts_hashfn(ns, uid) 21 #define ucounts_hashfn(ns, uid) \ 22 hash_long((unsigned long)__kuid_val(ui 22 hash_long((unsigned long)__kuid_val(uid) + (unsigned long)(ns), \ 23 UCOUNTS_HASHTABLE_BITS) 23 UCOUNTS_HASHTABLE_BITS) 24 #define ucounts_hashentry(ns, uid) \ 24 #define ucounts_hashentry(ns, uid) \ 25 (ucounts_hashtable + ucounts_hashfn(ns 25 (ucounts_hashtable + ucounts_hashfn(ns, uid)) 26 26 27 27 28 #ifdef CONFIG_SYSCTL 28 #ifdef CONFIG_SYSCTL 29 static struct ctl_table_set * 29 static struct ctl_table_set * 30 set_lookup(struct ctl_table_root *root) 30 set_lookup(struct ctl_table_root *root) 31 { 31 { 32 return ¤t_user_ns()->set; 32 return ¤t_user_ns()->set; 33 } 33 } 34 34 35 static int set_is_seen(struct ctl_table_set *s 35 static int set_is_seen(struct ctl_table_set *set) 36 { 36 { 37 return ¤t_user_ns()->set == set; 37 return ¤t_user_ns()->set == set; 38 } 38 } 39 39 40 static int set_permissions(struct ctl_table_he 40 static int set_permissions(struct ctl_table_header *head, 41 const struct ctl_ta !! 41 struct ctl_table *table) 42 { 42 { 43 struct user_namespace *user_ns = 43 struct user_namespace *user_ns = 44 container_of(head->set, struct 44 container_of(head->set, struct user_namespace, set); 45 int mode; 45 int mode; 46 46 47 /* Allow users with CAP_SYS_RESOURCE u 47 /* Allow users with CAP_SYS_RESOURCE unrestrained access */ 48 if (ns_capable(user_ns, CAP_SYS_RESOUR 48 if (ns_capable(user_ns, CAP_SYS_RESOURCE)) 49 mode = (table->mode & S_IRWXU) 49 mode = (table->mode & S_IRWXU) >> 6; 50 else 50 else 51 /* Allow all others at most read-only 51 /* Allow all others at most read-only access */ 52 mode = table->mode & S_IROTH; 52 mode = table->mode & S_IROTH; 53 return (mode << 6) | (mode << 3) | mod 53 return (mode << 6) | (mode << 3) | mode; 54 } 54 } 55 55 56 static struct ctl_table_root set_root = { 56 static struct ctl_table_root set_root = { 57 .lookup = set_lookup, 57 .lookup = set_lookup, 58 .permissions = set_permissions, 58 .permissions = set_permissions, 59 }; 59 }; 60 60 61 static long ue_zero = 0; !! 61 #define UCOUNT_ENTRY(name) \ 62 static long ue_int_max = INT_MAX; !! 62 { \ 63 !! 63 .procname = name, \ 64 #define UCOUNT_ENTRY(name) !! 64 .maxlen = sizeof(int), \ 65 { !! 65 .mode = 0644, \ 66 .procname = name, !! 66 .proc_handler = proc_dointvec_minmax, \ 67 .maxlen = sizeof(long) !! 67 .extra1 = SYSCTL_ZERO, \ 68 .mode = 0644, !! 68 .extra2 = SYSCTL_INT_MAX, \ 69 .proc_handler = proc_doulong << 70 .extra1 = &ue_zero, << 71 .extra2 = &ue_int_max, << 72 } 69 } 73 static struct ctl_table user_table[] = { 70 static struct ctl_table user_table[] = { 74 UCOUNT_ENTRY("max_user_namespaces"), 71 UCOUNT_ENTRY("max_user_namespaces"), 75 UCOUNT_ENTRY("max_pid_namespaces"), 72 UCOUNT_ENTRY("max_pid_namespaces"), 76 UCOUNT_ENTRY("max_uts_namespaces"), 73 UCOUNT_ENTRY("max_uts_namespaces"), 77 UCOUNT_ENTRY("max_ipc_namespaces"), 74 UCOUNT_ENTRY("max_ipc_namespaces"), 78 UCOUNT_ENTRY("max_net_namespaces"), 75 UCOUNT_ENTRY("max_net_namespaces"), 79 UCOUNT_ENTRY("max_mnt_namespaces"), 76 UCOUNT_ENTRY("max_mnt_namespaces"), 80 UCOUNT_ENTRY("max_cgroup_namespaces"), 77 UCOUNT_ENTRY("max_cgroup_namespaces"), 81 UCOUNT_ENTRY("max_time_namespaces"), 78 UCOUNT_ENTRY("max_time_namespaces"), 82 #ifdef CONFIG_INOTIFY_USER 79 #ifdef CONFIG_INOTIFY_USER 83 UCOUNT_ENTRY("max_inotify_instances"), 80 UCOUNT_ENTRY("max_inotify_instances"), 84 UCOUNT_ENTRY("max_inotify_watches"), 81 UCOUNT_ENTRY("max_inotify_watches"), 85 #endif 82 #endif 86 #ifdef CONFIG_FANOTIFY 83 #ifdef CONFIG_FANOTIFY 87 UCOUNT_ENTRY("max_fanotify_groups"), 84 UCOUNT_ENTRY("max_fanotify_groups"), 88 UCOUNT_ENTRY("max_fanotify_marks"), 85 UCOUNT_ENTRY("max_fanotify_marks"), 89 #endif 86 #endif >> 87 { } 90 }; 88 }; 91 #endif /* CONFIG_SYSCTL */ 89 #endif /* CONFIG_SYSCTL */ 92 90 93 bool setup_userns_sysctls(struct user_namespac 91 bool setup_userns_sysctls(struct user_namespace *ns) 94 { 92 { 95 #ifdef CONFIG_SYSCTL 93 #ifdef CONFIG_SYSCTL 96 struct ctl_table *tbl; 94 struct ctl_table *tbl; 97 95 98 BUILD_BUG_ON(ARRAY_SIZE(user_table) != !! 96 BUILD_BUG_ON(ARRAY_SIZE(user_table) != UCOUNT_COUNTS + 1); 99 setup_sysctl_set(&ns->set, &set_root, 97 setup_sysctl_set(&ns->set, &set_root, set_is_seen); 100 tbl = kmemdup(user_table, sizeof(user_ 98 tbl = kmemdup(user_table, sizeof(user_table), GFP_KERNEL); 101 if (tbl) { 99 if (tbl) { 102 int i; 100 int i; 103 for (i = 0; i < UCOUNT_COUNTS; 101 for (i = 0; i < UCOUNT_COUNTS; i++) { 104 tbl[i].data = &ns->uco 102 tbl[i].data = &ns->ucount_max[i]; 105 } 103 } 106 ns->sysctls = __register_sysct !! 104 ns->sysctls = __register_sysctl_table(&ns->set, "user", tbl); 107 << 108 } 105 } 109 if (!ns->sysctls) { 106 if (!ns->sysctls) { 110 kfree(tbl); 107 kfree(tbl); 111 retire_sysctl_set(&ns->set); 108 retire_sysctl_set(&ns->set); 112 return false; 109 return false; 113 } 110 } 114 #endif 111 #endif 115 return true; 112 return true; 116 } 113 } 117 114 118 void retire_userns_sysctls(struct user_namespa 115 void retire_userns_sysctls(struct user_namespace *ns) 119 { 116 { 120 #ifdef CONFIG_SYSCTL 117 #ifdef CONFIG_SYSCTL 121 const struct ctl_table *tbl; !! 118 struct ctl_table *tbl; 122 119 123 tbl = ns->sysctls->ctl_table_arg; 120 tbl = ns->sysctls->ctl_table_arg; 124 unregister_sysctl_table(ns->sysctls); 121 unregister_sysctl_table(ns->sysctls); 125 retire_sysctl_set(&ns->set); 122 retire_sysctl_set(&ns->set); 126 kfree(tbl); 123 kfree(tbl); 127 #endif 124 #endif 128 } 125 } 129 126 130 static struct ucounts *find_ucounts(struct use 127 static struct ucounts *find_ucounts(struct user_namespace *ns, kuid_t uid, struct hlist_head *hashent) 131 { 128 { 132 struct ucounts *ucounts; 129 struct ucounts *ucounts; 133 130 134 hlist_for_each_entry(ucounts, hashent, 131 hlist_for_each_entry(ucounts, hashent, node) { 135 if (uid_eq(ucounts->uid, uid) 132 if (uid_eq(ucounts->uid, uid) && (ucounts->ns == ns)) 136 return ucounts; 133 return ucounts; 137 } 134 } 138 return NULL; 135 return NULL; 139 } 136 } 140 137 141 static void hlist_add_ucounts(struct ucounts * 138 static void hlist_add_ucounts(struct ucounts *ucounts) 142 { 139 { 143 struct hlist_head *hashent = ucounts_h 140 struct hlist_head *hashent = ucounts_hashentry(ucounts->ns, ucounts->uid); 144 spin_lock_irq(&ucounts_lock); 141 spin_lock_irq(&ucounts_lock); 145 hlist_add_head(&ucounts->node, hashent 142 hlist_add_head(&ucounts->node, hashent); 146 spin_unlock_irq(&ucounts_lock); 143 spin_unlock_irq(&ucounts_lock); 147 } 144 } 148 145 149 static inline bool get_ucounts_or_wrap(struct << 150 { << 151 /* Returns true on a successful get, f << 152 return !atomic_add_negative(1, &ucount << 153 } << 154 << 155 struct ucounts *get_ucounts(struct ucounts *uc << 156 { << 157 if (!get_ucounts_or_wrap(ucounts)) { << 158 put_ucounts(ucounts); << 159 ucounts = NULL; << 160 } << 161 return ucounts; << 162 } << 163 << 164 struct ucounts *alloc_ucounts(struct user_name 146 struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid) 165 { 147 { 166 struct hlist_head *hashent = ucounts_h 148 struct hlist_head *hashent = ucounts_hashentry(ns, uid); 167 struct ucounts *ucounts, *new; 149 struct ucounts *ucounts, *new; 168 bool wrapped; << 169 150 170 spin_lock_irq(&ucounts_lock); 151 spin_lock_irq(&ucounts_lock); 171 ucounts = find_ucounts(ns, uid, hashen 152 ucounts = find_ucounts(ns, uid, hashent); 172 if (!ucounts) { 153 if (!ucounts) { 173 spin_unlock_irq(&ucounts_lock) 154 spin_unlock_irq(&ucounts_lock); 174 155 175 new = kzalloc(sizeof(*new), GF 156 new = kzalloc(sizeof(*new), GFP_KERNEL); 176 if (!new) 157 if (!new) 177 return NULL; 158 return NULL; 178 159 179 new->ns = ns; 160 new->ns = ns; 180 new->uid = uid; 161 new->uid = uid; 181 atomic_set(&new->count, 1); !! 162 new->count = 0; 182 163 183 spin_lock_irq(&ucounts_lock); 164 spin_lock_irq(&ucounts_lock); 184 ucounts = find_ucounts(ns, uid 165 ucounts = find_ucounts(ns, uid, hashent); 185 if (ucounts) { 166 if (ucounts) { 186 kfree(new); 167 kfree(new); 187 } else { 168 } else { 188 hlist_add_head(&new->n 169 hlist_add_head(&new->node, hashent); 189 get_user_ns(new->ns); !! 170 ucounts = new; 190 spin_unlock_irq(&ucoun << 191 return new; << 192 } 171 } 193 } 172 } 194 wrapped = !get_ucounts_or_wrap(ucounts !! 173 if (ucounts->count == INT_MAX) >> 174 ucounts = NULL; >> 175 else >> 176 ucounts->count += 1; 195 spin_unlock_irq(&ucounts_lock); 177 spin_unlock_irq(&ucounts_lock); 196 if (wrapped) { !! 178 return ucounts; 197 put_ucounts(ucounts); !! 179 } >> 180 >> 181 struct ucounts *get_ucounts(struct ucounts *ucounts) >> 182 { >> 183 unsigned long flags; >> 184 >> 185 if (!ucounts) 198 return NULL; 186 return NULL; >> 187 >> 188 spin_lock_irqsave(&ucounts_lock, flags); >> 189 if (ucounts->count == INT_MAX) { >> 190 WARN_ONCE(1, "ucounts: counter has reached its maximum value"); >> 191 ucounts = NULL; >> 192 } else { >> 193 ucounts->count += 1; 199 } 194 } >> 195 spin_unlock_irqrestore(&ucounts_lock, flags); >> 196 200 return ucounts; 197 return ucounts; 201 } 198 } 202 199 203 void put_ucounts(struct ucounts *ucounts) 200 void put_ucounts(struct ucounts *ucounts) 204 { 201 { 205 unsigned long flags; 202 unsigned long flags; 206 203 207 if (atomic_dec_and_lock_irqsave(&ucoun !! 204 spin_lock_irqsave(&ucounts_lock, flags); >> 205 ucounts->count -= 1; >> 206 if (!ucounts->count) 208 hlist_del_init(&ucounts->node) 207 hlist_del_init(&ucounts->node); 209 spin_unlock_irqrestore(&ucount !! 208 else 210 put_user_ns(ucounts->ns); !! 209 ucounts = NULL; 211 kfree(ucounts); !! 210 spin_unlock_irqrestore(&ucounts_lock, flags); 212 } !! 211 >> 212 kfree(ucounts); 213 } 213 } 214 214 215 static inline bool atomic_long_inc_below(atomi !! 215 static inline bool atomic_inc_below(atomic_t *v, int u) 216 { 216 { 217 long c, old; !! 217 int c, old; 218 c = atomic_long_read(v); !! 218 c = atomic_read(v); 219 for (;;) { 219 for (;;) { 220 if (unlikely(c >= u)) 220 if (unlikely(c >= u)) 221 return false; 221 return false; 222 old = atomic_long_cmpxchg(v, c !! 222 old = atomic_cmpxchg(v, c, c+1); 223 if (likely(old == c)) 223 if (likely(old == c)) 224 return true; 224 return true; 225 c = old; 225 c = old; 226 } 226 } 227 } 227 } 228 228 229 struct ucounts *inc_ucount(struct user_namespa 229 struct ucounts *inc_ucount(struct user_namespace *ns, kuid_t uid, 230 enum ucount_type ty 230 enum ucount_type type) 231 { 231 { 232 struct ucounts *ucounts, *iter, *bad; 232 struct ucounts *ucounts, *iter, *bad; 233 struct user_namespace *tns; 233 struct user_namespace *tns; 234 ucounts = alloc_ucounts(ns, uid); 234 ucounts = alloc_ucounts(ns, uid); 235 for (iter = ucounts; iter; iter = tns- 235 for (iter = ucounts; iter; iter = tns->ucounts) { 236 long max; !! 236 int max; 237 tns = iter->ns; 237 tns = iter->ns; 238 max = READ_ONCE(tns->ucount_ma 238 max = READ_ONCE(tns->ucount_max[type]); 239 if (!atomic_long_inc_below(&it !! 239 if (!atomic_inc_below(&iter->ucount[type], max)) 240 goto fail; 240 goto fail; 241 } 241 } 242 return ucounts; 242 return ucounts; 243 fail: 243 fail: 244 bad = iter; 244 bad = iter; 245 for (iter = ucounts; iter != bad; iter 245 for (iter = ucounts; iter != bad; iter = iter->ns->ucounts) 246 atomic_long_dec(&iter->ucount[ !! 246 atomic_dec(&iter->ucount[type]); 247 247 248 put_ucounts(ucounts); 248 put_ucounts(ucounts); 249 return NULL; 249 return NULL; 250 } 250 } 251 251 252 void dec_ucount(struct ucounts *ucounts, enum 252 void dec_ucount(struct ucounts *ucounts, enum ucount_type type) 253 { 253 { 254 struct ucounts *iter; 254 struct ucounts *iter; 255 for (iter = ucounts; iter; iter = iter 255 for (iter = ucounts; iter; iter = iter->ns->ucounts) { 256 long dec = atomic_long_dec_if_ !! 256 int dec = atomic_dec_if_positive(&iter->ucount[type]); 257 WARN_ON_ONCE(dec < 0); 257 WARN_ON_ONCE(dec < 0); 258 } 258 } 259 put_ucounts(ucounts); 259 put_ucounts(ucounts); 260 } 260 } 261 261 262 long inc_rlimit_ucounts(struct ucounts *ucount << 263 { << 264 struct ucounts *iter; << 265 long max = LONG_MAX; << 266 long ret = 0; << 267 << 268 for (iter = ucounts; iter; iter = iter << 269 long new = atomic_long_add_ret << 270 if (new < 0 || new > max) << 271 ret = LONG_MAX; << 272 else if (iter == ucounts) << 273 ret = new; << 274 max = get_userns_rlimit_max(it << 275 } << 276 return ret; << 277 } << 278 << 279 bool dec_rlimit_ucounts(struct ucounts *ucount << 280 { << 281 struct ucounts *iter; << 282 long new = -1; /* Silence compiler war << 283 for (iter = ucounts; iter; iter = iter << 284 long dec = atomic_long_sub_ret << 285 WARN_ON_ONCE(dec < 0); << 286 if (iter == ucounts) << 287 new = dec; << 288 } << 289 return (new == 0); << 290 } << 291 << 292 static void do_dec_rlimit_put_ucounts(struct u << 293 struct ucounts << 294 { << 295 struct ucounts *iter, *next; << 296 for (iter = ucounts; iter != last; ite << 297 long dec = atomic_long_sub_ret << 298 WARN_ON_ONCE(dec < 0); << 299 next = iter->ns->ucounts; << 300 if (dec == 0) << 301 put_ucounts(iter); << 302 } << 303 } << 304 << 305 void dec_rlimit_put_ucounts(struct ucounts *uc << 306 { << 307 do_dec_rlimit_put_ucounts(ucounts, NUL << 308 } << 309 << 310 long inc_rlimit_get_ucounts(struct ucounts *uc << 311 { << 312 /* Caller must hold a reference to uco << 313 struct ucounts *iter; << 314 long max = LONG_MAX; << 315 long dec, ret = 0; << 316 << 317 for (iter = ucounts; iter; iter = iter << 318 long new = atomic_long_add_ret << 319 if (new < 0 || new > max) << 320 goto unwind; << 321 if (iter == ucounts) << 322 ret = new; << 323 max = get_userns_rlimit_max(it << 324 /* << 325 * Grab an extra ucount refere << 326 * the rlimit count was previo << 327 */ << 328 if (new != 1) << 329 continue; << 330 if (!get_ucounts(iter)) << 331 goto dec_unwind; << 332 } << 333 return ret; << 334 dec_unwind: << 335 dec = atomic_long_sub_return(1, &iter- << 336 WARN_ON_ONCE(dec < 0); << 337 unwind: << 338 do_dec_rlimit_put_ucounts(ucounts, ite << 339 return 0; << 340 } << 341 << 342 bool is_rlimit_overlimit(struct ucounts *ucoun << 343 { << 344 struct ucounts *iter; << 345 long max = rlimit; << 346 if (rlimit > LONG_MAX) << 347 max = LONG_MAX; << 348 for (iter = ucounts; iter; iter = iter << 349 long val = get_rlimit_value(it << 350 if (val < 0 || val > max) << 351 return true; << 352 max = get_userns_rlimit_max(it << 353 } << 354 return false; << 355 } << 356 << 357 static __init int user_namespace_sysctl_init(v 262 static __init int user_namespace_sysctl_init(void) 358 { 263 { 359 #ifdef CONFIG_SYSCTL 264 #ifdef CONFIG_SYSCTL 360 static struct ctl_table_header *user_h 265 static struct ctl_table_header *user_header; 361 static struct ctl_table empty[1]; 266 static struct ctl_table empty[1]; 362 /* 267 /* 363 * It is necessary to register the use 268 * It is necessary to register the user directory in the 364 * default set so that registrations i 269 * default set so that registrations in the child sets work 365 * properly. 270 * properly. 366 */ 271 */ 367 user_header = register_sysctl_sz("user !! 272 user_header = register_sysctl("user", empty); 368 kmemleak_ignore(user_header); 273 kmemleak_ignore(user_header); 369 BUG_ON(!user_header); 274 BUG_ON(!user_header); 370 BUG_ON(!setup_userns_sysctls(&init_use 275 BUG_ON(!setup_userns_sysctls(&init_user_ns)); 371 #endif 276 #endif 372 hlist_add_ucounts(&init_ucounts); 277 hlist_add_ucounts(&init_ucounts); 373 inc_rlimit_ucounts(&init_ucounts, UCOU << 374 return 0; 278 return 0; 375 } 279 } 376 subsys_initcall(user_namespace_sysctl_init); 280 subsys_initcall(user_namespace_sysctl_init); 377 281
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.