// SPDX-License-Identifier: GPL-2.0-only
/*
 *  fs/eventfd.c
 *
 *  Copyright (C) 2007  Davide Libenzi <davidel@xmailserver.org>
 *
 */

#include <linux/file.h>
#include <linux/poll.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/sched/signal.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/anon_inodes.h>
#include <linux/syscalls.h>
#include <linux/export.h>
#include <linux/kref.h>
#include <linux/eventfd.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/idr.h>
#include <linux/uio.h>

static DEFINE_IDA(eventfd_ida);

struct eventfd_ctx {
        struct kref kref;
        wait_queue_head_t wqh;
        /*
         * Every time that a write(2) is performed on an eventfd, the
         * value of the __u64 being written is added to "count" and a
         * wakeup is performed on "wqh". If the EFD_SEMAPHORE flag was not
         * specified, a read(2) will return the "count" value to userspace,
         * and will reset "count" to zero. The kernel-side eventfd_signal()
         * also adds to the "count" counter and issues a wakeup.
         */
        __u64 count;
        unsigned int flags;
        int id;
};

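/*
 * Illustrative only (not part of the original file): a minimal userspace
 * sketch of the counter semantics described above. write(2) adds the 8-byte
 * value to "count"; a plain read(2) returns the accumulated value and resets
 * it to zero, while an EFD_SEMAPHORE eventfd instead returns 1 per read and
 * decrements the counter.
 *
 *      #include <sys/eventfd.h>
 *      #include <stdint.h>
 *      #include <stdio.h>
 *      #include <unistd.h>
 *
 *      int main(void)
 *      {
 *              int efd = eventfd(0, 0);        // initial count 0, no flags
 *              uint64_t v = 3, out;
 *
 *              if (efd < 0)
 *                      return 1;
 *              write(efd, &v, sizeof(v));      // count = 3
 *              write(efd, &v, sizeof(v));      // count = 6
 *              read(efd, &out, sizeof(out));   // out = 6, count reset to 0
 *              printf("read %llu\n", (unsigned long long)out);
 *              close(efd);
 *              return 0;
 *      }
 */
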
/**
 * eventfd_signal_mask - Increment the event counter
 * @ctx: [in] Pointer to the eventfd context.
 * @mask: [in] poll mask
 *
 * This function is supposed to be called by the kernel in paths that do not
 * allow sleeping. In this function we allow the counter to reach the ULLONG_MAX
 * value, and we signal this as an overflow condition by returning EPOLLERR
 * to poll(2).
 */
void eventfd_signal_mask(struct eventfd_ctx *ctx, __poll_t mask)
{
        unsigned long flags;

        /*
         * Deadlock or stack overflow issues can happen if we recurse here
         * through waitqueue wakeup handlers. If the caller uses potentially
         * nested waitqueues with custom wakeup handlers, then it should
         * check eventfd_signal_allowed() before calling this function. If
         * it returns false, the eventfd_signal() call should be deferred to a
         * safe context.
         */
        if (WARN_ON_ONCE(current->in_eventfd))
                return;

        spin_lock_irqsave(&ctx->wqh.lock, flags);
        current->in_eventfd = 1;
        if (ctx->count < ULLONG_MAX)
                ctx->count++;
        if (waitqueue_active(&ctx->wqh))
                wake_up_locked_poll(&ctx->wqh, EPOLLIN | mask);
        current->in_eventfd = 0;
        spin_unlock_irqrestore(&ctx->wqh.lock, flags);
}
EXPORT_SYMBOL_GPL(eventfd_signal_mask);

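/*
 * Illustrative only: a hedged sketch of how a kernel-side producer might
 * signal an eventfd from a path that cannot sleep. "struct my_dev",
 * "my_notify" and "notify_work" are hypothetical names, not part of this
 * file. Per the recursion comment above, a caller that might run from nested
 * waitqueue wakeup handlers checks eventfd_signal_allowed() first and defers
 * the signal to a safe context (here, a work item) when it returns false.
 *
 *      static void my_notify(struct my_dev *dev)
 *      {
 *              if (!dev->trigger)                      // struct eventfd_ctx *
 *                      return;
 *              if (eventfd_signal_allowed())
 *                      eventfd_signal_mask(dev->trigger, 0);   // adds 1, wakes EPOLLIN
 *              else
 *                      schedule_work(&dev->notify_work);       // signal from the work fn
 *      }
 */
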
static void eventfd_free_ctx(struct eventfd_ctx *ctx)
{
        if (ctx->id >= 0)
                ida_free(&eventfd_ida, ctx->id);
        kfree(ctx);
}

static void eventfd_free(struct kref *kref)
{
        struct eventfd_ctx *ctx = container_of(kref, struct eventfd_ctx, kref);

        eventfd_free_ctx(ctx);
}

/**
 * eventfd_ctx_put - Releases a reference to the internal eventfd context.
 * @ctx: [in] Pointer to eventfd context.
 *
 * The eventfd context reference must have been previously acquired either
 * with eventfd_ctx_fdget() or eventfd_ctx_fileget().
 */
void eventfd_ctx_put(struct eventfd_ctx *ctx)
{
        kref_put(&ctx->kref, eventfd_free);
}
EXPORT_SYMBOL_GPL(eventfd_ctx_put);

static int eventfd_release(struct inode *inode, struct file *file)
{
        struct eventfd_ctx *ctx = file->private_data;

        wake_up_poll(&ctx->wqh, EPOLLHUP);
        eventfd_ctx_put(ctx);
        return 0;
}

static __poll_t eventfd_poll(struct file *file, poll_table *wait)
{
        struct eventfd_ctx *ctx = file->private_data;
        __poll_t events = 0;
        u64 count;

        poll_wait(file, &ctx->wqh, wait);

        /*
         * All writes to ctx->count occur within ctx->wqh.lock. This read
         * can be done outside ctx->wqh.lock because we know that poll_wait
         * takes that lock (through add_wait_queue) if our caller will sleep.
         *
         * The read _can_ therefore seep into add_wait_queue's critical
         * section, but cannot move above it! add_wait_queue's spin_lock acts
         * as an acquire barrier and ensures that the read be ordered properly
         * against the writes. The following CAN happen and is safe:
         *
         *     poll                               write
         *     -----------------                  ------------
         *     lock ctx->wqh.lock (in poll_wait)
         *     count = ctx->count
         *     __add_wait_queue
         *     unlock ctx->wqh.lock
         *                                        lock ctx->wqh.lock
         *                                        ctx->count += n
         *                                        if (waitqueue_active)
         *                                          wake_up_locked_poll
         *                                        unlock ctx->wqh.lock
         *     eventfd_poll returns 0
         *
         * but the following, which would miss a wakeup, cannot happen:
         *
         *     poll                               write
         *     -----------------                  ------------
         *     count = ctx->count (INVALID!)
         *                                        lock ctx->wqh.lock
         *                                        ctx->count += n
         *                                        **waitqueue_active is false**
         *                                        **no wake_up_locked_poll!**
         *                                        unlock ctx->wqh.lock
         *     lock ctx->wqh.lock (in poll_wait)
         *     __add_wait_queue
         *     unlock ctx->wqh.lock
         *     eventfd_poll returns 0
         */
        count = READ_ONCE(ctx->count);

        if (count > 0)
                events |= EPOLLIN;
        if (count == ULLONG_MAX)
                events |= EPOLLERR;
        if (ULLONG_MAX - 1 > count)
                events |= EPOLLOUT;

        return events;
}

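/*
 * Illustrative only: how the eventfd_poll() bits above surface to userspace.
 * EPOLLIN is reported while "count" is non-zero, EPOLLOUT while another
 * write(2) could still be accepted, and EPOLLERR only once the counter has
 * reached ULLONG_MAX (which write(2) refuses, so it can only come from the
 * kernel-side signal path). A minimal blocking consumer:
 *
 *      #include <poll.h>
 *      #include <sys/eventfd.h>
 *      #include <stdint.h>
 *      #include <unistd.h>
 *
 *      int wait_for_event(int efd)
 *      {
 *              struct pollfd pfd = { .fd = efd, .events = POLLIN };
 *              uint64_t out;
 *
 *              if (poll(&pfd, 1, -1) < 0)
 *                      return -1;
 *              if (pfd.revents & POLLERR)      // counter stuck at ULLONG_MAX
 *                      return -1;
 *              if (pfd.revents & POLLIN)
 *                      read(efd, &out, sizeof(out));   // consume and reset the count
 *              return 0;
 *      }
 */
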
void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt)
{
        lockdep_assert_held(&ctx->wqh.lock);

        *cnt = ((ctx->flags & EFD_SEMAPHORE) && ctx->count) ? 1 : ctx->count;
        ctx->count -= *cnt;
}
EXPORT_SYMBOL_GPL(eventfd_ctx_do_read);

/**
 * eventfd_ctx_remove_wait_queue - Reads the current counter and removes the wait queue entry.
 * @ctx: [in] Pointer to eventfd context.
 * @wait: [in] Wait queue to be removed.
 * @cnt: [out] Pointer to the 64-bit counter value.
 *
 * Returns %0 if successful, or the following error codes:
 *
 * -EAGAIN      : The operation would have blocked.
 *
 * This is used to atomically remove a wait queue entry from the eventfd wait
 * queue head, and read/reset the counter value.
 */
int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *wait,
                                  __u64 *cnt)
{
        unsigned long flags;

        spin_lock_irqsave(&ctx->wqh.lock, flags);
        eventfd_ctx_do_read(ctx, cnt);
        __remove_wait_queue(&ctx->wqh, wait);
        if (*cnt != 0 && waitqueue_active(&ctx->wqh))
                wake_up_locked_poll(&ctx->wqh, EPOLLOUT);
        spin_unlock_irqrestore(&ctx->wqh.lock, flags);

        return *cnt != 0 ? 0 : -EAGAIN;
}
EXPORT_SYMBOL_GPL(eventfd_ctx_remove_wait_queue);

static ssize_t eventfd_read(struct kiocb *iocb, struct iov_iter *to)
{
        struct file *file = iocb->ki_filp;
        struct eventfd_ctx *ctx = file->private_data;
        __u64 ucnt = 0;

        if (iov_iter_count(to) < sizeof(ucnt))
                return -EINVAL;
        spin_lock_irq(&ctx->wqh.lock);
        if (!ctx->count) {
                if ((file->f_flags & O_NONBLOCK) ||
                    (iocb->ki_flags & IOCB_NOWAIT)) {
                        spin_unlock_irq(&ctx->wqh.lock);
                        return -EAGAIN;
                }

                if (wait_event_interruptible_locked_irq(ctx->wqh, ctx->count)) {
                        spin_unlock_irq(&ctx->wqh.lock);
                        return -ERESTARTSYS;
                }
        }
        eventfd_ctx_do_read(ctx, &ucnt);
        current->in_eventfd = 1;
        if (waitqueue_active(&ctx->wqh))
                wake_up_locked_poll(&ctx->wqh, EPOLLOUT);
        current->in_eventfd = 0;
        spin_unlock_irq(&ctx->wqh.lock);
        if (unlikely(copy_to_iter(&ucnt, sizeof(ucnt), to) != sizeof(ucnt)))
                return -EFAULT;

        return sizeof(ucnt);
}

static ssize_t eventfd_write(struct file *file, const char __user *buf, size_t count,
                             loff_t *ppos)
{
        struct eventfd_ctx *ctx = file->private_data;
        ssize_t res;
        __u64 ucnt;

        if (count != sizeof(ucnt))
                return -EINVAL;
        if (copy_from_user(&ucnt, buf, sizeof(ucnt)))
                return -EFAULT;
        if (ucnt == ULLONG_MAX)
                return -EINVAL;
        spin_lock_irq(&ctx->wqh.lock);
        res = -EAGAIN;
        if (ULLONG_MAX - ctx->count > ucnt)
                res = sizeof(ucnt);
        else if (!(file->f_flags & O_NONBLOCK)) {
                res = wait_event_interruptible_locked_irq(ctx->wqh,
                                ULLONG_MAX - ctx->count > ucnt);
                if (!res)
                        res = sizeof(ucnt);
        }
        if (likely(res > 0)) {
                ctx->count += ucnt;
                current->in_eventfd = 1;
                if (waitqueue_active(&ctx->wqh))
                        wake_up_locked_poll(&ctx->wqh, EPOLLIN);
                current->in_eventfd = 0;
        }
        spin_unlock_irq(&ctx->wqh.lock);

        return res;
}

#ifdef CONFIG_PROC_FS
static void eventfd_show_fdinfo(struct seq_file *m, struct file *f)
{
        struct eventfd_ctx *ctx = f->private_data;
        __u64 cnt;

        spin_lock_irq(&ctx->wqh.lock);
        cnt = ctx->count;
        spin_unlock_irq(&ctx->wqh.lock);

        seq_printf(m,
                   "eventfd-count: %16llx\n"
                   "eventfd-id: %d\n"
                   "eventfd-semaphore: %d\n",
                   cnt,
                   ctx->id,
                   !!(ctx->flags & EFD_SEMAPHORE));
}
#endif

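/*
 * Illustrative only: with CONFIG_PROC_FS enabled, the handler above makes an
 * eventfd's state visible in /proc/<pid>/fdinfo/<fd>. The eventfd-specific
 * lines produced by eventfd_show_fdinfo() look roughly like the following
 * (values are made up; the count is printed in hex, padded to 16 columns):
 *
 *      eventfd-count:                0
 *      eventfd-id: 34
 *      eventfd-semaphore: 0
 */
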
static const struct file_operations eventfd_fops = {
#ifdef CONFIG_PROC_FS
        .show_fdinfo    = eventfd_show_fdinfo,
#endif
        .release        = eventfd_release,
        .poll           = eventfd_poll,
        .read_iter      = eventfd_read,
        .write          = eventfd_write,
        .llseek         = noop_llseek,
};

/**
 * eventfd_fget - Acquire a reference of an eventfd file descriptor.
 * @fd: [in] Eventfd file descriptor.
 *
 * Returns a pointer to the eventfd file structure in case of success, or the
 * following error pointer:
 *
 * -EBADF    : Invalid @fd file descriptor.
 * -EINVAL   : The @fd file descriptor is not an eventfd file.
 */
struct file *eventfd_fget(int fd)
{
        struct file *file;

        file = fget(fd);
        if (!file)
                return ERR_PTR(-EBADF);
        if (file->f_op != &eventfd_fops) {
                fput(file);
                return ERR_PTR(-EINVAL);
        }

        return file;
}
EXPORT_SYMBOL_GPL(eventfd_fget);

/**
 * eventfd_ctx_fdget - Acquires a reference to the internal eventfd context.
 * @fd: [in] Eventfd file descriptor.
 *
 * Returns a pointer to the internal eventfd context, otherwise the error
 * pointers returned by the following functions:
 *
 * eventfd_fget
 */
struct eventfd_ctx *eventfd_ctx_fdget(int fd)
{
        struct eventfd_ctx *ctx;
        struct fd f = fdget(fd);
        if (!f.file)
                return ERR_PTR(-EBADF);
        ctx = eventfd_ctx_fileget(f.file);
        fdput(f);
        return ctx;
}
EXPORT_SYMBOL_GPL(eventfd_ctx_fdget);

/**
 * eventfd_ctx_fileget - Acquires a reference to the internal eventfd context.
 * @file: [in] Eventfd file pointer.
 *
 * Returns a pointer to the internal eventfd context, otherwise the error
 * pointer:
 *
 * -EINVAL   : The @file pointer is not an eventfd file.
 */
struct eventfd_ctx *eventfd_ctx_fileget(struct file *file)
{
        struct eventfd_ctx *ctx;

        if (file->f_op != &eventfd_fops)
                return ERR_PTR(-EINVAL);

        ctx = file->private_data;
        kref_get(&ctx->kref);
        return ctx;
}
EXPORT_SYMBOL_GPL(eventfd_ctx_fileget);

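/*
 * Illustrative only: a hedged sketch of the in-kernel lifecycle built from
 * the helpers above. A subsystem typically receives an eventfd file
 * descriptor from userspace (e.g. via an ioctl), resolves it once with
 * eventfd_ctx_fdget(), signals it as events occur, and drops the reference
 * with eventfd_ctx_put() on teardown. "struct my_dev" and both functions
 * are hypothetical names, not part of this file.
 *
 *      static int my_set_trigger(struct my_dev *dev, int fd)
 *      {
 *              struct eventfd_ctx *ctx = eventfd_ctx_fdget(fd);
 *
 *              if (IS_ERR(ctx))
 *                      return PTR_ERR(ctx);
 *              dev->trigger = ctx;             // holds its own reference
 *              return 0;
 *      }
 *
 *      static void my_clear_trigger(struct my_dev *dev)
 *      {
 *              if (dev->trigger) {
 *                      eventfd_ctx_put(dev->trigger);
 *                      dev->trigger = NULL;
 *              }
 *      }
 */
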
static int do_eventfd(unsigned int count, int flags)
{
        struct eventfd_ctx *ctx;
        struct file *file;
        int fd;

        /* Check the EFD_* constants for consistency.  */
        BUILD_BUG_ON(EFD_CLOEXEC != O_CLOEXEC);
        BUILD_BUG_ON(EFD_NONBLOCK != O_NONBLOCK);
        BUILD_BUG_ON(EFD_SEMAPHORE != (1 << 0));

        if (flags & ~EFD_FLAGS_SET)
                return -EINVAL;

        ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
        if (!ctx)
                return -ENOMEM;

        kref_init(&ctx->kref);
        init_waitqueue_head(&ctx->wqh);
        ctx->count = count;
        ctx->flags = flags;
        ctx->id = ida_alloc(&eventfd_ida, GFP_KERNEL);

        flags &= EFD_SHARED_FCNTL_FLAGS;
        flags |= O_RDWR;
        fd = get_unused_fd_flags(flags);
        if (fd < 0)
                goto err;

        file = anon_inode_getfile("[eventfd]", &eventfd_fops, ctx, flags);
        if (IS_ERR(file)) {
                put_unused_fd(fd);
                fd = PTR_ERR(file);
                goto err;
        }

        file->f_mode |= FMODE_NOWAIT;
        fd_install(fd, file);
        return fd;
err:
        eventfd_free_ctx(ctx);
        return fd;
}

SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags)
{
        return do_eventfd(count, flags);
}

SYSCALL_DEFINE1(eventfd, unsigned int, count)
{
        return do_eventfd(count, 0);
}