1 // SPDX-License-Identifier: GPL-2.0 1 // SPDX-License-Identifier: GPL-2.0 2 /* 2 /* 3 * linux/fs/read_write.c 3 * linux/fs/read_write.c 4 * 4 * 5 * Copyright (C) 1991, 1992 Linus Torvalds 5 * Copyright (C) 1991, 1992 Linus Torvalds 6 */ 6 */ 7 7 8 #include <linux/slab.h> 8 #include <linux/slab.h> 9 #include <linux/stat.h> 9 #include <linux/stat.h> 10 #include <linux/sched/xacct.h> 10 #include <linux/sched/xacct.h> 11 #include <linux/fcntl.h> 11 #include <linux/fcntl.h> 12 #include <linux/file.h> 12 #include <linux/file.h> 13 #include <linux/uio.h> 13 #include <linux/uio.h> 14 #include <linux/fsnotify.h> 14 #include <linux/fsnotify.h> 15 #include <linux/security.h> 15 #include <linux/security.h> 16 #include <linux/export.h> 16 #include <linux/export.h> 17 #include <linux/syscalls.h> 17 #include <linux/syscalls.h> 18 #include <linux/pagemap.h> 18 #include <linux/pagemap.h> 19 #include <linux/splice.h> 19 #include <linux/splice.h> 20 #include <linux/compat.h> 20 #include <linux/compat.h> 21 #include <linux/mount.h> 21 #include <linux/mount.h> 22 #include <linux/fs.h> 22 #include <linux/fs.h> 23 #include "internal.h" 23 #include "internal.h" 24 24 25 #include <linux/uaccess.h> 25 #include <linux/uaccess.h> 26 #include <asm/unistd.h> 26 #include <asm/unistd.h> 27 27 28 const struct file_operations generic_ro_fops = 28 const struct file_operations generic_ro_fops = { 29 .llseek = generic_file_llseek, 29 .llseek = generic_file_llseek, 30 .read_iter = generic_file_read_it 30 .read_iter = generic_file_read_iter, 31 .mmap = generic_file_readonl 31 .mmap = generic_file_readonly_mmap, 32 .splice_read = filemap_splice_read, !! 32 .splice_read = generic_file_splice_read, 33 }; 33 }; 34 34 35 EXPORT_SYMBOL(generic_ro_fops); 35 EXPORT_SYMBOL(generic_ro_fops); 36 36 37 static inline bool unsigned_offsets(struct fil 37 static inline bool unsigned_offsets(struct file *file) 38 { 38 { 39 return file->f_op->fop_flags & FOP_UNS !! 39 return file->f_mode & FMODE_UNSIGNED_OFFSET; 40 } 40 } 41 41 42 /** 42 /** 43 * vfs_setpos_cookie - update the file offset !! 43 * vfs_setpos - update the file offset for lseek 44 * @file: file structure in question 44 * @file: file structure in question 45 * @offset: file offset to seek to 45 * @offset: file offset to seek to 46 * @maxsize: maximum file size 46 * @maxsize: maximum file size 47 * @cookie: cookie to reset << 48 * 47 * 49 * Update the file offset to the value specifi !! 48 * This is a low-level filesystem helper for updating the file offset to 50 * offset is valid and it is not equal to the !! 49 * the value specified by @offset if the given offset is valid and it is 51 * reset the specified cookie to indicate that !! 50 * not equal to the current file offset. 52 * 51 * 53 * Return the specified offset on success and 52 * Return the specified offset on success and -EINVAL on invalid offset. 54 */ 53 */ 55 static loff_t vfs_setpos_cookie(struct file *f !! 54 loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize) 56 loff_t maxsize << 57 { 55 { 58 if (offset < 0 && !unsigned_offsets(fi 56 if (offset < 0 && !unsigned_offsets(file)) 59 return -EINVAL; 57 return -EINVAL; 60 if (offset > maxsize) 58 if (offset > maxsize) 61 return -EINVAL; 59 return -EINVAL; 62 60 63 if (offset != file->f_pos) { 61 if (offset != file->f_pos) { 64 file->f_pos = offset; 62 file->f_pos = offset; 65 if (cookie) !! 63 file->f_version = 0; 66 *cookie = 0; << 67 } 64 } 68 return offset; 65 return offset; 69 } 66 } 70 << 71 /** << 72 * vfs_setpos - update the file offset for lse << 73 * @file: file structure in question << 74 * @offset: file offset to seek to << 75 * @maxsize: maximum file size << 76 * << 77 * This is a low-level filesystem helper for u << 78 * the value specified by @offset if the given << 79 * not equal to the current file offset. << 80 * << 81 * Return the specified offset on success and << 82 */ << 83 loff_t vfs_setpos(struct file *file, loff_t of << 84 { << 85 return vfs_setpos_cookie(file, offset, << 86 } << 87 EXPORT_SYMBOL(vfs_setpos); 67 EXPORT_SYMBOL(vfs_setpos); 88 68 89 /** 69 /** 90 * must_set_pos - check whether f_pos has to b !! 70 * generic_file_llseek_size - generic llseek implementation for regular files 91 * @file: file to seek on !! 71 * @file: file structure to seek on 92 * @offset: offset to use !! 72 * @offset: file offset to seek to 93 * @whence: type of seek operation !! 73 * @whence: type of seek 94 * @eof: end of file !! 74 * @size: max size of this file in file system >> 75 * @eof: offset used for SEEK_END position 95 * 76 * 96 * Check whether f_pos needs to be updated and !! 77 * This is a variant of generic_file_llseek that allows passing in a custom 97 * to @whence. !! 78 * maximum file size and a custom EOF position, for e.g. hashed directories 98 * 79 * 99 * Return: 0 if f_pos doesn't need to be updat !! 80 * Synchronization: 100 * updated, and negative error code on failure !! 81 * SEEK_SET and SEEK_END are unsynchronized (but atomic on 64bit platforms) >> 82 * SEEK_CUR is synchronized against other SEEK_CURs, but not read/writes. >> 83 * read/writes behave like SEEK_SET against seeks. 101 */ 84 */ 102 static int must_set_pos(struct file *file, lof !! 85 loff_t >> 86 generic_file_llseek_size(struct file *file, loff_t offset, int whence, >> 87 loff_t maxsize, loff_t eof) 103 { 88 { 104 switch (whence) { 89 switch (whence) { 105 case SEEK_END: 90 case SEEK_END: 106 *offset += eof; !! 91 offset += eof; 107 break; 92 break; 108 case SEEK_CUR: 93 case SEEK_CUR: 109 /* 94 /* 110 * Here we special-case the ls 95 * Here we special-case the lseek(fd, 0, SEEK_CUR) 111 * position-querying operation 96 * position-querying operation. Avoid rewriting the "same" 112 * f_pos value back to the fil 97 * f_pos value back to the file because a concurrent read(), 113 * write() or lseek() might ha 98 * write() or lseek() might have altered it 114 */ 99 */ 115 if (*offset == 0) { !! 100 if (offset == 0) 116 *offset = file->f_pos; !! 101 return file->f_pos; 117 return 0; !! 102 /* 118 } !! 103 * f_lock protects against read/modify/write race with other 119 break; !! 104 * SEEK_CURs. Note that parallel writes and reads behave >> 105 * like SEEK_SET. >> 106 */ >> 107 spin_lock(&file->f_lock); >> 108 offset = vfs_setpos(file, file->f_pos + offset, maxsize); >> 109 spin_unlock(&file->f_lock); >> 110 return offset; 120 case SEEK_DATA: 111 case SEEK_DATA: 121 /* 112 /* 122 * In the generic case the ent 113 * In the generic case the entire file is data, so as long as 123 * offset isn't at the end of 114 * offset isn't at the end of the file then the offset is data. 124 */ 115 */ 125 if ((unsigned long long)*offse !! 116 if ((unsigned long long)offset >= eof) 126 return -ENXIO; 117 return -ENXIO; 127 break; 118 break; 128 case SEEK_HOLE: 119 case SEEK_HOLE: 129 /* 120 /* 130 * There is a virtual hole at 121 * There is a virtual hole at the end of the file, so as long as 131 * offset isn't i_size or larg 122 * offset isn't i_size or larger, return i_size. 132 */ 123 */ 133 if ((unsigned long long)*offse !! 124 if ((unsigned long long)offset >= eof) 134 return -ENXIO; 125 return -ENXIO; 135 *offset = eof; !! 126 offset = eof; 136 break; 127 break; 137 } 128 } 138 129 139 return 1; << 140 } << 141 << 142 /** << 143 * generic_file_llseek_size - generic llseek i << 144 * @file: file structure to seek on << 145 * @offset: file offset to seek to << 146 * @whence: type of seek << 147 * @maxsize: max size of this file in file << 148 * @eof: offset used for SEEK_END posit << 149 * << 150 * This is a variant of generic_file_llseek th << 151 * maximum file size and a custom EOF position << 152 * << 153 * Synchronization: << 154 * SEEK_SET and SEEK_END are unsynchronized (b << 155 * SEEK_CUR is synchronized against other SEEK << 156 * read/writes behave like SEEK_SET against se << 157 */ << 158 loff_t << 159 generic_file_llseek_size(struct file *file, lo << 160 loff_t maxsize, loff_t eof) << 161 { << 162 int ret; << 163 << 164 ret = must_set_pos(file, &offset, when << 165 if (ret < 0) << 166 return ret; << 167 if (ret == 0) << 168 return offset; << 169 << 170 if (whence == SEEK_CUR) { << 171 /* << 172 * f_lock protects against rea << 173 * other SEEK_CURs. Note that << 174 * behave like SEEK_SET. << 175 */ << 176 guard(spinlock)(&file->f_lock) << 177 return vfs_setpos(file, file-> << 178 } << 179 << 180 return vfs_setpos(file, offset, maxsiz 130 return vfs_setpos(file, offset, maxsize); 181 } 131 } 182 EXPORT_SYMBOL(generic_file_llseek_size); 132 EXPORT_SYMBOL(generic_file_llseek_size); 183 133 184 /** 134 /** 185 * generic_llseek_cookie - versioned llseek im << 186 * @file: file structure to seek on << 187 * @offset: file offset to seek to << 188 * @whence: type of seek << 189 * @cookie: cookie to update << 190 * << 191 * See generic_file_llseek for a general descr << 192 * << 193 * In contrast to generic_file_llseek, this fu << 194 * specified cookie to indicate a seek took pl << 195 */ << 196 loff_t generic_llseek_cookie(struct file *file << 197 u64 *cookie) << 198 { << 199 struct inode *inode = file->f_mapping- << 200 loff_t maxsize = inode->i_sb->s_maxbyt << 201 loff_t eof = i_size_read(inode); << 202 int ret; << 203 << 204 if (WARN_ON_ONCE(!cookie)) << 205 return -EINVAL; << 206 << 207 /* << 208 * Require that this is only used for << 209 * synchronization between readdir and << 210 * @cookie is correctly synchronized w << 211 */ << 212 if (WARN_ON_ONCE(!(file->f_mode & FMOD << 213 return -EINVAL; << 214 << 215 ret = must_set_pos(file, &offset, when << 216 if (ret < 0) << 217 return ret; << 218 if (ret == 0) << 219 return offset; << 220 << 221 /* No need to hold f_lock because we k << 222 if (whence == SEEK_CUR) << 223 return vfs_setpos_cookie(file, << 224 << 225 return vfs_setpos_cookie(file, offset, << 226 } << 227 EXPORT_SYMBOL(generic_llseek_cookie); << 228 << 229 /** << 230 * generic_file_llseek - generic llseek implem 135 * generic_file_llseek - generic llseek implementation for regular files 231 * @file: file structure to seek on 136 * @file: file structure to seek on 232 * @offset: file offset to seek to 137 * @offset: file offset to seek to 233 * @whence: type of seek 138 * @whence: type of seek 234 * 139 * 235 * This is a generic implemenation of ->llseek 140 * This is a generic implemenation of ->llseek useable for all normal local 236 * filesystems. It just updates the file offs 141 * filesystems. It just updates the file offset to the value specified by 237 * @offset and @whence. 142 * @offset and @whence. 238 */ 143 */ 239 loff_t generic_file_llseek(struct file *file, 144 loff_t generic_file_llseek(struct file *file, loff_t offset, int whence) 240 { 145 { 241 struct inode *inode = file->f_mapping- 146 struct inode *inode = file->f_mapping->host; 242 147 243 return generic_file_llseek_size(file, 148 return generic_file_llseek_size(file, offset, whence, 244 inode- 149 inode->i_sb->s_maxbytes, 245 i_size 150 i_size_read(inode)); 246 } 151 } 247 EXPORT_SYMBOL(generic_file_llseek); 152 EXPORT_SYMBOL(generic_file_llseek); 248 153 249 /** 154 /** 250 * fixed_size_llseek - llseek implementation f 155 * fixed_size_llseek - llseek implementation for fixed-sized devices 251 * @file: file structure to seek on 156 * @file: file structure to seek on 252 * @offset: file offset to seek to 157 * @offset: file offset to seek to 253 * @whence: type of seek 158 * @whence: type of seek 254 * @size: size of the file 159 * @size: size of the file 255 * 160 * 256 */ 161 */ 257 loff_t fixed_size_llseek(struct file *file, lo 162 loff_t fixed_size_llseek(struct file *file, loff_t offset, int whence, loff_t size) 258 { 163 { 259 switch (whence) { 164 switch (whence) { 260 case SEEK_SET: case SEEK_CUR: case SEE 165 case SEEK_SET: case SEEK_CUR: case SEEK_END: 261 return generic_file_llseek_siz 166 return generic_file_llseek_size(file, offset, whence, 262 167 size, size); 263 default: 168 default: 264 return -EINVAL; 169 return -EINVAL; 265 } 170 } 266 } 171 } 267 EXPORT_SYMBOL(fixed_size_llseek); 172 EXPORT_SYMBOL(fixed_size_llseek); 268 173 269 /** 174 /** 270 * no_seek_end_llseek - llseek implementation 175 * no_seek_end_llseek - llseek implementation for fixed-sized devices 271 * @file: file structure to seek on 176 * @file: file structure to seek on 272 * @offset: file offset to seek to 177 * @offset: file offset to seek to 273 * @whence: type of seek 178 * @whence: type of seek 274 * 179 * 275 */ 180 */ 276 loff_t no_seek_end_llseek(struct file *file, l 181 loff_t no_seek_end_llseek(struct file *file, loff_t offset, int whence) 277 { 182 { 278 switch (whence) { 183 switch (whence) { 279 case SEEK_SET: case SEEK_CUR: 184 case SEEK_SET: case SEEK_CUR: 280 return generic_file_llseek_siz 185 return generic_file_llseek_size(file, offset, whence, 281 186 OFFSET_MAX, 0); 282 default: 187 default: 283 return -EINVAL; 188 return -EINVAL; 284 } 189 } 285 } 190 } 286 EXPORT_SYMBOL(no_seek_end_llseek); 191 EXPORT_SYMBOL(no_seek_end_llseek); 287 192 288 /** 193 /** 289 * no_seek_end_llseek_size - llseek implementa 194 * no_seek_end_llseek_size - llseek implementation for fixed-sized devices 290 * @file: file structure to seek on 195 * @file: file structure to seek on 291 * @offset: file offset to seek to 196 * @offset: file offset to seek to 292 * @whence: type of seek 197 * @whence: type of seek 293 * @size: maximal offset allowed 198 * @size: maximal offset allowed 294 * 199 * 295 */ 200 */ 296 loff_t no_seek_end_llseek_size(struct file *fi 201 loff_t no_seek_end_llseek_size(struct file *file, loff_t offset, int whence, loff_t size) 297 { 202 { 298 switch (whence) { 203 switch (whence) { 299 case SEEK_SET: case SEEK_CUR: 204 case SEEK_SET: case SEEK_CUR: 300 return generic_file_llseek_siz 205 return generic_file_llseek_size(file, offset, whence, 301 206 size, 0); 302 default: 207 default: 303 return -EINVAL; 208 return -EINVAL; 304 } 209 } 305 } 210 } 306 EXPORT_SYMBOL(no_seek_end_llseek_size); 211 EXPORT_SYMBOL(no_seek_end_llseek_size); 307 212 308 /** 213 /** 309 * noop_llseek - No Operation Performed llseek 214 * noop_llseek - No Operation Performed llseek implementation 310 * @file: file structure to seek on 215 * @file: file structure to seek on 311 * @offset: file offset to seek to 216 * @offset: file offset to seek to 312 * @whence: type of seek 217 * @whence: type of seek 313 * 218 * 314 * This is an implementation of ->llseek useab 219 * This is an implementation of ->llseek useable for the rare special case when 315 * userspace expects the seek to succeed but t 220 * userspace expects the seek to succeed but the (device) file is actually not 316 * able to perform the seek. In this case you 221 * able to perform the seek. In this case you use noop_llseek() instead of 317 * falling back to the default implementation 222 * falling back to the default implementation of ->llseek. 318 */ 223 */ 319 loff_t noop_llseek(struct file *file, loff_t o 224 loff_t noop_llseek(struct file *file, loff_t offset, int whence) 320 { 225 { 321 return file->f_pos; 226 return file->f_pos; 322 } 227 } 323 EXPORT_SYMBOL(noop_llseek); 228 EXPORT_SYMBOL(noop_llseek); 324 229 >> 230 loff_t no_llseek(struct file *file, loff_t offset, int whence) >> 231 { >> 232 return -ESPIPE; >> 233 } >> 234 EXPORT_SYMBOL(no_llseek); >> 235 325 loff_t default_llseek(struct file *file, loff_ 236 loff_t default_llseek(struct file *file, loff_t offset, int whence) 326 { 237 { 327 struct inode *inode = file_inode(file) 238 struct inode *inode = file_inode(file); 328 loff_t retval; 239 loff_t retval; 329 240 330 inode_lock(inode); 241 inode_lock(inode); 331 switch (whence) { 242 switch (whence) { 332 case SEEK_END: 243 case SEEK_END: 333 offset += i_size_read( 244 offset += i_size_read(inode); 334 break; 245 break; 335 case SEEK_CUR: 246 case SEEK_CUR: 336 if (offset == 0) { 247 if (offset == 0) { 337 retval = file- 248 retval = file->f_pos; 338 goto out; 249 goto out; 339 } 250 } 340 offset += file->f_pos; 251 offset += file->f_pos; 341 break; 252 break; 342 case SEEK_DATA: 253 case SEEK_DATA: 343 /* 254 /* 344 * In the generic case 255 * In the generic case the entire file is data, so as 345 * long as offset isn' 256 * long as offset isn't at the end of the file then the 346 * offset is data. 257 * offset is data. 347 */ 258 */ 348 if (offset >= inode->i 259 if (offset >= inode->i_size) { 349 retval = -ENXI 260 retval = -ENXIO; 350 goto out; 261 goto out; 351 } 262 } 352 break; 263 break; 353 case SEEK_HOLE: 264 case SEEK_HOLE: 354 /* 265 /* 355 * There is a virtual 266 * There is a virtual hole at the end of the file, so 356 * as long as offset i 267 * as long as offset isn't i_size or larger, return 357 * i_size. 268 * i_size. 358 */ 269 */ 359 if (offset >= inode->i 270 if (offset >= inode->i_size) { 360 retval = -ENXI 271 retval = -ENXIO; 361 goto out; 272 goto out; 362 } 273 } 363 offset = inode->i_size 274 offset = inode->i_size; 364 break; 275 break; 365 } 276 } 366 retval = -EINVAL; 277 retval = -EINVAL; 367 if (offset >= 0 || unsigned_offsets(fi 278 if (offset >= 0 || unsigned_offsets(file)) { 368 if (offset != file->f_pos) !! 279 if (offset != file->f_pos) { 369 file->f_pos = offset; 280 file->f_pos = offset; >> 281 file->f_version = 0; >> 282 } 370 retval = offset; 283 retval = offset; 371 } 284 } 372 out: 285 out: 373 inode_unlock(inode); 286 inode_unlock(inode); 374 return retval; 287 return retval; 375 } 288 } 376 EXPORT_SYMBOL(default_llseek); 289 EXPORT_SYMBOL(default_llseek); 377 290 378 loff_t vfs_llseek(struct file *file, loff_t of 291 loff_t vfs_llseek(struct file *file, loff_t offset, int whence) 379 { 292 { 380 if (!(file->f_mode & FMODE_LSEEK)) !! 293 loff_t (*fn)(struct file *, loff_t, int); 381 return -ESPIPE; !! 294 382 return file->f_op->llseek(file, offset !! 295 fn = no_llseek; >> 296 if (file->f_mode & FMODE_LSEEK) { >> 297 if (file->f_op->llseek) >> 298 fn = file->f_op->llseek; >> 299 } >> 300 return fn(file, offset, whence); 383 } 301 } 384 EXPORT_SYMBOL(vfs_llseek); 302 EXPORT_SYMBOL(vfs_llseek); 385 303 386 static off_t ksys_lseek(unsigned int fd, off_t 304 static off_t ksys_lseek(unsigned int fd, off_t offset, unsigned int whence) 387 { 305 { 388 off_t retval; 306 off_t retval; 389 struct fd f = fdget_pos(fd); 307 struct fd f = fdget_pos(fd); 390 if (!fd_file(f)) !! 308 if (!f.file) 391 return -EBADF; 309 return -EBADF; 392 310 393 retval = -EINVAL; 311 retval = -EINVAL; 394 if (whence <= SEEK_MAX) { 312 if (whence <= SEEK_MAX) { 395 loff_t res = vfs_llseek(fd_fil !! 313 loff_t res = vfs_llseek(f.file, offset, whence); 396 retval = res; 314 retval = res; 397 if (res != (loff_t)retval) 315 if (res != (loff_t)retval) 398 retval = -EOVERFLOW; 316 retval = -EOVERFLOW; /* LFS: should only happen on 32 bit platforms */ 399 } 317 } 400 fdput_pos(f); 318 fdput_pos(f); 401 return retval; 319 return retval; 402 } 320 } 403 321 404 SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t 322 SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, whence) 405 { 323 { 406 return ksys_lseek(fd, offset, whence); 324 return ksys_lseek(fd, offset, whence); 407 } 325 } 408 326 409 #ifdef CONFIG_COMPAT 327 #ifdef CONFIG_COMPAT 410 COMPAT_SYSCALL_DEFINE3(lseek, unsigned int, fd 328 COMPAT_SYSCALL_DEFINE3(lseek, unsigned int, fd, compat_off_t, offset, unsigned int, whence) 411 { 329 { 412 return ksys_lseek(fd, offset, whence); 330 return ksys_lseek(fd, offset, whence); 413 } 331 } 414 #endif 332 #endif 415 333 416 #if !defined(CONFIG_64BIT) || defined(CONFIG_C 334 #if !defined(CONFIG_64BIT) || defined(CONFIG_COMPAT) || \ 417 defined(__ARCH_WANT_SYS_LLSEEK) 335 defined(__ARCH_WANT_SYS_LLSEEK) 418 SYSCALL_DEFINE5(llseek, unsigned int, fd, unsi 336 SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high, 419 unsigned long, offset_low, lof 337 unsigned long, offset_low, loff_t __user *, result, 420 unsigned int, whence) 338 unsigned int, whence) 421 { 339 { 422 int retval; 340 int retval; 423 struct fd f = fdget_pos(fd); 341 struct fd f = fdget_pos(fd); 424 loff_t offset; 342 loff_t offset; 425 343 426 if (!fd_file(f)) !! 344 if (!f.file) 427 return -EBADF; 345 return -EBADF; 428 346 429 retval = -EINVAL; 347 retval = -EINVAL; 430 if (whence > SEEK_MAX) 348 if (whence > SEEK_MAX) 431 goto out_putf; 349 goto out_putf; 432 350 433 offset = vfs_llseek(fd_file(f), ((loff !! 351 offset = vfs_llseek(f.file, ((loff_t) offset_high << 32) | offset_low, 434 whence); 352 whence); 435 353 436 retval = (int)offset; 354 retval = (int)offset; 437 if (offset >= 0) { 355 if (offset >= 0) { 438 retval = -EFAULT; 356 retval = -EFAULT; 439 if (!copy_to_user(result, &off 357 if (!copy_to_user(result, &offset, sizeof(offset))) 440 retval = 0; 358 retval = 0; 441 } 359 } 442 out_putf: 360 out_putf: 443 fdput_pos(f); 361 fdput_pos(f); 444 return retval; 362 return retval; 445 } 363 } 446 #endif 364 #endif 447 365 448 int rw_verify_area(int read_write, struct file 366 int rw_verify_area(int read_write, struct file *file, const loff_t *ppos, size_t count) 449 { 367 { 450 int mask = read_write == READ ? MAY_RE !! 368 struct inode *inode; 451 int ret; !! 369 int retval = -EINVAL; 452 370 >> 371 inode = file_inode(file); 453 if (unlikely((ssize_t) count < 0)) 372 if (unlikely((ssize_t) count < 0)) 454 return -EINVAL; !! 373 return retval; 455 374 >> 375 /* >> 376 * ranged mandatory locking does not apply to streams - it makes sense >> 377 * only for files where position has a meaning. >> 378 */ 456 if (ppos) { 379 if (ppos) { 457 loff_t pos = *ppos; 380 loff_t pos = *ppos; 458 381 459 if (unlikely(pos < 0)) { 382 if (unlikely(pos < 0)) { 460 if (!unsigned_offsets( 383 if (!unsigned_offsets(file)) 461 return -EINVAL !! 384 return retval; 462 if (count >= -pos) /* 385 if (count >= -pos) /* both values are in 0..LLONG_MAX */ 463 return -EOVERF 386 return -EOVERFLOW; 464 } else if (unlikely((loff_t) ( 387 } else if (unlikely((loff_t) (pos + count) < 0)) { 465 if (!unsigned_offsets( 388 if (!unsigned_offsets(file)) 466 return -EINVAL !! 389 return retval; 467 } 390 } 468 } << 469 391 470 ret = security_file_permission(file, m !! 392 if (unlikely(inode->i_flctx && mandatory_lock(inode))) { 471 if (ret) !! 393 retval = locks_mandatory_area(inode, file, pos, pos + count - 1, 472 return ret; !! 394 read_write == READ ? F_RDLCK : F_WRLCK); >> 395 if (retval < 0) >> 396 return retval; >> 397 } >> 398 } 473 399 474 return fsnotify_file_area_perm(file, m !! 400 return security_file_permission(file, >> 401 read_write == READ ? MAY_READ : MAY_WRITE); 475 } 402 } 476 EXPORT_SYMBOL(rw_verify_area); << 477 403 478 static ssize_t new_sync_read(struct file *filp 404 static ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos) 479 { 405 { >> 406 struct iovec iov = { .iov_base = buf, .iov_len = len }; 480 struct kiocb kiocb; 407 struct kiocb kiocb; 481 struct iov_iter iter; 408 struct iov_iter iter; 482 ssize_t ret; 409 ssize_t ret; 483 410 484 init_sync_kiocb(&kiocb, filp); 411 init_sync_kiocb(&kiocb, filp); 485 kiocb.ki_pos = (ppos ? *ppos : 0); 412 kiocb.ki_pos = (ppos ? *ppos : 0); 486 iov_iter_ubuf(&iter, ITER_DEST, buf, l !! 413 iov_iter_init(&iter, READ, &iov, 1, len); 487 414 488 ret = filp->f_op->read_iter(&kiocb, &i !! 415 ret = call_read_iter(filp, &kiocb, &iter); 489 BUG_ON(ret == -EIOCBQUEUED); 416 BUG_ON(ret == -EIOCBQUEUED); 490 if (ppos) 417 if (ppos) 491 *ppos = kiocb.ki_pos; 418 *ppos = kiocb.ki_pos; 492 return ret; 419 return ret; 493 } 420 } 494 421 495 static int warn_unsupported(struct file *file, 422 static int warn_unsupported(struct file *file, const char *op) 496 { 423 { 497 pr_warn_ratelimited( 424 pr_warn_ratelimited( 498 "kernel %s not supported for f 425 "kernel %s not supported for file %pD4 (pid: %d comm: %.20s)\n", 499 op, file, current->pid, curren 426 op, file, current->pid, current->comm); 500 return -EINVAL; 427 return -EINVAL; 501 } 428 } 502 429 503 ssize_t __kernel_read(struct file *file, void 430 ssize_t __kernel_read(struct file *file, void *buf, size_t count, loff_t *pos) 504 { 431 { 505 struct kvec iov = { 432 struct kvec iov = { 506 .iov_base = buf, 433 .iov_base = buf, 507 .iov_len = min_t(size_t 434 .iov_len = min_t(size_t, count, MAX_RW_COUNT), 508 }; 435 }; 509 struct kiocb kiocb; 436 struct kiocb kiocb; 510 struct iov_iter iter; 437 struct iov_iter iter; 511 ssize_t ret; 438 ssize_t ret; 512 439 513 if (WARN_ON_ONCE(!(file->f_mode & FMOD 440 if (WARN_ON_ONCE(!(file->f_mode & FMODE_READ))) 514 return -EINVAL; 441 return -EINVAL; 515 if (!(file->f_mode & FMODE_CAN_READ)) 442 if (!(file->f_mode & FMODE_CAN_READ)) 516 return -EINVAL; 443 return -EINVAL; 517 /* 444 /* 518 * Also fail if ->read_iter and ->read 445 * Also fail if ->read_iter and ->read are both wired up as that 519 * implies very convoluted semantics. 446 * implies very convoluted semantics. 520 */ 447 */ 521 if (unlikely(!file->f_op->read_iter || 448 if (unlikely(!file->f_op->read_iter || file->f_op->read)) 522 return warn_unsupported(file, 449 return warn_unsupported(file, "read"); 523 450 524 init_sync_kiocb(&kiocb, file); 451 init_sync_kiocb(&kiocb, file); 525 kiocb.ki_pos = pos ? *pos : 0; 452 kiocb.ki_pos = pos ? *pos : 0; 526 iov_iter_kvec(&iter, ITER_DEST, &iov, !! 453 iov_iter_kvec(&iter, READ, &iov, 1, iov.iov_len); 527 ret = file->f_op->read_iter(&kiocb, &i 454 ret = file->f_op->read_iter(&kiocb, &iter); 528 if (ret > 0) { 455 if (ret > 0) { 529 if (pos) 456 if (pos) 530 *pos = kiocb.ki_pos; 457 *pos = kiocb.ki_pos; 531 fsnotify_access(file); 458 fsnotify_access(file); 532 add_rchar(current, ret); 459 add_rchar(current, ret); 533 } 460 } 534 inc_syscr(current); 461 inc_syscr(current); 535 return ret; 462 return ret; 536 } 463 } 537 464 538 ssize_t kernel_read(struct file *file, void *b 465 ssize_t kernel_read(struct file *file, void *buf, size_t count, loff_t *pos) 539 { 466 { 540 ssize_t ret; 467 ssize_t ret; 541 468 542 ret = rw_verify_area(READ, file, pos, 469 ret = rw_verify_area(READ, file, pos, count); 543 if (ret) 470 if (ret) 544 return ret; 471 return ret; 545 return __kernel_read(file, buf, count, 472 return __kernel_read(file, buf, count, pos); 546 } 473 } 547 EXPORT_SYMBOL(kernel_read); 474 EXPORT_SYMBOL(kernel_read); 548 475 549 ssize_t vfs_read(struct file *file, char __use 476 ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos) 550 { 477 { 551 ssize_t ret; 478 ssize_t ret; 552 479 553 if (!(file->f_mode & FMODE_READ)) 480 if (!(file->f_mode & FMODE_READ)) 554 return -EBADF; 481 return -EBADF; 555 if (!(file->f_mode & FMODE_CAN_READ)) 482 if (!(file->f_mode & FMODE_CAN_READ)) 556 return -EINVAL; 483 return -EINVAL; 557 if (unlikely(!access_ok(buf, count))) 484 if (unlikely(!access_ok(buf, count))) 558 return -EFAULT; 485 return -EFAULT; 559 486 560 ret = rw_verify_area(READ, file, pos, 487 ret = rw_verify_area(READ, file, pos, count); 561 if (ret) 488 if (ret) 562 return ret; 489 return ret; 563 if (count > MAX_RW_COUNT) 490 if (count > MAX_RW_COUNT) 564 count = MAX_RW_COUNT; 491 count = MAX_RW_COUNT; 565 492 566 if (file->f_op->read) 493 if (file->f_op->read) 567 ret = file->f_op->read(file, b 494 ret = file->f_op->read(file, buf, count, pos); 568 else if (file->f_op->read_iter) 495 else if (file->f_op->read_iter) 569 ret = new_sync_read(file, buf, 496 ret = new_sync_read(file, buf, count, pos); 570 else 497 else 571 ret = -EINVAL; 498 ret = -EINVAL; 572 if (ret > 0) { 499 if (ret > 0) { 573 fsnotify_access(file); 500 fsnotify_access(file); 574 add_rchar(current, ret); 501 add_rchar(current, ret); 575 } 502 } 576 inc_syscr(current); 503 inc_syscr(current); 577 return ret; 504 return ret; 578 } 505 } 579 506 580 static ssize_t new_sync_write(struct file *fil 507 static ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos) 581 { 508 { >> 509 struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len }; 582 struct kiocb kiocb; 510 struct kiocb kiocb; 583 struct iov_iter iter; 511 struct iov_iter iter; 584 ssize_t ret; 512 ssize_t ret; 585 513 586 init_sync_kiocb(&kiocb, filp); 514 init_sync_kiocb(&kiocb, filp); 587 kiocb.ki_pos = (ppos ? *ppos : 0); 515 kiocb.ki_pos = (ppos ? *ppos : 0); 588 iov_iter_ubuf(&iter, ITER_SOURCE, (voi !! 516 iov_iter_init(&iter, WRITE, &iov, 1, len); 589 517 590 ret = filp->f_op->write_iter(&kiocb, & !! 518 ret = call_write_iter(filp, &kiocb, &iter); 591 BUG_ON(ret == -EIOCBQUEUED); 519 BUG_ON(ret == -EIOCBQUEUED); 592 if (ret > 0 && ppos) 520 if (ret > 0 && ppos) 593 *ppos = kiocb.ki_pos; 521 *ppos = kiocb.ki_pos; 594 return ret; 522 return ret; 595 } 523 } 596 524 597 /* caller is responsible for file_start_write/ 525 /* caller is responsible for file_start_write/file_end_write */ 598 ssize_t __kernel_write_iter(struct file *file, !! 526 ssize_t __kernel_write(struct file *file, const void *buf, size_t count, loff_t *pos) 599 { 527 { >> 528 struct kvec iov = { >> 529 .iov_base = (void *)buf, >> 530 .iov_len = min_t(size_t, count, MAX_RW_COUNT), >> 531 }; 600 struct kiocb kiocb; 532 struct kiocb kiocb; >> 533 struct iov_iter iter; 601 ssize_t ret; 534 ssize_t ret; 602 535 603 if (WARN_ON_ONCE(!(file->f_mode & FMOD 536 if (WARN_ON_ONCE(!(file->f_mode & FMODE_WRITE))) 604 return -EBADF; 537 return -EBADF; 605 if (!(file->f_mode & FMODE_CAN_WRITE)) 538 if (!(file->f_mode & FMODE_CAN_WRITE)) 606 return -EINVAL; 539 return -EINVAL; 607 /* 540 /* 608 * Also fail if ->write_iter and ->wri 541 * Also fail if ->write_iter and ->write are both wired up as that 609 * implies very convoluted semantics. 542 * implies very convoluted semantics. 610 */ 543 */ 611 if (unlikely(!file->f_op->write_iter | 544 if (unlikely(!file->f_op->write_iter || file->f_op->write)) 612 return warn_unsupported(file, 545 return warn_unsupported(file, "write"); 613 546 614 init_sync_kiocb(&kiocb, file); 547 init_sync_kiocb(&kiocb, file); 615 kiocb.ki_pos = pos ? *pos : 0; 548 kiocb.ki_pos = pos ? *pos : 0; 616 ret = file->f_op->write_iter(&kiocb, f !! 549 iov_iter_kvec(&iter, WRITE, &iov, 1, iov.iov_len); >> 550 ret = file->f_op->write_iter(&kiocb, &iter); 617 if (ret > 0) { 551 if (ret > 0) { 618 if (pos) 552 if (pos) 619 *pos = kiocb.ki_pos; 553 *pos = kiocb.ki_pos; 620 fsnotify_modify(file); 554 fsnotify_modify(file); 621 add_wchar(current, ret); 555 add_wchar(current, ret); 622 } 556 } 623 inc_syscw(current); 557 inc_syscw(current); 624 return ret; 558 return ret; 625 } 559 } 626 << 627 /* caller is responsible for file_start_write/ << 628 ssize_t __kernel_write(struct file *file, cons << 629 { << 630 struct kvec iov = { << 631 .iov_base = (void *)buf, << 632 .iov_len = min_t(size_t << 633 }; << 634 struct iov_iter iter; << 635 iov_iter_kvec(&iter, ITER_SOURCE, &iov << 636 return __kernel_write_iter(file, &iter << 637 } << 638 /* 560 /* 639 * This "EXPORT_SYMBOL_GPL()" is more of a "EX 561 * This "EXPORT_SYMBOL_GPL()" is more of a "EXPORT_SYMBOL_DONTUSE()", 640 * but autofs is one of the few internal kerne 562 * but autofs is one of the few internal kernel users that actually 641 * wants this _and_ can be built as a module. 563 * wants this _and_ can be built as a module. So we need to export 642 * this symbol for autofs, even though it real 564 * this symbol for autofs, even though it really isn't appropriate 643 * for any other kernel modules. 565 * for any other kernel modules. 644 */ 566 */ 645 EXPORT_SYMBOL_GPL(__kernel_write); 567 EXPORT_SYMBOL_GPL(__kernel_write); 646 568 647 ssize_t kernel_write(struct file *file, const 569 ssize_t kernel_write(struct file *file, const void *buf, size_t count, 648 loff_t *pos) 570 loff_t *pos) 649 { 571 { 650 ssize_t ret; 572 ssize_t ret; 651 573 652 ret = rw_verify_area(WRITE, file, pos, 574 ret = rw_verify_area(WRITE, file, pos, count); 653 if (ret) 575 if (ret) 654 return ret; 576 return ret; 655 577 656 file_start_write(file); 578 file_start_write(file); 657 ret = __kernel_write(file, buf, count 579 ret = __kernel_write(file, buf, count, pos); 658 file_end_write(file); 580 file_end_write(file); 659 return ret; 581 return ret; 660 } 582 } 661 EXPORT_SYMBOL(kernel_write); 583 EXPORT_SYMBOL(kernel_write); 662 584 663 ssize_t vfs_write(struct file *file, const cha 585 ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos) 664 { 586 { 665 ssize_t ret; 587 ssize_t ret; 666 588 667 if (!(file->f_mode & FMODE_WRITE)) 589 if (!(file->f_mode & FMODE_WRITE)) 668 return -EBADF; 590 return -EBADF; 669 if (!(file->f_mode & FMODE_CAN_WRITE)) 591 if (!(file->f_mode & FMODE_CAN_WRITE)) 670 return -EINVAL; 592 return -EINVAL; 671 if (unlikely(!access_ok(buf, count))) 593 if (unlikely(!access_ok(buf, count))) 672 return -EFAULT; 594 return -EFAULT; 673 595 674 ret = rw_verify_area(WRITE, file, pos, 596 ret = rw_verify_area(WRITE, file, pos, count); 675 if (ret) 597 if (ret) 676 return ret; 598 return ret; 677 if (count > MAX_RW_COUNT) 599 if (count > MAX_RW_COUNT) 678 count = MAX_RW_COUNT; 600 count = MAX_RW_COUNT; 679 file_start_write(file); 601 file_start_write(file); 680 if (file->f_op->write) 602 if (file->f_op->write) 681 ret = file->f_op->write(file, 603 ret = file->f_op->write(file, buf, count, pos); 682 else if (file->f_op->write_iter) 604 else if (file->f_op->write_iter) 683 ret = new_sync_write(file, buf 605 ret = new_sync_write(file, buf, count, pos); 684 else 606 else 685 ret = -EINVAL; 607 ret = -EINVAL; 686 if (ret > 0) { 608 if (ret > 0) { 687 fsnotify_modify(file); 609 fsnotify_modify(file); 688 add_wchar(current, ret); 610 add_wchar(current, ret); 689 } 611 } 690 inc_syscw(current); 612 inc_syscw(current); 691 file_end_write(file); 613 file_end_write(file); 692 return ret; 614 return ret; 693 } 615 } 694 616 695 /* file_ppos returns &file->f_pos or NULL if f 617 /* file_ppos returns &file->f_pos or NULL if file is stream */ 696 static inline loff_t *file_ppos(struct file *f 618 static inline loff_t *file_ppos(struct file *file) 697 { 619 { 698 return file->f_mode & FMODE_STREAM ? N 620 return file->f_mode & FMODE_STREAM ? NULL : &file->f_pos; 699 } 621 } 700 622 701 ssize_t ksys_read(unsigned int fd, char __user 623 ssize_t ksys_read(unsigned int fd, char __user *buf, size_t count) 702 { 624 { 703 struct fd f = fdget_pos(fd); 625 struct fd f = fdget_pos(fd); 704 ssize_t ret = -EBADF; 626 ssize_t ret = -EBADF; 705 627 706 if (fd_file(f)) { !! 628 if (f.file) { 707 loff_t pos, *ppos = file_ppos( !! 629 loff_t pos, *ppos = file_ppos(f.file); 708 if (ppos) { 630 if (ppos) { 709 pos = *ppos; 631 pos = *ppos; 710 ppos = &pos; 632 ppos = &pos; 711 } 633 } 712 ret = vfs_read(fd_file(f), buf !! 634 ret = vfs_read(f.file, buf, count, ppos); 713 if (ret >= 0 && ppos) 635 if (ret >= 0 && ppos) 714 fd_file(f)->f_pos = po !! 636 f.file->f_pos = pos; 715 fdput_pos(f); 637 fdput_pos(f); 716 } 638 } 717 return ret; 639 return ret; 718 } 640 } 719 641 720 SYSCALL_DEFINE3(read, unsigned int, fd, char _ 642 SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count) 721 { 643 { 722 return ksys_read(fd, buf, count); 644 return ksys_read(fd, buf, count); 723 } 645 } 724 646 725 ssize_t ksys_write(unsigned int fd, const char 647 ssize_t ksys_write(unsigned int fd, const char __user *buf, size_t count) 726 { 648 { 727 struct fd f = fdget_pos(fd); 649 struct fd f = fdget_pos(fd); 728 ssize_t ret = -EBADF; 650 ssize_t ret = -EBADF; 729 651 730 if (fd_file(f)) { !! 652 if (f.file) { 731 loff_t pos, *ppos = file_ppos( !! 653 loff_t pos, *ppos = file_ppos(f.file); 732 if (ppos) { 654 if (ppos) { 733 pos = *ppos; 655 pos = *ppos; 734 ppos = &pos; 656 ppos = &pos; 735 } 657 } 736 ret = vfs_write(fd_file(f), bu !! 658 ret = vfs_write(f.file, buf, count, ppos); 737 if (ret >= 0 && ppos) 659 if (ret >= 0 && ppos) 738 fd_file(f)->f_pos = po !! 660 f.file->f_pos = pos; 739 fdput_pos(f); 661 fdput_pos(f); 740 } 662 } 741 663 742 return ret; 664 return ret; 743 } 665 } 744 666 745 SYSCALL_DEFINE3(write, unsigned int, fd, const 667 SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf, 746 size_t, count) 668 size_t, count) 747 { 669 { 748 return ksys_write(fd, buf, count); 670 return ksys_write(fd, buf, count); 749 } 671 } 750 672 751 ssize_t ksys_pread64(unsigned int fd, char __u 673 ssize_t ksys_pread64(unsigned int fd, char __user *buf, size_t count, 752 loff_t pos) 674 loff_t pos) 753 { 675 { 754 struct fd f; 676 struct fd f; 755 ssize_t ret = -EBADF; 677 ssize_t ret = -EBADF; 756 678 757 if (pos < 0) 679 if (pos < 0) 758 return -EINVAL; 680 return -EINVAL; 759 681 760 f = fdget(fd); 682 f = fdget(fd); 761 if (fd_file(f)) { !! 683 if (f.file) { 762 ret = -ESPIPE; 684 ret = -ESPIPE; 763 if (fd_file(f)->f_mode & FMODE !! 685 if (f.file->f_mode & FMODE_PREAD) 764 ret = vfs_read(fd_file !! 686 ret = vfs_read(f.file, buf, count, &pos); 765 fdput(f); 687 fdput(f); 766 } 688 } 767 689 768 return ret; 690 return ret; 769 } 691 } 770 692 771 SYSCALL_DEFINE4(pread64, unsigned int, fd, cha 693 SYSCALL_DEFINE4(pread64, unsigned int, fd, char __user *, buf, 772 size_t, count, loff_t, 694 size_t, count, loff_t, pos) 773 { 695 { 774 return ksys_pread64(fd, buf, count, po 696 return ksys_pread64(fd, buf, count, pos); 775 } 697 } 776 698 777 #if defined(CONFIG_COMPAT) && defined(__ARCH_W << 778 COMPAT_SYSCALL_DEFINE5(pread64, unsigned int, << 779 size_t, count, compat_a << 780 { << 781 return ksys_pread64(fd, buf, count, co << 782 } << 783 #endif << 784 << 785 ssize_t ksys_pwrite64(unsigned int fd, const c 699 ssize_t ksys_pwrite64(unsigned int fd, const char __user *buf, 786 size_t count, loff_t pos 700 size_t count, loff_t pos) 787 { 701 { 788 struct fd f; 702 struct fd f; 789 ssize_t ret = -EBADF; 703 ssize_t ret = -EBADF; 790 704 791 if (pos < 0) 705 if (pos < 0) 792 return -EINVAL; 706 return -EINVAL; 793 707 794 f = fdget(fd); 708 f = fdget(fd); 795 if (fd_file(f)) { !! 709 if (f.file) { 796 ret = -ESPIPE; 710 ret = -ESPIPE; 797 if (fd_file(f)->f_mode & FMODE !! 711 if (f.file->f_mode & FMODE_PWRITE) 798 ret = vfs_write(fd_fil !! 712 ret = vfs_write(f.file, buf, count, &pos); 799 fdput(f); 713 fdput(f); 800 } 714 } 801 715 802 return ret; 716 return ret; 803 } 717 } 804 718 805 SYSCALL_DEFINE4(pwrite64, unsigned int, fd, co 719 SYSCALL_DEFINE4(pwrite64, unsigned int, fd, const char __user *, buf, 806 size_t, count, loff_t 720 size_t, count, loff_t, pos) 807 { 721 { 808 return ksys_pwrite64(fd, buf, count, p 722 return ksys_pwrite64(fd, buf, count, pos); 809 } 723 } 810 724 811 #if defined(CONFIG_COMPAT) && defined(__ARCH_W << 812 COMPAT_SYSCALL_DEFINE5(pwrite64, unsigned int, << 813 size_t, count, compat_a << 814 { << 815 return ksys_pwrite64(fd, buf, count, c << 816 } << 817 #endif << 818 << 819 static ssize_t do_iter_readv_writev(struct fil 725 static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter, 820 loff_t *ppos, int type, rwf_t 726 loff_t *ppos, int type, rwf_t flags) 821 { 727 { 822 struct kiocb kiocb; 728 struct kiocb kiocb; 823 ssize_t ret; 729 ssize_t ret; 824 730 825 init_sync_kiocb(&kiocb, filp); 731 init_sync_kiocb(&kiocb, filp); 826 ret = kiocb_set_rw_flags(&kiocb, flags !! 732 ret = kiocb_set_rw_flags(&kiocb, flags); 827 if (ret) 733 if (ret) 828 return ret; 734 return ret; 829 kiocb.ki_pos = (ppos ? *ppos : 0); 735 kiocb.ki_pos = (ppos ? *ppos : 0); 830 736 831 if (type == READ) 737 if (type == READ) 832 ret = filp->f_op->read_iter(&k !! 738 ret = call_read_iter(filp, &kiocb, iter); 833 else 739 else 834 ret = filp->f_op->write_iter(& !! 740 ret = call_write_iter(filp, &kiocb, iter); 835 BUG_ON(ret == -EIOCBQUEUED); 741 BUG_ON(ret == -EIOCBQUEUED); 836 if (ppos) 742 if (ppos) 837 *ppos = kiocb.ki_pos; 743 *ppos = kiocb.ki_pos; 838 return ret; 744 return ret; 839 } 745 } 840 746 841 /* Do it by hand, with file-ops */ 747 /* Do it by hand, with file-ops */ 842 static ssize_t do_loop_readv_writev(struct fil 748 static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter, 843 loff_t *ppos, int type, rwf_t 749 loff_t *ppos, int type, rwf_t flags) 844 { 750 { 845 ssize_t ret = 0; 751 ssize_t ret = 0; 846 752 847 if (flags & ~RWF_HIPRI) 753 if (flags & ~RWF_HIPRI) 848 return -EOPNOTSUPP; 754 return -EOPNOTSUPP; 849 755 850 while (iov_iter_count(iter)) { 756 while (iov_iter_count(iter)) { >> 757 struct iovec iovec = iov_iter_iovec(iter); 851 ssize_t nr; 758 ssize_t nr; 852 759 853 if (type == READ) { 760 if (type == READ) { 854 nr = filp->f_op->read( !! 761 nr = filp->f_op->read(filp, iovec.iov_base, 855 !! 762 iovec.iov_len, ppos); 856 } else { 763 } else { 857 nr = filp->f_op->write !! 764 nr = filp->f_op->write(filp, iovec.iov_base, 858 !! 765 iovec.iov_len, ppos); 859 } 766 } 860 767 861 if (nr < 0) { 768 if (nr < 0) { 862 if (!ret) 769 if (!ret) 863 ret = nr; 770 ret = nr; 864 break; 771 break; 865 } 772 } 866 ret += nr; 773 ret += nr; 867 if (nr != iter_iov_len(iter)) !! 774 if (nr != iovec.iov_len) 868 break; 775 break; 869 iov_iter_advance(iter, nr); 776 iov_iter_advance(iter, nr); 870 } 777 } 871 778 872 return ret; 779 return ret; 873 } 780 } 874 781 875 ssize_t vfs_iocb_iter_read(struct file *file, !! 782 static ssize_t do_iter_read(struct file *file, struct iov_iter *iter, 876 struct iov_iter *it !! 783 loff_t *pos, rwf_t flags) 877 { 784 { 878 size_t tot_len; 785 size_t tot_len; 879 ssize_t ret = 0; 786 ssize_t ret = 0; 880 787 881 if (!file->f_op->read_iter) << 882 return -EINVAL; << 883 if (!(file->f_mode & FMODE_READ)) 788 if (!(file->f_mode & FMODE_READ)) 884 return -EBADF; 789 return -EBADF; 885 if (!(file->f_mode & FMODE_CAN_READ)) 790 if (!(file->f_mode & FMODE_CAN_READ)) 886 return -EINVAL; 791 return -EINVAL; 887 792 888 tot_len = iov_iter_count(iter); 793 tot_len = iov_iter_count(iter); 889 if (!tot_len) 794 if (!tot_len) 890 goto out; 795 goto out; 891 ret = rw_verify_area(READ, file, &iocb !! 796 ret = rw_verify_area(READ, file, pos, tot_len); 892 if (ret < 0) 797 if (ret < 0) 893 return ret; 798 return ret; 894 799 895 ret = file->f_op->read_iter(iocb, iter !! 800 if (file->f_op->read_iter) >> 801 ret = do_iter_readv_writev(file, iter, pos, READ, flags); >> 802 else >> 803 ret = do_loop_readv_writev(file, iter, pos, READ, flags); 896 out: 804 out: 897 if (ret >= 0) 805 if (ret >= 0) 898 fsnotify_access(file); 806 fsnotify_access(file); 899 return ret; 807 return ret; 900 } 808 } 901 EXPORT_SYMBOL(vfs_iocb_iter_read); << 902 809 903 ssize_t vfs_iter_read(struct file *file, struc !! 810 ssize_t vfs_iocb_iter_read(struct file *file, struct kiocb *iocb, 904 rwf_t flags) !! 811 struct iov_iter *iter) 905 { 812 { 906 size_t tot_len; 813 size_t tot_len; 907 ssize_t ret = 0; 814 ssize_t ret = 0; 908 815 909 if (!file->f_op->read_iter) 816 if (!file->f_op->read_iter) 910 return -EINVAL; 817 return -EINVAL; 911 if (!(file->f_mode & FMODE_READ)) 818 if (!(file->f_mode & FMODE_READ)) 912 return -EBADF; 819 return -EBADF; 913 if (!(file->f_mode & FMODE_CAN_READ)) 820 if (!(file->f_mode & FMODE_CAN_READ)) 914 return -EINVAL; 821 return -EINVAL; 915 822 916 tot_len = iov_iter_count(iter); 823 tot_len = iov_iter_count(iter); 917 if (!tot_len) 824 if (!tot_len) 918 goto out; 825 goto out; 919 ret = rw_verify_area(READ, file, ppos, !! 826 ret = rw_verify_area(READ, file, &iocb->ki_pos, tot_len); 920 if (ret < 0) 827 if (ret < 0) 921 return ret; 828 return ret; 922 829 923 ret = do_iter_readv_writev(file, iter, !! 830 ret = call_read_iter(file, iocb, iter); 924 out: 831 out: 925 if (ret >= 0) 832 if (ret >= 0) 926 fsnotify_access(file); 833 fsnotify_access(file); 927 return ret; 834 return ret; 928 } 835 } >> 836 EXPORT_SYMBOL(vfs_iocb_iter_read); >> 837 >> 838 ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos, >> 839 rwf_t flags) >> 840 { >> 841 if (!file->f_op->read_iter) >> 842 return -EINVAL; >> 843 return do_iter_read(file, iter, ppos, flags); >> 844 } 929 EXPORT_SYMBOL(vfs_iter_read); 845 EXPORT_SYMBOL(vfs_iter_read); 930 846 931 /* !! 847 static ssize_t do_iter_write(struct file *file, struct iov_iter *iter, 932 * Caller is responsible for calling kiocb_end !! 848 loff_t *pos, rwf_t flags) 933 * if async iocb was queued. << 934 */ << 935 ssize_t vfs_iocb_iter_write(struct file *file, << 936 struct iov_iter *i << 937 { 849 { 938 size_t tot_len; 850 size_t tot_len; 939 ssize_t ret = 0; 851 ssize_t ret = 0; 940 852 941 if (!file->f_op->write_iter) << 942 return -EINVAL; << 943 if (!(file->f_mode & FMODE_WRITE)) 853 if (!(file->f_mode & FMODE_WRITE)) 944 return -EBADF; 854 return -EBADF; 945 if (!(file->f_mode & FMODE_CAN_WRITE)) 855 if (!(file->f_mode & FMODE_CAN_WRITE)) 946 return -EINVAL; 856 return -EINVAL; 947 857 948 tot_len = iov_iter_count(iter); 858 tot_len = iov_iter_count(iter); 949 if (!tot_len) 859 if (!tot_len) 950 return 0; 860 return 0; 951 ret = rw_verify_area(WRITE, file, &ioc !! 861 ret = rw_verify_area(WRITE, file, pos, tot_len); 952 if (ret < 0) 862 if (ret < 0) 953 return ret; 863 return ret; 954 864 955 kiocb_start_write(iocb); !! 865 if (file->f_op->write_iter) 956 ret = file->f_op->write_iter(iocb, ite !! 866 ret = do_iter_readv_writev(file, iter, pos, WRITE, flags); 957 if (ret != -EIOCBQUEUED) !! 867 else 958 kiocb_end_write(iocb); !! 868 ret = do_loop_readv_writev(file, iter, pos, WRITE, flags); 959 if (ret > 0) 869 if (ret > 0) 960 fsnotify_modify(file); 870 fsnotify_modify(file); 961 << 962 return ret; 871 return ret; 963 } 872 } 964 EXPORT_SYMBOL(vfs_iocb_iter_write); << 965 873 966 ssize_t vfs_iter_write(struct file *file, stru !! 874 ssize_t vfs_iocb_iter_write(struct file *file, struct kiocb *iocb, 967 rwf_t flags) !! 875 struct iov_iter *iter) 968 { 876 { 969 size_t tot_len; 877 size_t tot_len; 970 ssize_t ret; !! 878 ssize_t ret = 0; 971 879 >> 880 if (!file->f_op->write_iter) >> 881 return -EINVAL; 972 if (!(file->f_mode & FMODE_WRITE)) 882 if (!(file->f_mode & FMODE_WRITE)) 973 return -EBADF; 883 return -EBADF; 974 if (!(file->f_mode & FMODE_CAN_WRITE)) 884 if (!(file->f_mode & FMODE_CAN_WRITE)) 975 return -EINVAL; 885 return -EINVAL; 976 if (!file->f_op->write_iter) << 977 return -EINVAL; << 978 886 979 tot_len = iov_iter_count(iter); 887 tot_len = iov_iter_count(iter); 980 if (!tot_len) 888 if (!tot_len) 981 return 0; 889 return 0; 982 !! 890 ret = rw_verify_area(WRITE, file, &iocb->ki_pos, tot_len); 983 ret = rw_verify_area(WRITE, file, ppos << 984 if (ret < 0) 891 if (ret < 0) 985 return ret; 892 return ret; 986 893 987 file_start_write(file); !! 894 ret = call_write_iter(file, iocb, iter); 988 ret = do_iter_readv_writev(file, iter, << 989 if (ret > 0) 895 if (ret > 0) 990 fsnotify_modify(file); 896 fsnotify_modify(file); 991 file_end_write(file); << 992 897 993 return ret; 898 return ret; 994 } 899 } >> 900 EXPORT_SYMBOL(vfs_iocb_iter_write); >> 901 >> 902 ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos, >> 903 rwf_t flags) >> 904 { >> 905 if (!file->f_op->write_iter) >> 906 return -EINVAL; >> 907 return do_iter_write(file, iter, ppos, flags); >> 908 } 995 EXPORT_SYMBOL(vfs_iter_write); 909 EXPORT_SYMBOL(vfs_iter_write); 996 910 997 static ssize_t vfs_readv(struct file *file, co 911 static ssize_t vfs_readv(struct file *file, const struct iovec __user *vec, 998 unsigned long vlen, l !! 912 unsigned long vlen, loff_t *pos, rwf_t flags) 999 { 913 { 1000 struct iovec iovstack[UIO_FASTIOV]; 914 struct iovec iovstack[UIO_FASTIOV]; 1001 struct iovec *iov = iovstack; 915 struct iovec *iov = iovstack; 1002 struct iov_iter iter; 916 struct iov_iter iter; 1003 size_t tot_len; !! 917 ssize_t ret; 1004 ssize_t ret = 0; << 1005 << 1006 if (!(file->f_mode & FMODE_READ)) << 1007 return -EBADF; << 1008 if (!(file->f_mode & FMODE_CAN_READ)) << 1009 return -EINVAL; << 1010 << 1011 ret = import_iovec(ITER_DEST, vec, vl << 1012 &iter); << 1013 if (ret < 0) << 1014 return ret; << 1015 << 1016 tot_len = iov_iter_count(&iter); << 1017 if (!tot_len) << 1018 goto out; << 1019 918 1020 ret = rw_verify_area(READ, file, pos, !! 919 ret = import_iovec(READ, vec, vlen, ARRAY_SIZE(iovstack), &iov, &iter); 1021 if (ret < 0) !! 920 if (ret >= 0) { 1022 goto out; !! 921 ret = do_iter_read(file, &iter, pos, flags); >> 922 kfree(iov); >> 923 } 1023 924 1024 if (file->f_op->read_iter) << 1025 ret = do_iter_readv_writev(fi << 1026 else << 1027 ret = do_loop_readv_writev(fi << 1028 out: << 1029 if (ret >= 0) << 1030 fsnotify_access(file); << 1031 kfree(iov); << 1032 return ret; 925 return ret; 1033 } 926 } 1034 927 1035 static ssize_t vfs_writev(struct file *file, 928 static ssize_t vfs_writev(struct file *file, const struct iovec __user *vec, 1036 unsigned long vlen, !! 929 unsigned long vlen, loff_t *pos, rwf_t flags) 1037 { 930 { 1038 struct iovec iovstack[UIO_FASTIOV]; 931 struct iovec iovstack[UIO_FASTIOV]; 1039 struct iovec *iov = iovstack; 932 struct iovec *iov = iovstack; 1040 struct iov_iter iter; 933 struct iov_iter iter; 1041 size_t tot_len; !! 934 ssize_t ret; 1042 ssize_t ret = 0; << 1043 << 1044 if (!(file->f_mode & FMODE_WRITE)) << 1045 return -EBADF; << 1046 if (!(file->f_mode & FMODE_CAN_WRITE) << 1047 return -EINVAL; << 1048 << 1049 ret = import_iovec(ITER_SOURCE, vec, << 1050 &iter); << 1051 if (ret < 0) << 1052 return ret; << 1053 << 1054 tot_len = iov_iter_count(&iter); << 1055 if (!tot_len) << 1056 goto out; << 1057 << 1058 ret = rw_verify_area(WRITE, file, pos << 1059 if (ret < 0) << 1060 goto out; << 1061 935 1062 file_start_write(file); !! 936 ret = import_iovec(WRITE, vec, vlen, ARRAY_SIZE(iovstack), &iov, &iter); 1063 if (file->f_op->write_iter) !! 937 if (ret >= 0) { 1064 ret = do_iter_readv_writev(fi !! 938 file_start_write(file); 1065 else !! 939 ret = do_iter_write(file, &iter, pos, flags); 1066 ret = do_loop_readv_writev(fi !! 940 file_end_write(file); 1067 if (ret > 0) !! 941 kfree(iov); 1068 fsnotify_modify(file); !! 942 } 1069 file_end_write(file); << 1070 out: << 1071 kfree(iov); << 1072 return ret; 943 return ret; 1073 } 944 } 1074 945 1075 static ssize_t do_readv(unsigned long fd, con 946 static ssize_t do_readv(unsigned long fd, const struct iovec __user *vec, 1076 unsigned long vlen, r 947 unsigned long vlen, rwf_t flags) 1077 { 948 { 1078 struct fd f = fdget_pos(fd); 949 struct fd f = fdget_pos(fd); 1079 ssize_t ret = -EBADF; 950 ssize_t ret = -EBADF; 1080 951 1081 if (fd_file(f)) { !! 952 if (f.file) { 1082 loff_t pos, *ppos = file_ppos !! 953 loff_t pos, *ppos = file_ppos(f.file); 1083 if (ppos) { 954 if (ppos) { 1084 pos = *ppos; 955 pos = *ppos; 1085 ppos = &pos; 956 ppos = &pos; 1086 } 957 } 1087 ret = vfs_readv(fd_file(f), v !! 958 ret = vfs_readv(f.file, vec, vlen, ppos, flags); 1088 if (ret >= 0 && ppos) 959 if (ret >= 0 && ppos) 1089 fd_file(f)->f_pos = p !! 960 f.file->f_pos = pos; 1090 fdput_pos(f); 961 fdput_pos(f); 1091 } 962 } 1092 963 1093 if (ret > 0) 964 if (ret > 0) 1094 add_rchar(current, ret); 965 add_rchar(current, ret); 1095 inc_syscr(current); 966 inc_syscr(current); 1096 return ret; 967 return ret; 1097 } 968 } 1098 969 1099 static ssize_t do_writev(unsigned long fd, co 970 static ssize_t do_writev(unsigned long fd, const struct iovec __user *vec, 1100 unsigned long vlen, 971 unsigned long vlen, rwf_t flags) 1101 { 972 { 1102 struct fd f = fdget_pos(fd); 973 struct fd f = fdget_pos(fd); 1103 ssize_t ret = -EBADF; 974 ssize_t ret = -EBADF; 1104 975 1105 if (fd_file(f)) { !! 976 if (f.file) { 1106 loff_t pos, *ppos = file_ppos !! 977 loff_t pos, *ppos = file_ppos(f.file); 1107 if (ppos) { 978 if (ppos) { 1108 pos = *ppos; 979 pos = *ppos; 1109 ppos = &pos; 980 ppos = &pos; 1110 } 981 } 1111 ret = vfs_writev(fd_file(f), !! 982 ret = vfs_writev(f.file, vec, vlen, ppos, flags); 1112 if (ret >= 0 && ppos) 983 if (ret >= 0 && ppos) 1113 fd_file(f)->f_pos = p !! 984 f.file->f_pos = pos; 1114 fdput_pos(f); 985 fdput_pos(f); 1115 } 986 } 1116 987 1117 if (ret > 0) 988 if (ret > 0) 1118 add_wchar(current, ret); 989 add_wchar(current, ret); 1119 inc_syscw(current); 990 inc_syscw(current); 1120 return ret; 991 return ret; 1121 } 992 } 1122 993 1123 static inline loff_t pos_from_hilo(unsigned l 994 static inline loff_t pos_from_hilo(unsigned long high, unsigned long low) 1124 { 995 { 1125 #define HALF_LONG_BITS (BITS_PER_LONG / 2) 996 #define HALF_LONG_BITS (BITS_PER_LONG / 2) 1126 return (((loff_t)high << HALF_LONG_BI 997 return (((loff_t)high << HALF_LONG_BITS) << HALF_LONG_BITS) | low; 1127 } 998 } 1128 999 1129 static ssize_t do_preadv(unsigned long fd, co 1000 static ssize_t do_preadv(unsigned long fd, const struct iovec __user *vec, 1130 unsigned long vlen, 1001 unsigned long vlen, loff_t pos, rwf_t flags) 1131 { 1002 { 1132 struct fd f; 1003 struct fd f; 1133 ssize_t ret = -EBADF; 1004 ssize_t ret = -EBADF; 1134 1005 1135 if (pos < 0) 1006 if (pos < 0) 1136 return -EINVAL; 1007 return -EINVAL; 1137 1008 1138 f = fdget(fd); 1009 f = fdget(fd); 1139 if (fd_file(f)) { !! 1010 if (f.file) { 1140 ret = -ESPIPE; 1011 ret = -ESPIPE; 1141 if (fd_file(f)->f_mode & FMOD !! 1012 if (f.file->f_mode & FMODE_PREAD) 1142 ret = vfs_readv(fd_fi !! 1013 ret = vfs_readv(f.file, vec, vlen, &pos, flags); 1143 fdput(f); 1014 fdput(f); 1144 } 1015 } 1145 1016 1146 if (ret > 0) 1017 if (ret > 0) 1147 add_rchar(current, ret); 1018 add_rchar(current, ret); 1148 inc_syscr(current); 1019 inc_syscr(current); 1149 return ret; 1020 return ret; 1150 } 1021 } 1151 1022 1152 static ssize_t do_pwritev(unsigned long fd, c 1023 static ssize_t do_pwritev(unsigned long fd, const struct iovec __user *vec, 1153 unsigned long vlen, 1024 unsigned long vlen, loff_t pos, rwf_t flags) 1154 { 1025 { 1155 struct fd f; 1026 struct fd f; 1156 ssize_t ret = -EBADF; 1027 ssize_t ret = -EBADF; 1157 1028 1158 if (pos < 0) 1029 if (pos < 0) 1159 return -EINVAL; 1030 return -EINVAL; 1160 1031 1161 f = fdget(fd); 1032 f = fdget(fd); 1162 if (fd_file(f)) { !! 1033 if (f.file) { 1163 ret = -ESPIPE; 1034 ret = -ESPIPE; 1164 if (fd_file(f)->f_mode & FMOD !! 1035 if (f.file->f_mode & FMODE_PWRITE) 1165 ret = vfs_writev(fd_f !! 1036 ret = vfs_writev(f.file, vec, vlen, &pos, flags); 1166 fdput(f); 1037 fdput(f); 1167 } 1038 } 1168 1039 1169 if (ret > 0) 1040 if (ret > 0) 1170 add_wchar(current, ret); 1041 add_wchar(current, ret); 1171 inc_syscw(current); 1042 inc_syscw(current); 1172 return ret; 1043 return ret; 1173 } 1044 } 1174 1045 1175 SYSCALL_DEFINE3(readv, unsigned long, fd, con 1046 SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec, 1176 unsigned long, vlen) 1047 unsigned long, vlen) 1177 { 1048 { 1178 return do_readv(fd, vec, vlen, 0); 1049 return do_readv(fd, vec, vlen, 0); 1179 } 1050 } 1180 1051 1181 SYSCALL_DEFINE3(writev, unsigned long, fd, co 1052 SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec, 1182 unsigned long, vlen) 1053 unsigned long, vlen) 1183 { 1054 { 1184 return do_writev(fd, vec, vlen, 0); 1055 return do_writev(fd, vec, vlen, 0); 1185 } 1056 } 1186 1057 1187 SYSCALL_DEFINE5(preadv, unsigned long, fd, co 1058 SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec, 1188 unsigned long, vlen, unsigned 1059 unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h) 1189 { 1060 { 1190 loff_t pos = pos_from_hilo(pos_h, pos 1061 loff_t pos = pos_from_hilo(pos_h, pos_l); 1191 1062 1192 return do_preadv(fd, vec, vlen, pos, 1063 return do_preadv(fd, vec, vlen, pos, 0); 1193 } 1064 } 1194 1065 1195 SYSCALL_DEFINE6(preadv2, unsigned long, fd, c 1066 SYSCALL_DEFINE6(preadv2, unsigned long, fd, const struct iovec __user *, vec, 1196 unsigned long, vlen, unsigned 1067 unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h, 1197 rwf_t, flags) 1068 rwf_t, flags) 1198 { 1069 { 1199 loff_t pos = pos_from_hilo(pos_h, pos 1070 loff_t pos = pos_from_hilo(pos_h, pos_l); 1200 1071 1201 if (pos == -1) 1072 if (pos == -1) 1202 return do_readv(fd, vec, vlen 1073 return do_readv(fd, vec, vlen, flags); 1203 1074 1204 return do_preadv(fd, vec, vlen, pos, 1075 return do_preadv(fd, vec, vlen, pos, flags); 1205 } 1076 } 1206 1077 1207 SYSCALL_DEFINE5(pwritev, unsigned long, fd, c 1078 SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec, 1208 unsigned long, vlen, unsigned 1079 unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h) 1209 { 1080 { 1210 loff_t pos = pos_from_hilo(pos_h, pos 1081 loff_t pos = pos_from_hilo(pos_h, pos_l); 1211 1082 1212 return do_pwritev(fd, vec, vlen, pos, 1083 return do_pwritev(fd, vec, vlen, pos, 0); 1213 } 1084 } 1214 1085 1215 SYSCALL_DEFINE6(pwritev2, unsigned long, fd, 1086 SYSCALL_DEFINE6(pwritev2, unsigned long, fd, const struct iovec __user *, vec, 1216 unsigned long, vlen, unsigned 1087 unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h, 1217 rwf_t, flags) 1088 rwf_t, flags) 1218 { 1089 { 1219 loff_t pos = pos_from_hilo(pos_h, pos 1090 loff_t pos = pos_from_hilo(pos_h, pos_l); 1220 1091 1221 if (pos == -1) 1092 if (pos == -1) 1222 return do_writev(fd, vec, vle 1093 return do_writev(fd, vec, vlen, flags); 1223 1094 1224 return do_pwritev(fd, vec, vlen, pos, 1095 return do_pwritev(fd, vec, vlen, pos, flags); 1225 } 1096 } 1226 1097 1227 /* 1098 /* 1228 * Various compat syscalls. Note that they a 1099 * Various compat syscalls. Note that they all pretend to take a native 1229 * iovec - import_iovec will properly treat t 1100 * iovec - import_iovec will properly treat those as compat_iovecs based on 1230 * in_compat_syscall(). 1101 * in_compat_syscall(). 1231 */ 1102 */ 1232 #ifdef CONFIG_COMPAT 1103 #ifdef CONFIG_COMPAT 1233 #ifdef __ARCH_WANT_COMPAT_SYS_PREADV64 1104 #ifdef __ARCH_WANT_COMPAT_SYS_PREADV64 1234 COMPAT_SYSCALL_DEFINE4(preadv64, unsigned lon 1105 COMPAT_SYSCALL_DEFINE4(preadv64, unsigned long, fd, 1235 const struct iovec __user *, 1106 const struct iovec __user *, vec, 1236 unsigned long, vlen, loff_t, 1107 unsigned long, vlen, loff_t, pos) 1237 { 1108 { 1238 return do_preadv(fd, vec, vlen, pos, 1109 return do_preadv(fd, vec, vlen, pos, 0); 1239 } 1110 } 1240 #endif 1111 #endif 1241 1112 1242 COMPAT_SYSCALL_DEFINE5(preadv, compat_ulong_t 1113 COMPAT_SYSCALL_DEFINE5(preadv, compat_ulong_t, fd, 1243 const struct iovec __user *, 1114 const struct iovec __user *, vec, 1244 compat_ulong_t, vlen, u32, po 1115 compat_ulong_t, vlen, u32, pos_low, u32, pos_high) 1245 { 1116 { 1246 loff_t pos = ((loff_t)pos_high << 32) 1117 loff_t pos = ((loff_t)pos_high << 32) | pos_low; 1247 1118 1248 return do_preadv(fd, vec, vlen, pos, 1119 return do_preadv(fd, vec, vlen, pos, 0); 1249 } 1120 } 1250 1121 1251 #ifdef __ARCH_WANT_COMPAT_SYS_PREADV64V2 1122 #ifdef __ARCH_WANT_COMPAT_SYS_PREADV64V2 1252 COMPAT_SYSCALL_DEFINE5(preadv64v2, unsigned l 1123 COMPAT_SYSCALL_DEFINE5(preadv64v2, unsigned long, fd, 1253 const struct iovec __user *, 1124 const struct iovec __user *, vec, 1254 unsigned long, vlen, loff_t, 1125 unsigned long, vlen, loff_t, pos, rwf_t, flags) 1255 { 1126 { 1256 if (pos == -1) 1127 if (pos == -1) 1257 return do_readv(fd, vec, vlen 1128 return do_readv(fd, vec, vlen, flags); 1258 return do_preadv(fd, vec, vlen, pos, 1129 return do_preadv(fd, vec, vlen, pos, flags); 1259 } 1130 } 1260 #endif 1131 #endif 1261 1132 1262 COMPAT_SYSCALL_DEFINE6(preadv2, compat_ulong_ 1133 COMPAT_SYSCALL_DEFINE6(preadv2, compat_ulong_t, fd, 1263 const struct iovec __user *, 1134 const struct iovec __user *, vec, 1264 compat_ulong_t, vlen, u32, po 1135 compat_ulong_t, vlen, u32, pos_low, u32, pos_high, 1265 rwf_t, flags) 1136 rwf_t, flags) 1266 { 1137 { 1267 loff_t pos = ((loff_t)pos_high << 32) 1138 loff_t pos = ((loff_t)pos_high << 32) | pos_low; 1268 1139 1269 if (pos == -1) 1140 if (pos == -1) 1270 return do_readv(fd, vec, vlen 1141 return do_readv(fd, vec, vlen, flags); 1271 return do_preadv(fd, vec, vlen, pos, 1142 return do_preadv(fd, vec, vlen, pos, flags); 1272 } 1143 } 1273 1144 1274 #ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64 1145 #ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64 1275 COMPAT_SYSCALL_DEFINE4(pwritev64, unsigned lo 1146 COMPAT_SYSCALL_DEFINE4(pwritev64, unsigned long, fd, 1276 const struct iovec __user *, 1147 const struct iovec __user *, vec, 1277 unsigned long, vlen, loff_t, 1148 unsigned long, vlen, loff_t, pos) 1278 { 1149 { 1279 return do_pwritev(fd, vec, vlen, pos, 1150 return do_pwritev(fd, vec, vlen, pos, 0); 1280 } 1151 } 1281 #endif 1152 #endif 1282 1153 1283 COMPAT_SYSCALL_DEFINE5(pwritev, compat_ulong_ 1154 COMPAT_SYSCALL_DEFINE5(pwritev, compat_ulong_t, fd, 1284 const struct iovec __user *,v 1155 const struct iovec __user *,vec, 1285 compat_ulong_t, vlen, u32, po 1156 compat_ulong_t, vlen, u32, pos_low, u32, pos_high) 1286 { 1157 { 1287 loff_t pos = ((loff_t)pos_high << 32) 1158 loff_t pos = ((loff_t)pos_high << 32) | pos_low; 1288 1159 1289 return do_pwritev(fd, vec, vlen, pos, 1160 return do_pwritev(fd, vec, vlen, pos, 0); 1290 } 1161 } 1291 1162 1292 #ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64V2 1163 #ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64V2 1293 COMPAT_SYSCALL_DEFINE5(pwritev64v2, unsigned 1164 COMPAT_SYSCALL_DEFINE5(pwritev64v2, unsigned long, fd, 1294 const struct iovec __user *, 1165 const struct iovec __user *, vec, 1295 unsigned long, vlen, loff_t, 1166 unsigned long, vlen, loff_t, pos, rwf_t, flags) 1296 { 1167 { 1297 if (pos == -1) 1168 if (pos == -1) 1298 return do_writev(fd, vec, vle 1169 return do_writev(fd, vec, vlen, flags); 1299 return do_pwritev(fd, vec, vlen, pos, 1170 return do_pwritev(fd, vec, vlen, pos, flags); 1300 } 1171 } 1301 #endif 1172 #endif 1302 1173 1303 COMPAT_SYSCALL_DEFINE6(pwritev2, compat_ulong 1174 COMPAT_SYSCALL_DEFINE6(pwritev2, compat_ulong_t, fd, 1304 const struct iovec __user *,v 1175 const struct iovec __user *,vec, 1305 compat_ulong_t, vlen, u32, po 1176 compat_ulong_t, vlen, u32, pos_low, u32, pos_high, rwf_t, flags) 1306 { 1177 { 1307 loff_t pos = ((loff_t)pos_high << 32) 1178 loff_t pos = ((loff_t)pos_high << 32) | pos_low; 1308 1179 1309 if (pos == -1) 1180 if (pos == -1) 1310 return do_writev(fd, vec, vle 1181 return do_writev(fd, vec, vlen, flags); 1311 return do_pwritev(fd, vec, vlen, pos, 1182 return do_pwritev(fd, vec, vlen, pos, flags); 1312 } 1183 } 1313 #endif /* CONFIG_COMPAT */ 1184 #endif /* CONFIG_COMPAT */ 1314 1185 1315 static ssize_t do_sendfile(int out_fd, int in 1186 static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, 1316 size_t count, loff !! 1187 size_t count, loff_t max) 1317 { 1188 { 1318 struct fd in, out; 1189 struct fd in, out; 1319 struct inode *in_inode, *out_inode; 1190 struct inode *in_inode, *out_inode; 1320 struct pipe_inode_info *opipe; << 1321 loff_t pos; 1191 loff_t pos; 1322 loff_t out_pos; 1192 loff_t out_pos; 1323 ssize_t retval; 1193 ssize_t retval; 1324 int fl; 1194 int fl; 1325 1195 1326 /* 1196 /* 1327 * Get input file, and verify that it 1197 * Get input file, and verify that it is ok.. 1328 */ 1198 */ 1329 retval = -EBADF; 1199 retval = -EBADF; 1330 in = fdget(in_fd); 1200 in = fdget(in_fd); 1331 if (!fd_file(in)) !! 1201 if (!in.file) 1332 goto out; 1202 goto out; 1333 if (!(fd_file(in)->f_mode & FMODE_REA !! 1203 if (!(in.file->f_mode & FMODE_READ)) 1334 goto fput_in; 1204 goto fput_in; 1335 retval = -ESPIPE; 1205 retval = -ESPIPE; 1336 if (!ppos) { 1206 if (!ppos) { 1337 pos = fd_file(in)->f_pos; !! 1207 pos = in.file->f_pos; 1338 } else { 1208 } else { 1339 pos = *ppos; 1209 pos = *ppos; 1340 if (!(fd_file(in)->f_mode & F !! 1210 if (!(in.file->f_mode & FMODE_PREAD)) 1341 goto fput_in; 1211 goto fput_in; 1342 } 1212 } 1343 retval = rw_verify_area(READ, fd_file !! 1213 retval = rw_verify_area(READ, in.file, &pos, count); 1344 if (retval < 0) 1214 if (retval < 0) 1345 goto fput_in; 1215 goto fput_in; 1346 if (count > MAX_RW_COUNT) 1216 if (count > MAX_RW_COUNT) 1347 count = MAX_RW_COUNT; 1217 count = MAX_RW_COUNT; 1348 1218 1349 /* 1219 /* 1350 * Get output file, and verify that i 1220 * Get output file, and verify that it is ok.. 1351 */ 1221 */ 1352 retval = -EBADF; 1222 retval = -EBADF; 1353 out = fdget(out_fd); 1223 out = fdget(out_fd); 1354 if (!fd_file(out)) !! 1224 if (!out.file) 1355 goto fput_in; 1225 goto fput_in; 1356 if (!(fd_file(out)->f_mode & FMODE_WR !! 1226 if (!(out.file->f_mode & FMODE_WRITE)) >> 1227 goto fput_out; >> 1228 in_inode = file_inode(in.file); >> 1229 out_inode = file_inode(out.file); >> 1230 out_pos = out.file->f_pos; >> 1231 retval = rw_verify_area(WRITE, out.file, &out_pos, count); >> 1232 if (retval < 0) 1357 goto fput_out; 1233 goto fput_out; 1358 in_inode = file_inode(fd_file(in)); << 1359 out_inode = file_inode(fd_file(out)); << 1360 out_pos = fd_file(out)->f_pos; << 1361 1234 1362 if (!max) 1235 if (!max) 1363 max = min(in_inode->i_sb->s_m 1236 max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes); 1364 1237 1365 if (unlikely(pos + count > max)) { 1238 if (unlikely(pos + count > max)) { 1366 retval = -EOVERFLOW; 1239 retval = -EOVERFLOW; 1367 if (pos >= max) 1240 if (pos >= max) 1368 goto fput_out; 1241 goto fput_out; 1369 count = max - pos; 1242 count = max - pos; 1370 } 1243 } 1371 1244 1372 fl = 0; 1245 fl = 0; 1373 #if 0 1246 #if 0 1374 /* 1247 /* 1375 * We need to debate whether we can e 1248 * We need to debate whether we can enable this or not. The 1376 * man page documents EAGAIN return f 1249 * man page documents EAGAIN return for the output at least, 1377 * and the application is arguably bu 1250 * and the application is arguably buggy if it doesn't expect 1378 * EAGAIN on a non-blocking file desc 1251 * EAGAIN on a non-blocking file descriptor. 1379 */ 1252 */ 1380 if (fd_file(in)->f_flags & O_NONBLOCK !! 1253 if (in.file->f_flags & O_NONBLOCK) 1381 fl = SPLICE_F_NONBLOCK; 1254 fl = SPLICE_F_NONBLOCK; 1382 #endif 1255 #endif 1383 opipe = get_pipe_info(fd_file(out), t !! 1256 file_start_write(out.file); 1384 if (!opipe) { !! 1257 retval = do_splice_direct(in.file, &pos, out.file, &out_pos, count, fl); 1385 retval = rw_verify_area(WRITE !! 1258 file_end_write(out.file); 1386 if (retval < 0) << 1387 goto fput_out; << 1388 retval = do_splice_direct(fd_ << 1389 cou << 1390 } else { << 1391 if (fd_file(out)->f_flags & O << 1392 fl |= SPLICE_F_NONBLO << 1393 << 1394 retval = splice_file_to_pipe( << 1395 } << 1396 1259 1397 if (retval > 0) { 1260 if (retval > 0) { 1398 add_rchar(current, retval); 1261 add_rchar(current, retval); 1399 add_wchar(current, retval); 1262 add_wchar(current, retval); 1400 fsnotify_access(fd_file(in)); !! 1263 fsnotify_access(in.file); 1401 fsnotify_modify(fd_file(out)) !! 1264 fsnotify_modify(out.file); 1402 fd_file(out)->f_pos = out_pos !! 1265 out.file->f_pos = out_pos; 1403 if (ppos) 1266 if (ppos) 1404 *ppos = pos; 1267 *ppos = pos; 1405 else 1268 else 1406 fd_file(in)->f_pos = !! 1269 in.file->f_pos = pos; 1407 } 1270 } 1408 1271 1409 inc_syscr(current); 1272 inc_syscr(current); 1410 inc_syscw(current); 1273 inc_syscw(current); 1411 if (pos > max) 1274 if (pos > max) 1412 retval = -EOVERFLOW; 1275 retval = -EOVERFLOW; 1413 1276 1414 fput_out: 1277 fput_out: 1415 fdput(out); 1278 fdput(out); 1416 fput_in: 1279 fput_in: 1417 fdput(in); 1280 fdput(in); 1418 out: 1281 out: 1419 return retval; 1282 return retval; 1420 } 1283 } 1421 1284 1422 SYSCALL_DEFINE4(sendfile, int, out_fd, int, i 1285 SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd, off_t __user *, offset, size_t, count) 1423 { 1286 { 1424 loff_t pos; 1287 loff_t pos; 1425 off_t off; 1288 off_t off; 1426 ssize_t ret; 1289 ssize_t ret; 1427 1290 1428 if (offset) { 1291 if (offset) { 1429 if (unlikely(get_user(off, of 1292 if (unlikely(get_user(off, offset))) 1430 return -EFAULT; 1293 return -EFAULT; 1431 pos = off; 1294 pos = off; 1432 ret = do_sendfile(out_fd, in_ 1295 ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS); 1433 if (unlikely(put_user(pos, of 1296 if (unlikely(put_user(pos, offset))) 1434 return -EFAULT; 1297 return -EFAULT; 1435 return ret; 1298 return ret; 1436 } 1299 } 1437 1300 1438 return do_sendfile(out_fd, in_fd, NUL 1301 return do_sendfile(out_fd, in_fd, NULL, count, 0); 1439 } 1302 } 1440 1303 1441 SYSCALL_DEFINE4(sendfile64, int, out_fd, int, 1304 SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd, loff_t __user *, offset, size_t, count) 1442 { 1305 { 1443 loff_t pos; 1306 loff_t pos; 1444 ssize_t ret; 1307 ssize_t ret; 1445 1308 1446 if (offset) { 1309 if (offset) { 1447 if (unlikely(copy_from_user(& 1310 if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t)))) 1448 return -EFAULT; 1311 return -EFAULT; 1449 ret = do_sendfile(out_fd, in_ 1312 ret = do_sendfile(out_fd, in_fd, &pos, count, 0); 1450 if (unlikely(put_user(pos, of 1313 if (unlikely(put_user(pos, offset))) 1451 return -EFAULT; 1314 return -EFAULT; 1452 return ret; 1315 return ret; 1453 } 1316 } 1454 1317 1455 return do_sendfile(out_fd, in_fd, NUL 1318 return do_sendfile(out_fd, in_fd, NULL, count, 0); 1456 } 1319 } 1457 1320 1458 #ifdef CONFIG_COMPAT 1321 #ifdef CONFIG_COMPAT 1459 COMPAT_SYSCALL_DEFINE4(sendfile, int, out_fd, 1322 COMPAT_SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd, 1460 compat_off_t __user *, offset 1323 compat_off_t __user *, offset, compat_size_t, count) 1461 { 1324 { 1462 loff_t pos; 1325 loff_t pos; 1463 off_t off; 1326 off_t off; 1464 ssize_t ret; 1327 ssize_t ret; 1465 1328 1466 if (offset) { 1329 if (offset) { 1467 if (unlikely(get_user(off, of 1330 if (unlikely(get_user(off, offset))) 1468 return -EFAULT; 1331 return -EFAULT; 1469 pos = off; 1332 pos = off; 1470 ret = do_sendfile(out_fd, in_ 1333 ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS); 1471 if (unlikely(put_user(pos, of 1334 if (unlikely(put_user(pos, offset))) 1472 return -EFAULT; 1335 return -EFAULT; 1473 return ret; 1336 return ret; 1474 } 1337 } 1475 1338 1476 return do_sendfile(out_fd, in_fd, NUL 1339 return do_sendfile(out_fd, in_fd, NULL, count, 0); 1477 } 1340 } 1478 1341 1479 COMPAT_SYSCALL_DEFINE4(sendfile64, int, out_f 1342 COMPAT_SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd, 1480 compat_loff_t __user *, offse 1343 compat_loff_t __user *, offset, compat_size_t, count) 1481 { 1344 { 1482 loff_t pos; 1345 loff_t pos; 1483 ssize_t ret; 1346 ssize_t ret; 1484 1347 1485 if (offset) { 1348 if (offset) { 1486 if (unlikely(copy_from_user(& 1349 if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t)))) 1487 return -EFAULT; 1350 return -EFAULT; 1488 ret = do_sendfile(out_fd, in_ 1351 ret = do_sendfile(out_fd, in_fd, &pos, count, 0); 1489 if (unlikely(put_user(pos, of 1352 if (unlikely(put_user(pos, offset))) 1490 return -EFAULT; 1353 return -EFAULT; 1491 return ret; 1354 return ret; 1492 } 1355 } 1493 1356 1494 return do_sendfile(out_fd, in_fd, NUL 1357 return do_sendfile(out_fd, in_fd, NULL, count, 0); 1495 } 1358 } 1496 #endif 1359 #endif 1497 1360 >> 1361 /** >> 1362 * generic_copy_file_range - copy data between two files >> 1363 * @file_in: file structure to read from >> 1364 * @pos_in: file offset to read from >> 1365 * @file_out: file structure to write data to >> 1366 * @pos_out: file offset to write data to >> 1367 * @len: amount of data to copy >> 1368 * @flags: copy flags >> 1369 * >> 1370 * This is a generic filesystem helper to copy data from one file to another. >> 1371 * It has no constraints on the source or destination file owners - the files >> 1372 * can belong to different superblocks and different filesystem types. Short >> 1373 * copies are allowed. >> 1374 * >> 1375 * This should be called from the @file_out filesystem, as per the >> 1376 * ->copy_file_range() method. >> 1377 * >> 1378 * Returns the number of bytes copied or a negative error indicating the >> 1379 * failure. >> 1380 */ >> 1381 >> 1382 ssize_t generic_copy_file_range(struct file *file_in, loff_t pos_in, >> 1383 struct file *file_out, loff_t pos_out, >> 1384 size_t len, unsigned int flags) >> 1385 { >> 1386 return do_splice_direct(file_in, &pos_in, file_out, &pos_out, >> 1387 len > MAX_RW_COUNT ? MAX_RW_COUNT : len, 0); >> 1388 } >> 1389 EXPORT_SYMBOL(generic_copy_file_range); >> 1390 1498 /* 1391 /* 1499 * Performs necessary checks before doing a f 1392 * Performs necessary checks before doing a file copy 1500 * 1393 * 1501 * Can adjust amount of bytes to copy via @re 1394 * Can adjust amount of bytes to copy via @req_count argument. 1502 * Returns appropriate error code that caller 1395 * Returns appropriate error code that caller should return or 1503 * zero in case the copy should be allowed. 1396 * zero in case the copy should be allowed. 1504 */ 1397 */ 1505 static int generic_copy_file_checks(struct fi 1398 static int generic_copy_file_checks(struct file *file_in, loff_t pos_in, 1506 struct fi 1399 struct file *file_out, loff_t pos_out, 1507 size_t *r 1400 size_t *req_count, unsigned int flags) 1508 { 1401 { 1509 struct inode *inode_in = file_inode(f 1402 struct inode *inode_in = file_inode(file_in); 1510 struct inode *inode_out = file_inode( 1403 struct inode *inode_out = file_inode(file_out); 1511 uint64_t count = *req_count; 1404 uint64_t count = *req_count; 1512 loff_t size_in; 1405 loff_t size_in; 1513 int ret; 1406 int ret; 1514 1407 1515 ret = generic_file_rw_checks(file_in, 1408 ret = generic_file_rw_checks(file_in, file_out); 1516 if (ret) 1409 if (ret) 1517 return ret; 1410 return ret; 1518 1411 1519 /* 1412 /* 1520 * We allow some filesystems to handl 1413 * We allow some filesystems to handle cross sb copy, but passing 1521 * a file of the wrong filesystem typ 1414 * a file of the wrong filesystem type to filesystem driver can result 1522 * in an attempt to dereference the w 1415 * in an attempt to dereference the wrong type of ->private_data, so 1523 * avoid doing that until we really h 1416 * avoid doing that until we really have a good reason. 1524 * 1417 * 1525 * nfs and cifs define several differ 1418 * nfs and cifs define several different file_system_type structures 1526 * and several different sets of file 1419 * and several different sets of file_operations, but they all end up 1527 * using the same ->copy_file_range() 1420 * using the same ->copy_file_range() function pointer. 1528 */ 1421 */ 1529 if (flags & COPY_FILE_SPLICE) { 1422 if (flags & COPY_FILE_SPLICE) { 1530 /* cross sb splice is allowed 1423 /* cross sb splice is allowed */ 1531 } else if (file_out->f_op->copy_file_ 1424 } else if (file_out->f_op->copy_file_range) { 1532 if (file_in->f_op->copy_file_ 1425 if (file_in->f_op->copy_file_range != 1533 file_out->f_op->copy_file 1426 file_out->f_op->copy_file_range) 1534 return -EXDEV; 1427 return -EXDEV; 1535 } else if (file_inode(file_in)->i_sb 1428 } else if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb) { 1536 return -EXDEV; 1429 return -EXDEV; 1537 } 1430 } 1538 1431 1539 /* Don't touch certain kinds of inode 1432 /* Don't touch certain kinds of inodes */ 1540 if (IS_IMMUTABLE(inode_out)) 1433 if (IS_IMMUTABLE(inode_out)) 1541 return -EPERM; 1434 return -EPERM; 1542 1435 1543 if (IS_SWAPFILE(inode_in) || IS_SWAPF 1436 if (IS_SWAPFILE(inode_in) || IS_SWAPFILE(inode_out)) 1544 return -ETXTBSY; 1437 return -ETXTBSY; 1545 1438 1546 /* Ensure offsets don't wrap. */ 1439 /* Ensure offsets don't wrap. */ 1547 if (pos_in + count < pos_in || pos_ou 1440 if (pos_in + count < pos_in || pos_out + count < pos_out) 1548 return -EOVERFLOW; 1441 return -EOVERFLOW; 1549 1442 1550 /* Shorten the copy to EOF */ 1443 /* Shorten the copy to EOF */ 1551 size_in = i_size_read(inode_in); 1444 size_in = i_size_read(inode_in); 1552 if (pos_in >= size_in) 1445 if (pos_in >= size_in) 1553 count = 0; 1446 count = 0; 1554 else 1447 else 1555 count = min(count, size_in - 1448 count = min(count, size_in - (uint64_t)pos_in); 1556 1449 1557 ret = generic_write_check_limits(file 1450 ret = generic_write_check_limits(file_out, pos_out, &count); 1558 if (ret) 1451 if (ret) 1559 return ret; 1452 return ret; 1560 1453 1561 /* Don't allow overlapped copying wit 1454 /* Don't allow overlapped copying within the same file. */ 1562 if (inode_in == inode_out && 1455 if (inode_in == inode_out && 1563 pos_out + count > pos_in && 1456 pos_out + count > pos_in && 1564 pos_out < pos_in + count) 1457 pos_out < pos_in + count) 1565 return -EINVAL; 1458 return -EINVAL; 1566 1459 1567 *req_count = count; 1460 *req_count = count; 1568 return 0; 1461 return 0; 1569 } 1462 } 1570 1463 1571 /* 1464 /* 1572 * copy_file_range() differs from regular fil 1465 * copy_file_range() differs from regular file read and write in that it 1573 * specifically allows return partial success 1466 * specifically allows return partial success. When it does so is up to 1574 * the copy_file_range method. 1467 * the copy_file_range method. 1575 */ 1468 */ 1576 ssize_t vfs_copy_file_range(struct file *file 1469 ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, 1577 struct file *file 1470 struct file *file_out, loff_t pos_out, 1578 size_t len, unsig 1471 size_t len, unsigned int flags) 1579 { 1472 { 1580 ssize_t ret; 1473 ssize_t ret; 1581 bool splice = flags & COPY_FILE_SPLIC 1474 bool splice = flags & COPY_FILE_SPLICE; 1582 bool samesb = file_inode(file_in)->i_ << 1583 1475 1584 if (flags & ~COPY_FILE_SPLICE) 1476 if (flags & ~COPY_FILE_SPLICE) 1585 return -EINVAL; 1477 return -EINVAL; 1586 1478 1587 ret = generic_copy_file_checks(file_i 1479 ret = generic_copy_file_checks(file_in, pos_in, file_out, pos_out, &len, 1588 flags) 1480 flags); 1589 if (unlikely(ret)) 1481 if (unlikely(ret)) 1590 return ret; 1482 return ret; 1591 1483 1592 ret = rw_verify_area(READ, file_in, & 1484 ret = rw_verify_area(READ, file_in, &pos_in, len); 1593 if (unlikely(ret)) 1485 if (unlikely(ret)) 1594 return ret; 1486 return ret; 1595 1487 1596 ret = rw_verify_area(WRITE, file_out, 1488 ret = rw_verify_area(WRITE, file_out, &pos_out, len); 1597 if (unlikely(ret)) 1489 if (unlikely(ret)) 1598 return ret; 1490 return ret; 1599 1491 1600 if (len == 0) 1492 if (len == 0) 1601 return 0; 1493 return 0; 1602 1494 1603 file_start_write(file_out); 1495 file_start_write(file_out); 1604 1496 1605 /* 1497 /* 1606 * Cloning is supported by more file 1498 * Cloning is supported by more file systems, so we implement copy on 1607 * same sb using clone, but for files 1499 * same sb using clone, but for filesystems where both clone and copy 1608 * are supported (e.g. nfs,cifs), we 1500 * are supported (e.g. nfs,cifs), we only call the copy method. 1609 */ 1501 */ 1610 if (!splice && file_out->f_op->copy_f 1502 if (!splice && file_out->f_op->copy_file_range) { 1611 ret = file_out->f_op->copy_fi 1503 ret = file_out->f_op->copy_file_range(file_in, pos_in, 1612 1504 file_out, pos_out, 1613 1505 len, flags); 1614 } else if (!splice && file_in->f_op-> !! 1506 goto done; >> 1507 } >> 1508 >> 1509 if (!splice && file_in->f_op->remap_file_range && >> 1510 file_inode(file_in)->i_sb == file_inode(file_out)->i_sb) { 1615 ret = file_in->f_op->remap_fi 1511 ret = file_in->f_op->remap_file_range(file_in, pos_in, 1616 file_out, pos 1512 file_out, pos_out, 1617 min_t(loff_t, 1513 min_t(loff_t, MAX_RW_COUNT, len), 1618 REMAP_FILE_CA 1514 REMAP_FILE_CAN_SHORTEN); 1619 /* fallback to splice */ !! 1515 if (ret > 0) 1620 if (ret <= 0) !! 1516 goto done; 1621 splice = true; << 1622 } else if (samesb) { << 1623 /* Fallback to splice for sam << 1624 splice = true; << 1625 } 1517 } 1626 1518 1627 file_end_write(file_out); << 1628 << 1629 if (!splice) << 1630 goto done; << 1631 << 1632 /* 1519 /* 1633 * We can get here for same sb copy o 1520 * We can get here for same sb copy of filesystems that do not implement 1634 * ->copy_file_range() in case filesy 1521 * ->copy_file_range() in case filesystem does not support clone or in 1635 * case filesystem supports clone but 1522 * case filesystem supports clone but rejected the clone request (e.g. 1636 * because it was not block aligned). 1523 * because it was not block aligned). 1637 * 1524 * 1638 * In both cases, fall back to kernel 1525 * In both cases, fall back to kernel copy so we are able to maintain a 1639 * consistent story about which files 1526 * consistent story about which filesystems support copy_file_range() 1640 * and which filesystems do not, that 1527 * and which filesystems do not, that will allow userspace tools to 1641 * make consistent desicions w.r.t us 1528 * make consistent desicions w.r.t using copy_file_range(). 1642 * 1529 * 1643 * We also get here if caller (e.g. n !! 1530 * We also get here if caller (e.g. nfsd) requested COPY_FILE_SPLICE. 1644 * for server-side-copy between any t << 1645 * << 1646 * In any case, we call do_splice_dir << 1647 * without file_start_write() held, t << 1648 * to splicing from input file, while << 1649 * the output file on a different sb. << 1650 */ 1531 */ 1651 ret = do_splice_direct(file_in, &pos_ !! 1532 ret = generic_copy_file_range(file_in, pos_in, file_out, pos_out, len, 1652 min_t(size_t, !! 1533 flags); >> 1534 1653 done: 1535 done: 1654 if (ret > 0) { 1536 if (ret > 0) { 1655 fsnotify_access(file_in); 1537 fsnotify_access(file_in); 1656 add_rchar(current, ret); 1538 add_rchar(current, ret); 1657 fsnotify_modify(file_out); 1539 fsnotify_modify(file_out); 1658 add_wchar(current, ret); 1540 add_wchar(current, ret); 1659 } 1541 } 1660 1542 1661 inc_syscr(current); 1543 inc_syscr(current); 1662 inc_syscw(current); 1544 inc_syscw(current); 1663 1545 >> 1546 file_end_write(file_out); >> 1547 1664 return ret; 1548 return ret; 1665 } 1549 } 1666 EXPORT_SYMBOL(vfs_copy_file_range); 1550 EXPORT_SYMBOL(vfs_copy_file_range); 1667 1551 1668 SYSCALL_DEFINE6(copy_file_range, int, fd_in, 1552 SYSCALL_DEFINE6(copy_file_range, int, fd_in, loff_t __user *, off_in, 1669 int, fd_out, loff_t __user *, 1553 int, fd_out, loff_t __user *, off_out, 1670 size_t, len, unsigned int, fl 1554 size_t, len, unsigned int, flags) 1671 { 1555 { 1672 loff_t pos_in; 1556 loff_t pos_in; 1673 loff_t pos_out; 1557 loff_t pos_out; 1674 struct fd f_in; 1558 struct fd f_in; 1675 struct fd f_out; 1559 struct fd f_out; 1676 ssize_t ret = -EBADF; 1560 ssize_t ret = -EBADF; 1677 1561 1678 f_in = fdget(fd_in); 1562 f_in = fdget(fd_in); 1679 if (!fd_file(f_in)) !! 1563 if (!f_in.file) 1680 goto out2; 1564 goto out2; 1681 1565 1682 f_out = fdget(fd_out); 1566 f_out = fdget(fd_out); 1683 if (!fd_file(f_out)) !! 1567 if (!f_out.file) 1684 goto out1; 1568 goto out1; 1685 1569 1686 ret = -EFAULT; 1570 ret = -EFAULT; 1687 if (off_in) { 1571 if (off_in) { 1688 if (copy_from_user(&pos_in, o 1572 if (copy_from_user(&pos_in, off_in, sizeof(loff_t))) 1689 goto out; 1573 goto out; 1690 } else { 1574 } else { 1691 pos_in = fd_file(f_in)->f_pos !! 1575 pos_in = f_in.file->f_pos; 1692 } 1576 } 1693 1577 1694 if (off_out) { 1578 if (off_out) { 1695 if (copy_from_user(&pos_out, 1579 if (copy_from_user(&pos_out, off_out, sizeof(loff_t))) 1696 goto out; 1580 goto out; 1697 } else { 1581 } else { 1698 pos_out = fd_file(f_out)->f_p !! 1582 pos_out = f_out.file->f_pos; 1699 } 1583 } 1700 1584 1701 ret = -EINVAL; 1585 ret = -EINVAL; 1702 if (flags != 0) 1586 if (flags != 0) 1703 goto out; 1587 goto out; 1704 1588 1705 ret = vfs_copy_file_range(fd_file(f_i !! 1589 ret = vfs_copy_file_range(f_in.file, pos_in, f_out.file, pos_out, len, 1706 flags); 1590 flags); 1707 if (ret > 0) { 1591 if (ret > 0) { 1708 pos_in += ret; 1592 pos_in += ret; 1709 pos_out += ret; 1593 pos_out += ret; 1710 1594 1711 if (off_in) { 1595 if (off_in) { 1712 if (copy_to_user(off_ 1596 if (copy_to_user(off_in, &pos_in, sizeof(loff_t))) 1713 ret = -EFAULT 1597 ret = -EFAULT; 1714 } else { 1598 } else { 1715 fd_file(f_in)->f_pos !! 1599 f_in.file->f_pos = pos_in; 1716 } 1600 } 1717 1601 1718 if (off_out) { 1602 if (off_out) { 1719 if (copy_to_user(off_ 1603 if (copy_to_user(off_out, &pos_out, sizeof(loff_t))) 1720 ret = -EFAULT 1604 ret = -EFAULT; 1721 } else { 1605 } else { 1722 fd_file(f_out)->f_pos !! 1606 f_out.file->f_pos = pos_out; 1723 } 1607 } 1724 } 1608 } 1725 1609 1726 out: 1610 out: 1727 fdput(f_out); 1611 fdput(f_out); 1728 out1: 1612 out1: 1729 fdput(f_in); 1613 fdput(f_in); 1730 out2: 1614 out2: 1731 return ret; 1615 return ret; 1732 } 1616 } 1733 1617 1734 /* 1618 /* 1735 * Don't operate on ranges the page cache doe 1619 * Don't operate on ranges the page cache doesn't support, and don't exceed the 1736 * LFS limits. If pos is under the limit it 1620 * LFS limits. If pos is under the limit it becomes a short access. If it 1737 * exceeds the limit we return -EFBIG. 1621 * exceeds the limit we return -EFBIG. 1738 */ 1622 */ 1739 int generic_write_check_limits(struct file *f 1623 int generic_write_check_limits(struct file *file, loff_t pos, loff_t *count) 1740 { 1624 { 1741 struct inode *inode = file->f_mapping 1625 struct inode *inode = file->f_mapping->host; 1742 loff_t max_size = inode->i_sb->s_maxb 1626 loff_t max_size = inode->i_sb->s_maxbytes; 1743 loff_t limit = rlimit(RLIMIT_FSIZE); 1627 loff_t limit = rlimit(RLIMIT_FSIZE); 1744 1628 1745 if (limit != RLIM_INFINITY) { 1629 if (limit != RLIM_INFINITY) { 1746 if (pos >= limit) { 1630 if (pos >= limit) { 1747 send_sig(SIGXFSZ, cur 1631 send_sig(SIGXFSZ, current, 0); 1748 return -EFBIG; 1632 return -EFBIG; 1749 } 1633 } 1750 *count = min(*count, limit - 1634 *count = min(*count, limit - pos); 1751 } 1635 } 1752 1636 1753 if (!(file->f_flags & O_LARGEFILE)) 1637 if (!(file->f_flags & O_LARGEFILE)) 1754 max_size = MAX_NON_LFS; 1638 max_size = MAX_NON_LFS; 1755 1639 1756 if (unlikely(pos >= max_size)) 1640 if (unlikely(pos >= max_size)) 1757 return -EFBIG; 1641 return -EFBIG; 1758 1642 1759 *count = min(*count, max_size - pos); 1643 *count = min(*count, max_size - pos); 1760 1644 1761 return 0; 1645 return 0; 1762 } 1646 } 1763 EXPORT_SYMBOL_GPL(generic_write_check_limits) << 1764 1647 1765 /* Like generic_write_checks(), but takes siz !! 1648 /* 1766 int generic_write_checks_count(struct kiocb * !! 1649 * Performs necessary checks before doing a write >> 1650 * >> 1651 * Can adjust writing position or amount of bytes to write. >> 1652 * Returns appropriate error code that caller should return or >> 1653 * zero in case that write should be allowed. >> 1654 */ >> 1655 ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from) 1767 { 1656 { 1768 struct file *file = iocb->ki_filp; 1657 struct file *file = iocb->ki_filp; 1769 struct inode *inode = file->f_mapping 1658 struct inode *inode = file->f_mapping->host; >> 1659 loff_t count; >> 1660 int ret; 1770 1661 1771 if (IS_SWAPFILE(inode)) 1662 if (IS_SWAPFILE(inode)) 1772 return -ETXTBSY; 1663 return -ETXTBSY; 1773 1664 1774 if (!*count) !! 1665 if (!iov_iter_count(from)) 1775 return 0; 1666 return 0; 1776 1667 >> 1668 /* FIXME: this is for backwards compatibility with 2.4 */ 1777 if (iocb->ki_flags & IOCB_APPEND) 1669 if (iocb->ki_flags & IOCB_APPEND) 1778 iocb->ki_pos = i_size_read(in 1670 iocb->ki_pos = i_size_read(inode); 1779 1671 1780 if ((iocb->ki_flags & IOCB_NOWAIT) && !! 1672 if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT)) 1781 !((iocb->ki_flags & IOCB_DIRECT) << 1782 (file->f_op->fop_flags & FOP_BU << 1783 return -EINVAL; 1673 return -EINVAL; 1784 1674 1785 return generic_write_check_limits(ioc !! 1675 count = iov_iter_count(from); 1786 } !! 1676 ret = generic_write_check_limits(file, iocb->ki_pos, &count); 1787 EXPORT_SYMBOL(generic_write_checks_count); << 1788 << 1789 /* << 1790 * Performs necessary checks before doing a w << 1791 * << 1792 * Can adjust writing position or amount of b << 1793 * Returns appropriate error code that caller << 1794 * zero in case that write should be allowed. << 1795 */ << 1796 ssize_t generic_write_checks(struct kiocb *io << 1797 { << 1798 loff_t count = iov_iter_count(from); << 1799 int ret; << 1800 << 1801 ret = generic_write_checks_count(iocb << 1802 if (ret) 1677 if (ret) 1803 return ret; 1678 return ret; 1804 1679 1805 iov_iter_truncate(from, count); 1680 iov_iter_truncate(from, count); 1806 return iov_iter_count(from); 1681 return iov_iter_count(from); 1807 } 1682 } 1808 EXPORT_SYMBOL(generic_write_checks); 1683 EXPORT_SYMBOL(generic_write_checks); 1809 1684 1810 /* 1685 /* 1811 * Performs common checks before doing a file 1686 * Performs common checks before doing a file copy/clone 1812 * from @file_in to @file_out. 1687 * from @file_in to @file_out. 1813 */ 1688 */ 1814 int generic_file_rw_checks(struct file *file_ 1689 int generic_file_rw_checks(struct file *file_in, struct file *file_out) 1815 { 1690 { 1816 struct inode *inode_in = file_inode(f 1691 struct inode *inode_in = file_inode(file_in); 1817 struct inode *inode_out = file_inode( 1692 struct inode *inode_out = file_inode(file_out); 1818 1693 1819 /* Don't copy dirs, pipes, sockets... 1694 /* Don't copy dirs, pipes, sockets... */ 1820 if (S_ISDIR(inode_in->i_mode) || S_IS 1695 if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode)) 1821 return -EISDIR; 1696 return -EISDIR; 1822 if (!S_ISREG(inode_in->i_mode) || !S_ 1697 if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) 1823 return -EINVAL; 1698 return -EINVAL; 1824 1699 1825 if (!(file_in->f_mode & FMODE_READ) | 1700 if (!(file_in->f_mode & FMODE_READ) || 1826 !(file_out->f_mode & FMODE_WRITE) 1701 !(file_out->f_mode & FMODE_WRITE) || 1827 (file_out->f_flags & O_APPEND)) 1702 (file_out->f_flags & O_APPEND)) 1828 return -EBADF; 1703 return -EBADF; 1829 1704 1830 return 0; 1705 return 0; 1831 } << 1832 << 1833 bool generic_atomic_write_valid(struct iov_it << 1834 { << 1835 size_t len = iov_iter_count(iter); << 1836 << 1837 if (!iter_is_ubuf(iter)) << 1838 return false; << 1839 << 1840 if (!is_power_of_2(len)) << 1841 return false; << 1842 << 1843 if (!IS_ALIGNED(pos, len)) << 1844 return false; << 1845 << 1846 return true; << 1847 } 1706 } 1848 1707
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.