~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/kernel/trace/trace.c

Version: ~ [ linux-6.11.5 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.58 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.114 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.169 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.228 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.284 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.322 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.9 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 // SPDX-License-Identifier: GPL-2.0
  2 /*
  3  * ring buffer based function tracer
  4  *
  5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
  6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
  7  *
  8  * Originally taken from the RT patch by:
  9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
 10  *
 11  * Based on code from the latency_tracer, that is:
 12  *  Copyright (C) 2004-2006 Ingo Molnar
 13  *  Copyright (C) 2004 Nadia Yvette Chambers
 14  */
 15 #include <linux/ring_buffer.h>
 16 #include <linux/utsname.h>
 17 #include <linux/stacktrace.h>
 18 #include <linux/writeback.h>
 19 #include <linux/kallsyms.h>
 20 #include <linux/security.h>
 21 #include <linux/seq_file.h>
 22 #include <linux/irqflags.h>
 23 #include <linux/debugfs.h>
 24 #include <linux/tracefs.h>
 25 #include <linux/pagemap.h>
 26 #include <linux/hardirq.h>
 27 #include <linux/linkage.h>
 28 #include <linux/uaccess.h>
 29 #include <linux/vmalloc.h>
 30 #include <linux/ftrace.h>
 31 #include <linux/module.h>
 32 #include <linux/percpu.h>
 33 #include <linux/splice.h>
 34 #include <linux/kdebug.h>
 35 #include <linux/string.h>
 36 #include <linux/mount.h>
 37 #include <linux/rwsem.h>
 38 #include <linux/slab.h>
 39 #include <linux/ctype.h>
 40 #include <linux/init.h>
 41 #include <linux/panic_notifier.h>
 42 #include <linux/poll.h>
 43 #include <linux/nmi.h>
 44 #include <linux/fs.h>
 45 #include <linux/trace.h>
 46 #include <linux/sched/clock.h>
 47 #include <linux/sched/rt.h>
 48 #include <linux/fsnotify.h>
 49 #include <linux/irq_work.h>
 50 #include <linux/workqueue.h>
 51 
 52 #include <asm/setup.h> /* COMMAND_LINE_SIZE */
 53 
 54 #include "trace.h"
 55 #include "trace_output.h"
 56 
 57 #ifdef CONFIG_FTRACE_STARTUP_TEST
 58 /*
 59  * We need to change this state when a selftest is running.
 60  * A selftest will lurk into the ring-buffer to count the
 61  * entries inserted during the selftest although some concurrent
 62  * insertions into the ring-buffer such as trace_printk could occurred
 63  * at the same time, giving false positive or negative results.
 64  */
 65 static bool __read_mostly tracing_selftest_running;
 66 
 67 /*
 68  * If boot-time tracing including tracers/events via kernel cmdline
 69  * is running, we do not want to run SELFTEST.
 70  */
 71 bool __read_mostly tracing_selftest_disabled;
 72 
 73 void __init disable_tracing_selftest(const char *reason)
 74 {
 75         if (!tracing_selftest_disabled) {
 76                 tracing_selftest_disabled = true;
 77                 pr_info("Ftrace startup test is disabled due to %s\n", reason);
 78         }
 79 }
 80 #else
 81 #define tracing_selftest_running        0
 82 #define tracing_selftest_disabled       0
 83 #endif
 84 
 85 /* Pipe tracepoints to printk */
 86 static struct trace_iterator *tracepoint_print_iter;
 87 int tracepoint_printk;
 88 static bool tracepoint_printk_stop_on_boot __initdata;
 89 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
 90 
 91 /* For tracers that don't implement custom flags */
 92 static struct tracer_opt dummy_tracer_opt[] = {
 93         { }
 94 };
 95 
 96 static int
 97 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
 98 {
 99         return 0;
100 }
101 
102 /*
103  * To prevent the comm cache from being overwritten when no
104  * tracing is active, only save the comm when a trace event
105  * occurred.
106  */
107 DEFINE_PER_CPU(bool, trace_taskinfo_save);
108 
109 /*
110  * Kill all tracing for good (never come back).
111  * It is initialized to 1 but will turn to zero if the initialization
112  * of the tracer is successful. But that is the only place that sets
113  * this back to zero.
114  */
115 static int tracing_disabled = 1;
116 
117 cpumask_var_t __read_mostly     tracing_buffer_mask;
118 
119 /*
120  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
121  *
122  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
123  * is set, then ftrace_dump is called. This will output the contents
124  * of the ftrace buffers to the console.  This is very useful for
125  * capturing traces that lead to crashes and outputing it to a
126  * serial console.
127  *
128  * It is default off, but you can enable it with either specifying
129  * "ftrace_dump_on_oops" in the kernel command line, or setting
130  * /proc/sys/kernel/ftrace_dump_on_oops
131  * Set 1 if you want to dump buffers of all CPUs
132  * Set 2 if you want to dump the buffer of the CPU that triggered oops
133  * Set instance name if you want to dump the specific trace instance
134  * Multiple instance dump is also supported, and instances are seperated
135  * by commas.
136  */
137 /* Set to string format zero to disable by default */
138 char ftrace_dump_on_oops[MAX_TRACER_SIZE] = "";
139 
140 /* When set, tracing will stop when a WARN*() is hit */
141 int __disable_trace_on_warning;
142 
143 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
144 /* Map of enums to their values, for "eval_map" file */
145 struct trace_eval_map_head {
146         struct module                   *mod;
147         unsigned long                   length;
148 };
149 
150 union trace_eval_map_item;
151 
152 struct trace_eval_map_tail {
153         /*
154          * "end" is first and points to NULL as it must be different
155          * than "mod" or "eval_string"
156          */
157         union trace_eval_map_item       *next;
158         const char                      *end;   /* points to NULL */
159 };
160 
161 static DEFINE_MUTEX(trace_eval_mutex);
162 
163 /*
164  * The trace_eval_maps are saved in an array with two extra elements,
165  * one at the beginning, and one at the end. The beginning item contains
166  * the count of the saved maps (head.length), and the module they
167  * belong to if not built in (head.mod). The ending item contains a
168  * pointer to the next array of saved eval_map items.
169  */
170 union trace_eval_map_item {
171         struct trace_eval_map           map;
172         struct trace_eval_map_head      head;
173         struct trace_eval_map_tail      tail;
174 };
175 
176 static union trace_eval_map_item *trace_eval_maps;
177 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
178 
179 int tracing_set_tracer(struct trace_array *tr, const char *buf);
180 static void ftrace_trace_userstack(struct trace_array *tr,
181                                    struct trace_buffer *buffer,
182                                    unsigned int trace_ctx);
183 
184 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
185 static char *default_bootup_tracer;
186 
187 static bool allocate_snapshot;
188 static bool snapshot_at_boot;
189 
190 static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
191 static int boot_instance_index;
192 
193 static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
194 static int boot_snapshot_index;
195 
196 static int __init set_cmdline_ftrace(char *str)
197 {
198         strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
199         default_bootup_tracer = bootup_tracer_buf;
200         /* We are using ftrace early, expand it */
201         trace_set_ring_buffer_expanded(NULL);
202         return 1;
203 }
204 __setup("ftrace=", set_cmdline_ftrace);
205 
206 int ftrace_dump_on_oops_enabled(void)
207 {
208         if (!strcmp("", ftrace_dump_on_oops))
209                 return 0;
210         else
211                 return 1;
212 }
213 
214 static int __init set_ftrace_dump_on_oops(char *str)
215 {
216         if (!*str) {
217                 strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
218                 return 1;
219         }
220 
221         if (*str == ',') {
222                 strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
223                 strscpy(ftrace_dump_on_oops + 1, str, MAX_TRACER_SIZE - 1);
224                 return 1;
225         }
226 
227         if (*str++ == '=') {
228                 strscpy(ftrace_dump_on_oops, str, MAX_TRACER_SIZE);
229                 return 1;
230         }
231 
232         return 0;
233 }
234 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
235 
236 static int __init stop_trace_on_warning(char *str)
237 {
238         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
239                 __disable_trace_on_warning = 1;
240         return 1;
241 }
242 __setup("traceoff_on_warning", stop_trace_on_warning);
243 
244 static int __init boot_alloc_snapshot(char *str)
245 {
246         char *slot = boot_snapshot_info + boot_snapshot_index;
247         int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
248         int ret;
249 
250         if (str[0] == '=') {
251                 str++;
252                 if (strlen(str) >= left)
253                         return -1;
254 
255                 ret = snprintf(slot, left, "%s\t", str);
256                 boot_snapshot_index += ret;
257         } else {
258                 allocate_snapshot = true;
259                 /* We also need the main ring buffer expanded */
260                 trace_set_ring_buffer_expanded(NULL);
261         }
262         return 1;
263 }
264 __setup("alloc_snapshot", boot_alloc_snapshot);
265 
266 
267 static int __init boot_snapshot(char *str)
268 {
269         snapshot_at_boot = true;
270         boot_alloc_snapshot(str);
271         return 1;
272 }
273 __setup("ftrace_boot_snapshot", boot_snapshot);
274 
275 
276 static int __init boot_instance(char *str)
277 {
278         char *slot = boot_instance_info + boot_instance_index;
279         int left = sizeof(boot_instance_info) - boot_instance_index;
280         int ret;
281 
282         if (strlen(str) >= left)
283                 return -1;
284 
285         ret = snprintf(slot, left, "%s\t", str);
286         boot_instance_index += ret;
287 
288         return 1;
289 }
290 __setup("trace_instance=", boot_instance);
291 
292 
293 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
294 
295 static int __init set_trace_boot_options(char *str)
296 {
297         strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
298         return 1;
299 }
300 __setup("trace_options=", set_trace_boot_options);
301 
302 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
303 static char *trace_boot_clock __initdata;
304 
305 static int __init set_trace_boot_clock(char *str)
306 {
307         strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
308         trace_boot_clock = trace_boot_clock_buf;
309         return 1;
310 }
311 __setup("trace_clock=", set_trace_boot_clock);
312 
313 static int __init set_tracepoint_printk(char *str)
314 {
315         /* Ignore the "tp_printk_stop_on_boot" param */
316         if (*str == '_')
317                 return 0;
318 
319         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
320                 tracepoint_printk = 1;
321         return 1;
322 }
323 __setup("tp_printk", set_tracepoint_printk);
324 
325 static int __init set_tracepoint_printk_stop(char *str)
326 {
327         tracepoint_printk_stop_on_boot = true;
328         return 1;
329 }
330 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
331 
332 unsigned long long ns2usecs(u64 nsec)
333 {
334         nsec += 500;
335         do_div(nsec, 1000);
336         return nsec;
337 }
338 
339 static void
340 trace_process_export(struct trace_export *export,
341                struct ring_buffer_event *event, int flag)
342 {
343         struct trace_entry *entry;
344         unsigned int size = 0;
345 
346         if (export->flags & flag) {
347                 entry = ring_buffer_event_data(event);
348                 size = ring_buffer_event_length(event);
349                 export->write(export, entry, size);
350         }
351 }
352 
353 static DEFINE_MUTEX(ftrace_export_lock);
354 
355 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
356 
357 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
358 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
359 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
360 
361 static inline void ftrace_exports_enable(struct trace_export *export)
362 {
363         if (export->flags & TRACE_EXPORT_FUNCTION)
364                 static_branch_inc(&trace_function_exports_enabled);
365 
366         if (export->flags & TRACE_EXPORT_EVENT)
367                 static_branch_inc(&trace_event_exports_enabled);
368 
369         if (export->flags & TRACE_EXPORT_MARKER)
370                 static_branch_inc(&trace_marker_exports_enabled);
371 }
372 
373 static inline void ftrace_exports_disable(struct trace_export *export)
374 {
375         if (export->flags & TRACE_EXPORT_FUNCTION)
376                 static_branch_dec(&trace_function_exports_enabled);
377 
378         if (export->flags & TRACE_EXPORT_EVENT)
379                 static_branch_dec(&trace_event_exports_enabled);
380 
381         if (export->flags & TRACE_EXPORT_MARKER)
382                 static_branch_dec(&trace_marker_exports_enabled);
383 }
384 
385 static void ftrace_exports(struct ring_buffer_event *event, int flag)
386 {
387         struct trace_export *export;
388 
389         preempt_disable_notrace();
390 
391         export = rcu_dereference_raw_check(ftrace_exports_list);
392         while (export) {
393                 trace_process_export(export, event, flag);
394                 export = rcu_dereference_raw_check(export->next);
395         }
396 
397         preempt_enable_notrace();
398 }
399 
400 static inline void
401 add_trace_export(struct trace_export **list, struct trace_export *export)
402 {
403         rcu_assign_pointer(export->next, *list);
404         /*
405          * We are entering export into the list but another
406          * CPU might be walking that list. We need to make sure
407          * the export->next pointer is valid before another CPU sees
408          * the export pointer included into the list.
409          */
410         rcu_assign_pointer(*list, export);
411 }
412 
413 static inline int
414 rm_trace_export(struct trace_export **list, struct trace_export *export)
415 {
416         struct trace_export **p;
417 
418         for (p = list; *p != NULL; p = &(*p)->next)
419                 if (*p == export)
420                         break;
421 
422         if (*p != export)
423                 return -1;
424 
425         rcu_assign_pointer(*p, (*p)->next);
426 
427         return 0;
428 }
429 
430 static inline void
431 add_ftrace_export(struct trace_export **list, struct trace_export *export)
432 {
433         ftrace_exports_enable(export);
434 
435         add_trace_export(list, export);
436 }
437 
438 static inline int
439 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
440 {
441         int ret;
442 
443         ret = rm_trace_export(list, export);
444         ftrace_exports_disable(export);
445 
446         return ret;
447 }
448 
449 int register_ftrace_export(struct trace_export *export)
450 {
451         if (WARN_ON_ONCE(!export->write))
452                 return -1;
453 
454         mutex_lock(&ftrace_export_lock);
455 
456         add_ftrace_export(&ftrace_exports_list, export);
457 
458         mutex_unlock(&ftrace_export_lock);
459 
460         return 0;
461 }
462 EXPORT_SYMBOL_GPL(register_ftrace_export);
463 
464 int unregister_ftrace_export(struct trace_export *export)
465 {
466         int ret;
467 
468         mutex_lock(&ftrace_export_lock);
469 
470         ret = rm_ftrace_export(&ftrace_exports_list, export);
471 
472         mutex_unlock(&ftrace_export_lock);
473 
474         return ret;
475 }
476 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
477 
478 /* trace_flags holds trace_options default values */
479 #define TRACE_DEFAULT_FLAGS                                             \
480         (FUNCTION_DEFAULT_FLAGS |                                       \
481          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
482          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
483          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
484          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |                     \
485          TRACE_ITER_HASH_PTR)
486 
487 /* trace_options that are only supported by global_trace */
488 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
489                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
490 
491 /* trace_flags that are default zero for instances */
492 #define ZEROED_TRACE_FLAGS \
493         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
494 
495 /*
496  * The global_trace is the descriptor that holds the top-level tracing
497  * buffers for the live tracing.
498  */
499 static struct trace_array global_trace = {
500         .trace_flags = TRACE_DEFAULT_FLAGS,
501 };
502 
503 void trace_set_ring_buffer_expanded(struct trace_array *tr)
504 {
505         if (!tr)
506                 tr = &global_trace;
507         tr->ring_buffer_expanded = true;
508 }
509 
510 LIST_HEAD(ftrace_trace_arrays);
511 
512 int trace_array_get(struct trace_array *this_tr)
513 {
514         struct trace_array *tr;
515         int ret = -ENODEV;
516 
517         mutex_lock(&trace_types_lock);
518         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
519                 if (tr == this_tr) {
520                         tr->ref++;
521                         ret = 0;
522                         break;
523                 }
524         }
525         mutex_unlock(&trace_types_lock);
526 
527         return ret;
528 }
529 
530 static void __trace_array_put(struct trace_array *this_tr)
531 {
532         WARN_ON(!this_tr->ref);
533         this_tr->ref--;
534 }
535 
536 /**
537  * trace_array_put - Decrement the reference counter for this trace array.
538  * @this_tr : pointer to the trace array
539  *
540  * NOTE: Use this when we no longer need the trace array returned by
541  * trace_array_get_by_name(). This ensures the trace array can be later
542  * destroyed.
543  *
544  */
545 void trace_array_put(struct trace_array *this_tr)
546 {
547         if (!this_tr)
548                 return;
549 
550         mutex_lock(&trace_types_lock);
551         __trace_array_put(this_tr);
552         mutex_unlock(&trace_types_lock);
553 }
554 EXPORT_SYMBOL_GPL(trace_array_put);
555 
556 int tracing_check_open_get_tr(struct trace_array *tr)
557 {
558         int ret;
559 
560         ret = security_locked_down(LOCKDOWN_TRACEFS);
561         if (ret)
562                 return ret;
563 
564         if (tracing_disabled)
565                 return -ENODEV;
566 
567         if (tr && trace_array_get(tr) < 0)
568                 return -ENODEV;
569 
570         return 0;
571 }
572 
573 int call_filter_check_discard(struct trace_event_call *call, void *rec,
574                               struct trace_buffer *buffer,
575                               struct ring_buffer_event *event)
576 {
577         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
578             !filter_match_preds(call->filter, rec)) {
579                 __trace_event_discard_commit(buffer, event);
580                 return 1;
581         }
582 
583         return 0;
584 }
585 
586 /**
587  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
588  * @filtered_pids: The list of pids to check
589  * @search_pid: The PID to find in @filtered_pids
590  *
591  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
592  */
593 bool
594 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
595 {
596         return trace_pid_list_is_set(filtered_pids, search_pid);
597 }
598 
599 /**
600  * trace_ignore_this_task - should a task be ignored for tracing
601  * @filtered_pids: The list of pids to check
602  * @filtered_no_pids: The list of pids not to be traced
603  * @task: The task that should be ignored if not filtered
604  *
605  * Checks if @task should be traced or not from @filtered_pids.
606  * Returns true if @task should *NOT* be traced.
607  * Returns false if @task should be traced.
608  */
609 bool
610 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
611                        struct trace_pid_list *filtered_no_pids,
612                        struct task_struct *task)
613 {
614         /*
615          * If filtered_no_pids is not empty, and the task's pid is listed
616          * in filtered_no_pids, then return true.
617          * Otherwise, if filtered_pids is empty, that means we can
618          * trace all tasks. If it has content, then only trace pids
619          * within filtered_pids.
620          */
621 
622         return (filtered_pids &&
623                 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
624                 (filtered_no_pids &&
625                  trace_find_filtered_pid(filtered_no_pids, task->pid));
626 }
627 
628 /**
629  * trace_filter_add_remove_task - Add or remove a task from a pid_list
630  * @pid_list: The list to modify
631  * @self: The current task for fork or NULL for exit
632  * @task: The task to add or remove
633  *
634  * If adding a task, if @self is defined, the task is only added if @self
635  * is also included in @pid_list. This happens on fork and tasks should
636  * only be added when the parent is listed. If @self is NULL, then the
637  * @task pid will be removed from the list, which would happen on exit
638  * of a task.
639  */
640 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
641                                   struct task_struct *self,
642                                   struct task_struct *task)
643 {
644         if (!pid_list)
645                 return;
646 
647         /* For forks, we only add if the forking task is listed */
648         if (self) {
649                 if (!trace_find_filtered_pid(pid_list, self->pid))
650                         return;
651         }
652 
653         /* "self" is set for forks, and NULL for exits */
654         if (self)
655                 trace_pid_list_set(pid_list, task->pid);
656         else
657                 trace_pid_list_clear(pid_list, task->pid);
658 }
659 
660 /**
661  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
662  * @pid_list: The pid list to show
663  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
664  * @pos: The position of the file
665  *
666  * This is used by the seq_file "next" operation to iterate the pids
667  * listed in a trace_pid_list structure.
668  *
669  * Returns the pid+1 as we want to display pid of zero, but NULL would
670  * stop the iteration.
671  */
672 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
673 {
674         long pid = (unsigned long)v;
675         unsigned int next;
676 
677         (*pos)++;
678 
679         /* pid already is +1 of the actual previous bit */
680         if (trace_pid_list_next(pid_list, pid, &next) < 0)
681                 return NULL;
682 
683         pid = next;
684 
685         /* Return pid + 1 to allow zero to be represented */
686         return (void *)(pid + 1);
687 }
688 
689 /**
690  * trace_pid_start - Used for seq_file to start reading pid lists
691  * @pid_list: The pid list to show
692  * @pos: The position of the file
693  *
694  * This is used by seq_file "start" operation to start the iteration
695  * of listing pids.
696  *
697  * Returns the pid+1 as we want to display pid of zero, but NULL would
698  * stop the iteration.
699  */
700 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
701 {
702         unsigned long pid;
703         unsigned int first;
704         loff_t l = 0;
705 
706         if (trace_pid_list_first(pid_list, &first) < 0)
707                 return NULL;
708 
709         pid = first;
710 
711         /* Return pid + 1 so that zero can be the exit value */
712         for (pid++; pid && l < *pos;
713              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
714                 ;
715         return (void *)pid;
716 }
717 
718 /**
719  * trace_pid_show - show the current pid in seq_file processing
720  * @m: The seq_file structure to write into
721  * @v: A void pointer of the pid (+1) value to display
722  *
723  * Can be directly used by seq_file operations to display the current
724  * pid value.
725  */
726 int trace_pid_show(struct seq_file *m, void *v)
727 {
728         unsigned long pid = (unsigned long)v - 1;
729 
730         seq_printf(m, "%lu\n", pid);
731         return 0;
732 }
733 
734 /* 128 should be much more than enough */
735 #define PID_BUF_SIZE            127
736 
737 int trace_pid_write(struct trace_pid_list *filtered_pids,
738                     struct trace_pid_list **new_pid_list,
739                     const char __user *ubuf, size_t cnt)
740 {
741         struct trace_pid_list *pid_list;
742         struct trace_parser parser;
743         unsigned long val;
744         int nr_pids = 0;
745         ssize_t read = 0;
746         ssize_t ret;
747         loff_t pos;
748         pid_t pid;
749 
750         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
751                 return -ENOMEM;
752 
753         /*
754          * Always recreate a new array. The write is an all or nothing
755          * operation. Always create a new array when adding new pids by
756          * the user. If the operation fails, then the current list is
757          * not modified.
758          */
759         pid_list = trace_pid_list_alloc();
760         if (!pid_list) {
761                 trace_parser_put(&parser);
762                 return -ENOMEM;
763         }
764 
765         if (filtered_pids) {
766                 /* copy the current bits to the new max */
767                 ret = trace_pid_list_first(filtered_pids, &pid);
768                 while (!ret) {
769                         trace_pid_list_set(pid_list, pid);
770                         ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
771                         nr_pids++;
772                 }
773         }
774 
775         ret = 0;
776         while (cnt > 0) {
777 
778                 pos = 0;
779 
780                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
781                 if (ret < 0)
782                         break;
783 
784                 read += ret;
785                 ubuf += ret;
786                 cnt -= ret;
787 
788                 if (!trace_parser_loaded(&parser))
789                         break;
790 
791                 ret = -EINVAL;
792                 if (kstrtoul(parser.buffer, 0, &val))
793                         break;
794 
795                 pid = (pid_t)val;
796 
797                 if (trace_pid_list_set(pid_list, pid) < 0) {
798                         ret = -1;
799                         break;
800                 }
801                 nr_pids++;
802 
803                 trace_parser_clear(&parser);
804                 ret = 0;
805         }
806         trace_parser_put(&parser);
807 
808         if (ret < 0) {
809                 trace_pid_list_free(pid_list);
810                 return ret;
811         }
812 
813         if (!nr_pids) {
814                 /* Cleared the list of pids */
815                 trace_pid_list_free(pid_list);
816                 pid_list = NULL;
817         }
818 
819         *new_pid_list = pid_list;
820 
821         return read;
822 }
823 
824 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
825 {
826         u64 ts;
827 
828         /* Early boot up does not have a buffer yet */
829         if (!buf->buffer)
830                 return trace_clock_local();
831 
832         ts = ring_buffer_time_stamp(buf->buffer);
833         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
834 
835         return ts;
836 }
837 
838 u64 ftrace_now(int cpu)
839 {
840         return buffer_ftrace_now(&global_trace.array_buffer, cpu);
841 }
842 
843 /**
844  * tracing_is_enabled - Show if global_trace has been enabled
845  *
846  * Shows if the global trace has been enabled or not. It uses the
847  * mirror flag "buffer_disabled" to be used in fast paths such as for
848  * the irqsoff tracer. But it may be inaccurate due to races. If you
849  * need to know the accurate state, use tracing_is_on() which is a little
850  * slower, but accurate.
851  */
852 int tracing_is_enabled(void)
853 {
854         /*
855          * For quick access (irqsoff uses this in fast path), just
856          * return the mirror variable of the state of the ring buffer.
857          * It's a little racy, but we don't really care.
858          */
859         smp_rmb();
860         return !global_trace.buffer_disabled;
861 }
862 
863 /*
864  * trace_buf_size is the size in bytes that is allocated
865  * for a buffer. Note, the number of bytes is always rounded
866  * to page size.
867  *
868  * This number is purposely set to a low number of 16384.
869  * If the dump on oops happens, it will be much appreciated
870  * to not have to wait for all that output. Anyway this can be
871  * boot time and run time configurable.
872  */
873 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
874 
875 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
876 
877 /* trace_types holds a link list of available tracers. */
878 static struct tracer            *trace_types __read_mostly;
879 
880 /*
881  * trace_types_lock is used to protect the trace_types list.
882  */
883 DEFINE_MUTEX(trace_types_lock);
884 
885 /*
886  * serialize the access of the ring buffer
887  *
888  * ring buffer serializes readers, but it is low level protection.
889  * The validity of the events (which returns by ring_buffer_peek() ..etc)
890  * are not protected by ring buffer.
891  *
892  * The content of events may become garbage if we allow other process consumes
893  * these events concurrently:
894  *   A) the page of the consumed events may become a normal page
895  *      (not reader page) in ring buffer, and this page will be rewritten
896  *      by events producer.
897  *   B) The page of the consumed events may become a page for splice_read,
898  *      and this page will be returned to system.
899  *
900  * These primitives allow multi process access to different cpu ring buffer
901  * concurrently.
902  *
903  * These primitives don't distinguish read-only and read-consume access.
904  * Multi read-only access are also serialized.
905  */
906 
907 #ifdef CONFIG_SMP
908 static DECLARE_RWSEM(all_cpu_access_lock);
909 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
910 
911 static inline void trace_access_lock(int cpu)
912 {
913         if (cpu == RING_BUFFER_ALL_CPUS) {
914                 /* gain it for accessing the whole ring buffer. */
915                 down_write(&all_cpu_access_lock);
916         } else {
917                 /* gain it for accessing a cpu ring buffer. */
918 
919                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
920                 down_read(&all_cpu_access_lock);
921 
922                 /* Secondly block other access to this @cpu ring buffer. */
923                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
924         }
925 }
926 
927 static inline void trace_access_unlock(int cpu)
928 {
929         if (cpu == RING_BUFFER_ALL_CPUS) {
930                 up_write(&all_cpu_access_lock);
931         } else {
932                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
933                 up_read(&all_cpu_access_lock);
934         }
935 }
936 
937 static inline void trace_access_lock_init(void)
938 {
939         int cpu;
940 
941         for_each_possible_cpu(cpu)
942                 mutex_init(&per_cpu(cpu_access_lock, cpu));
943 }
944 
945 #else
946 
947 static DEFINE_MUTEX(access_lock);
948 
949 static inline void trace_access_lock(int cpu)
950 {
951         (void)cpu;
952         mutex_lock(&access_lock);
953 }
954 
955 static inline void trace_access_unlock(int cpu)
956 {
957         (void)cpu;
958         mutex_unlock(&access_lock);
959 }
960 
961 static inline void trace_access_lock_init(void)
962 {
963 }
964 
965 #endif
966 
967 #ifdef CONFIG_STACKTRACE
968 static void __ftrace_trace_stack(struct trace_buffer *buffer,
969                                  unsigned int trace_ctx,
970                                  int skip, struct pt_regs *regs);
971 static inline void ftrace_trace_stack(struct trace_array *tr,
972                                       struct trace_buffer *buffer,
973                                       unsigned int trace_ctx,
974                                       int skip, struct pt_regs *regs);
975 
976 #else
977 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
978                                         unsigned int trace_ctx,
979                                         int skip, struct pt_regs *regs)
980 {
981 }
982 static inline void ftrace_trace_stack(struct trace_array *tr,
983                                       struct trace_buffer *buffer,
984                                       unsigned long trace_ctx,
985                                       int skip, struct pt_regs *regs)
986 {
987 }
988 
989 #endif
990 
991 static __always_inline void
992 trace_event_setup(struct ring_buffer_event *event,
993                   int type, unsigned int trace_ctx)
994 {
995         struct trace_entry *ent = ring_buffer_event_data(event);
996 
997         tracing_generic_entry_update(ent, type, trace_ctx);
998 }
999 
1000 static __always_inline struct ring_buffer_event *
1001 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
1002                           int type,
1003                           unsigned long len,
1004                           unsigned int trace_ctx)
1005 {
1006         struct ring_buffer_event *event;
1007 
1008         event = ring_buffer_lock_reserve(buffer, len);
1009         if (event != NULL)
1010                 trace_event_setup(event, type, trace_ctx);
1011 
1012         return event;
1013 }
1014 
1015 void tracer_tracing_on(struct trace_array *tr)
1016 {
1017         if (tr->array_buffer.buffer)
1018                 ring_buffer_record_on(tr->array_buffer.buffer);
1019         /*
1020          * This flag is looked at when buffers haven't been allocated
1021          * yet, or by some tracers (like irqsoff), that just want to
1022          * know if the ring buffer has been disabled, but it can handle
1023          * races of where it gets disabled but we still do a record.
1024          * As the check is in the fast path of the tracers, it is more
1025          * important to be fast than accurate.
1026          */
1027         tr->buffer_disabled = 0;
1028         /* Make the flag seen by readers */
1029         smp_wmb();
1030 }
1031 
1032 /**
1033  * tracing_on - enable tracing buffers
1034  *
1035  * This function enables tracing buffers that may have been
1036  * disabled with tracing_off.
1037  */
1038 void tracing_on(void)
1039 {
1040         tracer_tracing_on(&global_trace);
1041 }
1042 EXPORT_SYMBOL_GPL(tracing_on);
1043 
1044 
1045 static __always_inline void
1046 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
1047 {
1048         __this_cpu_write(trace_taskinfo_save, true);
1049 
1050         /* If this is the temp buffer, we need to commit fully */
1051         if (this_cpu_read(trace_buffered_event) == event) {
1052                 /* Length is in event->array[0] */
1053                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
1054                 /* Release the temp buffer */
1055                 this_cpu_dec(trace_buffered_event_cnt);
1056                 /* ring_buffer_unlock_commit() enables preemption */
1057                 preempt_enable_notrace();
1058         } else
1059                 ring_buffer_unlock_commit(buffer);
1060 }
1061 
1062 int __trace_array_puts(struct trace_array *tr, unsigned long ip,
1063                        const char *str, int size)
1064 {
1065         struct ring_buffer_event *event;
1066         struct trace_buffer *buffer;
1067         struct print_entry *entry;
1068         unsigned int trace_ctx;
1069         int alloc;
1070 
1071         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1072                 return 0;
1073 
1074         if (unlikely(tracing_selftest_running && tr == &global_trace))
1075                 return 0;
1076 
1077         if (unlikely(tracing_disabled))
1078                 return 0;
1079 
1080         alloc = sizeof(*entry) + size + 2; /* possible \n added */
1081 
1082         trace_ctx = tracing_gen_ctx();
1083         buffer = tr->array_buffer.buffer;
1084         ring_buffer_nest_start(buffer);
1085         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1086                                             trace_ctx);
1087         if (!event) {
1088                 size = 0;
1089                 goto out;
1090         }
1091 
1092         entry = ring_buffer_event_data(event);
1093         entry->ip = ip;
1094 
1095         memcpy(&entry->buf, str, size);
1096 
1097         /* Add a newline if necessary */
1098         if (entry->buf[size - 1] != '\n') {
1099                 entry->buf[size] = '\n';
1100                 entry->buf[size + 1] = '\0';
1101         } else
1102                 entry->buf[size] = '\0';
1103 
1104         __buffer_unlock_commit(buffer, event);
1105         ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1106  out:
1107         ring_buffer_nest_end(buffer);
1108         return size;
1109 }
1110 EXPORT_SYMBOL_GPL(__trace_array_puts);
1111 
1112 /**
1113  * __trace_puts - write a constant string into the trace buffer.
1114  * @ip:    The address of the caller
1115  * @str:   The constant string to write
1116  * @size:  The size of the string.
1117  */
1118 int __trace_puts(unsigned long ip, const char *str, int size)
1119 {
1120         return __trace_array_puts(&global_trace, ip, str, size);
1121 }
1122 EXPORT_SYMBOL_GPL(__trace_puts);
1123 
1124 /**
1125  * __trace_bputs - write the pointer to a constant string into trace buffer
1126  * @ip:    The address of the caller
1127  * @str:   The constant string to write to the buffer to
1128  */
1129 int __trace_bputs(unsigned long ip, const char *str)
1130 {
1131         struct ring_buffer_event *event;
1132         struct trace_buffer *buffer;
1133         struct bputs_entry *entry;
1134         unsigned int trace_ctx;
1135         int size = sizeof(struct bputs_entry);
1136         int ret = 0;
1137 
1138         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1139                 return 0;
1140 
1141         if (unlikely(tracing_selftest_running || tracing_disabled))
1142                 return 0;
1143 
1144         trace_ctx = tracing_gen_ctx();
1145         buffer = global_trace.array_buffer.buffer;
1146 
1147         ring_buffer_nest_start(buffer);
1148         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1149                                             trace_ctx);
1150         if (!event)
1151                 goto out;
1152 
1153         entry = ring_buffer_event_data(event);
1154         entry->ip                       = ip;
1155         entry->str                      = str;
1156 
1157         __buffer_unlock_commit(buffer, event);
1158         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1159 
1160         ret = 1;
1161  out:
1162         ring_buffer_nest_end(buffer);
1163         return ret;
1164 }
1165 EXPORT_SYMBOL_GPL(__trace_bputs);
1166 
1167 #ifdef CONFIG_TRACER_SNAPSHOT
1168 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1169                                            void *cond_data)
1170 {
1171         struct tracer *tracer = tr->current_trace;
1172         unsigned long flags;
1173 
1174         if (in_nmi()) {
1175                 trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1176                 trace_array_puts(tr, "*** snapshot is being ignored        ***\n");
1177                 return;
1178         }
1179 
1180         if (!tr->allocated_snapshot) {
1181                 trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
1182                 trace_array_puts(tr, "*** stopping trace here!   ***\n");
1183                 tracer_tracing_off(tr);
1184                 return;
1185         }
1186 
1187         /* Note, snapshot can not be used when the tracer uses it */
1188         if (tracer->use_max_tr) {
1189                 trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
1190                 trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1191                 return;
1192         }
1193 
1194         if (tr->mapped) {
1195                 trace_array_puts(tr, "*** BUFFER MEMORY MAPPED ***\n");
1196                 trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1197                 return;
1198         }
1199 
1200         local_irq_save(flags);
1201         update_max_tr(tr, current, smp_processor_id(), cond_data);
1202         local_irq_restore(flags);
1203 }
1204 
1205 void tracing_snapshot_instance(struct trace_array *tr)
1206 {
1207         tracing_snapshot_instance_cond(tr, NULL);
1208 }
1209 
1210 /**
1211  * tracing_snapshot - take a snapshot of the current buffer.
1212  *
1213  * This causes a swap between the snapshot buffer and the current live
1214  * tracing buffer. You can use this to take snapshots of the live
1215  * trace when some condition is triggered, but continue to trace.
1216  *
1217  * Note, make sure to allocate the snapshot with either
1218  * a tracing_snapshot_alloc(), or by doing it manually
1219  * with: echo 1 > /sys/kernel/tracing/snapshot
1220  *
1221  * If the snapshot buffer is not allocated, it will stop tracing.
1222  * Basically making a permanent snapshot.
1223  */
1224 void tracing_snapshot(void)
1225 {
1226         struct trace_array *tr = &global_trace;
1227 
1228         tracing_snapshot_instance(tr);
1229 }
1230 EXPORT_SYMBOL_GPL(tracing_snapshot);
1231 
1232 /**
1233  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1234  * @tr:         The tracing instance to snapshot
1235  * @cond_data:  The data to be tested conditionally, and possibly saved
1236  *
1237  * This is the same as tracing_snapshot() except that the snapshot is
1238  * conditional - the snapshot will only happen if the
1239  * cond_snapshot.update() implementation receiving the cond_data
1240  * returns true, which means that the trace array's cond_snapshot
1241  * update() operation used the cond_data to determine whether the
1242  * snapshot should be taken, and if it was, presumably saved it along
1243  * with the snapshot.
1244  */
1245 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1246 {
1247         tracing_snapshot_instance_cond(tr, cond_data);
1248 }
1249 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1250 
1251 /**
1252  * tracing_cond_snapshot_data - get the user data associated with a snapshot
1253  * @tr:         The tracing instance
1254  *
1255  * When the user enables a conditional snapshot using
1256  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1257  * with the snapshot.  This accessor is used to retrieve it.
1258  *
1259  * Should not be called from cond_snapshot.update(), since it takes
1260  * the tr->max_lock lock, which the code calling
1261  * cond_snapshot.update() has already done.
1262  *
1263  * Returns the cond_data associated with the trace array's snapshot.
1264  */
1265 void *tracing_cond_snapshot_data(struct trace_array *tr)
1266 {
1267         void *cond_data = NULL;
1268 
1269         local_irq_disable();
1270         arch_spin_lock(&tr->max_lock);
1271 
1272         if (tr->cond_snapshot)
1273                 cond_data = tr->cond_snapshot->cond_data;
1274 
1275         arch_spin_unlock(&tr->max_lock);
1276         local_irq_enable();
1277 
1278         return cond_data;
1279 }
1280 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1281 
1282 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1283                                         struct array_buffer *size_buf, int cpu_id);
1284 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1285 
1286 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1287 {
1288         int order;
1289         int ret;
1290 
1291         if (!tr->allocated_snapshot) {
1292 
1293                 /* Make the snapshot buffer have the same order as main buffer */
1294                 order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
1295                 ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
1296                 if (ret < 0)
1297                         return ret;
1298 
1299                 /* allocate spare buffer */
1300                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1301                                    &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1302                 if (ret < 0)
1303                         return ret;
1304 
1305                 tr->allocated_snapshot = true;
1306         }
1307 
1308         return 0;
1309 }
1310 
1311 static void free_snapshot(struct trace_array *tr)
1312 {
1313         /*
1314          * We don't free the ring buffer. instead, resize it because
1315          * The max_tr ring buffer has some state (e.g. ring->clock) and
1316          * we want preserve it.
1317          */
1318         ring_buffer_subbuf_order_set(tr->max_buffer.buffer, 0);
1319         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1320         set_buffer_entries(&tr->max_buffer, 1);
1321         tracing_reset_online_cpus(&tr->max_buffer);
1322         tr->allocated_snapshot = false;
1323 }
1324 
1325 static int tracing_arm_snapshot_locked(struct trace_array *tr)
1326 {
1327         int ret;
1328 
1329         lockdep_assert_held(&trace_types_lock);
1330 
1331         spin_lock(&tr->snapshot_trigger_lock);
1332         if (tr->snapshot == UINT_MAX || tr->mapped) {
1333                 spin_unlock(&tr->snapshot_trigger_lock);
1334                 return -EBUSY;
1335         }
1336 
1337         tr->snapshot++;
1338         spin_unlock(&tr->snapshot_trigger_lock);
1339 
1340         ret = tracing_alloc_snapshot_instance(tr);
1341         if (ret) {
1342                 spin_lock(&tr->snapshot_trigger_lock);
1343                 tr->snapshot--;
1344                 spin_unlock(&tr->snapshot_trigger_lock);
1345         }
1346 
1347         return ret;
1348 }
1349 
1350 int tracing_arm_snapshot(struct trace_array *tr)
1351 {
1352         int ret;
1353 
1354         mutex_lock(&trace_types_lock);
1355         ret = tracing_arm_snapshot_locked(tr);
1356         mutex_unlock(&trace_types_lock);
1357 
1358         return ret;
1359 }
1360 
1361 void tracing_disarm_snapshot(struct trace_array *tr)
1362 {
1363         spin_lock(&tr->snapshot_trigger_lock);
1364         if (!WARN_ON(!tr->snapshot))
1365                 tr->snapshot--;
1366         spin_unlock(&tr->snapshot_trigger_lock);
1367 }
1368 
1369 /**
1370  * tracing_alloc_snapshot - allocate snapshot buffer.
1371  *
1372  * This only allocates the snapshot buffer if it isn't already
1373  * allocated - it doesn't also take a snapshot.
1374  *
1375  * This is meant to be used in cases where the snapshot buffer needs
1376  * to be set up for events that can't sleep but need to be able to
1377  * trigger a snapshot.
1378  */
1379 int tracing_alloc_snapshot(void)
1380 {
1381         struct trace_array *tr = &global_trace;
1382         int ret;
1383 
1384         ret = tracing_alloc_snapshot_instance(tr);
1385         WARN_ON(ret < 0);
1386 
1387         return ret;
1388 }
1389 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1390 
1391 /**
1392  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1393  *
1394  * This is similar to tracing_snapshot(), but it will allocate the
1395  * snapshot buffer if it isn't already allocated. Use this only
1396  * where it is safe to sleep, as the allocation may sleep.
1397  *
1398  * This causes a swap between the snapshot buffer and the current live
1399  * tracing buffer. You can use this to take snapshots of the live
1400  * trace when some condition is triggered, but continue to trace.
1401  */
1402 void tracing_snapshot_alloc(void)
1403 {
1404         int ret;
1405 
1406         ret = tracing_alloc_snapshot();
1407         if (ret < 0)
1408                 return;
1409 
1410         tracing_snapshot();
1411 }
1412 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1413 
1414 /**
1415  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1416  * @tr:         The tracing instance
1417  * @cond_data:  User data to associate with the snapshot
1418  * @update:     Implementation of the cond_snapshot update function
1419  *
1420  * Check whether the conditional snapshot for the given instance has
1421  * already been enabled, or if the current tracer is already using a
1422  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1423  * save the cond_data and update function inside.
1424  *
1425  * Returns 0 if successful, error otherwise.
1426  */
1427 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1428                                  cond_update_fn_t update)
1429 {
1430         struct cond_snapshot *cond_snapshot;
1431         int ret = 0;
1432 
1433         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1434         if (!cond_snapshot)
1435                 return -ENOMEM;
1436 
1437         cond_snapshot->cond_data = cond_data;
1438         cond_snapshot->update = update;
1439 
1440         mutex_lock(&trace_types_lock);
1441 
1442         if (tr->current_trace->use_max_tr) {
1443                 ret = -EBUSY;
1444                 goto fail_unlock;
1445         }
1446 
1447         /*
1448          * The cond_snapshot can only change to NULL without the
1449          * trace_types_lock. We don't care if we race with it going
1450          * to NULL, but we want to make sure that it's not set to
1451          * something other than NULL when we get here, which we can
1452          * do safely with only holding the trace_types_lock and not
1453          * having to take the max_lock.
1454          */
1455         if (tr->cond_snapshot) {
1456                 ret = -EBUSY;
1457                 goto fail_unlock;
1458         }
1459 
1460         ret = tracing_arm_snapshot_locked(tr);
1461         if (ret)
1462                 goto fail_unlock;
1463 
1464         local_irq_disable();
1465         arch_spin_lock(&tr->max_lock);
1466         tr->cond_snapshot = cond_snapshot;
1467         arch_spin_unlock(&tr->max_lock);
1468         local_irq_enable();
1469 
1470         mutex_unlock(&trace_types_lock);
1471 
1472         return ret;
1473 
1474  fail_unlock:
1475         mutex_unlock(&trace_types_lock);
1476         kfree(cond_snapshot);
1477         return ret;
1478 }
1479 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1480 
1481 /**
1482  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1483  * @tr:         The tracing instance
1484  *
1485  * Check whether the conditional snapshot for the given instance is
1486  * enabled; if so, free the cond_snapshot associated with it,
1487  * otherwise return -EINVAL.
1488  *
1489  * Returns 0 if successful, error otherwise.
1490  */
1491 int tracing_snapshot_cond_disable(struct trace_array *tr)
1492 {
1493         int ret = 0;
1494 
1495         local_irq_disable();
1496         arch_spin_lock(&tr->max_lock);
1497 
1498         if (!tr->cond_snapshot)
1499                 ret = -EINVAL;
1500         else {
1501                 kfree(tr->cond_snapshot);
1502                 tr->cond_snapshot = NULL;
1503         }
1504 
1505         arch_spin_unlock(&tr->max_lock);
1506         local_irq_enable();
1507 
1508         tracing_disarm_snapshot(tr);
1509 
1510         return ret;
1511 }
1512 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1513 #else
1514 void tracing_snapshot(void)
1515 {
1516         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1517 }
1518 EXPORT_SYMBOL_GPL(tracing_snapshot);
1519 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1520 {
1521         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1522 }
1523 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1524 int tracing_alloc_snapshot(void)
1525 {
1526         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1527         return -ENODEV;
1528 }
1529 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1530 void tracing_snapshot_alloc(void)
1531 {
1532         /* Give warning */
1533         tracing_snapshot();
1534 }
1535 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1536 void *tracing_cond_snapshot_data(struct trace_array *tr)
1537 {
1538         return NULL;
1539 }
1540 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1541 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1542 {
1543         return -ENODEV;
1544 }
1545 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1546 int tracing_snapshot_cond_disable(struct trace_array *tr)
1547 {
1548         return false;
1549 }
1550 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1551 #define free_snapshot(tr)       do { } while (0)
1552 #define tracing_arm_snapshot_locked(tr) ({ -EBUSY; })
1553 #endif /* CONFIG_TRACER_SNAPSHOT */
1554 
1555 void tracer_tracing_off(struct trace_array *tr)
1556 {
1557         if (tr->array_buffer.buffer)
1558                 ring_buffer_record_off(tr->array_buffer.buffer);
1559         /*
1560          * This flag is looked at when buffers haven't been allocated
1561          * yet, or by some tracers (like irqsoff), that just want to
1562          * know if the ring buffer has been disabled, but it can handle
1563          * races of where it gets disabled but we still do a record.
1564          * As the check is in the fast path of the tracers, it is more
1565          * important to be fast than accurate.
1566          */
1567         tr->buffer_disabled = 1;
1568         /* Make the flag seen by readers */
1569         smp_wmb();
1570 }
1571 
1572 /**
1573  * tracing_off - turn off tracing buffers
1574  *
1575  * This function stops the tracing buffers from recording data.
1576  * It does not disable any overhead the tracers themselves may
1577  * be causing. This function simply causes all recording to
1578  * the ring buffers to fail.
1579  */
1580 void tracing_off(void)
1581 {
1582         tracer_tracing_off(&global_trace);
1583 }
1584 EXPORT_SYMBOL_GPL(tracing_off);
1585 
1586 void disable_trace_on_warning(void)
1587 {
1588         if (__disable_trace_on_warning) {
1589                 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1590                         "Disabling tracing due to warning\n");
1591                 tracing_off();
1592         }
1593 }
1594 
1595 /**
1596  * tracer_tracing_is_on - show real state of ring buffer enabled
1597  * @tr : the trace array to know if ring buffer is enabled
1598  *
1599  * Shows real state of the ring buffer if it is enabled or not.
1600  */
1601 bool tracer_tracing_is_on(struct trace_array *tr)
1602 {
1603         if (tr->array_buffer.buffer)
1604                 return ring_buffer_record_is_set_on(tr->array_buffer.buffer);
1605         return !tr->buffer_disabled;
1606 }
1607 
1608 /**
1609  * tracing_is_on - show state of ring buffers enabled
1610  */
1611 int tracing_is_on(void)
1612 {
1613         return tracer_tracing_is_on(&global_trace);
1614 }
1615 EXPORT_SYMBOL_GPL(tracing_is_on);
1616 
1617 static int __init set_buf_size(char *str)
1618 {
1619         unsigned long buf_size;
1620 
1621         if (!str)
1622                 return 0;
1623         buf_size = memparse(str, &str);
1624         /*
1625          * nr_entries can not be zero and the startup
1626          * tests require some buffer space. Therefore
1627          * ensure we have at least 4096 bytes of buffer.
1628          */
1629         trace_buf_size = max(4096UL, buf_size);
1630         return 1;
1631 }
1632 __setup("trace_buf_size=", set_buf_size);
1633 
1634 static int __init set_tracing_thresh(char *str)
1635 {
1636         unsigned long threshold;
1637         int ret;
1638 
1639         if (!str)
1640                 return 0;
1641         ret = kstrtoul(str, 0, &threshold);
1642         if (ret < 0)
1643                 return 0;
1644         tracing_thresh = threshold * 1000;
1645         return 1;
1646 }
1647 __setup("tracing_thresh=", set_tracing_thresh);
1648 
1649 unsigned long nsecs_to_usecs(unsigned long nsecs)
1650 {
1651         return nsecs / 1000;
1652 }
1653 
1654 /*
1655  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1656  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1657  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1658  * of strings in the order that the evals (enum) were defined.
1659  */
1660 #undef C
1661 #define C(a, b) b
1662 
1663 /* These must match the bit positions in trace_iterator_flags */
1664 static const char *trace_options[] = {
1665         TRACE_FLAGS
1666         NULL
1667 };
1668 
1669 static struct {
1670         u64 (*func)(void);
1671         const char *name;
1672         int in_ns;              /* is this clock in nanoseconds? */
1673 } trace_clocks[] = {
1674         { trace_clock_local,            "local",        1 },
1675         { trace_clock_global,           "global",       1 },
1676         { trace_clock_counter,          "counter",      0 },
1677         { trace_clock_jiffies,          "uptime",       0 },
1678         { trace_clock,                  "perf",         1 },
1679         { ktime_get_mono_fast_ns,       "mono",         1 },
1680         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1681         { ktime_get_boot_fast_ns,       "boot",         1 },
1682         { ktime_get_tai_fast_ns,        "tai",          1 },
1683         ARCH_TRACE_CLOCKS
1684 };
1685 
1686 bool trace_clock_in_ns(struct trace_array *tr)
1687 {
1688         if (trace_clocks[tr->clock_id].in_ns)
1689                 return true;
1690 
1691         return false;
1692 }
1693 
1694 /*
1695  * trace_parser_get_init - gets the buffer for trace parser
1696  */
1697 int trace_parser_get_init(struct trace_parser *parser, int size)
1698 {
1699         memset(parser, 0, sizeof(*parser));
1700 
1701         parser->buffer = kmalloc(size, GFP_KERNEL);
1702         if (!parser->buffer)
1703                 return 1;
1704 
1705         parser->size = size;
1706         return 0;
1707 }
1708 
1709 /*
1710  * trace_parser_put - frees the buffer for trace parser
1711  */
1712 void trace_parser_put(struct trace_parser *parser)
1713 {
1714         kfree(parser->buffer);
1715         parser->buffer = NULL;
1716 }
1717 
1718 /*
1719  * trace_get_user - reads the user input string separated by  space
1720  * (matched by isspace(ch))
1721  *
1722  * For each string found the 'struct trace_parser' is updated,
1723  * and the function returns.
1724  *
1725  * Returns number of bytes read.
1726  *
1727  * See kernel/trace/trace.h for 'struct trace_parser' details.
1728  */
1729 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1730         size_t cnt, loff_t *ppos)
1731 {
1732         char ch;
1733         size_t read = 0;
1734         ssize_t ret;
1735 
1736         if (!*ppos)
1737                 trace_parser_clear(parser);
1738 
1739         ret = get_user(ch, ubuf++);
1740         if (ret)
1741                 goto out;
1742 
1743         read++;
1744         cnt--;
1745 
1746         /*
1747          * The parser is not finished with the last write,
1748          * continue reading the user input without skipping spaces.
1749          */
1750         if (!parser->cont) {
1751                 /* skip white space */
1752                 while (cnt && isspace(ch)) {
1753                         ret = get_user(ch, ubuf++);
1754                         if (ret)
1755                                 goto out;
1756                         read++;
1757                         cnt--;
1758                 }
1759 
1760                 parser->idx = 0;
1761 
1762                 /* only spaces were written */
1763                 if (isspace(ch) || !ch) {
1764                         *ppos += read;
1765                         ret = read;
1766                         goto out;
1767                 }
1768         }
1769 
1770         /* read the non-space input */
1771         while (cnt && !isspace(ch) && ch) {
1772                 if (parser->idx < parser->size - 1)
1773                         parser->buffer[parser->idx++] = ch;
1774                 else {
1775                         ret = -EINVAL;
1776                         goto out;
1777                 }
1778                 ret = get_user(ch, ubuf++);
1779                 if (ret)
1780                         goto out;
1781                 read++;
1782                 cnt--;
1783         }
1784 
1785         /* We either got finished input or we have to wait for another call. */
1786         if (isspace(ch) || !ch) {
1787                 parser->buffer[parser->idx] = 0;
1788                 parser->cont = false;
1789         } else if (parser->idx < parser->size - 1) {
1790                 parser->cont = true;
1791                 parser->buffer[parser->idx++] = ch;
1792                 /* Make sure the parsed string always terminates with '\0'. */
1793                 parser->buffer[parser->idx] = 0;
1794         } else {
1795                 ret = -EINVAL;
1796                 goto out;
1797         }
1798 
1799         *ppos += read;
1800         ret = read;
1801 
1802 out:
1803         return ret;
1804 }
1805 
1806 /* TODO add a seq_buf_to_buffer() */
1807 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1808 {
1809         int len;
1810 
1811         if (trace_seq_used(s) <= s->readpos)
1812                 return -EBUSY;
1813 
1814         len = trace_seq_used(s) - s->readpos;
1815         if (cnt > len)
1816                 cnt = len;
1817         memcpy(buf, s->buffer + s->readpos, cnt);
1818 
1819         s->readpos += cnt;
1820         return cnt;
1821 }
1822 
1823 unsigned long __read_mostly     tracing_thresh;
1824 
1825 #ifdef CONFIG_TRACER_MAX_TRACE
1826 static const struct file_operations tracing_max_lat_fops;
1827 
1828 #ifdef LATENCY_FS_NOTIFY
1829 
1830 static struct workqueue_struct *fsnotify_wq;
1831 
1832 static void latency_fsnotify_workfn(struct work_struct *work)
1833 {
1834         struct trace_array *tr = container_of(work, struct trace_array,
1835                                               fsnotify_work);
1836         fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1837 }
1838 
1839 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1840 {
1841         struct trace_array *tr = container_of(iwork, struct trace_array,
1842                                               fsnotify_irqwork);
1843         queue_work(fsnotify_wq, &tr->fsnotify_work);
1844 }
1845 
1846 static void trace_create_maxlat_file(struct trace_array *tr,
1847                                      struct dentry *d_tracer)
1848 {
1849         INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1850         init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1851         tr->d_max_latency = trace_create_file("tracing_max_latency",
1852                                               TRACE_MODE_WRITE,
1853                                               d_tracer, tr,
1854                                               &tracing_max_lat_fops);
1855 }
1856 
1857 __init static int latency_fsnotify_init(void)
1858 {
1859         fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1860                                       WQ_UNBOUND | WQ_HIGHPRI, 0);
1861         if (!fsnotify_wq) {
1862                 pr_err("Unable to allocate tr_max_lat_wq\n");
1863                 return -ENOMEM;
1864         }
1865         return 0;
1866 }
1867 
1868 late_initcall_sync(latency_fsnotify_init);
1869 
1870 void latency_fsnotify(struct trace_array *tr)
1871 {
1872         if (!fsnotify_wq)
1873                 return;
1874         /*
1875          * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1876          * possible that we are called from __schedule() or do_idle(), which
1877          * could cause a deadlock.
1878          */
1879         irq_work_queue(&tr->fsnotify_irqwork);
1880 }
1881 
1882 #else /* !LATENCY_FS_NOTIFY */
1883 
1884 #define trace_create_maxlat_file(tr, d_tracer)                          \
1885         trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,      \
1886                           d_tracer, tr, &tracing_max_lat_fops)
1887 
1888 #endif
1889 
1890 /*
1891  * Copy the new maximum trace into the separate maximum-trace
1892  * structure. (this way the maximum trace is permanently saved,
1893  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1894  */
1895 static void
1896 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1897 {
1898         struct array_buffer *trace_buf = &tr->array_buffer;
1899         struct array_buffer *max_buf = &tr->max_buffer;
1900         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1901         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1902 
1903         max_buf->cpu = cpu;
1904         max_buf->time_start = data->preempt_timestamp;
1905 
1906         max_data->saved_latency = tr->max_latency;
1907         max_data->critical_start = data->critical_start;
1908         max_data->critical_end = data->critical_end;
1909 
1910         strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1911         max_data->pid = tsk->pid;
1912         /*
1913          * If tsk == current, then use current_uid(), as that does not use
1914          * RCU. The irq tracer can be called out of RCU scope.
1915          */
1916         if (tsk == current)
1917                 max_data->uid = current_uid();
1918         else
1919                 max_data->uid = task_uid(tsk);
1920 
1921         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1922         max_data->policy = tsk->policy;
1923         max_data->rt_priority = tsk->rt_priority;
1924 
1925         /* record this tasks comm */
1926         tracing_record_cmdline(tsk);
1927         latency_fsnotify(tr);
1928 }
1929 
1930 /**
1931  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1932  * @tr: tracer
1933  * @tsk: the task with the latency
1934  * @cpu: The cpu that initiated the trace.
1935  * @cond_data: User data associated with a conditional snapshot
1936  *
1937  * Flip the buffers between the @tr and the max_tr and record information
1938  * about which task was the cause of this latency.
1939  */
1940 void
1941 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1942               void *cond_data)
1943 {
1944         if (tr->stop_count)
1945                 return;
1946 
1947         WARN_ON_ONCE(!irqs_disabled());
1948 
1949         if (!tr->allocated_snapshot) {
1950                 /* Only the nop tracer should hit this when disabling */
1951                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1952                 return;
1953         }
1954 
1955         arch_spin_lock(&tr->max_lock);
1956 
1957         /* Inherit the recordable setting from array_buffer */
1958         if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1959                 ring_buffer_record_on(tr->max_buffer.buffer);
1960         else
1961                 ring_buffer_record_off(tr->max_buffer.buffer);
1962 
1963 #ifdef CONFIG_TRACER_SNAPSHOT
1964         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
1965                 arch_spin_unlock(&tr->max_lock);
1966                 return;
1967         }
1968 #endif
1969         swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1970 
1971         __update_max_tr(tr, tsk, cpu);
1972 
1973         arch_spin_unlock(&tr->max_lock);
1974 
1975         /* Any waiters on the old snapshot buffer need to wake up */
1976         ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS);
1977 }
1978 
1979 /**
1980  * update_max_tr_single - only copy one trace over, and reset the rest
1981  * @tr: tracer
1982  * @tsk: task with the latency
1983  * @cpu: the cpu of the buffer to copy.
1984  *
1985  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1986  */
1987 void
1988 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1989 {
1990         int ret;
1991 
1992         if (tr->stop_count)
1993                 return;
1994 
1995         WARN_ON_ONCE(!irqs_disabled());
1996         if (!tr->allocated_snapshot) {
1997                 /* Only the nop tracer should hit this when disabling */
1998                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1999                 return;
2000         }
2001 
2002         arch_spin_lock(&tr->max_lock);
2003 
2004         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
2005 
2006         if (ret == -EBUSY) {
2007                 /*
2008                  * We failed to swap the buffer due to a commit taking
2009                  * place on this CPU. We fail to record, but we reset
2010                  * the max trace buffer (no one writes directly to it)
2011                  * and flag that it failed.
2012                  * Another reason is resize is in progress.
2013                  */
2014                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
2015                         "Failed to swap buffers due to commit or resize in progress\n");
2016         }
2017 
2018         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
2019 
2020         __update_max_tr(tr, tsk, cpu);
2021         arch_spin_unlock(&tr->max_lock);
2022 }
2023 
2024 #endif /* CONFIG_TRACER_MAX_TRACE */
2025 
2026 struct pipe_wait {
2027         struct trace_iterator           *iter;
2028         int                             wait_index;
2029 };
2030 
2031 static bool wait_pipe_cond(void *data)
2032 {
2033         struct pipe_wait *pwait = data;
2034         struct trace_iterator *iter = pwait->iter;
2035 
2036         if (atomic_read_acquire(&iter->wait_index) != pwait->wait_index)
2037                 return true;
2038 
2039         return iter->closed;
2040 }
2041 
2042 static int wait_on_pipe(struct trace_iterator *iter, int full)
2043 {
2044         struct pipe_wait pwait;
2045         int ret;
2046 
2047         /* Iterators are static, they should be filled or empty */
2048         if (trace_buffer_iter(iter, iter->cpu_file))
2049                 return 0;
2050 
2051         pwait.wait_index = atomic_read_acquire(&iter->wait_index);
2052         pwait.iter = iter;
2053 
2054         ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full,
2055                                wait_pipe_cond, &pwait);
2056 
2057 #ifdef CONFIG_TRACER_MAX_TRACE
2058         /*
2059          * Make sure this is still the snapshot buffer, as if a snapshot were
2060          * to happen, this would now be the main buffer.
2061          */
2062         if (iter->snapshot)
2063                 iter->array_buffer = &iter->tr->max_buffer;
2064 #endif
2065         return ret;
2066 }
2067 
2068 #ifdef CONFIG_FTRACE_STARTUP_TEST
2069 static bool selftests_can_run;
2070 
2071 struct trace_selftests {
2072         struct list_head                list;
2073         struct tracer                   *type;
2074 };
2075 
2076 static LIST_HEAD(postponed_selftests);
2077 
2078 static int save_selftest(struct tracer *type)
2079 {
2080         struct trace_selftests *selftest;
2081 
2082         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
2083         if (!selftest)
2084                 return -ENOMEM;
2085 
2086         selftest->type = type;
2087         list_add(&selftest->list, &postponed_selftests);
2088         return 0;
2089 }
2090 
2091 static int run_tracer_selftest(struct tracer *type)
2092 {
2093         struct trace_array *tr = &global_trace;
2094         struct tracer *saved_tracer = tr->current_trace;
2095         int ret;
2096 
2097         if (!type->selftest || tracing_selftest_disabled)
2098                 return 0;
2099 
2100         /*
2101          * If a tracer registers early in boot up (before scheduling is
2102          * initialized and such), then do not run its selftests yet.
2103          * Instead, run it a little later in the boot process.
2104          */
2105         if (!selftests_can_run)
2106                 return save_selftest(type);
2107 
2108         if (!tracing_is_on()) {
2109                 pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
2110                         type->name);
2111                 return 0;
2112         }
2113 
2114         /*
2115          * Run a selftest on this tracer.
2116          * Here we reset the trace buffer, and set the current
2117          * tracer to be this tracer. The tracer can then run some
2118          * internal tracing to verify that everything is in order.
2119          * If we fail, we do not register this tracer.
2120          */
2121         tracing_reset_online_cpus(&tr->array_buffer);
2122 
2123         tr->current_trace = type;
2124 
2125 #ifdef CONFIG_TRACER_MAX_TRACE
2126         if (type->use_max_tr) {
2127                 /* If we expanded the buffers, make sure the max is expanded too */
2128                 if (tr->ring_buffer_expanded)
2129                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
2130                                            RING_BUFFER_ALL_CPUS);
2131                 tr->allocated_snapshot = true;
2132         }
2133 #endif
2134 
2135         /* the test is responsible for initializing and enabling */
2136         pr_info("Testing tracer %s: ", type->name);
2137         ret = type->selftest(type, tr);
2138         /* the test is responsible for resetting too */
2139         tr->current_trace = saved_tracer;
2140         if (ret) {
2141                 printk(KERN_CONT "FAILED!\n");
2142                 /* Add the warning after printing 'FAILED' */
2143                 WARN_ON(1);
2144                 return -1;
2145         }
2146         /* Only reset on passing, to avoid touching corrupted buffers */
2147         tracing_reset_online_cpus(&tr->array_buffer);
2148 
2149 #ifdef CONFIG_TRACER_MAX_TRACE
2150         if (type->use_max_tr) {
2151                 tr->allocated_snapshot = false;
2152 
2153                 /* Shrink the max buffer again */
2154                 if (tr->ring_buffer_expanded)
2155                         ring_buffer_resize(tr->max_buffer.buffer, 1,
2156                                            RING_BUFFER_ALL_CPUS);
2157         }
2158 #endif
2159 
2160         printk(KERN_CONT "PASSED\n");
2161         return 0;
2162 }
2163 
2164 static int do_run_tracer_selftest(struct tracer *type)
2165 {
2166         int ret;
2167 
2168         /*
2169          * Tests can take a long time, especially if they are run one after the
2170          * other, as does happen during bootup when all the tracers are
2171          * registered. This could cause the soft lockup watchdog to trigger.
2172          */
2173         cond_resched();
2174 
2175         tracing_selftest_running = true;
2176         ret = run_tracer_selftest(type);
2177         tracing_selftest_running = false;
2178 
2179         return ret;
2180 }
2181 
2182 static __init int init_trace_selftests(void)
2183 {
2184         struct trace_selftests *p, *n;
2185         struct tracer *t, **last;
2186         int ret;
2187 
2188         selftests_can_run = true;
2189 
2190         mutex_lock(&trace_types_lock);
2191 
2192         if (list_empty(&postponed_selftests))
2193                 goto out;
2194 
2195         pr_info("Running postponed tracer tests:\n");
2196 
2197         tracing_selftest_running = true;
2198         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2199                 /* This loop can take minutes when sanitizers are enabled, so
2200                  * lets make sure we allow RCU processing.
2201                  */
2202                 cond_resched();
2203                 ret = run_tracer_selftest(p->type);
2204                 /* If the test fails, then warn and remove from available_tracers */
2205                 if (ret < 0) {
2206                         WARN(1, "tracer: %s failed selftest, disabling\n",
2207                              p->type->name);
2208                         last = &trace_types;
2209                         for (t = trace_types; t; t = t->next) {
2210                                 if (t == p->type) {
2211                                         *last = t->next;
2212                                         break;
2213                                 }
2214                                 last = &t->next;
2215                         }
2216                 }
2217                 list_del(&p->list);
2218                 kfree(p);
2219         }
2220         tracing_selftest_running = false;
2221 
2222  out:
2223         mutex_unlock(&trace_types_lock);
2224 
2225         return 0;
2226 }
2227 core_initcall(init_trace_selftests);
2228 #else
2229 static inline int do_run_tracer_selftest(struct tracer *type)
2230 {
2231         return 0;
2232 }
2233 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2234 
2235 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2236 
2237 static void __init apply_trace_boot_options(void);
2238 
2239 /**
2240  * register_tracer - register a tracer with the ftrace system.
2241  * @type: the plugin for the tracer
2242  *
2243  * Register a new plugin tracer.
2244  */
2245 int __init register_tracer(struct tracer *type)
2246 {
2247         struct tracer *t;
2248         int ret = 0;
2249 
2250         if (!type->name) {
2251                 pr_info("Tracer must have a name\n");
2252                 return -1;
2253         }
2254 
2255         if (strlen(type->name) >= MAX_TRACER_SIZE) {
2256                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2257                 return -1;
2258         }
2259 
2260         if (security_locked_down(LOCKDOWN_TRACEFS)) {
2261                 pr_warn("Can not register tracer %s due to lockdown\n",
2262                            type->name);
2263                 return -EPERM;
2264         }
2265 
2266         mutex_lock(&trace_types_lock);
2267 
2268         for (t = trace_types; t; t = t->next) {
2269                 if (strcmp(type->name, t->name) == 0) {
2270                         /* already found */
2271                         pr_info("Tracer %s already registered\n",
2272                                 type->name);
2273                         ret = -1;
2274                         goto out;
2275                 }
2276         }
2277 
2278         if (!type->set_flag)
2279                 type->set_flag = &dummy_set_flag;
2280         if (!type->flags) {
2281                 /*allocate a dummy tracer_flags*/
2282                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2283                 if (!type->flags) {
2284                         ret = -ENOMEM;
2285                         goto out;
2286                 }
2287                 type->flags->val = 0;
2288                 type->flags->opts = dummy_tracer_opt;
2289         } else
2290                 if (!type->flags->opts)
2291                         type->flags->opts = dummy_tracer_opt;
2292 
2293         /* store the tracer for __set_tracer_option */
2294         type->flags->trace = type;
2295 
2296         ret = do_run_tracer_selftest(type);
2297         if (ret < 0)
2298                 goto out;
2299 
2300         type->next = trace_types;
2301         trace_types = type;
2302         add_tracer_options(&global_trace, type);
2303 
2304  out:
2305         mutex_unlock(&trace_types_lock);
2306 
2307         if (ret || !default_bootup_tracer)
2308                 goto out_unlock;
2309 
2310         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2311                 goto out_unlock;
2312 
2313         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2314         /* Do we want this tracer to start on bootup? */
2315         tracing_set_tracer(&global_trace, type->name);
2316         default_bootup_tracer = NULL;
2317 
2318         apply_trace_boot_options();
2319 
2320         /* disable other selftests, since this will break it. */
2321         disable_tracing_selftest("running a tracer");
2322 
2323  out_unlock:
2324         return ret;
2325 }
2326 
2327 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2328 {
2329         struct trace_buffer *buffer = buf->buffer;
2330 
2331         if (!buffer)
2332                 return;
2333 
2334         ring_buffer_record_disable(buffer);
2335 
2336         /* Make sure all commits have finished */
2337         synchronize_rcu();
2338         ring_buffer_reset_cpu(buffer, cpu);
2339 
2340         ring_buffer_record_enable(buffer);
2341 }
2342 
2343 void tracing_reset_online_cpus(struct array_buffer *buf)
2344 {
2345         struct trace_buffer *buffer = buf->buffer;
2346 
2347         if (!buffer)
2348                 return;
2349 
2350         ring_buffer_record_disable(buffer);
2351 
2352         /* Make sure all commits have finished */
2353         synchronize_rcu();
2354 
2355         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2356 
2357         ring_buffer_reset_online_cpus(buffer);
2358 
2359         ring_buffer_record_enable(buffer);
2360 }
2361 
2362 /* Must have trace_types_lock held */
2363 void tracing_reset_all_online_cpus_unlocked(void)
2364 {
2365         struct trace_array *tr;
2366 
2367         lockdep_assert_held(&trace_types_lock);
2368 
2369         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2370                 if (!tr->clear_trace)
2371                         continue;
2372                 tr->clear_trace = false;
2373                 tracing_reset_online_cpus(&tr->array_buffer);
2374 #ifdef CONFIG_TRACER_MAX_TRACE
2375                 tracing_reset_online_cpus(&tr->max_buffer);
2376 #endif
2377         }
2378 }
2379 
2380 void tracing_reset_all_online_cpus(void)
2381 {
2382         mutex_lock(&trace_types_lock);
2383         tracing_reset_all_online_cpus_unlocked();
2384         mutex_unlock(&trace_types_lock);
2385 }
2386 
2387 int is_tracing_stopped(void)
2388 {
2389         return global_trace.stop_count;
2390 }
2391 
2392 static void tracing_start_tr(struct trace_array *tr)
2393 {
2394         struct trace_buffer *buffer;
2395         unsigned long flags;
2396 
2397         if (tracing_disabled)
2398                 return;
2399 
2400         raw_spin_lock_irqsave(&tr->start_lock, flags);
2401         if (--tr->stop_count) {
2402                 if (WARN_ON_ONCE(tr->stop_count < 0)) {
2403                         /* Someone screwed up their debugging */
2404                         tr->stop_count = 0;
2405                 }
2406                 goto out;
2407         }
2408 
2409         /* Prevent the buffers from switching */
2410         arch_spin_lock(&tr->max_lock);
2411 
2412         buffer = tr->array_buffer.buffer;
2413         if (buffer)
2414                 ring_buffer_record_enable(buffer);
2415 
2416 #ifdef CONFIG_TRACER_MAX_TRACE
2417         buffer = tr->max_buffer.buffer;
2418         if (buffer)
2419                 ring_buffer_record_enable(buffer);
2420 #endif
2421 
2422         arch_spin_unlock(&tr->max_lock);
2423 
2424  out:
2425         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2426 }
2427 
2428 /**
2429  * tracing_start - quick start of the tracer
2430  *
2431  * If tracing is enabled but was stopped by tracing_stop,
2432  * this will start the tracer back up.
2433  */
2434 void tracing_start(void)
2435 
2436 {
2437         return tracing_start_tr(&global_trace);
2438 }
2439 
2440 static void tracing_stop_tr(struct trace_array *tr)
2441 {
2442         struct trace_buffer *buffer;
2443         unsigned long flags;
2444 
2445         raw_spin_lock_irqsave(&tr->start_lock, flags);
2446         if (tr->stop_count++)
2447                 goto out;
2448 
2449         /* Prevent the buffers from switching */
2450         arch_spin_lock(&tr->max_lock);
2451 
2452         buffer = tr->array_buffer.buffer;
2453         if (buffer)
2454                 ring_buffer_record_disable(buffer);
2455 
2456 #ifdef CONFIG_TRACER_MAX_TRACE
2457         buffer = tr->max_buffer.buffer;
2458         if (buffer)
2459                 ring_buffer_record_disable(buffer);
2460 #endif
2461 
2462         arch_spin_unlock(&tr->max_lock);
2463 
2464  out:
2465         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2466 }
2467 
2468 /**
2469  * tracing_stop - quick stop of the tracer
2470  *
2471  * Light weight way to stop tracing. Use in conjunction with
2472  * tracing_start.
2473  */
2474 void tracing_stop(void)
2475 {
2476         return tracing_stop_tr(&global_trace);
2477 }
2478 
2479 /*
2480  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2481  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2482  * simplifies those functions and keeps them in sync.
2483  */
2484 enum print_line_t trace_handle_return(struct trace_seq *s)
2485 {
2486         return trace_seq_has_overflowed(s) ?
2487                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2488 }
2489 EXPORT_SYMBOL_GPL(trace_handle_return);
2490 
2491 static unsigned short migration_disable_value(void)
2492 {
2493 #if defined(CONFIG_SMP)
2494         return current->migration_disabled;
2495 #else
2496         return 0;
2497 #endif
2498 }
2499 
2500 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2501 {
2502         unsigned int trace_flags = irqs_status;
2503         unsigned int pc;
2504 
2505         pc = preempt_count();
2506 
2507         if (pc & NMI_MASK)
2508                 trace_flags |= TRACE_FLAG_NMI;
2509         if (pc & HARDIRQ_MASK)
2510                 trace_flags |= TRACE_FLAG_HARDIRQ;
2511         if (in_serving_softirq())
2512                 trace_flags |= TRACE_FLAG_SOFTIRQ;
2513         if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2514                 trace_flags |= TRACE_FLAG_BH_OFF;
2515 
2516         if (tif_need_resched())
2517                 trace_flags |= TRACE_FLAG_NEED_RESCHED;
2518         if (test_preempt_need_resched())
2519                 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2520         return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2521                 (min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2522 }
2523 
2524 struct ring_buffer_event *
2525 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2526                           int type,
2527                           unsigned long len,
2528                           unsigned int trace_ctx)
2529 {
2530         return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2531 }
2532 
2533 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2534 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2535 static int trace_buffered_event_ref;
2536 
2537 /**
2538  * trace_buffered_event_enable - enable buffering events
2539  *
2540  * When events are being filtered, it is quicker to use a temporary
2541  * buffer to write the event data into if there's a likely chance
2542  * that it will not be committed. The discard of the ring buffer
2543  * is not as fast as committing, and is much slower than copying
2544  * a commit.
2545  *
2546  * When an event is to be filtered, allocate per cpu buffers to
2547  * write the event data into, and if the event is filtered and discarded
2548  * it is simply dropped, otherwise, the entire data is to be committed
2549  * in one shot.
2550  */
2551 void trace_buffered_event_enable(void)
2552 {
2553         struct ring_buffer_event *event;
2554         struct page *page;
2555         int cpu;
2556 
2557         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2558 
2559         if (trace_buffered_event_ref++)
2560                 return;
2561 
2562         for_each_tracing_cpu(cpu) {
2563                 page = alloc_pages_node(cpu_to_node(cpu),
2564                                         GFP_KERNEL | __GFP_NORETRY, 0);
2565                 /* This is just an optimization and can handle failures */
2566                 if (!page) {
2567                         pr_err("Failed to allocate event buffer\n");
2568                         break;
2569                 }
2570 
2571                 event = page_address(page);
2572                 memset(event, 0, sizeof(*event));
2573 
2574                 per_cpu(trace_buffered_event, cpu) = event;
2575 
2576                 preempt_disable();
2577                 if (cpu == smp_processor_id() &&
2578                     __this_cpu_read(trace_buffered_event) !=
2579                     per_cpu(trace_buffered_event, cpu))
2580                         WARN_ON_ONCE(1);
2581                 preempt_enable();
2582         }
2583 }
2584 
2585 static void enable_trace_buffered_event(void *data)
2586 {
2587         /* Probably not needed, but do it anyway */
2588         smp_rmb();
2589         this_cpu_dec(trace_buffered_event_cnt);
2590 }
2591 
2592 static void disable_trace_buffered_event(void *data)
2593 {
2594         this_cpu_inc(trace_buffered_event_cnt);
2595 }
2596 
2597 /**
2598  * trace_buffered_event_disable - disable buffering events
2599  *
2600  * When a filter is removed, it is faster to not use the buffered
2601  * events, and to commit directly into the ring buffer. Free up
2602  * the temp buffers when there are no more users. This requires
2603  * special synchronization with current events.
2604  */
2605 void trace_buffered_event_disable(void)
2606 {
2607         int cpu;
2608 
2609         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2610 
2611         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2612                 return;
2613 
2614         if (--trace_buffered_event_ref)
2615                 return;
2616 
2617         /* For each CPU, set the buffer as used. */
2618         on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
2619                          NULL, true);
2620 
2621         /* Wait for all current users to finish */
2622         synchronize_rcu();
2623 
2624         for_each_tracing_cpu(cpu) {
2625                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2626                 per_cpu(trace_buffered_event, cpu) = NULL;
2627         }
2628 
2629         /*
2630          * Wait for all CPUs that potentially started checking if they can use
2631          * their event buffer only after the previous synchronize_rcu() call and
2632          * they still read a valid pointer from trace_buffered_event. It must be
2633          * ensured they don't see cleared trace_buffered_event_cnt else they
2634          * could wrongly decide to use the pointed-to buffer which is now freed.
2635          */
2636         synchronize_rcu();
2637 
2638         /* For each CPU, relinquish the buffer */
2639         on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
2640                          true);
2641 }
2642 
2643 static struct trace_buffer *temp_buffer;
2644 
2645 struct ring_buffer_event *
2646 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2647                           struct trace_event_file *trace_file,
2648                           int type, unsigned long len,
2649                           unsigned int trace_ctx)
2650 {
2651         struct ring_buffer_event *entry;
2652         struct trace_array *tr = trace_file->tr;
2653         int val;
2654 
2655         *current_rb = tr->array_buffer.buffer;
2656 
2657         if (!tr->no_filter_buffering_ref &&
2658             (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2659                 preempt_disable_notrace();
2660                 /*
2661                  * Filtering is on, so try to use the per cpu buffer first.
2662                  * This buffer will simulate a ring_buffer_event,
2663                  * where the type_len is zero and the array[0] will
2664                  * hold the full length.
2665                  * (see include/linux/ring-buffer.h for details on
2666                  *  how the ring_buffer_event is structured).
2667                  *
2668                  * Using a temp buffer during filtering and copying it
2669                  * on a matched filter is quicker than writing directly
2670                  * into the ring buffer and then discarding it when
2671                  * it doesn't match. That is because the discard
2672                  * requires several atomic operations to get right.
2673                  * Copying on match and doing nothing on a failed match
2674                  * is still quicker than no copy on match, but having
2675                  * to discard out of the ring buffer on a failed match.
2676                  */
2677                 if ((entry = __this_cpu_read(trace_buffered_event))) {
2678                         int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2679 
2680                         val = this_cpu_inc_return(trace_buffered_event_cnt);
2681 
2682                         /*
2683                          * Preemption is disabled, but interrupts and NMIs
2684                          * can still come in now. If that happens after
2685                          * the above increment, then it will have to go
2686                          * back to the old method of allocating the event
2687                          * on the ring buffer, and if the filter fails, it
2688                          * will have to call ring_buffer_discard_commit()
2689                          * to remove it.
2690                          *
2691                          * Need to also check the unlikely case that the
2692                          * length is bigger than the temp buffer size.
2693                          * If that happens, then the reserve is pretty much
2694                          * guaranteed to fail, as the ring buffer currently
2695                          * only allows events less than a page. But that may
2696                          * change in the future, so let the ring buffer reserve
2697                          * handle the failure in that case.
2698                          */
2699                         if (val == 1 && likely(len <= max_len)) {
2700                                 trace_event_setup(entry, type, trace_ctx);
2701                                 entry->array[0] = len;
2702                                 /* Return with preemption disabled */
2703                                 return entry;
2704                         }
2705                         this_cpu_dec(trace_buffered_event_cnt);
2706                 }
2707                 /* __trace_buffer_lock_reserve() disables preemption */
2708                 preempt_enable_notrace();
2709         }
2710 
2711         entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2712                                             trace_ctx);
2713         /*
2714          * If tracing is off, but we have triggers enabled
2715          * we still need to look at the event data. Use the temp_buffer
2716          * to store the trace event for the trigger to use. It's recursive
2717          * safe and will not be recorded anywhere.
2718          */
2719         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2720                 *current_rb = temp_buffer;
2721                 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2722                                                     trace_ctx);
2723         }
2724         return entry;
2725 }
2726 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2727 
2728 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2729 static DEFINE_MUTEX(tracepoint_printk_mutex);
2730 
2731 static void output_printk(struct trace_event_buffer *fbuffer)
2732 {
2733         struct trace_event_call *event_call;
2734         struct trace_event_file *file;
2735         struct trace_event *event;
2736         unsigned long flags;
2737         struct trace_iterator *iter = tracepoint_print_iter;
2738 
2739         /* We should never get here if iter is NULL */
2740         if (WARN_ON_ONCE(!iter))
2741                 return;
2742 
2743         event_call = fbuffer->trace_file->event_call;
2744         if (!event_call || !event_call->event.funcs ||
2745             !event_call->event.funcs->trace)
2746                 return;
2747 
2748         file = fbuffer->trace_file;
2749         if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2750             (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2751              !filter_match_preds(file->filter, fbuffer->entry)))
2752                 return;
2753 
2754         event = &fbuffer->trace_file->event_call->event;
2755 
2756         raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2757         trace_seq_init(&iter->seq);
2758         iter->ent = fbuffer->entry;
2759         event_call->event.funcs->trace(iter, 0, event);
2760         trace_seq_putc(&iter->seq, 0);
2761         printk("%s", iter->seq.buffer);
2762 
2763         raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2764 }
2765 
2766 int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
2767                              void *buffer, size_t *lenp,
2768                              loff_t *ppos)
2769 {
2770         int save_tracepoint_printk;
2771         int ret;
2772 
2773         mutex_lock(&tracepoint_printk_mutex);
2774         save_tracepoint_printk = tracepoint_printk;
2775 
2776         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2777 
2778         /*
2779          * This will force exiting early, as tracepoint_printk
2780          * is always zero when tracepoint_printk_iter is not allocated
2781          */
2782         if (!tracepoint_print_iter)
2783                 tracepoint_printk = 0;
2784 
2785         if (save_tracepoint_printk == tracepoint_printk)
2786                 goto out;
2787 
2788         if (tracepoint_printk)
2789                 static_key_enable(&tracepoint_printk_key.key);
2790         else
2791                 static_key_disable(&tracepoint_printk_key.key);
2792 
2793  out:
2794         mutex_unlock(&tracepoint_printk_mutex);
2795 
2796         return ret;
2797 }
2798 
2799 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2800 {
2801         enum event_trigger_type tt = ETT_NONE;
2802         struct trace_event_file *file = fbuffer->trace_file;
2803 
2804         if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2805                         fbuffer->entry, &tt))
2806                 goto discard;
2807 
2808         if (static_key_false(&tracepoint_printk_key.key))
2809                 output_printk(fbuffer);
2810 
2811         if (static_branch_unlikely(&trace_event_exports_enabled))
2812                 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2813 
2814         trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2815                         fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2816 
2817 discard:
2818         if (tt)
2819                 event_triggers_post_call(file, tt);
2820 
2821 }
2822 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2823 
2824 /*
2825  * Skip 3:
2826  *
2827  *   trace_buffer_unlock_commit_regs()
2828  *   trace_event_buffer_commit()
2829  *   trace_event_raw_event_xxx()
2830  */
2831 # define STACK_SKIP 3
2832 
2833 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2834                                      struct trace_buffer *buffer,
2835                                      struct ring_buffer_event *event,
2836                                      unsigned int trace_ctx,
2837                                      struct pt_regs *regs)
2838 {
2839         __buffer_unlock_commit(buffer, event);
2840 
2841         /*
2842          * If regs is not set, then skip the necessary functions.
2843          * Note, we can still get here via blktrace, wakeup tracer
2844          * and mmiotrace, but that's ok if they lose a function or
2845          * two. They are not that meaningful.
2846          */
2847         ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2848         ftrace_trace_userstack(tr, buffer, trace_ctx);
2849 }
2850 
2851 /*
2852  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2853  */
2854 void
2855 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2856                                    struct ring_buffer_event *event)
2857 {
2858         __buffer_unlock_commit(buffer, event);
2859 }
2860 
2861 void
2862 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2863                parent_ip, unsigned int trace_ctx)
2864 {
2865         struct trace_event_call *call = &event_function;
2866         struct trace_buffer *buffer = tr->array_buffer.buffer;
2867         struct ring_buffer_event *event;
2868         struct ftrace_entry *entry;
2869 
2870         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2871                                             trace_ctx);
2872         if (!event)
2873                 return;
2874         entry   = ring_buffer_event_data(event);
2875         entry->ip                       = ip;
2876         entry->parent_ip                = parent_ip;
2877 
2878         if (!call_filter_check_discard(call, entry, buffer, event)) {
2879                 if (static_branch_unlikely(&trace_function_exports_enabled))
2880                         ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2881                 __buffer_unlock_commit(buffer, event);
2882         }
2883 }
2884 
2885 #ifdef CONFIG_STACKTRACE
2886 
2887 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2888 #define FTRACE_KSTACK_NESTING   4
2889 
2890 #define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
2891 
2892 struct ftrace_stack {
2893         unsigned long           calls[FTRACE_KSTACK_ENTRIES];
2894 };
2895 
2896 
2897 struct ftrace_stacks {
2898         struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
2899 };
2900 
2901 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2902 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2903 
2904 static void __ftrace_trace_stack(struct trace_buffer *buffer,
2905                                  unsigned int trace_ctx,
2906                                  int skip, struct pt_regs *regs)
2907 {
2908         struct trace_event_call *call = &event_kernel_stack;
2909         struct ring_buffer_event *event;
2910         unsigned int size, nr_entries;
2911         struct ftrace_stack *fstack;
2912         struct stack_entry *entry;
2913         int stackidx;
2914 
2915         /*
2916          * Add one, for this function and the call to save_stack_trace()
2917          * If regs is set, then these functions will not be in the way.
2918          */
2919 #ifndef CONFIG_UNWINDER_ORC
2920         if (!regs)
2921                 skip++;
2922 #endif
2923 
2924         preempt_disable_notrace();
2925 
2926         stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2927 
2928         /* This should never happen. If it does, yell once and skip */
2929         if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
2930                 goto out;
2931 
2932         /*
2933          * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2934          * interrupt will either see the value pre increment or post
2935          * increment. If the interrupt happens pre increment it will have
2936          * restored the counter when it returns.  We just need a barrier to
2937          * keep gcc from moving things around.
2938          */
2939         barrier();
2940 
2941         fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2942         size = ARRAY_SIZE(fstack->calls);
2943 
2944         if (regs) {
2945                 nr_entries = stack_trace_save_regs(regs, fstack->calls,
2946                                                    size, skip);
2947         } else {
2948                 nr_entries = stack_trace_save(fstack->calls, size, skip);
2949         }
2950 
2951         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2952                                     struct_size(entry, caller, nr_entries),
2953                                     trace_ctx);
2954         if (!event)
2955                 goto out;
2956         entry = ring_buffer_event_data(event);
2957 
2958         entry->size = nr_entries;
2959         memcpy(&entry->caller, fstack->calls,
2960                flex_array_size(entry, caller, nr_entries));
2961 
2962         if (!call_filter_check_discard(call, entry, buffer, event))
2963                 __buffer_unlock_commit(buffer, event);
2964 
2965  out:
2966         /* Again, don't let gcc optimize things here */
2967         barrier();
2968         __this_cpu_dec(ftrace_stack_reserve);
2969         preempt_enable_notrace();
2970 
2971 }
2972 
2973 static inline void ftrace_trace_stack(struct trace_array *tr,
2974                                       struct trace_buffer *buffer,
2975                                       unsigned int trace_ctx,
2976                                       int skip, struct pt_regs *regs)
2977 {
2978         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2979                 return;
2980 
2981         __ftrace_trace_stack(buffer, trace_ctx, skip, regs);
2982 }
2983 
2984 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
2985                    int skip)
2986 {
2987         struct trace_buffer *buffer = tr->array_buffer.buffer;
2988 
2989         if (rcu_is_watching()) {
2990                 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
2991                 return;
2992         }
2993 
2994         if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
2995                 return;
2996 
2997         /*
2998          * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
2999          * but if the above rcu_is_watching() failed, then the NMI
3000          * triggered someplace critical, and ct_irq_enter() should
3001          * not be called from NMI.
3002          */
3003         if (unlikely(in_nmi()))
3004                 return;
3005 
3006         ct_irq_enter_irqson();
3007         __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3008         ct_irq_exit_irqson();
3009 }
3010 
3011 /**
3012  * trace_dump_stack - record a stack back trace in the trace buffer
3013  * @skip: Number of functions to skip (helper handlers)
3014  */
3015 void trace_dump_stack(int skip)
3016 {
3017         if (tracing_disabled || tracing_selftest_running)
3018                 return;
3019 
3020 #ifndef CONFIG_UNWINDER_ORC
3021         /* Skip 1 to skip this function. */
3022         skip++;
3023 #endif
3024         __ftrace_trace_stack(global_trace.array_buffer.buffer,
3025                              tracing_gen_ctx(), skip, NULL);
3026 }
3027 EXPORT_SYMBOL_GPL(trace_dump_stack);
3028 
3029 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3030 static DEFINE_PER_CPU(int, user_stack_count);
3031 
3032 static void
3033 ftrace_trace_userstack(struct trace_array *tr,
3034                        struct trace_buffer *buffer, unsigned int trace_ctx)
3035 {
3036         struct trace_event_call *call = &event_user_stack;
3037         struct ring_buffer_event *event;
3038         struct userstack_entry *entry;
3039 
3040         if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3041                 return;
3042 
3043         /*
3044          * NMIs can not handle page faults, even with fix ups.
3045          * The save user stack can (and often does) fault.
3046          */
3047         if (unlikely(in_nmi()))
3048                 return;
3049 
3050         /*
3051          * prevent recursion, since the user stack tracing may
3052          * trigger other kernel events.
3053          */
3054         preempt_disable();
3055         if (__this_cpu_read(user_stack_count))
3056                 goto out;
3057 
3058         __this_cpu_inc(user_stack_count);
3059 
3060         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3061                                             sizeof(*entry), trace_ctx);
3062         if (!event)
3063                 goto out_drop_count;
3064         entry   = ring_buffer_event_data(event);
3065 
3066         entry->tgid             = current->tgid;
3067         memset(&entry->caller, 0, sizeof(entry->caller));
3068 
3069         stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3070         if (!call_filter_check_discard(call, entry, buffer, event))
3071                 __buffer_unlock_commit(buffer, event);
3072 
3073  out_drop_count:
3074         __this_cpu_dec(user_stack_count);
3075  out:
3076         preempt_enable();
3077 }
3078 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3079 static void ftrace_trace_userstack(struct trace_array *tr,
3080                                    struct trace_buffer *buffer,
3081                                    unsigned int trace_ctx)
3082 {
3083 }
3084 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3085 
3086 #endif /* CONFIG_STACKTRACE */
3087 
3088 static inline void
3089 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3090                           unsigned long long delta)
3091 {
3092         entry->bottom_delta_ts = delta & U32_MAX;
3093         entry->top_delta_ts = (delta >> 32);
3094 }
3095 
3096 void trace_last_func_repeats(struct trace_array *tr,
3097                              struct trace_func_repeats *last_info,
3098                              unsigned int trace_ctx)
3099 {
3100         struct trace_buffer *buffer = tr->array_buffer.buffer;
3101         struct func_repeats_entry *entry;
3102         struct ring_buffer_event *event;
3103         u64 delta;
3104 
3105         event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3106                                             sizeof(*entry), trace_ctx);
3107         if (!event)
3108                 return;
3109 
3110         delta = ring_buffer_event_time_stamp(buffer, event) -
3111                 last_info->ts_last_call;
3112 
3113         entry = ring_buffer_event_data(event);
3114         entry->ip = last_info->ip;
3115         entry->parent_ip = last_info->parent_ip;
3116         entry->count = last_info->count;
3117         func_repeats_set_delta_ts(entry, delta);
3118 
3119         __buffer_unlock_commit(buffer, event);
3120 }
3121 
3122 /* created for use with alloc_percpu */
3123 struct trace_buffer_struct {
3124         int nesting;
3125         char buffer[4][TRACE_BUF_SIZE];
3126 };
3127 
3128 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3129 
3130 /*
3131  * This allows for lockless recording.  If we're nested too deeply, then
3132  * this returns NULL.
3133  */
3134 static char *get_trace_buf(void)
3135 {
3136         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3137 
3138         if (!trace_percpu_buffer || buffer->nesting >= 4)
3139                 return NULL;
3140 
3141         buffer->nesting++;
3142 
3143         /* Interrupts must see nesting incremented before we use the buffer */
3144         barrier();
3145         return &buffer->buffer[buffer->nesting - 1][0];
3146 }
3147 
3148 static void put_trace_buf(void)
3149 {
3150         /* Don't let the decrement of nesting leak before this */
3151         barrier();
3152         this_cpu_dec(trace_percpu_buffer->nesting);
3153 }
3154 
3155 static int alloc_percpu_trace_buffer(void)
3156 {
3157         struct trace_buffer_struct __percpu *buffers;
3158 
3159         if (trace_percpu_buffer)
3160                 return 0;
3161 
3162         buffers = alloc_percpu(struct trace_buffer_struct);
3163         if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3164                 return -ENOMEM;
3165 
3166         trace_percpu_buffer = buffers;
3167         return 0;
3168 }
3169 
3170 static int buffers_allocated;
3171 
3172 void trace_printk_init_buffers(void)
3173 {
3174         if (buffers_allocated)
3175                 return;
3176 
3177         if (alloc_percpu_trace_buffer())
3178                 return;
3179 
3180         /* trace_printk() is for debug use only. Don't use it in production. */
3181 
3182         pr_warn("\n");
3183         pr_warn("**********************************************************\n");
3184         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3185         pr_warn("**                                                      **\n");
3186         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3187         pr_warn("**                                                      **\n");
3188         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3189         pr_warn("** unsafe for production use.                           **\n");
3190         pr_warn("**                                                      **\n");
3191         pr_warn("** If you see this message and you are not debugging    **\n");
3192         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3193         pr_warn("**                                                      **\n");
3194         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3195         pr_warn("**********************************************************\n");
3196 
3197         /* Expand the buffers to set size */
3198         tracing_update_buffers(&global_trace);
3199 
3200         buffers_allocated = 1;
3201 
3202         /*
3203          * trace_printk_init_buffers() can be called by modules.
3204          * If that happens, then we need to start cmdline recording
3205          * directly here. If the global_trace.buffer is already
3206          * allocated here, then this was called by module code.
3207          */
3208         if (global_trace.array_buffer.buffer)
3209                 tracing_start_cmdline_record();
3210 }
3211 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3212 
3213 void trace_printk_start_comm(void)
3214 {
3215         /* Start tracing comms if trace printk is set */
3216         if (!buffers_allocated)
3217                 return;
3218         tracing_start_cmdline_record();
3219 }
3220 
3221 static void trace_printk_start_stop_comm(int enabled)
3222 {
3223         if (!buffers_allocated)
3224                 return;
3225 
3226         if (enabled)
3227                 tracing_start_cmdline_record();
3228         else
3229                 tracing_stop_cmdline_record();
3230 }
3231 
3232 /**
3233  * trace_vbprintk - write binary msg to tracing buffer
3234  * @ip:    The address of the caller
3235  * @fmt:   The string format to write to the buffer
3236  * @args:  Arguments for @fmt
3237  */
3238 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3239 {
3240         struct trace_event_call *call = &event_bprint;
3241         struct ring_buffer_event *event;
3242         struct trace_buffer *buffer;
3243         struct trace_array *tr = &global_trace;
3244         struct bprint_entry *entry;
3245         unsigned int trace_ctx;
3246         char *tbuffer;
3247         int len = 0, size;
3248 
3249         if (unlikely(tracing_selftest_running || tracing_disabled))
3250                 return 0;
3251 
3252         /* Don't pollute graph traces with trace_vprintk internals */
3253         pause_graph_tracing();
3254 
3255         trace_ctx = tracing_gen_ctx();
3256         preempt_disable_notrace();
3257 
3258         tbuffer = get_trace_buf();
3259         if (!tbuffer) {
3260                 len = 0;
3261                 goto out_nobuffer;
3262         }
3263 
3264         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3265 
3266         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3267                 goto out_put;
3268 
3269         size = sizeof(*entry) + sizeof(u32) * len;
3270         buffer = tr->array_buffer.buffer;
3271         ring_buffer_nest_start(buffer);
3272         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3273                                             trace_ctx);
3274         if (!event)
3275                 goto out;
3276         entry = ring_buffer_event_data(event);
3277         entry->ip                       = ip;
3278         entry->fmt                      = fmt;
3279 
3280         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3281         if (!call_filter_check_discard(call, entry, buffer, event)) {
3282                 __buffer_unlock_commit(buffer, event);
3283                 ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3284         }
3285 
3286 out:
3287         ring_buffer_nest_end(buffer);
3288 out_put:
3289         put_trace_buf();
3290 
3291 out_nobuffer:
3292         preempt_enable_notrace();
3293         unpause_graph_tracing();
3294 
3295         return len;
3296 }
3297 EXPORT_SYMBOL_GPL(trace_vbprintk);
3298 
3299 __printf(3, 0)
3300 static int
3301 __trace_array_vprintk(struct trace_buffer *buffer,
3302                       unsigned long ip, const char *fmt, va_list args)
3303 {
3304         struct trace_event_call *call = &event_print;
3305         struct ring_buffer_event *event;
3306         int len = 0, size;
3307         struct print_entry *entry;
3308         unsigned int trace_ctx;
3309         char *tbuffer;
3310 
3311         if (tracing_disabled)
3312                 return 0;
3313 
3314         /* Don't pollute graph traces with trace_vprintk internals */
3315         pause_graph_tracing();
3316 
3317         trace_ctx = tracing_gen_ctx();
3318         preempt_disable_notrace();
3319 
3320 
3321         tbuffer = get_trace_buf();
3322         if (!tbuffer) {
3323                 len = 0;
3324                 goto out_nobuffer;
3325         }
3326 
3327         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3328 
3329         size = sizeof(*entry) + len + 1;
3330         ring_buffer_nest_start(buffer);
3331         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3332                                             trace_ctx);
3333         if (!event)
3334                 goto out;
3335         entry = ring_buffer_event_data(event);
3336         entry->ip = ip;
3337 
3338         memcpy(&entry->buf, tbuffer, len + 1);
3339         if (!call_filter_check_discard(call, entry, buffer, event)) {
3340                 __buffer_unlock_commit(buffer, event);
3341                 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3342         }
3343 
3344 out:
3345         ring_buffer_nest_end(buffer);
3346         put_trace_buf();
3347 
3348 out_nobuffer:
3349         preempt_enable_notrace();
3350         unpause_graph_tracing();
3351 
3352         return len;
3353 }
3354 
3355 __printf(3, 0)
3356 int trace_array_vprintk(struct trace_array *tr,
3357                         unsigned long ip, const char *fmt, va_list args)
3358 {
3359         if (tracing_selftest_running && tr == &global_trace)
3360                 return 0;
3361 
3362         return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3363 }
3364 
3365 /**
3366  * trace_array_printk - Print a message to a specific instance
3367  * @tr: The instance trace_array descriptor
3368  * @ip: The instruction pointer that this is called from.
3369  * @fmt: The format to print (printf format)
3370  *
3371  * If a subsystem sets up its own instance, they have the right to
3372  * printk strings into their tracing instance buffer using this
3373  * function. Note, this function will not write into the top level
3374  * buffer (use trace_printk() for that), as writing into the top level
3375  * buffer should only have events that can be individually disabled.
3376  * trace_printk() is only used for debugging a kernel, and should not
3377  * be ever incorporated in normal use.
3378  *
3379  * trace_array_printk() can be used, as it will not add noise to the
3380  * top level tracing buffer.
3381  *
3382  * Note, trace_array_init_printk() must be called on @tr before this
3383  * can be used.
3384  */
3385 __printf(3, 0)
3386 int trace_array_printk(struct trace_array *tr,
3387                        unsigned long ip, const char *fmt, ...)
3388 {
3389         int ret;
3390         va_list ap;
3391 
3392         if (!tr)
3393                 return -ENOENT;
3394 
3395         /* This is only allowed for created instances */
3396         if (tr == &global_trace)
3397                 return 0;
3398 
3399         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3400                 return 0;
3401 
3402         va_start(ap, fmt);
3403         ret = trace_array_vprintk(tr, ip, fmt, ap);
3404         va_end(ap);
3405         return ret;
3406 }
3407 EXPORT_SYMBOL_GPL(trace_array_printk);
3408 
3409 /**
3410  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3411  * @tr: The trace array to initialize the buffers for
3412  *
3413  * As trace_array_printk() only writes into instances, they are OK to
3414  * have in the kernel (unlike trace_printk()). This needs to be called
3415  * before trace_array_printk() can be used on a trace_array.
3416  */
3417 int trace_array_init_printk(struct trace_array *tr)
3418 {
3419         if (!tr)
3420                 return -ENOENT;
3421 
3422         /* This is only allowed for created instances */
3423         if (tr == &global_trace)
3424                 return -EINVAL;
3425 
3426         return alloc_percpu_trace_buffer();
3427 }
3428 EXPORT_SYMBOL_GPL(trace_array_init_printk);
3429 
3430 __printf(3, 4)
3431 int trace_array_printk_buf(struct trace_buffer *buffer,
3432                            unsigned long ip, const char *fmt, ...)
3433 {
3434         int ret;
3435         va_list ap;
3436 
3437         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3438                 return 0;
3439 
3440         va_start(ap, fmt);
3441         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3442         va_end(ap);
3443         return ret;
3444 }
3445 
3446 __printf(2, 0)
3447 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3448 {
3449         return trace_array_vprintk(&global_trace, ip, fmt, args);
3450 }
3451 EXPORT_SYMBOL_GPL(trace_vprintk);
3452 
3453 static void trace_iterator_increment(struct trace_iterator *iter)
3454 {
3455         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3456 
3457         iter->idx++;
3458         if (buf_iter)
3459                 ring_buffer_iter_advance(buf_iter);
3460 }
3461 
3462 static struct trace_entry *
3463 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3464                 unsigned long *lost_events)
3465 {
3466         struct ring_buffer_event *event;
3467         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3468 
3469         if (buf_iter) {
3470                 event = ring_buffer_iter_peek(buf_iter, ts);
3471                 if (lost_events)
3472                         *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3473                                 (unsigned long)-1 : 0;
3474         } else {
3475                 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3476                                          lost_events);
3477         }
3478 
3479         if (event) {
3480                 iter->ent_size = ring_buffer_event_length(event);
3481                 return ring_buffer_event_data(event);
3482         }
3483         iter->ent_size = 0;
3484         return NULL;
3485 }
3486 
3487 static struct trace_entry *
3488 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3489                   unsigned long *missing_events, u64 *ent_ts)
3490 {
3491         struct trace_buffer *buffer = iter->array_buffer->buffer;
3492         struct trace_entry *ent, *next = NULL;
3493         unsigned long lost_events = 0, next_lost = 0;
3494         int cpu_file = iter->cpu_file;
3495         u64 next_ts = 0, ts;
3496         int next_cpu = -1;
3497         int next_size = 0;
3498         int cpu;
3499 
3500         /*
3501          * If we are in a per_cpu trace file, don't bother by iterating over
3502          * all cpu and peek directly.
3503          */
3504         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3505                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3506                         return NULL;
3507                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3508                 if (ent_cpu)
3509                         *ent_cpu = cpu_file;
3510 
3511                 return ent;
3512         }
3513 
3514         for_each_tracing_cpu(cpu) {
3515 
3516                 if (ring_buffer_empty_cpu(buffer, cpu))
3517                         continue;
3518 
3519                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3520 
3521                 /*
3522                  * Pick the entry with the smallest timestamp:
3523                  */
3524                 if (ent && (!next || ts < next_ts)) {
3525                         next = ent;
3526                         next_cpu = cpu;
3527                         next_ts = ts;
3528                         next_lost = lost_events;
3529                         next_size = iter->ent_size;
3530                 }
3531         }
3532 
3533         iter->ent_size = next_size;
3534 
3535         if (ent_cpu)
3536                 *ent_cpu = next_cpu;
3537 
3538         if (ent_ts)
3539                 *ent_ts = next_ts;
3540 
3541         if (missing_events)
3542                 *missing_events = next_lost;
3543 
3544         return next;
3545 }
3546 
3547 #define STATIC_FMT_BUF_SIZE     128
3548 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3549 
3550 char *trace_iter_expand_format(struct trace_iterator *iter)
3551 {
3552         char *tmp;
3553 
3554         /*
3555          * iter->tr is NULL when used with tp_printk, which makes
3556          * this get called where it is not safe to call krealloc().
3557          */
3558         if (!iter->tr || iter->fmt == static_fmt_buf)
3559                 return NULL;
3560 
3561         tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3562                        GFP_KERNEL);
3563         if (tmp) {
3564                 iter->fmt_size += STATIC_FMT_BUF_SIZE;
3565                 iter->fmt = tmp;
3566         }
3567 
3568         return tmp;
3569 }
3570 
3571 /* Returns true if the string is safe to dereference from an event */
3572 static bool trace_safe_str(struct trace_iterator *iter, const char *str,
3573                            bool star, int len)
3574 {
3575         unsigned long addr = (unsigned long)str;
3576         struct trace_event *trace_event;
3577         struct trace_event_call *event;
3578 
3579         /* Ignore strings with no length */
3580         if (star && !len)
3581                 return true;
3582 
3583         /* OK if part of the event data */
3584         if ((addr >= (unsigned long)iter->ent) &&
3585             (addr < (unsigned long)iter->ent + iter->ent_size))
3586                 return true;
3587 
3588         /* OK if part of the temp seq buffer */
3589         if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3590             (addr < (unsigned long)iter->tmp_seq.buffer + TRACE_SEQ_BUFFER_SIZE))
3591                 return true;
3592 
3593         /* Core rodata can not be freed */
3594         if (is_kernel_rodata(addr))
3595                 return true;
3596 
3597         if (trace_is_tracepoint_string(str))
3598                 return true;
3599 
3600         /*
3601          * Now this could be a module event, referencing core module
3602          * data, which is OK.
3603          */
3604         if (!iter->ent)
3605                 return false;
3606 
3607         trace_event = ftrace_find_event(iter->ent->type);
3608         if (!trace_event)
3609                 return false;
3610 
3611         event = container_of(trace_event, struct trace_event_call, event);
3612         if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3613                 return false;
3614 
3615         /* Would rather have rodata, but this will suffice */
3616         if (within_module_core(addr, event->module))
3617                 return true;
3618 
3619         return false;
3620 }
3621 
3622 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3623 
3624 static int test_can_verify_check(const char *fmt, ...)
3625 {
3626         char buf[16];
3627         va_list ap;
3628         int ret;
3629 
3630         /*
3631          * The verifier is dependent on vsnprintf() modifies the va_list
3632          * passed to it, where it is sent as a reference. Some architectures
3633          * (like x86_32) passes it by value, which means that vsnprintf()
3634          * does not modify the va_list passed to it, and the verifier
3635          * would then need to be able to understand all the values that
3636          * vsnprintf can use. If it is passed by value, then the verifier
3637          * is disabled.
3638          */
3639         va_start(ap, fmt);
3640         vsnprintf(buf, 16, "%d", ap);
3641         ret = va_arg(ap, int);
3642         va_end(ap);
3643 
3644         return ret;
3645 }
3646 
3647 static void test_can_verify(void)
3648 {
3649         if (!test_can_verify_check("%d %d", 0, 1)) {
3650                 pr_info("trace event string verifier disabled\n");
3651                 static_branch_inc(&trace_no_verify);
3652         }
3653 }
3654 
3655 /**
3656  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3657  * @iter: The iterator that holds the seq buffer and the event being printed
3658  * @fmt: The format used to print the event
3659  * @ap: The va_list holding the data to print from @fmt.
3660  *
3661  * This writes the data into the @iter->seq buffer using the data from
3662  * @fmt and @ap. If the format has a %s, then the source of the string
3663  * is examined to make sure it is safe to print, otherwise it will
3664  * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
3665  * pointer.
3666  */
3667 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3668                          va_list ap)
3669 {
3670         const char *p = fmt;
3671         const char *str;
3672         int i, j;
3673 
3674         if (WARN_ON_ONCE(!fmt))
3675                 return;
3676 
3677         if (static_branch_unlikely(&trace_no_verify))
3678                 goto print;
3679 
3680         /* Don't bother checking when doing a ftrace_dump() */
3681         if (iter->fmt == static_fmt_buf)
3682                 goto print;
3683 
3684         while (*p) {
3685                 bool star = false;
3686                 int len = 0;
3687 
3688                 j = 0;
3689 
3690                 /* We only care about %s and variants */
3691                 for (i = 0; p[i]; i++) {
3692                         if (i + 1 >= iter->fmt_size) {
3693                                 /*
3694                                  * If we can't expand the copy buffer,
3695                                  * just print it.
3696                                  */
3697                                 if (!trace_iter_expand_format(iter))
3698                                         goto print;
3699                         }
3700 
3701                         if (p[i] == '\\' && p[i+1]) {
3702                                 i++;
3703                                 continue;
3704                         }
3705                         if (p[i] == '%') {
3706                                 /* Need to test cases like %08.*s */
3707                                 for (j = 1; p[i+j]; j++) {
3708                                         if (isdigit(p[i+j]) ||
3709                                             p[i+j] == '.')
3710                                                 continue;
3711                                         if (p[i+j] == '*') {
3712                                                 star = true;
3713                                                 continue;
3714                                         }
3715                                         break;
3716                                 }
3717                                 if (p[i+j] == 's')
3718                                         break;
3719                                 star = false;
3720                         }
3721                         j = 0;
3722                 }
3723                 /* If no %s found then just print normally */
3724                 if (!p[i])
3725                         break;
3726 
3727                 /* Copy up to the %s, and print that */
3728                 strncpy(iter->fmt, p, i);
3729                 iter->fmt[i] = '\0';
3730                 trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3731 
3732                 /*
3733                  * If iter->seq is full, the above call no longer guarantees
3734                  * that ap is in sync with fmt processing, and further calls
3735                  * to va_arg() can return wrong positional arguments.
3736                  *
3737                  * Ensure that ap is no longer used in this case.
3738                  */
3739                 if (iter->seq.full) {
3740                         p = "";
3741                         break;
3742                 }
3743 
3744                 if (star)
3745                         len = va_arg(ap, int);
3746 
3747                 /* The ap now points to the string data of the %s */
3748                 str = va_arg(ap, const char *);
3749 
3750                 /*
3751                  * If you hit this warning, it is likely that the
3752                  * trace event in question used %s on a string that
3753                  * was saved at the time of the event, but may not be
3754                  * around when the trace is read. Use __string(),
3755                  * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3756                  * instead. See samples/trace_events/trace-events-sample.h
3757                  * for reference.
3758                  */
3759                 if (WARN_ONCE(!trace_safe_str(iter, str, star, len),
3760                               "fmt: '%s' current_buffer: '%s'",
3761                               fmt, seq_buf_str(&iter->seq.seq))) {
3762                         int ret;
3763 
3764                         /* Try to safely read the string */
3765                         if (star) {
3766                                 if (len + 1 > iter->fmt_size)
3767                                         len = iter->fmt_size - 1;
3768                                 if (len < 0)
3769                                         len = 0;
3770                                 ret = copy_from_kernel_nofault(iter->fmt, str, len);
3771                                 iter->fmt[len] = 0;
3772                                 star = false;
3773                         } else {
3774                                 ret = strncpy_from_kernel_nofault(iter->fmt, str,
3775                                                                   iter->fmt_size);
3776                         }
3777                         if (ret < 0)
3778                                 trace_seq_printf(&iter->seq, "(0x%px)", str);
3779                         else
3780                                 trace_seq_printf(&iter->seq, "(0x%px:%s)",
3781                                                  str, iter->fmt);
3782                         str = "[UNSAFE-MEMORY]";
3783                         strcpy(iter->fmt, "%s");
3784                 } else {
3785                         strncpy(iter->fmt, p + i, j + 1);
3786                         iter->fmt[j+1] = '\0';
3787                 }
3788                 if (star)
3789                         trace_seq_printf(&iter->seq, iter->fmt, len, str);
3790                 else
3791                         trace_seq_printf(&iter->seq, iter->fmt, str);
3792 
3793                 p += i + j + 1;
3794         }
3795  print:
3796         if (*p)
3797                 trace_seq_vprintf(&iter->seq, p, ap);
3798 }
3799 
3800 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3801 {
3802         const char *p, *new_fmt;
3803         char *q;
3804 
3805         if (WARN_ON_ONCE(!fmt))
3806                 return fmt;
3807 
3808         if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3809                 return fmt;
3810 
3811         p = fmt;
3812         new_fmt = q = iter->fmt;
3813         while (*p) {
3814                 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3815                         if (!trace_iter_expand_format(iter))
3816                                 return fmt;
3817 
3818                         q += iter->fmt - new_fmt;
3819                         new_fmt = iter->fmt;
3820                 }
3821 
3822                 *q++ = *p++;
3823 
3824                 /* Replace %p with %px */
3825                 if (p[-1] == '%') {
3826                         if (p[0] == '%') {
3827                                 *q++ = *p++;
3828                         } else if (p[0] == 'p' && !isalnum(p[1])) {
3829                                 *q++ = *p++;
3830                                 *q++ = 'x';
3831                         }
3832                 }
3833         }
3834         *q = '\0';
3835 
3836         return new_fmt;
3837 }
3838 
3839 #define STATIC_TEMP_BUF_SIZE    128
3840 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3841 
3842 /* Find the next real entry, without updating the iterator itself */
3843 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3844                                           int *ent_cpu, u64 *ent_ts)
3845 {
3846         /* __find_next_entry will reset ent_size */
3847         int ent_size = iter->ent_size;
3848         struct trace_entry *entry;
3849 
3850         /*
3851          * If called from ftrace_dump(), then the iter->temp buffer
3852          * will be the static_temp_buf and not created from kmalloc.
3853          * If the entry size is greater than the buffer, we can
3854          * not save it. Just return NULL in that case. This is only
3855          * used to add markers when two consecutive events' time
3856          * stamps have a large delta. See trace_print_lat_context()
3857          */
3858         if (iter->temp == static_temp_buf &&
3859             STATIC_TEMP_BUF_SIZE < ent_size)
3860                 return NULL;
3861 
3862         /*
3863          * The __find_next_entry() may call peek_next_entry(), which may
3864          * call ring_buffer_peek() that may make the contents of iter->ent
3865          * undefined. Need to copy iter->ent now.
3866          */
3867         if (iter->ent && iter->ent != iter->temp) {
3868                 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3869                     !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3870                         void *temp;
3871                         temp = kmalloc(iter->ent_size, GFP_KERNEL);
3872                         if (!temp)
3873                                 return NULL;
3874                         kfree(iter->temp);
3875                         iter->temp = temp;
3876                         iter->temp_size = iter->ent_size;
3877                 }
3878                 memcpy(iter->temp, iter->ent, iter->ent_size);
3879                 iter->ent = iter->temp;
3880         }
3881         entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3882         /* Put back the original ent_size */
3883         iter->ent_size = ent_size;
3884 
3885         return entry;
3886 }
3887 
3888 /* Find the next real entry, and increment the iterator to the next entry */
3889 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3890 {
3891         iter->ent = __find_next_entry(iter, &iter->cpu,
3892                                       &iter->lost_events, &iter->ts);
3893 
3894         if (iter->ent)
3895                 trace_iterator_increment(iter);
3896 
3897         return iter->ent ? iter : NULL;
3898 }
3899 
3900 static void trace_consume(struct trace_iterator *iter)
3901 {
3902         ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3903                             &iter->lost_events);
3904 }
3905 
3906 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3907 {
3908         struct trace_iterator *iter = m->private;
3909         int i = (int)*pos;
3910         void *ent;
3911 
3912         WARN_ON_ONCE(iter->leftover);
3913 
3914         (*pos)++;
3915 
3916         /* can't go backwards */
3917         if (iter->idx > i)
3918                 return NULL;
3919 
3920         if (iter->idx < 0)
3921                 ent = trace_find_next_entry_inc(iter);
3922         else
3923                 ent = iter;
3924 
3925         while (ent && iter->idx < i)
3926                 ent = trace_find_next_entry_inc(iter);
3927 
3928         iter->pos = *pos;
3929 
3930         return ent;
3931 }
3932 
3933 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3934 {
3935         struct ring_buffer_iter *buf_iter;
3936         unsigned long entries = 0;
3937         u64 ts;
3938 
3939         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3940 
3941         buf_iter = trace_buffer_iter(iter, cpu);
3942         if (!buf_iter)
3943                 return;
3944 
3945         ring_buffer_iter_reset(buf_iter);
3946 
3947         /*
3948          * We could have the case with the max latency tracers
3949          * that a reset never took place on a cpu. This is evident
3950          * by the timestamp being before the start of the buffer.
3951          */
3952         while (ring_buffer_iter_peek(buf_iter, &ts)) {
3953                 if (ts >= iter->array_buffer->time_start)
3954                         break;
3955                 entries++;
3956                 ring_buffer_iter_advance(buf_iter);
3957                 /* This could be a big loop */
3958                 cond_resched();
3959         }
3960 
3961         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
3962 }
3963 
3964 /*
3965  * The current tracer is copied to avoid a global locking
3966  * all around.
3967  */
3968 static void *s_start(struct seq_file *m, loff_t *pos)
3969 {
3970         struct trace_iterator *iter = m->private;
3971         struct trace_array *tr = iter->tr;
3972         int cpu_file = iter->cpu_file;
3973         void *p = NULL;
3974         loff_t l = 0;
3975         int cpu;
3976 
3977         mutex_lock(&trace_types_lock);
3978         if (unlikely(tr->current_trace != iter->trace)) {
3979                 /* Close iter->trace before switching to the new current tracer */
3980                 if (iter->trace->close)
3981                         iter->trace->close(iter);
3982                 iter->trace = tr->current_trace;
3983                 /* Reopen the new current tracer */
3984                 if (iter->trace->open)
3985                         iter->trace->open(iter);
3986         }
3987         mutex_unlock(&trace_types_lock);
3988 
3989 #ifdef CONFIG_TRACER_MAX_TRACE
3990         if (iter->snapshot && iter->trace->use_max_tr)
3991                 return ERR_PTR(-EBUSY);
3992 #endif
3993 
3994         if (*pos != iter->pos) {
3995                 iter->ent = NULL;
3996                 iter->cpu = 0;
3997                 iter->idx = -1;
3998 
3999                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
4000                         for_each_tracing_cpu(cpu)
4001                                 tracing_iter_reset(iter, cpu);
4002                 } else
4003                         tracing_iter_reset(iter, cpu_file);
4004 
4005                 iter->leftover = 0;
4006                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4007                         ;
4008 
4009         } else {
4010                 /*
4011                  * If we overflowed the seq_file before, then we want
4012                  * to just reuse the trace_seq buffer again.
4013                  */
4014                 if (iter->leftover)
4015                         p = iter;
4016                 else {
4017                         l = *pos - 1;
4018                         p = s_next(m, p, &l);
4019                 }
4020         }
4021 
4022         trace_event_read_lock();
4023         trace_access_lock(cpu_file);
4024         return p;
4025 }
4026 
4027 static void s_stop(struct seq_file *m, void *p)
4028 {
4029         struct trace_iterator *iter = m->private;
4030 
4031 #ifdef CONFIG_TRACER_MAX_TRACE
4032         if (iter->snapshot && iter->trace->use_max_tr)
4033                 return;
4034 #endif
4035 
4036         trace_access_unlock(iter->cpu_file);
4037         trace_event_read_unlock();
4038 }
4039 
4040 static void
4041 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4042                       unsigned long *entries, int cpu)
4043 {
4044         unsigned long count;
4045 
4046         count = ring_buffer_entries_cpu(buf->buffer, cpu);
4047         /*
4048          * If this buffer has skipped entries, then we hold all
4049          * entries for the trace and we need to ignore the
4050          * ones before the time stamp.
4051          */
4052         if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4053                 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4054                 /* total is the same as the entries */
4055                 *total = count;
4056         } else
4057                 *total = count +
4058                         ring_buffer_overrun_cpu(buf->buffer, cpu);
4059         *entries = count;
4060 }
4061 
4062 static void
4063 get_total_entries(struct array_buffer *buf,
4064                   unsigned long *total, unsigned long *entries)
4065 {
4066         unsigned long t, e;
4067         int cpu;
4068 
4069         *total = 0;
4070         *entries = 0;
4071 
4072         for_each_tracing_cpu(cpu) {
4073                 get_total_entries_cpu(buf, &t, &e, cpu);
4074                 *total += t;
4075                 *entries += e;
4076         }
4077 }
4078 
4079 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4080 {
4081         unsigned long total, entries;
4082 
4083         if (!tr)
4084                 tr = &global_trace;
4085 
4086         get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4087 
4088         return entries;
4089 }
4090 
4091 unsigned long trace_total_entries(struct trace_array *tr)
4092 {
4093         unsigned long total, entries;
4094 
4095         if (!tr)
4096                 tr = &global_trace;
4097 
4098         get_total_entries(&tr->array_buffer, &total, &entries);
4099 
4100         return entries;
4101 }
4102 
4103 static void print_lat_help_header(struct seq_file *m)
4104 {
4105         seq_puts(m, "#                    _------=> CPU#            \n"
4106                     "#                   / _-----=> irqs-off/BH-disabled\n"
4107                     "#                  | / _----=> need-resched    \n"
4108                     "#                  || / _---=> hardirq/softirq \n"
4109                     "#                  ||| / _--=> preempt-depth   \n"
4110                     "#                  |||| / _-=> migrate-disable \n"
4111                     "#                  ||||| /     delay           \n"
4112                     "#  cmd     pid     |||||| time  |   caller     \n"
4113                     "#     \\   /        ||||||  \\    |    /       \n");
4114 }
4115 
4116 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4117 {
4118         unsigned long total;
4119         unsigned long entries;
4120 
4121         get_total_entries(buf, &total, &entries);
4122         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4123                    entries, total, num_online_cpus());
4124         seq_puts(m, "#\n");
4125 }
4126 
4127 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4128                                    unsigned int flags)
4129 {
4130         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4131 
4132         print_event_info(buf, m);
4133 
4134         seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4135         seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4136 }
4137 
4138 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4139                                        unsigned int flags)
4140 {
4141         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4142         static const char space[] = "            ";
4143         int prec = tgid ? 12 : 2;
4144 
4145         print_event_info(buf, m);
4146 
4147         seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
4148         seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4149         seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4150         seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4151         seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4152         seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4153         seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4154         seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4155 }
4156 
4157 void
4158 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4159 {
4160         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4161         struct array_buffer *buf = iter->array_buffer;
4162         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4163         struct tracer *type = iter->trace;
4164         unsigned long entries;
4165         unsigned long total;
4166         const char *name = type->name;
4167 
4168         get_total_entries(buf, &total, &entries);
4169 
4170         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4171                    name, init_utsname()->release);
4172         seq_puts(m, "# -----------------------------------"
4173                  "---------------------------------\n");
4174         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4175                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4176                    nsecs_to_usecs(data->saved_latency),
4177                    entries,
4178                    total,
4179                    buf->cpu,
4180                    preempt_model_none()      ? "server" :
4181                    preempt_model_voluntary() ? "desktop" :
4182                    preempt_model_full()      ? "preempt" :
4183                    preempt_model_rt()        ? "preempt_rt" :
4184                    "unknown",
4185                    /* These are reserved for later use */
4186                    0, 0, 0, 0);
4187 #ifdef CONFIG_SMP
4188         seq_printf(m, " #P:%d)\n", num_online_cpus());
4189 #else
4190         seq_puts(m, ")\n");
4191 #endif
4192         seq_puts(m, "#    -----------------\n");
4193         seq_printf(m, "#    | task: %.16s-%d "
4194                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4195                    data->comm, data->pid,
4196                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4197                    data->policy, data->rt_priority);
4198         seq_puts(m, "#    -----------------\n");
4199 
4200         if (data->critical_start) {
4201                 seq_puts(m, "#  => started at: ");
4202                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4203                 trace_print_seq(m, &iter->seq);
4204                 seq_puts(m, "\n#  => ended at:   ");
4205                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4206                 trace_print_seq(m, &iter->seq);
4207                 seq_puts(m, "\n#\n");
4208         }
4209 
4210         seq_puts(m, "#\n");
4211 }
4212 
4213 static void test_cpu_buff_start(struct trace_iterator *iter)
4214 {
4215         struct trace_seq *s = &iter->seq;
4216         struct trace_array *tr = iter->tr;
4217 
4218         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4219                 return;
4220 
4221         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4222                 return;
4223 
4224         if (cpumask_available(iter->started) &&
4225             cpumask_test_cpu(iter->cpu, iter->started))
4226                 return;
4227 
4228         if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4229                 return;
4230 
4231         if (cpumask_available(iter->started))
4232                 cpumask_set_cpu(iter->cpu, iter->started);
4233 
4234         /* Don't print started cpu buffer for the first entry of the trace */
4235         if (iter->idx > 1)
4236                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4237                                 iter->cpu);
4238 }
4239 
4240 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4241 {
4242         struct trace_array *tr = iter->tr;
4243         struct trace_seq *s = &iter->seq;
4244         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4245         struct trace_entry *entry;
4246         struct trace_event *event;
4247 
4248         entry = iter->ent;
4249 
4250         test_cpu_buff_start(iter);
4251 
4252         event = ftrace_find_event(entry->type);
4253 
4254         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4255                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4256                         trace_print_lat_context(iter);
4257                 else
4258                         trace_print_context(iter);
4259         }
4260 
4261         if (trace_seq_has_overflowed(s))
4262                 return TRACE_TYPE_PARTIAL_LINE;
4263 
4264         if (event) {
4265                 if (tr->trace_flags & TRACE_ITER_FIELDS)
4266                         return print_event_fields(iter, event);
4267                 return event->funcs->trace(iter, sym_flags, event);
4268         }
4269 
4270         trace_seq_printf(s, "Unknown type %d\n", entry->type);
4271 
4272         return trace_handle_return(s);
4273 }
4274 
4275 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4276 {
4277         struct trace_array *tr = iter->tr;
4278         struct trace_seq *s = &iter->seq;
4279         struct trace_entry *entry;
4280         struct trace_event *event;
4281 
4282         entry = iter->ent;
4283 
4284         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4285                 trace_seq_printf(s, "%d %d %llu ",
4286                                  entry->pid, iter->cpu, iter->ts);
4287 
4288         if (trace_seq_has_overflowed(s))
4289                 return TRACE_TYPE_PARTIAL_LINE;
4290 
4291         event = ftrace_find_event(entry->type);
4292         if (event)
4293                 return event->funcs->raw(iter, 0, event);
4294 
4295         trace_seq_printf(s, "%d ?\n", entry->type);
4296 
4297         return trace_handle_return(s);
4298 }
4299 
4300 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4301 {
4302         struct trace_array *tr = iter->tr;
4303         struct trace_seq *s = &iter->seq;
4304         unsigned char newline = '\n';
4305         struct trace_entry *entry;
4306         struct trace_event *event;
4307 
4308         entry = iter->ent;
4309 
4310         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4311                 SEQ_PUT_HEX_FIELD(s, entry->pid);
4312                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4313                 SEQ_PUT_HEX_FIELD(s, iter->ts);
4314                 if (trace_seq_has_overflowed(s))
4315                         return TRACE_TYPE_PARTIAL_LINE;
4316         }
4317 
4318         event = ftrace_find_event(entry->type);
4319         if (event) {
4320                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4321                 if (ret != TRACE_TYPE_HANDLED)
4322                         return ret;
4323         }
4324 
4325         SEQ_PUT_FIELD(s, newline);
4326 
4327         return trace_handle_return(s);
4328 }
4329 
4330 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4331 {
4332         struct trace_array *tr = iter->tr;
4333         struct trace_seq *s = &iter->seq;
4334         struct trace_entry *entry;
4335         struct trace_event *event;
4336 
4337         entry = iter->ent;
4338 
4339         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4340                 SEQ_PUT_FIELD(s, entry->pid);
4341                 SEQ_PUT_FIELD(s, iter->cpu);
4342                 SEQ_PUT_FIELD(s, iter->ts);
4343                 if (trace_seq_has_overflowed(s))
4344                         return TRACE_TYPE_PARTIAL_LINE;
4345         }
4346 
4347         event = ftrace_find_event(entry->type);
4348         return event ? event->funcs->binary(iter, 0, event) :
4349                 TRACE_TYPE_HANDLED;
4350 }
4351 
4352 int trace_empty(struct trace_iterator *iter)
4353 {
4354         struct ring_buffer_iter *buf_iter;
4355         int cpu;
4356 
4357         /* If we are looking at one CPU buffer, only check that one */
4358         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4359                 cpu = iter->cpu_file;
4360                 buf_iter = trace_buffer_iter(iter, cpu);
4361                 if (buf_iter) {
4362                         if (!ring_buffer_iter_empty(buf_iter))
4363                                 return 0;
4364                 } else {
4365                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4366                                 return 0;
4367                 }
4368                 return 1;
4369         }
4370 
4371         for_each_tracing_cpu(cpu) {
4372                 buf_iter = trace_buffer_iter(iter, cpu);
4373                 if (buf_iter) {
4374                         if (!ring_buffer_iter_empty(buf_iter))
4375                                 return 0;
4376                 } else {
4377                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4378                                 return 0;
4379                 }
4380         }
4381 
4382         return 1;
4383 }
4384 
4385 /*  Called with trace_event_read_lock() held. */
4386 enum print_line_t print_trace_line(struct trace_iterator *iter)
4387 {
4388         struct trace_array *tr = iter->tr;
4389         unsigned long trace_flags = tr->trace_flags;
4390         enum print_line_t ret;
4391 
4392         if (iter->lost_events) {
4393                 if (iter->lost_events == (unsigned long)-1)
4394                         trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4395                                          iter->cpu);
4396                 else
4397                         trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4398                                          iter->cpu, iter->lost_events);
4399                 if (trace_seq_has_overflowed(&iter->seq))
4400                         return TRACE_TYPE_PARTIAL_LINE;
4401         }
4402 
4403         if (iter->trace && iter->trace->print_line) {
4404                 ret = iter->trace->print_line(iter);
4405                 if (ret != TRACE_TYPE_UNHANDLED)
4406                         return ret;
4407         }
4408 
4409         if (iter->ent->type == TRACE_BPUTS &&
4410                         trace_flags & TRACE_ITER_PRINTK &&
4411                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4412                 return trace_print_bputs_msg_only(iter);
4413 
4414         if (iter->ent->type == TRACE_BPRINT &&
4415                         trace_flags & TRACE_ITER_PRINTK &&
4416                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4417                 return trace_print_bprintk_msg_only(iter);
4418 
4419         if (iter->ent->type == TRACE_PRINT &&
4420                         trace_flags & TRACE_ITER_PRINTK &&
4421                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4422                 return trace_print_printk_msg_only(iter);
4423 
4424         if (trace_flags & TRACE_ITER_BIN)
4425                 return print_bin_fmt(iter);
4426 
4427         if (trace_flags & TRACE_ITER_HEX)
4428                 return print_hex_fmt(iter);
4429 
4430         if (trace_flags & TRACE_ITER_RAW)
4431                 return print_raw_fmt(iter);
4432 
4433         return print_trace_fmt(iter);
4434 }
4435 
4436 void trace_latency_header(struct seq_file *m)
4437 {
4438         struct trace_iterator *iter = m->private;
4439         struct trace_array *tr = iter->tr;
4440 
4441         /* print nothing if the buffers are empty */
4442         if (trace_empty(iter))
4443                 return;
4444 
4445         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4446                 print_trace_header(m, iter);
4447 
4448         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4449                 print_lat_help_header(m);
4450 }
4451 
4452 void trace_default_header(struct seq_file *m)
4453 {
4454         struct trace_iterator *iter = m->private;
4455         struct trace_array *tr = iter->tr;
4456         unsigned long trace_flags = tr->trace_flags;
4457 
4458         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4459                 return;
4460 
4461         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4462                 /* print nothing if the buffers are empty */
4463                 if (trace_empty(iter))
4464                         return;
4465                 print_trace_header(m, iter);
4466                 if (!(trace_flags & TRACE_ITER_VERBOSE))
4467                         print_lat_help_header(m);
4468         } else {
4469                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4470                         if (trace_flags & TRACE_ITER_IRQ_INFO)
4471                                 print_func_help_header_irq(iter->array_buffer,
4472                                                            m, trace_flags);
4473                         else
4474                                 print_func_help_header(iter->array_buffer, m,
4475                                                        trace_flags);
4476                 }
4477         }
4478 }
4479 
4480 static void test_ftrace_alive(struct seq_file *m)
4481 {
4482         if (!ftrace_is_dead())
4483                 return;
4484         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4485                     "#          MAY BE MISSING FUNCTION EVENTS\n");
4486 }
4487 
4488 #ifdef CONFIG_TRACER_MAX_TRACE
4489 static void show_snapshot_main_help(struct seq_file *m)
4490 {
4491         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4492                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4493                     "#                      Takes a snapshot of the main buffer.\n"
4494                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4495                     "#                      (Doesn't have to be '2' works with any number that\n"
4496                     "#                       is not a '' or '1')\n");
4497 }
4498 
4499 static void show_snapshot_percpu_help(struct seq_file *m)
4500 {
4501         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4502 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4503         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4504                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
4505 #else
4506         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4507                     "#                     Must use main snapshot file to allocate.\n");
4508 #endif
4509         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4510                     "#                      (Doesn't have to be '2' works with any number that\n"
4511                     "#                       is not a '' or '1')\n");
4512 }
4513 
4514 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4515 {
4516         if (iter->tr->allocated_snapshot)
4517                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4518         else
4519                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4520 
4521         seq_puts(m, "# Snapshot commands:\n");
4522         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4523                 show_snapshot_main_help(m);
4524         else
4525                 show_snapshot_percpu_help(m);
4526 }
4527 #else
4528 /* Should never be called */
4529 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4530 #endif
4531 
4532 static int s_show(struct seq_file *m, void *v)
4533 {
4534         struct trace_iterator *iter = v;
4535         int ret;
4536 
4537         if (iter->ent == NULL) {
4538                 if (iter->tr) {
4539                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
4540                         seq_puts(m, "#\n");
4541                         test_ftrace_alive(m);
4542                 }
4543                 if (iter->snapshot && trace_empty(iter))
4544                         print_snapshot_help(m, iter);
4545                 else if (iter->trace && iter->trace->print_header)
4546                         iter->trace->print_header(m);
4547                 else
4548                         trace_default_header(m);
4549 
4550         } else if (iter->leftover) {
4551                 /*
4552                  * If we filled the seq_file buffer earlier, we
4553                  * want to just show it now.
4554                  */
4555                 ret = trace_print_seq(m, &iter->seq);
4556 
4557                 /* ret should this time be zero, but you never know */
4558                 iter->leftover = ret;
4559 
4560         } else {
4561                 ret = print_trace_line(iter);
4562                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4563                         iter->seq.full = 0;
4564                         trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
4565                 }
4566                 ret = trace_print_seq(m, &iter->seq);
4567                 /*
4568                  * If we overflow the seq_file buffer, then it will
4569                  * ask us for this data again at start up.
4570                  * Use that instead.
4571                  *  ret is 0 if seq_file write succeeded.
4572                  *        -1 otherwise.
4573                  */
4574                 iter->leftover = ret;
4575         }
4576 
4577         return 0;
4578 }
4579 
4580 /*
4581  * Should be used after trace_array_get(), trace_types_lock
4582  * ensures that i_cdev was already initialized.
4583  */
4584 static inline int tracing_get_cpu(struct inode *inode)
4585 {
4586         if (inode->i_cdev) /* See trace_create_cpu_file() */
4587                 return (long)inode->i_cdev - 1;
4588         return RING_BUFFER_ALL_CPUS;
4589 }
4590 
4591 static const struct seq_operations tracer_seq_ops = {
4592         .start          = s_start,
4593         .next           = s_next,
4594         .stop           = s_stop,
4595         .show           = s_show,
4596 };
4597 
4598 /*
4599  * Note, as iter itself can be allocated and freed in different
4600  * ways, this function is only used to free its content, and not
4601  * the iterator itself. The only requirement to all the allocations
4602  * is that it must zero all fields (kzalloc), as freeing works with
4603  * ethier allocated content or NULL.
4604  */
4605 static void free_trace_iter_content(struct trace_iterator *iter)
4606 {
4607         /* The fmt is either NULL, allocated or points to static_fmt_buf */
4608         if (iter->fmt != static_fmt_buf)
4609                 kfree(iter->fmt);
4610 
4611         kfree(iter->temp);
4612         kfree(iter->buffer_iter);
4613         mutex_destroy(&iter->mutex);
4614         free_cpumask_var(iter->started);
4615 }
4616 
4617 static struct trace_iterator *
4618 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4619 {
4620         struct trace_array *tr = inode->i_private;
4621         struct trace_iterator *iter;
4622         int cpu;
4623 
4624         if (tracing_disabled)
4625                 return ERR_PTR(-ENODEV);
4626 
4627         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4628         if (!iter)
4629                 return ERR_PTR(-ENOMEM);
4630 
4631         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4632                                     GFP_KERNEL);
4633         if (!iter->buffer_iter)
4634                 goto release;
4635 
4636         /*
4637          * trace_find_next_entry() may need to save off iter->ent.
4638          * It will place it into the iter->temp buffer. As most
4639          * events are less than 128, allocate a buffer of that size.
4640          * If one is greater, then trace_find_next_entry() will
4641          * allocate a new buffer to adjust for the bigger iter->ent.
4642          * It's not critical if it fails to get allocated here.
4643          */
4644         iter->temp = kmalloc(128, GFP_KERNEL);
4645         if (iter->temp)
4646                 iter->temp_size = 128;
4647 
4648         /*
4649          * trace_event_printf() may need to modify given format
4650          * string to replace %p with %px so that it shows real address
4651          * instead of hash value. However, that is only for the event
4652          * tracing, other tracer may not need. Defer the allocation
4653          * until it is needed.
4654          */
4655         iter->fmt = NULL;
4656         iter->fmt_size = 0;
4657 
4658         mutex_lock(&trace_types_lock);
4659         iter->trace = tr->current_trace;
4660 
4661         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4662                 goto fail;
4663 
4664         iter->tr = tr;
4665 
4666 #ifdef CONFIG_TRACER_MAX_TRACE
4667         /* Currently only the top directory has a snapshot */
4668         if (tr->current_trace->print_max || snapshot)
4669                 iter->array_buffer = &tr->max_buffer;
4670         else
4671 #endif
4672                 iter->array_buffer = &tr->array_buffer;
4673         iter->snapshot = snapshot;
4674         iter->pos = -1;
4675         iter->cpu_file = tracing_get_cpu(inode);
4676         mutex_init(&iter->mutex);
4677 
4678         /* Notify the tracer early; before we stop tracing. */
4679         if (iter->trace->open)
4680                 iter->trace->open(iter);
4681 
4682         /* Annotate start of buffers if we had overruns */
4683         if (ring_buffer_overruns(iter->array_buffer->buffer))
4684                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4685 
4686         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4687         if (trace_clocks[tr->clock_id].in_ns)
4688                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4689 
4690         /*
4691          * If pause-on-trace is enabled, then stop the trace while
4692          * dumping, unless this is the "snapshot" file
4693          */
4694         if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4695                 tracing_stop_tr(tr);
4696 
4697         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4698                 for_each_tracing_cpu(cpu) {
4699                         iter->buffer_iter[cpu] =
4700                                 ring_buffer_read_prepare(iter->array_buffer->buffer,
4701                                                          cpu, GFP_KERNEL);
4702                 }
4703                 ring_buffer_read_prepare_sync();
4704                 for_each_tracing_cpu(cpu) {
4705                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4706                         tracing_iter_reset(iter, cpu);
4707                 }
4708         } else {
4709                 cpu = iter->cpu_file;
4710                 iter->buffer_iter[cpu] =
4711                         ring_buffer_read_prepare(iter->array_buffer->buffer,
4712                                                  cpu, GFP_KERNEL);
4713                 ring_buffer_read_prepare_sync();
4714                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4715                 tracing_iter_reset(iter, cpu);
4716         }
4717 
4718         mutex_unlock(&trace_types_lock);
4719 
4720         return iter;
4721 
4722  fail:
4723         mutex_unlock(&trace_types_lock);
4724         free_trace_iter_content(iter);
4725 release:
4726         seq_release_private(inode, file);
4727         return ERR_PTR(-ENOMEM);
4728 }
4729 
4730 int tracing_open_generic(struct inode *inode, struct file *filp)
4731 {
4732         int ret;
4733 
4734         ret = tracing_check_open_get_tr(NULL);
4735         if (ret)
4736                 return ret;
4737 
4738         filp->private_data = inode->i_private;
4739         return 0;
4740 }
4741 
4742 bool tracing_is_disabled(void)
4743 {
4744         return (tracing_disabled) ? true: false;
4745 }
4746 
4747 /*
4748  * Open and update trace_array ref count.
4749  * Must have the current trace_array passed to it.
4750  */
4751 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4752 {
4753         struct trace_array *tr = inode->i_private;
4754         int ret;
4755 
4756         ret = tracing_check_open_get_tr(tr);
4757         if (ret)
4758                 return ret;
4759 
4760         filp->private_data = inode->i_private;
4761 
4762         return 0;
4763 }
4764 
4765 /*
4766  * The private pointer of the inode is the trace_event_file.
4767  * Update the tr ref count associated to it.
4768  */
4769 int tracing_open_file_tr(struct inode *inode, struct file *filp)
4770 {
4771         struct trace_event_file *file = inode->i_private;
4772         int ret;
4773 
4774         ret = tracing_check_open_get_tr(file->tr);
4775         if (ret)
4776                 return ret;
4777 
4778         mutex_lock(&event_mutex);
4779 
4780         /* Fail if the file is marked for removal */
4781         if (file->flags & EVENT_FILE_FL_FREED) {
4782                 trace_array_put(file->tr);
4783                 ret = -ENODEV;
4784         } else {
4785                 event_file_get(file);
4786         }
4787 
4788         mutex_unlock(&event_mutex);
4789         if (ret)
4790                 return ret;
4791 
4792         filp->private_data = inode->i_private;
4793 
4794         return 0;
4795 }
4796 
4797 int tracing_release_file_tr(struct inode *inode, struct file *filp)
4798 {
4799         struct trace_event_file *file = inode->i_private;
4800 
4801         trace_array_put(file->tr);
4802         event_file_put(file);
4803 
4804         return 0;
4805 }
4806 
4807 int tracing_single_release_file_tr(struct inode *inode, struct file *filp)
4808 {
4809         tracing_release_file_tr(inode, filp);
4810         return single_release(inode, filp);
4811 }
4812 
4813 static int tracing_mark_open(struct inode *inode, struct file *filp)
4814 {
4815         stream_open(inode, filp);
4816         return tracing_open_generic_tr(inode, filp);
4817 }
4818 
4819 static int tracing_release(struct inode *inode, struct file *file)
4820 {
4821         struct trace_array *tr = inode->i_private;
4822         struct seq_file *m = file->private_data;
4823         struct trace_iterator *iter;
4824         int cpu;
4825 
4826         if (!(file->f_mode & FMODE_READ)) {
4827                 trace_array_put(tr);
4828                 return 0;
4829         }
4830 
4831         /* Writes do not use seq_file */
4832         iter = m->private;
4833         mutex_lock(&trace_types_lock);
4834 
4835         for_each_tracing_cpu(cpu) {
4836                 if (iter->buffer_iter[cpu])
4837                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
4838         }
4839 
4840         if (iter->trace && iter->trace->close)
4841                 iter->trace->close(iter);
4842 
4843         if (!iter->snapshot && tr->stop_count)
4844                 /* reenable tracing if it was previously enabled */
4845                 tracing_start_tr(tr);
4846 
4847         __trace_array_put(tr);
4848 
4849         mutex_unlock(&trace_types_lock);
4850 
4851         free_trace_iter_content(iter);
4852         seq_release_private(inode, file);
4853 
4854         return 0;
4855 }
4856 
4857 int tracing_release_generic_tr(struct inode *inode, struct file *file)
4858 {
4859         struct trace_array *tr = inode->i_private;
4860 
4861         trace_array_put(tr);
4862         return 0;
4863 }
4864 
4865 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4866 {
4867         struct trace_array *tr = inode->i_private;
4868 
4869         trace_array_put(tr);
4870 
4871         return single_release(inode, file);
4872 }
4873 
4874 static int tracing_open(struct inode *inode, struct file *file)
4875 {
4876         struct trace_array *tr = inode->i_private;
4877         struct trace_iterator *iter;
4878         int ret;
4879 
4880         ret = tracing_check_open_get_tr(tr);
4881         if (ret)
4882                 return ret;
4883 
4884         /* If this file was open for write, then erase contents */
4885         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4886                 int cpu = tracing_get_cpu(inode);
4887                 struct array_buffer *trace_buf = &tr->array_buffer;
4888 
4889 #ifdef CONFIG_TRACER_MAX_TRACE
4890                 if (tr->current_trace->print_max)
4891                         trace_buf = &tr->max_buffer;
4892 #endif
4893 
4894                 if (cpu == RING_BUFFER_ALL_CPUS)
4895                         tracing_reset_online_cpus(trace_buf);
4896                 else
4897                         tracing_reset_cpu(trace_buf, cpu);
4898         }
4899 
4900         if (file->f_mode & FMODE_READ) {
4901                 iter = __tracing_open(inode, file, false);
4902                 if (IS_ERR(iter))
4903                         ret = PTR_ERR(iter);
4904                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4905                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4906         }
4907 
4908         if (ret < 0)
4909                 trace_array_put(tr);
4910 
4911         return ret;
4912 }
4913 
4914 /*
4915  * Some tracers are not suitable for instance buffers.
4916  * A tracer is always available for the global array (toplevel)
4917  * or if it explicitly states that it is.
4918  */
4919 static bool
4920 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4921 {
4922         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4923 }
4924 
4925 /* Find the next tracer that this trace array may use */
4926 static struct tracer *
4927 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4928 {
4929         while (t && !trace_ok_for_array(t, tr))
4930                 t = t->next;
4931 
4932         return t;
4933 }
4934 
4935 static void *
4936 t_next(struct seq_file *m, void *v, loff_t *pos)
4937 {
4938         struct trace_array *tr = m->private;
4939         struct tracer *t = v;
4940 
4941         (*pos)++;
4942 
4943         if (t)
4944                 t = get_tracer_for_array(tr, t->next);
4945 
4946         return t;
4947 }
4948 
4949 static void *t_start(struct seq_file *m, loff_t *pos)
4950 {
4951         struct trace_array *tr = m->private;
4952         struct tracer *t;
4953         loff_t l = 0;
4954 
4955         mutex_lock(&trace_types_lock);
4956 
4957         t = get_tracer_for_array(tr, trace_types);
4958         for (; t && l < *pos; t = t_next(m, t, &l))
4959                         ;
4960 
4961         return t;
4962 }
4963 
4964 static void t_stop(struct seq_file *m, void *p)
4965 {
4966         mutex_unlock(&trace_types_lock);
4967 }
4968 
4969 static int t_show(struct seq_file *m, void *v)
4970 {
4971         struct tracer *t = v;
4972 
4973         if (!t)
4974                 return 0;
4975 
4976         seq_puts(m, t->name);
4977         if (t->next)
4978                 seq_putc(m, ' ');
4979         else
4980                 seq_putc(m, '\n');
4981 
4982         return 0;
4983 }
4984 
4985 static const struct seq_operations show_traces_seq_ops = {
4986         .start          = t_start,
4987         .next           = t_next,
4988         .stop           = t_stop,
4989         .show           = t_show,
4990 };
4991 
4992 static int show_traces_open(struct inode *inode, struct file *file)
4993 {
4994         struct trace_array *tr = inode->i_private;
4995         struct seq_file *m;
4996         int ret;
4997 
4998         ret = tracing_check_open_get_tr(tr);
4999         if (ret)
5000                 return ret;
5001 
5002         ret = seq_open(file, &show_traces_seq_ops);
5003         if (ret) {
5004                 trace_array_put(tr);
5005                 return ret;
5006         }
5007 
5008         m = file->private_data;
5009         m->private = tr;
5010 
5011         return 0;
5012 }
5013 
5014 static int show_traces_release(struct inode *inode, struct file *file)
5015 {
5016         struct trace_array *tr = inode->i_private;
5017 
5018         trace_array_put(tr);
5019         return seq_release(inode, file);
5020 }
5021 
5022 static ssize_t
5023 tracing_write_stub(struct file *filp, const char __user *ubuf,
5024                    size_t count, loff_t *ppos)
5025 {
5026         return count;
5027 }
5028 
5029 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5030 {
5031         int ret;
5032 
5033         if (file->f_mode & FMODE_READ)
5034                 ret = seq_lseek(file, offset, whence);
5035         else
5036                 file->f_pos = ret = 0;
5037 
5038         return ret;
5039 }
5040 
5041 static const struct file_operations tracing_fops = {
5042         .open           = tracing_open,
5043         .read           = seq_read,
5044         .read_iter      = seq_read_iter,
5045         .splice_read    = copy_splice_read,
5046         .write          = tracing_write_stub,
5047         .llseek         = tracing_lseek,
5048         .release        = tracing_release,
5049 };
5050 
5051 static const struct file_operations show_traces_fops = {
5052         .open           = show_traces_open,
5053         .read           = seq_read,
5054         .llseek         = seq_lseek,
5055         .release        = show_traces_release,
5056 };
5057 
5058 static ssize_t
5059 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5060                      size_t count, loff_t *ppos)
5061 {
5062         struct trace_array *tr = file_inode(filp)->i_private;
5063         char *mask_str;
5064         int len;
5065 
5066         len = snprintf(NULL, 0, "%*pb\n",
5067                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
5068         mask_str = kmalloc(len, GFP_KERNEL);
5069         if (!mask_str)
5070                 return -ENOMEM;
5071 
5072         len = snprintf(mask_str, len, "%*pb\n",
5073                        cpumask_pr_args(tr->tracing_cpumask));
5074         if (len >= count) {
5075                 count = -EINVAL;
5076                 goto out_err;
5077         }
5078         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5079 
5080 out_err:
5081         kfree(mask_str);
5082 
5083         return count;
5084 }
5085 
5086 int tracing_set_cpumask(struct trace_array *tr,
5087                         cpumask_var_t tracing_cpumask_new)
5088 {
5089         int cpu;
5090 
5091         if (!tr)
5092                 return -EINVAL;
5093 
5094         local_irq_disable();
5095         arch_spin_lock(&tr->max_lock);
5096         for_each_tracing_cpu(cpu) {
5097                 /*
5098                  * Increase/decrease the disabled counter if we are
5099                  * about to flip a bit in the cpumask:
5100                  */
5101                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5102                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5103                         atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5104                         ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5105 #ifdef CONFIG_TRACER_MAX_TRACE
5106                         ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
5107 #endif
5108                 }
5109                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5110                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5111                         atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5112                         ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5113 #ifdef CONFIG_TRACER_MAX_TRACE
5114                         ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
5115 #endif
5116                 }
5117         }
5118         arch_spin_unlock(&tr->max_lock);
5119         local_irq_enable();
5120 
5121         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5122 
5123         return 0;
5124 }
5125 
5126 static ssize_t
5127 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5128                       size_t count, loff_t *ppos)
5129 {
5130         struct trace_array *tr = file_inode(filp)->i_private;
5131         cpumask_var_t tracing_cpumask_new;
5132         int err;
5133 
5134         if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5135                 return -ENOMEM;
5136 
5137         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5138         if (err)
5139                 goto err_free;
5140 
5141         err = tracing_set_cpumask(tr, tracing_cpumask_new);
5142         if (err)
5143                 goto err_free;
5144 
5145         free_cpumask_var(tracing_cpumask_new);
5146 
5147         return count;
5148 
5149 err_free:
5150         free_cpumask_var(tracing_cpumask_new);
5151 
5152         return err;
5153 }
5154 
5155 static const struct file_operations tracing_cpumask_fops = {
5156         .open           = tracing_open_generic_tr,
5157         .read           = tracing_cpumask_read,
5158         .write          = tracing_cpumask_write,
5159         .release        = tracing_release_generic_tr,
5160         .llseek         = generic_file_llseek,
5161 };
5162 
5163 static int tracing_trace_options_show(struct seq_file *m, void *v)
5164 {
5165         struct tracer_opt *trace_opts;
5166         struct trace_array *tr = m->private;
5167         u32 tracer_flags;
5168         int i;
5169 
5170         mutex_lock(&trace_types_lock);
5171         tracer_flags = tr->current_trace->flags->val;
5172         trace_opts = tr->current_trace->flags->opts;
5173 
5174         for (i = 0; trace_options[i]; i++) {
5175                 if (tr->trace_flags & (1 << i))
5176                         seq_printf(m, "%s\n", trace_options[i]);
5177                 else
5178                         seq_printf(m, "no%s\n", trace_options[i]);
5179         }
5180 
5181         for (i = 0; trace_opts[i].name; i++) {
5182                 if (tracer_flags & trace_opts[i].bit)
5183                         seq_printf(m, "%s\n", trace_opts[i].name);
5184                 else
5185                         seq_printf(m, "no%s\n", trace_opts[i].name);
5186         }
5187         mutex_unlock(&trace_types_lock);
5188 
5189         return 0;
5190 }
5191 
5192 static int __set_tracer_option(struct trace_array *tr,
5193                                struct tracer_flags *tracer_flags,
5194                                struct tracer_opt *opts, int neg)
5195 {
5196         struct tracer *trace = tracer_flags->trace;
5197         int ret;
5198 
5199         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5200         if (ret)
5201                 return ret;
5202 
5203         if (neg)
5204                 tracer_flags->val &= ~opts->bit;
5205         else
5206                 tracer_flags->val |= opts->bit;
5207         return 0;
5208 }
5209 
5210 /* Try to assign a tracer specific option */
5211 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5212 {
5213         struct tracer *trace = tr->current_trace;
5214         struct tracer_flags *tracer_flags = trace->flags;
5215         struct tracer_opt *opts = NULL;
5216         int i;
5217 
5218         for (i = 0; tracer_flags->opts[i].name; i++) {
5219                 opts = &tracer_flags->opts[i];
5220 
5221                 if (strcmp(cmp, opts->name) == 0)
5222                         return __set_tracer_option(tr, trace->flags, opts, neg);
5223         }
5224 
5225         return -EINVAL;
5226 }
5227 
5228 /* Some tracers require overwrite to stay enabled */
5229 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5230 {
5231         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5232                 return -1;
5233 
5234         return 0;
5235 }
5236 
5237 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5238 {
5239         if ((mask == TRACE_ITER_RECORD_TGID) ||
5240             (mask == TRACE_ITER_RECORD_CMD))
5241                 lockdep_assert_held(&event_mutex);
5242 
5243         /* do nothing if flag is already set */
5244         if (!!(tr->trace_flags & mask) == !!enabled)
5245                 return 0;
5246 
5247         /* Give the tracer a chance to approve the change */
5248         if (tr->current_trace->flag_changed)
5249                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5250                         return -EINVAL;
5251 
5252         if (enabled)
5253                 tr->trace_flags |= mask;
5254         else
5255                 tr->trace_flags &= ~mask;
5256 
5257         if (mask == TRACE_ITER_RECORD_CMD)
5258                 trace_event_enable_cmd_record(enabled);
5259 
5260         if (mask == TRACE_ITER_RECORD_TGID) {
5261 
5262                 if (trace_alloc_tgid_map() < 0) {
5263                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5264                         return -ENOMEM;
5265                 }
5266 
5267                 trace_event_enable_tgid_record(enabled);
5268         }
5269 
5270         if (mask == TRACE_ITER_EVENT_FORK)
5271                 trace_event_follow_fork(tr, enabled);
5272 
5273         if (mask == TRACE_ITER_FUNC_FORK)
5274                 ftrace_pid_follow_fork(tr, enabled);
5275 
5276         if (mask == TRACE_ITER_OVERWRITE) {
5277                 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5278 #ifdef CONFIG_TRACER_MAX_TRACE
5279                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5280 #endif
5281         }
5282 
5283         if (mask == TRACE_ITER_PRINTK) {
5284                 trace_printk_start_stop_comm(enabled);
5285                 trace_printk_control(enabled);
5286         }
5287 
5288         return 0;
5289 }
5290 
5291 int trace_set_options(struct trace_array *tr, char *option)
5292 {
5293         char *cmp;
5294         int neg = 0;
5295         int ret;
5296         size_t orig_len = strlen(option);
5297         int len;
5298 
5299         cmp = strstrip(option);
5300 
5301         len = str_has_prefix(cmp, "no");
5302         if (len)
5303                 neg = 1;
5304 
5305         cmp += len;
5306 
5307         mutex_lock(&event_mutex);
5308         mutex_lock(&trace_types_lock);
5309 
5310         ret = match_string(trace_options, -1, cmp);
5311         /* If no option could be set, test the specific tracer options */
5312         if (ret < 0)
5313                 ret = set_tracer_option(tr, cmp, neg);
5314         else
5315                 ret = set_tracer_flag(tr, 1 << ret, !neg);
5316 
5317         mutex_unlock(&trace_types_lock);
5318         mutex_unlock(&event_mutex);
5319 
5320         /*
5321          * If the first trailing whitespace is replaced with '\0' by strstrip,
5322          * turn it back into a space.
5323          */
5324         if (orig_len > strlen(option))
5325                 option[strlen(option)] = ' ';
5326 
5327         return ret;
5328 }
5329 
5330 static void __init apply_trace_boot_options(void)
5331 {
5332         char *buf = trace_boot_options_buf;
5333         char *option;
5334 
5335         while (true) {
5336                 option = strsep(&buf, ",");
5337 
5338                 if (!option)
5339                         break;
5340 
5341                 if (*option)
5342                         trace_set_options(&global_trace, option);
5343 
5344                 /* Put back the comma to allow this to be called again */
5345                 if (buf)
5346                         *(buf - 1) = ',';
5347         }
5348 }
5349 
5350 static ssize_t
5351 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5352                         size_t cnt, loff_t *ppos)
5353 {
5354         struct seq_file *m = filp->private_data;
5355         struct trace_array *tr = m->private;
5356         char buf[64];
5357         int ret;
5358 
5359         if (cnt >= sizeof(buf))
5360                 return -EINVAL;
5361 
5362         if (copy_from_user(buf, ubuf, cnt))
5363                 return -EFAULT;
5364 
5365         buf[cnt] = 0;
5366 
5367         ret = trace_set_options(tr, buf);
5368         if (ret < 0)
5369                 return ret;
5370 
5371         *ppos += cnt;
5372 
5373         return cnt;
5374 }
5375 
5376 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5377 {
5378         struct trace_array *tr = inode->i_private;
5379         int ret;
5380 
5381         ret = tracing_check_open_get_tr(tr);
5382         if (ret)
5383                 return ret;
5384 
5385         ret = single_open(file, tracing_trace_options_show, inode->i_private);
5386         if (ret < 0)
5387                 trace_array_put(tr);
5388 
5389         return ret;
5390 }
5391 
5392 static const struct file_operations tracing_iter_fops = {
5393         .open           = tracing_trace_options_open,
5394         .read           = seq_read,
5395         .llseek         = seq_lseek,
5396         .release        = tracing_single_release_tr,
5397         .write          = tracing_trace_options_write,
5398 };
5399 
5400 static const char readme_msg[] =
5401         "tracing mini-HOWTO:\n\n"
5402         "# echo 0 > tracing_on : quick way to disable tracing\n"
5403         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5404         " Important files:\n"
5405         "  trace\t\t\t- The static contents of the buffer\n"
5406         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
5407         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5408         "  current_tracer\t- function and latency tracers\n"
5409         "  available_tracers\t- list of configured tracers for current_tracer\n"
5410         "  error_log\t- error log for failed commands (that support it)\n"
5411         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5412         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5413         "  trace_clock\t\t- change the clock used to order events\n"
5414         "       local:   Per cpu clock but may not be synced across CPUs\n"
5415         "      global:   Synced across CPUs but slows tracing down.\n"
5416         "     counter:   Not a clock, but just an increment\n"
5417         "      uptime:   Jiffy counter from time of boot\n"
5418         "        perf:   Same clock that perf events use\n"
5419 #ifdef CONFIG_X86_64
5420         "     x86-tsc:   TSC cycle counter\n"
5421 #endif
5422         "\n  timestamp_mode\t- view the mode used to timestamp events\n"
5423         "       delta:   Delta difference against a buffer-wide timestamp\n"
5424         "    absolute:   Absolute (standalone) timestamp\n"
5425         "\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
5426         "\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
5427         "  tracing_cpumask\t- Limit which CPUs to trace\n"
5428         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5429         "\t\t\t  Remove sub-buffer with rmdir\n"
5430         "  trace_options\t\t- Set format or modify how tracing happens\n"
5431         "\t\t\t  Disable an option by prefixing 'no' to the\n"
5432         "\t\t\t  option name\n"
5433         "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5434 #ifdef CONFIG_DYNAMIC_FTRACE
5435         "\n  available_filter_functions - list of functions that can be filtered on\n"
5436         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
5437         "\t\t\t  functions\n"
5438         "\t     accepts: func_full_name or glob-matching-pattern\n"
5439         "\t     modules: Can select a group via module\n"
5440         "\t      Format: :mod:<module-name>\n"
5441         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5442         "\t    triggers: a command to perform when function is hit\n"
5443         "\t      Format: <function>:<trigger>[:count]\n"
5444         "\t     trigger: traceon, traceoff\n"
5445         "\t\t      enable_event:<system>:<event>\n"
5446         "\t\t      disable_event:<system>:<event>\n"
5447 #ifdef CONFIG_STACKTRACE
5448         "\t\t      stacktrace\n"
5449 #endif
5450 #ifdef CONFIG_TRACER_SNAPSHOT
5451         "\t\t      snapshot\n"
5452 #endif
5453         "\t\t      dump\n"
5454         "\t\t      cpudump\n"
5455         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5456         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5457         "\t     The first one will disable tracing every time do_fault is hit\n"
5458         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5459         "\t       The first time do trap is hit and it disables tracing, the\n"
5460         "\t       counter will decrement to 2. If tracing is already disabled,\n"
5461         "\t       the counter will not decrement. It only decrements when the\n"
5462         "\t       trigger did work\n"
5463         "\t     To remove trigger without count:\n"
5464         "\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
5465         "\t     To remove trigger with a count:\n"
5466         "\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5467         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5468         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5469         "\t    modules: Can select a group via module command :mod:\n"
5470         "\t    Does not accept triggers\n"
5471 #endif /* CONFIG_DYNAMIC_FTRACE */
5472 #ifdef CONFIG_FUNCTION_TRACER
5473         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5474         "\t\t    (function)\n"
5475         "  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5476         "\t\t    (function)\n"
5477 #endif
5478 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5479         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5480         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5481         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5482 #endif
5483 #ifdef CONFIG_TRACER_SNAPSHOT
5484         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5485         "\t\t\t  snapshot buffer. Read the contents for more\n"
5486         "\t\t\t  information\n"
5487 #endif
5488 #ifdef CONFIG_STACK_TRACER
5489         "  stack_trace\t\t- Shows the max stack trace when active\n"
5490         "  stack_max_size\t- Shows current max stack size that was traced\n"
5491         "\t\t\t  Write into this file to reset the max size (trigger a\n"
5492         "\t\t\t  new trace)\n"
5493 #ifdef CONFIG_DYNAMIC_FTRACE
5494         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5495         "\t\t\t  traces\n"
5496 #endif
5497 #endif /* CONFIG_STACK_TRACER */
5498 #ifdef CONFIG_DYNAMIC_EVENTS
5499         "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5500         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5501 #endif
5502 #ifdef CONFIG_KPROBE_EVENTS
5503         "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5504         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5505 #endif
5506 #ifdef CONFIG_UPROBE_EVENTS
5507         "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5508         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5509 #endif
5510 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \
5511     defined(CONFIG_FPROBE_EVENTS)
5512         "\t  accepts: event-definitions (one definition per line)\n"
5513 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5514         "\t   Format: p[:[<group>/][<event>]] <place> [<args>]\n"
5515         "\t           r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
5516 #endif
5517 #ifdef CONFIG_FPROBE_EVENTS
5518         "\t           f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n"
5519         "\t           t[:[<group>/][<event>]] <tracepoint> [<args>]\n"
5520 #endif
5521 #ifdef CONFIG_HIST_TRIGGERS
5522         "\t           s:[synthetic/]<event> <field> [<field>]\n"
5523 #endif
5524         "\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
5525         "\t           -:[<group>/][<event>]\n"
5526 #ifdef CONFIG_KPROBE_EVENTS
5527         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5528   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5529 #endif
5530 #ifdef CONFIG_UPROBE_EVENTS
5531   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5532 #endif
5533         "\t     args: <name>=fetcharg[:type]\n"
5534         "\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5535 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5536         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5537 #ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
5538         "\t           <argname>[->field[->field|.field...]],\n"
5539 #endif
5540 #else
5541         "\t           $stack<index>, $stack, $retval, $comm,\n"
5542 #endif
5543         "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5544         "\t     kernel return probes support: $retval, $arg<N>, $comm\n"
5545         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
5546         "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5547         "\t           symstr, %pd/%pD, <type>\\[<array-size>\\]\n"
5548 #ifdef CONFIG_HIST_TRIGGERS
5549         "\t    field: <stype> <name>;\n"
5550         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5551         "\t           [unsigned] char/int/long\n"
5552 #endif
5553         "\t    efield: For event probes ('e' types), the field is on of the fields\n"
5554         "\t            of the <attached-group>/<attached-event>.\n"
5555 #endif
5556         "  events/\t\t- Directory containing all trace event subsystems:\n"
5557         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5558         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
5559         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5560         "\t\t\t  events\n"
5561         "      filter\t\t- If set, only events passing filter are traced\n"
5562         "  events/<system>/<event>/\t- Directory containing control files for\n"
5563         "\t\t\t  <event>:\n"
5564         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5565         "      filter\t\t- If set, only events passing filter are traced\n"
5566         "      trigger\t\t- If set, a command to perform when event is hit\n"
5567         "\t    Format: <trigger>[:count][if <filter>]\n"
5568         "\t   trigger: traceon, traceoff\n"
5569         "\t            enable_event:<system>:<event>\n"
5570         "\t            disable_event:<system>:<event>\n"
5571 #ifdef CONFIG_HIST_TRIGGERS
5572         "\t            enable_hist:<system>:<event>\n"
5573         "\t            disable_hist:<system>:<event>\n"
5574 #endif
5575 #ifdef CONFIG_STACKTRACE
5576         "\t\t    stacktrace\n"
5577 #endif
5578 #ifdef CONFIG_TRACER_SNAPSHOT
5579         "\t\t    snapshot\n"
5580 #endif
5581 #ifdef CONFIG_HIST_TRIGGERS
5582         "\t\t    hist (see below)\n"
5583 #endif
5584         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5585         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5586         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5587         "\t                  events/block/block_unplug/trigger\n"
5588         "\t   The first disables tracing every time block_unplug is hit.\n"
5589         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5590         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5591         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5592         "\t   Like function triggers, the counter is only decremented if it\n"
5593         "\t    enabled or disabled tracing.\n"
5594         "\t   To remove a trigger without a count:\n"
5595         "\t     echo '!<trigger> > <system>/<event>/trigger\n"
5596         "\t   To remove a trigger with a count:\n"
5597         "\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
5598         "\t   Filters can be ignored when removing a trigger.\n"
5599 #ifdef CONFIG_HIST_TRIGGERS
5600         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5601         "\t    Format: hist:keys=<field1[,field2,...]>\n"
5602         "\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5603         "\t            [:values=<field1[,field2,...]>]\n"
5604         "\t            [:sort=<field1[,field2,...]>]\n"
5605         "\t            [:size=#entries]\n"
5606         "\t            [:pause][:continue][:clear]\n"
5607         "\t            [:name=histname1]\n"
5608         "\t            [:nohitcount]\n"
5609         "\t            [:<handler>.<action>]\n"
5610         "\t            [if <filter>]\n\n"
5611         "\t    Note, special fields can be used as well:\n"
5612         "\t            common_timestamp - to record current timestamp\n"
5613         "\t            common_cpu - to record the CPU the event happened on\n"
5614         "\n"
5615         "\t    A hist trigger variable can be:\n"
5616         "\t        - a reference to a field e.g. x=current_timestamp,\n"
5617         "\t        - a reference to another variable e.g. y=$x,\n"
5618         "\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
5619         "\t        - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
5620         "\n"
5621         "\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5622         "\t    multiplication(*) and division(/) operators. An operand can be either a\n"
5623         "\t    variable reference, field or numeric literal.\n"
5624         "\n"
5625         "\t    When a matching event is hit, an entry is added to a hash\n"
5626         "\t    table using the key(s) and value(s) named, and the value of a\n"
5627         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
5628         "\t    correspond to fields in the event's format description.  Keys\n"
5629         "\t    can be any field, or the special string 'common_stacktrace'.\n"
5630         "\t    Compound keys consisting of up to two fields can be specified\n"
5631         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5632         "\t    fields.  Sort keys consisting of up to two fields can be\n"
5633         "\t    specified using the 'sort' keyword.  The sort direction can\n"
5634         "\t    be modified by appending '.descending' or '.ascending' to a\n"
5635         "\t    sort field.  The 'size' parameter can be used to specify more\n"
5636         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
5637         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
5638         "\t    its histogram data will be shared with other triggers of the\n"
5639         "\t    same name, and trigger hits will update this common data.\n\n"
5640         "\t    Reading the 'hist' file for the event will dump the hash\n"
5641         "\t    table in its entirety to stdout.  If there are multiple hist\n"
5642         "\t    triggers attached to an event, there will be a table for each\n"
5643         "\t    trigger in the output.  The table displayed for a named\n"
5644         "\t    trigger will be the same as any other instance having the\n"
5645         "\t    same name.  The default format used to display a given field\n"
5646         "\t    can be modified by appending any of the following modifiers\n"
5647         "\t    to the field name, as applicable:\n\n"
5648         "\t            .hex        display a number as a hex value\n"
5649         "\t            .sym        display an address as a symbol\n"
5650         "\t            .sym-offset display an address as a symbol and offset\n"
5651         "\t            .execname   display a common_pid as a program name\n"
5652         "\t            .syscall    display a syscall id as a syscall name\n"
5653         "\t            .log2       display log2 value rather than raw number\n"
5654         "\t            .buckets=size  display values in groups of size rather than raw number\n"
5655         "\t            .usecs      display a common_timestamp in microseconds\n"
5656         "\t            .percent    display a number of percentage value\n"
5657         "\t            .graph      display a bar-graph of a value\n\n"
5658         "\t    The 'pause' parameter can be used to pause an existing hist\n"
5659         "\t    trigger or to start a hist trigger but not log any events\n"
5660         "\t    until told to do so.  'continue' can be used to start or\n"
5661         "\t    restart a paused hist trigger.\n\n"
5662         "\t    The 'clear' parameter will clear the contents of a running\n"
5663         "\t    hist trigger and leave its current paused/active state\n"
5664         "\t    unchanged.\n\n"
5665         "\t    The 'nohitcount' (or NOHC) parameter will suppress display of\n"
5666         "\t    raw hitcount in the histogram.\n\n"
5667         "\t    The enable_hist and disable_hist triggers can be used to\n"
5668         "\t    have one event conditionally start and stop another event's\n"
5669         "\t    already-attached hist trigger.  The syntax is analogous to\n"
5670         "\t    the enable_event and disable_event triggers.\n\n"
5671         "\t    Hist trigger handlers and actions are executed whenever a\n"
5672         "\t    a histogram entry is added or updated.  They take the form:\n\n"
5673         "\t        <handler>.<action>\n\n"
5674         "\t    The available handlers are:\n\n"
5675         "\t        onmatch(matching.event)  - invoke on addition or update\n"
5676         "\t        onmax(var)               - invoke if var exceeds current max\n"
5677         "\t        onchange(var)            - invoke action if var changes\n\n"
5678         "\t    The available actions are:\n\n"
5679         "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5680         "\t        save(field,...)                      - save current event fields\n"
5681 #ifdef CONFIG_TRACER_SNAPSHOT
5682         "\t        snapshot()                           - snapshot the trace buffer\n\n"
5683 #endif
5684 #ifdef CONFIG_SYNTH_EVENTS
5685         "  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5686         "\t  Write into this file to define/undefine new synthetic events.\n"
5687         "\t     example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
5688 #endif
5689 #endif
5690 ;
5691 
5692 static ssize_t
5693 tracing_readme_read(struct file *filp, char __user *ubuf,
5694                        size_t cnt, loff_t *ppos)
5695 {
5696         return simple_read_from_buffer(ubuf, cnt, ppos,
5697                                         readme_msg, strlen(readme_msg));
5698 }
5699 
5700 static const struct file_operations tracing_readme_fops = {
5701         .open           = tracing_open_generic,
5702         .read           = tracing_readme_read,
5703         .llseek         = generic_file_llseek,
5704 };
5705 
5706 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5707 static union trace_eval_map_item *
5708 update_eval_map(union trace_eval_map_item *ptr)
5709 {
5710         if (!ptr->map.eval_string) {
5711                 if (ptr->tail.next) {
5712                         ptr = ptr->tail.next;
5713                         /* Set ptr to the next real item (skip head) */
5714                         ptr++;
5715                 } else
5716                         return NULL;
5717         }
5718         return ptr;
5719 }
5720 
5721 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5722 {
5723         union trace_eval_map_item *ptr = v;
5724 
5725         /*
5726          * Paranoid! If ptr points to end, we don't want to increment past it.
5727          * This really should never happen.
5728          */
5729         (*pos)++;
5730         ptr = update_eval_map(ptr);
5731         if (WARN_ON_ONCE(!ptr))
5732                 return NULL;
5733 
5734         ptr++;
5735         ptr = update_eval_map(ptr);
5736 
5737         return ptr;
5738 }
5739 
5740 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5741 {
5742         union trace_eval_map_item *v;
5743         loff_t l = 0;
5744 
5745         mutex_lock(&trace_eval_mutex);
5746 
5747         v = trace_eval_maps;
5748         if (v)
5749                 v++;
5750 
5751         while (v && l < *pos) {
5752                 v = eval_map_next(m, v, &l);
5753         }
5754 
5755         return v;
5756 }
5757 
5758 static void eval_map_stop(struct seq_file *m, void *v)
5759 {
5760         mutex_unlock(&trace_eval_mutex);
5761 }
5762 
5763 static int eval_map_show(struct seq_file *m, void *v)
5764 {
5765         union trace_eval_map_item *ptr = v;
5766 
5767         seq_printf(m, "%s %ld (%s)\n",
5768                    ptr->map.eval_string, ptr->map.eval_value,
5769                    ptr->map.system);
5770 
5771         return 0;
5772 }
5773 
5774 static const struct seq_operations tracing_eval_map_seq_ops = {
5775         .start          = eval_map_start,
5776         .next           = eval_map_next,
5777         .stop           = eval_map_stop,
5778         .show           = eval_map_show,
5779 };
5780 
5781 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5782 {
5783         int ret;
5784 
5785         ret = tracing_check_open_get_tr(NULL);
5786         if (ret)
5787                 return ret;
5788 
5789         return seq_open(filp, &tracing_eval_map_seq_ops);
5790 }
5791 
5792 static const struct file_operations tracing_eval_map_fops = {
5793         .open           = tracing_eval_map_open,
5794         .read           = seq_read,
5795         .llseek         = seq_lseek,
5796         .release        = seq_release,
5797 };
5798 
5799 static inline union trace_eval_map_item *
5800 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5801 {
5802         /* Return tail of array given the head */
5803         return ptr + ptr->head.length + 1;
5804 }
5805 
5806 static void
5807 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5808                            int len)
5809 {
5810         struct trace_eval_map **stop;
5811         struct trace_eval_map **map;
5812         union trace_eval_map_item *map_array;
5813         union trace_eval_map_item *ptr;
5814 
5815         stop = start + len;
5816 
5817         /*
5818          * The trace_eval_maps contains the map plus a head and tail item,
5819          * where the head holds the module and length of array, and the
5820          * tail holds a pointer to the next list.
5821          */
5822         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5823         if (!map_array) {
5824                 pr_warn("Unable to allocate trace eval mapping\n");
5825                 return;
5826         }
5827 
5828         mutex_lock(&trace_eval_mutex);
5829 
5830         if (!trace_eval_maps)
5831                 trace_eval_maps = map_array;
5832         else {
5833                 ptr = trace_eval_maps;
5834                 for (;;) {
5835                         ptr = trace_eval_jmp_to_tail(ptr);
5836                         if (!ptr->tail.next)
5837                                 break;
5838                         ptr = ptr->tail.next;
5839 
5840                 }
5841                 ptr->tail.next = map_array;
5842         }
5843         map_array->head.mod = mod;
5844         map_array->head.length = len;
5845         map_array++;
5846 
5847         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5848                 map_array->map = **map;
5849                 map_array++;
5850         }
5851         memset(map_array, 0, sizeof(*map_array));
5852 
5853         mutex_unlock(&trace_eval_mutex);
5854 }
5855 
5856 static void trace_create_eval_file(struct dentry *d_tracer)
5857 {
5858         trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
5859                           NULL, &tracing_eval_map_fops);
5860 }
5861 
5862 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5863 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5864 static inline void trace_insert_eval_map_file(struct module *mod,
5865                               struct trace_eval_map **start, int len) { }
5866 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5867 
5868 static void trace_insert_eval_map(struct module *mod,
5869                                   struct trace_eval_map **start, int len)
5870 {
5871         struct trace_eval_map **map;
5872 
5873         if (len <= 0)
5874                 return;
5875 
5876         map = start;
5877 
5878         trace_event_eval_update(map, len);
5879 
5880         trace_insert_eval_map_file(mod, start, len);
5881 }
5882 
5883 static ssize_t
5884 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5885                        size_t cnt, loff_t *ppos)
5886 {
5887         struct trace_array *tr = filp->private_data;
5888         char buf[MAX_TRACER_SIZE+2];
5889         int r;
5890 
5891         mutex_lock(&trace_types_lock);
5892         r = sprintf(buf, "%s\n", tr->current_trace->name);
5893         mutex_unlock(&trace_types_lock);
5894 
5895         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5896 }
5897 
5898 int tracer_init(struct tracer *t, struct trace_array *tr)
5899 {
5900         tracing_reset_online_cpus(&tr->array_buffer);
5901         return t->init(tr);
5902 }
5903 
5904 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
5905 {
5906         int cpu;
5907 
5908         for_each_tracing_cpu(cpu)
5909                 per_cpu_ptr(buf->data, cpu)->entries = val;
5910 }
5911 
5912 static void update_buffer_entries(struct array_buffer *buf, int cpu)
5913 {
5914         if (cpu == RING_BUFFER_ALL_CPUS) {
5915                 set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
5916         } else {
5917                 per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
5918         }
5919 }
5920 
5921 #ifdef CONFIG_TRACER_MAX_TRACE
5922 /* resize @tr's buffer to the size of @size_tr's entries */
5923 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
5924                                         struct array_buffer *size_buf, int cpu_id)
5925 {
5926         int cpu, ret = 0;
5927 
5928         if (cpu_id == RING_BUFFER_ALL_CPUS) {
5929                 for_each_tracing_cpu(cpu) {
5930                         ret = ring_buffer_resize(trace_buf->buffer,
5931                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5932                         if (ret < 0)
5933                                 break;
5934                         per_cpu_ptr(trace_buf->data, cpu)->entries =
5935                                 per_cpu_ptr(size_buf->data, cpu)->entries;
5936                 }
5937         } else {
5938                 ret = ring_buffer_resize(trace_buf->buffer,
5939                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5940                 if (ret == 0)
5941                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5942                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
5943         }
5944 
5945         return ret;
5946 }
5947 #endif /* CONFIG_TRACER_MAX_TRACE */
5948 
5949 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5950                                         unsigned long size, int cpu)
5951 {
5952         int ret;
5953 
5954         /*
5955          * If kernel or user changes the size of the ring buffer
5956          * we use the size that was given, and we can forget about
5957          * expanding it later.
5958          */
5959         trace_set_ring_buffer_expanded(tr);
5960 
5961         /* May be called before buffers are initialized */
5962         if (!tr->array_buffer.buffer)
5963                 return 0;
5964 
5965         /* Do not allow tracing while resizing ring buffer */
5966         tracing_stop_tr(tr);
5967 
5968         ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
5969         if (ret < 0)
5970                 goto out_start;
5971 
5972 #ifdef CONFIG_TRACER_MAX_TRACE
5973         if (!tr->allocated_snapshot)
5974                 goto out;
5975 
5976         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5977         if (ret < 0) {
5978                 int r = resize_buffer_duplicate_size(&tr->array_buffer,
5979                                                      &tr->array_buffer, cpu);
5980                 if (r < 0) {
5981                         /*
5982                          * AARGH! We are left with different
5983                          * size max buffer!!!!
5984                          * The max buffer is our "snapshot" buffer.
5985                          * When a tracer needs a snapshot (one of the
5986                          * latency tracers), it swaps the max buffer
5987                          * with the saved snap shot. We succeeded to
5988                          * update the size of the main buffer, but failed to
5989                          * update the size of the max buffer. But when we tried
5990                          * to reset the main buffer to the original size, we
5991                          * failed there too. This is very unlikely to
5992                          * happen, but if it does, warn and kill all
5993                          * tracing.
5994                          */
5995                         WARN_ON(1);
5996                         tracing_disabled = 1;
5997                 }
5998                 goto out_start;
5999         }
6000 
6001         update_buffer_entries(&tr->max_buffer, cpu);
6002 
6003  out:
6004 #endif /* CONFIG_TRACER_MAX_TRACE */
6005 
6006         update_buffer_entries(&tr->array_buffer, cpu);
6007  out_start:
6008         tracing_start_tr(tr);
6009         return ret;
6010 }
6011 
6012 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6013                                   unsigned long size, int cpu_id)
6014 {
6015         int ret;
6016 
6017         mutex_lock(&trace_types_lock);
6018 
6019         if (cpu_id != RING_BUFFER_ALL_CPUS) {
6020                 /* make sure, this cpu is enabled in the mask */
6021                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6022                         ret = -EINVAL;
6023                         goto out;
6024                 }
6025         }
6026 
6027         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6028         if (ret < 0)
6029                 ret = -ENOMEM;
6030 
6031 out:
6032         mutex_unlock(&trace_types_lock);
6033 
6034         return ret;
6035 }
6036 
6037 
6038 /**
6039  * tracing_update_buffers - used by tracing facility to expand ring buffers
6040  * @tr: The tracing instance
6041  *
6042  * To save on memory when the tracing is never used on a system with it
6043  * configured in. The ring buffers are set to a minimum size. But once
6044  * a user starts to use the tracing facility, then they need to grow
6045  * to their default size.
6046  *
6047  * This function is to be called when a tracer is about to be used.
6048  */
6049 int tracing_update_buffers(struct trace_array *tr)
6050 {
6051         int ret = 0;
6052 
6053         mutex_lock(&trace_types_lock);
6054         if (!tr->ring_buffer_expanded)
6055                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6056                                                 RING_BUFFER_ALL_CPUS);
6057         mutex_unlock(&trace_types_lock);
6058 
6059         return ret;
6060 }
6061 
6062 struct trace_option_dentry;
6063 
6064 static void
6065 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6066 
6067 /*
6068  * Used to clear out the tracer before deletion of an instance.
6069  * Must have trace_types_lock held.
6070  */
6071 static void tracing_set_nop(struct trace_array *tr)
6072 {
6073         if (tr->current_trace == &nop_trace)
6074                 return;
6075 
6076         tr->current_trace->enabled--;
6077 
6078         if (tr->current_trace->reset)
6079                 tr->current_trace->reset(tr);
6080 
6081         tr->current_trace = &nop_trace;
6082 }
6083 
6084 static bool tracer_options_updated;
6085 
6086 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6087 {
6088         /* Only enable if the directory has been created already. */
6089         if (!tr->dir)
6090                 return;
6091 
6092         /* Only create trace option files after update_tracer_options finish */
6093         if (!tracer_options_updated)
6094                 return;
6095 
6096         create_trace_option_files(tr, t);
6097 }
6098 
6099 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6100 {
6101         struct tracer *t;
6102 #ifdef CONFIG_TRACER_MAX_TRACE
6103         bool had_max_tr;
6104 #endif
6105         int ret = 0;
6106 
6107         mutex_lock(&trace_types_lock);
6108 
6109         if (!tr->ring_buffer_expanded) {
6110                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6111                                                 RING_BUFFER_ALL_CPUS);
6112                 if (ret < 0)
6113                         goto out;
6114                 ret = 0;
6115         }
6116 
6117         for (t = trace_types; t; t = t->next) {
6118                 if (strcmp(t->name, buf) == 0)
6119                         break;
6120         }
6121         if (!t) {
6122                 ret = -EINVAL;
6123                 goto out;
6124         }
6125         if (t == tr->current_trace)
6126                 goto out;
6127 
6128 #ifdef CONFIG_TRACER_SNAPSHOT
6129         if (t->use_max_tr) {
6130                 local_irq_disable();
6131                 arch_spin_lock(&tr->max_lock);
6132                 if (tr->cond_snapshot)
6133                         ret = -EBUSY;
6134                 arch_spin_unlock(&tr->max_lock);
6135                 local_irq_enable();
6136                 if (ret)
6137                         goto out;
6138         }
6139 #endif
6140         /* Some tracers won't work on kernel command line */
6141         if (system_state < SYSTEM_RUNNING && t->noboot) {
6142                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6143                         t->name);
6144                 goto out;
6145         }
6146 
6147         /* Some tracers are only allowed for the top level buffer */
6148         if (!trace_ok_for_array(t, tr)) {
6149                 ret = -EINVAL;
6150                 goto out;
6151         }
6152 
6153         /* If trace pipe files are being read, we can't change the tracer */
6154         if (tr->trace_ref) {
6155                 ret = -EBUSY;
6156                 goto out;
6157         }
6158 
6159         trace_branch_disable();
6160 
6161         tr->current_trace->enabled--;
6162 
6163         if (tr->current_trace->reset)
6164                 tr->current_trace->reset(tr);
6165 
6166 #ifdef CONFIG_TRACER_MAX_TRACE
6167         had_max_tr = tr->current_trace->use_max_tr;
6168 
6169         /* Current trace needs to be nop_trace before synchronize_rcu */
6170         tr->current_trace = &nop_trace;
6171 
6172         if (had_max_tr && !t->use_max_tr) {
6173                 /*
6174                  * We need to make sure that the update_max_tr sees that
6175                  * current_trace changed to nop_trace to keep it from
6176                  * swapping the buffers after we resize it.
6177                  * The update_max_tr is called from interrupts disabled
6178                  * so a synchronized_sched() is sufficient.
6179                  */
6180                 synchronize_rcu();
6181                 free_snapshot(tr);
6182                 tracing_disarm_snapshot(tr);
6183         }
6184 
6185         if (!had_max_tr && t->use_max_tr) {
6186                 ret = tracing_arm_snapshot_locked(tr);
6187                 if (ret)
6188                         goto out;
6189         }
6190 #else
6191         tr->current_trace = &nop_trace;
6192 #endif
6193 
6194         if (t->init) {
6195                 ret = tracer_init(t, tr);
6196                 if (ret) {
6197 #ifdef CONFIG_TRACER_MAX_TRACE
6198                         if (t->use_max_tr)
6199                                 tracing_disarm_snapshot(tr);
6200 #endif
6201                         goto out;
6202                 }
6203         }
6204 
6205         tr->current_trace = t;
6206         tr->current_trace->enabled++;
6207         trace_branch_enable(tr);
6208  out:
6209         mutex_unlock(&trace_types_lock);
6210 
6211         return ret;
6212 }
6213 
6214 static ssize_t
6215 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6216                         size_t cnt, loff_t *ppos)
6217 {
6218         struct trace_array *tr = filp->private_data;
6219         char buf[MAX_TRACER_SIZE+1];
6220         char *name;
6221         size_t ret;
6222         int err;
6223 
6224         ret = cnt;
6225 
6226         if (cnt > MAX_TRACER_SIZE)
6227                 cnt = MAX_TRACER_SIZE;
6228 
6229         if (copy_from_user(buf, ubuf, cnt))
6230                 return -EFAULT;
6231 
6232         buf[cnt] = 0;
6233 
6234         name = strim(buf);
6235 
6236         err = tracing_set_tracer(tr, name);
6237         if (err)
6238                 return err;
6239 
6240         *ppos += ret;
6241 
6242         return ret;
6243 }
6244 
6245 static ssize_t
6246 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6247                    size_t cnt, loff_t *ppos)
6248 {
6249         char buf[64];
6250         int r;
6251 
6252         r = snprintf(buf, sizeof(buf), "%ld\n",
6253                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6254         if (r > sizeof(buf))
6255                 r = sizeof(buf);
6256         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6257 }
6258 
6259 static ssize_t
6260 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6261                     size_t cnt, loff_t *ppos)
6262 {
6263         unsigned long val;
6264         int ret;
6265 
6266         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6267         if (ret)
6268                 return ret;
6269 
6270         *ptr = val * 1000;
6271 
6272         return cnt;
6273 }
6274 
6275 static ssize_t
6276 tracing_thresh_read(struct file *filp, char __user *ubuf,
6277                     size_t cnt, loff_t *ppos)
6278 {
6279         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6280 }
6281 
6282 static ssize_t
6283 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6284                      size_t cnt, loff_t *ppos)
6285 {
6286         struct trace_array *tr = filp->private_data;
6287         int ret;
6288 
6289         mutex_lock(&trace_types_lock);
6290         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6291         if (ret < 0)
6292                 goto out;
6293 
6294         if (tr->current_trace->update_thresh) {
6295                 ret = tr->current_trace->update_thresh(tr);
6296                 if (ret < 0)
6297                         goto out;
6298         }
6299 
6300         ret = cnt;
6301 out:
6302         mutex_unlock(&trace_types_lock);
6303 
6304         return ret;
6305 }
6306 
6307 #ifdef CONFIG_TRACER_MAX_TRACE
6308 
6309 static ssize_t
6310 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6311                      size_t cnt, loff_t *ppos)
6312 {
6313         struct trace_array *tr = filp->private_data;
6314 
6315         return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos);
6316 }
6317 
6318 static ssize_t
6319 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6320                       size_t cnt, loff_t *ppos)
6321 {
6322         struct trace_array *tr = filp->private_data;
6323 
6324         return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos);
6325 }
6326 
6327 #endif
6328 
6329 static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
6330 {
6331         if (cpu == RING_BUFFER_ALL_CPUS) {
6332                 if (cpumask_empty(tr->pipe_cpumask)) {
6333                         cpumask_setall(tr->pipe_cpumask);
6334                         return 0;
6335                 }
6336         } else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
6337                 cpumask_set_cpu(cpu, tr->pipe_cpumask);
6338                 return 0;
6339         }
6340         return -EBUSY;
6341 }
6342 
6343 static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
6344 {
6345         if (cpu == RING_BUFFER_ALL_CPUS) {
6346                 WARN_ON(!cpumask_full(tr->pipe_cpumask));
6347                 cpumask_clear(tr->pipe_cpumask);
6348         } else {
6349                 WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
6350                 cpumask_clear_cpu(cpu, tr->pipe_cpumask);
6351         }
6352 }
6353 
6354 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6355 {
6356         struct trace_array *tr = inode->i_private;
6357         struct trace_iterator *iter;
6358         int cpu;
6359         int ret;
6360 
6361         ret = tracing_check_open_get_tr(tr);
6362         if (ret)
6363                 return ret;
6364 
6365         mutex_lock(&trace_types_lock);
6366         cpu = tracing_get_cpu(inode);
6367         ret = open_pipe_on_cpu(tr, cpu);
6368         if (ret)
6369                 goto fail_pipe_on_cpu;
6370 
6371         /* create a buffer to store the information to pass to userspace */
6372         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6373         if (!iter) {
6374                 ret = -ENOMEM;
6375                 goto fail_alloc_iter;
6376         }
6377 
6378         trace_seq_init(&iter->seq);
6379         iter->trace = tr->current_trace;
6380 
6381         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6382                 ret = -ENOMEM;
6383                 goto fail;
6384         }
6385 
6386         /* trace pipe does not show start of buffer */
6387         cpumask_setall(iter->started);
6388 
6389         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6390                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6391 
6392         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6393         if (trace_clocks[tr->clock_id].in_ns)
6394                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6395 
6396         iter->tr = tr;
6397         iter->array_buffer = &tr->array_buffer;
6398         iter->cpu_file = cpu;
6399         mutex_init(&iter->mutex);
6400         filp->private_data = iter;
6401 
6402         if (iter->trace->pipe_open)
6403                 iter->trace->pipe_open(iter);
6404 
6405         nonseekable_open(inode, filp);
6406 
6407         tr->trace_ref++;
6408 
6409         mutex_unlock(&trace_types_lock);
6410         return ret;
6411 
6412 fail:
6413         kfree(iter);
6414 fail_alloc_iter:
6415         close_pipe_on_cpu(tr, cpu);
6416 fail_pipe_on_cpu:
6417         __trace_array_put(tr);
6418         mutex_unlock(&trace_types_lock);
6419         return ret;
6420 }
6421 
6422 static int tracing_release_pipe(struct inode *inode, struct file *file)
6423 {
6424         struct trace_iterator *iter = file->private_data;
6425         struct trace_array *tr = inode->i_private;
6426 
6427         mutex_lock(&trace_types_lock);
6428 
6429         tr->trace_ref--;
6430 
6431         if (iter->trace->pipe_close)
6432                 iter->trace->pipe_close(iter);
6433         close_pipe_on_cpu(tr, iter->cpu_file);
6434         mutex_unlock(&trace_types_lock);
6435 
6436         free_trace_iter_content(iter);
6437         kfree(iter);
6438 
6439         trace_array_put(tr);
6440 
6441         return 0;
6442 }
6443 
6444 static __poll_t
6445 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6446 {
6447         struct trace_array *tr = iter->tr;
6448 
6449         /* Iterators are static, they should be filled or empty */
6450         if (trace_buffer_iter(iter, iter->cpu_file))
6451                 return EPOLLIN | EPOLLRDNORM;
6452 
6453         if (tr->trace_flags & TRACE_ITER_BLOCK)
6454                 /*
6455                  * Always select as readable when in blocking mode
6456                  */
6457                 return EPOLLIN | EPOLLRDNORM;
6458         else
6459                 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6460                                              filp, poll_table, iter->tr->buffer_percent);
6461 }
6462 
6463 static __poll_t
6464 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6465 {
6466         struct trace_iterator *iter = filp->private_data;
6467 
6468         return trace_poll(iter, filp, poll_table);
6469 }
6470 
6471 /* Must be called with iter->mutex held. */
6472 static int tracing_wait_pipe(struct file *filp)
6473 {
6474         struct trace_iterator *iter = filp->private_data;
6475         int ret;
6476 
6477         while (trace_empty(iter)) {
6478 
6479                 if ((filp->f_flags & O_NONBLOCK)) {
6480                         return -EAGAIN;
6481                 }
6482 
6483                 /*
6484                  * We block until we read something and tracing is disabled.
6485                  * We still block if tracing is disabled, but we have never
6486                  * read anything. This allows a user to cat this file, and
6487                  * then enable tracing. But after we have read something,
6488                  * we give an EOF when tracing is again disabled.
6489                  *
6490                  * iter->pos will be 0 if we haven't read anything.
6491                  */
6492                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6493                         break;
6494 
6495                 mutex_unlock(&iter->mutex);
6496 
6497                 ret = wait_on_pipe(iter, 0);
6498 
6499                 mutex_lock(&iter->mutex);
6500 
6501                 if (ret)
6502                         return ret;
6503         }
6504 
6505         return 1;
6506 }
6507 
6508 /*
6509  * Consumer reader.
6510  */
6511 static ssize_t
6512 tracing_read_pipe(struct file *filp, char __user *ubuf,
6513                   size_t cnt, loff_t *ppos)
6514 {
6515         struct trace_iterator *iter = filp->private_data;
6516         ssize_t sret;
6517 
6518         /*
6519          * Avoid more than one consumer on a single file descriptor
6520          * This is just a matter of traces coherency, the ring buffer itself
6521          * is protected.
6522          */
6523         mutex_lock(&iter->mutex);
6524 
6525         /* return any leftover data */
6526         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6527         if (sret != -EBUSY)
6528                 goto out;
6529 
6530         trace_seq_init(&iter->seq);
6531 
6532         if (iter->trace->read) {
6533                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6534                 if (sret)
6535                         goto out;
6536         }
6537 
6538 waitagain:
6539         sret = tracing_wait_pipe(filp);
6540         if (sret <= 0)
6541                 goto out;
6542 
6543         /* stop when tracing is finished */
6544         if (trace_empty(iter)) {
6545                 sret = 0;
6546                 goto out;
6547         }
6548 
6549         if (cnt >= TRACE_SEQ_BUFFER_SIZE)
6550                 cnt = TRACE_SEQ_BUFFER_SIZE - 1;
6551 
6552         /* reset all but tr, trace, and overruns */
6553         trace_iterator_reset(iter);
6554         cpumask_clear(iter->started);
6555         trace_seq_init(&iter->seq);
6556 
6557         trace_event_read_lock();
6558         trace_access_lock(iter->cpu_file);
6559         while (trace_find_next_entry_inc(iter) != NULL) {
6560                 enum print_line_t ret;
6561                 int save_len = iter->seq.seq.len;
6562 
6563                 ret = print_trace_line(iter);
6564                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6565                         /*
6566                          * If one print_trace_line() fills entire trace_seq in one shot,
6567                          * trace_seq_to_user() will returns -EBUSY because save_len == 0,
6568                          * In this case, we need to consume it, otherwise, loop will peek
6569                          * this event next time, resulting in an infinite loop.
6570                          */
6571                         if (save_len == 0) {
6572                                 iter->seq.full = 0;
6573                                 trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6574                                 trace_consume(iter);
6575                                 break;
6576                         }
6577 
6578                         /* In other cases, don't print partial lines */
6579                         iter->seq.seq.len = save_len;
6580                         break;
6581                 }
6582                 if (ret != TRACE_TYPE_NO_CONSUME)
6583                         trace_consume(iter);
6584 
6585                 if (trace_seq_used(&iter->seq) >= cnt)
6586                         break;
6587 
6588                 /*
6589                  * Setting the full flag means we reached the trace_seq buffer
6590                  * size and we should leave by partial output condition above.
6591                  * One of the trace_seq_* functions is not used properly.
6592                  */
6593                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6594                           iter->ent->type);
6595         }
6596         trace_access_unlock(iter->cpu_file);
6597         trace_event_read_unlock();
6598 
6599         /* Now copy what we have to the user */
6600         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6601         if (iter->seq.readpos >= trace_seq_used(&iter->seq))
6602                 trace_seq_init(&iter->seq);
6603 
6604         /*
6605          * If there was nothing to send to user, in spite of consuming trace
6606          * entries, go back to wait for more entries.
6607          */
6608         if (sret == -EBUSY)
6609                 goto waitagain;
6610 
6611 out:
6612         mutex_unlock(&iter->mutex);
6613 
6614         return sret;
6615 }
6616 
6617 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6618                                      unsigned int idx)
6619 {
6620         __free_page(spd->pages[idx]);
6621 }
6622 
6623 static size_t
6624 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6625 {
6626         size_t count;
6627         int save_len;
6628         int ret;
6629 
6630         /* Seq buffer is page-sized, exactly what we need. */
6631         for (;;) {
6632                 save_len = iter->seq.seq.len;
6633                 ret = print_trace_line(iter);
6634 
6635                 if (trace_seq_has_overflowed(&iter->seq)) {
6636                         iter->seq.seq.len = save_len;
6637                         break;
6638                 }
6639 
6640                 /*
6641                  * This should not be hit, because it should only
6642                  * be set if the iter->seq overflowed. But check it
6643                  * anyway to be safe.
6644                  */
6645                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6646                         iter->seq.seq.len = save_len;
6647                         break;
6648                 }
6649 
6650                 count = trace_seq_used(&iter->seq) - save_len;
6651                 if (rem < count) {
6652                         rem = 0;
6653                         iter->seq.seq.len = save_len;
6654                         break;
6655                 }
6656 
6657                 if (ret != TRACE_TYPE_NO_CONSUME)
6658                         trace_consume(iter);
6659                 rem -= count;
6660                 if (!trace_find_next_entry_inc(iter))   {
6661                         rem = 0;
6662                         iter->ent = NULL;
6663                         break;
6664                 }
6665         }
6666 
6667         return rem;
6668 }
6669 
6670 static ssize_t tracing_splice_read_pipe(struct file *filp,
6671                                         loff_t *ppos,
6672                                         struct pipe_inode_info *pipe,
6673                                         size_t len,
6674                                         unsigned int flags)
6675 {
6676         struct page *pages_def[PIPE_DEF_BUFFERS];
6677         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6678         struct trace_iterator *iter = filp->private_data;
6679         struct splice_pipe_desc spd = {
6680                 .pages          = pages_def,
6681                 .partial        = partial_def,
6682                 .nr_pages       = 0, /* This gets updated below. */
6683                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6684                 .ops            = &default_pipe_buf_ops,
6685                 .spd_release    = tracing_spd_release_pipe,
6686         };
6687         ssize_t ret;
6688         size_t rem;
6689         unsigned int i;
6690 
6691         if (splice_grow_spd(pipe, &spd))
6692                 return -ENOMEM;
6693 
6694         mutex_lock(&iter->mutex);
6695 
6696         if (iter->trace->splice_read) {
6697                 ret = iter->trace->splice_read(iter, filp,
6698                                                ppos, pipe, len, flags);
6699                 if (ret)
6700                         goto out_err;
6701         }
6702 
6703         ret = tracing_wait_pipe(filp);
6704         if (ret <= 0)
6705                 goto out_err;
6706 
6707         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6708                 ret = -EFAULT;
6709                 goto out_err;
6710         }
6711 
6712         trace_event_read_lock();
6713         trace_access_lock(iter->cpu_file);
6714 
6715         /* Fill as many pages as possible. */
6716         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6717                 spd.pages[i] = alloc_page(GFP_KERNEL);
6718                 if (!spd.pages[i])
6719                         break;
6720 
6721                 rem = tracing_fill_pipe_page(rem, iter);
6722 
6723                 /* Copy the data into the page, so we can start over. */
6724                 ret = trace_seq_to_buffer(&iter->seq,
6725                                           page_address(spd.pages[i]),
6726                                           trace_seq_used(&iter->seq));
6727                 if (ret < 0) {
6728                         __free_page(spd.pages[i]);
6729                         break;
6730                 }
6731                 spd.partial[i].offset = 0;
6732                 spd.partial[i].len = trace_seq_used(&iter->seq);
6733 
6734                 trace_seq_init(&iter->seq);
6735         }
6736 
6737         trace_access_unlock(iter->cpu_file);
6738         trace_event_read_unlock();
6739         mutex_unlock(&iter->mutex);
6740 
6741         spd.nr_pages = i;
6742 
6743         if (i)
6744                 ret = splice_to_pipe(pipe, &spd);
6745         else
6746                 ret = 0;
6747 out:
6748         splice_shrink_spd(&spd);
6749         return ret;
6750 
6751 out_err:
6752         mutex_unlock(&iter->mutex);
6753         goto out;
6754 }
6755 
6756 static ssize_t
6757 tracing_entries_read(struct file *filp, char __user *ubuf,
6758                      size_t cnt, loff_t *ppos)
6759 {
6760         struct inode *inode = file_inode(filp);
6761         struct trace_array *tr = inode->i_private;
6762         int cpu = tracing_get_cpu(inode);
6763         char buf[64];
6764         int r = 0;
6765         ssize_t ret;
6766 
6767         mutex_lock(&trace_types_lock);
6768 
6769         if (cpu == RING_BUFFER_ALL_CPUS) {
6770                 int cpu, buf_size_same;
6771                 unsigned long size;
6772 
6773                 size = 0;
6774                 buf_size_same = 1;
6775                 /* check if all cpu sizes are same */
6776                 for_each_tracing_cpu(cpu) {
6777                         /* fill in the size from first enabled cpu */
6778                         if (size == 0)
6779                                 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6780                         if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6781                                 buf_size_same = 0;
6782                                 break;
6783                         }
6784                 }
6785 
6786                 if (buf_size_same) {
6787                         if (!tr->ring_buffer_expanded)
6788                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
6789                                             size >> 10,
6790                                             trace_buf_size >> 10);
6791                         else
6792                                 r = sprintf(buf, "%lu\n", size >> 10);
6793                 } else
6794                         r = sprintf(buf, "X\n");
6795         } else
6796                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6797 
6798         mutex_unlock(&trace_types_lock);
6799 
6800         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6801         return ret;
6802 }
6803 
6804 static ssize_t
6805 tracing_entries_write(struct file *filp, const char __user *ubuf,
6806                       size_t cnt, loff_t *ppos)
6807 {
6808         struct inode *inode = file_inode(filp);
6809         struct trace_array *tr = inode->i_private;
6810         unsigned long val;
6811         int ret;
6812 
6813         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6814         if (ret)
6815                 return ret;
6816 
6817         /* must have at least 1 entry */
6818         if (!val)
6819                 return -EINVAL;
6820 
6821         /* value is in KB */
6822         val <<= 10;
6823         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6824         if (ret < 0)
6825                 return ret;
6826 
6827         *ppos += cnt;
6828 
6829         return cnt;
6830 }
6831 
6832 static ssize_t
6833 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6834                                 size_t cnt, loff_t *ppos)
6835 {
6836         struct trace_array *tr = filp->private_data;
6837         char buf[64];
6838         int r, cpu;
6839         unsigned long size = 0, expanded_size = 0;
6840 
6841         mutex_lock(&trace_types_lock);
6842         for_each_tracing_cpu(cpu) {
6843                 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6844                 if (!tr->ring_buffer_expanded)
6845                         expanded_size += trace_buf_size >> 10;
6846         }
6847         if (tr->ring_buffer_expanded)
6848                 r = sprintf(buf, "%lu\n", size);
6849         else
6850                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6851         mutex_unlock(&trace_types_lock);
6852 
6853         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6854 }
6855 
6856 static ssize_t
6857 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6858                           size_t cnt, loff_t *ppos)
6859 {
6860         /*
6861          * There is no need to read what the user has written, this function
6862          * is just to make sure that there is no error when "echo" is used
6863          */
6864 
6865         *ppos += cnt;
6866 
6867         return cnt;
6868 }
6869 
6870 static int
6871 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6872 {
6873         struct trace_array *tr = inode->i_private;
6874 
6875         /* disable tracing ? */
6876         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6877                 tracer_tracing_off(tr);
6878         /* resize the ring buffer to 0 */
6879         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6880 
6881         trace_array_put(tr);
6882 
6883         return 0;
6884 }
6885 
6886 #define TRACE_MARKER_MAX_SIZE           4096
6887 
6888 static ssize_t
6889 tracing_mark_write(struct file *filp, const char __user *ubuf,
6890                                         size_t cnt, loff_t *fpos)
6891 {
6892         struct trace_array *tr = filp->private_data;
6893         struct ring_buffer_event *event;
6894         enum event_trigger_type tt = ETT_NONE;
6895         struct trace_buffer *buffer;
6896         struct print_entry *entry;
6897         int meta_size;
6898         ssize_t written;
6899         size_t size;
6900         int len;
6901 
6902 /* Used in tracing_mark_raw_write() as well */
6903 #define FAULTED_STR "<faulted>"
6904 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
6905 
6906         if (tracing_disabled)
6907                 return -EINVAL;
6908 
6909         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6910                 return -EINVAL;
6911 
6912         if ((ssize_t)cnt < 0)
6913                 return -EINVAL;
6914 
6915         if (cnt > TRACE_MARKER_MAX_SIZE)
6916                 cnt = TRACE_MARKER_MAX_SIZE;
6917 
6918         meta_size = sizeof(*entry) + 2;  /* add '\0' and possible '\n' */
6919  again:
6920         size = cnt + meta_size;
6921 
6922         /* If less than "<faulted>", then make sure we can still add that */
6923         if (cnt < FAULTED_SIZE)
6924                 size += FAULTED_SIZE - cnt;
6925 
6926         buffer = tr->array_buffer.buffer;
6927         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6928                                             tracing_gen_ctx());
6929         if (unlikely(!event)) {
6930                 /*
6931                  * If the size was greater than what was allowed, then
6932                  * make it smaller and try again.
6933                  */
6934                 if (size > ring_buffer_max_event_size(buffer)) {
6935                         /* cnt < FAULTED size should never be bigger than max */
6936                         if (WARN_ON_ONCE(cnt < FAULTED_SIZE))
6937                                 return -EBADF;
6938                         cnt = ring_buffer_max_event_size(buffer) - meta_size;
6939                         /* The above should only happen once */
6940                         if (WARN_ON_ONCE(cnt + meta_size == size))
6941                                 return -EBADF;
6942                         goto again;
6943                 }
6944 
6945                 /* Ring buffer disabled, return as if not open for write */
6946                 return -EBADF;
6947         }
6948 
6949         entry = ring_buffer_event_data(event);
6950         entry->ip = _THIS_IP_;
6951 
6952         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6953         if (len) {
6954                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6955                 cnt = FAULTED_SIZE;
6956                 written = -EFAULT;
6957         } else
6958                 written = cnt;
6959 
6960         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6961                 /* do not add \n before testing triggers, but add \0 */
6962                 entry->buf[cnt] = '\0';
6963                 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
6964         }
6965 
6966         if (entry->buf[cnt - 1] != '\n') {
6967                 entry->buf[cnt] = '\n';
6968                 entry->buf[cnt + 1] = '\0';
6969         } else
6970                 entry->buf[cnt] = '\0';
6971 
6972         if (static_branch_unlikely(&trace_marker_exports_enabled))
6973                 ftrace_exports(event, TRACE_EXPORT_MARKER);
6974         __buffer_unlock_commit(buffer, event);
6975 
6976         if (tt)
6977                 event_triggers_post_call(tr->trace_marker_file, tt);
6978 
6979         return written;
6980 }
6981 
6982 static ssize_t
6983 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6984                                         size_t cnt, loff_t *fpos)
6985 {
6986         struct trace_array *tr = filp->private_data;
6987         struct ring_buffer_event *event;
6988         struct trace_buffer *buffer;
6989         struct raw_data_entry *entry;
6990         ssize_t written;
6991         int size;
6992         int len;
6993 
6994 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6995 
6996         if (tracing_disabled)
6997                 return -EINVAL;
6998 
6999         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7000                 return -EINVAL;
7001 
7002         /* The marker must at least have a tag id */
7003         if (cnt < sizeof(unsigned int))
7004                 return -EINVAL;
7005 
7006         size = sizeof(*entry) + cnt;
7007         if (cnt < FAULT_SIZE_ID)
7008                 size += FAULT_SIZE_ID - cnt;
7009 
7010         buffer = tr->array_buffer.buffer;
7011 
7012         if (size > ring_buffer_max_event_size(buffer))
7013                 return -EINVAL;
7014 
7015         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7016                                             tracing_gen_ctx());
7017         if (!event)
7018                 /* Ring buffer disabled, return as if not open for write */
7019                 return -EBADF;
7020 
7021         entry = ring_buffer_event_data(event);
7022 
7023         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7024         if (len) {
7025                 entry->id = -1;
7026                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7027                 written = -EFAULT;
7028         } else
7029                 written = cnt;
7030 
7031         __buffer_unlock_commit(buffer, event);
7032 
7033         return written;
7034 }
7035 
7036 static int tracing_clock_show(struct seq_file *m, void *v)
7037 {
7038         struct trace_array *tr = m->private;
7039         int i;
7040 
7041         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7042                 seq_printf(m,
7043                         "%s%s%s%s", i ? " " : "",
7044                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7045                         i == tr->clock_id ? "]" : "");
7046         seq_putc(m, '\n');
7047 
7048         return 0;
7049 }
7050 
7051 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7052 {
7053         int i;
7054 
7055         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7056                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
7057                         break;
7058         }
7059         if (i == ARRAY_SIZE(trace_clocks))
7060                 return -EINVAL;
7061 
7062         mutex_lock(&trace_types_lock);
7063 
7064         tr->clock_id = i;
7065 
7066         ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7067 
7068         /*
7069          * New clock may not be consistent with the previous clock.
7070          * Reset the buffer so that it doesn't have incomparable timestamps.
7071          */
7072         tracing_reset_online_cpus(&tr->array_buffer);
7073 
7074 #ifdef CONFIG_TRACER_MAX_TRACE
7075         if (tr->max_buffer.buffer)
7076                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7077         tracing_reset_online_cpus(&tr->max_buffer);
7078 #endif
7079 
7080         mutex_unlock(&trace_types_lock);
7081 
7082         return 0;
7083 }
7084 
7085 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7086                                    size_t cnt, loff_t *fpos)
7087 {
7088         struct seq_file *m = filp->private_data;
7089         struct trace_array *tr = m->private;
7090         char buf[64];
7091         const char *clockstr;
7092         int ret;
7093 
7094         if (cnt >= sizeof(buf))
7095                 return -EINVAL;
7096 
7097         if (copy_from_user(buf, ubuf, cnt))
7098                 return -EFAULT;
7099 
7100         buf[cnt] = 0;
7101 
7102         clockstr = strstrip(buf);
7103 
7104         ret = tracing_set_clock(tr, clockstr);
7105         if (ret)
7106                 return ret;
7107 
7108         *fpos += cnt;
7109 
7110         return cnt;
7111 }
7112 
7113 static int tracing_clock_open(struct inode *inode, struct file *file)
7114 {
7115         struct trace_array *tr = inode->i_private;
7116         int ret;
7117 
7118         ret = tracing_check_open_get_tr(tr);
7119         if (ret)
7120                 return ret;
7121 
7122         ret = single_open(file, tracing_clock_show, inode->i_private);
7123         if (ret < 0)
7124                 trace_array_put(tr);
7125 
7126         return ret;
7127 }
7128 
7129 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7130 {
7131         struct trace_array *tr = m->private;
7132 
7133         mutex_lock(&trace_types_lock);
7134 
7135         if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7136                 seq_puts(m, "delta [absolute]\n");
7137         else
7138                 seq_puts(m, "[delta] absolute\n");
7139 
7140         mutex_unlock(&trace_types_lock);
7141 
7142         return 0;
7143 }
7144 
7145 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7146 {
7147         struct trace_array *tr = inode->i_private;
7148         int ret;
7149 
7150         ret = tracing_check_open_get_tr(tr);
7151         if (ret)
7152                 return ret;
7153 
7154         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7155         if (ret < 0)
7156                 trace_array_put(tr);
7157 
7158         return ret;
7159 }
7160 
7161 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7162 {
7163         if (rbe == this_cpu_read(trace_buffered_event))
7164                 return ring_buffer_time_stamp(buffer);
7165 
7166         return ring_buffer_event_time_stamp(buffer, rbe);
7167 }
7168 
7169 /*
7170  * Set or disable using the per CPU trace_buffer_event when possible.
7171  */
7172 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7173 {
7174         int ret = 0;
7175 
7176         mutex_lock(&trace_types_lock);
7177 
7178         if (set && tr->no_filter_buffering_ref++)
7179                 goto out;
7180 
7181         if (!set) {
7182                 if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7183                         ret = -EINVAL;
7184                         goto out;
7185                 }
7186 
7187                 --tr->no_filter_buffering_ref;
7188         }
7189  out:
7190         mutex_unlock(&trace_types_lock);
7191 
7192         return ret;
7193 }
7194 
7195 struct ftrace_buffer_info {
7196         struct trace_iterator   iter;
7197         void                    *spare;
7198         unsigned int            spare_cpu;
7199         unsigned int            spare_size;
7200         unsigned int            read;
7201 };
7202 
7203 #ifdef CONFIG_TRACER_SNAPSHOT
7204 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7205 {
7206         struct trace_array *tr = inode->i_private;
7207         struct trace_iterator *iter;
7208         struct seq_file *m;
7209         int ret;
7210 
7211         ret = tracing_check_open_get_tr(tr);
7212         if (ret)
7213                 return ret;
7214 
7215         if (file->f_mode & FMODE_READ) {
7216                 iter = __tracing_open(inode, file, true);
7217                 if (IS_ERR(iter))
7218                         ret = PTR_ERR(iter);
7219         } else {
7220                 /* Writes still need the seq_file to hold the private data */
7221                 ret = -ENOMEM;
7222                 m = kzalloc(sizeof(*m), GFP_KERNEL);
7223                 if (!m)
7224                         goto out;
7225                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7226                 if (!iter) {
7227                         kfree(m);
7228                         goto out;
7229                 }
7230                 ret = 0;
7231 
7232                 iter->tr = tr;
7233                 iter->array_buffer = &tr->max_buffer;
7234                 iter->cpu_file = tracing_get_cpu(inode);
7235                 m->private = iter;
7236                 file->private_data = m;
7237         }
7238 out:
7239         if (ret < 0)
7240                 trace_array_put(tr);
7241 
7242         return ret;
7243 }
7244 
7245 static void tracing_swap_cpu_buffer(void *tr)
7246 {
7247         update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
7248 }
7249 
7250 static ssize_t
7251 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7252                        loff_t *ppos)
7253 {
7254         struct seq_file *m = filp->private_data;
7255         struct trace_iterator *iter = m->private;
7256         struct trace_array *tr = iter->tr;
7257         unsigned long val;
7258         int ret;
7259 
7260         ret = tracing_update_buffers(tr);
7261         if (ret < 0)
7262                 return ret;
7263 
7264         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7265         if (ret)
7266                 return ret;
7267 
7268         mutex_lock(&trace_types_lock);
7269 
7270         if (tr->current_trace->use_max_tr) {
7271                 ret = -EBUSY;
7272                 goto out;
7273         }
7274 
7275         local_irq_disable();
7276         arch_spin_lock(&tr->max_lock);
7277         if (tr->cond_snapshot)
7278                 ret = -EBUSY;
7279         arch_spin_unlock(&tr->max_lock);
7280         local_irq_enable();
7281         if (ret)
7282                 goto out;
7283 
7284         switch (val) {
7285         case 0:
7286                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7287                         ret = -EINVAL;
7288                         break;
7289                 }
7290                 if (tr->allocated_snapshot)
7291                         free_snapshot(tr);
7292                 break;
7293         case 1:
7294 /* Only allow per-cpu swap if the ring buffer supports it */
7295 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7296                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7297                         ret = -EINVAL;
7298                         break;
7299                 }
7300 #endif
7301                 if (tr->allocated_snapshot)
7302                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
7303                                         &tr->array_buffer, iter->cpu_file);
7304 
7305                 ret = tracing_arm_snapshot_locked(tr);
7306                 if (ret)
7307                         break;
7308 
7309                 /* Now, we're going to swap */
7310                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
7311                         local_irq_disable();
7312                         update_max_tr(tr, current, smp_processor_id(), NULL);
7313                         local_irq_enable();
7314                 } else {
7315                         smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
7316                                                  (void *)tr, 1);
7317                 }
7318                 tracing_disarm_snapshot(tr);
7319                 break;
7320         default:
7321                 if (tr->allocated_snapshot) {
7322                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7323                                 tracing_reset_online_cpus(&tr->max_buffer);
7324                         else
7325                                 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7326                 }
7327                 break;
7328         }
7329 
7330         if (ret >= 0) {
7331                 *ppos += cnt;
7332                 ret = cnt;
7333         }
7334 out:
7335         mutex_unlock(&trace_types_lock);
7336         return ret;
7337 }
7338 
7339 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7340 {
7341         struct seq_file *m = file->private_data;
7342         int ret;
7343 
7344         ret = tracing_release(inode, file);
7345 
7346         if (file->f_mode & FMODE_READ)
7347                 return ret;
7348 
7349         /* If write only, the seq_file is just a stub */
7350         if (m)
7351                 kfree(m->private);
7352         kfree(m);
7353 
7354         return 0;
7355 }
7356 
7357 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7358 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7359                                     size_t count, loff_t *ppos);
7360 static int tracing_buffers_release(struct inode *inode, struct file *file);
7361 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7362                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7363 
7364 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7365 {
7366         struct ftrace_buffer_info *info;
7367         int ret;
7368 
7369         /* The following checks for tracefs lockdown */
7370         ret = tracing_buffers_open(inode, filp);
7371         if (ret < 0)
7372                 return ret;
7373 
7374         info = filp->private_data;
7375 
7376         if (info->iter.trace->use_max_tr) {
7377                 tracing_buffers_release(inode, filp);
7378                 return -EBUSY;
7379         }
7380 
7381         info->iter.snapshot = true;
7382         info->iter.array_buffer = &info->iter.tr->max_buffer;
7383 
7384         return ret;
7385 }
7386 
7387 #endif /* CONFIG_TRACER_SNAPSHOT */
7388 
7389 
7390 static const struct file_operations tracing_thresh_fops = {
7391         .open           = tracing_open_generic,
7392         .read           = tracing_thresh_read,
7393         .write          = tracing_thresh_write,
7394         .llseek         = generic_file_llseek,
7395 };
7396 
7397 #ifdef CONFIG_TRACER_MAX_TRACE
7398 static const struct file_operations tracing_max_lat_fops = {
7399         .open           = tracing_open_generic_tr,
7400         .read           = tracing_max_lat_read,
7401         .write          = tracing_max_lat_write,
7402         .llseek         = generic_file_llseek,
7403         .release        = tracing_release_generic_tr,
7404 };
7405 #endif
7406 
7407 static const struct file_operations set_tracer_fops = {
7408         .open           = tracing_open_generic_tr,
7409         .read           = tracing_set_trace_read,
7410         .write          = tracing_set_trace_write,
7411         .llseek         = generic_file_llseek,
7412         .release        = tracing_release_generic_tr,
7413 };
7414 
7415 static const struct file_operations tracing_pipe_fops = {
7416         .open           = tracing_open_pipe,
7417         .poll           = tracing_poll_pipe,
7418         .read           = tracing_read_pipe,
7419         .splice_read    = tracing_splice_read_pipe,
7420         .release        = tracing_release_pipe,
7421         .llseek         = no_llseek,
7422 };
7423 
7424 static const struct file_operations tracing_entries_fops = {
7425         .open           = tracing_open_generic_tr,
7426         .read           = tracing_entries_read,
7427         .write          = tracing_entries_write,
7428         .llseek         = generic_file_llseek,
7429         .release        = tracing_release_generic_tr,
7430 };
7431 
7432 static const struct file_operations tracing_total_entries_fops = {
7433         .open           = tracing_open_generic_tr,
7434         .read           = tracing_total_entries_read,
7435         .llseek         = generic_file_llseek,
7436         .release        = tracing_release_generic_tr,
7437 };
7438 
7439 static const struct file_operations tracing_free_buffer_fops = {
7440         .open           = tracing_open_generic_tr,
7441         .write          = tracing_free_buffer_write,
7442         .release        = tracing_free_buffer_release,
7443 };
7444 
7445 static const struct file_operations tracing_mark_fops = {
7446         .open           = tracing_mark_open,
7447         .write          = tracing_mark_write,
7448         .release        = tracing_release_generic_tr,
7449 };
7450 
7451 static const struct file_operations tracing_mark_raw_fops = {
7452         .open           = tracing_mark_open,
7453         .write          = tracing_mark_raw_write,
7454         .release        = tracing_release_generic_tr,
7455 };
7456 
7457 static const struct file_operations trace_clock_fops = {
7458         .open           = tracing_clock_open,
7459         .read           = seq_read,
7460         .llseek         = seq_lseek,
7461         .release        = tracing_single_release_tr,
7462         .write          = tracing_clock_write,
7463 };
7464 
7465 static const struct file_operations trace_time_stamp_mode_fops = {
7466         .open           = tracing_time_stamp_mode_open,
7467         .read           = seq_read,
7468         .llseek         = seq_lseek,
7469         .release        = tracing_single_release_tr,
7470 };
7471 
7472 #ifdef CONFIG_TRACER_SNAPSHOT
7473 static const struct file_operations snapshot_fops = {
7474         .open           = tracing_snapshot_open,
7475         .read           = seq_read,
7476         .write          = tracing_snapshot_write,
7477         .llseek         = tracing_lseek,
7478         .release        = tracing_snapshot_release,
7479 };
7480 
7481 static const struct file_operations snapshot_raw_fops = {
7482         .open           = snapshot_raw_open,
7483         .read           = tracing_buffers_read,
7484         .release        = tracing_buffers_release,
7485         .splice_read    = tracing_buffers_splice_read,
7486         .llseek         = no_llseek,
7487 };
7488 
7489 #endif /* CONFIG_TRACER_SNAPSHOT */
7490 
7491 /*
7492  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7493  * @filp: The active open file structure
7494  * @ubuf: The userspace provided buffer to read value into
7495  * @cnt: The maximum number of bytes to read
7496  * @ppos: The current "file" position
7497  *
7498  * This function implements the write interface for a struct trace_min_max_param.
7499  * The filp->private_data must point to a trace_min_max_param structure that
7500  * defines where to write the value, the min and the max acceptable values,
7501  * and a lock to protect the write.
7502  */
7503 static ssize_t
7504 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7505 {
7506         struct trace_min_max_param *param = filp->private_data;
7507         u64 val;
7508         int err;
7509 
7510         if (!param)
7511                 return -EFAULT;
7512 
7513         err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7514         if (err)
7515                 return err;
7516 
7517         if (param->lock)
7518                 mutex_lock(param->lock);
7519 
7520         if (param->min && val < *param->min)
7521                 err = -EINVAL;
7522 
7523         if (param->max && val > *param->max)
7524                 err = -EINVAL;
7525 
7526         if (!err)
7527                 *param->val = val;
7528 
7529         if (param->lock)
7530                 mutex_unlock(param->lock);
7531 
7532         if (err)
7533                 return err;
7534 
7535         return cnt;
7536 }
7537 
7538 /*
7539  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7540  * @filp: The active open file structure
7541  * @ubuf: The userspace provided buffer to read value into
7542  * @cnt: The maximum number of bytes to read
7543  * @ppos: The current "file" position
7544  *
7545  * This function implements the read interface for a struct trace_min_max_param.
7546  * The filp->private_data must point to a trace_min_max_param struct with valid
7547  * data.
7548  */
7549 static ssize_t
7550 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7551 {
7552         struct trace_min_max_param *param = filp->private_data;
7553         char buf[U64_STR_SIZE];
7554         int len;
7555         u64 val;
7556 
7557         if (!param)
7558                 return -EFAULT;
7559 
7560         val = *param->val;
7561 
7562         if (cnt > sizeof(buf))
7563                 cnt = sizeof(buf);
7564 
7565         len = snprintf(buf, sizeof(buf), "%llu\n", val);
7566 
7567         return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7568 }
7569 
7570 const struct file_operations trace_min_max_fops = {
7571         .open           = tracing_open_generic,
7572         .read           = trace_min_max_read,
7573         .write          = trace_min_max_write,
7574 };
7575 
7576 #define TRACING_LOG_ERRS_MAX    8
7577 #define TRACING_LOG_LOC_MAX     128
7578 
7579 #define CMD_PREFIX "  Command: "
7580 
7581 struct err_info {
7582         const char      **errs; /* ptr to loc-specific array of err strings */
7583         u8              type;   /* index into errs -> specific err string */
7584         u16             pos;    /* caret position */
7585         u64             ts;
7586 };
7587 
7588 struct tracing_log_err {
7589         struct list_head        list;
7590         struct err_info         info;
7591         char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
7592         char                    *cmd;                     /* what caused err */
7593 };
7594 
7595 static DEFINE_MUTEX(tracing_err_log_lock);
7596 
7597 static struct tracing_log_err *alloc_tracing_log_err(int len)
7598 {
7599         struct tracing_log_err *err;
7600 
7601         err = kzalloc(sizeof(*err), GFP_KERNEL);
7602         if (!err)
7603                 return ERR_PTR(-ENOMEM);
7604 
7605         err->cmd = kzalloc(len, GFP_KERNEL);
7606         if (!err->cmd) {
7607                 kfree(err);
7608                 return ERR_PTR(-ENOMEM);
7609         }
7610 
7611         return err;
7612 }
7613 
7614 static void free_tracing_log_err(struct tracing_log_err *err)
7615 {
7616         kfree(err->cmd);
7617         kfree(err);
7618 }
7619 
7620 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
7621                                                    int len)
7622 {
7623         struct tracing_log_err *err;
7624         char *cmd;
7625 
7626         if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7627                 err = alloc_tracing_log_err(len);
7628                 if (PTR_ERR(err) != -ENOMEM)
7629                         tr->n_err_log_entries++;
7630 
7631                 return err;
7632         }
7633         cmd = kzalloc(len, GFP_KERNEL);
7634         if (!cmd)
7635                 return ERR_PTR(-ENOMEM);
7636         err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7637         kfree(err->cmd);
7638         err->cmd = cmd;
7639         list_del(&err->list);
7640 
7641         return err;
7642 }
7643 
7644 /**
7645  * err_pos - find the position of a string within a command for error careting
7646  * @cmd: The tracing command that caused the error
7647  * @str: The string to position the caret at within @cmd
7648  *
7649  * Finds the position of the first occurrence of @str within @cmd.  The
7650  * return value can be passed to tracing_log_err() for caret placement
7651  * within @cmd.
7652  *
7653  * Returns the index within @cmd of the first occurrence of @str or 0
7654  * if @str was not found.
7655  */
7656 unsigned int err_pos(char *cmd, const char *str)
7657 {
7658         char *found;
7659 
7660         if (WARN_ON(!strlen(cmd)))
7661                 return 0;
7662 
7663         found = strstr(cmd, str);
7664         if (found)
7665                 return found - cmd;
7666 
7667         return 0;
7668 }
7669 
7670 /**
7671  * tracing_log_err - write an error to the tracing error log
7672  * @tr: The associated trace array for the error (NULL for top level array)
7673  * @loc: A string describing where the error occurred
7674  * @cmd: The tracing command that caused the error
7675  * @errs: The array of loc-specific static error strings
7676  * @type: The index into errs[], which produces the specific static err string
7677  * @pos: The position the caret should be placed in the cmd
7678  *
7679  * Writes an error into tracing/error_log of the form:
7680  *
7681  * <loc>: error: <text>
7682  *   Command: <cmd>
7683  *              ^
7684  *
7685  * tracing/error_log is a small log file containing the last
7686  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7687  * unless there has been a tracing error, and the error log can be
7688  * cleared and have its memory freed by writing the empty string in
7689  * truncation mode to it i.e. echo > tracing/error_log.
7690  *
7691  * NOTE: the @errs array along with the @type param are used to
7692  * produce a static error string - this string is not copied and saved
7693  * when the error is logged - only a pointer to it is saved.  See
7694  * existing callers for examples of how static strings are typically
7695  * defined for use with tracing_log_err().
7696  */
7697 void tracing_log_err(struct trace_array *tr,
7698                      const char *loc, const char *cmd,
7699                      const char **errs, u8 type, u16 pos)
7700 {
7701         struct tracing_log_err *err;
7702         int len = 0;
7703 
7704         if (!tr)
7705                 tr = &global_trace;
7706 
7707         len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
7708 
7709         mutex_lock(&tracing_err_log_lock);
7710         err = get_tracing_log_err(tr, len);
7711         if (PTR_ERR(err) == -ENOMEM) {
7712                 mutex_unlock(&tracing_err_log_lock);
7713                 return;
7714         }
7715 
7716         snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7717         snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
7718 
7719         err->info.errs = errs;
7720         err->info.type = type;
7721         err->info.pos = pos;
7722         err->info.ts = local_clock();
7723 
7724         list_add_tail(&err->list, &tr->err_log);
7725         mutex_unlock(&tracing_err_log_lock);
7726 }
7727 
7728 static void clear_tracing_err_log(struct trace_array *tr)
7729 {
7730         struct tracing_log_err *err, *next;
7731 
7732         mutex_lock(&tracing_err_log_lock);
7733         list_for_each_entry_safe(err, next, &tr->err_log, list) {
7734                 list_del(&err->list);
7735                 free_tracing_log_err(err);
7736         }
7737 
7738         tr->n_err_log_entries = 0;
7739         mutex_unlock(&tracing_err_log_lock);
7740 }
7741 
7742 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7743 {
7744         struct trace_array *tr = m->private;
7745 
7746         mutex_lock(&tracing_err_log_lock);
7747 
7748         return seq_list_start(&tr->err_log, *pos);
7749 }
7750 
7751 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7752 {
7753         struct trace_array *tr = m->private;
7754 
7755         return seq_list_next(v, &tr->err_log, pos);
7756 }
7757 
7758 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7759 {
7760         mutex_unlock(&tracing_err_log_lock);
7761 }
7762 
7763 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
7764 {
7765         u16 i;
7766 
7767         for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7768                 seq_putc(m, ' ');
7769         for (i = 0; i < pos; i++)
7770                 seq_putc(m, ' ');
7771         seq_puts(m, "^\n");
7772 }
7773 
7774 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7775 {
7776         struct tracing_log_err *err = v;
7777 
7778         if (err) {
7779                 const char *err_text = err->info.errs[err->info.type];
7780                 u64 sec = err->info.ts;
7781                 u32 nsec;
7782 
7783                 nsec = do_div(sec, NSEC_PER_SEC);
7784                 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7785                            err->loc, err_text);
7786                 seq_printf(m, "%s", err->cmd);
7787                 tracing_err_log_show_pos(m, err->info.pos);
7788         }
7789 
7790         return 0;
7791 }
7792 
7793 static const struct seq_operations tracing_err_log_seq_ops = {
7794         .start  = tracing_err_log_seq_start,
7795         .next   = tracing_err_log_seq_next,
7796         .stop   = tracing_err_log_seq_stop,
7797         .show   = tracing_err_log_seq_show
7798 };
7799 
7800 static int tracing_err_log_open(struct inode *inode, struct file *file)
7801 {
7802         struct trace_array *tr = inode->i_private;
7803         int ret = 0;
7804 
7805         ret = tracing_check_open_get_tr(tr);
7806         if (ret)
7807                 return ret;
7808 
7809         /* If this file was opened for write, then erase contents */
7810         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7811                 clear_tracing_err_log(tr);
7812 
7813         if (file->f_mode & FMODE_READ) {
7814                 ret = seq_open(file, &tracing_err_log_seq_ops);
7815                 if (!ret) {
7816                         struct seq_file *m = file->private_data;
7817                         m->private = tr;
7818                 } else {
7819                         trace_array_put(tr);
7820                 }
7821         }
7822         return ret;
7823 }
7824 
7825 static ssize_t tracing_err_log_write(struct file *file,
7826                                      const char __user *buffer,
7827                                      size_t count, loff_t *ppos)
7828 {
7829         return count;
7830 }
7831 
7832 static int tracing_err_log_release(struct inode *inode, struct file *file)
7833 {
7834         struct trace_array *tr = inode->i_private;
7835 
7836         trace_array_put(tr);
7837 
7838         if (file->f_mode & FMODE_READ)
7839                 seq_release(inode, file);
7840 
7841         return 0;
7842 }
7843 
7844 static const struct file_operations tracing_err_log_fops = {
7845         .open           = tracing_err_log_open,
7846         .write          = tracing_err_log_write,
7847         .read           = seq_read,
7848         .llseek         = tracing_lseek,
7849         .release        = tracing_err_log_release,
7850 };
7851 
7852 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7853 {
7854         struct trace_array *tr = inode->i_private;
7855         struct ftrace_buffer_info *info;
7856         int ret;
7857 
7858         ret = tracing_check_open_get_tr(tr);
7859         if (ret)
7860                 return ret;
7861 
7862         info = kvzalloc(sizeof(*info), GFP_KERNEL);
7863         if (!info) {
7864                 trace_array_put(tr);
7865                 return -ENOMEM;
7866         }
7867 
7868         mutex_lock(&trace_types_lock);
7869 
7870         info->iter.tr           = tr;
7871         info->iter.cpu_file     = tracing_get_cpu(inode);
7872         info->iter.trace        = tr->current_trace;
7873         info->iter.array_buffer = &tr->array_buffer;
7874         info->spare             = NULL;
7875         /* Force reading ring buffer for first read */
7876         info->read              = (unsigned int)-1;
7877 
7878         filp->private_data = info;
7879 
7880         tr->trace_ref++;
7881 
7882         mutex_unlock(&trace_types_lock);
7883 
7884         ret = nonseekable_open(inode, filp);
7885         if (ret < 0)
7886                 trace_array_put(tr);
7887 
7888         return ret;
7889 }
7890 
7891 static __poll_t
7892 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7893 {
7894         struct ftrace_buffer_info *info = filp->private_data;
7895         struct trace_iterator *iter = &info->iter;
7896 
7897         return trace_poll(iter, filp, poll_table);
7898 }
7899 
7900 static ssize_t
7901 tracing_buffers_read(struct file *filp, char __user *ubuf,
7902                      size_t count, loff_t *ppos)
7903 {
7904         struct ftrace_buffer_info *info = filp->private_data;
7905         struct trace_iterator *iter = &info->iter;
7906         void *trace_data;
7907         int page_size;
7908         ssize_t ret = 0;
7909         ssize_t size;
7910 
7911         if (!count)
7912                 return 0;
7913 
7914 #ifdef CONFIG_TRACER_MAX_TRACE
7915         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7916                 return -EBUSY;
7917 #endif
7918 
7919         page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
7920 
7921         /* Make sure the spare matches the current sub buffer size */
7922         if (info->spare) {
7923                 if (page_size != info->spare_size) {
7924                         ring_buffer_free_read_page(iter->array_buffer->buffer,
7925                                                    info->spare_cpu, info->spare);
7926                         info->spare = NULL;
7927                 }
7928         }
7929 
7930         if (!info->spare) {
7931                 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
7932                                                           iter->cpu_file);
7933                 if (IS_ERR(info->spare)) {
7934                         ret = PTR_ERR(info->spare);
7935                         info->spare = NULL;
7936                 } else {
7937                         info->spare_cpu = iter->cpu_file;
7938                         info->spare_size = page_size;
7939                 }
7940         }
7941         if (!info->spare)
7942                 return ret;
7943 
7944         /* Do we have previous read data to read? */
7945         if (info->read < page_size)
7946                 goto read;
7947 
7948  again:
7949         trace_access_lock(iter->cpu_file);
7950         ret = ring_buffer_read_page(iter->array_buffer->buffer,
7951                                     info->spare,
7952                                     count,
7953                                     iter->cpu_file, 0);
7954         trace_access_unlock(iter->cpu_file);
7955 
7956         if (ret < 0) {
7957                 if (trace_empty(iter) && !iter->closed) {
7958                         if ((filp->f_flags & O_NONBLOCK))
7959                                 return -EAGAIN;
7960 
7961                         ret = wait_on_pipe(iter, 0);
7962                         if (ret)
7963                                 return ret;
7964 
7965                         goto again;
7966                 }
7967                 return 0;
7968         }
7969 
7970         info->read = 0;
7971  read:
7972         size = page_size - info->read;
7973         if (size > count)
7974                 size = count;
7975         trace_data = ring_buffer_read_page_data(info->spare);
7976         ret = copy_to_user(ubuf, trace_data + info->read, size);
7977         if (ret == size)
7978                 return -EFAULT;
7979 
7980         size -= ret;
7981 
7982         *ppos += size;
7983         info->read += size;
7984 
7985         return size;
7986 }
7987 
7988 static int tracing_buffers_flush(struct file *file, fl_owner_t id)
7989 {
7990         struct ftrace_buffer_info *info = file->private_data;
7991         struct trace_iterator *iter = &info->iter;
7992 
7993         iter->closed = true;
7994         /* Make sure the waiters see the new wait_index */
7995         (void)atomic_fetch_inc_release(&iter->wait_index);
7996 
7997         ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
7998 
7999         return 0;
8000 }
8001 
8002 static int tracing_buffers_release(struct inode *inode, struct file *file)
8003 {
8004         struct ftrace_buffer_info *info = file->private_data;
8005         struct trace_iterator *iter = &info->iter;
8006 
8007         mutex_lock(&trace_types_lock);
8008 
8009         iter->tr->trace_ref--;
8010 
8011         __trace_array_put(iter->tr);
8012 
8013         if (info->spare)
8014                 ring_buffer_free_read_page(iter->array_buffer->buffer,
8015                                            info->spare_cpu, info->spare);
8016         kvfree(info);
8017 
8018         mutex_unlock(&trace_types_lock);
8019 
8020         return 0;
8021 }
8022 
8023 struct buffer_ref {
8024         struct trace_buffer     *buffer;
8025         void                    *page;
8026         int                     cpu;
8027         refcount_t              refcount;
8028 };
8029 
8030 static void buffer_ref_release(struct buffer_ref *ref)
8031 {
8032         if (!refcount_dec_and_test(&ref->refcount))
8033                 return;
8034         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8035         kfree(ref);
8036 }
8037 
8038 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8039                                     struct pipe_buffer *buf)
8040 {
8041         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8042 
8043         buffer_ref_release(ref);
8044         buf->private = 0;
8045 }
8046 
8047 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8048                                 struct pipe_buffer *buf)
8049 {
8050         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8051 
8052         if (refcount_read(&ref->refcount) > INT_MAX/2)
8053                 return false;
8054 
8055         refcount_inc(&ref->refcount);
8056         return true;
8057 }
8058 
8059 /* Pipe buffer operations for a buffer. */
8060 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8061         .release                = buffer_pipe_buf_release,
8062         .get                    = buffer_pipe_buf_get,
8063 };
8064 
8065 /*
8066  * Callback from splice_to_pipe(), if we need to release some pages
8067  * at the end of the spd in case we error'ed out in filling the pipe.
8068  */
8069 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8070 {
8071         struct buffer_ref *ref =
8072                 (struct buffer_ref *)spd->partial[i].private;
8073 
8074         buffer_ref_release(ref);
8075         spd->partial[i].private = 0;
8076 }
8077 
8078 static ssize_t
8079 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8080                             struct pipe_inode_info *pipe, size_t len,
8081                             unsigned int flags)
8082 {
8083         struct ftrace_buffer_info *info = file->private_data;
8084         struct trace_iterator *iter = &info->iter;
8085         struct partial_page partial_def[PIPE_DEF_BUFFERS];
8086         struct page *pages_def[PIPE_DEF_BUFFERS];
8087         struct splice_pipe_desc spd = {
8088                 .pages          = pages_def,
8089                 .partial        = partial_def,
8090                 .nr_pages_max   = PIPE_DEF_BUFFERS,
8091                 .ops            = &buffer_pipe_buf_ops,
8092                 .spd_release    = buffer_spd_release,
8093         };
8094         struct buffer_ref *ref;
8095         bool woken = false;
8096         int page_size;
8097         int entries, i;
8098         ssize_t ret = 0;
8099 
8100 #ifdef CONFIG_TRACER_MAX_TRACE
8101         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8102                 return -EBUSY;
8103 #endif
8104 
8105         page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
8106         if (*ppos & (page_size - 1))
8107                 return -EINVAL;
8108 
8109         if (len & (page_size - 1)) {
8110                 if (len < page_size)
8111                         return -EINVAL;
8112                 len &= (~(page_size - 1));
8113         }
8114 
8115         if (splice_grow_spd(pipe, &spd))
8116                 return -ENOMEM;
8117 
8118  again:
8119         trace_access_lock(iter->cpu_file);
8120         entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8121 
8122         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= page_size) {
8123                 struct page *page;
8124                 int r;
8125 
8126                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8127                 if (!ref) {
8128                         ret = -ENOMEM;
8129                         break;
8130                 }
8131 
8132                 refcount_set(&ref->refcount, 1);
8133                 ref->buffer = iter->array_buffer->buffer;
8134                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8135                 if (IS_ERR(ref->page)) {
8136                         ret = PTR_ERR(ref->page);
8137                         ref->page = NULL;
8138                         kfree(ref);
8139                         break;
8140                 }
8141                 ref->cpu = iter->cpu_file;
8142 
8143                 r = ring_buffer_read_page(ref->buffer, ref->page,
8144                                           len, iter->cpu_file, 1);
8145                 if (r < 0) {
8146                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
8147                                                    ref->page);
8148                         kfree(ref);
8149                         break;
8150                 }
8151 
8152                 page = virt_to_page(ring_buffer_read_page_data(ref->page));
8153 
8154                 spd.pages[i] = page;
8155                 spd.partial[i].len = page_size;
8156                 spd.partial[i].offset = 0;
8157                 spd.partial[i].private = (unsigned long)ref;
8158                 spd.nr_pages++;
8159                 *ppos += page_size;
8160 
8161                 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8162         }
8163 
8164         trace_access_unlock(iter->cpu_file);
8165         spd.nr_pages = i;
8166 
8167         /* did we read anything? */
8168         if (!spd.nr_pages) {
8169 
8170                 if (ret)
8171                         goto out;
8172 
8173                 if (woken)
8174                         goto out;
8175 
8176                 ret = -EAGAIN;
8177                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8178                         goto out;
8179 
8180                 ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
8181                 if (ret)
8182                         goto out;
8183 
8184                 /* No need to wait after waking up when tracing is off */
8185                 if (!tracer_tracing_is_on(iter->tr))
8186                         goto out;
8187 
8188                 /* Iterate one more time to collect any new data then exit */
8189                 woken = true;
8190 
8191                 goto again;
8192         }
8193 
8194         ret = splice_to_pipe(pipe, &spd);
8195 out:
8196         splice_shrink_spd(&spd);
8197 
8198         return ret;
8199 }
8200 
8201 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8202 {
8203         struct ftrace_buffer_info *info = file->private_data;
8204         struct trace_iterator *iter = &info->iter;
8205         int err;
8206 
8207         if (cmd == TRACE_MMAP_IOCTL_GET_READER) {
8208                 if (!(file->f_flags & O_NONBLOCK)) {
8209                         err = ring_buffer_wait(iter->array_buffer->buffer,
8210                                                iter->cpu_file,
8211                                                iter->tr->buffer_percent,
8212                                                NULL, NULL);
8213                         if (err)
8214                                 return err;
8215                 }
8216 
8217                 return ring_buffer_map_get_reader(iter->array_buffer->buffer,
8218                                                   iter->cpu_file);
8219         } else if (cmd) {
8220                 return -ENOTTY;
8221         }
8222 
8223         /*
8224          * An ioctl call with cmd 0 to the ring buffer file will wake up all
8225          * waiters
8226          */
8227         mutex_lock(&trace_types_lock);
8228 
8229         /* Make sure the waiters see the new wait_index */
8230         (void)atomic_fetch_inc_release(&iter->wait_index);
8231 
8232         ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8233 
8234         mutex_unlock(&trace_types_lock);
8235         return 0;
8236 }
8237 
8238 #ifdef CONFIG_TRACER_MAX_TRACE
8239 static int get_snapshot_map(struct trace_array *tr)
8240 {
8241         int err = 0;
8242 
8243         /*
8244          * Called with mmap_lock held. lockdep would be unhappy if we would now
8245          * take trace_types_lock. Instead use the specific
8246          * snapshot_trigger_lock.
8247          */
8248         spin_lock(&tr->snapshot_trigger_lock);
8249 
8250         if (tr->snapshot || tr->mapped == UINT_MAX)
8251                 err = -EBUSY;
8252         else
8253                 tr->mapped++;
8254 
8255         spin_unlock(&tr->snapshot_trigger_lock);
8256 
8257         /* Wait for update_max_tr() to observe iter->tr->mapped */
8258         if (tr->mapped == 1)
8259                 synchronize_rcu();
8260 
8261         return err;
8262 
8263 }
8264 static void put_snapshot_map(struct trace_array *tr)
8265 {
8266         spin_lock(&tr->snapshot_trigger_lock);
8267         if (!WARN_ON(!tr->mapped))
8268                 tr->mapped--;
8269         spin_unlock(&tr->snapshot_trigger_lock);
8270 }
8271 #else
8272 static inline int get_snapshot_map(struct trace_array *tr) { return 0; }
8273 static inline void put_snapshot_map(struct trace_array *tr) { }
8274 #endif
8275 
8276 static void tracing_buffers_mmap_close(struct vm_area_struct *vma)
8277 {
8278         struct ftrace_buffer_info *info = vma->vm_file->private_data;
8279         struct trace_iterator *iter = &info->iter;
8280 
8281         WARN_ON(ring_buffer_unmap(iter->array_buffer->buffer, iter->cpu_file));
8282         put_snapshot_map(iter->tr);
8283 }
8284 
8285 static const struct vm_operations_struct tracing_buffers_vmops = {
8286         .close          = tracing_buffers_mmap_close,
8287 };
8288 
8289 static int tracing_buffers_mmap(struct file *filp, struct vm_area_struct *vma)
8290 {
8291         struct ftrace_buffer_info *info = filp->private_data;
8292         struct trace_iterator *iter = &info->iter;
8293         int ret = 0;
8294 
8295         ret = get_snapshot_map(iter->tr);
8296         if (ret)
8297                 return ret;
8298 
8299         ret = ring_buffer_map(iter->array_buffer->buffer, iter->cpu_file, vma);
8300         if (ret)
8301                 put_snapshot_map(iter->tr);
8302 
8303         vma->vm_ops = &tracing_buffers_vmops;
8304 
8305         return ret;
8306 }
8307 
8308 static const struct file_operations tracing_buffers_fops = {
8309         .open           = tracing_buffers_open,
8310         .read           = tracing_buffers_read,
8311         .poll           = tracing_buffers_poll,
8312         .release        = tracing_buffers_release,
8313         .flush          = tracing_buffers_flush,
8314         .splice_read    = tracing_buffers_splice_read,
8315         .unlocked_ioctl = tracing_buffers_ioctl,
8316         .llseek         = no_llseek,
8317         .mmap           = tracing_buffers_mmap,
8318 };
8319 
8320 static ssize_t
8321 tracing_stats_read(struct file *filp, char __user *ubuf,
8322                    size_t count, loff_t *ppos)
8323 {
8324         struct inode *inode = file_inode(filp);
8325         struct trace_array *tr = inode->i_private;
8326         struct array_buffer *trace_buf = &tr->array_buffer;
8327         int cpu = tracing_get_cpu(inode);
8328         struct trace_seq *s;
8329         unsigned long cnt;
8330         unsigned long long t;
8331         unsigned long usec_rem;
8332 
8333         s = kmalloc(sizeof(*s), GFP_KERNEL);
8334         if (!s)
8335                 return -ENOMEM;
8336 
8337         trace_seq_init(s);
8338 
8339         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8340         trace_seq_printf(s, "entries: %ld\n", cnt);
8341 
8342         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8343         trace_seq_printf(s, "overrun: %ld\n", cnt);
8344 
8345         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8346         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8347 
8348         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8349         trace_seq_printf(s, "bytes: %ld\n", cnt);
8350 
8351         if (trace_clocks[tr->clock_id].in_ns) {
8352                 /* local or global for trace_clock */
8353                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8354                 usec_rem = do_div(t, USEC_PER_SEC);
8355                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8356                                                                 t, usec_rem);
8357 
8358                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8359                 usec_rem = do_div(t, USEC_PER_SEC);
8360                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8361         } else {
8362                 /* counter or tsc mode for trace_clock */
8363                 trace_seq_printf(s, "oldest event ts: %llu\n",
8364                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8365 
8366                 trace_seq_printf(s, "now ts: %llu\n",
8367                                 ring_buffer_time_stamp(trace_buf->buffer));
8368         }
8369 
8370         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8371         trace_seq_printf(s, "dropped events: %ld\n", cnt);
8372 
8373         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8374         trace_seq_printf(s, "read events: %ld\n", cnt);
8375 
8376         count = simple_read_from_buffer(ubuf, count, ppos,
8377                                         s->buffer, trace_seq_used(s));
8378 
8379         kfree(s);
8380 
8381         return count;
8382 }
8383 
8384 static const struct file_operations tracing_stats_fops = {
8385         .open           = tracing_open_generic_tr,
8386         .read           = tracing_stats_read,
8387         .llseek         = generic_file_llseek,
8388         .release        = tracing_release_generic_tr,
8389 };
8390 
8391 #ifdef CONFIG_DYNAMIC_FTRACE
8392 
8393 static ssize_t
8394 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8395                   size_t cnt, loff_t *ppos)
8396 {
8397         ssize_t ret;
8398         char *buf;
8399         int r;
8400 
8401         /* 256 should be plenty to hold the amount needed */
8402         buf = kmalloc(256, GFP_KERNEL);
8403         if (!buf)
8404                 return -ENOMEM;
8405 
8406         r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8407                       ftrace_update_tot_cnt,
8408                       ftrace_number_of_pages,
8409                       ftrace_number_of_groups);
8410 
8411         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8412         kfree(buf);
8413         return ret;
8414 }
8415 
8416 static const struct file_operations tracing_dyn_info_fops = {
8417         .open           = tracing_open_generic,
8418         .read           = tracing_read_dyn_info,
8419         .llseek         = generic_file_llseek,
8420 };
8421 #endif /* CONFIG_DYNAMIC_FTRACE */
8422 
8423 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8424 static void
8425 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8426                 struct trace_array *tr, struct ftrace_probe_ops *ops,
8427                 void *data)
8428 {
8429         tracing_snapshot_instance(tr);
8430 }
8431 
8432 static void
8433 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8434                       struct trace_array *tr, struct ftrace_probe_ops *ops,
8435                       void *data)
8436 {
8437         struct ftrace_func_mapper *mapper = data;
8438         long *count = NULL;
8439 
8440         if (mapper)
8441                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8442 
8443         if (count) {
8444 
8445                 if (*count <= 0)
8446                         return;
8447 
8448                 (*count)--;
8449         }
8450 
8451         tracing_snapshot_instance(tr);
8452 }
8453 
8454 static int
8455 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8456                       struct ftrace_probe_ops *ops, void *data)
8457 {
8458         struct ftrace_func_mapper *mapper = data;
8459         long *count = NULL;
8460 
8461         seq_printf(m, "%ps:", (void *)ip);
8462 
8463         seq_puts(m, "snapshot");
8464 
8465         if (mapper)
8466                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8467 
8468         if (count)
8469                 seq_printf(m, ":count=%ld\n", *count);
8470         else
8471                 seq_puts(m, ":unlimited\n");
8472 
8473         return 0;
8474 }
8475 
8476 static int
8477 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8478                      unsigned long ip, void *init_data, void **data)
8479 {
8480         struct ftrace_func_mapper *mapper = *data;
8481 
8482         if (!mapper) {
8483                 mapper = allocate_ftrace_func_mapper();
8484                 if (!mapper)
8485                         return -ENOMEM;
8486                 *data = mapper;
8487         }
8488 
8489         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8490 }
8491 
8492 static void
8493 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8494                      unsigned long ip, void *data)
8495 {
8496         struct ftrace_func_mapper *mapper = data;
8497 
8498         if (!ip) {
8499                 if (!mapper)
8500                         return;
8501                 free_ftrace_func_mapper(mapper, NULL);
8502                 return;
8503         }
8504 
8505         ftrace_func_mapper_remove_ip(mapper, ip);
8506 }
8507 
8508 static struct ftrace_probe_ops snapshot_probe_ops = {
8509         .func                   = ftrace_snapshot,
8510         .print                  = ftrace_snapshot_print,
8511 };
8512 
8513 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8514         .func                   = ftrace_count_snapshot,
8515         .print                  = ftrace_snapshot_print,
8516         .init                   = ftrace_snapshot_init,
8517         .free                   = ftrace_snapshot_free,
8518 };
8519 
8520 static int
8521 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8522                                char *glob, char *cmd, char *param, int enable)
8523 {
8524         struct ftrace_probe_ops *ops;
8525         void *count = (void *)-1;
8526         char *number;
8527         int ret;
8528 
8529         if (!tr)
8530                 return -ENODEV;
8531 
8532         /* hash funcs only work with set_ftrace_filter */
8533         if (!enable)
8534                 return -EINVAL;
8535 
8536         ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
8537 
8538         if (glob[0] == '!') {
8539                 ret = unregister_ftrace_function_probe_func(glob+1, tr, ops);
8540                 if (!ret)
8541                         tracing_disarm_snapshot(tr);
8542 
8543                 return ret;
8544         }
8545 
8546         if (!param)
8547                 goto out_reg;
8548 
8549         number = strsep(&param, ":");
8550 
8551         if (!strlen(number))
8552                 goto out_reg;
8553 
8554         /*
8555          * We use the callback data field (which is a pointer)
8556          * as our counter.
8557          */
8558         ret = kstrtoul(number, 0, (unsigned long *)&count);
8559         if (ret)
8560                 return ret;
8561 
8562  out_reg:
8563         ret = tracing_arm_snapshot(tr);
8564         if (ret < 0)
8565                 goto out;
8566 
8567         ret = register_ftrace_function_probe(glob, tr, ops, count);
8568         if (ret < 0)
8569                 tracing_disarm_snapshot(tr);
8570  out:
8571         return ret < 0 ? ret : 0;
8572 }
8573 
8574 static struct ftrace_func_command ftrace_snapshot_cmd = {
8575         .name                   = "snapshot",
8576         .func                   = ftrace_trace_snapshot_callback,
8577 };
8578 
8579 static __init int register_snapshot_cmd(void)
8580 {
8581         return register_ftrace_command(&ftrace_snapshot_cmd);
8582 }
8583 #else
8584 static inline __init int register_snapshot_cmd(void) { return 0; }
8585 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8586 
8587 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8588 {
8589         if (WARN_ON(!tr->dir))
8590                 return ERR_PTR(-ENODEV);
8591 
8592         /* Top directory uses NULL as the parent */
8593         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8594                 return NULL;
8595 
8596         /* All sub buffers have a descriptor */
8597         return tr->dir;
8598 }
8599 
8600 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8601 {
8602         struct dentry *d_tracer;
8603 
8604         if (tr->percpu_dir)
8605                 return tr->percpu_dir;
8606 
8607         d_tracer = tracing_get_dentry(tr);
8608         if (IS_ERR(d_tracer))
8609                 return NULL;
8610 
8611         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8612 
8613         MEM_FAIL(!tr->percpu_dir,
8614                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8615 
8616         return tr->percpu_dir;
8617 }
8618 
8619 static struct dentry *
8620 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8621                       void *data, long cpu, const struct file_operations *fops)
8622 {
8623         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8624 
8625         if (ret) /* See tracing_get_cpu() */
8626                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8627         return ret;
8628 }
8629 
8630 static void
8631 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8632 {
8633         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8634         struct dentry *d_cpu;
8635         char cpu_dir[30]; /* 30 characters should be more than enough */
8636 
8637         if (!d_percpu)
8638                 return;
8639 
8640         snprintf(cpu_dir, 30, "cpu%ld", cpu);
8641         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8642         if (!d_cpu) {
8643                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8644                 return;
8645         }
8646 
8647         /* per cpu trace_pipe */
8648         trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8649                                 tr, cpu, &tracing_pipe_fops);
8650 
8651         /* per cpu trace */
8652         trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8653                                 tr, cpu, &tracing_fops);
8654 
8655         trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8656                                 tr, cpu, &tracing_buffers_fops);
8657 
8658         trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8659                                 tr, cpu, &tracing_stats_fops);
8660 
8661         trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8662                                 tr, cpu, &tracing_entries_fops);
8663 
8664 #ifdef CONFIG_TRACER_SNAPSHOT
8665         trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8666                                 tr, cpu, &snapshot_fops);
8667 
8668         trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8669                                 tr, cpu, &snapshot_raw_fops);
8670 #endif
8671 }
8672 
8673 #ifdef CONFIG_FTRACE_SELFTEST
8674 /* Let selftest have access to static functions in this file */
8675 #include "trace_selftest.c"
8676 #endif
8677 
8678 static ssize_t
8679 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8680                         loff_t *ppos)
8681 {
8682         struct trace_option_dentry *topt = filp->private_data;
8683         char *buf;
8684 
8685         if (topt->flags->val & topt->opt->bit)
8686                 buf = "1\n";
8687         else
8688                 buf = "\n";
8689 
8690         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8691 }
8692 
8693 static ssize_t
8694 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8695                          loff_t *ppos)
8696 {
8697         struct trace_option_dentry *topt = filp->private_data;
8698         unsigned long val;
8699         int ret;
8700 
8701         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8702         if (ret)
8703                 return ret;
8704 
8705         if (val != 0 && val != 1)
8706                 return -EINVAL;
8707 
8708         if (!!(topt->flags->val & topt->opt->bit) != val) {
8709                 mutex_lock(&trace_types_lock);
8710                 ret = __set_tracer_option(topt->tr, topt->flags,
8711                                           topt->opt, !val);
8712                 mutex_unlock(&trace_types_lock);
8713                 if (ret)
8714                         return ret;
8715         }
8716 
8717         *ppos += cnt;
8718 
8719         return cnt;
8720 }
8721 
8722 static int tracing_open_options(struct inode *inode, struct file *filp)
8723 {
8724         struct trace_option_dentry *topt = inode->i_private;
8725         int ret;
8726 
8727         ret = tracing_check_open_get_tr(topt->tr);
8728         if (ret)
8729                 return ret;
8730 
8731         filp->private_data = inode->i_private;
8732         return 0;
8733 }
8734 
8735 static int tracing_release_options(struct inode *inode, struct file *file)
8736 {
8737         struct trace_option_dentry *topt = file->private_data;
8738 
8739         trace_array_put(topt->tr);
8740         return 0;
8741 }
8742 
8743 static const struct file_operations trace_options_fops = {
8744         .open = tracing_open_options,
8745         .read = trace_options_read,
8746         .write = trace_options_write,
8747         .llseek = generic_file_llseek,
8748         .release = tracing_release_options,
8749 };
8750 
8751 /*
8752  * In order to pass in both the trace_array descriptor as well as the index
8753  * to the flag that the trace option file represents, the trace_array
8754  * has a character array of trace_flags_index[], which holds the index
8755  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8756  * The address of this character array is passed to the flag option file
8757  * read/write callbacks.
8758  *
8759  * In order to extract both the index and the trace_array descriptor,
8760  * get_tr_index() uses the following algorithm.
8761  *
8762  *   idx = *ptr;
8763  *
8764  * As the pointer itself contains the address of the index (remember
8765  * index[1] == 1).
8766  *
8767  * Then to get the trace_array descriptor, by subtracting that index
8768  * from the ptr, we get to the start of the index itself.
8769  *
8770  *   ptr - idx == &index[0]
8771  *
8772  * Then a simple container_of() from that pointer gets us to the
8773  * trace_array descriptor.
8774  */
8775 static void get_tr_index(void *data, struct trace_array **ptr,
8776                          unsigned int *pindex)
8777 {
8778         *pindex = *(unsigned char *)data;
8779 
8780         *ptr = container_of(data - *pindex, struct trace_array,
8781                             trace_flags_index);
8782 }
8783 
8784 static ssize_t
8785 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8786                         loff_t *ppos)
8787 {
8788         void *tr_index = filp->private_data;
8789         struct trace_array *tr;
8790         unsigned int index;
8791         char *buf;
8792 
8793         get_tr_index(tr_index, &tr, &index);
8794 
8795         if (tr->trace_flags & (1 << index))
8796                 buf = "1\n";
8797         else
8798                 buf = "\n";
8799 
8800         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8801 }
8802 
8803 static ssize_t
8804 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8805                          loff_t *ppos)
8806 {
8807         void *tr_index = filp->private_data;
8808         struct trace_array *tr;
8809         unsigned int index;
8810         unsigned long val;
8811         int ret;
8812 
8813         get_tr_index(tr_index, &tr, &index);
8814 
8815         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8816         if (ret)
8817                 return ret;
8818 
8819         if (val != 0 && val != 1)
8820                 return -EINVAL;
8821 
8822         mutex_lock(&event_mutex);
8823         mutex_lock(&trace_types_lock);
8824         ret = set_tracer_flag(tr, 1 << index, val);
8825         mutex_unlock(&trace_types_lock);
8826         mutex_unlock(&event_mutex);
8827 
8828         if (ret < 0)
8829                 return ret;
8830 
8831         *ppos += cnt;
8832 
8833         return cnt;
8834 }
8835 
8836 static const struct file_operations trace_options_core_fops = {
8837         .open = tracing_open_generic,
8838         .read = trace_options_core_read,
8839         .write = trace_options_core_write,
8840         .llseek = generic_file_llseek,
8841 };
8842 
8843 struct dentry *trace_create_file(const char *name,
8844                                  umode_t mode,
8845                                  struct dentry *parent,
8846                                  void *data,
8847                                  const struct file_operations *fops)
8848 {
8849         struct dentry *ret;
8850 
8851         ret = tracefs_create_file(name, mode, parent, data, fops);
8852         if (!ret)
8853                 pr_warn("Could not create tracefs '%s' entry\n", name);
8854 
8855         return ret;
8856 }
8857 
8858 
8859 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8860 {
8861         struct dentry *d_tracer;
8862 
8863         if (tr->options)
8864                 return tr->options;
8865 
8866         d_tracer = tracing_get_dentry(tr);
8867         if (IS_ERR(d_tracer))
8868                 return NULL;
8869 
8870         tr->options = tracefs_create_dir("options", d_tracer);
8871         if (!tr->options) {
8872                 pr_warn("Could not create tracefs directory 'options'\n");
8873                 return NULL;
8874         }
8875 
8876         return tr->options;
8877 }
8878 
8879 static void
8880 create_trace_option_file(struct trace_array *tr,
8881                          struct trace_option_dentry *topt,
8882                          struct tracer_flags *flags,
8883                          struct tracer_opt *opt)
8884 {
8885         struct dentry *t_options;
8886 
8887         t_options = trace_options_init_dentry(tr);
8888         if (!t_options)
8889                 return;
8890 
8891         topt->flags = flags;
8892         topt->opt = opt;
8893         topt->tr = tr;
8894 
8895         topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
8896                                         t_options, topt, &trace_options_fops);
8897 
8898 }
8899 
8900 static void
8901 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8902 {
8903         struct trace_option_dentry *topts;
8904         struct trace_options *tr_topts;
8905         struct tracer_flags *flags;
8906         struct tracer_opt *opts;
8907         int cnt;
8908         int i;
8909 
8910         if (!tracer)
8911                 return;
8912 
8913         flags = tracer->flags;
8914 
8915         if (!flags || !flags->opts)
8916                 return;
8917 
8918         /*
8919          * If this is an instance, only create flags for tracers
8920          * the instance may have.
8921          */
8922         if (!trace_ok_for_array(tracer, tr))
8923                 return;
8924 
8925         for (i = 0; i < tr->nr_topts; i++) {
8926                 /* Make sure there's no duplicate flags. */
8927                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8928                         return;
8929         }
8930 
8931         opts = flags->opts;
8932 
8933         for (cnt = 0; opts[cnt].name; cnt++)
8934                 ;
8935 
8936         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8937         if (!topts)
8938                 return;
8939 
8940         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8941                             GFP_KERNEL);
8942         if (!tr_topts) {
8943                 kfree(topts);
8944                 return;
8945         }
8946 
8947         tr->topts = tr_topts;
8948         tr->topts[tr->nr_topts].tracer = tracer;
8949         tr->topts[tr->nr_topts].topts = topts;
8950         tr->nr_topts++;
8951 
8952         for (cnt = 0; opts[cnt].name; cnt++) {
8953                 create_trace_option_file(tr, &topts[cnt], flags,
8954                                          &opts[cnt]);
8955                 MEM_FAIL(topts[cnt].entry == NULL,
8956                           "Failed to create trace option: %s",
8957                           opts[cnt].name);
8958         }
8959 }
8960 
8961 static struct dentry *
8962 create_trace_option_core_file(struct trace_array *tr,
8963                               const char *option, long index)
8964 {
8965         struct dentry *t_options;
8966 
8967         t_options = trace_options_init_dentry(tr);
8968         if (!t_options)
8969                 return NULL;
8970 
8971         return trace_create_file(option, TRACE_MODE_WRITE, t_options,
8972                                  (void *)&tr->trace_flags_index[index],
8973                                  &trace_options_core_fops);
8974 }
8975 
8976 static void create_trace_options_dir(struct trace_array *tr)
8977 {
8978         struct dentry *t_options;
8979         bool top_level = tr == &global_trace;
8980         int i;
8981 
8982         t_options = trace_options_init_dentry(tr);
8983         if (!t_options)
8984                 return;
8985 
8986         for (i = 0; trace_options[i]; i++) {
8987                 if (top_level ||
8988                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8989                         create_trace_option_core_file(tr, trace_options[i], i);
8990         }
8991 }
8992 
8993 static ssize_t
8994 rb_simple_read(struct file *filp, char __user *ubuf,
8995                size_t cnt, loff_t *ppos)
8996 {
8997         struct trace_array *tr = filp->private_data;
8998         char buf[64];
8999         int r;
9000 
9001         r = tracer_tracing_is_on(tr);
9002         r = sprintf(buf, "%d\n", r);
9003 
9004         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9005 }
9006 
9007 static ssize_t
9008 rb_simple_write(struct file *filp, const char __user *ubuf,
9009                 size_t cnt, loff_t *ppos)
9010 {
9011         struct trace_array *tr = filp->private_data;
9012         struct trace_buffer *buffer = tr->array_buffer.buffer;
9013         unsigned long val;
9014         int ret;
9015 
9016         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9017         if (ret)
9018                 return ret;
9019 
9020         if (buffer) {
9021                 mutex_lock(&trace_types_lock);
9022                 if (!!val == tracer_tracing_is_on(tr)) {
9023                         val = 0; /* do nothing */
9024                 } else if (val) {
9025                         tracer_tracing_on(tr);
9026                         if (tr->current_trace->start)
9027                                 tr->current_trace->start(tr);
9028                 } else {
9029                         tracer_tracing_off(tr);
9030                         if (tr->current_trace->stop)
9031                                 tr->current_trace->stop(tr);
9032                         /* Wake up any waiters */
9033                         ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9034                 }
9035                 mutex_unlock(&trace_types_lock);
9036         }
9037 
9038         (*ppos)++;
9039 
9040         return cnt;
9041 }
9042 
9043 static const struct file_operations rb_simple_fops = {
9044         .open           = tracing_open_generic_tr,
9045         .read           = rb_simple_read,
9046         .write          = rb_simple_write,
9047         .release        = tracing_release_generic_tr,
9048         .llseek         = default_llseek,
9049 };
9050 
9051 static ssize_t
9052 buffer_percent_read(struct file *filp, char __user *ubuf,
9053                     size_t cnt, loff_t *ppos)
9054 {
9055         struct trace_array *tr = filp->private_data;
9056         char buf[64];
9057         int r;
9058 
9059         r = tr->buffer_percent;
9060         r = sprintf(buf, "%d\n", r);
9061 
9062         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9063 }
9064 
9065 static ssize_t
9066 buffer_percent_write(struct file *filp, const char __user *ubuf,
9067                      size_t cnt, loff_t *ppos)
9068 {
9069         struct trace_array *tr = filp->private_data;
9070         unsigned long val;
9071         int ret;
9072 
9073         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9074         if (ret)
9075                 return ret;
9076 
9077         if (val > 100)
9078                 return -EINVAL;
9079 
9080         tr->buffer_percent = val;
9081 
9082         (*ppos)++;
9083 
9084         return cnt;
9085 }
9086 
9087 static const struct file_operations buffer_percent_fops = {
9088         .open           = tracing_open_generic_tr,
9089         .read           = buffer_percent_read,
9090         .write          = buffer_percent_write,
9091         .release        = tracing_release_generic_tr,
9092         .llseek         = default_llseek,
9093 };
9094 
9095 static ssize_t
9096 buffer_subbuf_size_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
9097 {
9098         struct trace_array *tr = filp->private_data;
9099         size_t size;
9100         char buf[64];
9101         int order;
9102         int r;
9103 
9104         order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9105         size = (PAGE_SIZE << order) / 1024;
9106 
9107         r = sprintf(buf, "%zd\n", size);
9108 
9109         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9110 }
9111 
9112 static ssize_t
9113 buffer_subbuf_size_write(struct file *filp, const char __user *ubuf,
9114                          size_t cnt, loff_t *ppos)
9115 {
9116         struct trace_array *tr = filp->private_data;
9117         unsigned long val;
9118         int old_order;
9119         int order;
9120         int pages;
9121         int ret;
9122 
9123         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9124         if (ret)
9125                 return ret;
9126 
9127         val *= 1024; /* value passed in is in KB */
9128 
9129         pages = DIV_ROUND_UP(val, PAGE_SIZE);
9130         order = fls(pages - 1);
9131 
9132         /* limit between 1 and 128 system pages */
9133         if (order < 0 || order > 7)
9134                 return -EINVAL;
9135 
9136         /* Do not allow tracing while changing the order of the ring buffer */
9137         tracing_stop_tr(tr);
9138 
9139         old_order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9140         if (old_order == order)
9141                 goto out;
9142 
9143         ret = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, order);
9144         if (ret)
9145                 goto out;
9146 
9147 #ifdef CONFIG_TRACER_MAX_TRACE
9148 
9149         if (!tr->allocated_snapshot)
9150                 goto out_max;
9151 
9152         ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
9153         if (ret) {
9154                 /* Put back the old order */
9155                 cnt = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, old_order);
9156                 if (WARN_ON_ONCE(cnt)) {
9157                         /*
9158                          * AARGH! We are left with different orders!
9159                          * The max buffer is our "snapshot" buffer.
9160                          * When a tracer needs a snapshot (one of the
9161                          * latency tracers), it swaps the max buffer
9162                          * with the saved snap shot. We succeeded to
9163                          * update the order of the main buffer, but failed to
9164                          * update the order of the max buffer. But when we tried
9165                          * to reset the main buffer to the original size, we
9166                          * failed there too. This is very unlikely to
9167                          * happen, but if it does, warn and kill all
9168                          * tracing.
9169                          */
9170                         tracing_disabled = 1;
9171                 }
9172                 goto out;
9173         }
9174  out_max:
9175 #endif
9176         (*ppos)++;
9177  out:
9178         if (ret)
9179                 cnt = ret;
9180         tracing_start_tr(tr);
9181         return cnt;
9182 }
9183 
9184 static const struct file_operations buffer_subbuf_size_fops = {
9185         .open           = tracing_open_generic_tr,
9186         .read           = buffer_subbuf_size_read,
9187         .write          = buffer_subbuf_size_write,
9188         .release        = tracing_release_generic_tr,
9189         .llseek         = default_llseek,
9190 };
9191 
9192 static struct dentry *trace_instance_dir;
9193 
9194 static void
9195 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9196 
9197 static int
9198 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9199 {
9200         enum ring_buffer_flags rb_flags;
9201 
9202         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9203 
9204         buf->tr = tr;
9205 
9206         buf->buffer = ring_buffer_alloc(size, rb_flags);
9207         if (!buf->buffer)
9208                 return -ENOMEM;
9209 
9210         buf->data = alloc_percpu(struct trace_array_cpu);
9211         if (!buf->data) {
9212                 ring_buffer_free(buf->buffer);
9213                 buf->buffer = NULL;
9214                 return -ENOMEM;
9215         }
9216 
9217         /* Allocate the first page for all buffers */
9218         set_buffer_entries(&tr->array_buffer,
9219                            ring_buffer_size(tr->array_buffer.buffer, 0));
9220 
9221         return 0;
9222 }
9223 
9224 static void free_trace_buffer(struct array_buffer *buf)
9225 {
9226         if (buf->buffer) {
9227                 ring_buffer_free(buf->buffer);
9228                 buf->buffer = NULL;
9229                 free_percpu(buf->data);
9230                 buf->data = NULL;
9231         }
9232 }
9233 
9234 static int allocate_trace_buffers(struct trace_array *tr, int size)
9235 {
9236         int ret;
9237 
9238         ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9239         if (ret)
9240                 return ret;
9241 
9242 #ifdef CONFIG_TRACER_MAX_TRACE
9243         ret = allocate_trace_buffer(tr, &tr->max_buffer,
9244                                     allocate_snapshot ? size : 1);
9245         if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9246                 free_trace_buffer(&tr->array_buffer);
9247                 return -ENOMEM;
9248         }
9249         tr->allocated_snapshot = allocate_snapshot;
9250 
9251         allocate_snapshot = false;
9252 #endif
9253 
9254         return 0;
9255 }
9256 
9257 static void free_trace_buffers(struct trace_array *tr)
9258 {
9259         if (!tr)
9260                 return;
9261 
9262         free_trace_buffer(&tr->array_buffer);
9263 
9264 #ifdef CONFIG_TRACER_MAX_TRACE
9265         free_trace_buffer(&tr->max_buffer);
9266 #endif
9267 }
9268 
9269 static void init_trace_flags_index(struct trace_array *tr)
9270 {
9271         int i;
9272 
9273         /* Used by the trace options files */
9274         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9275                 tr->trace_flags_index[i] = i;
9276 }
9277 
9278 static void __update_tracer_options(struct trace_array *tr)
9279 {
9280         struct tracer *t;
9281 
9282         for (t = trace_types; t; t = t->next)
9283                 add_tracer_options(tr, t);
9284 }
9285 
9286 static void update_tracer_options(struct trace_array *tr)
9287 {
9288         mutex_lock(&trace_types_lock);
9289         tracer_options_updated = true;
9290         __update_tracer_options(tr);
9291         mutex_unlock(&trace_types_lock);
9292 }
9293 
9294 /* Must have trace_types_lock held */
9295 struct trace_array *trace_array_find(const char *instance)
9296 {
9297         struct trace_array *tr, *found = NULL;
9298 
9299         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9300                 if (tr->name && strcmp(tr->name, instance) == 0) {
9301                         found = tr;
9302                         break;
9303                 }
9304         }
9305 
9306         return found;
9307 }
9308 
9309 struct trace_array *trace_array_find_get(const char *instance)
9310 {
9311         struct trace_array *tr;
9312 
9313         mutex_lock(&trace_types_lock);
9314         tr = trace_array_find(instance);
9315         if (tr)
9316                 tr->ref++;
9317         mutex_unlock(&trace_types_lock);
9318 
9319         return tr;
9320 }
9321 
9322 static int trace_array_create_dir(struct trace_array *tr)
9323 {
9324         int ret;
9325 
9326         tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9327         if (!tr->dir)
9328                 return -EINVAL;
9329 
9330         ret = event_trace_add_tracer(tr->dir, tr);
9331         if (ret) {
9332                 tracefs_remove(tr->dir);
9333                 return ret;
9334         }
9335 
9336         init_tracer_tracefs(tr, tr->dir);
9337         __update_tracer_options(tr);
9338 
9339         return ret;
9340 }
9341 
9342 static struct trace_array *
9343 trace_array_create_systems(const char *name, const char *systems)
9344 {
9345         struct trace_array *tr;
9346         int ret;
9347 
9348         ret = -ENOMEM;
9349         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9350         if (!tr)
9351                 return ERR_PTR(ret);
9352 
9353         tr->name = kstrdup(name, GFP_KERNEL);
9354         if (!tr->name)
9355                 goto out_free_tr;
9356 
9357         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9358                 goto out_free_tr;
9359 
9360         if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
9361                 goto out_free_tr;
9362 
9363         if (systems) {
9364                 tr->system_names = kstrdup_const(systems, GFP_KERNEL);
9365                 if (!tr->system_names)
9366                         goto out_free_tr;
9367         }
9368 
9369         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9370 
9371         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9372 
9373         raw_spin_lock_init(&tr->start_lock);
9374 
9375         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9376 #ifdef CONFIG_TRACER_MAX_TRACE
9377         spin_lock_init(&tr->snapshot_trigger_lock);
9378 #endif
9379         tr->current_trace = &nop_trace;
9380 
9381         INIT_LIST_HEAD(&tr->systems);
9382         INIT_LIST_HEAD(&tr->events);
9383         INIT_LIST_HEAD(&tr->hist_vars);
9384         INIT_LIST_HEAD(&tr->err_log);
9385 
9386         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9387                 goto out_free_tr;
9388 
9389         /* The ring buffer is defaultly expanded */
9390         trace_set_ring_buffer_expanded(tr);
9391 
9392         if (ftrace_allocate_ftrace_ops(tr) < 0)
9393                 goto out_free_tr;
9394 
9395         ftrace_init_trace_array(tr);
9396 
9397         init_trace_flags_index(tr);
9398 
9399         if (trace_instance_dir) {
9400                 ret = trace_array_create_dir(tr);
9401                 if (ret)
9402                         goto out_free_tr;
9403         } else
9404                 __trace_early_add_events(tr);
9405 
9406         list_add(&tr->list, &ftrace_trace_arrays);
9407 
9408         tr->ref++;
9409 
9410         return tr;
9411 
9412  out_free_tr:
9413         ftrace_free_ftrace_ops(tr);
9414         free_trace_buffers(tr);
9415         free_cpumask_var(tr->pipe_cpumask);
9416         free_cpumask_var(tr->tracing_cpumask);
9417         kfree_const(tr->system_names);
9418         kfree(tr->name);
9419         kfree(tr);
9420 
9421         return ERR_PTR(ret);
9422 }
9423 
9424 static struct trace_array *trace_array_create(const char *name)
9425 {
9426         return trace_array_create_systems(name, NULL);
9427 }
9428 
9429 static int instance_mkdir(const char *name)
9430 {
9431         struct trace_array *tr;
9432         int ret;
9433 
9434         mutex_lock(&event_mutex);
9435         mutex_lock(&trace_types_lock);
9436 
9437         ret = -EEXIST;
9438         if (trace_array_find(name))
9439                 goto out_unlock;
9440 
9441         tr = trace_array_create(name);
9442 
9443         ret = PTR_ERR_OR_ZERO(tr);
9444 
9445 out_unlock:
9446         mutex_unlock(&trace_types_lock);
9447         mutex_unlock(&event_mutex);
9448         return ret;
9449 }
9450 
9451 /**
9452  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9453  * @name: The name of the trace array to be looked up/created.
9454  * @systems: A list of systems to create event directories for (NULL for all)
9455  *
9456  * Returns pointer to trace array with given name.
9457  * NULL, if it cannot be created.
9458  *
9459  * NOTE: This function increments the reference counter associated with the
9460  * trace array returned. This makes sure it cannot be freed while in use.
9461  * Use trace_array_put() once the trace array is no longer needed.
9462  * If the trace_array is to be freed, trace_array_destroy() needs to
9463  * be called after the trace_array_put(), or simply let user space delete
9464  * it from the tracefs instances directory. But until the
9465  * trace_array_put() is called, user space can not delete it.
9466  *
9467  */
9468 struct trace_array *trace_array_get_by_name(const char *name, const char *systems)
9469 {
9470         struct trace_array *tr;
9471 
9472         mutex_lock(&event_mutex);
9473         mutex_lock(&trace_types_lock);
9474 
9475         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9476                 if (tr->name && strcmp(tr->name, name) == 0)
9477                         goto out_unlock;
9478         }
9479 
9480         tr = trace_array_create_systems(name, systems);
9481 
9482         if (IS_ERR(tr))
9483                 tr = NULL;
9484 out_unlock:
9485         if (tr)
9486                 tr->ref++;
9487 
9488         mutex_unlock(&trace_types_lock);
9489         mutex_unlock(&event_mutex);
9490         return tr;
9491 }
9492 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
9493 
9494 static int __remove_instance(struct trace_array *tr)
9495 {
9496         int i;
9497 
9498         /* Reference counter for a newly created trace array = 1. */
9499         if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9500                 return -EBUSY;
9501 
9502         list_del(&tr->list);
9503 
9504         /* Disable all the flags that were enabled coming in */
9505         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9506                 if ((1 << i) & ZEROED_TRACE_FLAGS)
9507                         set_tracer_flag(tr, 1 << i, 0);
9508         }
9509 
9510         tracing_set_nop(tr);
9511         clear_ftrace_function_probes(tr);
9512         event_trace_del_tracer(tr);
9513         ftrace_clear_pids(tr);
9514         ftrace_destroy_function_files(tr);
9515         tracefs_remove(tr->dir);
9516         free_percpu(tr->last_func_repeats);
9517         free_trace_buffers(tr);
9518         clear_tracing_err_log(tr);
9519 
9520         for (i = 0; i < tr->nr_topts; i++) {
9521                 kfree(tr->topts[i].topts);
9522         }
9523         kfree(tr->topts);
9524 
9525         free_cpumask_var(tr->pipe_cpumask);
9526         free_cpumask_var(tr->tracing_cpumask);
9527         kfree_const(tr->system_names);
9528         kfree(tr->name);
9529         kfree(tr);
9530 
9531         return 0;
9532 }
9533 
9534 int trace_array_destroy(struct trace_array *this_tr)
9535 {
9536         struct trace_array *tr;
9537         int ret;
9538 
9539         if (!this_tr)
9540                 return -EINVAL;
9541 
9542         mutex_lock(&event_mutex);
9543         mutex_lock(&trace_types_lock);
9544 
9545         ret = -ENODEV;
9546 
9547         /* Making sure trace array exists before destroying it. */
9548         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9549                 if (tr == this_tr) {
9550                         ret = __remove_instance(tr);
9551                         break;
9552                 }
9553         }
9554 
9555         mutex_unlock(&trace_types_lock);
9556         mutex_unlock(&event_mutex);
9557 
9558         return ret;
9559 }
9560 EXPORT_SYMBOL_GPL(trace_array_destroy);
9561 
9562 static int instance_rmdir(const char *name)
9563 {
9564         struct trace_array *tr;
9565         int ret;
9566 
9567         mutex_lock(&event_mutex);
9568         mutex_lock(&trace_types_lock);
9569 
9570         ret = -ENODEV;
9571         tr = trace_array_find(name);
9572         if (tr)
9573                 ret = __remove_instance(tr);
9574 
9575         mutex_unlock(&trace_types_lock);
9576         mutex_unlock(&event_mutex);
9577 
9578         return ret;
9579 }
9580 
9581 static __init void create_trace_instances(struct dentry *d_tracer)
9582 {
9583         struct trace_array *tr;
9584 
9585         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9586                                                          instance_mkdir,
9587                                                          instance_rmdir);
9588         if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9589                 return;
9590 
9591         mutex_lock(&event_mutex);
9592         mutex_lock(&trace_types_lock);
9593 
9594         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9595                 if (!tr->name)
9596                         continue;
9597                 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9598                              "Failed to create instance directory\n"))
9599                         break;
9600         }
9601 
9602         mutex_unlock(&trace_types_lock);
9603         mutex_unlock(&event_mutex);
9604 }
9605 
9606 static void
9607 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9608 {
9609         int cpu;
9610 
9611         trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9612                         tr, &show_traces_fops);
9613 
9614         trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9615                         tr, &set_tracer_fops);
9616 
9617         trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9618                           tr, &tracing_cpumask_fops);
9619 
9620         trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9621                           tr, &tracing_iter_fops);
9622 
9623         trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9624                           tr, &tracing_fops);
9625 
9626         trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9627                           tr, &tracing_pipe_fops);
9628 
9629         trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9630                           tr, &tracing_entries_fops);
9631 
9632         trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9633                           tr, &tracing_total_entries_fops);
9634 
9635         trace_create_file("free_buffer", 0200, d_tracer,
9636                           tr, &tracing_free_buffer_fops);
9637 
9638         trace_create_file("trace_marker", 0220, d_tracer,
9639                           tr, &tracing_mark_fops);
9640 
9641         tr->trace_marker_file = __find_event_file(tr, "ftrace", "print");
9642 
9643         trace_create_file("trace_marker_raw", 0220, d_tracer,
9644                           tr, &tracing_mark_raw_fops);
9645 
9646         trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
9647                           &trace_clock_fops);
9648 
9649         trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
9650                           tr, &rb_simple_fops);
9651 
9652         trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
9653                           &trace_time_stamp_mode_fops);
9654 
9655         tr->buffer_percent = 50;
9656 
9657         trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
9658                         tr, &buffer_percent_fops);
9659 
9660         trace_create_file("buffer_subbuf_size_kb", TRACE_MODE_WRITE, d_tracer,
9661                           tr, &buffer_subbuf_size_fops);
9662 
9663         create_trace_options_dir(tr);
9664 
9665 #ifdef CONFIG_TRACER_MAX_TRACE
9666         trace_create_maxlat_file(tr, d_tracer);
9667 #endif
9668 
9669         if (ftrace_create_function_files(tr, d_tracer))
9670                 MEM_FAIL(1, "Could not allocate function filter files");
9671 
9672 #ifdef CONFIG_TRACER_SNAPSHOT
9673         trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
9674                           tr, &snapshot_fops);
9675 #endif
9676 
9677         trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
9678                           tr, &tracing_err_log_fops);
9679 
9680         for_each_tracing_cpu(cpu)
9681                 tracing_init_tracefs_percpu(tr, cpu);
9682 
9683         ftrace_init_tracefs(tr, d_tracer);
9684 }
9685 
9686 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
9687 {
9688         struct vfsmount *mnt;
9689         struct file_system_type *type;
9690 
9691         /*
9692          * To maintain backward compatibility for tools that mount
9693          * debugfs to get to the tracing facility, tracefs is automatically
9694          * mounted to the debugfs/tracing directory.
9695          */
9696         type = get_fs_type("tracefs");
9697         if (!type)
9698                 return NULL;
9699         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9700         put_filesystem(type);
9701         if (IS_ERR(mnt))
9702                 return NULL;
9703         mntget(mnt);
9704 
9705         return mnt;
9706 }
9707 
9708 /**
9709  * tracing_init_dentry - initialize top level trace array
9710  *
9711  * This is called when creating files or directories in the tracing
9712  * directory. It is called via fs_initcall() by any of the boot up code
9713  * and expects to return the dentry of the top level tracing directory.
9714  */
9715 int tracing_init_dentry(void)
9716 {
9717         struct trace_array *tr = &global_trace;
9718 
9719         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9720                 pr_warn("Tracing disabled due to lockdown\n");
9721                 return -EPERM;
9722         }
9723 
9724         /* The top level trace array uses  NULL as parent */
9725         if (tr->dir)
9726                 return 0;
9727 
9728         if (WARN_ON(!tracefs_initialized()))
9729                 return -ENODEV;
9730 
9731         /*
9732          * As there may still be users that expect the tracing
9733          * files to exist in debugfs/tracing, we must automount
9734          * the tracefs file system there, so older tools still
9735          * work with the newer kernel.
9736          */
9737         tr->dir = debugfs_create_automount("tracing", NULL,
9738                                            trace_automount, NULL);
9739 
9740         return 0;
9741 }
9742 
9743 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9744 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9745 
9746 static struct workqueue_struct *eval_map_wq __initdata;
9747 static struct work_struct eval_map_work __initdata;
9748 static struct work_struct tracerfs_init_work __initdata;
9749 
9750 static void __init eval_map_work_func(struct work_struct *work)
9751 {
9752         int len;
9753 
9754         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9755         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9756 }
9757 
9758 static int __init trace_eval_init(void)
9759 {
9760         INIT_WORK(&eval_map_work, eval_map_work_func);
9761 
9762         eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9763         if (!eval_map_wq) {
9764                 pr_err("Unable to allocate eval_map_wq\n");
9765                 /* Do work here */
9766                 eval_map_work_func(&eval_map_work);
9767                 return -ENOMEM;
9768         }
9769 
9770         queue_work(eval_map_wq, &eval_map_work);
9771         return 0;
9772 }
9773 
9774 subsys_initcall(trace_eval_init);
9775 
9776 static int __init trace_eval_sync(void)
9777 {
9778         /* Make sure the eval map updates are finished */
9779         if (eval_map_wq)
9780                 destroy_workqueue(eval_map_wq);
9781         return 0;
9782 }
9783 
9784 late_initcall_sync(trace_eval_sync);
9785 
9786 
9787 #ifdef CONFIG_MODULES
9788 static void trace_module_add_evals(struct module *mod)
9789 {
9790         if (!mod->num_trace_evals)
9791                 return;
9792 
9793         /*
9794          * Modules with bad taint do not have events created, do
9795          * not bother with enums either.
9796          */
9797         if (trace_module_has_bad_taint(mod))
9798                 return;
9799 
9800         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9801 }
9802 
9803 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9804 static void trace_module_remove_evals(struct module *mod)
9805 {
9806         union trace_eval_map_item *map;
9807         union trace_eval_map_item **last = &trace_eval_maps;
9808 
9809         if (!mod->num_trace_evals)
9810                 return;
9811 
9812         mutex_lock(&trace_eval_mutex);
9813 
9814         map = trace_eval_maps;
9815 
9816         while (map) {
9817                 if (map->head.mod == mod)
9818                         break;
9819                 map = trace_eval_jmp_to_tail(map);
9820                 last = &map->tail.next;
9821                 map = map->tail.next;
9822         }
9823         if (!map)
9824                 goto out;
9825 
9826         *last = trace_eval_jmp_to_tail(map)->tail.next;
9827         kfree(map);
9828  out:
9829         mutex_unlock(&trace_eval_mutex);
9830 }
9831 #else
9832 static inline void trace_module_remove_evals(struct module *mod) { }
9833 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9834 
9835 static int trace_module_notify(struct notifier_block *self,
9836                                unsigned long val, void *data)
9837 {
9838         struct module *mod = data;
9839 
9840         switch (val) {
9841         case MODULE_STATE_COMING:
9842                 trace_module_add_evals(mod);
9843                 break;
9844         case MODULE_STATE_GOING:
9845                 trace_module_remove_evals(mod);
9846                 break;
9847         }
9848 
9849         return NOTIFY_OK;
9850 }
9851 
9852 static struct notifier_block trace_module_nb = {
9853         .notifier_call = trace_module_notify,
9854         .priority = 0,
9855 };
9856 #endif /* CONFIG_MODULES */
9857 
9858 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
9859 {
9860 
9861         event_trace_init();
9862 
9863         init_tracer_tracefs(&global_trace, NULL);
9864         ftrace_init_tracefs_toplevel(&global_trace, NULL);
9865 
9866         trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
9867                         &global_trace, &tracing_thresh_fops);
9868 
9869         trace_create_file("README", TRACE_MODE_READ, NULL,
9870                         NULL, &tracing_readme_fops);
9871 
9872         trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
9873                         NULL, &tracing_saved_cmdlines_fops);
9874 
9875         trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
9876                           NULL, &tracing_saved_cmdlines_size_fops);
9877 
9878         trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
9879                         NULL, &tracing_saved_tgids_fops);
9880 
9881         trace_create_eval_file(NULL);
9882 
9883 #ifdef CONFIG_MODULES
9884         register_module_notifier(&trace_module_nb);
9885 #endif
9886 
9887 #ifdef CONFIG_DYNAMIC_FTRACE
9888         trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
9889                         NULL, &tracing_dyn_info_fops);
9890 #endif
9891 
9892         create_trace_instances(NULL);
9893 
9894         update_tracer_options(&global_trace);
9895 }
9896 
9897 static __init int tracer_init_tracefs(void)
9898 {
9899         int ret;
9900 
9901         trace_access_lock_init();
9902 
9903         ret = tracing_init_dentry();
9904         if (ret)
9905                 return 0;
9906 
9907         if (eval_map_wq) {
9908                 INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
9909                 queue_work(eval_map_wq, &tracerfs_init_work);
9910         } else {
9911                 tracer_init_tracefs_work_func(NULL);
9912         }
9913 
9914         rv_init_interface();
9915 
9916         return 0;
9917 }
9918 
9919 fs_initcall(tracer_init_tracefs);
9920 
9921 static int trace_die_panic_handler(struct notifier_block *self,
9922                                 unsigned long ev, void *unused);
9923 
9924 static struct notifier_block trace_panic_notifier = {
9925         .notifier_call = trace_die_panic_handler,
9926         .priority = INT_MAX - 1,
9927 };
9928 
9929 static struct notifier_block trace_die_notifier = {
9930         .notifier_call = trace_die_panic_handler,
9931         .priority = INT_MAX - 1,
9932 };
9933 
9934 /*
9935  * The idea is to execute the following die/panic callback early, in order
9936  * to avoid showing irrelevant information in the trace (like other panic
9937  * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
9938  * warnings get disabled (to prevent potential log flooding).
9939  */
9940 static int trace_die_panic_handler(struct notifier_block *self,
9941                                 unsigned long ev, void *unused)
9942 {
9943         if (!ftrace_dump_on_oops_enabled())
9944                 return NOTIFY_DONE;
9945 
9946         /* The die notifier requires DIE_OOPS to trigger */
9947         if (self == &trace_die_notifier && ev != DIE_OOPS)
9948                 return NOTIFY_DONE;
9949 
9950         ftrace_dump(DUMP_PARAM);
9951 
9952         return NOTIFY_DONE;
9953 }
9954 
9955 /*
9956  * printk is set to max of 1024, we really don't need it that big.
9957  * Nothing should be printing 1000 characters anyway.
9958  */
9959 #define TRACE_MAX_PRINT         1000
9960 
9961 /*
9962  * Define here KERN_TRACE so that we have one place to modify
9963  * it if we decide to change what log level the ftrace dump
9964  * should be at.
9965  */
9966 #define KERN_TRACE              KERN_EMERG
9967 
9968 void
9969 trace_printk_seq(struct trace_seq *s)
9970 {
9971         /* Probably should print a warning here. */
9972         if (s->seq.len >= TRACE_MAX_PRINT)
9973                 s->seq.len = TRACE_MAX_PRINT;
9974 
9975         /*
9976          * More paranoid code. Although the buffer size is set to
9977          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9978          * an extra layer of protection.
9979          */
9980         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9981                 s->seq.len = s->seq.size - 1;
9982 
9983         /* should be zero ended, but we are paranoid. */
9984         s->buffer[s->seq.len] = 0;
9985 
9986         printk(KERN_TRACE "%s", s->buffer);
9987 
9988         trace_seq_init(s);
9989 }
9990 
9991 static void trace_init_iter(struct trace_iterator *iter, struct trace_array *tr)
9992 {
9993         iter->tr = tr;
9994         iter->trace = iter->tr->current_trace;
9995         iter->cpu_file = RING_BUFFER_ALL_CPUS;
9996         iter->array_buffer = &tr->array_buffer;
9997 
9998         if (iter->trace && iter->trace->open)
9999                 iter->trace->open(iter);
10000 
10001         /* Annotate start of buffers if we had overruns */
10002         if (ring_buffer_overruns(iter->array_buffer->buffer))
10003                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
10004 
10005         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
10006         if (trace_clocks[iter->tr->clock_id].in_ns)
10007                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
10008 
10009         /* Can not use kmalloc for iter.temp and iter.fmt */
10010         iter->temp = static_temp_buf;
10011         iter->temp_size = STATIC_TEMP_BUF_SIZE;
10012         iter->fmt = static_fmt_buf;
10013         iter->fmt_size = STATIC_FMT_BUF_SIZE;
10014 }
10015 
10016 void trace_init_global_iter(struct trace_iterator *iter)
10017 {
10018         trace_init_iter(iter, &global_trace);
10019 }
10020 
10021 static void ftrace_dump_one(struct trace_array *tr, enum ftrace_dump_mode dump_mode)
10022 {
10023         /* use static because iter can be a bit big for the stack */
10024         static struct trace_iterator iter;
10025         unsigned int old_userobj;
10026         unsigned long flags;
10027         int cnt = 0, cpu;
10028 
10029         /*
10030          * Always turn off tracing when we dump.
10031          * We don't need to show trace output of what happens
10032          * between multiple crashes.
10033          *
10034          * If the user does a sysrq-z, then they can re-enable
10035          * tracing with echo 1 > tracing_on.
10036          */
10037         tracer_tracing_off(tr);
10038 
10039         local_irq_save(flags);
10040 
10041         /* Simulate the iterator */
10042         trace_init_iter(&iter, tr);
10043 
10044         for_each_tracing_cpu(cpu) {
10045                 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10046         }
10047 
10048         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
10049 
10050         /* don't look at user memory in panic mode */
10051         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
10052 
10053         if (dump_mode == DUMP_ORIG)
10054                 iter.cpu_file = raw_smp_processor_id();
10055         else
10056                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
10057 
10058         if (tr == &global_trace)
10059                 printk(KERN_TRACE "Dumping ftrace buffer:\n");
10060         else
10061                 printk(KERN_TRACE "Dumping ftrace instance %s buffer:\n", tr->name);
10062 
10063         /* Did function tracer already get disabled? */
10064         if (ftrace_is_dead()) {
10065                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
10066                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
10067         }
10068 
10069         /*
10070          * We need to stop all tracing on all CPUS to read
10071          * the next buffer. This is a bit expensive, but is
10072          * not done often. We fill all what we can read,
10073          * and then release the locks again.
10074          */
10075 
10076         while (!trace_empty(&iter)) {
10077 
10078                 if (!cnt)
10079                         printk(KERN_TRACE "---------------------------------\n");
10080 
10081                 cnt++;
10082 
10083                 trace_iterator_reset(&iter);
10084                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
10085 
10086                 if (trace_find_next_entry_inc(&iter) != NULL) {
10087                         int ret;
10088 
10089                         ret = print_trace_line(&iter);
10090                         if (ret != TRACE_TYPE_NO_CONSUME)
10091                                 trace_consume(&iter);
10092                 }
10093                 touch_nmi_watchdog();
10094 
10095                 trace_printk_seq(&iter.seq);
10096         }
10097 
10098         if (!cnt)
10099                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
10100         else
10101                 printk(KERN_TRACE "---------------------------------\n");
10102 
10103         tr->trace_flags |= old_userobj;
10104 
10105         for_each_tracing_cpu(cpu) {
10106                 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10107         }
10108         local_irq_restore(flags);
10109 }
10110 
10111 static void ftrace_dump_by_param(void)
10112 {
10113         bool first_param = true;
10114         char dump_param[MAX_TRACER_SIZE];
10115         char *buf, *token, *inst_name;
10116         struct trace_array *tr;
10117 
10118         strscpy(dump_param, ftrace_dump_on_oops, MAX_TRACER_SIZE);
10119         buf = dump_param;
10120 
10121         while ((token = strsep(&buf, ",")) != NULL) {
10122                 if (first_param) {
10123                         first_param = false;
10124                         if (!strcmp("", token))
10125                                 continue;
10126                         else if (!strcmp("1", token)) {
10127                                 ftrace_dump_one(&global_trace, DUMP_ALL);
10128                                 continue;
10129                         }
10130                         else if (!strcmp("2", token) ||
10131                           !strcmp("orig_cpu", token)) {
10132                                 ftrace_dump_one(&global_trace, DUMP_ORIG);
10133                                 continue;
10134                         }
10135                 }
10136 
10137                 inst_name = strsep(&token, "=");
10138                 tr = trace_array_find(inst_name);
10139                 if (!tr) {
10140                         printk(KERN_TRACE "Instance %s not found\n", inst_name);
10141                         continue;
10142                 }
10143 
10144                 if (token && (!strcmp("2", token) ||
10145                           !strcmp("orig_cpu", token)))
10146                         ftrace_dump_one(tr, DUMP_ORIG);
10147                 else
10148                         ftrace_dump_one(tr, DUMP_ALL);
10149         }
10150 }
10151 
10152 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
10153 {
10154         static atomic_t dump_running;
10155 
10156         /* Only allow one dump user at a time. */
10157         if (atomic_inc_return(&dump_running) != 1) {
10158                 atomic_dec(&dump_running);
10159                 return;
10160         }
10161 
10162         switch (oops_dump_mode) {
10163         case DUMP_ALL:
10164                 ftrace_dump_one(&global_trace, DUMP_ALL);
10165                 break;
10166         case DUMP_ORIG:
10167                 ftrace_dump_one(&global_trace, DUMP_ORIG);
10168                 break;
10169         case DUMP_PARAM:
10170                 ftrace_dump_by_param();
10171                 break;
10172         case DUMP_NONE:
10173                 break;
10174         default:
10175                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
10176                 ftrace_dump_one(&global_trace, DUMP_ALL);
10177         }
10178 
10179         atomic_dec(&dump_running);
10180 }
10181 EXPORT_SYMBOL_GPL(ftrace_dump);
10182 
10183 #define WRITE_BUFSIZE  4096
10184 
10185 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
10186                                 size_t count, loff_t *ppos,
10187                                 int (*createfn)(const char *))
10188 {
10189         char *kbuf, *buf, *tmp;
10190         int ret = 0;
10191         size_t done = 0;
10192         size_t size;
10193 
10194         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
10195         if (!kbuf)
10196                 return -ENOMEM;
10197 
10198         while (done < count) {
10199                 size = count - done;
10200 
10201                 if (size >= WRITE_BUFSIZE)
10202                         size = WRITE_BUFSIZE - 1;
10203 
10204                 if (copy_from_user(kbuf, buffer + done, size)) {
10205                         ret = -EFAULT;
10206                         goto out;
10207                 }
10208                 kbuf[size] = '\0';
10209                 buf = kbuf;
10210                 do {
10211                         tmp = strchr(buf, '\n');
10212                         if (tmp) {
10213                                 *tmp = '\0';
10214                                 size = tmp - buf + 1;
10215                         } else {
10216                                 size = strlen(buf);
10217                                 if (done + size < count) {
10218                                         if (buf != kbuf)
10219                                                 break;
10220                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10221                                         pr_warn("Line length is too long: Should be less than %d\n",
10222                                                 WRITE_BUFSIZE - 2);
10223                                         ret = -EINVAL;
10224                                         goto out;
10225                                 }
10226                         }
10227                         done += size;
10228 
10229                         /* Remove comments */
10230                         tmp = strchr(buf, '#');
10231 
10232                         if (tmp)
10233                                 *tmp = '\0';
10234 
10235                         ret = createfn(buf);
10236                         if (ret)
10237                                 goto out;
10238                         buf += size;
10239 
10240                 } while (done < count);
10241         }
10242         ret = done;
10243 
10244 out:
10245         kfree(kbuf);
10246 
10247         return ret;
10248 }
10249 
10250 #ifdef CONFIG_TRACER_MAX_TRACE
10251 __init static bool tr_needs_alloc_snapshot(const char *name)
10252 {
10253         char *test;
10254         int len = strlen(name);
10255         bool ret;
10256 
10257         if (!boot_snapshot_index)
10258                 return false;
10259 
10260         if (strncmp(name, boot_snapshot_info, len) == 0 &&
10261             boot_snapshot_info[len] == '\t')
10262                 return true;
10263 
10264         test = kmalloc(strlen(name) + 3, GFP_KERNEL);
10265         if (!test)
10266                 return false;
10267 
10268         sprintf(test, "\t%s\t", name);
10269         ret = strstr(boot_snapshot_info, test) == NULL;
10270         kfree(test);
10271         return ret;
10272 }
10273 
10274 __init static void do_allocate_snapshot(const char *name)
10275 {
10276         if (!tr_needs_alloc_snapshot(name))
10277                 return;
10278 
10279         /*
10280          * When allocate_snapshot is set, the next call to
10281          * allocate_trace_buffers() (called by trace_array_get_by_name())
10282          * will allocate the snapshot buffer. That will alse clear
10283          * this flag.
10284          */
10285         allocate_snapshot = true;
10286 }
10287 #else
10288 static inline void do_allocate_snapshot(const char *name) { }
10289 #endif
10290 
10291 __init static void enable_instances(void)
10292 {
10293         struct trace_array *tr;
10294         char *curr_str;
10295         char *str;
10296         char *tok;
10297 
10298         /* A tab is always appended */
10299         boot_instance_info[boot_instance_index - 1] = '\0';
10300         str = boot_instance_info;
10301 
10302         while ((curr_str = strsep(&str, "\t"))) {
10303 
10304                 tok = strsep(&curr_str, ",");
10305 
10306                 if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
10307                         do_allocate_snapshot(tok);
10308 
10309                 tr = trace_array_get_by_name(tok, NULL);
10310                 if (!tr) {
10311                         pr_warn("Failed to create instance buffer %s\n", curr_str);
10312                         continue;
10313                 }
10314                 /* Allow user space to delete it */
10315                 trace_array_put(tr);
10316 
10317                 while ((tok = strsep(&curr_str, ","))) {
10318                         early_enable_events(tr, tok, true);
10319                 }
10320         }
10321 }
10322 
10323 __init static int tracer_alloc_buffers(void)
10324 {
10325         int ring_buf_size;
10326         int ret = -ENOMEM;
10327 
10328 
10329         if (security_locked_down(LOCKDOWN_TRACEFS)) {
10330                 pr_warn("Tracing disabled due to lockdown\n");
10331                 return -EPERM;
10332         }
10333 
10334         /*
10335          * Make sure we don't accidentally add more trace options
10336          * than we have bits for.
10337          */
10338         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10339 
10340         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10341                 goto out;
10342 
10343         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10344                 goto out_free_buffer_mask;
10345 
10346         /* Only allocate trace_printk buffers if a trace_printk exists */
10347         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10348                 /* Must be called before global_trace.buffer is allocated */
10349                 trace_printk_init_buffers();
10350 
10351         /* To save memory, keep the ring buffer size to its minimum */
10352         if (global_trace.ring_buffer_expanded)
10353                 ring_buf_size = trace_buf_size;
10354         else
10355                 ring_buf_size = 1;
10356 
10357         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10358         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10359 
10360         raw_spin_lock_init(&global_trace.start_lock);
10361 
10362         /*
10363          * The prepare callbacks allocates some memory for the ring buffer. We
10364          * don't free the buffer if the CPU goes down. If we were to free
10365          * the buffer, then the user would lose any trace that was in the
10366          * buffer. The memory will be removed once the "instance" is removed.
10367          */
10368         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10369                                       "trace/RB:prepare", trace_rb_cpu_prepare,
10370                                       NULL);
10371         if (ret < 0)
10372                 goto out_free_cpumask;
10373         /* Used for event triggers */
10374         ret = -ENOMEM;
10375         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10376         if (!temp_buffer)
10377                 goto out_rm_hp_state;
10378 
10379         if (trace_create_savedcmd() < 0)
10380                 goto out_free_temp_buffer;
10381 
10382         if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
10383                 goto out_free_savedcmd;
10384 
10385         /* TODO: make the number of buffers hot pluggable with CPUS */
10386         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10387                 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10388                 goto out_free_pipe_cpumask;
10389         }
10390         if (global_trace.buffer_disabled)
10391                 tracing_off();
10392 
10393         if (trace_boot_clock) {
10394                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
10395                 if (ret < 0)
10396                         pr_warn("Trace clock %s not defined, going back to default\n",
10397                                 trace_boot_clock);
10398         }
10399 
10400         /*
10401          * register_tracer() might reference current_trace, so it
10402          * needs to be set before we register anything. This is
10403          * just a bootstrap of current_trace anyway.
10404          */
10405         global_trace.current_trace = &nop_trace;
10406 
10407         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10408 #ifdef CONFIG_TRACER_MAX_TRACE
10409         spin_lock_init(&global_trace.snapshot_trigger_lock);
10410 #endif
10411         ftrace_init_global_array_ops(&global_trace);
10412 
10413         init_trace_flags_index(&global_trace);
10414 
10415         register_tracer(&nop_trace);
10416 
10417         /* Function tracing may start here (via kernel command line) */
10418         init_function_trace();
10419 
10420         /* All seems OK, enable tracing */
10421         tracing_disabled = 0;
10422 
10423         atomic_notifier_chain_register(&panic_notifier_list,
10424                                        &trace_panic_notifier);
10425 
10426         register_die_notifier(&trace_die_notifier);
10427 
10428         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10429 
10430         INIT_LIST_HEAD(&global_trace.systems);
10431         INIT_LIST_HEAD(&global_trace.events);
10432         INIT_LIST_HEAD(&global_trace.hist_vars);
10433         INIT_LIST_HEAD(&global_trace.err_log);
10434         list_add(&global_trace.list, &ftrace_trace_arrays);
10435 
10436         apply_trace_boot_options();
10437 
10438         register_snapshot_cmd();
10439 
10440         test_can_verify();
10441 
10442         return 0;
10443 
10444 out_free_pipe_cpumask:
10445         free_cpumask_var(global_trace.pipe_cpumask);
10446 out_free_savedcmd:
10447         trace_free_saved_cmdlines_buffer();
10448 out_free_temp_buffer:
10449         ring_buffer_free(temp_buffer);
10450 out_rm_hp_state:
10451         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10452 out_free_cpumask:
10453         free_cpumask_var(global_trace.tracing_cpumask);
10454 out_free_buffer_mask:
10455         free_cpumask_var(tracing_buffer_mask);
10456 out:
10457         return ret;
10458 }
10459 
10460 void __init ftrace_boot_snapshot(void)
10461 {
10462 #ifdef CONFIG_TRACER_MAX_TRACE
10463         struct trace_array *tr;
10464 
10465         if (!snapshot_at_boot)
10466                 return;
10467 
10468         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10469                 if (!tr->allocated_snapshot)
10470                         continue;
10471 
10472                 tracing_snapshot_instance(tr);
10473                 trace_array_puts(tr, "** Boot snapshot taken **\n");
10474         }
10475 #endif
10476 }
10477 
10478 void __init early_trace_init(void)
10479 {
10480         if (tracepoint_printk) {
10481                 tracepoint_print_iter =
10482                         kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10483                 if (MEM_FAIL(!tracepoint_print_iter,
10484                              "Failed to allocate trace iterator\n"))
10485                         tracepoint_printk = 0;
10486                 else
10487                         static_key_enable(&tracepoint_printk_key.key);
10488         }
10489         tracer_alloc_buffers();
10490 
10491         init_events();
10492 }
10493 
10494 void __init trace_init(void)
10495 {
10496         trace_event_init();
10497 
10498         if (boot_instance_index)
10499                 enable_instances();
10500 }
10501 
10502 __init static void clear_boot_tracer(void)
10503 {
10504         /*
10505          * The default tracer at boot buffer is an init section.
10506          * This function is called in lateinit. If we did not
10507          * find the boot tracer, then clear it out, to prevent
10508          * later registration from accessing the buffer that is
10509          * about to be freed.
10510          */
10511         if (!default_bootup_tracer)
10512                 return;
10513 
10514         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10515                default_bootup_tracer);
10516         default_bootup_tracer = NULL;
10517 }
10518 
10519 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10520 __init static void tracing_set_default_clock(void)
10521 {
10522         /* sched_clock_stable() is determined in late_initcall */
10523         if (!trace_boot_clock && !sched_clock_stable()) {
10524                 if (security_locked_down(LOCKDOWN_TRACEFS)) {
10525                         pr_warn("Can not set tracing clock due to lockdown\n");
10526                         return;
10527                 }
10528 
10529                 printk(KERN_WARNING
10530                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
10531                        "If you want to keep using the local clock, then add:\n"
10532                        "  \"trace_clock=local\"\n"
10533                        "on the kernel command line\n");
10534                 tracing_set_clock(&global_trace, "global");
10535         }
10536 }
10537 #else
10538 static inline void tracing_set_default_clock(void) { }
10539 #endif
10540 
10541 __init static int late_trace_init(void)
10542 {
10543         if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10544                 static_key_disable(&tracepoint_printk_key.key);
10545                 tracepoint_printk = 0;
10546         }
10547 
10548         tracing_set_default_clock();
10549         clear_boot_tracer();
10550         return 0;
10551 }
10552 
10553 late_initcall_sync(late_trace_init);
10554 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php