TOMOYO Linux Cross Reference
Linux/kernel/dma/map_benchmark.c

// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2020 HiSilicon Limited.
 */

#define pr_fmt(fmt)     KBUILD_MODNAME ": " fmt

#include <linux/debugfs.h>
#include <linux/delay.h>
#include <linux/device.h>
#include <linux/dma-mapping.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/map_benchmark.h>
#include <linux/math64.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/timekeeping.h>

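/*
 * Shared state for one benchmark run: map/unmap latencies are accumulated
 * across all test threads as running sums and sums of squares, in 0.1us
 * units, so the mean and standard deviation can be derived at the end
 * without keeping per-sample data.
 */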
struct map_benchmark_data {
        struct map_benchmark bparam;
        struct device *dev;
        struct dentry *debugfs;
        enum dma_data_direction dir;
        atomic64_t sum_map_100ns;
        atomic64_t sum_unmap_100ns;
        atomic64_t sum_sq_map;
        atomic64_t sum_sq_unmap;
        atomic64_t loops;
};

static int map_benchmark_thread(void *data)
{
        void *buf;
        dma_addr_t dma_addr;
        struct map_benchmark_data *map = data;
        int npages = map->bparam.granule;
        u64 size = npages * PAGE_SIZE;
        int ret = 0;

        buf = alloc_pages_exact(size, GFP_KERNEL);
        if (!buf)
                return -ENOMEM;

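        /*
         * Each iteration times one dma_map_single() and one
         * dma_unmap_single() of a 'granule'-page buffer, with an optional
         * ndelay() in between standing in for the real DMA transfer.
         */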
        while (!kthread_should_stop()) {
                u64 map_100ns, unmap_100ns, map_sq, unmap_sq;
                ktime_t map_stime, map_etime, unmap_stime, unmap_etime;
                ktime_t map_delta, unmap_delta;

                /*
                 * For a non-coherent device, if we don't dirty the buffer
                 * in the cache, this will give an underestimate of the
                 * real-world overhead of BIDIRECTIONAL or TO_DEVICE
                 * mappings. 0x66 means everything goes well! 66 is lucky.
                 */
                if (map->dir != DMA_FROM_DEVICE)
                        memset(buf, 0x66, size);

                map_stime = ktime_get();
                dma_addr = dma_map_single(map->dev, buf, size, map->dir);
                if (unlikely(dma_mapping_error(map->dev, dma_addr))) {
                        pr_err("dma_map_single failed on %s\n",
                                dev_name(map->dev));
                        ret = -ENOMEM;
                        goto out;
                }
                map_etime = ktime_get();
                map_delta = ktime_sub(map_etime, map_stime);

                /* Pretend DMA is transmitting */
                ndelay(map->bparam.dma_trans_ns);

                unmap_stime = ktime_get();
                dma_unmap_single(map->dev, dma_addr, size, map->dir);
                unmap_etime = ktime_get();
                unmap_delta = ktime_sub(unmap_etime, unmap_stime);

                /* calculate sum and sum of squares */

                map_100ns = div64_ul(map_delta, 100);
                unmap_100ns = div64_ul(unmap_delta, 100);
                map_sq = map_100ns * map_100ns;
                unmap_sq = unmap_100ns * unmap_100ns;

                atomic64_add(map_100ns, &map->sum_map_100ns);
                atomic64_add(unmap_100ns, &map->sum_unmap_100ns);
                atomic64_add(map_sq, &map->sum_sq_map);
                atomic64_add(unmap_sq, &map->sum_sq_unmap);
                atomic64_inc(&map->loops);

                /*
                 * We may test for a long time, so periodically check
                 * whether we need to schedule to avoid starving others.
                 * Otherwise we may hang a non-preemptible kernel: when the
                 * number of test kthreads is >= the number of CPUs, the
                 * test kthreads would run endlessly on every CPU, since
                 * the thread responsible for telling them to stop (in
                 * do_map_benchmark()) could never be scheduled.
                 *
                 * Note this may degrade test concurrency, since the test
                 * threads may need to share CPU time with other load in
                 * the system; it's therefore recommended to run this
                 * benchmark on an idle system.
                 */
                cond_resched();
        }

out:
        free_pages_exact(buf, size);
        return ret;
}

static int do_map_benchmark(struct map_benchmark_data *map)
{
        struct task_struct **tsk;
        int threads = map->bparam.threads;
        int node = map->bparam.node;
        u64 loops;
        int ret = 0;
        int i;

        tsk = kmalloc_array(threads, sizeof(*tsk), GFP_KERNEL);
        if (!tsk)
                return -ENOMEM;

        get_device(map->dev);

        for (i = 0; i < threads; i++) {
                tsk[i] = kthread_create_on_node(map_benchmark_thread, map,
                                map->bparam.node, "dma-map-benchmark/%d", i);
                if (IS_ERR(tsk[i])) {
                        pr_err("create dma_map thread failed\n");
                        ret = PTR_ERR(tsk[i]);
                        while (--i >= 0)
                                kthread_stop(tsk[i]);
                        goto out;
                }

                if (node != NUMA_NO_NODE)
                        kthread_bind_mask(tsk[i], cpumask_of_node(node));
        }

        /* clear stale values from any previous benchmark run */
        atomic64_set(&map->sum_map_100ns, 0);
        atomic64_set(&map->sum_unmap_100ns, 0);
        atomic64_set(&map->sum_sq_map, 0);
        atomic64_set(&map->sum_sq_unmap, 0);
        atomic64_set(&map->loops, 0);

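        /*
         * Take an extra reference on each task so its task_struct stays
         * valid for kthread_stop_put() below, even if a thread exits early
         * on an error.
         */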
        for (i = 0; i < threads; i++) {
                get_task_struct(tsk[i]);
                wake_up_process(tsk[i]);
        }

        msleep_interruptible(map->bparam.seconds * 1000);

        /* wait for the completion of all started benchmark threads */
        for (i = 0; i < threads; i++) {
                int kthread_ret = kthread_stop_put(tsk[i]);

                if (kthread_ret)
                        ret = kthread_ret;
        }

        if (ret)
                goto out;

        loops = atomic64_read(&map->loops);
        if (likely(loops > 0)) {
                u64 map_variance, unmap_variance;
                u64 sum_map = atomic64_read(&map->sum_map_100ns);
                u64 sum_unmap = atomic64_read(&map->sum_unmap_100ns);
                u64 sum_sq_map = atomic64_read(&map->sum_sq_map);
                u64 sum_sq_unmap = atomic64_read(&map->sum_sq_unmap);

                /* average latency */
                map->bparam.avg_map_100ns = div64_u64(sum_map, loops);
                map->bparam.avg_unmap_100ns = div64_u64(sum_unmap, loops);

                /* standard deviation of latency */
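                /*
                 * Var(X) = E[X^2] - (E[X])^2, computed in a single pass
                 * from the accumulated sum and sum of squares.
                 */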
                map_variance = div64_u64(sum_sq_map, loops) -
                                map->bparam.avg_map_100ns *
                                map->bparam.avg_map_100ns;
                unmap_variance = div64_u64(sum_sq_unmap, loops) -
                                map->bparam.avg_unmap_100ns *
                                map->bparam.avg_unmap_100ns;
                map->bparam.map_stddev = int_sqrt64(map_variance);
                map->bparam.unmap_stddev = int_sqrt64(unmap_variance);
        }

out:
        put_device(map->dev);
        kfree(tsk);
        return ret;
}

static long map_benchmark_ioctl(struct file *file, unsigned int cmd,
                unsigned long arg)
{
        struct map_benchmark_data *map = file->private_data;
        void __user *argp = (void __user *)arg;
        u64 old_dma_mask;
        int ret;

        if (copy_from_user(&map->bparam, argp, sizeof(map->bparam)))
                return -EFAULT;

        switch (cmd) {
        case DMA_MAP_BENCHMARK:
                if (map->bparam.threads == 0 ||
                    map->bparam.threads > DMA_MAP_MAX_THREADS) {
                        pr_err("invalid thread number\n");
                        return -EINVAL;
                }

                if (map->bparam.seconds == 0 ||
                    map->bparam.seconds > DMA_MAP_MAX_SECONDS) {
                        pr_err("invalid duration seconds\n");
                        return -EINVAL;
                }

                if (map->bparam.dma_trans_ns > DMA_MAP_MAX_TRANS_DELAY) {
                        pr_err("invalid transmission delay\n");
                        return -EINVAL;
                }

                if (map->bparam.node != NUMA_NO_NODE &&
                    (map->bparam.node < 0 || map->bparam.node >= MAX_NUMNODES ||
                     !node_possible(map->bparam.node))) {
                        pr_err("invalid numa node\n");
                        return -EINVAL;
                }

                if (map->bparam.granule < 1 || map->bparam.granule > 1024) {
                        pr_err("invalid granule size\n");
                        return -EINVAL;
                }

                switch (map->bparam.dma_dir) {
                case DMA_MAP_BIDIRECTIONAL:
                        map->dir = DMA_BIDIRECTIONAL;
                        break;
                case DMA_MAP_FROM_DEVICE:
                        map->dir = DMA_FROM_DEVICE;
                        break;
                case DMA_MAP_TO_DEVICE:
                        map->dir = DMA_TO_DEVICE;
                        break;
                default:
                        pr_err("invalid DMA direction\n");
                        return -EINVAL;
                }

                old_dma_mask = dma_get_mask(map->dev);

                ret = dma_set_mask(map->dev,
                                   DMA_BIT_MASK(map->bparam.dma_bits));
                if (ret) {
                        pr_err("failed to set dma_mask on device %s\n",
                                dev_name(map->dev));
                        return -EINVAL;
                }

                ret = do_map_benchmark(map);

                /*
                 * Restore the original dma_mask: many devices' dma_mask is
                 * set by architecture code, ACPI, or the bus. When the
                 * device is bound back to its original driver, that driver
                 * shouldn't see a dma_mask changed by the benchmark.
                 */
                dma_set_mask(map->dev, old_dma_mask);

                if (ret)
                        return ret;
                break;
        default:
                return -EINVAL;
        }

        if (copy_to_user(argp, &map->bparam, sizeof(map->bparam)))
                return -EFAULT;

        return ret;
}

static const struct file_operations map_benchmark_fops = {
        .open                   = simple_open,
        .unlocked_ioctl         = map_benchmark_ioctl,
};

static void map_benchmark_remove_debugfs(void *data)
{
        struct map_benchmark_data *map = data;

        debugfs_remove(map->debugfs);
}

static int __map_benchmark_probe(struct device *dev)
{
        struct dentry *entry;
        struct map_benchmark_data *map;
        int ret;

        map = devm_kzalloc(dev, sizeof(*map), GFP_KERNEL);
        if (!map)
                return -ENOMEM;
        map->dev = dev;

        ret = devm_add_action(dev, map_benchmark_remove_debugfs, map);
        if (ret) {
                pr_err("Can't add debugfs remove action\n");
                return ret;
        }

        /*
         * We only permit one device to be bound to this driver; a second
         * probe will fail because the debugfs file already exists.
         */
        entry = debugfs_create_file("dma_map_benchmark", 0600, NULL, map,
                        &map_benchmark_fops);
        if (IS_ERR(entry))
                return PTR_ERR(entry);
        map->debugfs = entry;

        return 0;
}

static int map_benchmark_platform_probe(struct platform_device *pdev)
{
        return __map_benchmark_probe(&pdev->dev);
}

static struct platform_driver map_benchmark_platform_driver = {
        .driver         = {
                .name   = "dma_map_benchmark",
        },
        .probe = map_benchmark_platform_probe,
};

static int
map_benchmark_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
        return __map_benchmark_probe(&pdev->dev);
}

static struct pci_driver map_benchmark_pci_driver = {
        .name   = "dma_map_benchmark",
        .probe  = map_benchmark_pci_probe,
};

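/*
 * Register as both a PCI driver and a platform driver, so that either kind
 * of device can be bound to the benchmark (e.g. via its bus's
 * driver_override mechanism) to supply the DMA mappings being measured.
 */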
static int __init map_benchmark_init(void)
{
        int ret;

        ret = pci_register_driver(&map_benchmark_pci_driver);
        if (ret)
                return ret;

        ret = platform_driver_register(&map_benchmark_platform_driver);
        if (ret) {
                pci_unregister_driver(&map_benchmark_pci_driver);
                return ret;
        }

        return 0;
}

static void __exit map_benchmark_cleanup(void)
{
        platform_driver_unregister(&map_benchmark_platform_driver);
        pci_unregister_driver(&map_benchmark_pci_driver);
}

module_init(map_benchmark_init);
module_exit(map_benchmark_cleanup);

MODULE_AUTHOR("Barry Song <song.bao.hua@hisilicon.com>");
MODULE_DESCRIPTION("dma_map benchmark driver");

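For reference, the benchmark is driven from userspace through the debugfs file
created above, by issuing the DMA_MAP_BENCHMARK ioctl with struct map_benchmark
as both input and output. Below is a minimal sketch of such a caller. It
assumes the UAPI layout and ioctl number from include/linux/map_benchmark.h
(the struct is redeclared locally here purely for illustration; real code
should take it from the kernel headers), and it assumes a suitable device has
already been unbound from its own driver and bound to dma_map_benchmark, e.g.
via the bus's driver_override file. The in-tree selftest at
tools/testing/selftests/dma/dma_map_benchmark.c is the authoritative example.

#include <fcntl.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>

/* Assumed mirror of struct map_benchmark from include/linux/map_benchmark.h */
struct map_benchmark {
        uint64_t avg_map_100ns;   /* out: average map latency, 0.1us units */
        uint64_t map_stddev;      /* out: standard deviation of map latency */
        uint64_t avg_unmap_100ns; /* out: as above, for unmap */
        uint64_t unmap_stddev;
        uint32_t threads;         /* in: parallel map/unmap threads */
        uint32_t seconds;         /* in: test duration */
        int32_t  node;            /* in: NUMA node, -1 for no binding */
        uint32_t dma_bits;        /* in: DMA addressing capability */
        uint32_t dma_dir;         /* in: 0 BIDIRECTIONAL, 1 TO_DEVICE, 2 FROM_DEVICE */
        uint32_t dma_trans_ns;    /* in: simulated DMA transfer time */
        uint32_t granule;         /* in: pages mapped per iteration */
        uint8_t  expansion[76];   /* reserved for future use */
};

#define DMA_MAP_BENCHMARK _IOWR('d', 1, struct map_benchmark)

int main(void)
{
        struct map_benchmark bp;
        int fd;

        fd = open("/sys/kernel/debug/dma_map_benchmark", O_RDWR);
        if (fd < 0) {
                perror("open");
                return 1;
        }

        /* one thread, 5 seconds, 32-bit mask, bidirectional, 1 page/loop */
        memset(&bp, 0, sizeof(bp));
        bp.threads = 1;
        bp.seconds = 5;
        bp.node = -1;
        bp.dma_bits = 32;
        bp.dma_dir = 0;
        bp.granule = 1;

        if (ioctl(fd, DMA_MAP_BENCHMARK, &bp)) {
                perror("ioctl");
                close(fd);
                return 1;
        }

        printf("map:   avg %" PRIu64 ".%" PRIu64 " us, stddev %" PRIu64 ".%" PRIu64 " us\n",
               bp.avg_map_100ns / 10, bp.avg_map_100ns % 10,
               bp.map_stddev / 10, bp.map_stddev % 10);
        printf("unmap: avg %" PRIu64 ".%" PRIu64 " us, stddev %" PRIu64 ".%" PRIu64 " us\n",
               bp.avg_unmap_100ns / 10, bp.avg_unmap_100ns % 10,
               bp.unmap_stddev / 10, bp.unmap_stddev % 10);
        close(fd);
        return 0;
}

Since the driver reports results in 0.1us units, the example divides by ten to
print microseconds with one decimal place.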
