 #include "xe_macros.h"
 #include "xe_observation.h"
 #include "xe_pm.h"
+#include "xe_trace.h"
 
 #include "regs/xe_eu_stall_regs.h"
 #include "regs/xe_gt_regs.h"
 
+#define POLL_PERIOD_MS 5
+
 static size_t per_xecore_buf_size = SZ_512K;
 
 struct per_xecore_buf {
@@ -37,22 +40,27 @@ struct per_xecore_buf {
 };
 
 struct xe_eu_stall_data_stream {
+	bool pollin;
 	bool enabled;
 	int wait_num_reports;
 	int sampling_rate_mult;
+	wait_queue_head_t poll_wq;
 	size_t data_record_size;
 	size_t per_xecore_buf_size;
 
 	struct xe_gt *gt;
 	struct xe_bo *bo;
 	struct per_xecore_buf *xecore_buf;
+	struct delayed_work buf_poll_work;
 };
 
 struct xe_eu_stall_gt {
 	/* Lock to protect stream */
 	struct mutex stream_lock;
 	/* EU stall data stream */
 	struct xe_eu_stall_data_stream *stream;
+	/* Workqueue to schedule buffer pointers polling work */
+	struct workqueue_struct *buf_ptr_poll_wq;
 };
 
 /**
@@ -114,6 +122,7 @@ static void xe_eu_stall_fini(void *arg)
 {
 	struct xe_gt *gt = arg;
 
+	destroy_workqueue(gt->eu_stall->buf_ptr_poll_wq);
 	mutex_destroy(&gt->eu_stall->stream_lock);
 	kfree(gt->eu_stall);
 }
@@ -139,11 +148,19 @@ int xe_eu_stall_init(struct xe_gt *gt)
 
 	mutex_init(&gt->eu_stall->stream_lock);
 
+	gt->eu_stall->buf_ptr_poll_wq = alloc_ordered_workqueue("xe_eu_stall", 0);
+	if (!gt->eu_stall->buf_ptr_poll_wq) {
+		ret = -ENOMEM;
+		goto exit_free;
+	}
+
 	ret = devm_add_action_or_reset(xe->drm.dev, xe_eu_stall_fini, gt);
 	if (ret)
-		goto exit_free;
+		goto exit_destroy;
 
 	return 0;
+exit_destroy:
+	destroy_workqueue(gt->eu_stall->buf_ptr_poll_wq);
 exit_free:
 	mutex_destroy(&gt->eu_stall->stream_lock);
 	kfree(gt->eu_stall);
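Not part of the patch above: the hunk pairs alloc_ordered_workqueue() with destroy_workqueue() both in the devm-managed fini path and in the local error-unwind labels. A minimal, self-contained sketch of that lifecycle, using hypothetical names, looks like this:

#include <linux/errno.h>
#include <linux/workqueue.h>

/* Hypothetical per-GT state mirroring xe_eu_stall_gt. */
struct my_gt_state {
	struct workqueue_struct *wq;
};

static int my_state_init(struct my_gt_state *s)
{
	/* Ordered workqueue: at most one item runs at a time, in queue order,
	 * so the periodic pointer-poll work never races with itself. */
	s->wq = alloc_ordered_workqueue("my_poll_wq", 0);
	if (!s->wq)
		return -ENOMEM;
	return 0;
}

static void my_state_fini(struct my_gt_state *s)
{
	/* Drains any queued work before freeing the workqueue. */
	destroy_workqueue(s->wq);
}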
@@ -248,14 +265,214 @@ static int xe_eu_stall_user_extensions(struct xe_device *xe, u64 extension,
 	return 0;
 }
 
+/**
+ * buf_data_size - Calculate the number of bytes in a circular buffer
+ *		   given the read and write pointers and the size of
+ *		   the buffer.
+ *
+ * @buf_size: Size of the circular buffer
+ * @read_ptr: Read pointer with an additional overflow bit
+ * @write_ptr: Write pointer with an additional overflow bit
+ *
+ * Since the read and write pointers have an additional overflow bit,
+ * this function calculates the offsets from the pointers and uses the
+ * offsets to calculate the data size in the buffer.
+ *
+ * Returns: number of bytes of data in the buffer
+ */
+static u32 buf_data_size(size_t buf_size, u32 read_ptr, u32 write_ptr)
+{
+	u32 read_offset, write_offset, size = 0;
+
+	if (read_ptr == write_ptr)
+		goto exit;
+
+	read_offset = read_ptr & (buf_size - 1);
+	write_offset = write_ptr & (buf_size - 1);
+
+	if (write_offset > read_offset)
+		size = write_offset - read_offset;
+	else
+		size = buf_size - read_offset + write_offset;
+exit:
+	return size;
+}
+
+/**
+ * eu_stall_data_buf_poll - Poll for EU stall data in the buffer.
+ *
+ * @stream: xe EU stall data stream instance
+ *
+ * Returns: true if the EU stall buffer contains minimum stall data as
+ *	    specified by the event report count, else false.
+ */
+static bool eu_stall_data_buf_poll(struct xe_eu_stall_data_stream *stream)
+{
+	u32 read_ptr, write_ptr_reg, write_ptr, total_data = 0;
+	u32 buf_size = stream->per_xecore_buf_size;
+	struct per_xecore_buf *xecore_buf;
+	struct xe_gt *gt = stream->gt;
+	bool min_data_present = false;
+	u16 group, instance;
+	unsigned int xecore;
+
+	mutex_lock(&gt->eu_stall->stream_lock);
+	for_each_dss_steering(xecore, gt, group, instance) {
+		xecore_buf = &stream->xecore_buf[xecore];
+		read_ptr = xecore_buf->read;
+		write_ptr_reg = xe_gt_mcr_unicast_read(gt, XEHPC_EUSTALL_REPORT,
+						       group, instance);
+		write_ptr = REG_FIELD_GET(XEHPC_EUSTALL_REPORT_WRITE_PTR_MASK, write_ptr_reg);
+		write_ptr <<= 6;
+		write_ptr &= ((buf_size << 1) - 1);
+		if (!min_data_present) {
+			total_data += buf_data_size(buf_size, read_ptr, write_ptr);
+			if (num_data_rows(total_data) >= stream->wait_num_reports)
+				min_data_present = true;
+		}
+		xecore_buf->write = write_ptr;
+	}
+	mutex_unlock(&gt->eu_stall->stream_lock);
+
+	return min_data_present;
+}
+
+static int xe_eu_stall_data_buf_read(struct xe_eu_stall_data_stream *stream,
+				     char __user *buf, size_t count,
+				     size_t *total_data_size, struct xe_gt *gt,
+				     u16 group, u16 instance, unsigned int xecore)
+{
+	size_t read_data_size, copy_size, buf_size;
+	u32 read_ptr_reg, read_ptr, write_ptr;
+	u8 *xecore_start_vaddr, *read_vaddr;
+	struct per_xecore_buf *xecore_buf;
+	u32 read_offset, write_offset;
+
+	/* Hardware increments the read and write pointers such that they can
+	 * overflow into one additional bit. For example, a 256KB size buffer
+	 * offset pointer needs 18 bits. But HW uses 19 bits for the read and
+	 * write pointers. This technique avoids wasting a slot in the buffer.
+	 * Read and write offsets are calculated from the pointers in order to
+	 * check if the write pointer has wrapped around the array.
+	 */
+	xecore_buf = &stream->xecore_buf[xecore];
+	xecore_start_vaddr = xecore_buf->vaddr;
+	read_ptr = xecore_buf->read;
+	write_ptr = xecore_buf->write;
+	buf_size = stream->per_xecore_buf_size;
+
+	read_data_size = buf_data_size(buf_size, read_ptr, write_ptr);
+	/* Read only the data that the user space buffer can accommodate */
+	read_data_size = min_t(size_t, count - *total_data_size, read_data_size);
+	if (read_data_size == 0)
+		return 0;
+
+	read_offset = read_ptr & (buf_size - 1);
+	write_offset = write_ptr & (buf_size - 1);
+	read_vaddr = xecore_start_vaddr + read_offset;
+
+	if (write_offset > read_offset) {
+		if (copy_to_user(buf + *total_data_size, read_vaddr, read_data_size))
+			return -EFAULT;
+	} else {
+		if (read_data_size >= buf_size - read_offset)
+			copy_size = buf_size - read_offset;
+		else
+			copy_size = read_data_size;
+		if (copy_to_user(buf + *total_data_size, read_vaddr, copy_size))
+			return -EFAULT;
+		if (copy_to_user(buf + *total_data_size + copy_size,
+				 xecore_start_vaddr, read_data_size - copy_size))
+			return -EFAULT;
+	}
+
+	*total_data_size += read_data_size;
+	read_ptr += read_data_size;
+
+	/* Read pointer can overflow into one additional bit */
+	read_ptr &= (buf_size << 1) - 1;
+	read_ptr_reg = REG_FIELD_PREP(XEHPC_EUSTALL_REPORT1_READ_PTR_MASK, (read_ptr >> 6));
+	read_ptr_reg = _MASKED_FIELD(XEHPC_EUSTALL_REPORT1_READ_PTR_MASK, read_ptr_reg);
+	xe_gt_mcr_unicast_write(gt, XEHPC_EUSTALL_REPORT1, read_ptr_reg, group, instance);
+	xecore_buf->read = read_ptr;
+	trace_xe_eu_stall_data_read(group, instance, read_ptr, write_ptr,
+				    read_data_size, *total_data_size);
+	return 0;
+}
+
+/**
+ * xe_eu_stall_stream_read_locked - copy EU stall counters data from the
+ *				    per xecore buffers to the userspace buffer
+ * @stream: A stream opened for EU stall count metrics
+ * @file: An xe EU stall data stream file
+ * @buf: destination buffer given by userspace
+ * @count: the number of bytes userspace wants to read
+ *
+ * Returns: Number of bytes copied or a negative error code
+ * If we've successfully copied any data then reporting that takes
+ * precedence over any internal error status, so the data isn't lost.
+ */
+static ssize_t xe_eu_stall_stream_read_locked(struct xe_eu_stall_data_stream *stream,
+					      struct file *file, char __user *buf,
+					      size_t count)
+{
+	struct xe_gt *gt = stream->gt;
+	size_t total_size = 0;
+	u16 group, instance;
+	unsigned int xecore;
+	int ret = 0;
+
+	for_each_dss_steering(xecore, gt, group, instance) {
+		ret = xe_eu_stall_data_buf_read(stream, buf, count, &total_size,
+						gt, group, instance, xecore);
+		if (ret || count == total_size)
+			break;
+	}
+	return total_size ?: (ret ?: -EAGAIN);
+}
+
 /*
  * Userspace must enable the EU stall stream with DRM_XE_OBSERVATION_IOCTL_ENABLE
  * before calling read().
  */
 static ssize_t xe_eu_stall_stream_read(struct file *file, char __user *buf,
 				       size_t count, loff_t *ppos)
 {
-	ssize_t ret = 0;
+	struct xe_eu_stall_data_stream *stream = file->private_data;
+	struct xe_gt *gt = stream->gt;
+	ssize_t ret, aligned_count;
+
+	aligned_count = ALIGN_DOWN(count, stream->data_record_size);
+	if (aligned_count == 0)
+		return -EINVAL;
+
+	if (!stream->enabled) {
+		xe_gt_dbg(gt, "EU stall data stream not enabled to read\n");
+		return -EINVAL;
+	}
+
+	if (!(file->f_flags & O_NONBLOCK)) {
+		do {
+			ret = wait_event_interruptible(stream->poll_wq, stream->pollin);
+			if (ret)
+				return -EINTR;
+
+			mutex_lock(&gt->eu_stall->stream_lock);
+			ret = xe_eu_stall_stream_read_locked(stream, file, buf, aligned_count);
+			mutex_unlock(&gt->eu_stall->stream_lock);
+		} while (ret == -EAGAIN);
+	} else {
+		mutex_lock(&gt->eu_stall->stream_lock);
+		ret = xe_eu_stall_stream_read_locked(stream, file, buf, aligned_count);
+		mutex_unlock(&gt->eu_stall->stream_lock);
+	}
+
+	/*
+	 * This may not work correctly if the user buffer is very small.
+	 * We don't want to block the next read() when there is data in the buffer
+	 * now, but couldn't be accommodated in the small user buffer.
+	 */
+	stream->pollin = false;
 
 	return ret;
 }
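Not part of the patch above: the overflow-bit arithmetic used by buf_data_size() and xe_eu_stall_data_buf_read() can be checked in isolation. With a 256 KB buffer, offsets fit in 18 bits while the hardware pointers carry 19, so a completely full buffer (pointers differ only in the overflow bit) is distinguishable from an empty one (pointers equal) without sacrificing a slot. A small user-space sketch with made-up pointer values:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Same arithmetic as buf_data_size(): pointers carry one extra overflow bit. */
static uint32_t data_size(size_t buf_size, uint32_t read_ptr, uint32_t write_ptr)
{
	uint32_t read_off, write_off;

	if (read_ptr == write_ptr)
		return 0;				/* empty */

	read_off = read_ptr & (buf_size - 1);
	write_off = write_ptr & (buf_size - 1);
	if (write_off > read_off)
		return write_off - read_off;		/* no wrap */
	return buf_size - read_off + write_off;		/* wrapped around (or full) */
}

int main(void)
{
	const size_t buf = 256 * 1024;			/* 18-bit offset, 19-bit pointer */

	assert(data_size(buf, 0x00000, 0x00000) == 0);		/* empty */
	assert(data_size(buf, 0x00000, 0x40000) == buf);	/* full: only the overflow bit differs */
	assert(data_size(buf, 0x3ffc0, 0x40040) == 0x80);	/* 128 bytes straddling the wrap */
	printf("all cases behave as described\n");
	return 0;
}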
@@ -348,6 +565,21 @@ static int xe_eu_stall_stream_enable(struct xe_eu_stall_data_stream *stream)
 	return 0;
 }
 
+static void eu_stall_data_buf_poll_work_fn(struct work_struct *work)
+{
+	struct xe_eu_stall_data_stream *stream =
+		container_of(work, typeof(*stream), buf_poll_work.work);
+	struct xe_gt *gt = stream->gt;
+
+	if (eu_stall_data_buf_poll(stream)) {
+		stream->pollin = true;
+		wake_up(&stream->poll_wq);
+	}
+	queue_delayed_work(gt->eu_stall->buf_ptr_poll_wq,
+			   &stream->buf_poll_work,
+			   msecs_to_jiffies(POLL_PERIOD_MS));
+}
+
 static int xe_eu_stall_stream_init(struct xe_eu_stall_data_stream *stream,
 				   struct eu_stall_open_properties *props)
 {
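Not part of the patch above: eu_stall_data_buf_poll_work_fn() is the usual self-rearming delayed-work pattern. The driver polls the per-XeCore write pointers every POLL_PERIOD_MS rather than waiting for an interrupt, wakes any read()/poll() waiters once enough records have accumulated, and re-queues itself until the disable path cancels it. A stripped-down sketch of just that pattern, with hypothetical names:

#include <linux/jiffies.h>
#include <linux/types.h>
#include <linux/wait.h>
#include <linux/workqueue.h>

#define MY_POLL_PERIOD_MS 5	/* mirrors POLL_PERIOD_MS above */

struct my_stream {
	bool pollin;
	wait_queue_head_t poll_wq;
	struct delayed_work poll_work;
	struct workqueue_struct *wq;
};

/* Hypothetical helper standing in for eu_stall_data_buf_poll(). */
static bool my_data_available(struct my_stream *s);

static void my_poll_work_fn(struct work_struct *work)
{
	struct my_stream *s = container_of(work, typeof(*s), poll_work.work);

	if (my_data_available(s)) {
		s->pollin = true;
		wake_up(&s->poll_wq);	/* unblock read()/poll() waiters */
	}
	/* Re-arm; keeps running until cancel_delayed_work_sync() in the disable path. */
	queue_delayed_work(s->wq, &s->poll_work, msecs_to_jiffies(MY_POLL_PERIOD_MS));
}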
@@ -372,6 +604,9 @@ static int xe_eu_stall_stream_init(struct xe_eu_stall_data_stream *stream,
 			  max_wait_num_reports);
 		return -EINVAL;
 	}
+
+	init_waitqueue_head(&stream->poll_wq);
+	INIT_DELAYED_WORK(&stream->buf_poll_work, eu_stall_data_buf_poll_work_fn);
 	stream->per_xecore_buf_size = per_xecore_buf_size;
 	stream->sampling_rate_mult = props->sampling_rate_mult;
 	stream->wait_num_reports = props->wait_num_reports;
@@ -389,15 +624,35 @@ static int xe_eu_stall_stream_init(struct xe_eu_stall_data_stream *stream,
 	return 0;
 }
 
+static __poll_t xe_eu_stall_stream_poll_locked(struct xe_eu_stall_data_stream *stream,
+					       struct file *file, poll_table *wait)
+{
+	__poll_t events = 0;
+
+	poll_wait(file, &stream->poll_wq, wait);
+
+	if (stream->pollin)
+		events |= EPOLLIN;
+
+	return events;
+}
+
 static __poll_t xe_eu_stall_stream_poll(struct file *file, poll_table *wait)
 {
-	__poll_t ret = 0;
+	struct xe_eu_stall_data_stream *stream = file->private_data;
+	struct xe_gt *gt = stream->gt;
+	__poll_t ret;
+
+	mutex_lock(&gt->eu_stall->stream_lock);
+	ret = xe_eu_stall_stream_poll_locked(stream, file, wait);
+	mutex_unlock(&gt->eu_stall->stream_lock);
 
 	return ret;
 }
 
 static int xe_eu_stall_enable_locked(struct xe_eu_stall_data_stream *stream)
 {
+	struct xe_gt *gt = stream->gt;
 	int ret = 0;
 
 	if (stream->enabled)
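Not part of the patch above: from user space, the poll()/read() pair added in this series behaves like a typical stream fd. How the stream fd is opened and how the record size is discovered are outside this diff and are assumed below; the read handler rounds the requested byte count down to a whole number of records (and returns -EINVAL for less than one record), while EPOLLIN is raised once at least wait_num_reports records are buffered. A hedged consumer sketch:

#include <poll.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

/* 'stream_fd' is assumed to be an already opened and enabled EU stall stream;
 * 'record_size' is assumed to have been queried from the driver. */
static void drain_stream(int stream_fd, size_t record_size)
{
	struct pollfd pfd = { .fd = stream_fd, .events = POLLIN };
	char *buf;
	ssize_t n;

	/* Read size must be a whole number of records or read() fails with EINVAL. */
	buf = malloc(record_size * 64);
	if (!buf)
		return;

	for (int i = 0; i < 10; i++) {
		if (poll(&pfd, 1, 1000) <= 0)	/* wait up to 1s for EPOLLIN */
			continue;
		n = read(stream_fd, buf, record_size * 64);
		if (n > 0)
			printf("got %zd bytes (%zd records)\n", n, n / (ssize_t)record_size);
	}
	free(buf);
}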
@@ -406,6 +661,10 @@ static int xe_eu_stall_enable_locked(struct xe_eu_stall_data_stream *stream)
 	stream->enabled = true;
 
 	ret = xe_eu_stall_stream_enable(stream);
+
+	queue_delayed_work(gt->eu_stall->buf_ptr_poll_wq,
+			   &stream->buf_poll_work,
+			   msecs_to_jiffies(POLL_PERIOD_MS));
 	return ret;
 }
 
@@ -420,6 +679,8 @@ static int xe_eu_stall_disable_locked(struct xe_eu_stall_data_stream *stream)
 
 	xe_gt_mcr_multicast_write(gt, XEHPC_EUSTALL_BASE, 0);
 
+	cancel_delayed_work_sync(&stream->buf_poll_work);
+
 	xe_force_wake_put(gt_to_fw(gt), XE_FW_RENDER);
 	xe_pm_runtime_put(gt_to_xe(gt));
 