@@ -39,6 +39,8 @@ struct ixgbe_tx_queue {
 	uint16_t clean_index;
 	// position to insert packets for transmission
 	uint16_t tx_index;
+	// head writeback pointer
+	uint32_t* head_pointer;
 	// virtual addresses to map descriptors back to their mbuf for freeing
 	void* virtual_addresses[];
 };
@@ -195,13 +197,23 @@ static void init_tx(struct ixgbe_device* dev) {
 		// there are no defines for this in ixgbe_type.h for some reason
 		// pthresh: 6:0, hthresh: 14:8, wthresh: 22:16
 		txdctl &= ~(0x3F | (0x3F << 8) | (0x3F << 16)); // clear bits
-		txdctl |= (36 | (8 << 8) | (4 << 16)); // from DPDK
+		txdctl |= (36 | (8 << 8) | (0 << 16)); // from DPDK
 		set_reg32(dev->addr, IXGBE_TXDCTL(i), txdctl);
 
 		// private data for the driver, 0-initialized
 		struct ixgbe_tx_queue* queue = ((struct ixgbe_tx_queue*)(dev->tx_queues)) + i;
 		queue->num_entries = NUM_TX_QUEUE_ENTRIES;
 		queue->descriptors = (union ixgbe_adv_tx_desc*) mem.virt;
+
+		mem = memory_allocate_dma(4, true);
+		queue->head_pointer = mem.virt;
+		info("virt %p phy %lx", mem.virt, mem.phy);
+
+		// set writeback head pointer address
+		set_reg32(dev->addr, IXGBE_TDWBAL(i), (uint32_t) (1 | mem.phy));
+		set_reg32(dev->addr, IXGBE_TDWBAH(i), (mem.phy >> 32));
+		debug("TDWBAL %x", get_reg32(dev->addr, IXGBE_TDWBAL(i)));
+		debug("TDWBAH %x", get_reg32(dev->addr, IXGBE_TDWBAH(i)));
 	}
 	// final step: enable DMA
 	set_reg32(dev->addr, IXGBE_DMATXCTL, IXGBE_DMATXCTL_TE);
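
The TDWBAL/TDWBAH writes above program the per-queue head writeback address; the `1` OR'ed into the low word is the enable bit (Head_WB_En, bit 0 of TDWBAL on the 82599), and the remaining bits hold the DWORD-aligned physical address. A minimal sketch of the same sequence as a standalone helper, assuming ixy's struct dma_memory, memory_allocate_dma() and set_reg32() as used in the diff (the helper name and the cache-line-sized allocation are my own additions, not part of the commit):

// hypothetical helper, for illustration: enable TX head writeback for one queue
// and return the host-memory location the NIC will write the head index to
static uint32_t* enable_head_writeback(struct ixgbe_device* dev, uint16_t queue_id) {
	// give the writeback location its own cache line so the NIC's DMA write
	// does not share a line with unrelated driver data
	struct dma_memory mem = memory_allocate_dma(64, true);
	// bit 0 of TDWBAL enables head writeback; the other bits carry the low
	// part of the DWORD-aligned physical address
	set_reg32(dev->addr, IXGBE_TDWBAL(queue_id), (uint32_t) (mem.phy | 1));
	set_reg32(dev->addr, IXGBE_TDWBAH(queue_id), (uint32_t) (mem.phy >> 32));
	return (uint32_t*) mem.virt;
}
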
@@ -415,12 +427,18 @@ uint32_t ixgbe_tx_batch(struct ixy_device* ixy, uint16_t queue_id, struct pkt_bu
 	uint16_t clean_index = queue->clean_index; // next descriptor to clean up
 	uint16_t cur_index = queue->tx_index; // next descriptor to use for tx
 
+	//uint32_t iio_head = get_reg32(dev, IXGBE_TDH(queue_id));
+	uint32_t ptr_head = *queue->head_pointer;
+	//debug("TDH %u, head pointer: %u", iio_head, ptr_head);
+
 	// step 1: clean up descriptors that were sent out by the hardware and return them to the mempool
 	// start by reading step 2 which is done first for each packet
 	// cleaning up must be done in batches for performance reasons, so this is unfortunately somewhat complicated
-	while (true) {
+	// while (true) {
 		// figure out how many descriptors can be cleaned up
-		int32_t cleanable = cur_index - clean_index; // cur is always ahead of clean (invariant of our queue)
+		//int32_t cleanable = cur_index - clean_index; // cur is always ahead of clean (invariant of our queue)
+		/*
+		int32_t cleanable = *queue->head_pointer - clean_index;
 		if (cleanable < 0) { // handle wrap-around
 			cleanable = queue->num_entries + cleanable;
 		}
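
The commented-out TDH read shows the approach this replaces: polling the hardware head register over MMIO on every batch, which is considerably more expensive than reading the copy the NIC DMAs into host memory. One detail worth hedging: head_pointer is declared as a plain uint32_t*, so a cautious accessor would read it through a volatile pointer to make sure the compiler reloads the value on every call. A sketch of such an accessor (my suggestion, not something the commit does):

// hypothetical accessor for the writeback head, assuming the head_pointer field
// added in this commit; the volatile cast forces a fresh load on every call
static inline uint16_t tx_writeback_head(struct ixgbe_tx_queue* queue) {
	return (uint16_t) *(volatile uint32_t*) queue->head_pointer;
}
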
@@ -433,8 +451,33 @@ uint32_t ixgbe_tx_batch(struct ixy_device* ixy, uint16_t queue_id, struct pkt_bu
 		if (cleanup_to >= queue->num_entries) {
 			cleanup_to -= queue->num_entries;
 		}
+		*/
+
+	int32_t cleanup_to = ptr_head;
+	int32_t i = clean_index;
+	// ring empty
+	if (clean_index == cleanup_to) {
+		goto skip_clean;
+	}
+	cleanup_to--;
+	if (cleanup_to < 0) {
+		cleanup_to += queue->num_entries;
+	}
+	//debug("cleaning from %i to %i", clean_index, cleanup_to);
+	while (true) {
+		struct pkt_buf* buf = queue->virtual_addresses[i];
+		pkt_buf_free(buf);
+		if (i == cleanup_to) {
+			break;
+		}
+		i = wrap_ring(i, queue->num_entries);
+	}
+	clean_index = wrap_ring(cleanup_to, queue->num_entries);
+
+	/*
 		volatile union ixgbe_adv_tx_desc* txd = queue->descriptors + cleanup_to;
 		uint32_t status = txd->wb.status;
+
 		// hardware sets this flag as soon as it's sent out, we can give back all bufs in the batch back to the mempool
 		if (status & IXGBE_ADVTXD_STAT_DD) {
 			int32_t i = clean_index;
@@ -453,8 +496,10 @@ uint32_t ixgbe_tx_batch(struct ixy_device* ixy, uint16_t queue_id, struct pkt_bu
 			// the queue forever if you stop transmitting, but that's not a real concern
 			break;
 		}
-	}
+	*/
+	//}
 	queue->clean_index = clean_index;
+	skip_clean:;
 
 	// step 2: send out as many of our packets as possible
 	uint32_t sent;
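
Taken together, the new cleanup frees every buffer from clean_index up to (but not including) the written-back head and then advances clean_index to the head, with the goto covering the ring-empty case. The same wrap-around arithmetic can be written without the goto by first computing how many descriptors are cleanable; a sketch assuming ixy's pkt_buf_free() and wrap_ring() helpers (illustrative only, not a drop-in replacement for the diff):

// hypothetical rewrite of the cleanup step: free all descriptors the NIC has
// completed, i.e. everything in [clean_index, head) modulo the ring size
static void clean_tx_queue(struct ixgbe_tx_queue* queue, uint32_t head) {
	int32_t cleanable = (int32_t) head - (int32_t) queue->clean_index;
	if (cleanable < 0) { // handle wrap-around
		cleanable += queue->num_entries;
	}
	while (cleanable--) {
		struct pkt_buf* buf = queue->virtual_addresses[queue->clean_index];
		pkt_buf_free(buf);
		queue->clean_index = wrap_ring(queue->clean_index, queue->num_entries);
	}
}
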
@@ -473,7 +518,13 @@ uint32_t ixgbe_tx_batch(struct ixy_device* ixy, uint16_t queue_id, struct pkt_bu
 		txd->read.buffer_addr = buf->buf_addr_phy + offsetof(struct pkt_buf, data);
 		// always the same flags: one buffer (EOP), advanced data descriptor, CRC offload, data length
 		txd->read.cmd_type_len =
-			IXGBE_ADVTXD_DCMD_EOP | IXGBE_ADVTXD_DCMD_RS | IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_DATA | buf->size;
+			IXGBE_ADVTXD_DCMD_EOP | /*IXGBE_ADVTXD_DCMD_RS |*/ IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_DATA | buf->size;
+
+		// the RS bit signals the NIC to update the head writeback pointer
+		// TODO: implement something more fancy. I did not find an upper limit for gaps, but sent packets can't be cleaned until a descriptor with the bit set completes.
+		if (sent == num_bufs - 1) {
+			txd->read.cmd_type_len |= IXGBE_ADVTXD_DCMD_RS;
+		}
 		// no fancy offloading stuff - only the total payload length
 		// implement offloading flags here:
 		// 	* ip checksum offloading is trivial: just set the offset
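
Setting RS only on the final descriptor of each batch limits head updates to one per batch, but, as the comment above notes, packets queued after the last RS-tagged descriptor cannot be cleaned until another descriptor with RS set completes. One possible refinement, sketched here as an assumption rather than anything the commit or ixy implements, is to also tag every N-th ring slot so the gap stays bounded regardless of batch size (TX_RS_THRESH is my own name, borrowed from the analogous DPDK parameter):

#include <stdbool.h>
#include <stdint.h>

// hypothetical policy: request a writeback on the last descriptor of the batch
// and on every TX_RS_THRESH-th ring slot, so cleanup never waits for more than
// TX_RS_THRESH outstanding descriptors (assumes a power-of-two ring and threshold)
#define TX_RS_THRESH 32

static inline bool tx_descriptor_wants_rs(uint16_t desc_index, uint32_t sent, uint32_t num_bufs) {
	return sent == num_bufs - 1 || (desc_index & (TX_RS_THRESH - 1)) == TX_RS_THRESH - 1;
}
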
@@ -487,3 +538,4 @@ uint32_t ixgbe_tx_batch(struct ixy_device* ixy, uint16_t queue_id, struct pkt_bu
 	return sent;
 }
 
+