@@ -38,6 +38,7 @@ struct ioam6_lwt_freq {
38
38
};
39
39
40
40
struct ioam6_lwt {
41
+ struct dst_entry null_dst ;
41
42
struct dst_cache cache ;
42
43
struct ioam6_lwt_freq freq ;
43
44
atomic_t pkt_cnt ;
@@ -177,6 +178,14 @@ static int ioam6_build_state(struct net *net, struct nlattr *nla,
177
178
if (err )
178
179
goto free_lwt ;
179
180
181
+ /* This "fake" dst_entry will be stored in a dst_cache, which will call
182
+ * dst_hold() and dst_release() on it. We must ensure that dst_destroy()
183
+ * will never be called. For that, its initial refcount is 1 and +1 when
184
+ * it is stored in the cache. Then, +1/-1 each time we read the cache
185
+ * and release it. The refcount therefore never drops to 0.
186
+ */
187
+ dst_init (& ilwt -> null_dst , NULL , NULL , DST_OBSOLETE_NONE , DST_NOCOUNT );
188
+
180
189
atomic_set (& ilwt -> pkt_cnt , 0 );
181
190
ilwt -> freq .k = freq_k ;
182
191
ilwt -> freq .n = freq_n ;
@@ -356,6 +365,17 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
356
365
dst = dst_cache_get (& ilwt -> cache );
357
366
local_bh_enable ();
358
367
368
+ /* A cached null_dst signals that the destination does not change after
369
+ * transformation, so we must use orig_dst instead of the cached entry.
370
+ */
371
+ if (dst == & ilwt -> null_dst ) {
372
+ dst_release (dst );
373
+
374
+ dst = orig_dst ;
375
+ /* keep refcount balance: dst_release() is called at the end */
376
+ dst_hold (dst );
377
+ }
378
+
359
379
switch (ilwt -> mode ) {
360
380
case IOAM6_IPTUNNEL_MODE_INLINE :
361
381
do_inline :
@@ -408,12 +428,19 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
408
428
goto drop ;
409
429
}
410
430
411
- /* cache only if we don't create a dst reference loop */
412
- if (orig_dst -> lwtstate != dst -> lwtstate ) {
413
- local_bh_disable ();
431
+ /* If the destination is the same after transformation (which is
432
+ * a valid use case for IOAM), then we don't want to add it to
433
+ * the cache in order to avoid a reference loop. Instead, we add
434
+ * our fake dst_entry to the cache as a way to detect this case.
435
+ * Otherwise, we add the resolved destination to the cache.
436
+ */
437
+ local_bh_disable ();
438
+ if (orig_dst -> lwtstate == dst -> lwtstate )
439
+ dst_cache_set_ip6 (& ilwt -> cache ,
440
+ & ilwt -> null_dst , & fl6 .saddr );
441
+ else
414
442
dst_cache_set_ip6 (& ilwt -> cache , dst , & fl6 .saddr );
415
- local_bh_enable ();
416
- }
443
+ local_bh_enable ();
417
444
418
445
err = skb_cow_head (skb , LL_RESERVED_SPACE (dst -> dev ));
419
446
if (unlikely (err ))
@@ -439,6 +466,10 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
439
466
440
467
/* Release the resources of an IOAM6 lwtunnel state: destroys the dst_cache
 * held in the state returned by ioam6_lwt_state(lwt).
 */
static void ioam6_destroy_state (struct lwtunnel_state * lwt )
441
468
{
469
+ /* The refcount of our "fake" dst_entry never reaches 0 (see the comment
470
+ * above dst_init() in ioam6_build_state()), so per-cpu cache entries that
471
+ * hold it need not be removed before calling dst_cache_destroy().
472
+ */
442
473
dst_cache_destroy (& ioam6_lwt_state (lwt )-> cache );
443
474
}
444
475
0 commit comments