netfilter: nft_set_pipapo_avx2: split lookup function in two parts

Split the main AVX2 lookup function in two: a helper that performs the
lookup itself, and a dataplane frontend that calls it.

This is a preparation patch: a follow-up change will use the new helper
from the insertion path if possible, which greatly improves insertion
performance when AVX2 is supported.

Reviewed-by: Stefano Brivio <sbrivio@redhat.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
This commit is contained in:
Florian Westphal 2025-08-15 16:36:57 +02:00
parent d11b26402a
commit 416e53e395

View file

@ -1133,56 +1133,35 @@ static inline void pipapo_resmap_init_avx2(const struct nft_pipapo_match *m, uns
}
/**
* nft_pipapo_avx2_lookup() - Lookup function for AVX2 implementation
* @net: Network namespace
* @set: nftables API set representation
* @key: nftables API element representation containing key data
* pipapo_get_avx2() - Lookup function for AVX2 implementation
* @m: Storage containing the set elements
* @data: Key data to be matched against existing elements
* @genmask: If set, check that element is active in given genmask
* @tstamp: Timestamp to check for expired elements
*
* For more details, see DOC: Theory of Operation in nft_set_pipapo.c.
*
* This implementation exploits the repetitive characteristic of the algorithm
* to provide a fast, vectorised version using the AVX2 SIMD instruction set.
*
* Return: true on match, false otherwise.
* The caller must check that the FPU is usable.
* This function must be called with BH disabled.
*
* Return: pointer to &struct nft_pipapo_elem on match, NULL otherwise.
*/
const struct nft_set_ext *
nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
const u32 *key)
static struct nft_pipapo_elem *pipapo_get_avx2(const struct nft_pipapo_match *m,
const u8 *data, u8 genmask,
u64 tstamp)
{
struct nft_pipapo *priv = nft_set_priv(set);
const struct nft_set_ext *ext = NULL;
struct nft_pipapo_scratch *scratch;
u8 genmask = nft_genmask_cur(net);
const struct nft_pipapo_match *m;
const struct nft_pipapo_field *f;
const u8 *rp = (const u8 *)key;
unsigned long *res, *fill;
bool map_index;
int i;
local_bh_disable();
if (unlikely(!irq_fpu_usable())) {
ext = nft_pipapo_lookup(net, set, key);
local_bh_enable();
return ext;
}
m = rcu_dereference(priv->match);
/* Note that we don't need a valid MXCSR state for any of the
* operations we use here, so pass 0 as mask and spare a LDMXCSR
* instruction.
*/
kernel_fpu_begin_mask(0);
scratch = *raw_cpu_ptr(m->scratch);
if (unlikely(!scratch)) {
kernel_fpu_end();
local_bh_enable();
if (unlikely(!scratch))
return NULL;
}
map_index = scratch->map_index;
@ -1191,6 +1170,12 @@ nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
pipapo_resmap_init_avx2(m, res);
/* Note that we don't need a valid MXCSR state for any of the
* operations we use here, so pass 0 as mask and spare a LDMXCSR
* instruction.
*/
kernel_fpu_begin_mask(0);
nft_pipapo_avx2_prepare();
next_match:
@ -1200,7 +1185,7 @@ next_match:
#define NFT_SET_PIPAPO_AVX2_LOOKUP(b, n) \
(ret = nft_pipapo_avx2_lookup_##b##b_##n(res, fill, f, \
ret, rp, \
ret, data, \
first, last))
if (likely(f->bb == 8)) {
@ -1216,7 +1201,7 @@ next_match:
NFT_SET_PIPAPO_AVX2_LOOKUP(8, 16);
} else {
ret = nft_pipapo_avx2_lookup_slow(m, res, fill, f,
ret, rp,
ret, data,
first, last);
}
} else {
@ -1232,7 +1217,7 @@ next_match:
NFT_SET_PIPAPO_AVX2_LOOKUP(4, 32);
} else {
ret = nft_pipapo_avx2_lookup_slow(m, res, fill, f,
ret, rp,
ret, data,
first, last);
}
}
@ -1240,29 +1225,72 @@ next_match:
#undef NFT_SET_PIPAPO_AVX2_LOOKUP
if (ret < 0)
goto out;
if (last) {
const struct nft_set_ext *e = &f->mt[ret].e->ext;
if (unlikely(nft_set_elem_expired(e) ||
!nft_set_elem_active(e, genmask)))
goto next_match;
ext = e;
goto out;
if (ret < 0) {
scratch->map_index = map_index;
kernel_fpu_end();
return NULL;
}
if (last) {
struct nft_pipapo_elem *e;
e = f->mt[ret].e;
if (unlikely(__nft_set_elem_expired(&e->ext, tstamp) ||
!nft_set_elem_active(&e->ext, genmask)))
goto next_match;
scratch->map_index = map_index;
kernel_fpu_end();
return e;
}
map_index = !map_index;
swap(res, fill);
rp += NFT_PIPAPO_GROUPS_PADDED_SIZE(f);
data += NFT_PIPAPO_GROUPS_PADDED_SIZE(f);
}
out:
if (i % 2)
scratch->map_index = !map_index;
kernel_fpu_end();
return NULL;
}
/**
* nft_pipapo_avx2_lookup() - Dataplane frontend for AVX2 implementation
* @net: Network namespace
* @set: nftables API set representation
* @key: nftables API element representation containing key data
*
* This function is called from the data path. It will search for
* an element matching the given key in the current active copy using
* the AVX2 routines if the fpu is usable or fall back to the generic
* implementation of the algorithm otherwise.
*
* Bottom halves are disabled on entry and re-enabled on every return path,
* so the caller does not need to disable them.
*
* Return: nftables API extension pointer or NULL if no match.
*/
const struct nft_set_ext *
nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
const u32 *key)
{
struct nft_pipapo *priv = nft_set_priv(set);
u8 genmask = nft_genmask_cur(net);
const struct nft_pipapo_match *m;
const u8 *rp = (const u8 *)key;
const struct nft_pipapo_elem *e;
/* pipapo_get_avx2() must be called with BH disabled. */
local_bh_disable();
/* FPU not usable in this context: use the generic lookup instead of
* the AVX2 helper, and re-enable BH before returning its result.
*/
if (unlikely(!irq_fpu_usable())) {
const struct nft_set_ext *ext;
ext = nft_pipapo_lookup(net, set, key);
local_bh_enable();
return ext;
}
m = rcu_dereference(priv->match);
/* get_jiffies_64() supplies the timestamp the helper uses to check
* for expired elements.
*/
e = pipapo_get_avx2(m, rp, genmask, get_jiffies_64());
local_bh_enable();
/* NOTE(review): 'ext' is only declared inside the fallback branch above,
* and this return precedes the one below. It looks like the removed
* pre-patch line of this diff hunk rather than live code -- confirm
* against the applied tree.
*/
return ext;
/* Hand back the element's extension area, or NULL when nothing matched. */
return e ? &e->ext : NULL;
}