mirror of
https://github.com/torvalds/linux.git
synced 2026-03-08 04:04:43 +01:00
netfilter: nft_set_pipapo_avx2: split lookup function in two parts
Split the main avx2 lookup function into a helper.

This is a preparation patch: followup change will use the new helper from the insertion path if possible. This greatly improves insertion performance when avx2 is supported.

Reviewed-by: Stefano Brivio <sbrivio@redhat.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
This commit is contained in:
parent
d11b26402a
commit
416e53e395
1 changed files with 82 additions and 54 deletions
|
|
@ -1133,56 +1133,35 @@ static inline void pipapo_resmap_init_avx2(const struct nft_pipapo_match *m, uns
|
|||
}
|
||||
|
||||
/**
|
||||
* nft_pipapo_avx2_lookup() - Lookup function for AVX2 implementation
|
||||
* @net: Network namespace
|
||||
* @set: nftables API set representation
|
||||
* @key: nftables API element representation containing key data
|
||||
* pipapo_get_avx2() - Lookup function for AVX2 implementation
|
||||
* @m: Storage containing the set elements
|
||||
* @data: Key data to be matched against existing elements
|
||||
* @genmask: If set, check that element is active in given genmask
|
||||
* @tstamp: Timestamp to check for expired elements
|
||||
*
|
||||
* For more details, see DOC: Theory of Operation in nft_set_pipapo.c.
|
||||
*
|
||||
* This implementation exploits the repetitive characteristic of the algorithm
|
||||
* to provide a fast, vectorised version using the AVX2 SIMD instruction set.
|
||||
*
|
||||
* Return: true on match, false otherwise.
|
||||
* The caller must check that the FPU is usable.
|
||||
* This function must be called with BH disabled.
|
||||
*
|
||||
* Return: pointer to &struct nft_pipapo_elem on match, NULL otherwise.
|
||||
*/
|
||||
const struct nft_set_ext *
|
||||
nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
|
||||
const u32 *key)
|
||||
static struct nft_pipapo_elem *pipapo_get_avx2(const struct nft_pipapo_match *m,
|
||||
const u8 *data, u8 genmask,
|
||||
u64 tstamp)
|
||||
{
|
||||
struct nft_pipapo *priv = nft_set_priv(set);
|
||||
const struct nft_set_ext *ext = NULL;
|
||||
struct nft_pipapo_scratch *scratch;
|
||||
u8 genmask = nft_genmask_cur(net);
|
||||
const struct nft_pipapo_match *m;
|
||||
const struct nft_pipapo_field *f;
|
||||
const u8 *rp = (const u8 *)key;
|
||||
unsigned long *res, *fill;
|
||||
bool map_index;
|
||||
int i;
|
||||
|
||||
local_bh_disable();
|
||||
|
||||
if (unlikely(!irq_fpu_usable())) {
|
||||
ext = nft_pipapo_lookup(net, set, key);
|
||||
|
||||
local_bh_enable();
|
||||
return ext;
|
||||
}
|
||||
|
||||
m = rcu_dereference(priv->match);
|
||||
|
||||
/* Note that we don't need a valid MXCSR state for any of the
|
||||
* operations we use here, so pass 0 as mask and spare a LDMXCSR
|
||||
* instruction.
|
||||
*/
|
||||
kernel_fpu_begin_mask(0);
|
||||
|
||||
scratch = *raw_cpu_ptr(m->scratch);
|
||||
if (unlikely(!scratch)) {
|
||||
kernel_fpu_end();
|
||||
local_bh_enable();
|
||||
if (unlikely(!scratch))
|
||||
return NULL;
|
||||
}
|
||||
|
||||
map_index = scratch->map_index;
|
||||
|
||||
|
|
@ -1191,6 +1170,12 @@ nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
|
|||
|
||||
pipapo_resmap_init_avx2(m, res);
|
||||
|
||||
/* Note that we don't need a valid MXCSR state for any of the
|
||||
* operations we use here, so pass 0 as mask and spare a LDMXCSR
|
||||
* instruction.
|
||||
*/
|
||||
kernel_fpu_begin_mask(0);
|
||||
|
||||
nft_pipapo_avx2_prepare();
|
||||
|
||||
next_match:
|
||||
|
|
@ -1200,7 +1185,7 @@ next_match:
|
|||
|
||||
#define NFT_SET_PIPAPO_AVX2_LOOKUP(b, n) \
|
||||
(ret = nft_pipapo_avx2_lookup_##b##b_##n(res, fill, f, \
|
||||
ret, rp, \
|
||||
ret, data, \
|
||||
first, last))
|
||||
|
||||
if (likely(f->bb == 8)) {
|
||||
|
|
@ -1216,7 +1201,7 @@ next_match:
|
|||
NFT_SET_PIPAPO_AVX2_LOOKUP(8, 16);
|
||||
} else {
|
||||
ret = nft_pipapo_avx2_lookup_slow(m, res, fill, f,
|
||||
ret, rp,
|
||||
ret, data,
|
||||
first, last);
|
||||
}
|
||||
} else {
|
||||
|
|
@ -1232,7 +1217,7 @@ next_match:
|
|||
NFT_SET_PIPAPO_AVX2_LOOKUP(4, 32);
|
||||
} else {
|
||||
ret = nft_pipapo_avx2_lookup_slow(m, res, fill, f,
|
||||
ret, rp,
|
||||
ret, data,
|
||||
first, last);
|
||||
}
|
||||
}
|
||||
|
|
@ -1240,29 +1225,72 @@ next_match:
|
|||
|
||||
#undef NFT_SET_PIPAPO_AVX2_LOOKUP
|
||||
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
if (last) {
|
||||
const struct nft_set_ext *e = &f->mt[ret].e->ext;
|
||||
|
||||
if (unlikely(nft_set_elem_expired(e) ||
|
||||
!nft_set_elem_active(e, genmask)))
|
||||
goto next_match;
|
||||
|
||||
ext = e;
|
||||
goto out;
|
||||
if (ret < 0) {
|
||||
scratch->map_index = map_index;
|
||||
kernel_fpu_end();
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (last) {
|
||||
struct nft_pipapo_elem *e;
|
||||
|
||||
e = f->mt[ret].e;
|
||||
if (unlikely(__nft_set_elem_expired(&e->ext, tstamp) ||
|
||||
!nft_set_elem_active(&e->ext, genmask)))
|
||||
goto next_match;
|
||||
|
||||
scratch->map_index = map_index;
|
||||
kernel_fpu_end();
|
||||
return e;
|
||||
}
|
||||
|
||||
map_index = !map_index;
|
||||
swap(res, fill);
|
||||
rp += NFT_PIPAPO_GROUPS_PADDED_SIZE(f);
|
||||
data += NFT_PIPAPO_GROUPS_PADDED_SIZE(f);
|
||||
}
|
||||
|
||||
out:
|
||||
if (i % 2)
|
||||
scratch->map_index = !map_index;
|
||||
kernel_fpu_end();
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
* nft_pipapo_avx2_lookup() - Dataplane frontend for AVX2 implementation
|
||||
* @net: Network namespace
|
||||
* @set: nftables API set representation
|
||||
* @key: nftables API element representation containing key data
|
||||
*
|
||||
* This function is called from the data path. It will search for
|
||||
* an element matching the given key in the current active copy using
|
||||
* the AVX2 routines if the fpu is usable or fall back to the generic
|
||||
* implementation of the algorithm otherwise.
|
||||
*
|
||||
* Return: nftables API extension pointer or NULL if no match.
|
||||
*/
|
||||
const struct nft_set_ext *
|
||||
nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
|
||||
const u32 *key)
|
||||
{
|
||||
struct nft_pipapo *priv = nft_set_priv(set);
|
||||
u8 genmask = nft_genmask_cur(net);
|
||||
const struct nft_pipapo_match *m;
|
||||
const u8 *rp = (const u8 *)key;
|
||||
const struct nft_pipapo_elem *e;
|
||||
|
||||
local_bh_disable();
|
||||
|
||||
if (unlikely(!irq_fpu_usable())) {
|
||||
const struct nft_set_ext *ext;
|
||||
|
||||
ext = nft_pipapo_lookup(net, set, key);
|
||||
|
||||
local_bh_enable();
|
||||
return ext;
|
||||
}
|
||||
|
||||
m = rcu_dereference(priv->match);
|
||||
|
||||
e = pipapo_get_avx2(m, rp, genmask, get_jiffies_64());
|
||||
local_bh_enable();
|
||||
|
||||
return ext;
|
||||
return e ? &e->ext : NULL;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue