Commit 283bc9f3 by Fan Du Committed by Steffen Klassert

xfrm: Namespacify xfrm state/policy locks

By semantics, xfrm layer is fully name space aware, so will the locks, e.g. xfrm_state/pocliy_lock. Ensure exclusive access into state/policy link list for different name space with one global lock is not right in terms of semantics aspect at first place, as they are indeed mutually independent with each other, but also more seriously causes scalability problem. One practical scenario is on a Open Network Stack, more than hundreds of lxc tenants acts as routers within one host, a global xfrm_state/policy_lock becomes the bottleneck. But onces those locks are decoupled in a per-namespace fashion, locks contend is just with in specific name space scope, without causing additional SPD/SAD access delay for other name space. Also this patch improve scalability while as without changing original xfrm behavior. Signed-off-by: 's avatarFan Du <> Signed-off-by: 's avatarSteffen Klassert <>
parent 8d549c4f
......@@ -59,6 +59,10 @@ struct netns_xfrm {
struct dst_ops xfrm6_dst_ops;
spinlock_t xfrm_state_lock;
spinlock_t xfrm_policy_sk_bundle_lock;
rwlock_t xfrm_policy_lock;
struct mutex xfrm_cfg_mutex;
......@@ -53,7 +53,6 @@
#define XFRM_INC_STATS_USER(net, field) ((void)(net))
extern struct mutex xfrm_cfg_mutex;
/* Organization of SPD aka "XFRM rules"
......@@ -1409,7 +1408,7 @@ static inline void xfrm_sysctl_fini(struct net *net)
void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto);
int xfrm_state_walk(struct net *net, struct xfrm_state_walk *walk,
int (*func)(struct xfrm_state *, int, void*), void *);
void xfrm_state_walk_done(struct xfrm_state_walk *walk);
void xfrm_state_walk_done(struct xfrm_state_walk *walk, struct net *net);
struct xfrm_state *xfrm_state_alloc(struct net *net);
struct xfrm_state *xfrm_state_find(const xfrm_address_t *daddr,
const xfrm_address_t *saddr,
......@@ -1436,12 +1435,12 @@ struct xfrm_state *xfrm_state_lookup_byaddr(struct net *net, u32 mark,
unsigned short family);
int xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n,
unsigned short family);
unsigned short family, struct net *net);
int xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
unsigned short family);
static inline int xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src,
int n, unsigned short family)
int n, unsigned short family, struct net *net)
return -ENOSYS;
......@@ -1553,7 +1552,7 @@ void xfrm_policy_walk_init(struct xfrm_policy_walk *walk, u8 type);
int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk,
int (*func)(struct xfrm_policy *, int, int, void*),
void *);
void xfrm_policy_walk_done(struct xfrm_policy_walk *walk);
void xfrm_policy_walk_done(struct xfrm_policy_walk *walk, struct net *net);
int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl);
struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark,
u8 type, int dir,
......@@ -1576,7 +1575,7 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol);
int km_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
const struct xfrm_migrate *m, int num_bundles,
const struct xfrm_kmaddress *k);
struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m);
struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *net);
struct xfrm_state *xfrm_state_migrate(struct xfrm_state *x,
struct xfrm_migrate *m);
int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
......@@ -1785,7 +1785,9 @@ static int pfkey_dump_sa(struct pfkey_sock *pfk)
static void pfkey_dump_sa_done(struct pfkey_sock *pfk)
struct net *net = sock_net(&pfk->sk);
xfrm_state_walk_done(&pfk->dump.u.state, net);
static int pfkey_dump(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs)
......@@ -1861,7 +1863,7 @@ static u32 gen_reqid(struct net *net)
xfrm_policy_walk_init(&walk, XFRM_POLICY_TYPE_MAIN);
rc = xfrm_policy_walk(net, &walk, check_reqid, (void*)&reqid);
xfrm_policy_walk_done(&walk, net);
if (rc != -EEXIST)
return reqid;
} while (reqid != start);
......@@ -2660,7 +2662,9 @@ static int pfkey_dump_sp(struct pfkey_sock *pfk)
static void pfkey_dump_sp_done(struct pfkey_sock *pfk)
struct net *net = sock_net((struct sock *)pfk);
xfrm_policy_walk_done(&pfk->dump.u.policy, net);
static int pfkey_spddump(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs)
......@@ -3570,6 +3574,7 @@ static int pfkey_sendmsg(struct kiocb *kiocb,
struct sk_buff *skb = NULL;
struct sadb_msg *hdr = NULL;
int err;
struct net *net = sock_net(sk);
if (msg->msg_flags & MSG_OOB)
......@@ -3592,9 +3597,9 @@ static int pfkey_sendmsg(struct kiocb *kiocb,
if (!hdr)
goto out;
err = pfkey_process(sk, skb, hdr);
if (err && hdr && pfkey_error(hdr, err, sk) == 0)
......@@ -877,7 +877,10 @@ static int dump_one_state(struct xfrm_state *x, int count, void *ptr)
static int xfrm_dump_sa_done(struct netlink_callback *cb)
struct xfrm_state_walk *walk = (struct xfrm_state_walk *) &cb->args[1];
struct sock *sk = cb->skb->sk;
struct net *net = sock_net(sk);
xfrm_state_walk_done(walk, net);
return 0;
......@@ -1555,8 +1558,9 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr
static int xfrm_dump_policy_done(struct netlink_callback *cb)
struct xfrm_policy_walk *walk = (struct xfrm_policy_walk *) &cb->args[1];
struct net *net = sock_net(cb->skb->sk);
xfrm_policy_walk_done(walk, net);
return 0;
......@@ -2403,9 +2407,11 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
static void xfrm_netlink_rcv(struct sk_buff *skb)
struct net *net = sock_net(skb->sk);
netlink_rcv_skb(skb, &xfrm_user_rcv_msg);
static inline size_t xfrm_expire_msgsize(void)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment