Hi Mat,
Mat Martineau <mathew.j.martineau(a)linux.intel.com> 于2020年12月18日周五 上午8:14写道:
On Sun, 13 Dec 2020, Geliang Tang wrote:
> This patch created a listening socket when an address with a port-number
> is added by PM netlink. Then binded the new port to the socket, and
> listened for the connection.
>
> Signed-off-by: Geliang Tang <geliangtang(a)gmail.com>
> ---
> net/mptcp/pm_netlink.c | 64 ++++++++++++++++++++++++++++++++++++++++++
> net/mptcp/protocol.c | 2 +-
> net/mptcp/protocol.h | 3 ++
> net/mptcp/subflow.c | 4 +--
> 4 files changed, 70 insertions(+), 3 deletions(-)
>
> diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
> index 9b1f6298bbdb..1548efb22a1b 100644
> --- a/net/mptcp/pm_netlink.c
> +++ b/net/mptcp/pm_netlink.c
> @@ -26,6 +26,7 @@ struct mptcp_pm_addr_entry {
> struct list_head list;
> struct mptcp_addr_info addr;
> struct rcu_head rcu;
> + struct socket *lsk;
> };
>
> struct mptcp_pm_add_entry {
> @@ -613,6 +614,53 @@ static int mptcp_pm_nl_append_new_local_addr(struct
pm_nl_pernet *pernet,
> return ret;
> }
>
> +static int mptcp_pm_nl_create_listen_socket(struct sock *sk,
> + struct mptcp_pm_addr_entry *entry)
> +{
> + struct sockaddr_storage addr;
> + struct mptcp_sock *msk;
> + struct socket *ssock;
> + int backlog = 1024;
> + int err;
> +
> + err = sock_create_kern(sock_net(sk), entry->addr.family,
> + SOCK_STREAM, IPPROTO_MPTCP, &entry->lsk);
> + if (err)
> + return err;
> +
> + msk = mptcp_sk(entry->lsk->sk);
> + if (!msk) {
> + err = -EINVAL;
> + goto out;
> + }
> +
> + ssock = __mptcp_nmpc_socket(msk);
> + if (!ssock) {
> + err = -EINVAL;
> + goto out;
> + }
> +
> + mptcp_info2sockaddr(&entry->addr, &addr);
> + err = kernel_bind(ssock, (struct sockaddr *)&addr,
> + sizeof(struct sockaddr_in));
> + if (err) {
> + pr_warn("kernel_bind error, err=%d", err);
> + goto out;
> + }
> +
> + err = kernel_listen(ssock, backlog);
> + if (err) {
> + pr_warn("kernel_listen error, err=%d", err);
> + goto out;
> + }
> +
> + return 0;
> +
> +out:
> + sock_release(entry->lsk);
> + return err;
> +}
> +
> int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc)
> {
> struct mptcp_pm_addr_entry *entry;
> @@ -657,6 +705,8 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct
sock_common *skc)
> entry->addr.ifindex = 0;
> entry->addr.flags = 0;
> entry->addr.id = 0;
> + entry->addr.port = 0;
> + entry->lsk = NULL;
> ret = mptcp_pm_nl_append_new_local_addr(pernet, entry);
> if (ret < 0)
> kfree(entry);
> @@ -808,9 +858,19 @@ static int mptcp_nl_cmd_add_addr(struct sk_buff *skb, struct
genl_info *info)
> }
>
> *entry = addr;
> + if (entry->addr.port) {
> + ret = mptcp_pm_nl_create_listen_socket(skb->sk, entry);
> + if (ret) {
> + GENL_SET_ERR_MSG(info, "create listen socket
error");
> + kfree(entry);
> + return ret;
> + }
> + }
> ret = mptcp_pm_nl_append_new_local_addr(pernet, entry);
> if (ret < 0) {
> GENL_SET_ERR_MSG(info, "too many addresses or duplicate
one");
> + if (entry->lsk)
> + sock_release(entry->lsk);
> kfree(entry);
> return ret;
> }
> @@ -921,6 +981,8 @@ static int mptcp_nl_cmd_del_addr(struct sk_buff *skb, struct
genl_info *info)
> spin_unlock_bh(&pernet->lock);
>
> mptcp_nl_remove_subflow_and_signal_addr(sock_net(skb->sk),
&entry->addr);
> + if (entry->lsk)
> + sock_release(entry->lsk);
> kfree_rcu(entry, rcu);
Releasing the socket here could be racy - since the list is rcu-protected,
the entry could still be accessed (which is why it's freed with
kfree_rcu()). Rather than calling kfree_rcu(), use a custom callback with
call_rcu() that will both release lsk and kfree the list entry.
It dosen't work. I fixed it like this:
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 5f80b886aecb..d6b937bffb43 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -1010,6 +1010,16 @@ static int
mptcp_nl_remove_subflow_and_signal_addr(struct net *net,
return 0;
}
+static void mptcp_pm_addr_entry_free(struct rcu_head *head)
+{
+ struct mptcp_pm_addr_entry *entry;
+
+ entry = container_of(head, struct mptcp_pm_addr_entry, rcu);
+ if (entry->lsk)
+ sock_release(entry->lsk);
+ kfree(entry);
+}
+
static int mptcp_nl_cmd_del_addr(struct sk_buff *skb, struct genl_info *info)
{
struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR];
@@ -1039,9 +1049,7 @@ static int mptcp_nl_cmd_del_addr(struct sk_buff
*skb, struct genl_info *info)
spin_unlock_bh(&pernet->lock);
mptcp_nl_remove_subflow_and_signal_addr(sock_net(skb->sk), &entry->addr);
- if (entry->lsk)
- sock_release(entry->lsk);
- kfree_rcu(entry, rcu);
+ call_rcu(&entry->rcu, mptcp_pm_addr_entry_free);
return ret;
}
@@ -1054,10 +1062,8 @@ static void __flush_addrs(struct net *net,
struct list_head *list)
cur = list_entry(list->next,
struct mptcp_pm_addr_entry, list);
mptcp_nl_remove_subflow_and_signal_addr(net, &cur->addr);
- if (cur->lsk)
- sock_release(cur->lsk);
list_del_rcu(&cur->list);
- kfree_rcu(cur, rcu);
+ call_rcu(&cur->rcu, mptcp_pm_addr_entry_free);
}
}
--
2.29.2
But I got this error:
[ 261.105979] MPTCP: sock_release 0000000058a9d993
[ 261.105985] BUG: sleeping function called from invalid context at
net/core/sock.c:3048
[ 261.105987] in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid:
0, name: swapper/2
[ 261.105990] 1 lock held by swapper/2/0:
[ 261.105992] #0: ffffffffac5a68e0 (rcu_callback){....}-{0:0}, at:
rcu_do_batch+0x216/0x900
[ 261.106009] CPU: 2 PID: 0 Comm: swapper/2 Kdump: loaded Not tainted
5.10.0-mptcp+ #429
[ 261.106011] Hardware name: TIMI Mi Laptop Pro 15/TM1905, BIOS
XMACM500P0301 04/08/2020
[ 261.106013] Call Trace:
[ 261.106016] <IRQ>
[ 261.106022] dump_stack+0x8b/0xb0
[ 261.106028] ___might_sleep.cold+0xb6/0xc6
[ 261.106033] lock_sock_nested+0x28/0x90
[ 261.106040] mptcp_close+0x20/0x2f0
[ 261.106043] ? rcu_do_batch+0x216/0x900
[ 261.106048] ? rcu_do_batch+0x216/0x900
[ 261.106052] inet_release+0x42/0x80
[ 261.106058] sock_release+0x20/0x70
[ 261.106063] mptcp_pm_addr_entry_free+0x3b/0x60
[ 261.106067] rcu_do_batch+0x289/0x900
[ 261.106078] rcu_core+0x27d/0x450
[ 261.106085] __do_softirq+0xd5/0x485
[ 261.106096] asm_call_irq_on_stack+0xf/0x20
[ 261.106098] </IRQ>
[ 261.106103] do_softirq_own_stack+0x5b/0x70
[ 261.106106] __irq_exit_rcu+0xda/0x120
[ 261.106110] irq_exit_rcu+0xa/0x20
[ 261.106113] sysvec_apic_timer_interrupt+0x4b/0xa0
[ 261.106118] asm_sysvec_apic_timer_interrupt+0x12/0x20
[ 261.106122] RIP: 0010:cpuidle_enter_state+0xfa/0x470
-Geliang
> >
> > return ret;
> > @@ -934,6 +996,8 @@ static void __flush_addrs(struct net *net, struct list_head
*list)
> > cur = list_entry(list->next,
> > struct mptcp_pm_addr_entry, list);
> > mptcp_nl_remove_subflow_and_signal_addr(net, &cur->addr);
> > + if (cur->lsk)
> > + sock_release(cur->lsk);
> > list_del_rcu(&cur->list);
> > kfree_rcu(cur, rcu);
>
> Same issue as above with sock_release() and rcu.
>
>
> Mat
>
> > }
> > diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> > index 8ec9e4582d18..79e1b34ecb53 100644
> > --- a/net/mptcp/protocol.c
> > +++ b/net/mptcp/protocol.c
> > @@ -49,7 +49,7 @@ static void __mptcp_check_send_data_fin(struct sock *sk);
> > * completed yet or has failed, return the subflow socket.
> > * Otherwise return NULL.
> > */
> > -static struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk)
> > +struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk)
> > {
> > if (!msk->subflow || READ_ONCE(msk->can_ack))
> > return NULL;
> > diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
> > index d6400ad2d615..a2a031cca97a 100644
> > --- a/net/mptcp/protocol.h
> > +++ b/net/mptcp/protocol.h
> > @@ -473,11 +473,14 @@ void mptcp_subflow_shutdown(struct sock *sk, struct sock
*ssk, int how);
> > void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
> > struct mptcp_subflow_context *subflow);
> > void mptcp_subflow_reset(struct sock *ssk);
> > +struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk);
> >
> > /* called with sk socket lock held */
> > int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
> > const struct mptcp_addr_info *remote);
> > int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock);
> > +void mptcp_info2sockaddr(const struct mptcp_addr_info *info,
> > + struct sockaddr_storage *addr);
> >
> > static inline void mptcp_subflow_tcp_fallback(struct sock *sk,
> > struct mptcp_subflow_context *ctx)
> > diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
> > index 73e66a406d99..c64a1c41a29b 100644
> > --- a/net/mptcp/subflow.c
> > +++ b/net/mptcp/subflow.c
> > @@ -1073,8 +1073,8 @@ void mptcpv6_handle_mapped(struct sock *sk, bool mapped)
> > }
> > #endif
> >
> > -static void mptcp_info2sockaddr(const struct mptcp_addr_info *info,
> > - struct sockaddr_storage *addr)
> > +void mptcp_info2sockaddr(const struct mptcp_addr_info *info,
> > + struct sockaddr_storage *addr)
> > {
> > memset(addr, 0, sizeof(*addr));
> > addr->ss_family = info->family;
> > --
> > 2.29.2
>
> --
> Mat Martineau
> Intel