Hi Peter, Paolo,
On 17/10/2019 12:36, Paolo Abeni wrote:
Hi,
On Wed, 2019-10-16 at 17:02 -0700, Peter Krystad wrote:
> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> index 4fdcbb0f4285..bc38527209c9 100644
[...]
> @@ -1311,3 +1409,33 @@ void mptcp_proto_init(void)
>
> inet_register_protosw(&mptcp_protosw);
> }
> +
> +#if IS_ENABLED(CONFIG_IPV6)
> +static struct proto_ops mptcp_v6_stream_ops;
> +
> +static struct inet_protosw mptcp_v6_protosw = {
> + .type = SOCK_STREAM,
> + .protocol = IPPROTO_MPTCP,
> + .prot = &mptcp_prot,
> + .ops = &mptcp_v6_stream_ops,
> + .flags = INET_PROTOSW_ICSK,
> +};
> +
> +int mptcp_proto_v6_init(void)
> +{
> + int err;
> +
> + mptcp_v6_stream_ops = inet6_stream_ops;
> + mptcp_v6_stream_ops.bind = mptcp_v6_bind;
> + mptcp_v6_stream_ops.connect = mptcp_v6_stream_connect;
> + mptcp_v6_stream_ops.poll = mptcp_poll;
> + mptcp_v6_stream_ops.accept = mptcp_stream_accept;
> + mptcp_v6_stream_ops.getname = mptcp_v6_getname;
> + mptcp_v6_stream_ops.listen = mptcp_v6_listen;
> + mptcp_v6_stream_ops.shutdown = mptcp_shutdown;
> +
> + err = inet6_register_protosw(&mptcp_v6_protosw);
For IPv4 we currently do panic, if we can't register the protosw
struct, possibly we could explicitly handle the failure even there?
Is it not safer to report this error and free what has been register
just in case IPv6 is loaded as a module?
Overall this looks nice and clean! What about moving the ipv6
protocol.c support into a separate file, to reduce the preprocessor
conditionals?
Or force IPV6 support? :-)
[...]
> @@ -226,6 +290,26 @@ static int subflow_conn_request(struct sock *sk, struct sk_buff
*skb)
> return 0;
> }
>
> +static int subflow_v6_conn_request(struct sock *sk, struct sk_buff *skb)
> +{
> + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
> +
> + pr_debug("subflow=%p", subflow);
Just to avoid duplicate debug messages for IPv4 packets, what about
moving this one below the next if?
[...]
> @@ -309,7 +393,70 @@ static struct sock *subflow_syn_recv_sock(const struct sock
*sk,
> return NULL;
> }
>
> -static struct inet_connection_sock_af_ops subflow_specific;
> +#if IS_ENABLED(CONFIG_IPV6)
> +static struct sock *subflow_v6_syn_recv_sock(const struct sock *sk,
> + struct sk_buff *skb,
> + struct request_sock *req,
> + struct dst_entry *dst,
> + struct request_sock *req_unhash,
> + bool *own_req)
> +{
> + struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk);
> + struct mptcp_subflow_request_sock *subflow_req;
> + struct tcp_options_received opt_rx;
> + struct sock *child;
> +
> + pr_debug("listener=%p, req=%p, conn=%p", listener, req,
listener->conn);
> +
> + /* if the sk is MP_CAPABLE, we already received the client key */
> + subflow_req = mptcp_subflow_rsk(req);
> + if (!subflow_req->mp_capable && subflow_req->mp_join) {
> + opt_rx.mptcp.mp_join = 0;
> + mptcp_get_options(skb, &opt_rx);
> + if (!opt_rx.mptcp.mp_join ||
> + !subflow_hmac_valid(req, &opt_rx))
> + return NULL;
> + }
> +
> + child = tcp_v6_syn_recv_sock(sk, skb, req, dst, req_unhash, own_req);
> +
> + if (child && *own_req) {
> + struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(child);
> +
> + if (!ctx)
> + goto close_child;
> +
> + if (ctx->mp_capable) {
> + if (mptcp_token_new_accept(ctx->token))
> + goto close_child;
> + } else if (ctx->mp_join) {
> + struct mptcp_sock *owner;
> +
> + owner = mptcp_token_get_sock(ctx->token);
> + if (!owner)
> + goto close_child;
> +
> + ctx->conn = (struct sock *)owner;
> + mptcp_finish_join(child);
> + }
> + }
> +
> + return child;
> +
> +close_child:
> + pr_debug("closing child socket");
> + inet_sk_set_state(child, TCP_CLOSE);
> + sock_set_flag(child, SOCK_DEAD);
> + inet_csk_destroy_sock(child);
> + return NULL;
> +}
As this looks quite alike the ipv4 counter part, except for the
tcp_v4_syn_recv_sock() call, replaced here by tcp_v6_syn_recv_sock(),
what about using a function pointer and the same overall code?
We might be able to do that for a few other functions. Would be the goal
to avoid duplication? Or readability.
If the additional indirect call overhead is a concern, we can
leverage
the indirect call infrastructure ;) [shameless self-promotion;)]
:-)
But a good idea!
How about moving even the subflow ipv6 code to a separate, ipv6
specific file ? (possibly the same used for ipv6 protocol.c code)
By having specific IPv6 code in another file, that might force us to
duplicate more code sometimes, not to have to read from one file to
another. But it might help for the readability. I guess you guys have
experience about what is best here :-)
Overall this looks much more clean and self-contained than expected!
That's true, very good, thank you for sharing this!
I just have an additional comment about the modified TCP code, a detail:
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 1f3a87f4867e..9ed884818c29 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -2110,9 +2110,16 @@ int __init tcpv6_init(void)
ret = register_pernet_subsys(&tcpv6_net_ops);
if (ret)
goto out_tcpv6_protosw;
+
+ ret = mptcpv6_init();
+ if (ret)
+ goto out_mptcpv6_init;
+
out:
return ret;
+out_mptcpv6_init:
Detail: I guess the label should be renamed to something like
"out_tcp6_pernet_subsys" to keep the same logic.
+ unregister_pernet_subsys(&tcpv6_net_ops);
out_tcpv6_protosw:
inet6_unregister_protosw(&tcpv6_protosw);
out_tcpv6_protocol:
Cheers,
Matt
--
Matthieu Baerts | R&D Engineer
matthieu.baerts(a)tessares.net
Tessares SA | Hybrid Access Solutions
www.tessares.net
1 Avenue Jean Monnet, 1348 Louvain-la-Neuve, Belgium