Send a DATA_FIN along with any subflow TCP FIN flag when the MPTCP
socket is closing or shutting down writes.
Signed-off-by: Mat Martineau <mathew.j.martineau(a)linux.intel.com>
---
The DATA_FIN ack patch (would have been a 2nd patch after this) isn't
quite working yet, but I wanted to share the sending patch since there
was a minor conflict after some recent squashing. I also confirmed that
this applies cleanly after the kselftest patch for "part 1", and that
the self tests pass.
Changes from v2:
* Rebased and confirmed selftests when cherry-picked to the end of the "part 1"
(single-subflow) chunk of the patch series.
Changes from v1:
* Fixed problem with the receive side truncating data. The issue happened
when a DSS mapping for DATA_FIN was found on a data segment, where the
data in the packet was already covered by an earlier mapping.
* Only send DATA_FIN when the subflow is sending a FIN and the MPTCP
socket is no longer in the TCP_ESTABLISHED state. This prevents sending
DATA_FIN when an individual subflow is removed but the MPTCP-level
connection is kept alive.
* Changes to warnings and comments suggested in code review.
net/mptcp/options.c | 38 ++++++++++++++++++++++++++++++++++++--
net/mptcp/protocol.c | 4 ++++
net/mptcp/subflow.c | 42 ++++++++++++++++++++++++++++--------------
3 files changed, 68 insertions(+), 16 deletions(-)
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 644c30d46922..be180c4f71a4 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -286,19 +286,49 @@ static bool mptcp_established_options_mp(struct sock *sk, unsigned
int *size,
return false;
}
+static void mptcp_write_data_fin(struct mptcp_subflow_context *subflow,
+ struct mptcp_ext *ext)
+{
+ ext->data_fin = 1;
+
+ if (!ext->use_map) {
+ /* RFC6824 requires a DSS mapping with specific values
+ * if DATA_FIN is set but no data payload is mapped
+ */
+ ext->use_map = 1;
+ ext->dsn64 = 1;
+ ext->data_seq = mptcp_sk(subflow->conn)->write_seq;
+ ext->subflow_seq = 0;
+ ext->data_len = 1;
+ } else {
+ /* If there's an existing DSS mapping, DATA_FIN consumes
+ * 1 additional byte of mapping space.
+ */
+ ext->data_len++;
+ }
+}
+
static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
unsigned int *size,
unsigned int remaining,
struct mptcp_out_options *opts)
{
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
unsigned int dss_size = 0;
struct mptcp_ext *mpext;
struct mptcp_sock *msk;
unsigned int ack_size;
+ u8 tcp_fin;
- mpext = skb ? mptcp_get_ext(skb) : NULL;
+ if (skb) {
+ mpext = mptcp_get_ext(skb);
+ tcp_fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN;
+ } else {
+ mpext = NULL;
+ tcp_fin = 0;
+ }
- if (!skb || (mpext && mpext->use_map)) {
+ if (!skb || (mpext && mpext->use_map) || tcp_fin) {
unsigned int map_size;
map_size = TCPOLEN_MPTCP_DSS_BASE + TCPOLEN_MPTCP_DSS_MAP64;
@@ -307,6 +337,10 @@ static bool mptcp_established_options_dss(struct sock *sk, struct
sk_buff *skb,
dss_size = map_size;
if (mpext)
opts->ext_copy = *mpext;
+
+ if (skb && tcp_fin &&
+ subflow->conn->sk_state != TCP_ESTABLISHED)
+ mptcp_write_data_fin(subflow, &opts->ext_copy);
}
ack_size = TCPOLEN_MPTCP_DSS_ACK64;
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 9b384da910ce..f95a3f824a18 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -919,6 +919,10 @@ static int mptcp_shutdown(struct socket *sock, int how)
pr_debug("sk=%p, how=%d", msk, how);
lock_sock(sock->sk);
+
+ if (how == SHUT_WR || how == SHUT_RDWR)
+ inet_sk_state_store(sock->sk, TCP_FIN_WAIT1);
+
ssock = __mptcp_fallback_get_ref(msk);
if (ssock) {
release_sock(sock->sk);
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 998fc6066ea5..ee29281d4570 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -277,6 +277,7 @@ static enum mapping_status get_mapping_status(struct sock *ssk)
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
struct mptcp_ext *mpext;
struct sk_buff *skb;
+ u16 data_len;
u64 map_seq;
skb = skb_peek(&ssk->sk_receive_queue);
@@ -301,25 +302,38 @@ static enum mapping_status get_mapping_status(struct sock *ssk)
if (!subflow->map_valid)
return MAPPING_INVALID;
+
goto validate_seq;
}
- pr_debug("seq=%llu is64=%d ssn=%u data_len=%u", mpext->data_seq,
- mpext->dsn64, mpext->subflow_seq, mpext->data_len);
+ pr_debug("seq=%llu is64=%d ssn=%u data_len=%u data_fin=%d",
+ mpext->data_seq, mpext->dsn64, mpext->subflow_seq,
+ mpext->data_len, mpext->data_fin);
- if (mpext->data_len == 0) {
+ data_len = mpext->data_len;
+ if (data_len == 0) {
pr_err("Infinite mapping not handled");
return MAPPING_INVALID;
- } else if (mpext->subflow_seq == 0 &&
- mpext->data_fin == 1) {
- if (WARN_ON_ONCE(mpext->data_len != 1))
- return false;
+ }
- /* do not try hard to handle this any better, till we have
- * real data_fin support
- */
- pr_debug("DATA_FIN with no payload");
- return MAPPING_DATA_FIN;
+ if (mpext->data_fin == 1) {
+ if (data_len == 1) {
+ pr_debug("DATA_FIN with no payload");
+ if (subflow->map_valid) {
+ /* A DATA_FIN might arrive in a DSS
+ * option before the previous mapping
+ * has been fully consumed. Continue
+ * handling the existing mapping.
+ */
+ skb_ext_del(skb, SKB_EXT_MPTCP);
+ return MAPPING_OK;
+ } else {
+ return MAPPING_DATA_FIN;
+ }
+ }
+
+ /* Adjust for DATA_FIN using 1 byte of sequence space */
+ data_len--;
}
if (!mpext->dsn64) {
@@ -334,7 +348,7 @@ static enum mapping_status get_mapping_status(struct sock *ssk)
/* Allow replacing only with an identical map */
if (subflow->map_seq == map_seq &&
subflow->map_subflow_seq == mpext->subflow_seq &&
- subflow->map_data_len == mpext->data_len) {
+ subflow->map_data_len == data_len) {
skb_ext_del(skb, SKB_EXT_MPTCP);
return MAPPING_OK;
}
@@ -351,7 +365,7 @@ static enum mapping_status get_mapping_status(struct sock *ssk)
subflow->map_seq = map_seq;
subflow->map_subflow_seq = mpext->subflow_seq;
- subflow->map_data_len = mpext->data_len;
+ subflow->map_data_len = data_len;
subflow->map_valid = 1;
pr_debug("new map seq=%llu subflow_seq=%u data_len=%u",
subflow->map_seq, subflow->map_subflow_seq,
--
2.24.0