Skip to content

Commit

Permalink
ipvs: toa enhancements
Browse files Browse the repository at this point in the history
1. Do not insert toa data in syn packets any more.
2. When inserting toa data fails, compact useless tcp option space and then try again.

Signed-off-by: ywc689 <[email protected]>
  • Loading branch information
ywc689 committed Dec 11, 2023
1 parent 486ed1e commit 74399d8
Showing 1 changed file with 81 additions and 48 deletions.
129 changes: 81 additions & 48 deletions src/ipvs/ip_vs_proto_tcp.c
Original file line number Diff line number Diff line change
Expand Up @@ -306,46 +306,86 @@ static void tcp_in_remove_ts(struct tcphdr *tcph)
}
}

/* use NOP option to replace TCP_OLEN_IP4_ADDR and TCP_OLEN_IP6_ADDR opt */
static void tcp_in_remove_toa(struct tcphdr *tcph, int af)
/*
* Remove NOP and TOA options present in the mbuf and compact the option space.
* Return the trimmed length on success (0 if the packet length is unchanged), otherwise EDPVS_INVPKT on failure.
* */
static int tcp_in_prune_options(int af, struct rte_mbuf *mbuf, struct tcphdr *tcph)
{
unsigned char *ptr;
int len, i;
uint32_t tcp_opt_len = af == AF_INET ? TCP_OLEN_IP4_ADDR : TCP_OLEN_IP6_ADDR;
unsigned char *ptr, *fast, *slow;
unsigned char *l3hdr;
int i, optlen;
unsigned int pruned;
uint8_t opcode, opsize;

ptr = (unsigned char *)(tcph + 1);
len = (tcph->doff << 2) - sizeof(struct tcphdr);

while (len > 0) {
int opcode = *ptr++;
int opsize;
fast = slow = ptr;
optlen = (tcph->doff << 2) - sizeof(struct tcphdr);

while (optlen > 0) {
opcode = *ptr++;
switch (opcode) {
case TCP_OPT_EOL:
return;
goto fini;
case TCP_OPT_NOP:
len--;
fast++;
optlen--;
continue;
default:
opsize = *ptr++;
if (opsize < 2) /* silly options */
return;
if (opsize > len)
return; /* partial options */
if ((opcode == TCP_OPT_ADDR) && (opsize == tcp_opt_len)) {
for (i = 0; i < tcp_opt_len; i++) {
*(ptr - 2 + i) = TCP_OPT_NOP;
if (opsize < 2) /* silly options */
return EDPVS_INVPKT;
if (opsize > optlen)
return EDPVS_INVPKT; /* partial options */
if (opcode == TCP_OPT_ADDR) {
for (i = 0; i < opsize; i++) {
fast++;
}
} else {
for (i = 0; i < opsize; i++) {
if (slow != fast)
*slow = *fast;
slow++;
fast++;
}
/* DON'T RETURN
* keep search other TCP_OPT_ADDR ,and clear them.
* See https://github.com/iqiyi/dpvs/pull/925 for more detail. */
}

ptr += opsize - 2;
len -= opsize;
optlen -= opsize;
break;
}
}

fini:
if (slow != fast) {
pruned = fast - slow;
while (pruned & 0x3) { /* 4-bytes alignment for tcp options */
*slow++ = 0;
pruned--;
}
if (slow == fast)
return 0;
/* trim the packet */
l3hdr = rte_pktmbuf_mtod(mbuf, void *);
if (unlikely(mbuf_may_pull(mbuf, mbuf->pkt_len) != 0)) {
memset(slow, 0, pruned);
return EDPVS_INVPKT;
}
if (unlikely(fast - l3hdr > mbuf->pkt_len)) {
memset(slow, 0, pruned);
return EDPVS_INVPKT;
}
memmove(slow, fast, mbuf->pkt_len - (fast - l3hdr));
tcph->doff -= (pruned >> 2);
if (af == AF_INET)
((struct rte_ipv4_hdr *)l3hdr)->total_length =
htons(ntohs(((struct rte_ipv4_hdr *)l3hdr)->total_length) - pruned);
else
((struct rte_ipv6_hdr *)l3hdr)->payload_len =
htons(ntohs(((struct rte_ipv6_hdr *)l3hdr)->payload_len) - pruned);
return pruned;
}
return 0;
}

static int tcp_in_add_proxy_proto(struct dp_vs_conn *conn, struct rte_mbuf *mbuf,
Expand Down Expand Up @@ -797,15 +837,12 @@ static int tcp_fnat_in_handler(struct dp_vs_proto *proto,
struct dp_vs_conn *conn, struct rte_mbuf *mbuf)
{
struct tcphdr *th;
/* af/mbuf may be changed for nat64 which in af is ipv6 and out is ipv4 */
int iaf, oaf;
int af; /* outbound af */
int iphdrlen;
int err, pp_hdr_shift = 0;

iaf = tuplehash_in(conn).af;
oaf = tuplehash_out(conn).af;

iphdrlen = ((AF_INET6 == oaf) ? ip6_hdrlen(mbuf): ip4_hdrlen(mbuf));
af = tuplehash_out(conn).af;
iphdrlen = ((AF_INET6 == af) ? ip6_hdrlen(mbuf): ip4_hdrlen(mbuf));

if (mbuf_may_pull(mbuf, iphdrlen + sizeof(*th)) != 0)
return EDPVS_INVPKT;
Expand All @@ -819,41 +856,37 @@ static int tcp_fnat_in_handler(struct dp_vs_proto *proto,

/*
* for SYN packet
* 1. remove tcp timestamp option
* laddress for different client have diff timestamp.
* 2. save original TCP sequence for seq-adjust later.
* since TCP option will be change.
* 3. add TOA option
* so that RS with TOA module can get real client IP.
* 1. remove tcp timestamp option,
* laddrs for different clients have diff timestamp.
* 2. save original TCP sequence for seq-adjust later
* since TCP option will be changed.
*/
if (th->syn && !th->ack) {
tcp_in_remove_ts(th);

tcp_in_init_seq(conn, mbuf, th);
if (PROXY_PROTOCOL_V1 != PROXY_PROTOCOL_VERSION(conn->pp_version)
&& PROXY_PROTOCOL_V2 != PROXY_PROTOCOL_VERSION(conn->pp_version)) {
if (unlikely(tcp_in_add_toa(conn, mbuf, th) != EDPVS_OK)) {
tcp_in_remove_toa(th, iaf);
}
}
}

/* add toa/proxy_proto to first data packet */
/* Add toa/proxy_protocol to the first data packet */
if (ntohl(th->ack_seq) == conn->fnat_seq.fdata_seq
&& !th->syn && !th->rst /*&& !th->fin*/) {
if (PROXY_PROTOCOL_V2 == PROXY_PROTOCOL_VERSION(conn->pp_version)
|| PROXY_PROTOCOL_V1 == PROXY_PROTOCOL_VERSION(conn->pp_version)) {
if (conn->fnat_seq.isn - conn->fnat_seq.delta + 1 == ntohl(th->seq)) {
/* avoid inserting repetitive ppdata when the first rs ack delayed */
/* avoid inserting repetitive proxy protocol data
* when the first rs ack is delayed */
err = tcp_in_add_proxy_proto(conn, mbuf, th, iphdrlen, &pp_hdr_shift);
if (unlikely(EDPVS_OK != err))
RTE_LOG(INFO, IPVS, "%s: insert proxy protocol fail -- %s\n",
__func__, dpvs_strerror(err));
th = ((void *)th) + pp_hdr_shift;
}
} else {
if (unlikely(tcp_in_add_toa(conn, mbuf, th) != EDPVS_OK)) {
tcp_in_remove_toa(th, iaf);
} else { /* use toa */
err = tcp_in_add_toa(conn, mbuf, th);
if (unlikely(EDPVS_OK != err)) {
if (tcp_in_prune_options(af, mbuf, th) >= TCP_OLEN_IP4_ADDR
&& (err == EDPVS_NOROOM)) {
tcp_in_add_toa(conn, mbuf, th);
}
}
}
}
Expand All @@ -864,7 +897,7 @@ static int tcp_fnat_in_handler(struct dp_vs_proto *proto,
th->source = conn->lport;
th->dest = conn->dport;

return tcp_send_csum(oaf, iphdrlen, th, conn, mbuf, conn->in_dev);
return tcp_send_csum(af, iphdrlen, th, conn, mbuf, conn->in_dev);
}

static int tcp_fnat_out_handler(struct dp_vs_proto *proto,
Expand Down

0 comments on commit 74399d8

Please sign in to comment.