Skip to content

Commit

Permalink
Fix load balanced hairpin traffic for fragmented packets.
Browse files Browse the repository at this point in the history
If we have a UDP load balancer - 10.0.0.10:80 = 10.0.0.3:8080, in order to
determine if the load balanced traffic needs to be hairpinned, the
vip - 10.0.0.10 and the vip port - 80 are stored in the registers before
the packet is load balanced using the below logical flow -

table=6 (ls_in_pre_stateful ), priority=120  ,
  match=(reg0[2] == 1 && ip4.dst == 10.0.0.10 && tcp.dst == 80),
  action=(reg1 = 10.0.0.10; reg2[0..15] = 80; ct_lb_mark;)

These registers are used in the later stages to check if the load balanced
packet needs to be hairpinned or not.

However, if the packet is fragmented we may not be able to match on the
L4 fields (tcp, udp or sctp dest port) and this breaks the hairpin
traffic.

This patch addressed this issue by making use of ct_nw_dst/ct_ip6_dst and
ct_tp_dst conntrack fields to determine the hairpin load balanced
traffic.

In order to not break hardware offload on certain smart nics, care is taken
to match on these fields only for fragmented packets.

Note: Relying on conntrack to reassemble packets is not exactly correct, it
only accidentaly works with the kernel datapath.  In our internal bug
tracking system we have this issue to track this incorrect assumption:
https://issues.redhat.com/browse/FDP-913

Reported-at: https://issues.redhat.com/browse/FDP-905
Fixes: 1139b65 ("Don't blindly save original dst IP and Port to avoid megaflow unwildcarding.")
Acked-by: Dumitru Ceara <[email protected]>
Suggested-by: Dumitru Ceara <[email protected]>
Signed-off-by: Numan Siddique <[email protected]>
(cherry picked from commit 0f806cf)
(cherry picked from commit 0e2fb83)
  • Loading branch information
numansiddique committed Nov 9, 2024
1 parent 570b968 commit 2ffc406
Show file tree
Hide file tree
Showing 13 changed files with 521 additions and 54 deletions.
3 changes: 3 additions & 0 deletions controller/lflow.c
Original file line number Diff line number Diff line change
Expand Up @@ -887,6 +887,9 @@ add_matches_to_flow_table(const struct sbrec_logical_flow *lflow,
.in_port_sec_ptable = OFTABLE_CHK_IN_PORT_SEC,
.out_port_sec_ptable = OFTABLE_CHK_OUT_PORT_SEC,
.mac_cache_use_table = OFTABLE_MAC_CACHE_USE,
.ct_nw_dst_load_table = OFTABLE_CT_ORIG_NW_DST_LOAD,
.ct_ip6_dst_load_table = OFTABLE_CT_ORIG_IP6_DST_LOAD,
.ct_tp_dst_load_table = OFTABLE_CT_ORIG_TP_DST_LOAD,
.ctrl_meter_id = ctrl_meter_id,
.common_nat_ct_zone = get_common_nat_zone(ldp),
};
Expand Down
4 changes: 4 additions & 0 deletions controller/lflow.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,10 @@ struct uuid;
#define OFTABLE_ECMP_NH 77
#define OFTABLE_CHK_LB_AFFINITY 78
#define OFTABLE_MAC_CACHE_USE 79
#define OFTABLE_CT_ORIG_NW_DST_LOAD 81
#define OFTABLE_CT_ORIG_IP6_DST_LOAD 82
#define OFTABLE_CT_ORIG_TP_DST_LOAD 83


struct lflow_ctx_in {
struct ovsdb_idl_index *sbrec_multicast_group_by_name_datapath;
Expand Down
37 changes: 37 additions & 0 deletions controller/physical.c
Original file line number Diff line number Diff line change
Expand Up @@ -2729,5 +2729,42 @@ physical_run(struct physical_ctx *p_ctx,
*/
add_default_drop_flow(p_ctx, OFTABLE_LOG_TO_PHY, flow_table);

/* Table 81, 82 and 83
* Match on ct.trk and ct.est and store the ct_nw_dst, ct_ip6_dst and
* ct_tp_dst in the registers. */
uint32_t ct_state = OVS_CS_F_TRACKED | OVS_CS_F_ESTABLISHED;
match_init_catchall(&match);
ofpbuf_clear(&ofpacts);

/* Add the flow:
* match = (ct.trk && ct.est), action = (reg8 = ct_tp_dst)
* table = 83
*/
match_set_ct_state_masked(&match, ct_state, ct_state);
put_move(MFF_CT_TP_DST, 0, MFF_LOG_CT_ORIG_TP_DST_PORT, 0, 16, &ofpacts);
ofctrl_add_flow(flow_table, OFTABLE_CT_ORIG_TP_DST_LOAD, 100, 0, &match,
&ofpacts, hc_uuid);

/* Add the flow:
* match = (ct.trk && ct.est && ip4), action = (reg4 = ct_nw_dst)
* table = 81
*/
ofpbuf_clear(&ofpacts);
match_set_dl_type(&match, htons(ETH_TYPE_IP));
put_move(MFF_CT_NW_DST, 0, MFF_LOG_CT_ORIG_NW_DST_ADDR, 0, 32, &ofpacts);
ofctrl_add_flow(flow_table, OFTABLE_CT_ORIG_NW_DST_LOAD, 100, 0, &match,
&ofpacts, hc_uuid);

/* Add the flow:
* match = (ct.trk && ct.est && ip6), action = (xxreg0 = ct_ip6_dst)
* table = 82
*/
ofpbuf_clear(&ofpacts);
match_set_dl_type(&match, htons(ETH_TYPE_IPV6));
put_move(MFF_CT_IPV6_DST, 0, MFF_LOG_CT_ORIG_IP6_DST_ADDR, 0,
128, &ofpacts);
ofctrl_add_flow(flow_table, OFTABLE_CT_ORIG_IP6_DST_LOAD, 100, 0, &match,
&ofpacts, hc_uuid);

ofpbuf_uninit(&ofpacts);
}
14 changes: 12 additions & 2 deletions include/ovn/actions.h
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,9 @@ struct collector_set_ids;
OVNACT(CHK_LB_AFF, ovnact_result) \
OVNACT(SAMPLE, ovnact_sample) \
OVNACT(MAC_CACHE_USE, ovnact_null) \
OVNACT(CT_ORIG_NW_DST, ovnact_result) \
OVNACT(CT_ORIG_IP6_DST, ovnact_result) \
OVNACT(CT_ORIG_TP_DST, ovnact_result) \

/* enum ovnact_type, with a member OVNACT_<ENUM> for each action. */
enum OVS_PACKED_ENUM ovnact_type {
Expand Down Expand Up @@ -411,10 +414,11 @@ struct ovnact_set_queue {
uint16_t queue_id;
};

/* OVNACT_DNS_LOOKUP, OVNACT_CHK_LB_HAIRPIN, OVNACT_CHK_LB_HAIRPIN_REPLY. */
/* OVNACT_DNS_LOOKUP, OVNACT_CHK_LB_HAIRPIN, OVNACT_CHK_LB_HAIRPIN_REPLY,
* OVNACT_CT_ORIG_NW_DST, CT_ORIG_IP6_DST, CT_ORIG_TP_DST */
struct ovnact_result {
struct ovnact ovnact;
struct expr_field dst; /* 1-bit destination field. */
struct expr_field dst; /* destination field. */
};

/* OVNACT_LOG. */
Expand Down Expand Up @@ -899,6 +903,12 @@ struct ovnact_encode_params {
this determines which CT zone to use */
uint32_t mac_cache_use_table; /* OpenFlow table for 'mac_cache_use'
* to resubmit. */
uint32_t ct_nw_dst_load_table; /* OpenFlow table for 'ct_nw_dst'
* to resubmit. */
uint32_t ct_ip6_dst_load_table; /* OpenFlow table for 'ct_ip6_dst'
* to resubmit. */
uint32_t ct_tp_dst_load_table; /* OpenFlow table for 'ct_tp_dst'
* to resubmit. */
};

void ovnacts_encode(const struct ovnact[], size_t ovnacts_len,
Expand Down
4 changes: 4 additions & 0 deletions include/ovn/logical-fields.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,10 @@ enum ovn_controller_event {
#define MFF_LOG_LB_AFF_MATCH_LR_IP6_ADDR MFF_XXREG1
#define MFF_LOG_LB_AFF_MATCH_PORT MFF_REG8

#define MFF_LOG_CT_ORIG_NW_DST_ADDR MFF_REG4
#define MFF_LOG_CT_ORIG_IP6_DST_ADDR MFF_XXREG0
#define MFF_LOG_CT_ORIG_TP_DST_PORT MFF_REG8

void ovn_init_symtab(struct shash *symtab);

/* MFF_LOG_FLAGS_REG bit assignments */
Expand Down
138 changes: 123 additions & 15 deletions lib/actions.c
Original file line number Diff line number Diff line change
Expand Up @@ -3271,18 +3271,19 @@ ovnact_set_queue_free(struct ovnact_set_queue *a OVS_UNUSED)
}

static void
parse_ovnact_result(struct action_context *ctx, const char *name,
const char *prereq, const struct expr_field *dst,
struct ovnact_result *res)
parse_ovnact_result__(struct action_context *ctx, const char *name,
const char *prereq, const struct expr_field *dst,
struct ovnact_result *res,
int n_bits)
{
lexer_get(ctx->lexer); /* Skip action name. */
lexer_get(ctx->lexer); /* Skip '('. */
if (!lexer_match(ctx->lexer, LEX_T_RPAREN)) {
lexer_error(ctx->lexer, "%s doesn't take any parameters", name);
return;
}
/* Validate that the destination is a 1-bit, modifiable field. */
char *error = expr_type_check(dst, 1, true, ctx->scope);
/* Validate that the destination is n_bits, modifiable field. */
char *error = expr_type_check(dst, n_bits, true, ctx->scope);
if (error) {
lexer_error(ctx->lexer, "%s", error);
free(error);
Expand All @@ -3295,6 +3296,14 @@ parse_ovnact_result(struct action_context *ctx, const char *name,
}
}

static void
parse_ovnact_result(struct action_context *ctx, const char *name,
const char *prereq, const struct expr_field *dst,
struct ovnact_result *res)
{
parse_ovnact_result__(ctx, name, prereq, dst, res, 1);
}

static void
parse_dns_lookup(struct action_context *ctx, const struct expr_field *dst,
struct ovnact_result *dl)
Expand Down Expand Up @@ -4294,22 +4303,40 @@ format_CHK_LB_HAIRPIN_REPLY(const struct ovnact_result *res, struct ds *s)
ds_put_cstr(s, " = chk_lb_hairpin_reply();");
}

static void
encode_result_action___(const struct ovnact_result *res,
uint8_t resubmit_table,
enum mf_field_id dst,
int ofs, int n_bits,
struct ofpbuf *ofpacts)
{
ovs_assert(n_bits <= 128);

struct mf_subfield res_dst = expr_resolve_field(&res->dst);
ovs_assert(res_dst.field);

put_load(0, dst, ofs, n_bits < 64 ? n_bits : 64, ofpacts);
if (n_bits > 64) {
put_load(0, dst, ofs + 64, n_bits - 64, ofpacts);
}

emit_resubmit(ofpacts, resubmit_table);

struct ofpact_reg_move *orm = ofpact_put_REG_MOVE(ofpacts);
orm->dst = res_dst;
orm->src.field = mf_from_id(dst);
orm->src.ofs = ofs;
orm->src.n_bits = n_bits;
}

static void
encode_result_action__(const struct ovnact_result *res,
uint8_t resubmit_table,
int log_flags_result_bit,
struct ofpbuf *ofpacts)
{
struct mf_subfield dst = expr_resolve_field(&res->dst);
ovs_assert(dst.field);
put_load(0, MFF_LOG_FLAGS, log_flags_result_bit, 1, ofpacts);
emit_resubmit(ofpacts, resubmit_table);

struct ofpact_reg_move *orm = ofpact_put_REG_MOVE(ofpacts);
orm->dst = dst;
orm->src.field = mf_from_id(MFF_LOG_FLAGS);
orm->src.ofs = log_flags_result_bit;
orm->src.n_bits = 1;
encode_result_action___(res, resubmit_table, MFF_LOG_FLAGS,
log_flags_result_bit, 1, ofpacts);
}

static void
Expand Down Expand Up @@ -5426,6 +5453,75 @@ encode_MAC_CACHE_USE(const struct ovnact_null *null OVS_UNUSED,
emit_resubmit(ofpacts, ep->mac_cache_use_table);
}

static void
encode_CT_ORIG_NW_DST(const struct ovnact_result *res,
const struct ovnact_encode_params *ep,
struct ofpbuf *ofpacts)
{
encode_result_action___(res, ep->ct_nw_dst_load_table,
MFF_LOG_CT_ORIG_NW_DST_ADDR, 0, 32, ofpacts);
}

static void
parse_CT_ORIG_NW_DST(struct action_context *ctx, const struct expr_field *dst,
struct ovnact_result *res)
{
parse_ovnact_result__(ctx, "ct_nw_dst", NULL, dst, res, 32);
}

static void
format_CT_ORIG_NW_DST(const struct ovnact_result *res, struct ds *s)
{
expr_field_format(&res->dst, s);
ds_put_cstr(s, " = ct_nw_dst();");
}

static void
encode_CT_ORIG_IP6_DST(const struct ovnact_result *res,
const struct ovnact_encode_params *ep,
struct ofpbuf *ofpacts)
{
encode_result_action___(res, ep->ct_ip6_dst_load_table,
MFF_LOG_CT_ORIG_IP6_DST_ADDR, 0, 128, ofpacts);
}

static void
parse_CT_ORIG_IP6_DST(struct action_context *ctx, const struct expr_field *dst,
struct ovnact_result *res)
{
parse_ovnact_result__(ctx, "ct_ip6_dst", NULL, dst, res, 128);
}

static void
format_CT_ORIG_IP6_DST(const struct ovnact_result *res, struct ds *s)
{
expr_field_format(&res->dst, s);
ds_put_cstr(s, " = ct_ip6_dst();");
}

static void
encode_CT_ORIG_TP_DST(const struct ovnact_result *res,
const struct ovnact_encode_params *ep OVS_UNUSED,
struct ofpbuf *ofpacts)
{
encode_result_action___(res, ep->ct_tp_dst_load_table,
MFF_LOG_CT_ORIG_TP_DST_PORT, 0, 16, ofpacts);
}

static void
parse_CT_ORIG_TP_DST(struct action_context *ctx, const struct expr_field *dst,
struct ovnact_result *res)
{
parse_ovnact_result__(ctx, "ct_tp_dst", NULL, dst, res, 16);
}

static void
format_CT_ORIG_TP_DST(const struct ovnact_result *res, struct ds *s)
{
expr_field_format(&res->dst, s);
ds_put_cstr(s, " = ct_tp_dst();");
}

/* Parses an assignment or exchange or put_dhcp_opts action. */
static void
parse_set_action(struct action_context *ctx)
Expand Down Expand Up @@ -5514,6 +5610,18 @@ parse_set_action(struct action_context *ctx)
lexer_lookahead(ctx->lexer) == LEX_T_LPAREN) {
parse_chk_lb_aff(ctx, &lhs,
ovnact_put_CHK_LB_AFF(ctx->ovnacts));
} else if (!strcmp(ctx->lexer->token.s, "ct_nw_dst") &&
lexer_lookahead(ctx->lexer) == LEX_T_LPAREN) {
parse_CT_ORIG_NW_DST(ctx, &lhs,
ovnact_put_CT_ORIG_NW_DST(ctx->ovnacts));
} else if (!strcmp(ctx->lexer->token.s, "ct_ip6_dst") &&
lexer_lookahead(ctx->lexer) == LEX_T_LPAREN) {
parse_CT_ORIG_IP6_DST(ctx, &lhs,
ovnact_put_CT_ORIG_IP6_DST(ctx->ovnacts));
} else if (!strcmp(ctx->lexer->token.s, "ct_tp_dst") &&
lexer_lookahead(ctx->lexer) == LEX_T_LPAREN) {
parse_CT_ORIG_TP_DST(ctx, &lhs,
ovnact_put_CT_ORIG_TP_DST(ctx->ovnacts));
} else {
parse_assignment_action(ctx, false, &lhs);
}
Expand Down
Loading

0 comments on commit 2ffc406

Please sign in to comment.