Skip to content

Commit

Permalink
use a dedicated map instead of several entries from the stats map
Browse files Browse the repository at this point in the history
Summary:
it makes the code more readable (KatranThriftClient::showQuicStats is a good example: the current implementation swapped CID-based routing and CH-based routing by mistake)
it makes the addition/update/deletion of related counters much easier
it can be easily extended to support per-vip katran stats later

Reviewed By: sharmafb

Differential Revision: D45786363

fbshipit-source-id: 4b5d31ea8e0e3f32ab241ae3f5b9641544e183e9
  • Loading branch information
Fei Chen authored and facebook-github-bot committed May 16, 2023
1 parent 487297e commit 06d0c85
Show file tree
Hide file tree
Showing 8 changed files with 136 additions and 119 deletions.
14 changes: 14 additions & 0 deletions katran/lib/BalancerStructs.h
Original file line number Diff line number Diff line change
Expand Up @@ -111,4 +111,18 @@ struct flow_debug_info {
uint32_t this_hopv6[4];
};
};

// struct for quic packets statistics counters
// KatranLb::getLbQuicPacketsStats() fills an array of this struct directly
// from the "quic_packets_stats_map" BPF map, so the fields mirror, in the same
// order, struct lb_quic_packets_stats in bpf/balancer_structs.h.
struct lb_quic_packets_stats {
// packets that fell back to consistent-hashing (CH) based routing
uint64_t ch_routed;
// NOTE(review): not updated by the code visible in this change; presumably
// counts QUIC initial packets - confirm
uint64_t cid_initial;
// server id resolved to real position 0 (uninitialized entry); the packet
// is then routed with CH
uint64_t cid_invalid_server_id;
// packets routed to a real found through the connection-id server id
uint64_t cid_routed;
// packets dropped because no real exists at the position the server id
// mapped to
uint64_t cid_unknown_real_dropped;
// NOTE(review): cid_v0 / cid_v3 are not updated by the code visible in this
// change - confirm their meaning
uint64_t cid_v0;
// server id <= QUIC_CONNID_VERSION_V1_MAX_VAL
uint64_t cid_v1;
// server id > QUIC_CONNID_VERSION_V1_MAX_VAL
uint64_t cid_v2;
uint64_t cid_v3;
};

} // namespace katran
44 changes: 35 additions & 9 deletions katran/lib/KatranLb.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2032,16 +2032,42 @@ lb_stats KatranLb::getIcmpTooBigStats() {
return getLbStats(config_.maxVips + kIcmpTooBigOffset);
}

lb_stats KatranLb::getQuicRoutingStats() {
return getLbStats(config_.maxVips + kQuicRoutingOffset);
}

lb_stats KatranLb::getQuicCidVersionStats() {
return getLbStats(config_.maxVips + kQuicCidVersionOffset);
}
/**
 * Reads entry 0 of the per-cpu "quic_packets_stats_map" BPF map and returns
 * the sum of every counter across all possible cpus.
 *
 * @return aggregated lb_quic_packets_stats. An all-zero struct is returned
 * when forwarding is disabled, when the number of possible cpus cannot be
 * determined, when running in testing mode, or when the map lookup fails
 * (in that last case lbStats_.bpfFailedCalls is incremented).
 */
lb_quic_packets_stats KatranLb::getLbQuicPacketsStats() {
  if (config_.disableForwarding) {
    LOG(ERROR) << "getLbQuicPacketsStats called on non-forwarding instance";
    return lb_quic_packets_stats{};
  }
  // Keep the cpu count signed: stored in an unsigned variable, the error
  // check below could never fire and a negative result would turn into a
  // huge array size.
  const int nr_cpus = BpfAdapter::getPossibleCpus();
  if (nr_cpus <= 0) {
    LOG(ERROR) << "Error while getting number of possible cpus";
    return lb_quic_packets_stats{};
  }

  lb_quic_packets_stats sum_stat = {};
  if (config_.testing) {
    // no BPF maps are queried in testing mode; report zeroed counters
    return sum_stat;
  }

  // a per-cpu map lookup fills one value per possible cpu
  lb_quic_packets_stats stats[nr_cpus];
  int position = 0;
  auto res = bpfAdapter_->bpfMapLookupElement(
      bpfAdapter_->getMapFdByName("quic_packets_stats_map"),
      &position,
      stats);
  if (res) {
    // non-zero result means the lookup failed
    lbStats_.bpfFailedCalls++;
    return sum_stat;
  }

  for (int cpu = 0; cpu < nr_cpus; ++cpu) {
    const auto& stat = stats[cpu];
    sum_stat.ch_routed += stat.ch_routed;
    sum_stat.cid_initial += stat.cid_initial;
    sum_stat.cid_invalid_server_id += stat.cid_invalid_server_id;
    sum_stat.cid_routed += stat.cid_routed;
    sum_stat.cid_unknown_real_dropped += stat.cid_unknown_real_dropped;
    sum_stat.cid_v0 += stat.cid_v0;
    sum_stat.cid_v1 += stat.cid_v1;
    sum_stat.cid_v2 += stat.cid_v2;
    sum_stat.cid_v3 += stat.cid_v3;
  }
  return sum_stat;
}

lb_stats KatranLb::getChDropStats() {
Expand Down
47 changes: 12 additions & 35 deletions katran/lib/KatranLb.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,16 +59,13 @@ constexpr uint32_t kLruFallbackOffset = 3;
constexpr uint32_t kIcmpTooBigOffset = 4;
constexpr uint32_t kLpmSrcOffset = 5;
constexpr uint32_t kInlineDecapOffset = 6;
constexpr uint32_t kQuicRoutingOffset = 7;
constexpr uint32_t kQuicCidVersionOffset = 8;
constexpr uint32_t kQuicCidDropOffset = 9;
constexpr uint32_t kTcpServerIdRoutingOffset = 10;
constexpr uint32_t kGlobalLruOffset = 11;
constexpr uint32_t kChDropOffset = 12;
constexpr uint32_t kDecapCounterOffset = 13;
constexpr uint32_t kQuicIcmpOffset = 14;
constexpr uint32_t kIcmpPtbV6Offset = 15;
constexpr uint32_t kIcmpPtbV4Offset = 16;
// offsets below are added to config_.maxVips to index the stats map; their
// values mirror the matching *_STATS / *_CNTR defines in
// bpf/balancer_consts.h
constexpr uint32_t kTcpServerIdRoutingOffset = 7;
constexpr uint32_t kGlobalLruOffset = 8;
constexpr uint32_t kChDropOffset = 9;
constexpr uint32_t kDecapCounterOffset = 10;
constexpr uint32_t kQuicIcmpOffset = 11;
constexpr uint32_t kIcmpPtbV6Offset = 12;
constexpr uint32_t kIcmpPtbV4Offset = 13;

/**
* LRU map related constants
Expand Down Expand Up @@ -521,31 +518,6 @@ class KatranLb {
*/
lb_stats getIcmpTooBigStats();

/**
* @return struct lb_stats w/ statistic of QUIC routing stats
*
* helper function which returns how many QUIC packets were routed
* using the default 5-tuple hash vs using the connection-id
*/
lb_stats getQuicRoutingStats();

/**
* @return struct lb_stats w/ statistic of QUIC CID versions stats
*
* helper function which returns how many QUIC packets were routed
* using CIDv1 vs CIDv2
*/
lb_stats getQuicCidVersionStats();

/**
* @return struct lb_stats w/ statistic of QUIC packet drop stats
*
* helper function which returns how many QUIC packets were dropped:
* v1 - packets dropped because server ID map pointed to unknown real ID.
* v2 - packets routed to real #0, which we don't map any real to
*/
lb_stats getQuicCidDropStats();

/**
* @return struct lb_stats w/ statistic of packets dropped during consistent
* hashing.
Expand Down Expand Up @@ -830,6 +802,11 @@ class KatranLb {
return bpfAdapter_->getMapFdByName(mapName);
}

/**
* @return struct lb_quic_packets_stats w/ QUIC packets counters
*
* helper function to get quic packets stats. reads entry 0 of the
* quic_packets_stats_map and sums the per-cpu values; returns a zeroed
* struct if forwarding is disabled or the map lookup fails
*/
lb_quic_packets_stats getLbQuicPacketsStats();

private:
/**
* update vipmap(add or remove vip) in forwarding plane
Expand Down
22 changes: 9 additions & 13 deletions katran/lib/bpf/balancer_consts.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,8 @@
#define CH_RINGS_SIZE (MAX_VIPS * RING_SIZE)
#define STATS_MAP_SIZE (MAX_VIPS * 2)

// max_entries of quic_packets_stats_map; all QUIC counters live in the
// single entry at key 0
#define QUIC_STATS_MAP_SIZE 1

// for LRU's map in map we will support up to this number of cpus
#ifndef MAX_SUPPORTED_CPUS
#define MAX_SUPPORTED_CPUS 128
Expand Down Expand Up @@ -238,26 +240,20 @@
#define LPM_SRC_CNTRS 5
// offset of remote encaped packets counters
#define REMOTE_ENCAP_CNTRS 6
// offset of QUIC routing related stats
#define QUIC_ROUTE_STATS 7
// QUIC CID versions
#define QUIC_CID_VERSION_STATS 8
// QUIC CID drops stats
#define QUIC_CID_DROP_STATS 9
// offset of stats for server_id based routing of TCP packets (TPR)
#define TCP_SERVER_ID_ROUTE_STATS 10
#define TCP_SERVER_ID_ROUTE_STATS 7
// offset of stats for global LRU
#define GLOBAL_LRU_CNTR 11
#define GLOBAL_LRU_CNTR 8
// offset of stats for packets dropped during consistent hashing
#define CH_DROP_STATS 12
#define CH_DROP_STATS 9
// offset of stats for decap packets
#define DECAP_CNTR 13
#define DECAP_CNTR 10
// offset of stats for quic icmp messages
#define QUIC_ICMP_STATS 14
#define QUIC_ICMP_STATS 11
// offset of stats for icmp PTB messages
#define ICMP_PTB_V6_STATS 15
#define ICMP_PTB_V6_STATS 12
// offset of stats for icmp Fragment needed messages
#define ICMP_PTB_V4_STATS 16
#define ICMP_PTB_V4_STATS 13

// indice for all stats maps defined above correspond to entries in the map
// stats starting from the index MAX_VIPS. The max_entries of stats is
Expand Down
68 changes: 24 additions & 44 deletions katran/lib/bpf/balancer_kern.c
Original file line number Diff line number Diff line change
Expand Up @@ -497,40 +497,14 @@ __attribute__((__always_inline__)) static inline int process_encaped_gue_pckt(
#endif // of INLINE_DECAP_GUE

__attribute__((__always_inline__)) static inline void
increment_quic_cid_version_stats(int host_id) {
__u32 quic_version_stats_key = MAX_VIPS + QUIC_CID_VERSION_STATS;
struct lb_stats* quic_version =
bpf_map_lookup_elem(&stats, &quic_version_stats_key);
if (!quic_version) {
return;
}
increment_quic_cid_version_stats(
struct lb_quic_packets_stats* quic_packets_stats,
int host_id) {
if (host_id > QUIC_CONNID_VERSION_V1_MAX_VAL) {
quic_version->v2 += 1;
quic_packets_stats->cid_v2 += 1;
} else {
quic_version->v1 += 1;
}
}

__attribute__((__always_inline__)) static inline void
increment_quic_cid_drop_no_real() {
__u32 quic_drop_stats_key = MAX_VIPS + QUIC_CID_DROP_STATS;
struct lb_stats* quic_drop =
bpf_map_lookup_elem(&stats, &quic_drop_stats_key);
if (!quic_drop) {
return;
}
quic_drop->v1 += 1;
}

__attribute__((__always_inline__)) static inline void
increment_quic_cid_drop_real_0() {
__u32 quic_drop_stats_key = MAX_VIPS + QUIC_CID_DROP_STATS;
struct lb_stats* quic_drop =
bpf_map_lookup_elem(&stats, &quic_drop_stats_key);
if (!quic_drop) {
return;
quic_packets_stats->cid_v1 += 1;
}
quic_drop->v2 += 1;
}

__attribute__((__always_inline__)) static inline int update_vip_lru_miss_stats(
Expand Down Expand Up @@ -730,40 +704,46 @@ process_packet(struct xdp_md* xdp, __u64 off, bool is_ipv6) {
data_stats->v2 += 1;
}
} else {
__u32 quic_stats_key = MAX_VIPS + QUIC_ROUTE_STATS;
struct lb_stats* quic_stats =
bpf_map_lookup_elem(&stats, &quic_stats_key);
if (!quic_stats) {
__u32 quic_packets_stats_key = 0;
struct lb_quic_packets_stats* quic_packets_stats =
bpf_map_lookup_elem(&quic_packets_stats_map, &quic_packets_stats_key);
if (!quic_packets_stats) {
return XDP_DROP;
}
struct quic_parse_result qpr = parse_quic(data, data_end, is_ipv6, &pckt);
if (qpr.server_id > 0) {
increment_quic_cid_version_stats(qpr.server_id);
// server_id is expected to always be positive. get a server id from
// quic packet
increment_quic_cid_version_stats(quic_packets_stats, qpr.server_id);
__u32 key = qpr.server_id;
__u32* real_pos = bpf_map_lookup_elem(&server_id_map, &key);
if (real_pos) {
// get a real position for the server id
key = *real_pos;
if (key == 0) {
increment_quic_cid_drop_real_0();
// increment counter for the CH based routing
quic_stats->v1 += 1;
// pos 0 means the entry for the server id is not initialized.
// fallback to ch
quic_packets_stats->cid_invalid_server_id += 1;
quic_packets_stats->ch_routed += 1;
} else {
pckt.real_index = key;
dst = bpf_map_lookup_elem(&reals, &key);
if (!dst) {
increment_quic_cid_drop_no_real();
// fail to find a real server with the real pos, drop the packet
quic_packets_stats->cid_unknown_real_dropped += 1;
REPORT_QUIC_PACKET_DROP_NO_REAL(
xdp, data, data_end - data, false);
return XDP_DROP;
}
quic_stats->v2 += 1;
quic_packets_stats->cid_routed += 1;
}
} else {
// increment counter for the CH based routing
quic_stats->v1 += 1;
// cannot get a real pos with the server id, fallback to ch
quic_packets_stats->ch_routed += 1;
}
} else if (!qpr.is_initial) {
quic_stats->v1 += 1;
// cannot get a server id from quic packet, fallback to ch
quic_packets_stats->ch_routed += 1;
}
}
}
Expand Down
9 changes: 9 additions & 0 deletions katran/lib/bpf/balancer_maps.h
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,15 @@ struct {
__uint(map_flags, NO_FLAGS);
} stats SEC(".maps");

// map for quic stats
// per-cpu array with QUIC_STATS_MAP_SIZE entries, keyed by __u32; each value
// is a struct lb_quic_packets_stats. process_packet() looks up key 0 and
// increments the counters in it; user space reads the same entry and sums the
// per-cpu copies.
struct {
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
__type(key, __u32);
__type(value, struct lb_quic_packets_stats);
__uint(max_entries, QUIC_STATS_MAP_SIZE);
__uint(map_flags, NO_FLAGS);
} quic_packets_stats_map SEC(".maps");

// map for server-id to real's id mapping. The ids can be embedded in header of
// QUIC or TCP (if enabled) packets for routing of packets for existing flows
#ifdef SERVER_ID_HASH_MAP
Expand Down
14 changes: 14 additions & 0 deletions katran/lib/bpf/balancer_structs.h
Original file line number Diff line number Diff line change
Expand Up @@ -137,4 +137,18 @@ struct flow_debug_info {
};
};
#endif // of RECORD_FLOW_INFO

// struct for quic packets statistics counters
// value type of quic_packets_stats_map. The user-space struct of the same
// name in BalancerStructs.h lists the same fields in the same order.
struct lb_quic_packets_stats {
// packets that fell back to consistent-hashing (CH) based routing
__u64 ch_routed;
// NOTE(review): not updated by the code visible in this change; presumably
// counts QUIC initial packets - confirm
__u64 cid_initial;
// server id resolved to real position 0 (uninitialized entry); the packet
// is then routed with CH
__u64 cid_invalid_server_id;
// packets routed to a real found through the connection-id server id
__u64 cid_routed;
// packets dropped because no real exists at the position the server id
// mapped to
__u64 cid_unknown_real_dropped;
// NOTE(review): cid_v0 / cid_v3 are not updated by the code visible in this
// change - confirm their meaning
__u64 cid_v0;
// server id <= QUIC_CONNID_VERSION_V1_MAX_VAL
__u64 cid_v1;
// server id > QUIC_CONNID_VERSION_V1_MAX_VAL
__u64 cid_v2;
__u64 cid_v3;
};

#endif // of _BALANCER_STRUCTS
37 changes: 19 additions & 18 deletions katran/lib/testing/katran_tester.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -215,34 +215,35 @@ void testLbCounters(katran::KatranLb& lb, KatranTestParam& testParam) {
VLOG(2) << "FallbackLRU hits: " << stats.v1;
LOG(ERROR) << "LRU fallback counter is incorrect";
}
stats = lb.getQuicRoutingStats();
if (stats.v1 != testParam.expectedQuicRoutingWithCh() ||
stats.v2 != testParam.expectedQuicRoutingWithCid()) {
LOG(ERROR) << "Counters for QUIC packets routed with CH: " << stats.v1
<< ", with connection-id: " << stats.v2;
stats = lb.getTcpServerIdRoutingStats();
if (stats.v2 != testParam.expectedTcpServerIdRoutingCounts() ||
stats.v1 != testParam.expectedTcpServerIdRoutingFallbackCounts()) {
LOG(ERROR) << "Counters for TCP server-id routing with CH (v1): "
<< stats.v1 << ", with server-id (v2): " << stats.v2;
LOG(ERROR) << "Counters for TCP server-id based routing are wrong";
}
auto quicStats = lb.getLbQuicPacketsStats();
if (quicStats.ch_routed != testParam.expectedQuicRoutingWithCh() ||
quicStats.cid_routed != testParam.expectedQuicRoutingWithCid()) {
LOG(ERROR) << "Counters for QUIC packets routed with CH: "
<< quicStats.ch_routed
<< ", with connection-id: " << quicStats.cid_routed;
LOG(ERROR) << "Counters for routing of QUIC packets is wrong.";
}
stats = lb.getQuicCidVersionStats();
if (stats.v1 != testParam.expectedQuicCidV1Counts() ||
stats.v2 != testParam.expectedQuicCidV2Counts()) {
// Check the per-CID-version counters. On mismatch log the values that were
// actually compared: `stats` was last assigned from
// getTcpServerIdRoutingStats(), so printing stats.v1 / stats.v2 here would
// report unrelated numbers.
if (quicStats.cid_v1 != testParam.expectedQuicCidV1Counts() ||
    quicStats.cid_v2 != testParam.expectedQuicCidV2Counts()) {
  LOG(ERROR) << "QUIC CID version counters v1 " << quicStats.cid_v1 << " v2 "
             << quicStats.cid_v2;
  LOG(ERROR) << "Counters for QUIC versions are wrong";
}
stats = lb.getQuicCidDropStats();
if (stats.v1 != testParam.expectedQuicCidDropsReal0Counts() ||
stats.v2 != testParam.expectedQuicCidDropsNoRealCounts()) {
// Check the QUIC CID drop / invalid-server-id counters. On mismatch log the
// quicStats fields that were compared; `stats` holds the TCP server-id
// routing counters at this point, not the QUIC ones.
if (quicStats.cid_invalid_server_id !=
        testParam.expectedQuicCidDropsReal0Counts() ||
    quicStats.cid_unknown_real_dropped !=
        testParam.expectedQuicCidDropsNoRealCounts()) {
  LOG(ERROR) << "QUIC CID drop counters: invalid server id "
             << quicStats.cid_invalid_server_id << " unknown real dropped "
             << quicStats.cid_unknown_real_dropped;
  LOG(ERROR) << "Counters for QUIC drops are wrong";
}
stats = lb.getTcpServerIdRoutingStats();
if (stats.v2 != testParam.expectedTcpServerIdRoutingCounts() ||
stats.v1 != testParam.expectedTcpServerIdRoutingFallbackCounts()) {
LOG(ERROR) << "Counters for TCP server-id routing with CH (v1): "
<< stats.v1 << ", with server-id (v2): " << stats.v2;
LOG(ERROR) << "Counters for TCP server-id based routing are wrong";
}
auto realStats = testParam.expectedRealStats();
for (int i = 0; i < kReals.size(); i++) {
auto real = kReals[i];
Expand Down

0 comments on commit 06d0c85

Please sign in to comment.