blob: aa6389fbeb8380b2f8147b593472d07abe097bba [file] [log] [blame]
/*
* teamd_balancer.h - Load balancer for teamd
* Copyright (C) 2012-2015 Jiri Pirko <jiri@resnulli.us>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <inttypes.h>
#include <private/list.h>
#include <private/misc.h>
#include <team.h>
#include "teamd.h"
#include "teamd_config.h"
struct tb_stats {
uint64_t last_bytes;
uint64_t curr_bytes;
bool initialized;
};
struct tb_hash_info {
uint8_t hash;
struct tb_stats stats;
struct teamd_port *tdport;
struct {
bool processed;
} rebalance;
};
struct tb_port_info {
struct list_item list;
struct tb_stats stats;
struct teamd_port *tdport;
struct {
uint64_t bytes;
bool unusable;
} rebalance;
};
#define HASH_COUNT 256
struct teamd_balancer {
struct teamd_context *ctx;
bool tx_balancing_enabled;
uint32_t balancing_interval;
struct tb_hash_info hash_info[HASH_COUNT];
struct list_item port_info_list;
};
static struct tb_port_info *get_tb_port_info(struct teamd_balancer *tb,
struct teamd_port *tdport)
{
struct tb_port_info *tbpi;
list_for_each_node_entry(tbpi, &tb->port_info_list, list) {
if (tbpi->tdport == tdport)
return tbpi;
}
return NULL;
}
static uint64_t tb_stats_get_delta(struct tb_stats *stats)
{
return stats->curr_bytes - stats->last_bytes;
}
static void tb_stats_update_last(struct tb_stats *stats)
{
stats->last_bytes = stats->curr_bytes;
}
static void tb_stats_update(struct tb_stats *stats,
uint64_t bytes)
{
stats->curr_bytes = bytes;
if (!stats->initialized) {
tb_stats_update_last(stats);
stats->initialized = true;
}
}
static void tb_stats_all_update_last(struct teamd_balancer *tb)
{
struct tb_port_info *tbpi;
int i;
list_for_each_node_entry(tbpi, &tb->port_info_list, list)
tb_stats_update_last(&tbpi->stats);
for (i = 0; i < HASH_COUNT; i++)
tb_stats_update_last(&tb->hash_info[i].stats);
}
static void tb_stats_update_hash(struct teamd_balancer *tb,
uint8_t hash, uint64_t bytes)
{
tb_stats_update(&tb->hash_info[hash].stats, bytes);
}
static void tb_stats_update_port(struct teamd_balancer *tb,
struct teamd_port *tdport, uint64_t bytes)
{
struct tb_port_info *tbpi;
tbpi = get_tb_port_info(tb, tdport);
if (tbpi)
tb_stats_update(&tbpi->stats, bytes);
}
static void tb_hash_to_port_map_update(struct teamd_balancer *tb,
uint8_t hash, struct teamd_port *tdport)
{
tb->hash_info[hash].tdport = tdport;
}
static struct tb_port_info *tb_get_least_loaded_port(struct teamd_balancer *tb)
{
struct tb_port_info *tbpi;
struct tb_port_info *best_tbpi = NULL;
list_for_each_node_entry(tbpi, &tb->port_info_list, list) {
if (tbpi->rebalance.unusable)
continue;
if (!best_tbpi ||
tbpi->rebalance.bytes < best_tbpi->rebalance.bytes)
best_tbpi = tbpi;
}
return best_tbpi;
}
static struct tb_hash_info *tb_get_biggest_unprocessed_hash(struct teamd_balancer *tb)
{
struct tb_hash_info *tbhi;
struct tb_hash_info *best_tbhi = NULL;
int i;
for (i = 0; i < HASH_COUNT; i++) {
tbhi = &tb->hash_info[i];
if (tbhi->rebalance.processed)
continue;
if (!best_tbhi || tb_stats_get_delta(&tbhi->stats) >
tb_stats_get_delta(&best_tbhi->stats))
best_tbhi = tbhi;
}
return best_tbhi;
}
static void tb_clear_rebalance_data(struct teamd_balancer *tb)
{
struct tb_port_info *tbpi;
int i;
list_for_each_node_entry(tbpi, &tb->port_info_list, list) {
tbpi->rebalance.bytes = 0;
tbpi->rebalance.unusable = false;
}
for (i = 0; i < HASH_COUNT; i++) {
tb->hash_info[i].rebalance.processed = false;
}
}
static int tb_hash_to_port_remap(struct teamd_balancer *tb,
struct team_handle *th,
struct tb_hash_info *tbhi,
struct tb_port_info *tbpi)
{
struct team_option *option;
struct teamd_port *new_tdport = tbpi->tdport;
struct teamd_context *ctx = tb->ctx;
uint8_t hash = tbhi->hash;
int err;
if (tbhi->tdport == new_tdport)
return 0;
option = team_get_option(th, "na", "lb_tx_hash_to_port_mapping", hash);
if (!option)
return -ENOENT;
err = team_set_option_value_u32(th, option, new_tdport->ifindex);
if (err)
return err;
teamd_log_dbg(ctx, "Remapped hash \"%u\" (delta %" PRIu64 ") to port %s.",
hash, tb_stats_get_delta(&tbhi->stats),
new_tdport->ifname);
return 0;
}
static int tb_rebalance(struct teamd_balancer *tb, struct team_handle *th)
{
int err;
struct tb_hash_info *tbhi;
struct tb_port_info *tbpi;
struct teamd_context *ctx = tb->ctx;
if (!tb->tx_balancing_enabled)
return 0;
tb_clear_rebalance_data(tb);
while ((tbhi = tb_get_biggest_unprocessed_hash(tb)) &&
(tbpi = tb_get_least_loaded_port(tb))) {
/* Do not remap zero delta hashes */
if (tbhi->tdport && !tb_stats_get_delta(&tbhi->stats)) {
tbhi->rebalance.processed = true;
continue;
}
err = tb_hash_to_port_remap(tb, th, tbhi, tbpi);
if (err) {
tbpi->rebalance.unusable = true;
continue;
}
tbpi->rebalance.bytes += tb_stats_get_delta(&tbhi->stats);
tbhi->rebalance.processed = true;
}
list_for_each_node_entry(tbpi, &tb->port_info_list, list) {
if (tbpi->rebalance.unusable)
continue;
teamd_log_dbg(ctx, "Port %s rebalanced, delta: %" PRIu64,
tbpi->tdport->ifname, tbpi->rebalance.bytes);
}
return 0;
}
struct lb_stats {
uint64_t tx_bytes;
};
static int tb_option_change_handler_func(struct team_handle *th, void *priv,
team_change_type_mask_t type_mask)
{
struct teamd_balancer *tb = priv;
struct teamd_context *ctx = tb->ctx;
struct team_option *option;
bool rebalance_needed = false;
team_for_each_option(option, ctx->th) {
char *name = team_get_option_name(option);
bool changed = team_is_option_changed(option);
if (!strcmp(name, "lb_tx_hash_to_port_mapping")) {
uint32_t array_index;
uint32_t port_ifindex;
struct teamd_port *tdport;
if (team_get_option_type(option) != TEAM_OPTION_TYPE_U32) {
teamd_log_err("Wrong type of option lb_tx_hash_to_port_mapping.");
return -EINVAL;
}
array_index = team_get_option_array_index(option);
if (array_index >= HASH_COUNT) {
teamd_log_err("Wrong array index \"%u\" for option lb_tx_hash_to_port_mapping.",
array_index);
return -EINVAL;
}
port_ifindex = team_get_option_value_u32(option);
tdport = teamd_get_port(ctx, port_ifindex);
tb_hash_to_port_map_update(tb, array_index, tdport);
}
if (!changed)
continue;
if (!strcmp(name, "lb_hash_stats") ||
!strcmp(name, "lb_port_stats") ||
!strcmp(name, "enabled"))
rebalance_needed = true;
}
if (!rebalance_needed)
return 0;
tb_stats_all_update_last(tb);
team_for_each_option(option, ctx->th) {
char *name = team_get_option_name(option);
bool changed = team_is_option_changed(option);
struct lb_stats *lb_stats;
if (!changed)
continue;
if (team_get_option_type(option) != TEAM_OPTION_TYPE_BINARY)
continue;
lb_stats = team_get_option_value_binary(option);
if (!strcmp(name, "lb_hash_stats")) {
uint32_t array_index;
array_index = team_get_option_array_index(option);
if (array_index >= HASH_COUNT) {
teamd_log_err("Wrong array index \"%u\" for option lb_hash_stats.",
array_index);
return -EINVAL;
}
teamd_log_dbg(ctx, "stats update for hash \"%u\": \"%" PRIu64 "\".",
array_index, lb_stats->tx_bytes);
tb_stats_update_hash(tb, array_index,
lb_stats->tx_bytes);
}
else if (!strcmp(name, "lb_port_stats")) {
struct teamd_port *tdport;
uint32_t port_ifindex;
port_ifindex = team_get_option_port_ifindex(option);
tdport = teamd_get_port(ctx, port_ifindex);
if (!tdport) {
teamd_log_err("Port with interface index \"%u\" is not part of this device.",
port_ifindex);
return -EINVAL;
}
teamd_log_dbg(ctx, "stats update for port %s: \"%" PRIu64 "\".",
tdport->ifname, lb_stats->tx_bytes);
tb_stats_update_port(tb, tdport,
lb_stats->tx_bytes);
}
}
return tb_rebalance(tb, th);
}
static bool tb_get_enable_tx_balancing(struct teamd_context *ctx)
{
int err;
const char *tx_balancer_name;
err = teamd_config_string_get(ctx, &tx_balancer_name, "$.runner.tx_balancer.name");
if (err)
return false; /* disabled by default */
if (!strcmp(tx_balancer_name, "basic"))
return true;
return false;
}
static uint32_t tb_get_balancing_interval(struct teamd_context *ctx)
{
int err;
int balancing_interval;
err = teamd_config_int_get(ctx, &balancing_interval, "$.runner.tx_balancer.balancing_interval");
if (err || balancing_interval < 0)
return 50; /* 5sec is default */
return balancing_interval;
}
static int tb_set_lb_tx_method(struct team_handle *th,
struct teamd_balancer *tb)
{
struct team_option *option;
option = team_get_option(th, "n!", "lb_tx_method");
if (!option)
return -ENOENT;
return team_set_option_value_string(th, option,
tb->tx_balancing_enabled ?
"hash_to_port_mapping" : "hash");
}
static int tb_set_lb_stats_refresh_interval(struct team_handle *th,
struct teamd_balancer *tb)
{
struct team_option *option;
option = team_get_option(th, "n!", "lb_stats_refresh_interval");
if (!option)
return -ENOENT;
return team_set_option_value_u32(th, option, tb->balancing_interval);
}
static const struct team_change_handler tb_option_change_handler = {
.func = tb_option_change_handler_func,
.type_mask = TEAM_OPTION_CHANGE,
};
int teamd_balancer_init(struct teamd_context *ctx, struct teamd_balancer **ptb)
{
struct teamd_balancer *tb;
int err;
int i;
tb = myzalloc(sizeof(*tb));
if (!tb)
return -ENOMEM;
list_init(&tb->port_info_list);
for (i = 0; i < HASH_COUNT; i++)
tb->hash_info[i].hash = i;
tb->tx_balancing_enabled = tb_get_enable_tx_balancing(ctx);
tb->balancing_interval = tb_get_balancing_interval(ctx);
err = tb_set_lb_tx_method(ctx->th, tb);
if (err) {
teamd_log_err("Failed to set lb_tx_method.");
goto err_set_lb_tx_method;
}
teamd_log_info("TX balancing %s.", tb->tx_balancing_enabled ?
"enabled" : "disabled");
if (tb->tx_balancing_enabled) {
err = tb_set_lb_stats_refresh_interval(ctx->th, tb);
if (err) {
teamd_log_err("Failed to set lb_stats_refresh_interval.");
goto err_set_lb_stats_refresh_interval;
}
teamd_log_info("Balancing interval %u.", tb->balancing_interval);
}
tb->ctx = ctx;
err = team_change_handler_register(ctx->th,
&tb_option_change_handler, tb);
if (err) {
teamd_log_err("Failed to register tb option change handler.");
goto err_change_handler_register;
}
*ptb = tb;
return 0;
err_set_lb_tx_method:
err_set_lb_stats_refresh_interval:
err_change_handler_register:
free(tb);
return err;
}
void teamd_balancer_fini(struct teamd_balancer *tb)
{
team_change_handler_unregister(tb->ctx->th,
&tb_option_change_handler, tb);
free(tb);
}
int teamd_balancer_port_added(struct teamd_balancer *tb,
struct teamd_port *tdport)
{
struct tb_port_info *tbpi;
tbpi = get_tb_port_info(tb, tdport);
if (tbpi)
return -EEXIST;
tbpi = myzalloc(sizeof(*tbpi));
if (!tbpi)
return -ENOMEM;
tbpi->tdport = tdport;
list_add(&tb->port_info_list, &tbpi->list);
return 0;
}
void teamd_balancer_port_removed(struct teamd_balancer *tb,
struct teamd_port *tdport)
{
struct tb_port_info *tbpi;
tbpi = get_tb_port_info(tb, tdport);
if (!tbpi)
return;
list_del(&tbpi->list);
free(tbpi);
}