blob: f3d01812cb06d040e0ecdfd8fae9d8dd2872607f [file] [log] [blame]
/*
* ss.c "sockstat", socket statistics
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
*/
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <sys/sysmacros.h>
#include <netinet/in.h>
#include <string.h>
#include <errno.h>
#include <netdb.h>
#include <arpa/inet.h>
#include <dirent.h>
#include <fnmatch.h>
#include <getopt.h>
#include <stdbool.h>
#include <limits.h>
#include <stdarg.h>
#include "ss_util.h"
#include "utils.h"
#include "rt_names.h"
#include "ll_map.h"
#include "libnetlink.h"
#include "namespace.h"
#include "SNAPSHOT.h"
#include "rt_names.h"
#include "cg_map.h"
#include <linux/tcp.h>
#include <linux/unix_diag.h>
#include <linux/netdevice.h> /* for MAX_ADDR_LEN */
#include <linux/filter.h>
#include <linux/xdp_diag.h>
#include <linux/packet_diag.h>
#include <linux/netlink_diag.h>
#include <linux/sctp.h>
#include <linux/vm_sockets_diag.h>
#include <linux/net.h>
#include <linux/tipc.h>
#include <linux/tipc_netlink.h>
#include <linux/tipc_sockets_diag.h>
#include <linux/tls.h>
#include <linux/mptcp.h>
/* AF_VSOCK/PF_VSOCK is only provided since glibc 2.18 */
#ifndef PF_VSOCK
#define PF_VSOCK 40
#endif
#ifndef AF_VSOCK
#define AF_VSOCK PF_VSOCK
#endif
#define BUF_CHUNK (1024 * 1024) /* Buffer chunk allocation size */
#define BUF_CHUNKS_MAX 5 /* Maximum number of allocated buffer chunks */
/* Round x up to the next even value (used to keep token headers aligned) */
#define LEN_ALIGN(x) (((x) + 1) & ~1)
#if HAVE_SELINUX
#include <selinux/selinux.h>
#else
/* Stubs for SELinux functions, compiled in when HAVE_SELINUX is not set.
 * Every stub reports failure (-1); the ones that take a context output
 * pointer also set it to NULL so callers never read an uninitialized value.
 */
static int is_selinux_enabled(void)
{
return -1;
}
/* Stub: no process context available; *context is set to NULL */
static int getpidcon(pid_t pid, char **context)
{
*context = NULL;
return -1;
}
/* Stub: no file context available; *context is set to NULL */
static int getfilecon(char *path, char **context)
{
*context = NULL;
return -1;
}
/* Stub: no initial context available; *context is set to NULL */
static int security_get_initial_context(char *name, char **context)
{
*context = NULL;
return -1;
}
#endif
/* Global option state. Most of these flags are only read in this part of the
 * file; presumably they are set from command-line parsing further down —
 * TODO confirm. preferred_family, show_details and oneline have external
 * linkage; the rest are private to this file.
 */
int preferred_family = AF_UNSPEC;
static int show_options;
int show_details;
static int show_users;
static int show_mem;
static int show_tcpinfo;
static int show_bpf;
static int show_proc_ctx;
static int show_sock_ctx;
static int show_header = 1; /* enabled by default */
static int follow_events;
/* Inode compared against sockstat.ino by is_sctp_assoc(); 0 means "none" */
static int sctp_ino;
static int show_tipcinfo;
static int show_tos;
static int show_cgroup;
int oneline;
/* Output column identifiers; values index the columns[] table and COL_MAX is
 * the number of columns.
 */
enum col_id {
COL_NETID,
COL_STATE,
COL_RECVQ,
COL_SENDQ,
COL_ADDR,
COL_SERV,
COL_RADDR,
COL_RSERV,
COL_EXT,
COL_PROC,
COL_MAX
};
/* Horizontal alignment of field content within its column; consumed by
 * print_left_spacing() and print_right_spacing().
 */
enum col_align {
ALIGN_LEFT,
ALIGN_CENTER,
ALIGN_RIGHT
};
/* Description and layout state of one output column */
struct column {
const enum col_align align; /* How content is aligned inside the column */
const char *header; /* Text emitted by print_header() */
const char *ldelim; /* Left delimiter, printed when not first on a line */
int disabled; /* Non-zero: out() and field_flush() skip this column */
int width; /* Calculated, including additional layout spacing */
int max_len; /* Measured maximum field length in this column */
};
/* Column table, indexed by enum col_id.
 * NOTE(review): "Process" sits at index 8 and the empty header at index 9,
 * whereas enum col_id has COL_EXT = 8 and COL_PROC = 9 — confirm which order
 * is intended.
 */
static struct column columns[] = {
{ ALIGN_LEFT, "Netid", "", 0, 0, 0 },
{ ALIGN_LEFT, "State", " ", 0, 0, 0 },
{ ALIGN_LEFT, "Recv-Q", " ", 0, 0, 0 },
{ ALIGN_LEFT, "Send-Q", " ", 0, 0, 0 },
{ ALIGN_RIGHT, "Local Address:", " ", 0, 0, 0 },
{ ALIGN_LEFT, "Port", "", 0, 0, 0 },
{ ALIGN_RIGHT, "Peer Address:", " ", 0, 0, 0 },
{ ALIGN_LEFT, "Port", "", 0, 0, 0 },
{ ALIGN_LEFT, "Process", "", 0, 0, 0 },
{ ALIGN_LEFT, "", "", 0, 0, 0 },
};
/* Column that out() currently appends to; starts at the first column */
static struct column *current_field = columns;
/* Output buffer: chained chunks of BUF_CHUNK bytes. Each field is written to
 * the buffer as a variable size token. A token consists of a 16 bits length
 * field, followed by a string which is not NUL-terminated.
 *
 * A new chunk is allocated and linked when the current chunk doesn't have
 * enough room to store the current token as a whole.
 */
struct buf_chunk {
	struct buf_chunk *next;	/* Next chained chunk */
	char *end;		/* Current end of content */
	char data[];		/* Token storage (C99 flexible array member) */
};
/* One buffered field: a 16-bit length followed by that many bytes of text.
 * The text is not NUL-terminated.
 */
struct buf_token {
	uint16_t len;	/* Data length, excluding length descriptor */
	char data[];	/* Field text (C99 flexible array member) */
};
/* Global state of the output buffer shared by out(), field_flush() and
 * render().
 */
static struct {
struct buf_token *cur; /* Position of current token in chunk */
struct buf_chunk *head; /* First chunk */
struct buf_chunk *tail; /* Current chunk */
int chunks; /* Number of allocated chunks */
} buffer;
/* Protocol name strings. Besides being passed around as text, these pointers
 * are compared by identity (e.g. "c->proto == dg_proto",
 * "dg_proto == RAW_PROTO"), so they must stay unique objects.
 */
static const char *TCP_PROTO = "tcp";
static const char *SCTP_PROTO = "sctp";
static const char *UDP_PROTO = "udp";
static const char *RAW_PROTO = "raw";
/* Protocol currently used for service-name resolution */
static const char *dg_proto;
/* Socket "database" identifiers. Each value is a bit position in
 * filter.dbs and an index into default_dbs[]; MAX_DB is the count.
 */
enum {
TCP_DB,
DCCP_DB,
UDP_DB,
RAW_DB,
UNIX_DG_DB,
UNIX_ST_DB,
UNIX_SQ_DB,
PACKET_DG_DB,
PACKET_R_DB,
NETLINK_DB,
SCTP_DB,
VSOCK_ST_DB,
VSOCK_DG_DB,
TIPC_DB,
XDP_DB,
MAX_DB
};
/* Bit masks grouping related databases, for use with filter.dbs */
#define PACKET_DBM ((1<<PACKET_DG_DB)|(1<<PACKET_R_DB))
#define UNIX_DBM ((1<<UNIX_DG_DB)|(1<<UNIX_ST_DB)|(1<<UNIX_SQ_DB))
#define ALL_DB ((1<<MAX_DB)-1)
#define INET_L4_DBM ((1<<TCP_DB)|(1<<UDP_DB)|(1<<DCCP_DB)|(1<<SCTP_DB))
#define INET_DBM (INET_L4_DBM | (1<<RAW_DB))
#define VSOCK_DBM ((1<<VSOCK_ST_DB)|(1<<VSOCK_DG_DB))
/* Socket states. Each value is a bit position in filter.states and an index
 * into the state name table in sock_state_print(); SS_MAX is the count.
 */
enum {
SS_UNKNOWN,
SS_ESTABLISHED,
SS_SYN_SENT,
SS_SYN_RECV,
SS_FIN_WAIT1,
SS_FIN_WAIT2,
SS_TIME_WAIT,
SS_CLOSE,
SS_CLOSE_WAIT,
SS_LAST_ACK,
SS_LISTEN,
SS_CLOSING,
SS_MAX
};
/* SCTP association states; values index sctp_sstate_name[] */
enum {
SCTP_STATE_CLOSED = 0,
SCTP_STATE_COOKIE_WAIT = 1,
SCTP_STATE_COOKIE_ECHOED = 2,
SCTP_STATE_ESTABLISHED = 3,
SCTP_STATE_SHUTDOWN_PENDING = 4,
SCTP_STATE_SHUTDOWN_SENT = 5,
SCTP_STATE_SHUTDOWN_RECEIVED = 6,
SCTP_STATE_SHUTDOWN_ACK_SENT = 7,
};
/* State masks for filter.states */
/* Every state */
#define SS_ALL ((1 << SS_MAX) - 1)
/* Every state except LISTEN, CLOSE, TIME_WAIT and SYN_RECV */
#define SS_CONN (SS_ALL & ~((1<<SS_LISTEN)|(1<<SS_CLOSE)|(1<<SS_TIME_WAIT)|(1<<SS_SYN_RECV)))
/* Default state set used for TIPC sockets */
#define TIPC_SS_CONN ((1<<SS_ESTABLISHED)|(1<<SS_LISTEN)|(1<<SS_CLOSE))
#include "ssfilter.h"
/* Selection of sockets to show */
struct filter {
int dbs; /* Bit mask of (1 << *_DB) values */
int states; /* Bit mask of (1 << SS_*) values */
uint64_t families; /* Bit mask built with FAMILY_MASK() */
struct ssfilter *f; /* Declared in ssfilter.h; not used in this part of the file */
bool kill;
struct rtnl_handle *rth_for_killing;
};
/* Bit representing an address family in filter.families */
#define FAMILY_MASK(family) ((uint64_t)1 << (family))
/* Per-database defaults: the states merged in by filter_db_set() and the
 * address families added by filter_merge_defaults() when a database is
 * selected without a matching family.
 */
static const struct filter default_dbs[MAX_DB] = {
[TCP_DB] = {
.states = SS_CONN,
.families = FAMILY_MASK(AF_INET) | FAMILY_MASK(AF_INET6),
},
[DCCP_DB] = {
.states = SS_CONN,
.families = FAMILY_MASK(AF_INET) | FAMILY_MASK(AF_INET6),
},
[UDP_DB] = {
.states = (1 << SS_ESTABLISHED),
.families = FAMILY_MASK(AF_INET) | FAMILY_MASK(AF_INET6),
},
[RAW_DB] = {
.states = (1 << SS_ESTABLISHED),
.families = FAMILY_MASK(AF_INET) | FAMILY_MASK(AF_INET6),
},
[UNIX_DG_DB] = {
.states = (1 << SS_CLOSE),
.families = FAMILY_MASK(AF_UNIX),
},
[UNIX_ST_DB] = {
.states = SS_CONN,
.families = FAMILY_MASK(AF_UNIX),
},
[UNIX_SQ_DB] = {
.states = SS_CONN,
.families = FAMILY_MASK(AF_UNIX),
},
[PACKET_DG_DB] = {
.states = (1 << SS_CLOSE),
.families = FAMILY_MASK(AF_PACKET),
},
[PACKET_R_DB] = {
.states = (1 << SS_CLOSE),
.families = FAMILY_MASK(AF_PACKET),
},
[NETLINK_DB] = {
.states = (1 << SS_CLOSE),
.families = FAMILY_MASK(AF_NETLINK),
},
[SCTP_DB] = {
.states = SS_CONN,
.families = FAMILY_MASK(AF_INET) | FAMILY_MASK(AF_INET6),
},
[VSOCK_ST_DB] = {
.states = SS_CONN,
.families = FAMILY_MASK(AF_VSOCK),
},
[VSOCK_DG_DB] = {
.states = SS_CONN,
.families = FAMILY_MASK(AF_VSOCK),
},
[TIPC_DB] = {
.states = TIPC_SS_CONN,
.families = FAMILY_MASK(AF_TIPC),
},
[XDP_DB] = {
.states = (1 << SS_CLOSE),
.families = FAMILY_MASK(AF_XDP),
},
};
/* Per-address-family defaults: the states merged in by filter_af_set() and
 * the databases added by filter_merge_defaults() when a family is selected
 * without a matching database. Families not listed are zero-initialized.
 */
static const struct filter default_afs[AF_MAX] = {
[AF_INET] = {
.dbs = INET_DBM,
.states = SS_CONN,
},
[AF_INET6] = {
.dbs = INET_DBM,
.states = SS_CONN,
},
[AF_UNIX] = {
.dbs = UNIX_DBM,
.states = SS_CONN,
},
[AF_PACKET] = {
.dbs = PACKET_DBM,
.states = (1 << SS_CLOSE),
},
[AF_NETLINK] = {
.dbs = (1 << NETLINK_DB),
.states = (1 << SS_CLOSE),
},
[AF_VSOCK] = {
.dbs = VSOCK_DBM,
.states = SS_CONN,
},
[AF_TIPC] = {
.dbs = (1 << TIPC_DB),
.states = TIPC_SS_CONN,
},
[AF_XDP] = {
.dbs = (1 << XDP_DB),
.states = (1 << SS_CLOSE),
},
};
/* Cleared by filter_db_set() and filter_af_set() once a database or family
 * has been selected explicitly.
 */
static int do_default = 1;
/* The filter being built; zero-initialized */
static struct filter current_filter;
/* Select or deselect one socket database in @f.
 *
 * Enabling a database also merges that database's default states into
 * f->states; disabling only clears the database bit and leaves the states
 * alone. Either way the "use defaults" mode is switched off.
 */
static void filter_db_set(struct filter *f, int db, bool enable)
{
	do_default = 0;

	if (!enable) {
		f->dbs &= ~(1 << db);
		return;
	}

	f->states |= default_dbs[db].states;
	f->dbs |= 1 << db;
}
/* Parse a database name (optionally prefixed with '!' to disable instead of
 * enable) and apply it to @f through filter_db_set().
 *
 * Returns 0 if @s matched an entry of the name table, -1 otherwise.
 */
static int filter_db_parse(struct filter *f, const char *s)
{
const struct {
const char *name;
int dbs[MAX_DB + 1];
} db_name_tbl[] = {
/* Each entry lists the databases behind a name, terminated by MAX_DB */
#define ENTRY(name, ...) { #name, { __VA_ARGS__, MAX_DB } }
ENTRY(all, UDP_DB, DCCP_DB, TCP_DB, RAW_DB,
UNIX_ST_DB, UNIX_DG_DB, UNIX_SQ_DB,
PACKET_R_DB, PACKET_DG_DB, NETLINK_DB,
SCTP_DB, VSOCK_ST_DB, VSOCK_DG_DB, XDP_DB),
ENTRY(inet, UDP_DB, DCCP_DB, TCP_DB, SCTP_DB, RAW_DB),
ENTRY(udp, UDP_DB),
ENTRY(dccp, DCCP_DB),
ENTRY(tcp, TCP_DB),
ENTRY(sctp, SCTP_DB),
ENTRY(raw, RAW_DB),
ENTRY(unix, UNIX_ST_DB, UNIX_DG_DB, UNIX_SQ_DB),
ENTRY(unix_stream, UNIX_ST_DB),
ENTRY(u_str, UNIX_ST_DB), /* alias for unix_stream */
ENTRY(unix_dgram, UNIX_DG_DB),
ENTRY(u_dgr, UNIX_DG_DB), /* alias for unix_dgram */
ENTRY(unix_seqpacket, UNIX_SQ_DB),
ENTRY(u_seq, UNIX_SQ_DB), /* alias for unix_seqpacket */
ENTRY(packet, PACKET_R_DB, PACKET_DG_DB),
ENTRY(packet_raw, PACKET_R_DB),
ENTRY(p_raw, PACKET_R_DB), /* alias for packet_raw */
ENTRY(packet_dgram, PACKET_DG_DB),
ENTRY(p_dgr, PACKET_DG_DB), /* alias for packet_dgram */
ENTRY(netlink, NETLINK_DB),
ENTRY(vsock, VSOCK_ST_DB, VSOCK_DG_DB),
ENTRY(vsock_stream, VSOCK_ST_DB),
ENTRY(v_str, VSOCK_ST_DB), /* alias for vsock_stream */
ENTRY(vsock_dgram, VSOCK_DG_DB),
ENTRY(v_dgr, VSOCK_DG_DB), /* alias for vsock_dgram */
ENTRY(xdp, XDP_DB),
#undef ENTRY
};
bool enable = true;
unsigned int i;
const int *dbp;
/* A leading '!' turns the request into "disable these databases" */
if (s[0] == '!') {
enable = false;
s++;
}
for (i = 0; i < ARRAY_SIZE(db_name_tbl); i++) {
if (strcmp(s, db_name_tbl[i].name))
continue;
/* Walk the database list up to the MAX_DB sentinel */
for (dbp = db_name_tbl[i].dbs; *dbp != MAX_DB; dbp++)
filter_db_set(f, *dbp, enable);
return 0;
}
return -1;
}
/* Add address family @af to filter @f.
 *
 * The family's default states are merged into f->states, the family becomes
 * the preferred one, and the "use defaults" mode is switched off.
 */
static void filter_af_set(struct filter *f, int af)
{
	do_default = 0;
	preferred_family = af;

	f->families |= FAMILY_MASK(af);
	f->states |= default_afs[af].states;
}
/* Return 1 if address family @af is selected in filter @f, 0 otherwise */
static int filter_af_get(struct filter *f, int af)
{
	const uint64_t selected = f->families & FAMILY_MASK(af);

	return selected != 0;
}
/* Replace the state mask of filter @f with @states; an empty mask (0) is
 * ignored and leaves the filter untouched.
 */
static void filter_states_set(struct filter *f, int states)
{
	if (states == 0)
		return;

	f->states = states;
}
static void filter_merge_defaults(struct filter *f)
{
int db;
int af;
for (db = 0; db < MAX_DB; db++) {
if (!(f->dbs & (1 << db)))
continue;
if (!(default_dbs[db].families & f->families))
f->families |= default_dbs[db].families;
}
for (af = 0; af < AF_MAX; af++) {
if (!(f->families & FAMILY_MASK(af)))
continue;
if (!(default_afs[af].dbs & f->dbs))
f->dbs |= default_afs[af].dbs;
}
}
/* Open a procfs-style file for reading.
 *
 * @env:  name of an environment variable that, when set, holds the complete
 *        path to open and overrides everything else
 * @name: path relative to the proc root, used when @env is not set
 *
 * The proc root is taken from $PROC_ROOT, falling back to "/proc".
 *
 * Returns the stream, or NULL on failure with errno set. A composed path that
 * does not fit the local buffer fails with ENAMETOOLONG instead of silently
 * opening a truncated (and therefore different) path.
 */
static FILE *generic_proc_open(const char *env, const char *name)
{
	const char *path = getenv(env);
	char store[128];

	if (!path) {
		const char *root = getenv("PROC_ROOT");
		int n;

		if (!root)
			root = "/proc";

		n = snprintf(store, sizeof(store), "%s/%s", root, name);
		if (n < 0 || (size_t)n >= sizeof(store)) {
			errno = ENAMETOOLONG;
			return NULL;
		}
		path = store;
	}

	return fopen(path, "r");
}
/* Shorthands for generic_proc_open(): the first argument names the
 * environment variable that can override the full path, the second is the
 * path relative to the proc root.
 */
#define net_tcp_open() generic_proc_open("PROC_NET_TCP", "net/tcp")
#define net_tcp6_open() generic_proc_open("PROC_NET_TCP6", "net/tcp6")
#define net_udp_open() generic_proc_open("PROC_NET_UDP", "net/udp")
#define net_udp6_open() generic_proc_open("PROC_NET_UDP6", "net/udp6")
#define net_raw_open() generic_proc_open("PROC_NET_RAW", "net/raw")
#define net_raw6_open() generic_proc_open("PROC_NET_RAW6", "net/raw6")
#define net_unix_open() generic_proc_open("PROC_NET_UNIX", "net/unix")
#define net_packet_open() generic_proc_open("PROC_NET_PACKET", \
"net/packet")
#define net_netlink_open() generic_proc_open("PROC_NET_NETLINK", \
"net/netlink")
#define net_sockstat_open() generic_proc_open("PROC_NET_SOCKSTAT", \
"net/sockstat")
#define net_sockstat6_open() generic_proc_open("PROC_NET_SOCKSTAT6", \
"net/sockstat6")
#define net_snmp_open() generic_proc_open("PROC_NET_SNMP", "net/snmp")
#define ephemeral_ports_open() generic_proc_open("PROC_IP_LOCAL_PORT_RANGE", \
"sys/net/ipv4/ip_local_port_range")
/* One (process, file descriptor) pair that refers to a socket inode.
 * Entries live in user_ent_hash[], chained per bucket; the three strings are
 * heap copies owned by the entry (see user_ent_add()/user_ent_destroy()).
 */
struct user_ent {
struct user_ent *next; /* Next entry in the same hash bucket */
unsigned int ino; /* Socket inode number, the lookup key */
int pid;
int fd;
char *process; /* Process name */
char *process_ctx; /* Security context of the process */
char *socket_ctx; /* Security context of the socket */
};
#define USER_ENT_HASH_SIZE 256
static struct user_ent *user_ent_hash[USER_ENT_HASH_SIZE];

/* Map a socket inode number to a bucket of user_ent_hash[] by XOR-folding
 * its four bytes together.
 */
static int user_ent_hashfn(unsigned int ino)
{
	unsigned int folded = ino;

	folded ^= ino >> 8;
	folded ^= ino >> 16;
	folded ^= ino >> 24;

	return (int)(folded & (USER_ENT_HASH_SIZE - 1));
}
/* Record that descriptor @fd of process @pid refers to socket inode @ino.
 *
 * @process, @proc_ctx and @sock_ctx are copied, so the caller keeps ownership
 * of its strings. The new entry is pushed at the head of its hash bucket.
 *
 * Any allocation failure is fatal: previously only the entry itself was
 * checked, so a failed strdup() left a NULL string in the table.
 */
static void user_ent_add(unsigned int ino, char *process,
			 int pid, int fd,
			 char *proc_ctx,
			 char *sock_ctx)
{
	struct user_ent *p, **pp;

	p = malloc(sizeof(*p));
	if (!p) {
		fprintf(stderr, "ss: failed to malloc buffer\n");
		abort();
	}

	p->ino = ino;
	p->pid = pid;
	p->fd = fd;
	p->process = strdup(process);
	p->process_ctx = strdup(proc_ctx);
	p->socket_ctx = strdup(sock_ctx);
	if (!p->process || !p->process_ctx || !p->socket_ctx) {
		fprintf(stderr, "ss: failed to malloc buffer\n");
		abort();
	}

	pp = &user_ent_hash[user_ent_hashfn(ino)];
	p->next = *pp;
	*pp = p;
}
/* Free every entry of user_ent_hash[] together with its strings.
 *
 * Each bucket head is reset to NULL afterwards so the table never holds
 * pointers to freed entries; a later find_entry() simply finds nothing.
 */
static void user_ent_destroy(void)
{
	int bucket;

	for (bucket = 0; bucket < USER_ENT_HASH_SIZE; bucket++) {
		struct user_ent *p = user_ent_hash[bucket];

		while (p) {
			struct user_ent *p_next = p->next;

			free(p->process);
			free(p->process_ctx);
			free(p->socket_ctx);
			free(p);
			p = p_next;
		}
		user_ent_hash[bucket] = NULL;
	}
}
/* Populate user_ent_hash[] by scanning <root>/<pid>/fd/ for every process,
 * where <root> is $PROC_ROOT or "/proc/". Each descriptor whose link target
 * is "socket:[<inode>]" is recorded with user_ent_add(), together with the
 * process name read from <root>/<pid>/stat and the process and socket
 * security contexts ("unavailable" when they cannot be obtained).
 *
 * The scan runs at most once per program run; later calls return at once.
 * Directories and links that cannot be read are skipped silently.
 */
static void user_ent_hash_build(void)
{
	const char *root = getenv("PROC_ROOT") ? : "/proc/";
	static int user_ent_hash_build_init;
	const char *no_ctx = "unavailable";
	char *pid_context;
	char *sock_context;
	struct dirent *d;
	char name[1024];
	size_t rootlen;
	int nameoff;
	DIR *dir;

	/* If show_users & show_proc_ctx set only do this once */
	if (user_ent_hash_build_init != 0)
		return;

	user_ent_hash_build_init = 1;

	strlcpy(name, root, sizeof(name));
	rootlen = strlen(name);
	if (rootlen == 0 || name[rootlen - 1] != '/') {
		/* Need room for '/' and the terminator: a root that already
		 * fills the buffer cannot be scanned at all.
		 */
		if (rootlen + 2 > sizeof(name))
			return;
		name[rootlen++] = '/';
		name[rootlen] = '\0';
	}
	nameoff = rootlen;

	dir = opendir(name);
	if (!dir)
		return;

	while ((d = readdir(dir)) != NULL) {
		struct dirent *d1;
		char process[16];
		char *p;
		int pid, pos;
		DIR *dir1;
		char crap;

		/* Only purely numeric entries are processes */
		if (sscanf(d->d_name, "%d%c", &pid, &crap) != 1)
			continue;

		if (getpidcon(pid, &pid_context) != 0)
			pid_context = strdup(no_ctx);

		snprintf(name + nameoff, sizeof(name) - nameoff, "%d/fd/", pid);
		pos = strlen(name);
		if ((dir1 = opendir(name)) == NULL) {
			free(pid_context);
			continue;
		}

		/* Process name is looked up lazily, on the first socket found */
		process[0] = '\0';
		p = process;

		while ((d1 = readdir(dir1)) != NULL) {
			const char *pattern = "socket:[";
			unsigned int ino;
			char lnk[64];
			int fd;
			ssize_t link_len;
			char tmp[1024];

			if (sscanf(d1->d_name, "%d%c", &fd, &crap) != 1)
				continue;

			snprintf(name+pos, sizeof(name) - pos, "%d", fd);

			link_len = readlink(name, lnk, sizeof(lnk)-1);
			if (link_len == -1)
				continue;
			lnk[link_len] = '\0';

			if (strncmp(lnk, pattern, strlen(pattern)))
				continue;

			/* Skip malformed targets rather than recording an
			 * uninitialized inode number.
			 */
			if (sscanf(lnk, "socket:[%u]", &ino) != 1)
				continue;

			snprintf(tmp, sizeof(tmp), "%s/%d/fd/%s",
				 root, pid, d1->d_name);

			if (getfilecon(tmp, &sock_context) <= 0)
				sock_context = strdup(no_ctx);

			if (*p == '\0') {
				FILE *fp;

				snprintf(tmp, sizeof(tmp), "%s/%d/stat",
					 root, pid);
				if ((fp = fopen(tmp, "r")) != NULL) {
					/* Width 15 keeps the name within
					 * process[16] whatever the file holds.
					 */
					if (fscanf(fp, "%*d (%15[^)])", p) < 1)
						process[0] = '\0';
					fclose(fp);
				}
			}
			user_ent_add(ino, p, pid, fd,
				     pid_context, sock_context);
			free(sock_context);
		}
		free(pid_context);
		closedir(dir1);
	}
	closedir(dir);
}
/* Output formats understood by find_entry() */
enum entry_types {
USERS, /* process name, pid and fd */
PROC_CTX, /* as USERS plus the process context */
PROC_SOCK_CTX /* as PROC_CTX plus the socket context */
};
/* Step by which find_entry() grows its result buffer */
#define ENTRY_BUF_SIZE 512
/* Format every user_ent_hash[] entry whose inode equals @ino into a newly
 * allocated string.
 *
 * @ino:  socket inode to look up; 0 returns 0 without touching *buf
 * @buf:  receives the heap buffer holding the comma-separated entries, or
 *        NULL when nothing matched
 * @type: one of enum entry_types, selecting the per-entry format; any other
 *        value aborts the program
 *
 * Returns the number of matching entries. Allocation failure aborts.
 */
static int find_entry(unsigned int ino, char **buf, int type)
{
struct user_ent *p;
int cnt = 0;
char *ptr;
char *new_buf;
int len, new_buf_len;
int buf_used = 0;
int buf_len = 0;
if (!ino)
return 0;
p = user_ent_hash[user_ent_hashfn(ino)];
ptr = *buf = NULL;
while (p) {
if (p->ino != ino)
goto next;
/* Format the entry at the current end of the buffer; if it does not
 * fit, grow the buffer by ENTRY_BUF_SIZE and format it again.
 */
while (1) {
ptr = *buf + buf_used;
switch (type) {
case USERS:
len = snprintf(ptr, buf_len - buf_used,
"(\"%s\",pid=%d,fd=%d),",
p->process, p->pid, p->fd);
break;
case PROC_CTX:
len = snprintf(ptr, buf_len - buf_used,
"(\"%s\",pid=%d,proc_ctx=%s,fd=%d),",
p->process, p->pid,
p->process_ctx, p->fd);
break;
case PROC_SOCK_CTX:
len = snprintf(ptr, buf_len - buf_used,
"(\"%s\",pid=%d,proc_ctx=%s,fd=%d,sock_ctx=%s),",
p->process, p->pid,
p->process_ctx, p->fd,
p->socket_ctx);
break;
default:
fprintf(stderr, "ss: invalid type: %d\n", type);
abort();
}
/* Error, or output truncated (the terminator needs a byte too) */
if (len < 0 || len >= buf_len - buf_used) {
new_buf_len = buf_len + ENTRY_BUF_SIZE;
new_buf = realloc(*buf, new_buf_len);
if (!new_buf) {
fprintf(stderr, "ss: failed to malloc buffer\n");
abort();
}
*buf = new_buf;
buf_len = new_buf_len;
continue;
} else {
buf_used += len;
break;
}
}
cnt++;
next:
p = p->next;
}
/* Overwrite the trailing ',' of the last entry with the terminator */
if (buf_used) {
ptr = *buf + buf_used;
ptr[-1] = '\0';
}
return cnt;
}
/* Combine a two-word socket cookie into one 64-bit value: cookie[0] supplies
 * the low 32 bits and cookie[1] the high 32 bits.
 */
static unsigned long long cookie_sk_get(const uint32_t *cookie)
{
	const unsigned long long high = cookie[1];
	const unsigned long long low = cookie[0];

	return (high << 32) | low;
}
/* Display names of SCTP association states, indexed by SCTP_STATE_* */
static const char *sctp_sstate_name[] = {
[SCTP_STATE_CLOSED] = "CLOSED",
[SCTP_STATE_COOKIE_WAIT] = "COOKIE_WAIT",
[SCTP_STATE_COOKIE_ECHOED] = "COOKIE_ECHOED",
[SCTP_STATE_ESTABLISHED] = "ESTAB",
[SCTP_STATE_SHUTDOWN_PENDING] = "SHUTDOWN_PENDING",
[SCTP_STATE_SHUTDOWN_SENT] = "SHUTDOWN_SENT",
[SCTP_STATE_SHUTDOWN_RECEIVED] = "SHUTDOWN_RECEIVED",
[SCTP_STATE_SHUTDOWN_ACK_SENT] = "ACK_SENT",
};
/* Display names of socket types, indexed by SOCK_*; index 0 is "UNKNOWN" and
 * indices without an initializer are NULL.
 */
static const char * const stype_nameg[] = {
"UNKNOWN",
[SOCK_STREAM] = "STREAM",
[SOCK_DGRAM] = "DGRAM",
[SOCK_RDM] = "RDM",
[SOCK_SEQPACKET] = "SEQPACKET",
};
/* Generic description of one socket. Field notes below cover only what this
 * part of the file shows; the remaining fields are filled and used elsewhere.
 */
struct sockstat {
struct sockstat *next;
unsigned int type; /* Selects the Netid name in sock_state_print() */
uint16_t prot;
uint16_t raw_prot;
inet_prefix local; /* local.family selects how the Netid is derived */
inet_prefix remote;
int lport;
int rport;
int state; /* Index into the state name tables */
int rq, wq; /* Printed in the Recv-Q / Send-Q columns */
unsigned int ino; /* Inode number; key for find_entry() */
unsigned int uid;
int refcnt;
unsigned int iface;
unsigned long long sk; /* Printed as "sk:" in hex */
char *name;
char *peer_name;
__u32 mark; /* Printed as "fwmark:" when non-zero */
__u64 cgroup_id; /* Resolved to a path with cg_id_to_path() when non-zero */
};
/* DCTCP-specific values attached to a TCP socket through tcpstat.dctcp.
 * NOTE(review): field meanings are not visible in this part of the file;
 * presumably they mirror the kernel's DCTCP info — confirm.
 */
struct dctcpstat {
unsigned int ce_state;
unsigned int alpha;
unsigned int ab_ecn;
unsigned int ab_tot;
bool enabled;
};
/* TCP-specific socket description: the generic sockstat followed by timer,
 * congestion-control and counter values.
 * NOTE(review): units and exact meanings of the individual fields are not
 * established in this part of the file; presumably they mirror the kernel's
 * tcp_info — confirm before relying on them.
 */
struct tcpstat {
struct sockstat ss; /* Generic part; first member */
unsigned int timer;
unsigned int timeout;
int probes;
char cong_alg[16]; /* Congestion control algorithm name */
double rto, ato, rtt, rttvar;
int qack, ssthresh, backoff;
double send_bps;
int snd_wscale;
int rcv_wscale;
int mss;
int rcv_mss;
int advmss;
unsigned int pmtu;
unsigned int cwnd;
unsigned int lastsnd;
unsigned int lastrcv;
unsigned int lastack;
double pacing_rate;
double pacing_rate_max;
double delivery_rate;
unsigned long long bytes_acked;
unsigned long long bytes_received;
unsigned int segs_out;
unsigned int segs_in;
unsigned int data_segs_out;
unsigned int data_segs_in;
unsigned int unacked;
unsigned int retrans;
unsigned int retrans_total;
unsigned int lost;
unsigned int sacked;
unsigned int fackets;
unsigned int reordering;
unsigned int not_sent;
unsigned int delivered;
unsigned int delivered_ce;
unsigned int dsack_dups;
unsigned int reord_seen;
double rcv_rtt;
double min_rtt;
int rcv_space;
unsigned int rcv_ssthresh;
unsigned long long busy_time;
unsigned long long rwnd_limited;
unsigned long long sndbuf_limited;
unsigned long long bytes_sent;
unsigned long long bytes_retrans;
/* Option / feature flags */
bool has_ts_opt;
bool has_sack_opt;
bool has_ecn_opt;
bool has_ecnseen_opt;
bool has_fastopen_opt;
bool has_wscale_opt;
bool app_limited;
/* Optional algorithm-specific data */
struct dctcpstat *dctcp;
struct tcp_bbr_info *bbr_info;
};
/* An SCTP association carries the inode number of the endpoint it belongs
 * to. A socket is therefore treated as an association when it is an "sctp"
 * socket and its inode equals the remembered, non-zero sctp_ino.
 */
static bool is_sctp_assoc(struct sockstat *s, const char *sock_name)
{
	return strcmp(sock_name, "sctp") == 0 &&
	       sctp_ino && sctp_ino == s->ino;
}
/* Netid label for a UNIX socket of the given SOCK_* type. Datagram is the
 * fallback for every type that is neither stream nor seqpacket.
 */
static const char *unix_netid_name(int type)
{
	if (type == SOCK_STREAM)
		return "u_str";
	if (type == SOCK_SEQPACKET)
		return "u_seq";

	/* SOCK_DGRAM and anything unrecognised */
	return "u_dgr";
}
/* Netid label for an IP protocol number; protocol 0 is shown as "raw" and
 * unknown protocols as "???".
 */
static const char *proto_name(int protocol)
{
	static const struct {
		int protocol;
		const char *label;
	} known[] = {
		{ 0,			"raw" },
		{ IPPROTO_UDP,		"udp" },
		{ IPPROTO_TCP,		"tcp" },
		{ IPPROTO_SCTP,		"sctp" },
		{ IPPROTO_DCCP,		"dccp" },
		{ IPPROTO_ICMPV6,	"icmp6" },
	};
	size_t i;

	for (i = 0; i < sizeof(known) / sizeof(known[0]); i++) {
		if (known[i].protocol == protocol)
			return known[i].label;
	}

	return "???";
}
/* Netid label for a vsock socket of the given SOCK_* type; "???" for any
 * type other than stream or datagram.
 */
static const char *vsock_netid_name(int type)
{
	if (type == SOCK_STREAM)
		return "v_str";
	if (type == SOCK_DGRAM)
		return "v_dgr";

	return "???";
}
/* Netid label for a TIPC socket of the given SOCK_* type; "???" for types
 * TIPC labels are not defined for.
 */
static const char *tipc_netid_name(int type)
{
	if (type == SOCK_STREAM)
		return "ti_st";
	if (type == SOCK_DGRAM)
		return "ti_dg";
	if (type == SOCK_RDM)
		return "ti_rd";
	if (type == SOCK_SEQPACKET)
		return "ti_sq";

	return "???";
}
/* Allocate and initialize a new buffer chunk */
static struct buf_chunk *buf_chunk_new(void)
{
struct buf_chunk *new = malloc(BUF_CHUNK);
if (!new)
abort();
new->next = NULL;
/* This is also the last block */
buffer.tail = new;
/* Next token will be stored at the beginning of chunk data area, and
* its initial length is zero.
*/
buffer.cur = (struct buf_token *)new->data;
buffer.cur->len = 0;
new->end = buffer.cur->data;
buffer.chunks++;
return new;
}
/* Number of bytes still free at the end of @chunk: the data area capacity
 * minus what has been written so far.
 */
static int buf_chunk_avail(struct buf_chunk *chunk)
{
	const int capacity = BUF_CHUNK - offsetof(struct buf_chunk, data);
	const int used = chunk->end - chunk->data;

	return capacity - used;
}
/* Account for @len bytes just printed at the end of the current token.
 *
 * Update end pointer and token length, link new chunk if we hit the end of the
 * current one. Return -EAGAIN if we got a new chunk, caller has to print again;
 * return 0 when the bytes were accepted (possibly cut short).
 */
static int buf_update(int len)
{
struct buf_chunk *chunk = buffer.tail;
struct buf_token *t = buffer.cur;
/* Claim success if new content fits in the current chunk, and anyway
 * if this is the first token in the chunk: in the latter case,
 * allocating a new chunk won't help, so we'll just cut the output.
 */
if ((len < buf_chunk_avail(chunk) && len != -1 /* glibc < 2.0.6 */) ||
t == (struct buf_token *)chunk->data) {
len = min(len, buf_chunk_avail(chunk));
/* Total field length can't exceed 2^16 bytes, cut as needed */
len = min(len, USHRT_MAX - t->len);
chunk->end += len;
t->len += len;
return 0;
}
/* Content truncated, time to allocate more */
chunk->next = buf_chunk_new();
/* Copy current token over to new chunk, including length descriptor */
memcpy(chunk->next->data, t, sizeof(t->len) + t->len);
/* buf_chunk_new() left 'end' just past an empty token header */
chunk->next->end += t->len;
/* Discard partially written field in old chunk */
chunk->end -= t->len + sizeof(t->len);
return -EAGAIN;
}
/* printf-style append to the token of the current field.
 *
 * Nothing is stored for a disabled column. The first chunk is allocated on
 * demand. Formatting is limited to the tail room of the current chunk; when
 * buf_update() reports that it moved the token to a fresh chunk, the text is
 * formatted once more at the new position.
 */
__attribute__((format(printf, 1, 2)))
static void out(const char *fmt, ...)
{
	va_list ap;
	char *dst;
	int written;

	if (current_field->disabled)
		return;

	if (buffer.head == NULL)
		buffer.head = buf_chunk_new();

	do {
		dst = buffer.cur->data + buffer.cur->len;

		va_start(ap, fmt);
		written = vsnprintf(dst, buf_chunk_avail(buffer.tail), fmt, ap);
		va_end(ap);
	} while (buf_update(written));
}
/* Print the padding that goes in front of a field.
 *
 * @f:       column being printed
 * @stored:  length of the field content
 * @printed: characters already printed for this field (the delimiter)
 *
 * Left-aligned and zero-width columns get no leading padding. Right-aligned
 * columns get all the free space, centred ones get half of it, rounded up.
 * Returns what printf() returned for the padding, or 0 if none was printed.
 */
static int print_left_spacing(struct column *f, int stored, int printed)
{
	int gap;

	if (f->width == 0 || f->align == ALIGN_LEFT)
		return 0;

	gap = f->width - stored - printed;
	if (f->align == ALIGN_CENTER) {
		/* If count of total spacing is odd, shift right by one */
		gap = (gap + 1) / 2;
	}

	if (gap <= 0)
		return 0;

	return printf("%*c", gap, ' ');
}
/* Print the padding that follows a field.
 *
 * @f:       column being printed
 * @printed: characters printed for this field so far
 *
 * Right-aligned and zero-width columns get no trailing padding. Left-aligned
 * columns are filled up to the column width, centred ones get half of the
 * remaining space, rounded down.
 */
static void print_right_spacing(struct column *f, int printed)
{
	int gap;

	if (f->width == 0 || f->align == ALIGN_RIGHT)
		return;

	gap = f->width - printed;
	if (f->align == ALIGN_CENTER)
		gap /= 2;

	if (gap <= 0)
		return;

	printf("%*c", gap, ' ');
}
/* Done with field: update buffer pointer, start new token after current one.
 * Also records the longest content seen so far for column @f. Does nothing
 * for a disabled column.
 */
static void field_flush(struct column *f)
{
struct buf_chunk *chunk;
unsigned int pad;
if (f->disabled)
return;
chunk = buffer.tail;
/* One padding byte keeps the next length descriptor on an even offset */
pad = buffer.cur->len % 2;
if (buffer.cur->len > f->max_len)
f->max_len = buffer.cur->len;
/* We need a new chunk if we can't store the next length descriptor.
 * Mind the gap between end of previous token and next aligned position
 * for length descriptor.
 * NOTE(review): 'pad' is unsigned, so the subtraction below is done in
 * unsigned arithmetic and would wrap if the tail room were smaller than
 * 'pad' — confirm that cannot happen.
 */
if (buf_chunk_avail(chunk) - pad < sizeof(buffer.cur->len)) {
chunk->end += pad;
chunk->next = buf_chunk_new();
return;
}
buffer.cur = (struct buf_token *)(buffer.cur->data +
LEN_ALIGN(buffer.cur->len));
buffer.cur->len = 0;
buffer.tail->end = buffer.cur->data;
}
/* Return non-zero if @f is the last entry (index COL_MAX - 1) of columns[] */
static int field_is_last(struct column *f)
{
	return f == &columns[COL_MAX - 1];
}
/* Return the token following @cur while replaying the buffer.
 *
 * When @cur is the last token of the chunk buffer.tail points to,
 * buffer.tail advances to the next chunk and that chunk's first token is
 * returned, or NULL if there is no further chunk.
 */
static struct buf_token *buf_token_next(struct buf_token *cur)
{
	char *after = cur->data + LEN_ALIGN(cur->len);

	if (after != buffer.tail->end)
		return (struct buf_token *)after;

	/* End of chunk contents reached, continue with the next chunk */
	buffer.tail = buffer.tail->next;
	if (buffer.tail == NULL)
		return NULL;

	return (struct buf_token *)buffer.tail->data;
}
/* Free up all allocated buffer chunks */
static void buf_free_all(void)
{
struct buf_chunk *tmp;
for (buffer.tail = buffer.head; buffer.tail; ) {
tmp = buffer.tail;
buffer.tail = buffer.tail->next;
free(tmp);
}
buffer.head = NULL;
buffer.chunks = 0;
}
/* Width of the terminal attached to standard output, in columns.
 *
 * Returns -1 when standard output is not a terminal, when TIOCGWINSZ fails,
 * or when the reported width is zero.
 */
static int render_screen_width(void)
{
	struct winsize ws;

	if (!isatty(STDOUT_FILENO))
		return -1;

	if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &ws) == -1)
		return -1;

	return ws.ws_col > 0 ? ws.ws_col : -1;
}
/* Calculate column width from contents length. If columns don't fit on one
 * line, break them into the least possible amount of lines and keep them
 * aligned across lines. Available screen space is equally spread between fields
 * as additional spacing.
 *
 * When the screen width cannot be determined, output is compact: widths are
 * set from the measured lengths only and no extra spacing is distributed.
 */
static void render_calc_width(void)
{
int screen_width, first, len = 0, linecols = 0;
/* eol: last column of the previous output line; starts before columns[0] */
struct column *c, *eol = columns - 1;
bool compact_output = false;
screen_width = render_screen_width();
if (screen_width == -1) {
screen_width = INT_MAX;
compact_output = true;
}
/* First pass: set width for each column to measured content length */
for (first = 1, c = columns; c - columns < COL_MAX; c++) {
if (c->disabled)
continue;
/* The left delimiter only counts for columns after the first
 * non-empty one
 */
if (!first && c->max_len)
c->width = c->max_len + strlen(c->ldelim);
else
c->width = c->max_len;
/* But don't exceed screen size. If we exceed the screen size
 * for even a single field, it will just start on a line of its
 * own and then naturally wrap.
 */
c->width = min(c->width, screen_width);
if (c->width)
first = 0;
}
if (compact_output) {
/* Compact output, skip extending columns. */
return;
}
/* Second pass: find out newlines and distribute available spacing */
for (c = columns; c - columns < COL_MAX; c++) {
int pad, spacing, rem, last;
struct column *tmp;
if (!c->width)
continue;
linecols++;
len += c->width;
/* 'last' stays set if no later column has a width */
for (last = 1, tmp = c + 1; tmp - columns < COL_MAX; tmp++) {
if (tmp->width) {
last = 0;
break;
}
}
if (!last && len < screen_width) {
/* Columns fit on screen so far, nothing to do yet */
continue;
}
if (len == screen_width) {
/* Exact fit, just start with new line */
goto newline;
}
if (len > screen_width) {
/* Screen width exceeded: go back one column */
len -= c->width;
c--;
linecols--;
}
/* Distribute remaining space to columns on this line */
pad = screen_width - len;
spacing = pad / linecols;
rem = pad % linecols;
/* Walk back to the start of this line; the remainder goes, one
 * character each, to the rightmost columns
 */
for (tmp = c; tmp > eol; tmp--) {
if (!tmp->width)
continue;
tmp->width += spacing;
if (rem) {
tmp->width++;
rem--;
}
}
newline:
/* Line break: reset line counters, mark end-of-line */
eol = c;
len = 0;
linecols = 0;
}
}
/* Render buffered output with spacing and delimiters, then free up buffers.
 * Does nothing if no chunk was ever allocated. Afterwards the current field
 * is reset to the first column.
 */
static void render(void)
{
struct buf_token *token;
int printed, line_started = 0;
struct column *f;
if (!buffer.head)
return;
token = (struct buf_token *)buffer.head->data;
/* Ensure end alignment of last token, it wasn't necessarily flushed */
buffer.tail->end += buffer.cur->len % 2;
render_calc_width();
/* Rewind and replay */
buffer.tail = buffer.head;
/* Start at the first column that ended up with a width */
f = columns;
while (!f->width)
f++;
while (token) {
/* Print left delimiter only if we already started a line */
if (line_started++)
printed = printf("%s", f->ldelim);
else
printed = 0;
/* Print field content from token data with spacing */
printed += print_left_spacing(f, token->len, printed);
printed += fwrite(token->data, 1, token->len, stdout);
print_right_spacing(f, printed);
/* Go to next non-empty field, deal with end-of-line */
do {
if (field_is_last(f)) {
printf("\n");
f = columns;
line_started = 0;
} else {
f++;
}
} while (f->disabled);
/* Also advances buffer.tail when the chunk is exhausted */
token = buf_token_next(token);
}
/* Deal with final end-of-line when the last non-empty field printed
 * is not the last field.
 */
if (line_started)
printf("\n");
buf_free_all();
current_field = columns;
}
/* Finish the current field and move on to the following one, wrapping from
 * the last column back to the first.
 *
 * If the current field is the last one of a line and the buffer has reached
 * BUF_CHUNKS_MAX chunks, everything buffered so far is rendered instead;
 * render() itself resets the current field.
 */
static void field_next(void)
{
	const int at_line_end = field_is_last(current_field);

	if (at_line_end && buffer.chunks >= BUF_CHUNKS_MAX) {
		render();
		return;
	}

	field_flush(current_field);
	current_field = at_line_end ? columns : current_field + 1;
}
/* Walk through fields and flush them until we reach the desired one */
static void field_set(enum col_id id)
{
while (id != current_field - columns)
field_next();
}
/* Emit the header text of each enabled column, starting at the current
 * field and stopping once the last column is reached; the last column's own
 * header is not emitted here.
 */
static void print_header(void)
{
	for (; !field_is_last(current_field); field_next()) {
		if (current_field->disabled)
			continue;
		out("%s", current_field->header);
	}
}
/* Emit the Netid, State, Recv-Q and Send-Q fields for socket @s and leave
 * the local address column selected for the caller.
 *
 * The Netid is derived from the address family and socket type. SCTP
 * associations get an empty Netid and a "`- " prefix on their state.
 * NOTE(review): s->state indexes sstate_name[] / sctp_sstate_name[] without
 * a range check — confirm callers only pass valid states.
 */
static void sock_state_print(struct sockstat *s)
{
const char *sock_name;
/* Display names indexed by SS_*; index 0 (SS_UNKNOWN) is "UNKNOWN" */
static const char * const sstate_name[] = {
"UNKNOWN",
[SS_ESTABLISHED] = "ESTAB",
[SS_SYN_SENT] = "SYN-SENT",
[SS_SYN_RECV] = "SYN-RECV",
[SS_FIN_WAIT1] = "FIN-WAIT-1",
[SS_FIN_WAIT2] = "FIN-WAIT-2",
[SS_TIME_WAIT] = "TIME-WAIT",
[SS_CLOSE] = "UNCONN",
[SS_CLOSE_WAIT] = "CLOSE-WAIT",
[SS_LAST_ACK] = "LAST-ACK",
[SS_LISTEN] = "LISTEN",
[SS_CLOSING] = "CLOSING",
};
switch (s->local.family) {
case AF_UNIX:
sock_name = unix_netid_name(s->type);
break;
case AF_INET:
case AF_INET6:
/* For inet sockets s->type is passed as the protocol number */
sock_name = proto_name(s->type);
break;
case AF_PACKET:
sock_name = s->type == SOCK_RAW ? "p_raw" : "p_dgr";
break;
case AF_NETLINK:
sock_name = "nl";
break;
case AF_TIPC:
sock_name = tipc_netid_name(s->type);
break;
case AF_VSOCK:
sock_name = vsock_netid_name(s->type);
break;
case AF_XDP:
sock_name = "xdp";
break;
default:
sock_name = "unknown";
}
if (is_sctp_assoc(s, sock_name)) {
field_set(COL_STATE); /* Empty Netid field */
out("`- %s", sctp_sstate_name[s->state]);
} else {
field_set(COL_NETID);
out("%s", sock_name);
field_set(COL_STATE);
out("%s", sstate_name[s->state]);
}
field_set(COL_RECVQ);
out("%-6d", s->rq);
field_set(COL_SENDQ);
out("%-6d", s->wq);
field_set(COL_ADDR);
}
static void sock_details_print(struct sockstat *s)
{
if (s->uid)
out(" uid:%u", s->uid);
out(" ino:%u", s->ino);
out(" sk:%llx", s->sk);
if (s->mark)
out(" fwmark:0x%x", s->mark);
if (s->cgroup_id)
out(" cgroup:%s", cg_id_to_path(s->cgroup_id));
}
/* Emit an address/port pair into two consecutive fields.
 *
 * The address field receives @addr, then "%<ifname>" when @ifname is given,
 * then @delim. The following field receives @port. On return the current
 * field is the one after the port.
 */
static void sock_addr_print(const char *addr, char *delim, const char *port,
			    const char *ifname)
{
	if (ifname == NULL)
		out("%s%s", addr, delim);
	else
		out("%s" "%%" "%s%s", addr, ifname, delim);
	field_next();

	out("%s", port);
	field_next();
}
/* Format a duration given in milliseconds as a short string such as
 * "500ms", "1.500ms", "10sec", "1min1sec" or "10min".
 *
 * Precision drops as the value grows: milliseconds are shown only below ten
 * seconds and without minutes, seconds only below ten minutes. A zero
 * duration yields an empty string.
 *
 * The result lives in a static buffer that the next call overwrites.
 */
static const char *print_ms_timer(unsigned int timeout)
{
	static char buf[64];
	int whole_secs = timeout / 1000;
	int msecs = timeout % 1000;
	int minutes = whole_secs / 60;
	int secs = whole_secs % 60;
	int used = 0;

	if (minutes || secs > 9)
		msecs = 0;
	if (minutes > 9)
		secs = 0;

	buf[0] = 0;
	if (minutes)
		used += snprintf(buf, sizeof(buf)-16, "%dmin", minutes);
	if (secs)
		used += sprintf(buf + used, "%d%s", secs, msecs ? "." : "sec");
	if (msecs)
		sprintf(buf + used, "%03dms", msecs);

	return buf;
}
/* One cached (port, protocol) -> service name mapping */
struct scache {
struct scache *next; /* Next entry in the same list / hash bucket */
int port;
char *name; /* Service name, or NULL when resolution found nothing */
const char *proto; /* One of the *_PROTO pointers; compared by identity */
};
/* Services listed by rpcinfo, filled by init_service_resolver() */
static struct scache *rlist;
/* Fill rlist from the output of "/usr/sbin/rpcinfo -p".
 *
 * The first output line is skipped. Every following line that parses as
 * "<program> <version> <proto> <port> <name>" becomes an entry named
 * "rpc.<name>", pushed at the head of rlist. Lines that do not parse, and
 * entries that cannot be allocated, are skipped. If the command cannot be
 * started the list is left as it was.
 */
static void init_service_resolver(void)
{
	char line[128];
	FILE *rpc;

	rpc = popen("/usr/sbin/rpcinfo -p 2>/dev/null", "r");
	if (rpc == NULL)
		return;

	/* Drop the first line; without it there is nothing to read either */
	if (fgets(line, sizeof(line), rpc) != NULL) {
		while (fgets(line, sizeof(line), rpc) != NULL) {
			char proto[128], prog[128] = "rpc.";
			unsigned int progn, port;
			struct scache *entry;

			if (sscanf(line, "%u %*d %s %u %s",
				   &progn, proto, &port, prog+4) != 4)
				continue;

			entry = malloc(sizeof(*entry));
			if (entry == NULL)
				continue;

			entry->port = port;
			entry->name = strdup(prog);

			/* Store the canonical pointer, lookups compare it */
			if (strcmp(proto, TCP_PROTO) == 0)
				entry->proto = TCP_PROTO;
			else if (strcmp(proto, UDP_PROTO) == 0)
				entry->proto = UDP_PROTO;
			else if (strcmp(proto, SCTP_PROTO) == 0)
				entry->proto = SCTP_PROTO;
			else
				entry->proto = NULL;

			entry->next = rlist;
			rlist = entry;
		}
	}

	pclose(rpc);
}
/* Ports in the local ephemeral range are never resolved to service names:
 * the default /etc/services assigns names to so many ports in that area
 * that resolving them does more harm than good ("socks" or "cfinger"
 * showing up in dumps).
 *
 * The range is read once through ephemeral_ports_open(); if that fails, or
 * two numbers cannot be read, 1024-4999 is used. Returns non-zero if @port
 * lies within the range, bounds included.
 */
static int is_ephemeral(int port)
{
	static int lo, hi;

	if (lo == 0) {
		FILE *range = ephemeral_ports_open();
		int parsed = 0;

		if (range != NULL)
			parsed = fscanf(range, "%d %d", &lo, &hi);

		if (parsed < 2) {
			lo = 1024;
			hi = 4999;
		}

		if (range != NULL)
			fclose(range);
	}

	return lo <= port && port <= hi;
}
/* Look up the service name of @port for the current protocol (dg_proto).
 *
 * The rpcinfo list is searched first. Ports outside the ephemeral range are
 * then looked up in the services database, which is kept open after the
 * first query. Returns the name, or NULL if none was found.
 */
static const char *__resolve_service(int port)
{
	static int services_opened;
	struct servent *found;
	struct scache *entry = rlist;

	while (entry != NULL) {
		if (entry->port == port && entry->proto == dg_proto)
			return entry->name;
		entry = entry->next;
	}

	if (is_ephemeral(port))
		return NULL;

	if (!services_opened) {
		setservent(1);
		services_opened = 1;
	}

	found = getservbyport(htons(port), dg_proto);
	return found ? found->s_name : NULL;
}
#define SCACHE_BUCKETS 1024
/* Cache of resolve_service() results, hashed by port and protocol pointer. */
static struct scache *cache_htab[SCACHE_BUCKETS];

/*
 * Return a printable name for a port under the current protocol.
 *
 * Port 0 is shown as "*".  With the numeric option set, or when no name
 * is known, the decimal port number is returned.  For RAW_PROTO the
 * "port" is passed to inet_proto_n2a().  Other lookups, including
 * negative ones, are cached in cache_htab.
 *
 * The result may point to a static buffer that the next call overwrites.
 */
static const char *resolve_service(int port)
{
	static char buf[128];
	struct scache *ent = NULL;
	int slot;

	if (port == 0) {
		buf[0] = '*';
		buf[1] = 0;
		return buf;
	}

	if (!numeric) {
		if (dg_proto == RAW_PROTO)
			return inet_proto_n2a(port, buf, sizeof(buf));

		slot = (port^(((unsigned long)dg_proto)>>2)) % SCACHE_BUCKETS;

		for (ent = cache_htab[slot]; ent; ent = ent->next) {
			if (ent->port == port && ent->proto == dg_proto)
				break;
		}

		if (!ent) {
			/* Cache miss: resolve once and remember the answer. */
			ent = malloc(sizeof(*ent));
			if (ent) {
				const char *res = __resolve_service(port);

				ent->port = port;
				ent->name = res ? strdup(res) : NULL;
				ent->proto = dg_proto;
				ent->next = cache_htab[slot];
				cache_htab[slot] = ent;
			}
		}

		if (ent && ent->name)
			return ent->name;
	}

	/* Numeric fallback. */
	sprintf(buf, "%u", port);
	return buf;
}
/*
 * Print one "address:port" endpoint through sock_addr_print().
 *
 * a       - address to format
 * port    - port, rendered through resolve_service()
 * ifindex - when non-zero, the interface name is looked up and passed on
 * v6only  - when false, an all-zero non-IPv4 address is shown as "*"
 *
 * Formatted non-IPv4 addresses containing ':' are wrapped in brackets.
 */
static void inet_addr_print(const inet_prefix *a, int port,
			    unsigned int ifindex, bool v6only)
{
	char buf[1024];
	const char *ap;
	const char *ifname;

	if (a->family == AF_INET) {
		ap = format_host(AF_INET, 4, a->data);
	} else if (v6only ||
		   memcmp(a->data, &in6addr_any, sizeof(in6addr_any)) != 0) {
		ap = format_host(a->family, 16, a->data);
		/* Numeric IPv6 addresses should be bracketed */
		if (strchr(ap, ':')) {
			snprintf(buf, sizeof(buf), "[%s]", ap);
			ap = buf;
		}
	} else {
		/* Wildcard address. */
		buf[0] = '*';
		buf[1] = 0;
		ap = buf;
	}

	ifname = ifindex ? ll_index_to_name(ifindex) : NULL;

	sock_addr_print(ap, ":", resolve_service(port), ifname);
}
/*
 * Predicate payload for the leaf nodes of a socket filter expression.
 * Which members are meaningful depends on the filter node type that
 * points at it (see run_ssfilter()).
 */
struct aafilter {
/* Address/prefix to match; for AF_UNIX the data holds a pattern pointer. */
inet_prefix addr;
/* Port to match or compare against; -1 means "any port" in host conditions. */
int port;
/* Interface index compared against the socket's iface (SSF_DEVCOND). */
unsigned int iface;
/* Expected value of (socket mark & mask) (SSF_MARKMASK). */
__u32 mark;
/* Mask applied to the socket mark before comparing with mark. */
__u32 mask;
/* cgroup id compared against the socket's cgroup_id (SSF_CGROUPCOND). */
__u64 cgroup_id;
/* Further alternative addresses for the same condition, or NULL. */
struct aafilter *next;
};
/*
 * Compare socket address a with filter prefix p over plen bits,
 * with special handling of IPv4-mapped IPv6 addresses.
 *
 * Returns 0 when inet_addr_match() returns 0 for the pair; the callers
 * in run_ssfilter() treat 0 as "condition satisfied".
 *
 * Cursed "v4 mapped" addresses: a v4-mapped socket matches a pure IPv4
 * rule, but a v4-mapped rule selects only v4-mapped sockets.  So when
 * the rule is AF_INET and the socket is a ::ffff:a.b.c.d AF_INET6
 * address, the embedded IPv4 word is compared instead.
 */
static int inet2_addr_match(const inet_prefix *a, const inet_prefix *p,
			    int plen)
{
	inet_prefix tmp;

	if (inet_addr_match(a, p, plen) == 0)
		return 0;

	if (p->family != AF_INET || a->family != AF_INET6)
		return 1;

	if (a->data[0] != 0 || a->data[1] != 0 ||
	    a->data[2] != htonl(0xffff))
		return 1;

	/* Move the embedded IPv4 address to the front and retry. */
	tmp = *a;
	tmp.data[0] = a->data[3];
	return inet_addr_match(&tmp, p, plen);
}
/*
 * Match a unix socket name against a shell-style pattern.
 *
 * Both prefixes carry a char pointer stored in their data area: a holds
 * the socket name, p the pattern.  A NULL pattern matches everything; a
 * NULL name is treated as the empty string.  Returns 1 on match, else 0.
 */
static int unix_match(const inet_prefix *a, const inet_prefix *p)
{
	const char *name;
	const char *glob;

	memcpy(&name, a->data, sizeof(name));
	memcpy(&glob, p->data, sizeof(glob));

	if (!glob)
		return 1;

	return fnmatch(glob, name ? name : "", 0) == 0;
}
static int run_ssfilter(struct ssfilter *f, struct sockstat *s)
{
switch (f->type) {
case SSF_S_AUTO:
{
if (s->local.family == AF_UNIX) {
char *p;
memcpy(&p, s->local.data, sizeof(p));
return p == NULL || (p[0] == '@' && strlen(p) == 6 &&
strspn(p+1, "0123456789abcdef") == 5);
}
if (s->local.family == AF_PACKET)
return s->lport == 0 && s->local.data[0] == 0;
if (s->local.family == AF_NETLINK)
return s->lport < 0;
if (s->local.family == AF_VSOCK)
return s->lport > 1023;
return is_ephemeral(s->lport);
}
case SSF_DCOND:
{
struct aafilter *a = (void *)f->pred;
if (a->addr.family == AF_UNIX)
return unix_match(&s->remote, &a->addr);
if (a->port != -1 && a->port != s->rport)
return 0;
if (a->addr.bitlen) {
do {
if (!inet2_addr_match(&s->remote, &a->addr, a->addr.bitlen))
return 1;
} while ((a = a->next) != NULL);
return 0;
}
return 1;
}
case SSF_SCOND:
{
struct aafilter *a = (void *)f->pred;
if (a->addr.family == AF_UNIX)
return unix_match(&s->local, &a->addr);
if (a->port != -1 && a->port != s->lport)
return 0;
if (a->addr.bitlen) {
do {
if (!inet2_addr_match(&s->local, &a->addr, a->addr.bitlen))
return 1;
} while ((a = a->next) != NULL);
return 0;
}
return 1;
}
case SSF_D_GE:
{
struct aafilter *a = (void *)f->pred;
return s->rport >= a->port;
}
case SSF_D_LE:
{
struct aafilter *a = (void *)f->pred;
return s->rport <= a->port;
}
case SSF_S_GE:
{
struct aafilter *a = (void *)f->pred;
return s->lport >= a->port;
}
case SSF_S_LE:
{
struct aafilter *a = (void *)f->pred;
return s->lport <= a->port;
}
case SSF_DEVCOND:
{
struct aafilter *a = (void *)f->pred;
return s->iface == a->iface;
}
case SSF_MARKMASK:
{
struct aafilter *a = (void *)f->pred;
return (s->mark & a->mask) == a->mark;
}
case SSF_CGROUPCOND:
{
struct aafilter *a = (void *)f->pred;
return s->cgroup_id == a->cgroup_id;
}
/* Yup. It is recursion. Sorry. */
case SSF_AND:
return run_ssfilter(f->pred, s) && run_ssfilter(f->post, s);
case SSF_OR:
return run_ssfilter(f->pred, s) || run_ssfilter(f->post, s);
case SSF_NOT:
return !run_ssfilter(f->pred, s);
default:
abort();
}
}
/*
 * Relocate external jumps by reloc.
 *
 * Walks the len bytes of bytecode at a, one op at a time (each op
 * advances by its own "yes" offset).  An op whose "no" offset equals
 * the remaining length plus 4 has that offset increased by reloc.
 * Aborts if the walk overshoots the end of the fragment.
 */
static void ssfilter_patch(char *a, int len, int reloc)
{
	struct inet_diag_bc_op *op;
	int step;

	for (; len > 0; len -= step, a += step) {
		op = (struct inet_diag_bc_op *)a;
		if (op->no == len+4)
			op->no += reloc;
		step = op->yes;
	}

	if (len < 0)
		abort();
}
/*
 * Emit an 8-byte port comparison: the opcode slot followed by a second
 * slot whose third field carries the port number.
 */
static int ssfilter_emit_port_cmp(int code, int port, char **bytecode)
{
	struct inet_diag_bc_op *ops;

	if (!(ops = malloc(8))) abort();
	ops[0] = (struct inet_diag_bc_op){ code, 8, 12 };
	ops[1] = (struct inet_diag_bc_op){ 0, 0, port };
	*bytecode = (char *)ops;
	return 8;
}

/*
 * Compile filter expression f into inet_diag bytecode.
 *
 * On success *bytecode points to a malloc()ed buffer and its length in
 * bytes is returned.  0 is returned when the expression contains a node
 * that cannot be compiled (SSF_DEVCOND); *bytecode is not set by that
 * node.  Allocation failure and unknown node types abort().
 *
 * Each op is initialised positionally as { code, yes, no }.
 */
static int ssfilter_bytecompile(struct ssfilter *f, char **bytecode)
{
	switch (f->type) {
	case SSF_S_AUTO:
	{
		if (!(*bytecode = malloc(4))) abort();
		((struct inet_diag_bc_op *)*bytecode)[0] = (struct inet_diag_bc_op){ INET_DIAG_BC_AUTO, 4, 8 };
		return 4;
	}
	case SSF_DCOND:
	case SSF_SCOND:
	{
		struct aafilter *a = (void *)f->pred;
		struct aafilter *b;
		char *ptr;
		int code = (f->type == SSF_DCOND ? INET_DIAG_BC_D_COND : INET_DIAG_BC_S_COND);
		int len = 0;

		/*
		 * First pass: total size.  Every alternative is sized from
		 * its own family, since a chain built from DNS results may
		 * mix IPv4 and IPv6 addresses.
		 */
		for (b = a; b; b = b->next) {
			len += 4 + sizeof(struct inet_diag_hostcond);
			len += (b->addr.family == AF_INET6) ? 16 : 4;
			if (b->next)
				len += 4;
		}
		if (!(ptr = malloc(len))) abort();
		*bytecode = ptr;

		/*
		 * Second pass: one condition per alternative, each followed
		 * (except the last) by a jump to the end of the block.
		 */
		for (b = a; b; b = b->next) {
			struct inet_diag_bc_op *op = (struct inet_diag_bc_op *)ptr;
			int alen = (b->addr.family == AF_INET6 ? 16 : 4);
			int oplen = alen + 4 + sizeof(struct inet_diag_hostcond);
			struct inet_diag_hostcond *cond = (struct inet_diag_hostcond *)(ptr+4);

			*op = (struct inet_diag_bc_op){ code, oplen, oplen+4 };
			/* Take the data from the current alternative, not the head. */
			cond->family = b->addr.family;
			cond->port = b->port;
			cond->prefix_len = b->addr.bitlen;
			memcpy(cond->addr, b->addr.data, alen);
			ptr += oplen;
			if (b->next) {
				op = (struct inet_diag_bc_op *)ptr;
				*op = (struct inet_diag_bc_op){ INET_DIAG_BC_JMP, 4, len - (ptr-*bytecode)};
				ptr += 4;
			}
		}
		return ptr - *bytecode;
	}
	case SSF_D_GE:
	{
		struct aafilter *x = (void *)f->pred;

		return ssfilter_emit_port_cmp(INET_DIAG_BC_D_GE, x->port, bytecode);
	}
	case SSF_D_LE:
	{
		struct aafilter *x = (void *)f->pred;

		return ssfilter_emit_port_cmp(INET_DIAG_BC_D_LE, x->port, bytecode);
	}
	case SSF_S_GE:
	{
		struct aafilter *x = (void *)f->pred;

		return ssfilter_emit_port_cmp(INET_DIAG_BC_S_GE, x->port, bytecode);
	}
	case SSF_S_LE:
	{
		struct aafilter *x = (void *)f->pred;

		return ssfilter_emit_port_cmp(INET_DIAG_BC_S_LE, x->port, bytecode);
	}

	case SSF_AND:
	{
		char *a1 = NULL, *a2 = NULL, *a;
		int l1, l2;

		l1 = ssfilter_bytecompile(f->pred, &a1);
		l2 = ssfilter_bytecompile(f->post, &a2);
		if (!l1 || !l2) {
			free(a1);
			free(a2);
			return 0;
		}
		if (!(a = malloc(l1+l2))) abort();
		memcpy(a, a1, l1);
		memcpy(a+l1, a2, l2);
		free(a1); free(a2);
		/* Jumps leaving the first operand must now skip the second too. */
		ssfilter_patch(a, l1, l2);
		*bytecode = a;
		return l1+l2;
	}
	case SSF_OR:
	{
		char *a1 = NULL, *a2 = NULL, *a;
		int l1, l2;

		l1 = ssfilter_bytecompile(f->pred, &a1);
		l2 = ssfilter_bytecompile(f->post, &a2);
		if (!l1 || !l2) {
			free(a1);
			free(a2);
			return 0;
		}
		if (!(a = malloc(l1+l2+4))) abort();
		memcpy(a, a1, l1);
		memcpy(a+l1+4, a2, l2);
		free(a1); free(a2);
		/* A jump between the operands skips the second one. */
		*(struct inet_diag_bc_op *)(a+l1) = (struct inet_diag_bc_op){ INET_DIAG_BC_JMP, 4, l2+4 };
		*bytecode = a;
		return l1+l2+4;
	}
	case SSF_NOT:
	{
		char *a1 = NULL, *a;
		int l1;

		l1 = ssfilter_bytecompile(f->pred, &a1);
		if (!l1) {
			free(a1);
			return 0;
		}
		if (!(a = malloc(l1+4))) abort();
		memcpy(a, a1, l1);
		free(a1);
		/* Trailing jump appended after the operand. */
		*(struct inet_diag_bc_op *)(a+l1) = (struct inet_diag_bc_op){ INET_DIAG_BC_JMP, 4, 8 };
		*bytecode = a;
		return l1+4;
	}
	case SSF_DEVCOND:
	{
		/* bytecompile for SSF_DEVCOND not supported yet */
		return 0;
	}
	case SSF_MARKMASK:
	{
		struct aafilter *a = (void *)f->pred;
		struct instr {
			struct inet_diag_bc_op op;
			struct inet_diag_markcond cond;
		};
		int inslen = sizeof(struct instr);

		if (!(*bytecode = malloc(inslen))) abort();
		((struct instr *)*bytecode)[0] = (struct instr) {
			{ INET_DIAG_BC_MARK_COND, inslen, inslen + 4 },
			{ a->mark, a->mask},
		};

		return inslen;
	}
	case SSF_CGROUPCOND:
	{
		struct aafilter *a = (void *)f->pred;
		/* Packed so the 64-bit id directly follows the 4-byte op. */
		struct instr {
			struct inet_diag_bc_op op;
			__u64 cgroup_id;
		} __attribute__((packed));
		int inslen = sizeof(struct instr);

		if (!(*bytecode = malloc(inslen))) abort();
		((struct instr *)*bytecode)[0] = (struct instr) {
			{ INET_DIAG_BC_CGROUP_COND, inslen, inslen + 4 },
			a->cgroup_id,
		};

		return inslen;
	}
	default:
		abort();
	}
}
/*
 * Record every address of a resolver result in the filter chain at a.
 *
 * The first address goes into a itself while a->addr.bitlen is still 0;
 * each later one is stored in a freshly allocated copy of a that is
 * linked in directly after a.  Only AF_INET and AF_INET6 results are
 * accepted.  Returns the number of addresses stored; an allocation
 * failure stops early with the count reached so far.
 */
static int remember_he(struct aafilter *a, struct hostent *he)
{
	char **ptr;
	int cnt = 0;
	int len;

	switch (he->h_addrtype) {
	case AF_INET:
		len = 4;
		break;
	case AF_INET6:
		len = 16;
		break;
	default:
		return 0;
	}

	for (ptr = he->h_addr_list; *ptr; ptr++, cnt++) {
		struct aafilter *b = a;

		if (a->addr.bitlen) {
			/* Head already holds an address: chain a copy. */
			b = malloc(sizeof(*b));
			if (b == NULL)
				return cnt;
			*b = *a;
			a->next = b;
		}

		memcpy(b->addr.data, *ptr, len);
		b->addr.bytelen = len;
		b->addr.bitlen = len*8;
		b->addr.family = he->h_addrtype;
	}

	return cnt;
}
/*
 * Resolve a host name and store its addresses in the filter chain at a.
 *
 * With fam == AF_UNSPEC both IPv4 and IPv6 are queried, in that order;
 * otherwise only the requested family.  Returns 0 when at least one
 * address was stored and 1 when none was.
 */
static int get_dns_host(struct aafilter *a, const char *addr, int fam)
{
	static int hostent_open;
	struct hostent *he;
	int first_fam = (fam == AF_UNSPEC) ? AF_INET : fam;
	int cnt = 0;

	/* bitlen 0 tells remember_he() that the head slot is still free. */
	a->addr.bitlen = 0;

	if (!hostent_open) {
		sethostent(1);
		hostent_open = 1;
	}

	he = gethostbyname2(addr, first_fam);
	if (he)
		cnt = remember_he(a, he);

	if (fam == AF_UNSPEC) {
		he = gethostbyname2(addr, AF_INET6);
		if (he)
			cnt += remember_he(a, he);
	}

	return cnt == 0;
}
/* Set once xll_init() has loaded the link map. */
static int xll_initted;
/*
 * Load the link map used by the ll_* lookup helpers.
 * Opens a temporary rtnetlink handle, lets ll_init_map() fill the map
 * and closes the handle again.  Exits the program with status 1 when
 * the handle cannot be opened.
 */
static void xll_init(void)
{
struct rtnl_handle rth;
if (rtnl_open(&rth, 0) < 0)
exit(1);
ll_init_map(&rth);
rtnl_close(&rth);
xll_initted = 1;
}
/*
 * Wrapper around ll_index_to_name() that makes sure the link map has
 * been loaded through xll_init() before the first lookup.
 */
static const char *xll_index_to_name(int index)
{
if (!xll_initted)
xll_init();
return ll_index_to_name(index);
}
/*
 * Wrapper around ll_name_to_index() that makes sure the link map has
 * been loaded through xll_init() before the first lookup.
 */
static int xll_name_to_index(const char *dev)
{
if (!xll_initted)
xll_init();
return ll_name_to_index(dev);
}
/*
 * Build a device-condition predicate from a device name or index.
 *
 * name is first looked up as an interface name; if that yields 0 it is
 * parsed as an unsigned integer index (any base strtoul() accepts with
 * base 0).  Returns a malloc()ed struct aafilter with iface set, or
 * NULL when name is neither a known device nor a valid index, or when
 * memory cannot be allocated.
 */
void *parse_devcond(char *name)
{
	struct aafilter a = { .iface = 0 };
	struct aafilter *res;

	a.iface = xll_name_to_index(name);
	if (a.iface == 0) {
		char *end;
		unsigned long n;

		n = strtoul(name, &end, 0);
		/* Reject empty input, trailing junk and out-of-range values. */
		if (!end || end == name || *end || n > UINT_MAX)
			return NULL;

		a.iface = n;
	}

	res = malloc(sizeof(*res));
	/* Same convention as the other parse_* helpers: NULL on failure. */
	if (res)
		*res = a;

	return res;
}
/*
 * Reset *a to an AF_VSOCK prefix carrying cid.
 * Every other member of *a, including bitlen, is cleared to zero.
 */
static void vsock_set_inet_prefix(inet_prefix *a, __u32 cid)
{
	inet_prefix fresh = {
		.family = AF_VSOCK,
		.bytelen = sizeof(cid),
	};

	memcpy(fresh.data, &cid, sizeof(cid));
	*a = fresh;
}
/*
 * Parse a host condition ("[family:]address[:port]") into a predicate.
 *
 * addr    - condition text; it is modified in place while being split
 * is_port - when true the whole text (after any family prefix) is a port
 *
 * The family comes from preferred_family or from a "unix:", "link:",
 * "netlink:", "vsock:", "inet:" or "inet6:" prefix.  For inet families
 * the address may be a bracketed literal, "*", a prefix, or a host name
 * resolved through DNS; the port may be a number or a service name.
 *
 * When a family was determined, current_filter is narrowed to it (its
 * states are kept).
 *
 * Returns a malloc()ed struct aafilter, or NULL on a parse error or
 * when memory cannot be allocated.
 */
void *parse_hostcond(char *addr, bool is_port)
{
	char *port = NULL;
	struct aafilter a = { .port = -1 };
	struct aafilter *res;
	int fam = preferred_family;
	struct filter *f = &current_filter;

	if (fam == AF_UNIX || strncmp(addr, "unix:", 5) == 0) {
		char *p;

		a.addr.family = AF_UNIX;
		if (strncmp(addr, "unix:", 5) == 0)
			addr += 5;
		p = strdup(addr);
		/* strlen() below must not see a NULL pointer. */
		if (!p)
			return NULL;
		a.addr.bitlen = 8*strlen(p);
		/* The pattern pointer itself is stored in the address data. */
		memcpy(a.addr.data, &p, sizeof(p));
		fam = AF_UNIX;
		goto out;
	}

	if (fam == AF_PACKET || strncmp(addr, "link:", 5) == 0) {
		a.addr.family = AF_PACKET;
		a.addr.bitlen = 0;
		if (strncmp(addr, "link:", 5) == 0)
			addr += 5;
		port = strchr(addr, ':');
		if (port) {
			*port = 0;
			if (port[1] && strcmp(port+1, "*")) {
				/* Not a number: try it as an interface name. */
				if (get_integer(&a.port, port+1, 0)) {
					if ((a.port = xll_name_to_index(port+1)) <= 0)
						return NULL;
				}
			}
		}
		if (addr[0] && strcmp(addr, "*")) {
			unsigned short tmp;

			a.addr.bitlen = 32;
			if (ll_proto_a2n(&tmp, addr))
				return NULL;
			a.addr.data[0] = ntohs(tmp);
		}
		fam = AF_PACKET;
		goto out;
	}

	if (fam == AF_NETLINK || strncmp(addr, "netlink:", 8) == 0) {
		a.addr.family = AF_NETLINK;
		a.addr.bitlen = 0;
		if (strncmp(addr, "netlink:", 8) == 0)
			addr += 8;
		port = strchr(addr, ':');
		if (port) {
			*port = 0;
			if (port[1] && strcmp(port+1, "*")) {
				if (get_integer(&a.port, port+1, 0)) {
					if (strcmp(port+1, "kernel") == 0)
						a.port = 0;
					else
						return NULL;
				}
			}
		}
		if (addr[0] && strcmp(addr, "*")) {
			a.addr.bitlen = 32;
			if (nl_proto_a2n(&a.addr.data[0], addr) == -1)
				return NULL;
		}
		fam = AF_NETLINK;
		goto out;
	}

	if (fam == AF_VSOCK || strncmp(addr, "vsock:", 6) == 0) {
		__u32 cid = ~(__u32)0;

		a.addr.family = AF_VSOCK;
		if (strncmp(addr, "vsock:", 6) == 0)
			addr += 6;

		if (is_port)
			port = addr;
		else {
			port = strchr(addr, ':');
			if (port) {
				*port = '\0';
				port++;
			}
		}

		if (port && strcmp(port, "*") &&
		    get_u32((__u32 *)&a.port, port, 0))
			return NULL;

		if (addr[0] && strcmp(addr, "*")) {
			a.addr.bitlen = 32;
			if (get_u32(&cid, addr, 0))
				return NULL;
		}
		/*
		 * NOTE(review): vsock_set_inet_prefix() assigns the whole
		 * prefix, so the bitlen of 32 set just above is reset to 0
		 * here - confirm whether that is intended.
		 */
		vsock_set_inet_prefix(&a.addr, cid);
		fam = AF_VSOCK;
		goto out;
	}

	if (fam == AF_INET || !strncmp(addr, "inet:", 5)) {
		fam = AF_INET;
		if (!strncmp(addr, "inet:", 5))
			addr += 5;
	} else if (fam == AF_INET6 || !strncmp(addr, "inet6:", 6)) {
		fam = AF_INET6;
		if (!strncmp(addr, "inet6:", 6))
			addr += 6;
	}

	/* URL-like literal [] */
	if (addr[0] == '[') {
		addr++;
		if ((port = strchr(addr, ']')) == NULL)
			return NULL;
		*port++ = 0;
	} else if (addr[0] == '*') {
		port = addr+1;
	} else {
		/* Last ':' after any '/', so a prefix length is not split. */
		port = strrchr(strchr(addr, '/') ? : addr, ':');
	}

	if (is_port)
		port = addr;

	if (port && *port) {
		if (*port == ':')
			*port++ = 0;

		if (*port && *port != '*') {
			if (get_integer(&a.port, port, 0)) {
				/* Not numeric: try the services database. */
				struct servent *se1 = NULL;
				struct servent *se2 = NULL;

				if (current_filter.dbs&(1<<UDP_DB))
					se1 = getservbyname(port, UDP_PROTO);
				if (current_filter.dbs&(1<<TCP_DB))
					se2 = getservbyname(port, TCP_PROTO);
				if (se1 && se2 && se1->s_port != se2->s_port) {
					fprintf(stderr, "Error: ambiguous port \"%s\".\n", port);
					return NULL;
				}
				if (!se1)
					se1 = se2;
				if (se1) {
					a.port = ntohs(se1->s_port);
				} else {
					/* Fall back to the RPC service list. */
					struct scache *s;

					for (s = rlist; s; s = s->next) {
						if ((s->proto == UDP_PROTO &&
						     (current_filter.dbs&(1<<UDP_DB))) ||
						    (s->proto == TCP_PROTO &&
						     (current_filter.dbs&(1<<TCP_DB)))) {
							if (s->name && strcmp(s->name, port) == 0) {
								if (a.port > 0 && a.port != s->port) {
									fprintf(stderr, "Error: ambiguous port \"%s\".\n", port);
									return NULL;
								}
								a.port = s->port;
							}
						}
					}
					if (a.port <= 0) {
						fprintf(stderr, "Error: \"%s\" does not look like a port.\n", port);
						return NULL;
					}
				}
			}
		}
	}
	if (!is_port && *addr && *addr != '*') {
		/* Literal prefix first, host name lookup second. */
		if (get_prefix_1(&a.addr, addr, fam)) {
			if (get_dns_host(&a, addr, fam)) {
				fprintf(stderr, "Error: an inet prefix is expected rather than \"%s\".\n", addr);
				return NULL;
			}
		}
	}

out:
	if (fam != AF_UNSPEC) {
		int states = f->states;
		f->families = 0;
		filter_af_set(f, fam);
		filter_states_set(f, states);
	}

	res = malloc(sizeof(*res));
	if (res)
		memcpy(res, &a, sizeof(a));
	return res;
}
void *parse_markmask(const char *markmask)
{
struct aafilter a, *res;
if (strchr(markmask, '/')) {
if (sscanf(markmask, "%i/%i", &a.mark, &a.mask) != 2)
return NULL;
} else {
a.mask = 0xffffffff;
if (sscanf(markmask, "%i", &a.mark) != 1)
return NULL;
}
res = malloc(sizeof(*res));
if (res)
memcpy(res, &a, sizeof(a));
return res;
}
/*
 * Build a cgroup-condition predicate for the cgroup at path.
 *
 * The id is obtained from get_cgroup2_id(); an id of 0 is treated as
 * failure.  Returns a zero-initialised, heap-allocated struct aafilter
 * with cgroup_id set, or NULL on failure.
 */
void *parse_cgroupcond(const char *path)
{
	struct aafilter *res;
	__u64 id;

	id = get_cgroup2_id(path);
	if (!id)
		return NULL;

	/* calloc() so the members not set here are not left as garbage. */
	res = calloc(1, sizeof(*res));
	if (res)
		res->cgroup_id = id;

	return res;
}
/*
 * Print the " users:(...)" column for socket s, if requested.
 *
 * The context options take precedence over the plain users option and
 * select which kind of entry find_entry() renders.  Nothing is printed
 * when no option is set or no entry is found for the socket's inode.
 */
static void proc_ctx_print(struct sockstat *s)
{
	char *buf;
	int type;

	if (show_proc_ctx || show_sock_ctx)
		type = (show_proc_ctx & show_sock_ctx) ?
			PROC_SOCK_CTX : PROC_CTX;
	else if (show_users)
		type = USERS;
	else
		return;

	if (find_entry(s->ino, &buf, type) <= 0)
		return;

	out(" users:(%s)", buf);
	free(buf);
}
/*
 * Print the common columns of an inet socket: state, local endpoint,
 * remote endpoint and the optional users/context column.
 * v6only is passed through to inet_addr_print() for both endpoints.
 */
static void inet_stats_print(struct sockstat *s, bool v6only)
{
sock_state_print(s);
/* Local endpoint, with the socket's interface index. */
inet_addr_print(&s->local, s->lport, s->iface, v6only);
/* Remote endpoint; ifindex 0 means no interface name is looked up. */
inet_addr_print(&s->remote, s->rport, 0, v6only);
proc_ctx_print(s);
}
/*
 * Fill the local and remote endpoints of s from the hexadecimal
 * "address:port" strings loc and rem.
 *
 * AF_INET input is one 32-bit word plus port; any other family is read
 * as four 32-bit words plus port.  The sscanf() results are not
 * checked.  Always returns 0.
 */
static int proc_parse_inet_addr(char *loc, char *rem, int family, struct
				sockstat * s)
{
	s->local.family = s->remote.family = family;

	if (family != AF_INET) {
		sscanf(loc, "%08x%08x%08x%08x:%x",
		       &s->local.data[0],
		       &s->local.data[1],
		       &s->local.data[2],
		       &s->local.data[3],
		       &s->lport);
		sscanf(rem, "%08x%08x%08x%08x:%x",
		       &s->remote.data[0],
		       &s->remote.data[1],
		       &s->remote.data[2],
		       &s->remote.data[3],
		       &s->rport);
		s->local.bytelen = s->remote.bytelen = 16;
	} else {
		sscanf(loc, "%x:%x", s->local.data, (unsigned *)&s->lport);
		sscanf(rem, "%x:%x", s->remote.data, (unsigned *)&s->rport);
		s->local.bytelen = s->remote.bytelen = 4;
	}

	return 0;
}
/*
 * Split one socket table line into its local address, remote address
 * and trailing data, in place.
 *
 * Expected layout: "<slot>: <addr>:<4-char port> <addr>:<4-char port> <data>".
 * The separator after each port is overwritten with NUL so that *loc
 * and *rem become standalone "addr:port" strings; *data points at the
 * remainder of the line.
 *
 * Returns 0 on success, -1 when a colon is missing or the line ends
 * before the position that would be written or pointed to.
 */
static int proc_inet_split_line(char *line, char **loc, char **rem, char **data)
{
	char *p;

	if ((p = strchr(line, ':')) == NULL)
		return -1;
	/* p+2 must still lie inside the string (terminator included). */
	if (p[1] == '\0')
		return -1;
	*loc = p+2;

	if ((p = strchr(*loc, ':')) == NULL)
		return -1;
	/* Need ':' plus five more characters before writing p[5]. */
	if (memchr(p, '\0', 6))
		return -1;
	p[5] = 0;
	*rem = p+6;

	if ((p = strchr(*rem, ':')) == NULL)
		return -1;
	if (memchr(p, '\0', 6))
		return -1;
	p[5] = 0;
	*data = p+6;

	return 0;
}
/*
 * Format a bandwidth given in bits per second using decimal unit
 * prefixes (k, M, G, T); see
 * https://en.wikipedia.org/wiki/Data-rate_units
 *
 * With the numeric option set the plain value is printed instead.
 * The text is written to buf, whose size is not checked here, and buf
 * is returned.
 */
static char *sprint_bw(char *buf, double bw)
{
	if (numeric) {
		sprintf(buf, "%.0f", bw);
		return buf;
	}

	/* Largest unit first; values below 1e3 get no suffix. */
	if (bw >= 1e12) {
		sprintf(buf, "%.3gT", bw / 1e12);
		return buf;
	}
	if (bw >= 1e9) {
		sprintf(buf, "%.3gG", bw / 1e9);
		return buf;
	}
	if (bw >= 1e6) {
		sprintf(buf, "%.3gM", bw / 1e6);
		return buf;
	}
	if (bw >= 1e3) {
		sprintf(buf, "%.3gk", bw / 1e3);
		return buf;
	}

	sprintf(buf, "%g", bw);
	return buf;
}
/*
 * Print the non-zero members of an SCTP info block as " key:value"
 * pairs, in a fixed order.  The association state is printed by name
 * through the sctp_sstate_name table.
 */
static void sctp_stats_print(struct sctp_info *s)
{
/* Emit one member with the given format, but only when it is non-zero. */
#define SCTP_STAT(field, fmt)			\
	do {					\
		if (s->field)			\
			out(fmt, s->field);	\
	} while (0)

	SCTP_STAT(sctpi_tag, " tag:%x");
	if (s->sctpi_state)
		out(" state:%s", sctp_sstate_name[s->sctpi_state]);
	SCTP_STAT(sctpi_rwnd, " rwnd:%d");
	SCTP_STAT(sctpi_unackdata, " unackdata:%d");
	SCTP_STAT(sctpi_penddata, " penddata:%d");
	SCTP_STAT(sctpi_instrms, " instrms:%d");
	SCTP_STAT(sctpi_outstrms, " outstrms:%d");
	SCTP_STAT(sctpi_inqueue, " inqueue:%d");
	SCTP_STAT(sctpi_outqueue, " outqueue:%d");
	SCTP_STAT(sctpi_overall_error, " overerr:%d");
	SCTP_STAT(sctpi_max_burst, " maxburst:%d");
	SCTP_STAT(sctpi_maxseg, " maxseg:%d");
	SCTP_STAT(sctpi_peer_rwnd, " prwnd:%d");
	SCTP_STAT(sctpi_peer_tag, " ptag:%x");
	SCTP_STAT(sctpi_peer_capable, " pcapable:%d");
	SCTP_STAT(sctpi_peer_sack, " psack:%d");
	SCTP_STAT(sctpi_s_autoclose, " autoclose:%d");
	SCTP_STAT(sctpi_s_adaptation_ind, " adapind:%d");
	SCTP_STAT(sctpi_s_pd_point, " pdpoint:%d");
	SCTP_STAT(sctpi_s_nodelay, " nodelay:%d");
	SCTP_STAT(sctpi_s_disable_fragments, " nofrag:%d");
	SCTP_STAT(sctpi_s_v4mapped, " v4mapped:%d");
	SCTP_STAT(sctpi_s_frag_interleave, " fraginl:%d");

#undef SCTP_STAT
}
static void tcp_stats_print(struct tcpstat *s)
{
char b1[64];
if (s->has_ts_opt)
out(" ts");
if (s->has_sack_opt)
out(" sack");
if (s->has_ecn_opt)
out(" ecn");
if (s->has_ecnseen_opt)
out(" ecnseen");
if (s->has_fastopen_opt)
out(" fastopen");
if (s->cong_alg[0])
out(" %s", s->cong_alg);
if (s->has_wscale_opt)
out(" wscale:%d,%d", s->snd_wscale, s->rcv_wscale);
if (s->rto)
out(" rto:%g", s->rto);
if (s->backoff)
out(" backoff:%u", s->backoff);
if (s->rtt)
out(" rtt:%g/%g", s->rtt, s->rttvar);
if (s->ato)
out(" ato:%g", s->ato);
if (s->qack)
out(" qack:%d", s->qack);
if (s->qack & 1)
out(" bidir");
if (s->mss)
out(" mss:%d", s->mss);
if (s->pmtu)
out(" pmtu:%u", s->pmtu);
if (s->rcv_mss)
out(" rcvmss:%d", s->rcv_mss);
if (s->advmss)
out(" advmss:%d", s->advmss);
if (s->cwnd)
out(" cwnd:%u", s->cwnd);
if (s->ssthresh)
out(" ssthresh:%d", s->ssthresh);
if (s->bytes_sent)
out(" bytes_sent:%llu", s->bytes_sent);
if (s->bytes_retrans)
out(" bytes_retrans:%llu", s->bytes_retrans);
if (s->bytes_acked)
out(" bytes_acked:%llu", s->bytes_acked);
if (s->bytes_received)
out(" bytes_received:%llu", s->bytes_received);
if (s->segs_out)
out(" segs_out:%u", s->segs_out);
if (s->segs_in)
out(" segs_in:%u", s->segs_in);
if (s->data_segs_out)
out(" data_segs_out:%u", s->data_segs_out);
if (s->data_segs_in)
out(" data_segs_in:%u", s->data_segs_in);
if (s->dctcp && s->dctcp->enabled) {
struct dctcpstat *dctcp = s->dctcp;
out(" dctcp:(ce_state:%u,alpha:%u,ab_ecn:%u,ab_tot:%u)",
dctcp->ce_state, dctcp->alpha, dctcp->ab_ecn,
dctcp->ab_tot);
} else if (s->dctcp) {
out(" dctcp:fallback_mode");
}
if (s->bbr_info) {
__u64 bw;
bw = s->bbr_info->bbr_bw_hi;
bw <<= 32;
bw |= s->bbr_info->bbr_bw_lo;
out(" bbr:(bw:%sbps,mrtt:%g",
sprint_bw(b1, bw * 8.0),
(double)s->bbr_info->bbr_min_rtt / 1000.0);
if (s->bbr_info->bbr_pacing_gain)
out(",pacing_gain:%g",
(double)s->bbr_info->bbr_pacing_gain / 256.0);
if (s->bbr_info->bbr_cwnd_gain)
out(",cwnd_gain:%g",
(double)s->bbr_info->bbr_cwnd_gain / 256.0);
out(")");
}
if (s->send_bps)
out(" send %sbps", sprint_bw(b1, s->send_bps));
if (s->lastsnd)
out(" lastsnd:%u", s->lastsnd);
if (s->lastrcv)
out(" lastrcv:%u", s->lastrcv);
if (s->lastack)
out(" lastack:%u", s->lastack);
if (s->pacing_rate) {
out(" pacing_rate %sbps", sprint_bw(b1, s->pacing_rate));
if (s->pacing_rate_max)
out("/%sbps", sprint_bw(b1, s->pacing_rate_max));
}
if (s->delivery_rate)
out(" delivery_rate %sbps", sprint_bw(b1, s->delivery_rate));
if (s->delivered)
out(" delivered:%u", s->delivered);
if (s->delivered_ce)
out(" delivered_ce:%u", s->delivered_ce);
if (s->app_limited)
out(" app_limited");
if (s->busy_time) {
out(" busy:%llums", s->busy_time / 1000);
if (s->rwnd_limited)
out(" rwnd_limited:%llums(%.1f%%)",
s->rwnd_limited / 1000,
100.0 * s->rwnd_limited / s->busy_time);
if (s->sndbuf_limited)
out(" sndbuf_limited:%llums(%.1f%%)",
s->sndbuf_limited / 1000,
100.0 * s->sndbuf_limited / s->busy_time);
}
if (s->unacked)
out(" unacked:%u", s->unacked);
if (s->retrans || s->retrans_total)
out(" retrans:%u/%u", s->retrans, s->retrans_total);