Merge ../vxlan-x
diff --git a/Documentation/devicetree/bindings/net/arc_emac.txt b/Documentation/devicetree/bindings/net/arc_emac.txt
new file mode 100644
index 0000000..bcbc3f0
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/arc_emac.txt
@@ -0,0 +1,38 @@
+* Synopsys ARC EMAC 10/100 Ethernet driver (EMAC)
+
+Required properties:
+- compatible: Should be "snps,arc-emac"
+- reg: Address and length of the register set for the device
+- interrupts: Should contain the EMAC interrupts
+- clock-frequency: CPU frequency. It is needed to calculate and set polling
+period of EMAC.
+- max-speed: Maximum supported data-rate in Mbit/s. In some HW configurations
+bandwidth of external memory controller might be a limiting factor. That's why
+it's required to specify which data-rate is supported on current SoC or FPGA.
+For example if only 10 Mbit/s is supported (10BASE-T) set "10". If 100 Mbit/s is
+supported (100BASE-TX) set "100".
+- phy: PHY device attached to the EMAC via MDIO bus
+
+Child nodes of the driver are the individual PHY devices connected to the
+MDIO bus. They must have a "reg" property given the PHY address on the MDIO bus.
+
+Optional properties:
+- mac-address: 6 bytes, mac address
+
+Examples:
+
+ ethernet@c0fc2000 {
+ compatible = "snps,arc-emac";
+ reg = <0xc0fc2000 0x3c>;
+ interrupts = <6>;
+ mac-address = [ 00 11 22 33 44 55 ];
+ clock-frequency = <80000000>;
+ max-speed = <100>;
+ phy = <&phy0>;
+
+ #address-cells = <1>;
+ #size-cells = <0>;
+ phy0: ethernet-phy@0 {
+ reg = <1>;
+ };
+ };
diff --git a/Documentation/networking/bonding.txt b/Documentation/networking/bonding.txt
index e7454fc..87bbcfe 100644
--- a/Documentation/networking/bonding.txt
+++ b/Documentation/networking/bonding.txt
@@ -303,6 +303,12 @@
such a situation, validation of backup slaves must be
disabled.
+ The validation of ARP requests on backup slaves is mainly
+ helping bonding to decide which slaves are more likely to
+ work in case of the active slave failure, it doesn't really
+ guarantee that the backup slave will work if it's selected
+ as the next active slave.
+
This option is useful in network configurations in which
multiple bonding hosts are concurrently issuing ARPs to one or
more targets beyond a common switch. Should the link between
@@ -315,6 +321,25 @@
This option was added in bonding version 3.1.0.
+arp_all_targets
+
+ Specifies the quantity of arp_ip_targets that must be reachable
+ in order for the ARP monitor to consider a slave as being up.
+ This option affects only active-backup mode for slaves with
+ arp_validation enabled.
+
+ Possible values are:
+
+ any or 0
+
+ consider the slave up only when any of the arp_ip_targets
+ is reachable
+
+ all or 1
+
+ consider the slave up only when all of the arp_ip_targets
+ are reachable
+
downdelay
Specifies the time, in milliseconds, to wait before disabling
diff --git a/Documentation/networking/netlink_mmap.txt b/Documentation/networking/netlink_mmap.txt
index e6088ba..5cc6005 100644
--- a/Documentation/networking/netlink_mmap.txt
+++ b/Documentation/networking/netlink_mmap.txt
@@ -274,9 +274,9 @@
/* Get next frame header */
hdr = rx_ring + frame_offset;
- if (hdr->nm_status == NL_MMAP_STATUS_VALID)
+ if (hdr->nm_status == NL_MMAP_STATUS_VALID) {
/* Regular memory mapped frame */
- nlh = (void *hdr) + NL_MMAP_HDRLEN;
+ nlh = (void *)hdr + NL_MMAP_HDRLEN;
len = hdr->nm_len;
/* Release empty message immediately. May happen
diff --git a/Documentation/sysctl/net.txt b/Documentation/sysctl/net.txt
index 5369879..e658bbf 100644
--- a/Documentation/sysctl/net.txt
+++ b/Documentation/sysctl/net.txt
@@ -50,11 +50,25 @@
it's a Per-CPU variable.
Default: 64
+low_latency_read
+----------------
+Low latency busy poll timeout for socket reads. (needs CONFIG_NET_LL_RX_POLL)
+Approximate time in us to spin waiting for packets on the device queue.
+This sets the default value of the SO_LL socket option.
+Can be set or overridden per socket by setting socket option SO_LL.
+Recommended value is 50. May increase power usage.
+Default: 0 (off)
+
low_latency_poll
----------------
-Low latency busy poll timeout. (needs CONFIG_NET_LL_RX_POLL)
+Low latency busy poll timeout for poll and select. (needs CONFIG_NET_LL_RX_POLL)
Approximate time in us to spin waiting for packets on the device queue.
-Recommended value is 50. May increase power usage.
+Recommended value depends on the number of sockets you poll on.
+For several sockets 50, for several hundreds 100.
+For more than that you probably want to use epoll.
+Note that only sockets with SO_LL set will be busy polled, so you want to either
+selectively set SO_LL on those sockets or set sysctl.net.low_latency_read globally.
+May increase power usage.
Default: 0 (off)
rmem_default
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 00aba08..b45b240 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -240,6 +240,16 @@
This is the virtual network driver for virtio. It can be used with
lguest or QEMU based VMMs (like KVM or Xen). Say Y or M.
+config NLMON
+ tristate "Virtual netlink monitoring device"
+ ---help---
+ This option enables a monitoring net device for netlink skbs. The
+ purpose of this is to analyze netlink messages with packet sockets.
+ Thus applications like tcpdump will be able to see local netlink
+ messages if they tap into the netlink device, record pcaps for further
+ diagnostics, etc. This is mostly intended for developers or support
+ to debug netlink issues. If unsure, say N.
+
endif # NET_CORE
config SUNGEM_PHY
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index ef3d090..3fef8a8 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -22,6 +22,7 @@
obj-$(CONFIG_VETH) += veth.o
obj-$(CONFIG_VIRTIO_NET) += virtio_net.o
obj-$(CONFIG_VXLAN) += vxlan.o
+obj-$(CONFIG_NLMON) += nlmon.o
#
# Networking Drivers
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 3b31c19..142d55d 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -104,6 +104,7 @@
static int arp_interval = BOND_LINK_ARP_INTERV;
static char *arp_ip_target[BOND_MAX_ARP_TARGETS];
static char *arp_validate;
+static char *arp_all_targets;
static char *fail_over_mac;
static int all_slaves_active = 0;
static struct bond_params bonding_defaults;
@@ -166,6 +167,8 @@
MODULE_PARM_DESC(arp_validate, "validate src/dst of ARP probes; "
"0 for none (default), 1 for active, "
"2 for backup, 3 for all");
+module_param(arp_all_targets, charp, 0);
+MODULE_PARM_DESC(arp_all_targets, "fail on any/all arp targets timeout; 0 for any (default), 1 for all");
module_param(fail_over_mac, charp, 0);
MODULE_PARM_DESC(fail_over_mac, "For active-backup, do not set all slaves to "
"the same MAC; 0 for none (default), "
@@ -216,6 +219,12 @@
{ NULL, -1},
};
+const struct bond_parm_tbl arp_all_targets_tbl[] = {
+{ "any", BOND_ARP_TARGETS_ANY},
+{ "all", BOND_ARP_TARGETS_ALL},
+{ NULL, -1},
+};
+
const struct bond_parm_tbl arp_validate_tbl[] = {
{ "none", BOND_ARP_VALIDATE_NONE},
{ "active", BOND_ARP_VALIDATE_ACTIVE},
@@ -1483,7 +1492,7 @@
struct slave *new_slave = NULL;
struct sockaddr addr;
int link_reporting;
- int res = 0;
+ int res = 0, i;
if (!bond->params.use_carrier &&
slave_dev->ethtool_ops->get_link == NULL &&
@@ -1712,6 +1721,8 @@
new_slave->last_arp_rx = jiffies -
(msecs_to_jiffies(bond->params.arp_interval) + 1);
+ for (i = 0; i < BOND_MAX_ARP_TARGETS; i++)
+ new_slave->target_last_arp_rx[i] = new_slave->last_arp_rx;
if (bond->params.miimon && !bond->params.use_carrier) {
link_reporting = bond_check_dev_link(bond, slave_dev, 1);
@@ -2611,18 +2622,19 @@
static void bond_validate_arp(struct bonding *bond, struct slave *slave, __be32 sip, __be32 tip)
{
int i;
- __be32 *targets = bond->params.arp_targets;
- for (i = 0; (i < BOND_MAX_ARP_TARGETS) && targets[i]; i++) {
- pr_debug("bva: sip %pI4 tip %pI4 t[%d] %pI4 bhti(tip) %d\n",
- &sip, &tip, i, &targets[i],
- bond_has_this_ip(bond, tip));
- if (sip == targets[i]) {
- if (bond_has_this_ip(bond, tip))
- slave->last_arp_rx = jiffies;
- return;
- }
+ if (!sip || !bond_has_this_ip(bond, tip)) {
+ pr_debug("bva: sip %pI4 tip %pI4 not found\n", &sip, &tip);
+ return;
}
+
+ i = bond_get_targets_ip(bond->params.arp_targets, sip);
+ if (i == -1) {
+ pr_debug("bva: sip %pI4 not found in targets\n", &sip);
+ return;
+ }
+ slave->last_arp_rx = jiffies;
+ slave->target_last_arp_rx[i] = jiffies;
}
static int bond_arp_rcv(const struct sk_buff *skb, struct bonding *bond,
@@ -2637,6 +2649,10 @@
return RX_HANDLER_ANOTHER;
read_lock(&bond->lock);
+
+ if (!slave_do_arp_validate(bond, slave))
+ goto out_unlock;
+
alen = arp_hdr_len(bond->dev);
pr_debug("bond_arp_rcv: bond %s skb->dev %s\n",
@@ -2676,10 +2692,17 @@
* configuration, the ARP probe will (hopefully) travel from
* the active, through one switch, the router, then the other
* switch before reaching the backup.
+ *
+ * We 'trust' the arp requests if there is an active slave and
+ * it received valid arp reply(s) after it became active. This
+ * is done to avoid endless looping when we can't reach the
+ * arp_ip_target and fool ourselves with our own arp requests.
*/
if (bond_is_active_slave(slave))
bond_validate_arp(bond, slave, sip, tip);
- else
+ else if (bond->curr_active_slave &&
+ time_after(slave_last_rx(bond, bond->curr_active_slave),
+ bond->curr_active_slave->jiffies))
bond_validate_arp(bond, slave, tip, sip);
out_unlock:
@@ -4401,6 +4424,7 @@
static int bond_check_params(struct bond_params *params)
{
int arp_validate_value, fail_over_mac_value, primary_reselect_value, i;
+ int arp_all_targets_value;
/*
* Convert string parameters.
@@ -4591,7 +4615,11 @@
arp_ip_target[i]);
arp_interval = 0;
} else {
- arp_target[arp_ip_count++] = ip;
+ if (bond_get_targets_ip(arp_target, ip) == -1)
+ arp_target[arp_ip_count++] = ip;
+ else
+ pr_warning("Warning: duplicate address %pI4 in arp_ip_target, skipping\n",
+ &ip);
}
}
@@ -4622,6 +4650,18 @@
} else
arp_validate_value = 0;
+ arp_all_targets_value = 0;
+ if (arp_all_targets) {
+ arp_all_targets_value = bond_parse_parm(arp_all_targets,
+ arp_all_targets_tbl);
+
+ if (arp_all_targets_value == -1) {
+ pr_err("Error: invalid arp_all_targets_value \"%s\"\n",
+ arp_all_targets);
+ arp_all_targets_value = 0;
+ }
+ }
+
if (miimon) {
pr_info("MII link monitoring set to %d ms\n", miimon);
} else if (arp_interval) {
@@ -4686,6 +4726,7 @@
params->num_peer_notif = num_peer_notif;
params->arp_interval = arp_interval;
params->arp_validate = arp_validate_value;
+ params->arp_all_targets = arp_all_targets_value;
params->updelay = updelay;
params->downdelay = downdelay;
params->use_carrier = use_carrier;
@@ -4839,7 +4880,7 @@
bond_create_proc_dir(bn);
bond_create_sysfs(bn);
-
+
return 0;
}
diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c
index f8bee4c..dc36a3d 100644
--- a/drivers/net/bonding/bond_sysfs.c
+++ b/drivers/net/bonding/bond_sysfs.c
@@ -443,6 +443,44 @@
static DEVICE_ATTR(arp_validate, S_IRUGO | S_IWUSR, bonding_show_arp_validate,
bonding_store_arp_validate);
+/*
+ * Show and set arp_all_targets.
+ */
+static ssize_t bonding_show_arp_all_targets(struct device *d,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct bonding *bond = to_bond(d);
+ int value = bond->params.arp_all_targets;
+
+ return sprintf(buf, "%s %d\n", arp_all_targets_tbl[value].modename,
+ value);
+}
+
+static ssize_t bonding_store_arp_all_targets(struct device *d,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct bonding *bond = to_bond(d);
+ int new_value;
+
+ new_value = bond_parse_parm(buf, arp_all_targets_tbl);
+ if (new_value < 0) {
+ pr_err("%s: Ignoring invalid arp_all_targets value %s\n",
+ bond->dev->name, buf);
+ return -EINVAL;
+ }
+ pr_info("%s: setting arp_all_targets to %s (%d).\n",
+ bond->dev->name, arp_all_targets_tbl[new_value].modename,
+ new_value);
+
+ bond->params.arp_all_targets = new_value;
+
+ return count;
+}
+
+static DEVICE_ATTR(arp_all_targets, S_IRUGO | S_IWUSR,
+ bonding_show_arp_all_targets, bonding_store_arp_all_targets);
/*
* Show and store fail_over_mac. User only allowed to change the
@@ -590,10 +628,11 @@
struct device_attribute *attr,
const char *buf, size_t count)
{
- __be32 newtarget;
- int i = 0, done = 0, ret = count;
struct bonding *bond = to_bond(d);
- __be32 *targets;
+ struct slave *slave;
+ __be32 newtarget, *targets;
+ unsigned long *targets_rx;
+ int ind, i, j, ret = -EINVAL;
targets = bond->params.arp_targets;
newtarget = in_aton(buf + 1);
@@ -602,57 +641,63 @@
if ((newtarget == 0) || (newtarget == htonl(INADDR_BROADCAST))) {
pr_err("%s: invalid ARP target %pI4 specified for addition\n",
bond->dev->name, &newtarget);
- ret = -EINVAL;
- goto out;
- }
- /* look for an empty slot to put the target in, and check for dupes */
- for (i = 0; (i < BOND_MAX_ARP_TARGETS) && !done; i++) {
- if (targets[i] == newtarget) { /* duplicate */
- pr_err("%s: ARP target %pI4 is already present\n",
- bond->dev->name, &newtarget);
- ret = -EINVAL;
- goto out;
- }
- if (targets[i] == 0) {
- pr_info("%s: adding ARP target %pI4.\n",
- bond->dev->name, &newtarget);
- done = 1;
- targets[i] = newtarget;
- }
- }
- if (!done) {
- pr_err("%s: ARP target table is full!\n",
- bond->dev->name);
- ret = -EINVAL;
goto out;
}
+ if (bond_get_targets_ip(targets, newtarget) != -1) { /* dup */
+ pr_err("%s: ARP target %pI4 is already present\n",
+ bond->dev->name, &newtarget);
+ goto out;
+ }
+
+ ind = bond_get_targets_ip(targets, 0); /* first free slot */
+ if (ind == -1) {
+ pr_err("%s: ARP target table is full!\n",
+ bond->dev->name);
+ goto out;
+ }
+
+ pr_info("%s: adding ARP target %pI4.\n", bond->dev->name,
+ &newtarget);
+ /* not to race with bond_arp_rcv */
+ write_lock_bh(&bond->lock);
+ bond_for_each_slave(bond, slave, i)
+ slave->target_last_arp_rx[ind] = jiffies;
+ targets[ind] = newtarget;
+ write_unlock_bh(&bond->lock);
} else if (buf[0] == '-') {
if ((newtarget == 0) || (newtarget == htonl(INADDR_BROADCAST))) {
pr_err("%s: invalid ARP target %pI4 specified for removal\n",
bond->dev->name, &newtarget);
- ret = -EINVAL;
goto out;
}
- for (i = 0; (i < BOND_MAX_ARP_TARGETS) && !done; i++) {
- if (targets[i] == newtarget) {
- int j;
- pr_info("%s: removing ARP target %pI4.\n",
- bond->dev->name, &newtarget);
- for (j = i; (j < (BOND_MAX_ARP_TARGETS-1)) && targets[j+1]; j++)
- targets[j] = targets[j+1];
-
- targets[j] = 0;
- done = 1;
- }
- }
- if (!done) {
- pr_info("%s: unable to remove nonexistent ARP target %pI4.\n",
+ ind = bond_get_targets_ip(targets, newtarget);
+ if (ind == -1) {
+ pr_err("%s: unable to remove nonexistent ARP target %pI4.\n",
bond->dev->name, &newtarget);
- ret = -EINVAL;
goto out;
}
+
+ if (ind == 0 && !targets[1] && bond->params.arp_interval)
+ pr_warn("%s: removing last arp target with arp_interval on\n",
+ bond->dev->name);
+
+ pr_info("%s: removing ARP target %pI4.\n", bond->dev->name,
+ &newtarget);
+
+ write_lock_bh(&bond->lock);
+ bond_for_each_slave(bond, slave, i) {
+ targets_rx = slave->target_last_arp_rx;
+ j = ind;
+ for (; (j < BOND_MAX_ARP_TARGETS-1) && targets[j+1]; j++)
+ targets_rx[j] = targets_rx[j+1];
+ targets_rx[j] = 0;
+ }
+ for (i = ind; (i < BOND_MAX_ARP_TARGETS-1) && targets[i+1]; i++)
+ targets[i] = targets[i+1];
+ targets[i] = 0;
+ write_unlock_bh(&bond->lock);
} else {
pr_err("no command found in arp_ip_targets file for bond %s. Use +<addr> or -<addr>.\n",
bond->dev->name);
@@ -660,6 +705,7 @@
goto out;
}
+ ret = count;
out:
return ret;
}
@@ -1635,6 +1681,7 @@
&dev_attr_mode.attr,
&dev_attr_fail_over_mac.attr,
&dev_attr_arp_validate.attr,
+ &dev_attr_arp_all_targets.attr,
&dev_attr_arp_interval.attr,
&dev_attr_arp_ip_target.attr,
&dev_attr_downdelay.attr,
diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index c990b42..3fb73cc 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -144,6 +144,7 @@
u8 num_peer_notif;
int arp_interval;
int arp_validate;
+ int arp_all_targets;
int use_carrier;
int fail_over_mac;
int updelay;
@@ -179,6 +180,7 @@
int delay;
unsigned long jiffies;
unsigned long last_arp_rx;
+ unsigned long target_last_arp_rx[BOND_MAX_ARP_TARGETS];
s8 link; /* one of BOND_LINK_XXXX */
s8 new_link;
u8 backup:1, /* indicates backup slave. Value corresponds with
@@ -322,6 +324,9 @@
#define BOND_FOM_ACTIVE 1
#define BOND_FOM_FOLLOW 2
+#define BOND_ARP_TARGETS_ANY 0
+#define BOND_ARP_TARGETS_ALL 1
+
#define BOND_ARP_VALIDATE_NONE 0
#define BOND_ARP_VALIDATE_ACTIVE (1 << BOND_STATE_ACTIVE)
#define BOND_ARP_VALIDATE_BACKUP (1 << BOND_STATE_BACKUP)
@@ -334,11 +339,31 @@
return bond->params.arp_validate & (1 << bond_slave_state(slave));
}
+/* Get the oldest arp which we've received on this slave for bond's
+ * arp_targets.
+ */
+static inline unsigned long slave_oldest_target_arp_rx(struct bonding *bond,
+ struct slave *slave)
+{
+ int i = 1;
+ unsigned long ret = slave->target_last_arp_rx[0];
+
+ for (; (i < BOND_MAX_ARP_TARGETS) && bond->params.arp_targets[i]; i++)
+ if (time_before(slave->target_last_arp_rx[i], ret))
+ ret = slave->target_last_arp_rx[i];
+
+ return ret;
+}
+
static inline unsigned long slave_last_rx(struct bonding *bond,
struct slave *slave)
{
- if (slave_do_arp_validate(bond, slave))
- return slave->last_arp_rx;
+ if (slave_do_arp_validate(bond, slave)) {
+ if (bond->params.arp_all_targets == BOND_ARP_TARGETS_ALL)
+ return slave_oldest_target_arp_rx(bond, slave);
+ else
+ return slave->last_arp_rx;
+ }
return slave->dev->last_rx;
}
@@ -464,12 +489,29 @@
return NULL;
}
+/* Check if the ip is present in arp ip list, or first free slot if ip == 0
+ * Returns -1 if not found, index if found
+ */
+static inline int bond_get_targets_ip(__be32 *targets, __be32 ip)
+{
+ int i;
+
+ for (i = 0; i < BOND_MAX_ARP_TARGETS; i++)
+ if (targets[i] == ip)
+ return i;
+ else if (targets[i] == 0)
+ break;
+
+ return -1;
+}
+
/* exported from bond_main.c */
extern int bond_net_id;
extern const struct bond_parm_tbl bond_lacp_tbl[];
extern const struct bond_parm_tbl bond_mode_tbl[];
extern const struct bond_parm_tbl xmit_hashtype_tbl[];
extern const struct bond_parm_tbl arp_validate_tbl[];
+extern const struct bond_parm_tbl arp_all_targets_tbl[];
extern const struct bond_parm_tbl fail_over_mac_tbl[];
extern const struct bond_parm_tbl pri_reselect_tbl[];
extern struct bond_parm_tbl ad_select_tbl[];
diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig
index a989669..2037080 100644
--- a/drivers/net/ethernet/Kconfig
+++ b/drivers/net/ethernet/Kconfig
@@ -24,6 +24,7 @@
source "drivers/net/ethernet/alteon/Kconfig"
source "drivers/net/ethernet/amd/Kconfig"
source "drivers/net/ethernet/apple/Kconfig"
+source "drivers/net/ethernet/arc/Kconfig"
source "drivers/net/ethernet/atheros/Kconfig"
source "drivers/net/ethernet/cadence/Kconfig"
source "drivers/net/ethernet/adi/Kconfig"
diff --git a/drivers/net/ethernet/Makefile b/drivers/net/ethernet/Makefile
index 009da27..390bd0b 100644
--- a/drivers/net/ethernet/Makefile
+++ b/drivers/net/ethernet/Makefile
@@ -10,6 +10,7 @@
obj-$(CONFIG_NET_VENDOR_ALTEON) += alteon/
obj-$(CONFIG_NET_VENDOR_AMD) += amd/
obj-$(CONFIG_NET_VENDOR_APPLE) += apple/
+obj-$(CONFIG_NET_VENDOR_ARC) += arc/
obj-$(CONFIG_NET_VENDOR_ATHEROS) += atheros/
obj-$(CONFIG_NET_CADENCE) += cadence/
obj-$(CONFIG_NET_BFIN) += adi/
diff --git a/drivers/net/ethernet/apple/bmac.c b/drivers/net/ethernet/apple/bmac.c
index 714dcfe..a597b76 100644
--- a/drivers/net/ethernet/apple/bmac.c
+++ b/drivers/net/ethernet/apple/bmac.c
@@ -1016,7 +1016,6 @@
static void bmac_set_multicast(struct net_device *dev)
{
struct netdev_hw_addr *ha;
- int i;
unsigned short rx_cfg;
u32 crc;
diff --git a/drivers/net/ethernet/arc/Kconfig b/drivers/net/ethernet/arc/Kconfig
new file mode 100644
index 0000000..514c57f
--- /dev/null
+++ b/drivers/net/ethernet/arc/Kconfig
@@ -0,0 +1,31 @@
+#
+# ARC EMAC network device configuration
+#
+
+config NET_VENDOR_ARC
+ bool "ARC devices"
+ default y
+ ---help---
+ If you have a network (Ethernet) card belonging to this class, say Y
+ and read the Ethernet-HOWTO, available from
+ <http://www.tldp.org/docs.html#howto>.
+
+ Note that the answer to this question doesn't directly affect the
+ kernel: saying N will just cause the configurator to skip all
+ the questions about ARC cards. If you say Y, you will be asked for
+ your specific card in the following questions.
+
+if NET_VENDOR_ARC
+
+config ARC_EMAC
+ tristate "ARC EMAC support"
+ select MII
+ select PHYLIB
+ depends on OF_IRQ
+ depends on OF_NET
+ ---help---
+ On some legacy ARC (Synopsys) FPGA boards such as ARCAngel4/ML50x
+ non-standard on-chip ethernet device ARC EMAC 10/100 is used.
+ Say Y here if you have such a board. If unsure, say N.
+
+endif # NET_VENDOR_ARC
diff --git a/drivers/net/ethernet/arc/Makefile b/drivers/net/ethernet/arc/Makefile
new file mode 100644
index 0000000..00c8657
--- /dev/null
+++ b/drivers/net/ethernet/arc/Makefile
@@ -0,0 +1,6 @@
+#
+# Makefile for the ARC network device drivers.
+#
+
+arc_emac-objs := emac_main.o emac_mdio.o
+obj-$(CONFIG_ARC_EMAC) += arc_emac.o
diff --git a/drivers/net/ethernet/arc/emac.h b/drivers/net/ethernet/arc/emac.h
new file mode 100644
index 0000000..dc08678
--- /dev/null
+++ b/drivers/net/ethernet/arc/emac.h
@@ -0,0 +1,214 @@
+/*
+ * Copyright (C) 2004-2013 Synopsys, Inc. (www.synopsys.com)
+ *
+ * Registers and bits definitions of ARC EMAC
+ */
+
+#ifndef ARC_EMAC_H
+#define ARC_EMAC_H
+
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/netdevice.h>
+#include <linux/phy.h>
+
+/* STATUS and ENABLE Register bit masks */
+#define TXINT_MASK (1<<0) /* Transmit interrupt */
+#define RXINT_MASK (1<<1) /* Receive interrupt */
+#define ERR_MASK (1<<2) /* Error interrupt */
+#define TXCH_MASK (1<<3) /* Transmit chaining error interrupt */
+#define MSER_MASK (1<<4) /* Missed packet counter error */
+#define RXCR_MASK (1<<8) /* RXCRCERR counter rolled over */
+#define RXFR_MASK (1<<9) /* RXFRAMEERR counter rolled over */
+#define RXFL_MASK (1<<10) /* RXOFLOWERR counter rolled over */
+#define MDIO_MASK (1<<12) /* MDIO complete interrupt */
+#define TXPL_MASK (1<<31) /* Force polling of BD by EMAC */
+
+/* CONTROL Register bit masks */
+#define EN_MASK (1<<0) /* VMAC enable */
+#define TXRN_MASK (1<<3) /* TX enable */
+#define RXRN_MASK (1<<4) /* RX enable */
+#define DSBC_MASK (1<<8) /* Disable receive broadcast */
+#define ENFL_MASK (1<<10) /* Enable Full-duplex */
+#define PROM_MASK (1<<11) /* Promiscuous mode */
+
+/* Buffer descriptor INFO bit masks */
+#define OWN_MASK (1<<31) /* 0-CPU owns buffer, 1-EMAC owns buffer */
+#define FIRST_MASK (1<<16) /* First buffer in chain */
+#define LAST_MASK (1<<17) /* Last buffer in chain */
+#define LEN_MASK 0x000007FF /* last 11 bits */
+#define CRLS (1<<21)
+#define DEFR (1<<22)
+#define DROP (1<<23)
+#define RTRY (1<<24)
+#define LTCL (1<<28)
+#define UFLO (1<<29)
+
+#define FOR_EMAC OWN_MASK
+#define FOR_CPU 0
+
+/* ARC EMAC register set combines entries for MAC and MDIO */
+enum {
+ R_ID = 0,
+ R_STATUS,
+ R_ENABLE,
+ R_CTRL,
+ R_POLLRATE,
+ R_RXERR,
+ R_MISS,
+ R_TX_RING,
+ R_RX_RING,
+ R_ADDRL,
+ R_ADDRH,
+ R_LAFL,
+ R_LAFH,
+ R_MDIO,
+};
+
+#define TX_TIMEOUT (400*HZ/1000) /* Transmission timeout */
+
+#define ARC_EMAC_NAPI_WEIGHT 40 /* Workload for NAPI */
+
+#define EMAC_BUFFER_SIZE 1536 /* EMAC buffer size */
+
+/**
+ * struct arc_emac_bd - EMAC buffer descriptor (BD).
+ *
+ * @info: Contains status information on the buffer itself.
+ * @data: 32-bit byte addressable pointer to the packet data.
+ */
+struct arc_emac_bd {
+ __le32 info;
+ dma_addr_t data;
+};
+
+/* Number of Rx/Tx BD's */
+#define RX_BD_NUM 128
+#define TX_BD_NUM 128
+
+#define RX_RING_SZ (RX_BD_NUM * sizeof(struct arc_emac_bd))
+#define TX_RING_SZ (TX_BD_NUM * sizeof(struct arc_emac_bd))
+
+/**
+ * struct buffer_state - Stores Rx/Tx buffer state.
+ * @sk_buff: Pointer to socket buffer.
+ * @addr: Start address of DMA-mapped memory region.
+ * @len: Length of DMA-mapped memory region.
+ */
+struct buffer_state {
+ struct sk_buff *skb;
+ DEFINE_DMA_UNMAP_ADDR(addr);
+ DEFINE_DMA_UNMAP_LEN(len);
+};
+
+/**
+ * struct arc_emac_priv - Storage of EMAC's private information.
+ * @dev: Pointer to the current device.
+ * @ndev: Pointer to the current network device.
+ * @phy_dev: Pointer to attached PHY device.
+ * @bus: Pointer to the current MII bus.
+ * @regs: Base address of EMAC memory-mapped control registers.
+ * @napi: Structure for NAPI.
+ * @stats: Network device statistics.
+ * @rxbd: Pointer to Rx BD ring.
+ * @txbd: Pointer to Tx BD ring.
+ * @rxbd_dma: DMA handle for Rx BD ring.
+ * @txbd_dma: DMA handle for Tx BD ring.
+ * @rx_buff: Storage for Rx buffers states.
+ * @tx_buff: Storage for Tx buffers states.
+ * @txbd_curr: Index of Tx BD to use on the next "ndo_start_xmit".
+ * @txbd_dirty: Index of Tx BD to free on the next Tx interrupt.
+ * @last_rx_bd: Index of the last Rx BD we've got from EMAC.
+ * @link: PHY's last seen link state.
+ * @duplex: PHY's last set duplex mode.
+ * @speed: PHY's last set speed.
+ * @max_speed: Maximum supported by current system network data-rate.
+ */
+struct arc_emac_priv {
+ /* Devices */
+ struct device *dev;
+ struct net_device *ndev;
+ struct phy_device *phy_dev;
+ struct mii_bus *bus;
+
+ void __iomem *regs;
+
+ struct napi_struct napi;
+ struct net_device_stats stats;
+
+ struct arc_emac_bd *rxbd;
+ struct arc_emac_bd *txbd;
+
+ dma_addr_t rxbd_dma;
+ dma_addr_t txbd_dma;
+
+ struct buffer_state rx_buff[RX_BD_NUM];
+ struct buffer_state tx_buff[TX_BD_NUM];
+ unsigned int txbd_curr;
+ unsigned int txbd_dirty;
+
+ unsigned int last_rx_bd;
+
+ unsigned int link;
+ unsigned int duplex;
+ unsigned int speed;
+ unsigned int max_speed;
+};
+
+/**
+ * arc_reg_set - Sets EMAC register with provided value.
+ * @priv: Pointer to ARC EMAC private data structure.
+ * @reg: Register offset from base address.
+ * @value: Value to set in register.
+ */
+static inline void arc_reg_set(struct arc_emac_priv *priv, int reg, int value)
+{
+ iowrite32(value, priv->regs + reg * sizeof(int));
+}
+
+/**
+ * arc_reg_get - Gets value of specified EMAC register.
+ * @priv: Pointer to ARC EMAC private data structure.
+ * @reg: Register offset from base address.
+ *
+ * returns: Value of requested register.
+ */
+static inline unsigned int arc_reg_get(struct arc_emac_priv *priv, int reg)
+{
+ return ioread32(priv->regs + reg * sizeof(int));
+}
+
+/**
+ * arc_reg_or - Applies mask to specified EMAC register - ("reg" | "mask").
+ * @priv: Pointer to ARC EMAC private data structure.
+ * @reg: Register offset from base address.
+ * @mask: Mask to apply to specified register.
+ *
+ * This function reads initial register value, then applies provided mask
+ * to it and then writes register back.
+ */
+static inline void arc_reg_or(struct arc_emac_priv *priv, int reg, int mask)
+{
+ unsigned int value = arc_reg_get(priv, reg);
+ arc_reg_set(priv, reg, value | mask);
+}
+
+/**
+ * arc_reg_clr - Applies mask to specified EMAC register - ("reg" & ~"mask").
+ * @priv: Pointer to ARC EMAC private data structure.
+ * @reg: Register offset from base address.
+ * @mask: Mask to apply to specified register.
+ *
+ * This function reads initial register value, then applies provided mask
+ * to it and then writes register back.
+ */
+static inline void arc_reg_clr(struct arc_emac_priv *priv, int reg, int mask)
+{
+ unsigned int value = arc_reg_get(priv, reg);
+ arc_reg_set(priv, reg, value & ~mask);
+}
+
+int arc_mdio_probe(struct platform_device *pdev, struct arc_emac_priv *priv);
+int arc_mdio_remove(struct arc_emac_priv *priv);
+
+#endif /* ARC_EMAC_H */
diff --git a/drivers/net/ethernet/arc/emac_main.c b/drivers/net/ethernet/arc/emac_main.c
new file mode 100644
index 0000000..20345f6
--- /dev/null
+++ b/drivers/net/ethernet/arc/emac_main.c
@@ -0,0 +1,806 @@
+/*
+ * Copyright (C) 2004-2013 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Driver for the ARC EMAC 10100 (hardware revision 5)
+ *
+ * Contributors:
+ * Amit Bhor
+ * Sameer Dhavale
+ * Vineet Gupta
+ */
+
+#include <linux/etherdevice.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/of_mdio.h>
+#include <linux/of_net.h>
+#include <linux/of_platform.h>
+
+#include "emac.h"
+
+#define DRV_NAME "arc_emac"
+#define DRV_VERSION "1.0"
+
+/**
+ * arc_emac_adjust_link - Adjust the PHY link duplex.
+ * @ndev: Pointer to the net_device structure.
+ *
+ * This function is called to change the duplex setting after auto negotiation
+ * is done by the PHY.
+ */
+static void arc_emac_adjust_link(struct net_device *ndev)
+{
+ struct arc_emac_priv *priv = netdev_priv(ndev);
+ struct phy_device *phy_dev = priv->phy_dev;
+ unsigned int reg, state_changed = 0;
+
+ if (priv->link != phy_dev->link) {
+ priv->link = phy_dev->link;
+ state_changed = 1;
+ }
+
+ if (priv->speed != phy_dev->speed) {
+ priv->speed = phy_dev->speed;
+ state_changed = 1;
+ }
+
+ if (priv->duplex != phy_dev->duplex) {
+ reg = arc_reg_get(priv, R_CTRL);
+
+ if (DUPLEX_FULL == phy_dev->duplex)
+ reg |= ENFL_MASK;
+ else
+ reg &= ~ENFL_MASK;
+
+ arc_reg_set(priv, R_CTRL, reg);
+ priv->duplex = phy_dev->duplex;
+ state_changed = 1;
+ }
+
+ if (state_changed)
+ phy_print_status(phy_dev);
+}
+
+/**
+ * arc_emac_get_settings - Get PHY settings.
+ * @ndev: Pointer to net_device structure.
+ * @cmd: Pointer to ethtool_cmd structure.
+ *
+ * This implements ethtool command for getting PHY settings. If PHY could
+ * not be found, the function returns -ENODEV. This function calls the
+ * relevant PHY ethtool API to get the PHY settings.
+ * Issue "ethtool ethX" under linux prompt to execute this function.
+ */
+static int arc_emac_get_settings(struct net_device *ndev,
+ struct ethtool_cmd *cmd)
+{
+ struct arc_emac_priv *priv = netdev_priv(ndev);
+
+ return phy_ethtool_gset(priv->phy_dev, cmd);
+}
+
+/**
+ * arc_emac_set_settings - Set PHY settings as passed in the argument.
+ * @ndev: Pointer to net_device structure.
+ * @cmd: Pointer to ethtool_cmd structure.
+ *
+ * This implements ethtool command for setting various PHY settings. If PHY
+ * could not be found, the function returns -ENODEV. This function calls the
+ * relevant PHY ethtool API to set the PHY.
+ * Issue e.g. "ethtool -s ethX speed 1000" under linux prompt to execute this
+ * function.
+ */
+static int arc_emac_set_settings(struct net_device *ndev,
+ struct ethtool_cmd *cmd)
+{
+ struct arc_emac_priv *priv = netdev_priv(ndev);
+
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
+ return phy_ethtool_sset(priv->phy_dev, cmd);
+}
+
+/**
+ * arc_emac_get_drvinfo - Get EMAC driver information.
+ * @ndev: Pointer to net_device structure.
+ * @info: Pointer to ethtool_drvinfo structure.
+ *
+ * This implements ethtool command for getting the driver information.
+ * Issue "ethtool -i ethX" under linux prompt to execute this function.
+ */
+static void arc_emac_get_drvinfo(struct net_device *ndev,
+ struct ethtool_drvinfo *info)
+{
+ strlcpy(info->driver, DRV_NAME, sizeof(info->driver));
+ strlcpy(info->version, DRV_VERSION, sizeof(info->version));
+}
+
+static const struct ethtool_ops arc_emac_ethtool_ops = {
+ .get_settings = arc_emac_get_settings,
+ .set_settings = arc_emac_set_settings,
+ .get_drvinfo = arc_emac_get_drvinfo,
+ .get_link = ethtool_op_get_link,
+};
+
+#define FIRST_OR_LAST_MASK (FIRST_MASK | LAST_MASK)
+
+/**
+ * arc_emac_tx_clean - clears processed by EMAC Tx BDs.
+ * @ndev: Pointer to the network device.
+ */
+static void arc_emac_tx_clean(struct net_device *ndev)
+{
+ struct arc_emac_priv *priv = netdev_priv(ndev);
+ struct net_device_stats *stats = &priv->stats;
+ unsigned int i;
+
+ for (i = 0; i < TX_BD_NUM; i++) {
+ unsigned int *txbd_dirty = &priv->txbd_dirty;
+ struct arc_emac_bd *txbd = &priv->txbd[*txbd_dirty];
+ struct buffer_state *tx_buff = &priv->tx_buff[*txbd_dirty];
+ struct sk_buff *skb = tx_buff->skb;
+ unsigned int info = le32_to_cpu(txbd->info);
+
+ *txbd_dirty = (*txbd_dirty + 1) % TX_BD_NUM;
+
+ if ((info & FOR_EMAC) || !txbd->data)
+ break;
+
+ if (unlikely(info & (DROP | DEFR | LTCL | UFLO))) {
+ stats->tx_errors++;
+ stats->tx_dropped++;
+
+ if (info & DEFR)
+ stats->tx_carrier_errors++;
+
+ if (info & LTCL)
+ stats->collisions++;
+
+ if (info & UFLO)
+ stats->tx_fifo_errors++;
+ } else if (likely(info & FIRST_OR_LAST_MASK)) {
+ stats->tx_packets++;
+ stats->tx_bytes += skb->len;
+ }
+
+ dma_unmap_single(&ndev->dev, dma_unmap_addr(&tx_buff, addr),
+ dma_unmap_len(&tx_buff, len), DMA_TO_DEVICE);
+
+ /* return the sk_buff to system */
+ dev_kfree_skb_irq(skb);
+
+ txbd->data = 0;
+ txbd->info = 0;
+
+ if (netif_queue_stopped(ndev))
+ netif_wake_queue(ndev);
+ }
+}
+
+/**
+ * arc_emac_rx - processing of Rx packets.
+ * @ndev: Pointer to the network device.
+ * @budget: How many BDs to process on 1 call.
+ *
+ * returns: Number of processed BDs
+ *
+ * Iterate through Rx BDs and deliver received packages to upper layer.
+ */
+static int arc_emac_rx(struct net_device *ndev, int budget)
+{
+ struct arc_emac_priv *priv = netdev_priv(ndev);
+ unsigned int work_done;
+
+ for (work_done = 0; work_done <= budget; work_done++) {
+ unsigned int *last_rx_bd = &priv->last_rx_bd;
+ struct net_device_stats *stats = &priv->stats;
+ struct buffer_state *rx_buff = &priv->rx_buff[*last_rx_bd];
+ struct arc_emac_bd *rxbd = &priv->rxbd[*last_rx_bd];
+ unsigned int buflen = EMAC_BUFFER_SIZE;
+ unsigned int pktlen, info = le32_to_cpu(rxbd->info);
+ struct sk_buff *skb;
+ dma_addr_t addr;
+
+ if (unlikely((info & OWN_MASK) == FOR_EMAC))
+ break;
+
+ /* Make a note that we saw a packet at this BD.
+ * So next time, driver starts from this + 1
+ */
+ *last_rx_bd = (*last_rx_bd + 1) % RX_BD_NUM;
+
+ if (unlikely((info & FIRST_OR_LAST_MASK) !=
+ FIRST_OR_LAST_MASK)) {
+ /* We pre-allocate buffers of MTU size so incoming
+ * packets won't be split/chained.
+ */
+ if (net_ratelimit())
+ netdev_err(ndev, "incomplete packet received\n");
+
+ /* Return ownership to EMAC */
+ rxbd->info = cpu_to_le32(FOR_EMAC | buflen);
+ stats->rx_errors++;
+ stats->rx_length_errors++;
+ continue;
+ }
+
+ pktlen = info & LEN_MASK;
+ stats->rx_packets++;
+ stats->rx_bytes += pktlen;
+ skb = rx_buff->skb;
+ skb_put(skb, pktlen);
+ skb->dev = ndev;
+ skb->protocol = eth_type_trans(skb, ndev);
+
+ dma_unmap_single(&ndev->dev, dma_unmap_addr(&rx_buff, addr),
+ dma_unmap_len(&rx_buff, len), DMA_FROM_DEVICE);
+
+ /* Prepare the BD for next cycle */
+ rx_buff->skb = netdev_alloc_skb_ip_align(ndev, buflen);
+ if (unlikely(!rx_buff->skb)) {
+ stats->rx_errors++;
+ /* Because receive_skb is below, increment rx_dropped */
+ stats->rx_dropped++;
+ continue;
+ }
+
+ /* receive_skb only if new skb was allocated to avoid holes */
+ netif_receive_skb(skb);
+
+ addr = dma_map_single(&ndev->dev, (void *)rx_buff->skb->data,
+ buflen, DMA_FROM_DEVICE);
+ if (dma_mapping_error(&ndev->dev, addr)) {
+ if (net_ratelimit())
+ netdev_err(ndev, "cannot dma map\n");
+ dev_kfree_skb(rx_buff->skb);
+ stats->rx_errors++;
+ continue;
+ }
+ dma_unmap_addr_set(&rx_buff, mapping, addr);
+ dma_unmap_len_set(&rx_buff, len, buflen);
+
+ rxbd->data = cpu_to_le32(rx_buff->skb->data);
+
+ /* Make sure pointer to data buffer is set */
+ wmb();
+
+ /* Return ownership to EMAC */
+ rxbd->info = cpu_to_le32(FOR_EMAC | buflen);
+ }
+
+ return work_done;
+}
+
+/**
+ * arc_emac_poll - NAPI poll handler.
+ * @napi: Pointer to napi_struct structure.
+ * @budget: How many BDs to process on 1 call.
+ *
+ * returns: Number of processed BDs
+ */
+static int arc_emac_poll(struct napi_struct *napi, int budget)
+{
+ struct net_device *ndev = napi->dev;
+ struct arc_emac_priv *priv = netdev_priv(ndev);
+ unsigned int work_done;
+
+ arc_emac_tx_clean(ndev);
+
+ work_done = arc_emac_rx(ndev, budget);
+ if (work_done < budget) {
+ napi_complete(napi);
+ arc_reg_or(priv, R_ENABLE, RXINT_MASK);
+ }
+
+ return work_done;
+}
+
+/**
+ * arc_emac_intr - Global interrupt handler for EMAC.
+ * @irq: irq number.
+ * @dev_instance: device instance.
+ *
+ * returns: IRQ_HANDLED for all cases.
+ *
+ * ARC EMAC has only 1 interrupt line, and depending on bits raised in
+ * STATUS register we may tell what is a reason for interrupt to fire.
+ */
+static irqreturn_t arc_emac_intr(int irq, void *dev_instance)
+{
+ struct net_device *ndev = dev_instance;
+ struct arc_emac_priv *priv = netdev_priv(ndev);
+ struct net_device_stats *stats = &priv->stats;
+ unsigned int status;
+
+ status = arc_reg_get(priv, R_STATUS);
+ status &= ~MDIO_MASK;
+
+ /* Reset all flags except "MDIO complete" */
+ arc_reg_set(priv, R_STATUS, status);
+
+ if (status & RXINT_MASK) {
+ if (likely(napi_schedule_prep(&priv->napi))) {
+ arc_reg_clr(priv, R_ENABLE, RXINT_MASK);
+ __napi_schedule(&priv->napi);
+ }
+ }
+
+ if (status & ERR_MASK) {
+ /* MSER/RXCR/RXFR/RXFL interrupt fires on corresponding
+ * 8-bit error counter overrun.
+ */
+
+ if (status & MSER_MASK) {
+ stats->rx_missed_errors += 0x100;
+ stats->rx_errors += 0x100;
+ }
+
+ if (status & RXCR_MASK) {
+ stats->rx_crc_errors += 0x100;
+ stats->rx_errors += 0x100;
+ }
+
+ if (status & RXFR_MASK) {
+ stats->rx_frame_errors += 0x100;
+ stats->rx_errors += 0x100;
+ }
+
+ if (status & RXFL_MASK) {
+ stats->rx_over_errors += 0x100;
+ stats->rx_errors += 0x100;
+ }
+ }
+
+ return IRQ_HANDLED;
+}
+
+/**
+ * arc_emac_open - Open the network device.
+ * @ndev: Pointer to the network device.
+ *
+ * returns: 0, on success or non-zero error value on failure.
+ *
+ * This function sets the MAC address, requests and enables an IRQ
+ * for the EMAC device and starts the Tx queue.
+ * It also connects to the phy device.
+ */
+static int arc_emac_open(struct net_device *ndev)
+{
+ struct arc_emac_priv *priv = netdev_priv(ndev);
+ struct phy_device *phy_dev = priv->phy_dev;
+ struct arc_emac_bd *bd;
+ struct sk_buff *skb;
+ int i;
+
+ phy_dev->autoneg = AUTONEG_ENABLE;
+ phy_dev->speed = 0;
+ phy_dev->duplex = 0;
+ phy_dev->advertising = phy_dev->supported;
+
+ if (priv->max_speed > 100) {
+ phy_dev->advertising &= PHY_GBIT_FEATURES;
+ } else if (priv->max_speed <= 100) {
+ phy_dev->advertising &= PHY_BASIC_FEATURES;
+ if (priv->max_speed <= 10) {
+ phy_dev->advertising &= ~SUPPORTED_100baseT_Half;
+ phy_dev->advertising &= ~SUPPORTED_100baseT_Full;
+ }
+ }
+
+ /* Allocate and set buffers for Rx BD's */
+ bd = priv->rxbd;
+ for (i = 0; i < RX_BD_NUM; i++) {
+ skb = netdev_alloc_skb_ip_align(ndev, EMAC_BUFFER_SIZE);
+ if (unlikely(!skb))
+ return -ENOMEM;
+
+ priv->rx_buff[i].skb = skb;
+ bd->data = cpu_to_le32(skb->data);
+
+ /* Make sure pointer to data buffer is set */
+ wmb();
+
+ /* Set ownership to EMAC */
+ bd->info = cpu_to_le32(FOR_EMAC | EMAC_BUFFER_SIZE);
+ bd++;
+ }
+
+ priv->last_rx_bd = 0;
+
+ /* Clean Tx BD's */
+ memset(priv->txbd, 0, TX_RING_SZ);
+
+ /* Initialize logical address filter */
+ arc_reg_set(priv, R_LAFL, 0);
+ arc_reg_set(priv, R_LAFH, 0);
+
+ /* Set BD ring pointers for device side */
+ arc_reg_set(priv, R_RX_RING, (unsigned int)priv->rxbd_dma);
+ arc_reg_set(priv, R_TX_RING, (unsigned int)priv->txbd_dma);
+
+ /* Enable interrupts */
+ arc_reg_set(priv, R_ENABLE, RXINT_MASK | ERR_MASK);
+
+ /* Set CONTROL */
+ arc_reg_set(priv, R_CTRL,
+ (RX_BD_NUM << 24) | /* RX BD table length */
+ (TX_BD_NUM << 16) | /* TX BD table length */
+ TXRN_MASK | RXRN_MASK);
+
+ napi_enable(&priv->napi);
+
+ /* Enable EMAC */
+ arc_reg_or(priv, R_CTRL, EN_MASK);
+
+ phy_start_aneg(priv->phy_dev);
+
+ netif_start_queue(ndev);
+
+ return 0;
+}
+
+/**
+ * arc_emac_stop - Close the network device.
+ * @ndev: Pointer to the network device.
+ *
+ * This function stops the Tx queue, disables interrupts and frees the IRQ for
+ * the EMAC device.
+ * It also disconnects the PHY device associated with the EMAC device.
+ */
+static int arc_emac_stop(struct net_device *ndev)
+{
+ struct arc_emac_priv *priv = netdev_priv(ndev);
+
+ napi_disable(&priv->napi);
+ netif_stop_queue(ndev);
+
+ /* Disable interrupts */
+ arc_reg_clr(priv, R_ENABLE, RXINT_MASK | ERR_MASK);
+
+ /* Disable EMAC */
+ arc_reg_clr(priv, R_CTRL, EN_MASK);
+
+ return 0;
+}
+
+/**
+ * arc_emac_stats - Get system network statistics.
+ * @ndev: Pointer to net_device structure.
+ *
+ * Returns the address of the device statistics structure.
+ * Statistics are updated in interrupt handler.
+ */
+static struct net_device_stats *arc_emac_stats(struct net_device *ndev)
+{
+ struct arc_emac_priv *priv = netdev_priv(ndev);
+ struct net_device_stats *stats = &priv->stats;
+ unsigned long miss, rxerr;
+ u8 rxcrc, rxfram, rxoflow;
+
+ rxerr = arc_reg_get(priv, R_RXERR);
+ miss = arc_reg_get(priv, R_MISS);
+
+ rxcrc = rxerr;
+ rxfram = rxerr >> 8;
+ rxoflow = rxerr >> 16;
+
+ stats->rx_errors += miss;
+ stats->rx_errors += rxcrc + rxfram + rxoflow;
+
+ stats->rx_over_errors += rxoflow;
+ stats->rx_frame_errors += rxfram;
+ stats->rx_crc_errors += rxcrc;
+ stats->rx_missed_errors += miss;
+
+ return stats;
+}
+
+/**
+ * arc_emac_tx - Starts the data transmission.
+ * @skb: sk_buff pointer that contains data to be Transmitted.
+ * @ndev: Pointer to net_device structure.
+ *
+ * returns: NETDEV_TX_OK, on success
+ * NETDEV_TX_BUSY, if any of the descriptors are not free.
+ *
+ * This function is invoked from upper layers to initiate transmission.
+ */
+static int arc_emac_tx(struct sk_buff *skb, struct net_device *ndev)
+{
+ struct arc_emac_priv *priv = netdev_priv(ndev);
+ unsigned int len, *txbd_curr = &priv->txbd_curr;
+ struct net_device_stats *stats = &priv->stats;
+ __le32 *info = &priv->txbd[*txbd_curr].info;
+ dma_addr_t addr;
+
+ if (skb_padto(skb, ETH_ZLEN))
+ return NETDEV_TX_OK;
+
+ len = max_t(unsigned int, ETH_ZLEN, skb->len);
+
+ /* EMAC still holds this buffer in its possession.
+ * CPU must not modify this buffer descriptor
+ */
+ if (unlikely((le32_to_cpu(*info) & OWN_MASK) == FOR_EMAC)) {
+ netif_stop_queue(ndev);
+ return NETDEV_TX_BUSY;
+ }
+
+ addr = dma_map_single(&ndev->dev, (void *)skb->data, len,
+ DMA_TO_DEVICE);
+
+ if (unlikely(dma_mapping_error(&ndev->dev, addr))) {
+ stats->tx_dropped++;
+ stats->tx_errors++;
+ dev_kfree_skb(skb);
+ return NETDEV_TX_OK;
+ }
+ dma_unmap_addr_set(&priv->tx_buff[*txbd_curr], mapping, addr);
+ dma_unmap_len_set(&priv->tx_buff[*txbd_curr], len, len);
+
+ priv->tx_buff[*txbd_curr].skb = skb;
+ priv->txbd[*txbd_curr].data = cpu_to_le32(skb->data);
+
+ /* Make sure pointer to data buffer is set */
+ wmb();
+
+ *info = cpu_to_le32(FOR_EMAC | FIRST_OR_LAST_MASK | len);
+
+ /* Increment index to point to the next BD */
+ *txbd_curr = (*txbd_curr + 1) % TX_BD_NUM;
+
+ /* Get "info" of the next BD */
+ info = &priv->txbd[*txbd_curr].info;
+
+ /* Check if if Tx BD ring is full - next BD is still owned by EMAC */
+ if (unlikely((le32_to_cpu(*info) & OWN_MASK) == FOR_EMAC))
+ netif_stop_queue(ndev);
+
+ arc_reg_set(priv, R_STATUS, TXPL_MASK);
+
+ skb_tx_timestamp(skb);
+
+ return NETDEV_TX_OK;
+}
+
+/**
+ * arc_emac_set_address - Set the MAC address for this device.
+ * @ndev: Pointer to net_device structure.
+ * @p: 6 byte Address to be written as MAC address.
+ *
+ * This function copies the HW address from the sockaddr structure to the
+ * net_device structure and updates the address in HW.
+ *
+ * returns: -EBUSY if the net device is busy or 0 if the address is set
+ * successfully.
+ */
+static int arc_emac_set_address(struct net_device *ndev, void *p)
+{
+ struct arc_emac_priv *priv = netdev_priv(ndev);
+ struct sockaddr *addr = p;
+ unsigned int addr_low, addr_hi;
+
+ if (netif_running(ndev))
+ return -EBUSY;
+
+ if (!is_valid_ether_addr(addr->sa_data))
+ return -EADDRNOTAVAIL;
+
+ memcpy(ndev->dev_addr, addr->sa_data, ndev->addr_len);
+
+ addr_low = le32_to_cpu(*(__le32 *) &ndev->dev_addr[0]);
+ addr_hi = le16_to_cpu(*(__le16 *) &ndev->dev_addr[4]);
+
+ arc_reg_set(priv, R_ADDRL, addr_low);
+ arc_reg_set(priv, R_ADDRH, addr_hi);
+
+ return 0;
+}
+
+static const struct net_device_ops arc_emac_netdev_ops = {
+ .ndo_open = arc_emac_open,
+ .ndo_stop = arc_emac_stop,
+ .ndo_start_xmit = arc_emac_tx,
+ .ndo_set_mac_address = arc_emac_set_address,
+ .ndo_get_stats = arc_emac_stats,
+};
+
+static int arc_emac_probe(struct platform_device *pdev)
+{
+ struct resource res_regs, res_irq;
+ struct device_node *phy_node;
+ struct arc_emac_priv *priv;
+ struct net_device *ndev;
+ const char *mac_addr;
+ unsigned int id, clock_frequency;
+ int err;
+
+ if (!pdev->dev.of_node)
+ return -ENODEV;
+
+ /* Get PHY from device tree */
+ phy_node = of_parse_phandle(pdev->dev.of_node, "phy", 0);
+ if (!phy_node) {
+ dev_err(&pdev->dev, "failed to retrieve phy description from device tree\n");
+ return -ENODEV;
+ }
+
+ /* Get EMAC registers base address from device tree */
+ err = of_address_to_resource(pdev->dev.of_node, 0, &res_regs);
+ if (err) {
+ dev_err(&pdev->dev, "failed to retrieve registers base from device tree\n");
+ return -ENODEV;
+ }
+
+ /* Get CPU clock frequency from device tree */
+ if (of_property_read_u32(pdev->dev.of_node, "clock-frequency",
+ &clock_frequency)) {
+ dev_err(&pdev->dev, "failed to retrieve <clock-frequency> from device tree\n");
+ return -EINVAL;
+ }
+
+ /* Get IRQ from device tree */
+ err = of_irq_to_resource(pdev->dev.of_node, 0, &res_irq);
+ if (!err) {
+ dev_err(&pdev->dev, "failed to retrieve <irq> value from device tree\n");
+ return -ENODEV;
+ }
+
+ ndev = alloc_etherdev(sizeof(struct arc_emac_priv));
+ if (!ndev)
+ return -ENOMEM;
+
+ SET_NETDEV_DEV(ndev, &pdev->dev);
+
+ ndev->netdev_ops = &arc_emac_netdev_ops;
+ ndev->ethtool_ops = &arc_emac_ethtool_ops;
+ ndev->watchdog_timeo = TX_TIMEOUT;
+ /* FIXME :: no multicast support yet */
+ ndev->flags &= ~IFF_MULTICAST;
+
+ priv = netdev_priv(ndev);
+ priv->dev = &pdev->dev;
+ priv->ndev = ndev;
+
+ priv->regs = devm_ioremap_resource(&pdev->dev, &res_regs);
+ if (IS_ERR(priv->regs)) {
+ err = PTR_ERR(priv->regs);
+ goto out;
+ }
+ dev_dbg(&pdev->dev, "Registers base address is 0x%p\n", priv->regs);
+
+ id = arc_reg_get(priv, R_ID);
+
+ /* Check for EMAC revision 5 or 7, magic number */
+ if (!(id == 0x0005fd02 || id == 0x0007fd02)) {
+ dev_err(&pdev->dev, "ARC EMAC not detected, id=0x%x\n", id);
+ err = -ENODEV;
+ goto out;
+ }
+ dev_info(&pdev->dev, "ARC EMAC detected with id: 0x%x\n", id);
+
+ /* Set poll rate so that it polls every 1 ms */
+ arc_reg_set(priv, R_POLLRATE, clock_frequency / 1000000);
+
+ /* Get max speed of operation from device tree */
+ if (of_property_read_u32(pdev->dev.of_node, "max-speed",
+ &priv->max_speed)) {
+ dev_err(&pdev->dev, "failed to retrieve <max-speed> from device tree\n");
+ err = -EINVAL;
+ goto out;
+ }
+
+ ndev->irq = res_irq.start;
+ dev_info(&pdev->dev, "IRQ is %d\n", ndev->irq);
+
+ /* Register interrupt handler for device */
+ err = devm_request_irq(&pdev->dev, ndev->irq, arc_emac_intr, 0,
+ ndev->name, ndev);
+ if (err) {
+ dev_err(&pdev->dev, "could not allocate IRQ\n");
+ goto out;
+ }
+
+ /* Get MAC address from device tree */
+ mac_addr = of_get_mac_address(pdev->dev.of_node);
+
+ if (!mac_addr || !is_valid_ether_addr(mac_addr))
+ eth_hw_addr_random(ndev);
+ else
+ memcpy(ndev->dev_addr, mac_addr, ETH_ALEN);
+
+ dev_info(&pdev->dev, "MAC address is now %pM\n", ndev->dev_addr);
+
+ /* Do 1 allocation instead of 2 separate ones for Rx and Tx BD rings */
+ priv->rxbd = dmam_alloc_coherent(&pdev->dev, RX_RING_SZ + TX_RING_SZ,
+ &priv->rxbd_dma, GFP_KERNEL);
+
+ if (!priv->rxbd) {
+ dev_err(&pdev->dev, "failed to allocate data buffers\n");
+ err = -ENOMEM;
+ goto out;
+ }
+
+ priv->txbd = priv->rxbd + RX_BD_NUM;
+
+ priv->txbd_dma = priv->rxbd_dma + RX_RING_SZ;
+ dev_dbg(&pdev->dev, "EMAC Device addr: Rx Ring [0x%x], Tx Ring[%x]\n",
+ (unsigned int)priv->rxbd_dma, (unsigned int)priv->txbd_dma);
+
+ err = arc_mdio_probe(pdev, priv);
+ if (err) {
+ dev_err(&pdev->dev, "failed to probe MII bus\n");
+ goto out;
+ }
+
+ priv->phy_dev = of_phy_connect(ndev, phy_node, arc_emac_adjust_link, 0,
+ PHY_INTERFACE_MODE_MII);
+ if (!priv->phy_dev) {
+ dev_err(&pdev->dev, "of_phy_connect() failed\n");
+ err = -ENODEV;
+ goto out;
+ }
+
+ dev_info(&pdev->dev, "connected to %s phy with id 0x%x\n",
+ priv->phy_dev->drv->name, priv->phy_dev->phy_id);
+
+ netif_napi_add(ndev, &priv->napi, arc_emac_poll, ARC_EMAC_NAPI_WEIGHT);
+
+ err = register_netdev(ndev);
+ if (err) {
+ netif_napi_del(&priv->napi);
+ dev_err(&pdev->dev, "failed to register network device\n");
+ goto out;
+ }
+
+ return 0;
+
+out:
+ free_netdev(ndev);
+ return err;
+}
+
+static int arc_emac_remove(struct platform_device *pdev)
+{
+ struct net_device *ndev = platform_get_drvdata(pdev);
+ struct arc_emac_priv *priv = netdev_priv(ndev);
+
+ phy_disconnect(priv->phy_dev);
+ priv->phy_dev = NULL;
+ arc_mdio_remove(priv);
+ unregister_netdev(ndev);
+ netif_napi_del(&priv->napi);
+ free_netdev(ndev);
+
+ return 0;
+}
+
+static const struct of_device_id arc_emac_dt_ids[] = {
+ { .compatible = "snps,arc-emac" },
+ { /* Sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, arc_emac_dt_ids);
+
+static struct platform_driver arc_emac_driver = {
+ .probe = arc_emac_probe,
+ .remove = arc_emac_remove,
+ .driver = {
+ .name = DRV_NAME,
+ .owner = THIS_MODULE,
+ .of_match_table = arc_emac_dt_ids,
+ },
+};
+
+module_platform_driver(arc_emac_driver);
+
+MODULE_AUTHOR("Alexey Brodkin <abrodkin@synopsys.com>");
+MODULE_DESCRIPTION("ARC EMAC driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/arc/emac_mdio.c b/drivers/net/ethernet/arc/emac_mdio.c
new file mode 100644
index 0000000..26ba242
--- /dev/null
+++ b/drivers/net/ethernet/arc/emac_mdio.c
@@ -0,0 +1,152 @@
+/*
+ * Copyright (C) 2004-2013 Synopsys, Inc. (www.synopsys.com)
+ *
+ * MDIO implementation for ARC EMAC
+ */
+
+#include <linux/delay.h>
+#include <linux/of_mdio.h>
+#include <linux/platform_device.h>
+
+#include "emac.h"
+
+/* Number of seconds we wait for "MDIO complete" flag to appear */
+#define ARC_MDIO_COMPLETE_POLL_COUNT 1
+
+/**
+ * arc_mdio_complete_wait - Waits until MDIO transaction is completed.
+ * @priv: Pointer to ARC EMAC private data structure.
+ *
+ * returns: 0 on success, -ETIMEDOUT on a timeout.
+ */
+static int arc_mdio_complete_wait(struct arc_emac_priv *priv)
+{
+ unsigned int i;
+
+ for (i = 0; i < ARC_MDIO_COMPLETE_POLL_COUNT * 40; i++) {
+ unsigned int status = arc_reg_get(priv, R_STATUS);
+
+ status &= MDIO_MASK;
+
+ if (status) {
+ /* Reset "MDIO complete" flag */
+ arc_reg_set(priv, R_STATUS, status);
+ return 0;
+ }
+
+ msleep(25);
+ }
+
+ return -ETIMEDOUT;
+}
+
+/**
+ * arc_mdio_read - MDIO interface read function.
+ * @bus: Pointer to MII bus structure.
+ * @phy_addr: Address of the PHY device.
+ * @reg_num: PHY register to read.
+ *
+ * returns: The register contents on success, -ETIMEDOUT on a timeout.
+ *
+ * Reads the contents of the requested register from the requested PHY
+ * address.
+ */
+static int arc_mdio_read(struct mii_bus *bus, int phy_addr, int reg_num)
+{
+ struct arc_emac_priv *priv = bus->priv;
+ unsigned int value;
+ int error;
+
+ arc_reg_set(priv, R_MDIO,
+ 0x60020000 | (phy_addr << 23) | (reg_num << 18));
+
+ error = arc_mdio_complete_wait(priv);
+ if (error < 0)
+ return error;
+
+ value = arc_reg_get(priv, R_MDIO) & 0xffff;
+
+ dev_dbg(priv->dev, "arc_mdio_read(phy_addr=%i, reg_num=%x) = %x\n",
+ phy_addr, reg_num, value);
+
+ return value;
+}
+
+/**
+ * arc_mdio_write - MDIO interface write function.
+ * @bus: Pointer to MII bus structure.
+ * @phy_addr: Address of the PHY device.
+ * @reg_num: PHY register to write to.
+ * @value: Value to be written into the register.
+ *
+ * returns: 0 on success, -ETIMEDOUT on a timeout.
+ *
+ * Writes the value to the requested register.
+ */
+static int arc_mdio_write(struct mii_bus *bus, int phy_addr,
+ int reg_num, u16 value)
+{
+ struct arc_emac_priv *priv = bus->priv;
+
+ dev_dbg(priv->dev,
+ "arc_mdio_write(phy_addr=%i, reg_num=%x, value=%x)\n",
+ phy_addr, reg_num, value);
+
+ arc_reg_set(priv, R_MDIO,
+ 0x50020000 | (phy_addr << 23) | (reg_num << 18) | value);
+
+ return arc_mdio_complete_wait(priv);
+}
+
+/**
+ * arc_mdio_probe - MDIO probe function.
+ * @pdev: Pointer to platform device.
+ * @priv: Pointer to ARC EMAC private data structure.
+ *
+ * returns: 0 on success, -ENOMEM when mdiobus_alloc
+ * (to allocate memory for MII bus structure) fails.
+ *
+ * Sets up and registers the MDIO interface.
+ */
+int arc_mdio_probe(struct platform_device *pdev, struct arc_emac_priv *priv)
+{
+ struct mii_bus *bus;
+ int error;
+
+ bus = mdiobus_alloc();
+ if (!bus)
+ return -ENOMEM;
+
+ priv->bus = bus;
+ bus->priv = priv;
+ bus->parent = priv->dev;
+ bus->name = "Synopsys MII Bus",
+ bus->read = &arc_mdio_read;
+ bus->write = &arc_mdio_write;
+
+ snprintf(bus->id, MII_BUS_ID_SIZE, "%s", pdev->name);
+
+ error = of_mdiobus_register(bus, pdev->dev.of_node);
+ if (error) {
+ dev_err(priv->dev, "cannot register MDIO bus %s\n", bus->name);
+ mdiobus_free(bus);
+ return error;
+ }
+
+ return 0;
+}
+
+/**
+ * arc_mdio_remove - MDIO remove function.
+ * @priv: Pointer to ARC EMAC private data structure.
+ *
+ * Unregisters the MDIO and frees any associate memory for MII bus.
+ */
+int arc_mdio_remove(struct arc_emac_priv *priv)
+{
+ mdiobus_unregister(priv->bus);
+ mdiobus_free(priv->bus);
+ priv->bus = NULL;
+
+ return 0;
+}
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index d342c5a..ec3aa1d 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -1722,7 +1722,7 @@
return request_irq(irq, bnx2x_interrupt, flags, bp->dev->name, bp->dev);
}
-int bnx2x_setup_irqs(struct bnx2x *bp)
+static int bnx2x_setup_irqs(struct bnx2x *bp)
{
int rc = 0;
if (bp->flags & USING_MSIX_FLAG &&
@@ -3543,9 +3543,12 @@
/* outer IP header info */
if (xmit_type & XMIT_CSUM_V4) {
struct iphdr *iph = ip_hdr(skb);
+ u16 csum = (__force u16)(~iph->check) -
+ (__force u16)iph->tot_len -
+ (__force u16)iph->frag_off;
+
pbd2->fw_ip_csum_wo_len_flags_frag =
- bswab16(csum_fold((~iph->check) -
- iph->tot_len - iph->frag_off));
+ bswab16(csum_fold((__force __wsum)csum));
} else {
pbd2->fw_ip_hdr_to_payload_w =
hlen_w - ((sizeof(struct ipv6hdr)) >> 1);
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
index 7c6faeb..b8c067d 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
@@ -1391,7 +1391,7 @@
bp->pm_cap + PCI_PM_CTRL, &pm);
if ((rc && !netif_running(dev)) ||
- (!rc && ((pm & PCI_PM_CTRL_STATE_MASK) != PCI_D0)))
+ (!rc && ((pm & PCI_PM_CTRL_STATE_MASK) != (__force u16)PCI_D0)))
return false;
return true;
@@ -1610,8 +1610,10 @@
*/
val = be32_to_cpu(val_be);
- val &= ~le32_to_cpu(0xff << BYTE_OFFSET(offset));
- val |= le32_to_cpu(*data_buf << BYTE_OFFSET(offset));
+ val &= ~le32_to_cpu((__force __le32)
+ (0xff << BYTE_OFFSET(offset)));
+ val |= le32_to_cpu((__force __le32)
+ (*data_buf << BYTE_OFFSET(offset)));
rc = bnx2x_nvram_write_dword(bp, align_offset, val,
cmd_flags);
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index 7318988..740518b 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -615,7 +615,7 @@
if (rc) {
BNX2X_ERR("DMAE returned failure %d\n", rc);
bnx2x_panic();
- };
+ }
}
static void bnx2x_write_dmae_phys_len(struct bnx2x *bp, dma_addr_t phys_addr,
diff --git a/drivers/net/ethernet/korina.c b/drivers/net/ethernet/korina.c
index 64646eb..270e65f 100644
--- a/drivers/net/ethernet/korina.c
+++ b/drivers/net/ethernet/korina.c
@@ -483,7 +483,6 @@
unsigned long flags;
struct netdev_hw_addr *ha;
u32 recognise = ETH_ARC_AB; /* always accept broadcasts */
- int i;
/* Set promiscuous mode */
if (dev->flags & IFF_PROMISC)
diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c
index ea1e038..df04c82 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -257,6 +257,8 @@
if (!wait_for_completion_timeout(&context->done,
msecs_to_jiffies(timeout))) {
+ mlx4_warn(dev, "communication channel command 0x%x timed out\n",
+ op);
err = -EBUSY;
goto out;
}
@@ -486,6 +488,8 @@
}
if (cmd_pending(dev)) {
+ mlx4_warn(dev, "command 0x%x timed out (go bit not cleared)\n",
+ op);
err = -ETIMEDOUT;
goto out;
}
@@ -549,6 +553,8 @@
if (!wait_for_completion_timeout(&context->done,
msecs_to_jiffies(timeout))) {
+ mlx4_warn(dev, "command 0x%x timed out (go bit not cleared)\n",
+ op);
err = -EBUSY;
goto out;
}
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c b/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c
index 0f91222..9d4a1ea 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c
@@ -207,9 +207,6 @@
struct mlx4_en_priv *priv = netdev_priv(dev);
int i;
- if (!priv->maxrate)
- return -EINVAL;
-
for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
maxrate->tc_maxrate[i] =
priv->maxrate[i] * MLX4_RATELIMIT_UNITS_IN_KB;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_main.c b/drivers/net/ethernet/mellanox/mlx4/en_main.c
index a5c9df07..a071cda 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_main.c
@@ -310,7 +310,7 @@
err_mr:
(void) mlx4_mr_free(dev, &mdev->mr);
err_map:
- if (!mdev->uar_map)
+ if (mdev->uar_map)
iounmap(mdev->uar_map);
err_uar:
mlx4_uar_free(dev, &mdev->priv_uar);
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 7299ada..caf2047 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -405,7 +405,7 @@
en_err(priv, "Failed configuring VLAN filter\n");
}
if (mlx4_register_vlan(mdev->dev, priv->port, vid, &idx))
- en_err(priv, "failed adding vlan %d\n", vid);
+ en_dbg(HW, priv, "failed adding vlan %d\n", vid);
mutex_unlock(&mdev->state_lock);
return 0;
@@ -428,7 +428,7 @@
if (!mlx4_find_cached_vlan(mdev->dev, priv->port, vid, &idx))
mlx4_unregister_vlan(mdev->dev, priv->port, idx);
else
- en_err(priv, "could not find vid %d in cache\n", vid);
+ en_dbg(HW, priv, "could not find vid %d in cache\n", vid);
if (mdev->device_up && priv->port_up) {
err = mlx4_SET_VLAN_FLTR(mdev->dev, priv);
@@ -1236,10 +1236,19 @@
{
struct mlx4_en_priv *priv = netdev_priv(dev);
struct mlx4_en_dev *mdev = priv->mdev;
+ int i;
if (netif_msg_timer(priv))
en_warn(priv, "Tx timeout called on port:%d\n", priv->port);
+ for (i = 0; i < priv->tx_ring_num; i++) {
+ if (!netif_tx_queue_stopped(netdev_get_tx_queue(dev, i)))
+ continue;
+ en_warn(priv, "TX timeout on queue: %d, QP: 0x%x, CQ: 0x%x, Cons: 0x%x, Prod: 0x%x\n",
+ i, priv->tx_ring[i].qpn, priv->tx_ring[i].cqn,
+ priv->tx_ring[i].cons, priv->tx_ring[i].prod);
+ }
+
priv->port_stats.tx_timeout++;
en_dbg(DRV, priv, "Scheduling watchdog\n");
queue_work(mdev->workqueue, &priv->watchdog_task);
@@ -1375,12 +1384,13 @@
mutex_lock(&mdev->state_lock);
if (mdev->device_up) {
- err = mlx4_en_DUMP_ETH_STATS(mdev, priv->port, 0);
- if (err)
- en_dbg(HW, priv, "Could not update stats\n");
+ if (priv->port_up) {
+ err = mlx4_en_DUMP_ETH_STATS(mdev, priv->port, 0);
+ if (err)
+ en_dbg(HW, priv, "Could not update stats\n");
- if (priv->port_up)
mlx4_en_auto_moderation(priv);
+ }
queue_delayed_work(mdev->workqueue, &priv->stats_task, STATS_DELAY);
}
@@ -1634,6 +1644,9 @@
return;
}
+ /* close port*/
+ mlx4_CLOSE_PORT(mdev->dev, priv->port);
+
/* Synchronize with tx routine */
netif_tx_lock_bh(dev);
if (detach)
@@ -1734,14 +1747,11 @@
}
local_bh_enable();
- mlx4_en_deactivate_rx_ring(priv, &priv->rx_ring[i]);
while (test_bit(NAPI_STATE_SCHED, &cq->napi.state))
msleep(1);
+ mlx4_en_deactivate_rx_ring(priv, &priv->rx_ring[i]);
mlx4_en_deactivate_cq(priv, cq);
}
-
- /* close port*/
- mlx4_CLOSE_PORT(mdev->dev, priv->port);
}
static void mlx4_en_restart(struct work_struct *work)
@@ -2322,6 +2332,8 @@
mdev->pndev[port] = dev;
netif_carrier_off(dev);
+ mlx4_en_set_default_moderation(priv);
+
err = register_netdev(dev);
if (err) {
en_err(priv, "Netdev registration failed for port %d\n", port);
@@ -2353,7 +2365,6 @@
en_err(priv, "Failed Initializing port\n");
goto out;
}
- mlx4_en_set_default_moderation(priv);
queue_delayed_work(mdev->workqueue, &priv->stats_task, STATS_DELAY);
if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS)
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index 9c57581..76997b9 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -43,40 +43,64 @@
#include "mlx4_en.h"
+static int mlx4_alloc_pages(struct mlx4_en_priv *priv,
+ struct mlx4_en_rx_alloc *page_alloc,
+ const struct mlx4_en_frag_info *frag_info,
+ gfp_t _gfp)
+{
+ int order;
+ struct page *page;
+ dma_addr_t dma;
+
+ for (order = MLX4_EN_ALLOC_PREFER_ORDER; ;) {
+ gfp_t gfp = _gfp;
+
+ if (order)
+ gfp |= __GFP_COMP | __GFP_NOWARN;
+ page = alloc_pages(gfp, order);
+ if (likely(page))
+ break;
+ if (--order < 0 ||
+ ((PAGE_SIZE << order) < frag_info->frag_size))
+ return -ENOMEM;
+ }
+ dma = dma_map_page(priv->ddev, page, 0, PAGE_SIZE << order,
+ PCI_DMA_FROMDEVICE);
+ if (dma_mapping_error(priv->ddev, dma)) {
+ put_page(page);
+ return -ENOMEM;
+ }
+ page_alloc->size = PAGE_SIZE << order;
+ page_alloc->page = page;
+ page_alloc->dma = dma;
+ page_alloc->offset = frag_info->frag_align;
+ /* Not doing get_page() for each frag is a big win
+ * on asymetric workloads.
+ */
+ atomic_set(&page->_count, page_alloc->size / frag_info->frag_stride);
+ return 0;
+}
+
static int mlx4_en_alloc_frags(struct mlx4_en_priv *priv,
struct mlx4_en_rx_desc *rx_desc,
struct mlx4_en_rx_alloc *frags,
- struct mlx4_en_rx_alloc *ring_alloc)
+ struct mlx4_en_rx_alloc *ring_alloc,
+ gfp_t gfp)
{
struct mlx4_en_rx_alloc page_alloc[MLX4_EN_MAX_RX_FRAGS];
- struct mlx4_en_frag_info *frag_info;
+ const struct mlx4_en_frag_info *frag_info;
struct page *page;
dma_addr_t dma;
int i;
for (i = 0; i < priv->num_frags; i++) {
frag_info = &priv->frag_info[i];
- if (ring_alloc[i].offset == frag_info->last_offset) {
- page = alloc_pages(GFP_ATOMIC | __GFP_COMP,
- MLX4_EN_ALLOC_ORDER);
- if (!page)
- goto out;
- dma = dma_map_page(priv->ddev, page, 0,
- MLX4_EN_ALLOC_SIZE, PCI_DMA_FROMDEVICE);
- if (dma_mapping_error(priv->ddev, dma)) {
- put_page(page);
- goto out;
- }
- page_alloc[i].page = page;
- page_alloc[i].dma = dma;
- page_alloc[i].offset = frag_info->frag_align;
- } else {
- page_alloc[i].page = ring_alloc[i].page;
- get_page(ring_alloc[i].page);
- page_alloc[i].dma = ring_alloc[i].dma;
- page_alloc[i].offset = ring_alloc[i].offset +
- frag_info->frag_stride;
- }
+ page_alloc[i] = ring_alloc[i];
+ page_alloc[i].offset += frag_info->frag_stride;
+ if (page_alloc[i].offset + frag_info->frag_stride <= ring_alloc[i].size)
+ continue;
+ if (mlx4_alloc_pages(priv, &page_alloc[i], frag_info, gfp))
+ goto out;
}
for (i = 0; i < priv->num_frags; i++) {
@@ -88,14 +112,16 @@
return 0;
-
out:
while (i--) {
frag_info = &priv->frag_info[i];
- if (ring_alloc[i].offset == frag_info->last_offset)
+ if (page_alloc[i].page != ring_alloc[i].page) {
dma_unmap_page(priv->ddev, page_alloc[i].dma,
- MLX4_EN_ALLOC_SIZE, PCI_DMA_FROMDEVICE);
- put_page(page_alloc[i].page);
+ page_alloc[i].size, PCI_DMA_FROMDEVICE);
+ page = page_alloc[i].page;
+ atomic_set(&page->_count, 1);
+ put_page(page);
+ }
}
return -ENOMEM;
}
@@ -104,12 +130,12 @@
struct mlx4_en_rx_alloc *frags,
int i)
{
- struct mlx4_en_frag_info *frag_info = &priv->frag_info[i];
+ const struct mlx4_en_frag_info *frag_info = &priv->frag_info[i];
- if (frags[i].offset == frag_info->last_offset) {
- dma_unmap_page(priv->ddev, frags[i].dma, MLX4_EN_ALLOC_SIZE,
+ if (frags[i].offset + frag_info->frag_stride > frags[i].size)
+ dma_unmap_page(priv->ddev, frags[i].dma, frags[i].size,
PCI_DMA_FROMDEVICE);
- }
+
if (frags[i].page)
put_page(frags[i].page);
}
@@ -117,35 +143,28 @@
static int mlx4_en_init_allocator(struct mlx4_en_priv *priv,
struct mlx4_en_rx_ring *ring)
{
- struct mlx4_en_rx_alloc *page_alloc;
int i;
+ struct mlx4_en_rx_alloc *page_alloc;
for (i = 0; i < priv->num_frags; i++) {
- page_alloc = &ring->page_alloc[i];
- page_alloc->page = alloc_pages(GFP_ATOMIC | __GFP_COMP,
- MLX4_EN_ALLOC_ORDER);
- if (!page_alloc->page)
- goto out;
+ const struct mlx4_en_frag_info *frag_info = &priv->frag_info[i];
- page_alloc->dma = dma_map_page(priv->ddev, page_alloc->page, 0,
- MLX4_EN_ALLOC_SIZE, PCI_DMA_FROMDEVICE);
- if (dma_mapping_error(priv->ddev, page_alloc->dma)) {
- put_page(page_alloc->page);
- page_alloc->page = NULL;
+ if (mlx4_alloc_pages(priv, &ring->page_alloc[i],
+ frag_info, GFP_KERNEL))
goto out;
- }
- page_alloc->offset = priv->frag_info[i].frag_align;
- en_dbg(DRV, priv, "Initialized allocator:%d with page:%p\n",
- i, page_alloc->page);
}
return 0;
out:
while (i--) {
+ struct page *page;
+
page_alloc = &ring->page_alloc[i];
dma_unmap_page(priv->ddev, page_alloc->dma,
- MLX4_EN_ALLOC_SIZE, PCI_DMA_FROMDEVICE);
- put_page(page_alloc->page);
+ page_alloc->size, PCI_DMA_FROMDEVICE);
+ page = page_alloc->page;
+ atomic_set(&page->_count, 1);
+ put_page(page);
page_alloc->page = NULL;
}
return -ENOMEM;
@@ -158,13 +177,18 @@
int i;
for (i = 0; i < priv->num_frags; i++) {
+ const struct mlx4_en_frag_info *frag_info = &priv->frag_info[i];
+
page_alloc = &ring->page_alloc[i];
en_dbg(DRV, priv, "Freeing allocator:%d count:%d\n",
i, page_count(page_alloc->page));
dma_unmap_page(priv->ddev, page_alloc->dma,
- MLX4_EN_ALLOC_SIZE, PCI_DMA_FROMDEVICE);
- put_page(page_alloc->page);
+ page_alloc->size, PCI_DMA_FROMDEVICE);
+ while (page_alloc->offset + frag_info->frag_stride < page_alloc->size) {
+ put_page(page_alloc->page);
+ page_alloc->offset += frag_info->frag_stride;
+ }
page_alloc->page = NULL;
}
}
@@ -195,13 +219,14 @@
}
static int mlx4_en_prepare_rx_desc(struct mlx4_en_priv *priv,
- struct mlx4_en_rx_ring *ring, int index)
+ struct mlx4_en_rx_ring *ring, int index,
+ gfp_t gfp)
{
struct mlx4_en_rx_desc *rx_desc = ring->buf + (index * ring->stride);
struct mlx4_en_rx_alloc *frags = ring->rx_info +
(index << priv->log_rx_info);
- return mlx4_en_alloc_frags(priv, rx_desc, frags, ring->page_alloc);
+ return mlx4_en_alloc_frags(priv, rx_desc, frags, ring->page_alloc, gfp);
}
static inline void mlx4_en_update_rx_prod_db(struct mlx4_en_rx_ring *ring)
@@ -235,7 +260,8 @@
ring = &priv->rx_ring[ring_ind];
if (mlx4_en_prepare_rx_desc(priv, ring,
- ring->actual_size)) {
+ ring->actual_size,
+ GFP_KERNEL)) {
if (ring->actual_size < MLX4_EN_MIN_RX_SIZE) {
en_err(priv, "Failed to allocate "
"enough rx buffers\n");
@@ -450,11 +476,11 @@
DMA_FROM_DEVICE);
/* Save page reference in skb */
- get_page(frags[nr].page);
__skb_frag_set_page(&skb_frags_rx[nr], frags[nr].page);
skb_frag_size_set(&skb_frags_rx[nr], frag_info->frag_size);
skb_frags_rx[nr].page_offset = frags[nr].offset;
skb->truesize += frag_info->frag_stride;
+ frags[nr].page = NULL;
}
/* Adjust size of last fragment to match actual length */
if (nr > 0)
@@ -547,7 +573,7 @@
int index = ring->prod & ring->size_mask;
while ((u32) (ring->prod - ring->cons) < ring->actual_size) {
- if (mlx4_en_prepare_rx_desc(priv, ring, index))
+ if (mlx4_en_prepare_rx_desc(priv, ring, index, GFP_ATOMIC))
break;
ring->prod++;
index = ring->prod & ring->size_mask;
@@ -805,21 +831,7 @@
return done;
}
-
-/* Calculate the last offset position that accommodates a full fragment
- * (assuming fagment size = stride-align) */
-static int mlx4_en_last_alloc_offset(struct mlx4_en_priv *priv, u16 stride, u16 align)
-{
- u16 res = MLX4_EN_ALLOC_SIZE % stride;
- u16 offset = MLX4_EN_ALLOC_SIZE - stride - res + align;
-
- en_dbg(DRV, priv, "Calculated last offset for stride:%d align:%d "
- "res:%d offset:%d\n", stride, align, res, offset);
- return offset;
-}
-
-
-static int frag_sizes[] = {
+static const int frag_sizes[] = {
FRAG_SZ0,
FRAG_SZ1,
FRAG_SZ2,
@@ -847,9 +859,6 @@
priv->frag_info[i].frag_stride =
ALIGN(frag_sizes[i], SMP_CACHE_BYTES);
}
- priv->frag_info[i].last_offset = mlx4_en_last_alloc_offset(
- priv, priv->frag_info[i].frag_stride,
- priv->frag_info[i].frag_align);
buf_size += priv->frag_info[i].frag_size;
i++;
}
@@ -861,13 +870,13 @@
en_dbg(DRV, priv, "Rx buffer scatter-list (effective-mtu:%d "
"num_frags:%d):\n", eff_mtu, priv->num_frags);
for (i = 0; i < priv->num_frags; i++) {
- en_dbg(DRV, priv, " frag:%d - size:%d prefix:%d align:%d "
- "stride:%d last_offset:%d\n", i,
- priv->frag_info[i].frag_size,
- priv->frag_info[i].frag_prefix_size,
- priv->frag_info[i].frag_align,
- priv->frag_info[i].frag_stride,
- priv->frag_info[i].last_offset);
+ en_err(priv,
+ " frag:%d - size:%d prefix:%d align:%d stride:%d\n",
+ i,
+ priv->frag_info[i].frag_size,
+ priv->frag_info[i].frag_prefix_size,
+ priv->frag_info[i].frag_align,
+ priv->frag_info[i].frag_stride);
}
}
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 2f4a260..56160a2 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -839,11 +839,11 @@
return -EINVAL;
}
- err = sscanf(buf, "%d", &mtu);
- if (err > 0)
+ err = kstrtoint(buf, 0, &mtu);
+ if (!err)
ibta_mtu = int_to_ibta_mtu(mtu);
- if (err <= 0 || ibta_mtu < 0) {
+ if (err || ibta_mtu < 0) {
mlx4_err(mdev, "%s is invalid IBTA mtu\n", buf);
return -EINVAL;
}
@@ -2077,6 +2077,11 @@
num_vfs, MLX4_MAX_NUM_VF);
return -EINVAL;
}
+
+ if (num_vfs < 0) {
+ pr_err("num_vfs module parameter cannot be negative\n");
+ return -EINVAL;
+ }
/*
* Check for BARs.
*/
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index 57192a8..35fb60e 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -96,7 +96,8 @@
/* Use the maximum between 16384 and a single page */
#define MLX4_EN_ALLOC_SIZE PAGE_ALIGN(16384)
-#define MLX4_EN_ALLOC_ORDER get_order(MLX4_EN_ALLOC_SIZE)
+
+#define MLX4_EN_ALLOC_PREFER_ORDER PAGE_ALLOC_COSTLY_ORDER
/* Receive fragment sizes; we use at most 3 fragments (for 9600 byte MTU
* and 4K allocations) */
@@ -234,9 +235,10 @@
#define MLX4_EN_CX3_HIGH_ID 0x1005
struct mlx4_en_rx_alloc {
- struct page *page;
- dma_addr_t dma;
- u16 offset;
+ struct page *page;
+ dma_addr_t dma;
+ u32 offset;
+ u32 size;
};
struct mlx4_en_tx_ring {
@@ -439,8 +441,6 @@
u16 frag_prefix_size;
u16 frag_stride;
u16 frag_align;
- u16 last_offset;
-
};
#ifdef CONFIG_MLX4_EN_DCB
diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
index 46cc11d..e7284a2 100644
--- a/drivers/net/ethernet/sfc/efx.c
+++ b/drivers/net/ethernet/sfc/efx.c
@@ -21,8 +21,8 @@
#include <linux/ethtool.h>
#include <linux/topology.h>
#include <linux/gfp.h>
-#include <linux/cpu_rmap.h>
#include <linux/aer.h>
+#include <linux/interrupt.h>
#include "net_driver.h"
#include "efx.h"
#include "nic.h"
@@ -1283,29 +1283,6 @@
return count;
}
-static int
-efx_init_rx_cpu_rmap(struct efx_nic *efx, struct msix_entry *xentries)
-{
-#ifdef CONFIG_RFS_ACCEL
- unsigned int i;
- int rc;
-
- efx->net_dev->rx_cpu_rmap = alloc_irq_cpu_rmap(efx->n_rx_channels);
- if (!efx->net_dev->rx_cpu_rmap)
- return -ENOMEM;
- for (i = 0; i < efx->n_rx_channels; i++) {
- rc = irq_cpu_rmap_add(efx->net_dev->rx_cpu_rmap,
- xentries[i].vector);
- if (rc) {
- free_irq_cpu_rmap(efx->net_dev->rx_cpu_rmap);
- efx->net_dev->rx_cpu_rmap = NULL;
- return rc;
- }
- }
-#endif
- return 0;
-}
-
/* Probe the number and type of interrupts we are able to obtain, and
* the resulting numbers of channels and RX queues.
*/
@@ -1359,11 +1336,6 @@
efx->n_tx_channels = n_channels;
efx->n_rx_channels = n_channels;
}
- rc = efx_init_rx_cpu_rmap(efx, xentries);
- if (rc) {
- pci_disable_msix(efx->pci_dev);
- return rc;
- }
for (i = 0; i < efx->n_channels; i++)
efx_get_channel(efx, i)->irq =
xentries[i].vector;
@@ -1427,6 +1399,10 @@
BUG_ON(efx->state == STATE_DISABLED);
+ if (efx->eeh_disabled_legacy_irq) {
+ enable_irq(efx->legacy_irq);
+ efx->eeh_disabled_legacy_irq = false;
+ }
if (efx->legacy_irq)
efx->legacy_irq_enabled = true;
efx_nic_enable_interrupts(efx);
@@ -2365,7 +2341,7 @@
* Returns 0 if the recovery mechanisms are unsuccessful.
* Returns a non-zero value otherwise.
*/
-static int efx_try_recovery(struct efx_nic *efx)
+int efx_try_recovery(struct efx_nic *efx)
{
#ifdef CONFIG_EEH
/* A PCI error can occur and not be seen by EEH because nothing
@@ -2603,10 +2579,6 @@
BUG_ON(efx->state == STATE_READY);
cancel_work_sync(&efx->reset_work);
-#ifdef CONFIG_RFS_ACCEL
- free_irq_cpu_rmap(efx->net_dev->rx_cpu_rmap);
- efx->net_dev->rx_cpu_rmap = NULL;
-#endif
efx_stop_interrupts(efx, false);
efx_nic_fini_interrupt(efx);
efx_fini_port(efx);
diff --git a/drivers/net/ethernet/sfc/efx.h b/drivers/net/ethernet/sfc/efx.h
index 8372da2..bdb30bb 100644
--- a/drivers/net/ethernet/sfc/efx.h
+++ b/drivers/net/ethernet/sfc/efx.h
@@ -124,6 +124,7 @@
extern int efx_reset(struct efx_nic *efx, enum reset_type method);
extern void efx_reset_down(struct efx_nic *efx, enum reset_type method);
extern int efx_reset_up(struct efx_nic *efx, enum reset_type method, bool ok);
+extern int efx_try_recovery(struct efx_nic *efx);
/* Global */
extern void efx_schedule_reset(struct efx_nic *efx, enum reset_type type);
diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c
index 6e76817..1fc2145 100644
--- a/drivers/net/ethernet/sfc/ethtool.c
+++ b/drivers/net/ethernet/sfc/ethtool.c
@@ -1114,6 +1114,20 @@
return 0;
}
+int efx_ethtool_get_ts_info(struct net_device *net_dev,
+ struct ethtool_ts_info *ts_info)
+{
+ struct efx_nic *efx = netdev_priv(net_dev);
+
+ /* Software capabilities */
+ ts_info->so_timestamping = (SOF_TIMESTAMPING_RX_SOFTWARE |
+ SOF_TIMESTAMPING_SOFTWARE);
+ ts_info->phc_index = -1;
+
+ efx_ptp_get_ts_info(efx, ts_info);
+ return 0;
+}
+
static int efx_ethtool_get_module_eeprom(struct net_device *net_dev,
struct ethtool_eeprom *ee,
u8 *data)
@@ -1176,7 +1190,7 @@
.get_rxfh_indir_size = efx_ethtool_get_rxfh_indir_size,
.get_rxfh_indir = efx_ethtool_get_rxfh_indir,
.set_rxfh_indir = efx_ethtool_set_rxfh_indir,
- .get_ts_info = efx_ptp_get_ts_info,
+ .get_ts_info = efx_ethtool_get_ts_info,
.get_module_info = efx_ethtool_get_module_info,
.get_module_eeprom = efx_ethtool_get_module_eeprom,
};
diff --git a/drivers/net/ethernet/sfc/filter.c b/drivers/net/ethernet/sfc/filter.c
index 2397f0e..b74a60a 100644
--- a/drivers/net/ethernet/sfc/filter.c
+++ b/drivers/net/ethernet/sfc/filter.c
@@ -1185,8 +1185,21 @@
nhoff = skb_network_offset(skb);
- if (skb->protocol != htons(ETH_P_IP))
+ if (skb->protocol == htons(ETH_P_8021Q)) {
+ EFX_BUG_ON_PARANOID(skb_headlen(skb) <
+ nhoff + sizeof(struct vlan_hdr));
+ if (((const struct vlan_hdr *)skb->data + nhoff)->
+ h_vlan_encapsulated_proto != htons(ETH_P_IP))
+ return -EPROTONOSUPPORT;
+
+ /* This is IP over 802.1q VLAN. We can't filter on the
+ * IP 5-tuple and the vlan together, so just strip the
+ * vlan header and filter on the IP part.
+ */
+ nhoff += sizeof(struct vlan_hdr);
+ } else if (skb->protocol != htons(ETH_P_IP)) {
return -EPROTONOSUPPORT;
+ }
/* RFS must validate the IP header length before calling us */
EFX_BUG_ON_PARANOID(skb_headlen(skb) < nhoff + sizeof(*ip));
diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h
index 9a2914c..f4c7e6b 100644
--- a/drivers/net/ethernet/sfc/net_driver.h
+++ b/drivers/net/ethernet/sfc/net_driver.h
@@ -243,6 +243,7 @@
#define EFX_RX_BUF_LAST_IN_PAGE 0x0001
#define EFX_RX_PKT_CSUMMED 0x0002
#define EFX_RX_PKT_DISCARD 0x0004
+#define EFX_RX_PKT_TCP 0x0040
/**
* struct efx_rx_page_state - Page-based rx buffer state
@@ -788,6 +789,7 @@
const struct efx_nic_type *type;
int legacy_irq;
bool legacy_irq_enabled;
+ bool eeh_disabled_legacy_irq;
struct workqueue_struct *workqueue;
char workqueue_name[16];
struct work_struct reset_work;
diff --git a/drivers/net/ethernet/sfc/nic.c b/drivers/net/ethernet/sfc/nic.c
index b0503cd..56ed3bc 100644
--- a/drivers/net/ethernet/sfc/nic.c
+++ b/drivers/net/ethernet/sfc/nic.c
@@ -14,6 +14,7 @@
#include <linux/pci.h>
#include <linux/module.h>
#include <linux/seq_file.h>
+#include <linux/cpu_rmap.h>
#include "net_driver.h"
#include "bitfield.h"
#include "efx.h"
@@ -1080,12 +1081,21 @@
rx_ev_hdr_type = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_HDR_TYPE);
if (likely(rx_ev_pkt_ok)) {
- /* If packet is marked as OK and packet type is TCP/IP or
- * UDP/IP, then we can rely on the hardware checksum.
+ /* If packet is marked as OK then we can rely on the
+ * hardware checksum and classification.
*/
- flags = (rx_ev_hdr_type == FSE_CZ_RX_EV_HDR_TYPE_IPV4V6_TCP ||
- rx_ev_hdr_type == FSE_CZ_RX_EV_HDR_TYPE_IPV4V6_UDP) ?
- EFX_RX_PKT_CSUMMED : 0;
+ flags = 0;
+ switch (rx_ev_hdr_type) {
+ case FSE_CZ_RX_EV_HDR_TYPE_IPV4V6_TCP:
+ flags |= EFX_RX_PKT_TCP;
+ /* fall through */
+ case FSE_CZ_RX_EV_HDR_TYPE_IPV4V6_UDP:
+ flags |= EFX_RX_PKT_CSUMMED;
+ /* fall through */
+ case FSE_CZ_RX_EV_HDR_TYPE_IPV4V6_OTHER:
+ case FSE_AZ_RX_EV_HDR_TYPE_OTHER:
+ break;
+ }
} else {
flags = efx_handle_rx_not_ok(rx_queue, event);
}
@@ -1579,6 +1589,16 @@
efx_readd(efx, ®, FR_BZ_INT_ISR0);
queues = EFX_EXTRACT_DWORD(reg, 0, 31);
+ /* Legacy interrupts are disabled too late by the EEH kernel
+ * code. Disable them earlier.
+ * If an EEH error occurred, the read will have returned all ones.
+ */
+ if (EFX_DWORD_IS_ALL_ONES(reg) && efx_try_recovery(efx) &&
+ !efx->eeh_disabled_legacy_irq) {
+ disable_irq_nosync(efx->legacy_irq);
+ efx->eeh_disabled_legacy_irq = true;
+ }
+
/* Handle non-event-queue sources */
if (queues & (1U << efx->irq_level)) {
syserr = EFX_OWORD_FIELD(*int_ker, FSF_AZ_NET_IVEC_FATAL_INT);
@@ -1687,6 +1707,7 @@
int efx_nic_init_interrupt(struct efx_nic *efx)
{
struct efx_channel *channel;
+ unsigned int n_irqs;
int rc;
if (!EFX_INT_MODE_USE_MSI(efx)) {
@@ -1707,7 +1728,19 @@
return 0;
}
+#ifdef CONFIG_RFS_ACCEL
+ if (efx->interrupt_mode == EFX_INT_MODE_MSIX) {
+ efx->net_dev->rx_cpu_rmap =
+ alloc_irq_cpu_rmap(efx->n_rx_channels);
+ if (!efx->net_dev->rx_cpu_rmap) {
+ rc = -ENOMEM;
+ goto fail1;
+ }
+ }
+#endif
+
/* Hook MSI or MSI-X interrupt */
+ n_irqs = 0;
efx_for_each_channel(channel, efx) {
rc = request_irq(channel->irq, efx_msi_interrupt,
IRQF_PROBE_SHARED, /* Not shared */
@@ -1718,13 +1751,31 @@
"failed to hook IRQ %d\n", channel->irq);
goto fail2;
}
+ ++n_irqs;
+
+#ifdef CONFIG_RFS_ACCEL
+ if (efx->interrupt_mode == EFX_INT_MODE_MSIX &&
+ channel->channel < efx->n_rx_channels) {
+ rc = irq_cpu_rmap_add(efx->net_dev->rx_cpu_rmap,
+ channel->irq);
+ if (rc)
+ goto fail2;
+ }
+#endif
}
return 0;
fail2:
- efx_for_each_channel(channel, efx)
+#ifdef CONFIG_RFS_ACCEL
+ free_irq_cpu_rmap(efx->net_dev->rx_cpu_rmap);
+ efx->net_dev->rx_cpu_rmap = NULL;
+#endif
+ efx_for_each_channel(channel, efx) {
+ if (n_irqs-- == 0)
+ break;
free_irq(channel->irq, &efx->channel[channel->channel]);
+ }
fail1:
return rc;
}
@@ -1734,11 +1785,14 @@
struct efx_channel *channel;
efx_oword_t reg;
+#ifdef CONFIG_RFS_ACCEL
+ free_irq_cpu_rmap(efx->net_dev->rx_cpu_rmap);
+ efx->net_dev->rx_cpu_rmap = NULL;
+#endif
+
/* Disable MSI/MSI-X interrupts */
- efx_for_each_channel(channel, efx) {
- if (channel->irq)
- free_irq(channel->irq, &efx->channel[channel->channel]);
- }
+ efx_for_each_channel(channel, efx)
+ free_irq(channel->irq, &efx->channel[channel->channel]);
/* ACK legacy interrupt */
if (efx_nic_rev(efx) >= EFX_REV_FALCON_B0)
diff --git a/drivers/net/ethernet/sfc/nic.h b/drivers/net/ethernet/sfc/nic.h
index 1b00033..d63c299 100644
--- a/drivers/net/ethernet/sfc/nic.h
+++ b/drivers/net/ethernet/sfc/nic.h
@@ -254,8 +254,8 @@
struct ethtool_ts_info;
extern void efx_ptp_probe(struct efx_nic *efx);
extern int efx_ptp_ioctl(struct efx_nic *efx, struct ifreq *ifr, int cmd);
-extern int efx_ptp_get_ts_info(struct net_device *net_dev,
- struct ethtool_ts_info *ts_info);
+extern void efx_ptp_get_ts_info(struct efx_nic *efx,
+ struct ethtool_ts_info *ts_info);
extern bool efx_ptp_is_ptp_tx(struct efx_nic *efx, struct sk_buff *skb);
extern int efx_ptp_tx(struct efx_nic *efx, struct sk_buff *skb);
extern void efx_ptp_event(struct efx_nic *efx, efx_qword_t *ev);
diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c
index 9a95abf..b495394 100644
--- a/drivers/net/ethernet/sfc/ptp.c
+++ b/drivers/net/ethernet/sfc/ptp.c
@@ -1203,18 +1203,16 @@
return 0;
}
-int
-efx_ptp_get_ts_info(struct net_device *net_dev, struct ethtool_ts_info *ts_info)
+void efx_ptp_get_ts_info(struct efx_nic *efx, struct ethtool_ts_info *ts_info)
{
- struct efx_nic *efx = netdev_priv(net_dev);
struct efx_ptp_data *ptp = efx->ptp_data;
if (!ptp)
- return -EOPNOTSUPP;
+ return;
- ts_info->so_timestamping = (SOF_TIMESTAMPING_TX_HARDWARE |
- SOF_TIMESTAMPING_RX_HARDWARE |
- SOF_TIMESTAMPING_RAW_HARDWARE);
+ ts_info->so_timestamping |= (SOF_TIMESTAMPING_TX_HARDWARE |
+ SOF_TIMESTAMPING_RX_HARDWARE |
+ SOF_TIMESTAMPING_RAW_HARDWARE);
ts_info->phc_index = ptp_clock_index(ptp->phc_clock);
ts_info->tx_types = 1 << HWTSTAMP_TX_OFF | 1 << HWTSTAMP_TX_ON;
ts_info->rx_filters = (1 << HWTSTAMP_FILTER_NONE |
@@ -1224,7 +1222,6 @@
1 << HWTSTAMP_FILTER_PTP_V2_L4_EVENT |
1 << HWTSTAMP_FILTER_PTP_V2_L4_SYNC |
1 << HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ);
- return 0;
}
int efx_ptp_ioctl(struct efx_nic *efx, struct ifreq *ifr, int cmd)
diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c
index a7dfe36..65646cd 100644
--- a/drivers/net/ethernet/sfc/rx.c
+++ b/drivers/net/ethernet/sfc/rx.c
@@ -36,7 +36,7 @@
#define EFX_RECYCLE_RING_SIZE_NOIOMMU (2 * EFX_RX_PREFERRED_BATCH)
/* Size of buffer allocated for skb header area. */
-#define EFX_SKB_HEADERS 64u
+#define EFX_SKB_HEADERS 128u
/* This is the percentage fill level below which new RX descriptors
* will be added to the RX descriptor ring.
@@ -598,6 +598,8 @@
/* Set the SKB flags */
skb_checksum_none_assert(skb);
+ if (likely(rx_buf->flags & EFX_RX_PKT_CSUMMED))
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
if (channel->type->receive_skb)
if (channel->type->receive_skb(channel, skb))
@@ -627,7 +629,7 @@
if (unlikely(!(efx->net_dev->features & NETIF_F_RXCSUM)))
rx_buf->flags &= ~EFX_RX_PKT_CSUMMED;
- if (!channel->type->receive_skb)
+ if ((rx_buf->flags & EFX_RX_PKT_TCP) && !channel->type->receive_skb)
efx_rx_packet_gro(channel, rx_buf, channel->rx_pkt_n_frags, eh);
else
efx_rx_deliver(channel, eh, rx_buf, channel->rx_pkt_n_frags);
@@ -675,7 +677,7 @@
#ifdef CONFIG_PPC64
bufs_in_recycle_ring = EFX_RECYCLE_RING_SIZE_IOMMU;
#else
- if (efx->pci_dev->dev.iommu_group)
+ if (iommu_present(&pci_bus_type))
bufs_in_recycle_ring = EFX_RECYCLE_RING_SIZE_IOMMU;
else
bufs_in_recycle_ring = EFX_RECYCLE_RING_SIZE_NOIOMMU;
diff --git a/drivers/net/ethernet/sun/sunbmac.c b/drivers/net/ethernet/sun/sunbmac.c
index 09b4f8c..0d43fa9 100644
--- a/drivers/net/ethernet/sun/sunbmac.c
+++ b/drivers/net/ethernet/sun/sunbmac.c
@@ -995,7 +995,6 @@
struct bigmac *bp = netdev_priv(dev);
void __iomem *bregs = bp->bregs;
struct netdev_hw_addr *ha;
- int i;
u32 tmp, crc;
/* Disable the receiver. The bit self-clears when
diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c
index efb6f65..f118d71 100644
--- a/drivers/net/ethernet/ti/davinci_emac.c
+++ b/drivers/net/ethernet/ti/davinci_emac.c
@@ -1532,7 +1532,7 @@
struct device *emac_dev = &ndev->dev;
u32 cnt;
struct resource *res;
- int q, m, ret;
+ int ret;
int i = 0;
int k = 0;
struct emac_priv *priv = netdev_priv(ndev);
@@ -1567,8 +1567,9 @@
while ((res = platform_get_resource(priv->pdev, IORESOURCE_IRQ, k))) {
for (i = res->start; i <= res->end; i++) {
- if (request_irq(i, emac_irq, IRQF_DISABLED,
- ndev->name, ndev))
+ if (devm_request_irq(&priv->pdev->dev, i, emac_irq,
+ IRQF_DISABLED,
+ ndev->name, ndev))
goto rollback;
}
k++;
@@ -1641,15 +1642,7 @@
rollback:
- dev_err(emac_dev, "DaVinci EMAC: request_irq() failed");
-
- for (q = k; k >= 0; k--) {
- for (m = i; m >= res->start; m--)
- free_irq(m, ndev);
- res = platform_get_resource(priv->pdev, IORESOURCE_IRQ, k-1);
- m = res->end;
- }
-
+ dev_err(emac_dev, "DaVinci EMAC: devm_request_irq() failed");
ret = -EBUSY;
err:
pm_runtime_put(&priv->pdev->dev);
@@ -1667,9 +1660,6 @@
*/
static int emac_dev_stop(struct net_device *ndev)
{
- struct resource *res;
- int i = 0;
- int irq_num;
struct emac_priv *priv = netdev_priv(ndev);
struct device *emac_dev = &ndev->dev;
@@ -1685,13 +1675,6 @@
if (priv->phydev)
phy_disconnect(priv->phydev);
- /* Free IRQ */
- while ((res = platform_get_resource(priv->pdev, IORESOURCE_IRQ, i))) {
- for (irq_num = res->start; irq_num <= res->end; irq_num++)
- free_irq(irq_num, priv->ndev);
- i++;
- }
-
if (netif_msg_drv(priv))
dev_notice(emac_dev, "DaVinci EMAC: %s stopped\n", ndev->name);
@@ -1771,29 +1754,22 @@
#endif
};
-#ifdef CONFIG_OF
-static struct emac_platform_data
- *davinci_emac_of_get_pdata(struct platform_device *pdev,
- struct emac_priv *priv)
+static struct emac_platform_data *
+davinci_emac_of_get_pdata(struct platform_device *pdev, struct emac_priv *priv)
{
struct device_node *np;
struct emac_platform_data *pdata = NULL;
const u8 *mac_addr;
- u32 data;
- int ret;
- pdata = pdev->dev.platform_data;
- if (!pdata) {
- pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
- if (!pdata)
- goto nodata;
- }
+ if (!IS_ENABLED(CONFIG_OF) || !pdev->dev.of_node)
+ return pdev->dev.platform_data;
+
+ pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
+ if (!pdata)
+ return NULL;
np = pdev->dev.of_node;
- if (!np)
- goto nodata;
- else
- pdata->version = EMAC_VERSION_2;
+ pdata->version = EMAC_VERSION_2;
if (!is_valid_ether_addr(pdata->mac_addr)) {
mac_addr = of_get_mac_address(np);
@@ -1801,47 +1777,31 @@
memcpy(pdata->mac_addr, mac_addr, ETH_ALEN);
}
- ret = of_property_read_u32(np, "ti,davinci-ctrl-reg-offset", &data);
- if (!ret)
- pdata->ctrl_reg_offset = data;
+ of_property_read_u32(np, "ti,davinci-ctrl-reg-offset",
+ &pdata->ctrl_reg_offset);
- ret = of_property_read_u32(np, "ti,davinci-ctrl-mod-reg-offset",
- &data);
- if (!ret)
- pdata->ctrl_mod_reg_offset = data;
+ of_property_read_u32(np, "ti,davinci-ctrl-mod-reg-offset",
+ &pdata->ctrl_mod_reg_offset);
- ret = of_property_read_u32(np, "ti,davinci-ctrl-ram-offset", &data);
- if (!ret)
- pdata->ctrl_ram_offset = data;
+ of_property_read_u32(np, "ti,davinci-ctrl-ram-offset",
+ &pdata->ctrl_ram_offset);
- ret = of_property_read_u32(np, "ti,davinci-ctrl-ram-size", &data);
- if (!ret)
- pdata->ctrl_ram_size = data;
+ of_property_read_u32(np, "ti,davinci-ctrl-ram-size",
+ &pdata->ctrl_ram_size);
- ret = of_property_read_u32(np, "ti,davinci-rmii-en", &data);
- if (!ret)
- pdata->rmii_en = data;
+ of_property_read_u8(np, "ti,davinci-rmii-en", &pdata->rmii_en);
- ret = of_property_read_u32(np, "ti,davinci-no-bd-ram", &data);
- if (!ret)
- pdata->no_bd_ram = data;
+ pdata->no_bd_ram = of_property_read_bool(np, "ti,davinci-no-bd-ram");
priv->phy_node = of_parse_phandle(np, "phy-handle", 0);
if (!priv->phy_node)
pdata->phy_id = "";
pdev->dev.platform_data = pdata;
-nodata:
+
return pdata;
}
-#else
-static struct emac_platform_data
- *davinci_emac_of_get_pdata(struct platform_device *pdev,
- struct emac_priv *priv)
-{
- return pdev->dev.platform_data;
-}
-#endif
+
/**
* davinci_emac_probe - EMAC device probe
* @pdev: The DaVinci EMAC device that we are removing
@@ -1856,7 +1816,7 @@
struct resource *res;
struct net_device *ndev;
struct emac_priv *priv;
- unsigned long size, hw_ram_addr;
+ unsigned long hw_ram_addr;
struct emac_platform_data *pdata;
struct device *emac_dev;
struct cpdma_params dma_params;
@@ -1907,25 +1867,11 @@
emac_dev = &ndev->dev;
/* Get EMAC platform data */
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- if (!res) {
- dev_err(&pdev->dev,"error getting res\n");
- rc = -ENOENT;
- goto no_pdata;
- }
-
priv->emac_base_phys = res->start + pdata->ctrl_reg_offset;
- size = resource_size(res);
- if (!devm_request_mem_region(&pdev->dev, res->start,
- size, ndev->name)) {
- dev_err(&pdev->dev, "failed request_mem_region() for regs\n");
- rc = -ENXIO;
- goto no_pdata;
- }
-
- priv->remap_addr = devm_ioremap(&pdev->dev, res->start, size);
- if (!priv->remap_addr) {
+ priv->remap_addr = devm_ioremap_resource(&pdev->dev, res);
+ if (IS_ERR(priv->remap_addr)) {
dev_err(&pdev->dev, "unable to map IO\n");
- rc = -ENOMEM;
+ rc = PTR_ERR(priv->remap_addr);
goto no_pdata;
}
priv->emac_base = priv->remap_addr + pdata->ctrl_reg_offset;
@@ -2076,11 +2022,13 @@
.resume = davinci_emac_resume,
};
+#if IS_ENABLED(CONFIG_OF)
static const struct of_device_id davinci_emac_of_match[] = {
{.compatible = "ti,davinci-dm6467-emac", },
{},
};
MODULE_DEVICE_TABLE(of, davinci_emac_of_match);
+#endif
/* davinci_emac_driver: EMAC platform driver structure */
static struct platform_driver davinci_emac_driver = {
diff --git a/drivers/net/ethernet/ti/davinci_mdio.c b/drivers/net/ethernet/ti/davinci_mdio.c
index c47f0db..dac6f58 100644
--- a/drivers/net/ethernet/ti/davinci_mdio.c
+++ b/drivers/net/ethernet/ti/davinci_mdio.c
@@ -291,6 +291,7 @@
return 0;
}
+#if IS_ENABLED(CONFIG_OF)
static int davinci_mdio_probe_dt(struct mdio_platform_data *data,
struct platform_device *pdev)
{
@@ -308,7 +309,7 @@
return 0;
}
-
+#endif
static int davinci_mdio_probe(struct platform_device *pdev)
{
@@ -477,11 +478,13 @@
.resume_early = davinci_mdio_resume,
};
+#if IS_ENABLED(CONFIG_OF)
static const struct of_device_id davinci_mdio_of_mtable[] = {
{ .compatible = "ti,davinci_mdio", },
{ /* sentinel */ },
};
MODULE_DEVICE_TABLE(of, davinci_mdio_of_mtable);
+#endif
static struct platform_driver davinci_mdio_driver = {
.driver = {
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index d811b06..18373b6 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -638,6 +638,14 @@
return __ethtool_get_settings(vlan->lowerdev, cmd);
}
+static netdev_features_t macvlan_fix_features(struct net_device *dev,
+ netdev_features_t features)
+{
+ struct macvlan_dev *vlan = netdev_priv(dev);
+
+ return features & (vlan->set_features | ~MACVLAN_FEATURES);
+}
+
static const struct ethtool_ops macvlan_ethtool_ops = {
.get_link = ethtool_op_get_link,
.get_settings = macvlan_ethtool_get_settings,
@@ -651,6 +659,7 @@
.ndo_stop = macvlan_stop,
.ndo_start_xmit = macvlan_start_xmit,
.ndo_change_mtu = macvlan_change_mtu,
+ .ndo_fix_features = macvlan_fix_features,
.ndo_change_rx_flags = macvlan_change_rx_flags,
.ndo_set_mac_address = macvlan_set_mac_address,
.ndo_set_rx_mode = macvlan_set_mac_lists,
@@ -791,6 +800,7 @@
vlan->port = port;
vlan->receive = receive;
vlan->forward = forward;
+ vlan->set_features = MACVLAN_FEATURES;
vlan->mode = MACVLAN_MODE_VEPA;
if (data && data[IFLA_MACVLAN_MODE])
diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
index 5a76f20..5bfaecd 100644
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -65,11 +65,14 @@
static const struct proto_ops macvtap_socket_ops;
+#define TUN_OFFLOADS (NETIF_F_HW_CSUM | NETIF_F_TSO_ECN | NETIF_F_TSO | \
+ NETIF_F_TSO6 | NETIF_F_UFO)
+#define RX_OFFLOADS (NETIF_F_GRO | NETIF_F_LRO)
/*
* RCU usage:
* The macvtap_queue and the macvlan_dev are loosely coupled, the
* pointers from one to the other can only be read while rcu_read_lock
- * or macvtap_lock is held.
+ * or rtnl is held.
*
* Both the file and the macvlan_dev hold a reference on the macvtap_queue
* through sock_hold(&q->sk). When the macvlan_dev goes away first,
@@ -81,7 +84,6 @@
* file or the dev. The data structure is freed through __sk_free
* when both our references and any pending SKBs are gone.
*/
-static DEFINE_SPINLOCK(macvtap_lock);
static int macvtap_enable_queue(struct net_device *dev, struct file *file,
struct macvtap_queue *q)
@@ -89,7 +91,7 @@
struct macvlan_dev *vlan = netdev_priv(dev);
int err = -EINVAL;
- spin_lock(&macvtap_lock);
+ ASSERT_RTNL();
if (q->enabled)
goto out;
@@ -101,7 +103,6 @@
vlan->numvtaps++;
out:
- spin_unlock(&macvtap_lock);
return err;
}
@@ -111,7 +112,7 @@
struct macvlan_dev *vlan = netdev_priv(dev);
int err = -EBUSY;
- spin_lock(&macvtap_lock);
+ rtnl_lock();
if (vlan->numqueues == MAX_MACVTAP_QUEUES)
goto out;
@@ -130,26 +131,25 @@
vlan->numqueues++;
out:
- spin_unlock(&macvtap_lock);
+ rtnl_unlock();
return err;
}
-static int __macvtap_disable_queue(struct macvtap_queue *q)
+static int macvtap_disable_queue(struct macvtap_queue *q)
{
struct macvlan_dev *vlan;
struct macvtap_queue *nq;
- vlan = rcu_dereference_protected(q->vlan,
- lockdep_is_held(&macvtap_lock));
-
+ ASSERT_RTNL();
if (!q->enabled)
return -EINVAL;
+ vlan = rtnl_dereference(q->vlan);
+
if (vlan) {
int index = q->queue_index;
BUG_ON(index >= vlan->numvtaps);
- nq = rcu_dereference_protected(vlan->taps[vlan->numvtaps - 1],
- lockdep_is_held(&macvtap_lock));
+ nq = rtnl_dereference(vlan->taps[vlan->numvtaps - 1]);
nq->queue_index = index;
rcu_assign_pointer(vlan->taps[index], nq);
@@ -162,17 +162,6 @@
return 0;
}
-static int macvtap_disable_queue(struct macvtap_queue *q)
-{
- int err;
-
- spin_lock(&macvtap_lock);
- err = __macvtap_disable_queue(q);
- spin_unlock(&macvtap_lock);
-
- return err;
-}
-
/*
* The file owning the queue got closed, give up both
* the reference that the files holds as well as the
@@ -185,12 +174,12 @@
{
struct macvlan_dev *vlan;
- spin_lock(&macvtap_lock);
- vlan = rcu_dereference_protected(q->vlan,
- lockdep_is_held(&macvtap_lock));
+ rtnl_lock();
+ vlan = rtnl_dereference(q->vlan);
+
if (vlan) {
if (q->enabled)
- BUG_ON(__macvtap_disable_queue(q));
+ BUG_ON(macvtap_disable_queue(q));
vlan->numqueues--;
RCU_INIT_POINTER(q->vlan, NULL);
@@ -198,7 +187,7 @@
list_del_init(&q->next);
}
- spin_unlock(&macvtap_lock);
+ rtnl_unlock();
synchronize_rcu();
sock_put(&q->sk);
@@ -260,7 +249,7 @@
struct macvtap_queue *q, *tmp, *qlist[MAX_MACVTAP_QUEUES];
int i, j = 0;
- spin_lock(&macvtap_lock);
+ ASSERT_RTNL();
list_for_each_entry_safe(q, tmp, &vlan->queue_list, next) {
list_del_init(&q->next);
qlist[j++] = q;
@@ -275,9 +264,6 @@
BUG_ON(vlan->numqueues);
/* guarantee that any future macvtap_set_queue will fail */
vlan->numvtaps = MAX_MACVTAP_QUEUES;
- spin_unlock(&macvtap_lock);
-
- synchronize_rcu();
for (--j; j >= 0; j--)
sock_put(&qlist[j]->sk);
@@ -290,14 +276,44 @@
*/
static int macvtap_forward(struct net_device *dev, struct sk_buff *skb)
{
+ struct macvlan_dev *vlan = netdev_priv(dev);
struct macvtap_queue *q = macvtap_get_queue(dev, skb);
+ netdev_features_t features;
if (!q)
goto drop;
if (skb_queue_len(&q->sk.sk_receive_queue) >= dev->tx_queue_len)
goto drop;
- skb_queue_tail(&q->sk.sk_receive_queue, skb);
+ skb->dev = dev;
+ /* Apply the forward feature mask so that we perform segmentation
+ * according to users wishes.
+ */
+ features = netif_skb_features(skb) & vlan->tap_features;
+ if (netif_needs_gso(skb, features)) {
+ struct sk_buff *segs = __skb_gso_segment(skb, features, false);
+
+ if (IS_ERR(segs))
+ goto drop;
+
+ if (!segs) {
+ skb_queue_tail(&q->sk.sk_receive_queue, skb);
+ goto wake_up;
+ }
+
+ kfree_skb(skb);
+ while (segs) {
+ struct sk_buff *nskb = segs->next;
+
+ segs->next = NULL;
+ skb_queue_tail(&q->sk.sk_receive_queue, segs);
+ segs = nskb;
+ }
+ } else {
+ skb_queue_tail(&q->sk.sk_receive_queue, skb);
+ }
+
+wake_up:
wake_up_interruptible_poll(sk_sleep(&q->sk), POLLIN | POLLRDNORM | POLLRDBAND);
return NET_RX_SUCCESS;
@@ -366,6 +382,11 @@
struct macvlan_dev *vlan = netdev_priv(dev);
INIT_LIST_HEAD(&vlan->queue_list);
+ /* Since macvlan supports all offloads by default, make
+ * tap support all offloads also.
+ */
+ vlan->tap_features = TUN_OFFLOADS;
+
/* Don't put anything that may fail after macvlan_common_newlink
* because we can't undo what it does.
*/
@@ -771,8 +792,8 @@
skb_probe_transport_header(skb, ETH_HLEN);
- rcu_read_lock_bh();
- vlan = rcu_dereference_bh(q->vlan);
+ rcu_read_lock();
+ vlan = rcu_dereference(q->vlan);
/* copy skb_ubuf_info for callback when skb has no error */
if (zerocopy) {
skb_shinfo(skb)->destructor_arg = m->msg_control;
@@ -783,7 +804,7 @@
macvlan_start_xmit(skb, vlan->dev);
else
kfree_skb(skb);
- rcu_read_unlock_bh();
+ rcu_read_unlock();
return total_len;
@@ -791,11 +812,11 @@
kfree_skb(skb);
err:
- rcu_read_lock_bh();
- vlan = rcu_dereference_bh(q->vlan);
+ rcu_read_lock();
+ vlan = rcu_dereference(q->vlan);
if (vlan)
vlan->dev->stats.tx_dropped++;
- rcu_read_unlock_bh();
+ rcu_read_unlock();
return err;
}
@@ -871,11 +892,11 @@
copied += len;
done:
- rcu_read_lock_bh();
- vlan = rcu_dereference_bh(q->vlan);
+ rcu_read_lock();
+ vlan = rcu_dereference(q->vlan);
if (vlan)
macvlan_count_rx(vlan, copied - vnet_hdr_len, ret == 0, 0);
- rcu_read_unlock_bh();
+ rcu_read_unlock();
return ret ? ret : copied;
}
@@ -941,11 +962,10 @@
{
struct macvlan_dev *vlan;
- rcu_read_lock_bh();
- vlan = rcu_dereference_bh(q->vlan);
+ ASSERT_RTNL();
+ vlan = rtnl_dereference(q->vlan);
if (vlan)
dev_hold(vlan->dev);
- rcu_read_unlock_bh();
return vlan;
}
@@ -976,6 +996,58 @@
return ret;
}
+static int set_offload(struct macvtap_queue *q, unsigned long arg)
+{
+ struct macvlan_dev *vlan;
+ netdev_features_t features;
+ netdev_features_t feature_mask = 0;
+
+ vlan = rtnl_dereference(q->vlan);
+ if (!vlan)
+ return -ENOLINK;
+
+ features = vlan->dev->features;
+
+ if (arg & TUN_F_CSUM) {
+ feature_mask = NETIF_F_HW_CSUM;
+
+ if (arg & (TUN_F_TSO4 | TUN_F_TSO6)) {
+ if (arg & TUN_F_TSO_ECN)
+ feature_mask |= NETIF_F_TSO_ECN;
+ if (arg & TUN_F_TSO4)
+ feature_mask |= NETIF_F_TSO;
+ if (arg & TUN_F_TSO6)
+ feature_mask |= NETIF_F_TSO6;
+ }
+
+ if (arg & TUN_F_UFO)
+ feature_mask |= NETIF_F_UFO;
+ }
+
+ /* tun/tap driver inverts the usage for TSO offloads, where
+ * setting the TSO bit means that the userspace wants to
+ * accept TSO frames and turning it off means that user space
+ * does not support TSO.
+ * For macvtap, we have to invert it to mean the same thing.
+ * When user space turns off TSO, we turn off GSO/LRO so that
+ * user-space will not receive TSO frames.
+ */
+ if (feature_mask & (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_UFO))
+ features |= RX_OFFLOADS;
+ else
+ features &= ~RX_OFFLOADS;
+
+ /* tap_features are the same as features on tun/tap and
+ * reflect user expectations.
+ */
+ vlan->tap_features = vlan->dev->features &
+ (feature_mask | ~TUN_OFFLOADS);
+ vlan->set_features = features;
+ netdev_update_features(vlan->dev);
+
+ return 0;
+}
+
/*
* provide compatibility with generic tun/tap interface
*/
@@ -1008,21 +1080,27 @@
return ret;
case TUNGETIFF:
+ rtnl_lock();
vlan = macvtap_get_vlan(q);
- if (!vlan)
+ if (!vlan) {
+ rtnl_unlock();
return -ENOLINK;
+ }
ret = 0;
if (copy_to_user(&ifr->ifr_name, vlan->dev->name, IFNAMSIZ) ||
put_user(q->flags, &ifr->ifr_flags))
ret = -EFAULT;
macvtap_put_vlan(vlan);
+ rtnl_unlock();
return ret;
case TUNSETQUEUE:
if (get_user(u, &ifr->ifr_flags))
return -EFAULT;
- return macvtap_ioctl_set_queue(file, u);
+ rtnl_lock();
+ ret = macvtap_ioctl_set_queue(file, u);
+ rtnl_unlock();
case TUNGETFEATURES:
if (put_user(IFF_TAP | IFF_NO_PI | IFF_VNET_HDR |
@@ -1062,7 +1140,10 @@
got enabled for forwarded frames */
if (!(q->flags & IFF_VNET_HDR))
return -EINVAL;
- return 0;
+ rtnl_lock();
+ ret = set_offload(q, arg);
+ rtnl_unlock();
+ return ret;
default:
return -EINVAL;
diff --git a/drivers/net/nlmon.c b/drivers/net/nlmon.c
new file mode 100644
index 0000000..dc364be
--- /dev/null
+++ b/drivers/net/nlmon.c
@@ -0,0 +1,170 @@
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/netlink.h>
+#include <net/net_namespace.h>
+#include <linux/if_arp.h>
+
+struct pcpu_lstats {
+ u64 packets;
+ u64 bytes;
+ struct u64_stats_sync syncp;
+};
+
+static netdev_tx_t nlmon_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ int len = skb->len;
+ struct pcpu_lstats *stats = this_cpu_ptr(dev->lstats);
+
+ u64_stats_update_begin(&stats->syncp);
+ stats->bytes += len;
+ stats->packets++;
+ u64_stats_update_end(&stats->syncp);
+
+ dev_kfree_skb(skb);
+
+ return NETDEV_TX_OK;
+}
+
+static int nlmon_is_valid_mtu(int new_mtu)
+{
+ return new_mtu >= sizeof(struct nlmsghdr) && new_mtu <= INT_MAX;
+}
+
+static int nlmon_change_mtu(struct net_device *dev, int new_mtu)
+{
+ if (!nlmon_is_valid_mtu(new_mtu))
+ return -EINVAL;
+
+ dev->mtu = new_mtu;
+ return 0;
+}
+
+static int nlmon_dev_init(struct net_device *dev)
+{
+ dev->lstats = alloc_percpu(struct pcpu_lstats);
+
+ return dev->lstats == NULL ? -ENOMEM : 0;
+}
+
+static void nlmon_dev_uninit(struct net_device *dev)
+{
+ free_percpu(dev->lstats);
+}
+
+static struct netlink_tap nlmon_tap;
+
+static int nlmon_open(struct net_device *dev)
+{
+ return netlink_add_tap(&nlmon_tap);
+}
+
+static int nlmon_close(struct net_device *dev)
+{
+ return netlink_remove_tap(&nlmon_tap);
+}
+
+static struct rtnl_link_stats64 *
+nlmon_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
+{
+ int i;
+ u64 bytes = 0, packets = 0;
+
+ for_each_possible_cpu(i) {
+ const struct pcpu_lstats *nl_stats;
+ u64 tbytes, tpackets;
+ unsigned int start;
+
+ nl_stats = per_cpu_ptr(dev->lstats, i);
+
+ do {
+ start = u64_stats_fetch_begin_bh(&nl_stats->syncp);
+ tbytes = nl_stats->bytes;
+ tpackets = nl_stats->packets;
+ } while (u64_stats_fetch_retry_bh(&nl_stats->syncp, start));
+
+ packets += tpackets;
+ bytes += tbytes;
+ }
+
+ stats->rx_packets = packets;
+ stats->tx_packets = 0;
+
+ stats->rx_bytes = bytes;
+ stats->tx_bytes = 0;
+
+ return stats;
+}
+
+static u32 always_on(struct net_device *dev)
+{
+ return 1;
+}
+
+static const struct ethtool_ops nlmon_ethtool_ops = {
+ .get_link = always_on,
+};
+
+static const struct net_device_ops nlmon_ops = {
+ .ndo_init = nlmon_dev_init,
+ .ndo_uninit = nlmon_dev_uninit,
+ .ndo_open = nlmon_open,
+ .ndo_stop = nlmon_close,
+ .ndo_start_xmit = nlmon_xmit,
+ .ndo_get_stats64 = nlmon_get_stats64,
+ .ndo_change_mtu = nlmon_change_mtu,
+};
+
+static struct netlink_tap nlmon_tap __read_mostly = {
+ .module = THIS_MODULE,
+};
+
+static void nlmon_setup(struct net_device *dev)
+{
+ dev->type = ARPHRD_NETLINK;
+ dev->tx_queue_len = 0;
+
+ dev->netdev_ops = &nlmon_ops;
+ dev->ethtool_ops = &nlmon_ethtool_ops;
+ dev->destructor = free_netdev;
+
+ dev->features = NETIF_F_FRAGLIST | NETIF_F_HIGHDMA;
+ dev->flags = IFF_NOARP;
+
+ /* That's rather a softlimit here, which, of course,
+ * can be altered. Not a real MTU, but what is to be
+ * expected in most cases.
+ */
+ dev->mtu = NLMSG_GOODSIZE;
+}
+
+static __init int nlmon_register(void)
+{
+ int err;
+ struct net_device *nldev;
+
+ nldev = nlmon_tap.dev = alloc_netdev(0, "netlink", nlmon_setup);
+ if (unlikely(nldev == NULL))
+ return -ENOMEM;
+
+ err = register_netdev(nldev);
+ if (unlikely(err))
+ free_netdev(nldev);
+
+ return err;
+}
+
+static __exit void nlmon_unregister(void)
+{
+ struct net_device *nldev = nlmon_tap.dev;
+
+ unregister_netdev(nldev);
+}
+
+module_init(nlmon_register);
+module_exit(nlmon_unregister);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Daniel Borkmann <dborkman@redhat.com>");
+MODULE_AUTHOR("Mathieu Geli <geli@enseirb.fr>");
+MODULE_DESCRIPTION("Netlink monitoring device");
diff --git a/drivers/s390/net/netiucv.c b/drivers/s390/net/netiucv.c
index 9ca3996..279ad50 100644
--- a/drivers/s390/net/netiucv.c
+++ b/drivers/s390/net/netiucv.c
@@ -130,26 +130,6 @@
/**
* some more debug stuff
*/
-#define IUCV_HEXDUMP16(importance,header,ptr) \
-PRINT_##importance(header "%02x %02x %02x %02x %02x %02x %02x %02x " \
- "%02x %02x %02x %02x %02x %02x %02x %02x\n", \
- *(((char*)ptr)),*(((char*)ptr)+1),*(((char*)ptr)+2), \
- *(((char*)ptr)+3),*(((char*)ptr)+4),*(((char*)ptr)+5), \
- *(((char*)ptr)+6),*(((char*)ptr)+7),*(((char*)ptr)+8), \
- *(((char*)ptr)+9),*(((char*)ptr)+10),*(((char*)ptr)+11), \
- *(((char*)ptr)+12),*(((char*)ptr)+13), \
- *(((char*)ptr)+14),*(((char*)ptr)+15)); \
-PRINT_##importance(header "%02x %02x %02x %02x %02x %02x %02x %02x " \
- "%02x %02x %02x %02x %02x %02x %02x %02x\n", \
- *(((char*)ptr)+16),*(((char*)ptr)+17), \
- *(((char*)ptr)+18),*(((char*)ptr)+19), \
- *(((char*)ptr)+20),*(((char*)ptr)+21), \
- *(((char*)ptr)+22),*(((char*)ptr)+23), \
- *(((char*)ptr)+24),*(((char*)ptr)+25), \
- *(((char*)ptr)+26),*(((char*)ptr)+27), \
- *(((char*)ptr)+28),*(((char*)ptr)+29), \
- *(((char*)ptr)+30),*(((char*)ptr)+31));
-
#define PRINTK_HEADER " iucv: " /* for debugging */
/* dummy device to make sure netiucv_pm functions are called */
diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h
index c4f392d..41ef943 100644
--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h
@@ -738,7 +738,7 @@
int qdio_err;
};
-#define QETH_NAPI_WEIGHT 128
+#define QETH_NAPI_WEIGHT NAPI_POLL_WEIGHT
struct qeth_card {
struct list_head list;
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index 6cd0fc1..e4ca704 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -1282,8 +1282,10 @@
qeth_free_cq(card);
cancel_delayed_work_sync(&card->buffer_reclaim_work);
- for (j = 0; j < QDIO_MAX_BUFFERS_PER_Q; ++j)
- dev_kfree_skb_any(card->qdio.in_q->bufs[j].rx_skb);
+ for (j = 0; j < QDIO_MAX_BUFFERS_PER_Q; ++j) {
+ if (card->qdio.in_q->bufs[j].rx_skb)
+ dev_kfree_skb_any(card->qdio.in_q->bufs[j].rx_skb);
+ }
kfree(card->qdio.in_q);
card->qdio.in_q = NULL;
/* inbound buffer pool */
@@ -1729,14 +1731,14 @@
QETH_DBF_TEXT(SETUP, 2, "cfgblkt");
if (prcd[74] == 0xF0 && prcd[75] == 0xF0 &&
- (prcd[76] == 0xF5 || prcd[76] == 0xF6)) {
- card->info.blkt.time_total = 250;
- card->info.blkt.inter_packet = 5;
- card->info.blkt.inter_packet_jumbo = 15;
- } else {
+ prcd[76] >= 0xF1 && prcd[76] <= 0xF4) {
card->info.blkt.time_total = 0;
card->info.blkt.inter_packet = 0;
card->info.blkt.inter_packet_jumbo = 0;
+ } else {
+ card->info.blkt.time_total = 250;
+ card->info.blkt.inter_packet = 5;
+ card->info.blkt.inter_packet_jumbo = 15;
}
}
@@ -2198,11 +2200,11 @@
case QETH_LINK_TYPE_LANE_TR:
return 2000;
default:
- return 1492;
+ return card->options.layer2 ? 1500 : 1492;
}
case QETH_CARD_TYPE_OSM:
case QETH_CARD_TYPE_OSX:
- return 1492;
+ return card->options.layer2 ? 1500 : 1492;
default:
return 1500;
}
@@ -2275,9 +2277,10 @@
card->info.max_mtu = mtu;
card->qdio.in_buf_size = mtu + 2 * PAGE_SIZE;
} else {
- card->info.initial_mtu = qeth_get_initial_mtu_for_card(card);
card->info.max_mtu = *(__u16 *)QETH_ULP_ENABLE_RESP_MAX_MTU(
iob->data);
+ card->info.initial_mtu = min(card->info.max_mtu,
+ qeth_get_initial_mtu_for_card(card));
card->qdio.in_buf_size = QETH_IN_BUF_SIZE_DEFAULT;
}
diff --git a/fs/select.c b/fs/select.c
index 8c1c96c..79b876e 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -27,6 +27,7 @@
#include <linux/rcupdate.h>
#include <linux/hrtimer.h>
#include <linux/sched/rt.h>
+#include <net/ll_poll.h>
#include <asm/uaccess.h>
@@ -384,9 +385,10 @@
#define POLLEX_SET (POLLPRI)
static inline void wait_key_set(poll_table *wait, unsigned long in,
- unsigned long out, unsigned long bit)
+ unsigned long out, unsigned long bit,
+ unsigned int ll_flag)
{
- wait->_key = POLLEX_SET;
+ wait->_key = POLLEX_SET | ll_flag;
if (in & bit)
wait->_key |= POLLIN_SET;
if (out & bit)
@@ -400,6 +402,8 @@
poll_table *wait;
int retval, i, timed_out = 0;
unsigned long slack = 0;
+ unsigned int ll_flag = POLL_LL;
+ u64 ll_time = ll_end_time();
rcu_read_lock();
retval = max_select_fd(n, fds);
@@ -422,6 +426,7 @@
retval = 0;
for (;;) {
unsigned long *rinp, *routp, *rexp, *inp, *outp, *exp;
+ bool can_ll = false;
inp = fds->in; outp = fds->out; exp = fds->ex;
rinp = fds->res_in; routp = fds->res_out; rexp = fds->res_ex;
@@ -449,7 +454,8 @@
f_op = f.file->f_op;
mask = DEFAULT_POLLMASK;
if (f_op && f_op->poll) {
- wait_key_set(wait, in, out, bit);
+ wait_key_set(wait, in, out,
+ bit, ll_flag);
mask = (*f_op->poll)(f.file, wait);
}
fdput(f);
@@ -468,6 +474,11 @@
retval++;
wait->_qproc = NULL;
}
+ if (mask & POLL_LL)
+ can_ll = true;
+ /* got something, stop busy polling */
+ if (retval)
+ ll_flag = 0;
}
}
if (res_in)
@@ -486,6 +497,9 @@
break;
}
+ if (can_ll && can_poll_ll(ll_time))
+ continue;
+
/*
* If this is the first loop and we have a timeout
* given, then we convert to ktime_t and set the to
@@ -717,7 +731,8 @@
* pwait poll_table will be used by the fd-provided poll handler for waiting,
* if pwait->_qproc is non-NULL.
*/
-static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait)
+static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait,
+ bool *can_ll, unsigned int ll_flag)
{
unsigned int mask;
int fd;
@@ -731,7 +746,10 @@
mask = DEFAULT_POLLMASK;
if (f.file->f_op && f.file->f_op->poll) {
pwait->_key = pollfd->events|POLLERR|POLLHUP;
+ pwait->_key |= ll_flag;
mask = f.file->f_op->poll(f.file, pwait);
+ if (mask & POLL_LL)
+ *can_ll = true;
}
/* Mask out unneeded events. */
mask &= pollfd->events | POLLERR | POLLHUP;
@@ -750,6 +768,8 @@
ktime_t expire, *to = NULL;
int timed_out = 0, count = 0;
unsigned long slack = 0;
+ unsigned int ll_flag = POLL_LL;
+ u64 ll_time = ll_end_time();
/* Optimise the no-wait case */
if (end_time && !end_time->tv_sec && !end_time->tv_nsec) {
@@ -762,6 +782,7 @@
for (;;) {
struct poll_list *walk;
+ bool can_ll = false;
for (walk = list; walk != NULL; walk = walk->next) {
struct pollfd * pfd, * pfd_end;
@@ -776,9 +797,10 @@
* this. They'll get immediately deregistered
* when we break out and return.
*/
- if (do_pollfd(pfd, pt)) {
+ if (do_pollfd(pfd, pt, &can_ll, ll_flag)) {
count++;
pt->_qproc = NULL;
+ ll_flag = 0;
}
}
}
@@ -795,6 +817,8 @@
if (count || timed_out)
break;
+ if (can_ll && can_poll_ll(ll_time))
+ continue;
/*
* If this is the first loop and we have a timeout
* given, then we convert to ktime_t and set the to
diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h
index f49a9f6..ddd33fd 100644
--- a/include/linux/if_macvlan.h
+++ b/include/linux/if_macvlan.h
@@ -65,6 +65,7 @@
DECLARE_BITMAP(mc_filter, MACVLAN_MC_FILTER_SZ);
+ netdev_features_t set_features;
enum macvlan_mode mode;
u16 flags;
int (*receive)(struct sk_buff *skb);
@@ -75,6 +76,7 @@
struct list_head queue_list;
int numvtaps;
int numqueues;
+ netdev_features_t tap_features;
int minor;
};
diff --git a/include/linux/ktime.h b/include/linux/ktime.h
index bbca128..b4fa5e4 100644
--- a/include/linux/ktime.h
+++ b/include/linux/ktime.h
@@ -323,6 +323,11 @@
return ktime_add_ns(kt, usec * 1000);
}
+static inline ktime_t ktime_add_ms(const ktime_t kt, const u64 msec)
+{
+ return ktime_add_ns(kt, msec * NSEC_PER_MSEC);
+}
+
static inline ktime_t ktime_sub_us(const ktime_t kt, const u64 usec)
{
return ktime_sub_ns(kt, usec * 1000);
@@ -366,7 +371,15 @@
static inline ktime_t ns_to_ktime(u64 ns)
{
static const ktime_t ktime_zero = { .tv64 = 0 };
+
return ktime_add_ns(ktime_zero, ns);
}
+static inline ktime_t ms_to_ktime(u64 ms)
+{
+ static const ktime_t ktime_zero = { .tv64 = 0 };
+
+ return ktime_add_ms(ktime_zero, ms);
+}
+
#endif
diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index f78b430..86fde81a 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -145,4 +145,14 @@
return __netlink_dump_start(ssk, skb, nlh, control);
}
+struct netlink_tap {
+ struct net_device *dev;
+ struct module *module;
+ struct list_head list;
+};
+
+extern int netlink_add_tap(struct netlink_tap *nt);
+extern int __netlink_remove_tap(struct netlink_tap *nt);
+extern int netlink_remove_tap(struct netlink_tap *nt);
+
#endif /* __LINUX_NETLINK_H */
diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
index e07feb4..e4c5a2d 100644
--- a/include/net/if_inet6.h
+++ b/include/net/if_inet6.h
@@ -50,7 +50,7 @@
int state;
- __u8 probes;
+ __u8 dad_probes;
__u8 flags;
__u16 scope;
@@ -58,7 +58,7 @@
unsigned long cstamp; /* created timestamp */
unsigned long tstamp; /* updated timestamp */
- struct timer_list timer;
+ struct timer_list dad_timer;
struct inet6_dev *idev;
struct rt6_info *rt;
@@ -195,6 +195,10 @@
struct neigh_parms *nd_parms;
struct ipv6_devconf cnf;
struct ipv6_devstat stats;
+
+ struct timer_list rs_timer;
+ __u8 rs_probes;
+
unsigned long tstamp; /* ipv6InterfaceTable update timestamp */
struct rcu_head rcu;
};
diff --git a/include/net/ll_poll.h b/include/net/ll_poll.h
index fcc7c36..5bf2b3a 100644
--- a/include/net/ll_poll.h
+++ b/include/net/ll_poll.h
@@ -30,6 +30,7 @@
#ifdef CONFIG_NET_LL_RX_POLL
struct napi_struct;
+extern unsigned int sysctl_net_ll_read __read_mostly;
extern unsigned int sysctl_net_ll_poll __read_mostly;
/* return values from ndo_ll_poll */
@@ -38,17 +39,18 @@
/* we can use sched_clock() because we don't care much about precision
* we only care that the average is bounded
+ * we don't mind a ~2.5% imprecision so <<10 instead of *1000
+ * sk->sk_ll_usec is a u_int so this can't overflow
*/
-static inline u64 ll_end_time(struct sock *sk)
+static inline u64 ll_sk_end_time(struct sock *sk)
{
- u64 end_time = ACCESS_ONCE(sk->sk_ll_usec);
+ return ((u64)ACCESS_ONCE(sk->sk_ll_usec) << 10) + sched_clock();
+}
- /* we don't mind a ~2.5% imprecision
- * sk->sk_ll_usec is a u_int so this can't overflow
- */
- end_time = (end_time << 10) + sched_clock();
-
- return end_time;
+/* in poll/select we use the global sysctl_net_ll_poll value */
+static inline u64 ll_end_time(void)
+{
+ return ((u64)ACCESS_ONCE(sysctl_net_ll_poll) << 10) + sched_clock();
}
static inline bool sk_valid_ll(struct sock *sk)
@@ -62,10 +64,13 @@
return !time_after64(sched_clock(), end_time);
}
+/* when used in sock_poll() nonblock is known at compile time to be true
+ * so the loop and end_time will be optimized out
+ */
static inline bool sk_poll_ll(struct sock *sk, int nonblock)
{
+ u64 end_time = nonblock ? 0 : ll_sk_end_time(sk);
const struct net_device_ops *ops;
- u64 end_time = ll_end_time(sk);
struct napi_struct *napi;
int rc = false;
@@ -84,7 +89,6 @@
goto out;
do {
-
rc = ops->ndo_ll_poll(napi);
if (rc == LL_FLUSH_FAILED)
@@ -95,8 +99,8 @@
NET_ADD_STATS_BH(sock_net(sk),
LINUX_MIB_LOWLATENCYRXPACKETS, rc);
- } while (skb_queue_empty(&sk->sk_receive_queue)
- && can_poll_ll(end_time) && !nonblock);
+ } while (!nonblock && skb_queue_empty(&sk->sk_receive_queue) &&
+ can_poll_ll(end_time));
rc = !skb_queue_empty(&sk->sk_receive_queue);
out:
@@ -118,7 +122,12 @@
#else /* CONFIG_NET_LL_RX_POLL */
-static inline u64 ll_end_time(struct sock *sk)
+static inline u64 sk_ll_end_time(struct sock *sk)
+{
+ return 0;
+}
+
+static inline u64 ll_end_time(void)
{
return 0;
}
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 6321c08..e6b95bc 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -188,11 +188,6 @@
* Section: Macros, externs, and inlines
*/
-
-#ifdef TEST_FRAME
-#include <test_frame.h>
-#else
-
/* spin lock wrappers. */
#define sctp_spin_lock_irqsave(lock, flags) spin_lock_irqsave(lock, flags)
#define sctp_spin_unlock_irqrestore(lock, flags) \
@@ -218,8 +213,6 @@
#define SCTP_INC_STATS_USER(net, field) SNMP_INC_STATS_USER((net)->sctp.sctp_statistics, field)
#define SCTP_DEC_STATS(net, field) SNMP_DEC_STATS((net)->sctp.sctp_statistics, field)
-#endif /* !TEST_FRAME */
-
/* sctp mib definitions */
enum {
SCTP_MIB_NUM = 0,
@@ -567,24 +560,6 @@
/* Round an int up to the next multiple of 4. */
#define WORD_ROUND(s) (((s)+3)&~3)
-/* Compare two timevals. */
-#define tv_lt(s, t) \
- (s.tv_sec < t.tv_sec || (s.tv_sec == t.tv_sec && s.tv_usec < t.tv_usec))
-
-/* Add tv1 to tv2. */
-#define TIMEVAL_ADD(tv1, tv2) \
-({ \
- suseconds_t usecs = (tv2).tv_usec + (tv1).tv_usec; \
- time_t secs = (tv2).tv_sec + (tv1).tv_sec; \
-\
- if (usecs >= 1000000) { \
- usecs -= 1000000; \
- secs++; \
- } \
- (tv2).tv_sec = secs; \
- (tv2).tv_usec = usecs; \
-})
-
/* External references. */
extern struct proto sctp_prot;
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 1bd4c41..e745c92 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -54,7 +54,7 @@
#ifndef __sctp_structs_h__
#define __sctp_structs_h__
-#include <linux/time.h> /* We get struct timespec. */
+#include <linux/ktime.h>
#include <linux/socket.h> /* linux/in.h needs this!! */
#include <linux/in.h> /* We get struct sockaddr_in. */
#include <linux/in6.h> /* We get struct in6_addr */
@@ -284,7 +284,7 @@
__u32 peer_ttag;
/* When does this cookie expire? */
- struct timeval expiration;
+ ktime_t expiration;
/* Number of inbound/outbound streams which are set
* and negotiated during the INIT process.
@@ -1537,7 +1537,7 @@
sctp_state_t state;
/* The cookie life I award for any cookie. */
- struct timeval cookie_life;
+ ktime_t cookie_life;
/* Overall : The overall association error count.
* Error Count : [Clear this any time I get something.]
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 6fa8083..d198005 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1319,9 +1319,9 @@
/* Fastopen key context */
struct tcp_fastopen_context {
- struct crypto_cipher __rcu *tfm;
- __u8 key[TCP_FASTOPEN_KEY_LENGTH];
- struct rcu_head rcu;
+ struct crypto_cipher *tfm;
+ __u8 key[TCP_FASTOPEN_KEY_LENGTH];
+ struct rcu_head rcu;
};
/* write queue abstraction */
diff --git a/include/uapi/asm-generic/poll.h b/include/uapi/asm-generic/poll.h
index 9ce7f44..4aee586 100644
--- a/include/uapi/asm-generic/poll.h
+++ b/include/uapi/asm-generic/poll.h
@@ -30,6 +30,8 @@
#define POLLFREE 0x4000 /* currently only for epoll */
+#define POLL_LL 0x8000
+
struct pollfd {
int fd;
short events;
diff --git a/include/uapi/linux/if_arp.h b/include/uapi/linux/if_arp.h
index 82c7d1b..d7fea34 100644
--- a/include/uapi/linux/if_arp.h
+++ b/include/uapi/linux/if_arp.h
@@ -93,6 +93,7 @@
#define ARPHRD_PHONET_PIPE 821 /* PhoNet pipe header */
#define ARPHRD_CAIF 822 /* CAIF media type */
#define ARPHRD_IP6GRE 823 /* GRE over IPv6 */
+#define ARPHRD_NETLINK 824 /* Netlink header */
#define ARPHRD_VOID 0xFFFF /* Void type, nothing is known */
#define ARPHRD_NONE 0xFFFE /* zero header length */
diff --git a/net/core/sock.c b/net/core/sock.c
index 1e744b1..b6c619f 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2307,7 +2307,7 @@
#ifdef CONFIG_NET_LL_RX_POLL
sk->sk_napi_id = 0;
- sk->sk_ll_usec = sysctl_net_ll_poll;
+ sk->sk_ll_usec = sysctl_net_ll_read;
#endif
/*
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 62702c2..afc677e 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -306,6 +306,14 @@
.mode = 0644,
.proc_handler = proc_dointvec
},
+ {
+ .procname = "low_latency_read",
+ .data = &sysctl_net_ll_read,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+#
#endif
#endif /* CONFIG_NET */
{
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 90788a1..afaf3cd 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -253,37 +253,32 @@
return !qdisc_tx_is_noop(dev);
}
-static void addrconf_del_timer(struct inet6_ifaddr *ifp)
+static void addrconf_del_rs_timer(struct inet6_dev *idev)
{
- if (del_timer(&ifp->timer))
+ if (del_timer(&idev->rs_timer))
+ __in6_dev_put(idev);
+}
+
+static void addrconf_del_dad_timer(struct inet6_ifaddr *ifp)
+{
+ if (del_timer(&ifp->dad_timer))
__in6_ifa_put(ifp);
}
-enum addrconf_timer_t {
- AC_NONE,
- AC_DAD,
- AC_RS,
-};
-
-static void addrconf_mod_timer(struct inet6_ifaddr *ifp,
- enum addrconf_timer_t what,
- unsigned long when)
+static void addrconf_mod_rs_timer(struct inet6_dev *idev,
+ unsigned long when)
{
- if (!del_timer(&ifp->timer))
- in6_ifa_hold(ifp);
+ if (!timer_pending(&idev->rs_timer))
+ in6_dev_hold(idev);
+ mod_timer(&idev->rs_timer, jiffies + when);
+}
- switch (what) {
- case AC_DAD:
- ifp->timer.function = addrconf_dad_timer;
- break;
- case AC_RS:
- ifp->timer.function = addrconf_rs_timer;
- break;
- default:
- break;
- }
- ifp->timer.expires = jiffies + when;
- add_timer(&ifp->timer);
+static void addrconf_mod_dad_timer(struct inet6_ifaddr *ifp,
+ unsigned long when)
+{
+ if (!timer_pending(&ifp->dad_timer))
+ in6_ifa_hold(ifp);
+ mod_timer(&ifp->dad_timer, jiffies + when);
}
static int snmp6_alloc_dev(struct inet6_dev *idev)
@@ -326,6 +321,7 @@
WARN_ON(!list_empty(&idev->addr_list));
WARN_ON(idev->mc_list != NULL);
+ WARN_ON(timer_pending(&idev->rs_timer));
#ifdef NET_REFCNT_DEBUG
pr_debug("%s: %s\n", __func__, dev ? dev->name : "NIL");
@@ -357,7 +353,8 @@
rwlock_init(&ndev->lock);
ndev->dev = dev;
INIT_LIST_HEAD(&ndev->addr_list);
-
+ setup_timer(&ndev->rs_timer, addrconf_rs_timer,
+ (unsigned long)ndev);
memcpy(&ndev->cnf, dev_net(dev)->ipv6.devconf_dflt, sizeof(ndev->cnf));
ndev->cnf.mtu6 = dev->mtu;
ndev->cnf.sysctl = NULL;
@@ -776,7 +773,7 @@
in6_dev_put(ifp->idev);
- if (del_timer(&ifp->timer))
+ if (del_timer(&ifp->dad_timer))
pr_notice("Timer is still running, when freeing ifa=%p\n", ifp);
if (ifp->state != INET6_IFADDR_STATE_DEAD) {
@@ -869,9 +866,9 @@
spin_lock_init(&ifa->lock);
spin_lock_init(&ifa->state_lock);
- init_timer(&ifa->timer);
+ setup_timer(&ifa->dad_timer, addrconf_dad_timer,
+ (unsigned long)ifa);
INIT_HLIST_NODE(&ifa->addr_lst);
- ifa->timer.data = (unsigned long) ifa;
ifa->scope = scope;
ifa->prefix_len = pfxlen;
ifa->flags = flags | IFA_F_TENTATIVE;
@@ -994,7 +991,7 @@
}
write_unlock_bh(&idev->lock);
- addrconf_del_timer(ifp);
+ addrconf_del_dad_timer(ifp);
ipv6_ifa_notify(RTM_DELADDR, ifp);
@@ -1447,6 +1444,23 @@
}
EXPORT_SYMBOL(ipv6_dev_get_saddr);
+static int __ipv6_get_lladdr(struct inet6_dev *idev, struct in6_addr *addr,
+ unsigned char banned_flags)
+{
+ struct inet6_ifaddr *ifp;
+ int err = -EADDRNOTAVAIL;
+
+ list_for_each_entry(ifp, &idev->addr_list, if_list) {
+ if (ifp->scope == IFA_LINK &&
+ !(ifp->flags & banned_flags)) {
+ *addr = ifp->addr;
+ err = 0;
+ break;
+ }
+ }
+ return err;
+}
+
int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr,
unsigned char banned_flags)
{
@@ -1456,17 +1470,8 @@
rcu_read_lock();
idev = __in6_dev_get(dev);
if (idev) {
- struct inet6_ifaddr *ifp;
-
read_lock_bh(&idev->lock);
- list_for_each_entry(ifp, &idev->addr_list, if_list) {
- if (ifp->scope == IFA_LINK &&
- !(ifp->flags & banned_flags)) {
- *addr = ifp->addr;
- err = 0;
- break;
- }
- }
+ err = __ipv6_get_lladdr(idev, addr, banned_flags);
read_unlock_bh(&idev->lock);
}
rcu_read_unlock();
@@ -1580,7 +1585,7 @@
{
if (ifp->flags&IFA_F_PERMANENT) {
spin_lock_bh(&ifp->lock);
- addrconf_del_timer(ifp);
+ addrconf_del_dad_timer(ifp);
ifp->flags |= IFA_F_TENTATIVE;
if (dad_failed)
ifp->flags |= IFA_F_DADFAILED;
@@ -2502,12 +2507,6 @@
read_unlock_bh(&idev->lock);
ipv6_del_addr(ifp);
-
- /* If the last address is deleted administratively,
- disable IPv6 on this interface.
- */
- if (list_empty(&idev->addr_list))
- addrconf_ifdown(idev->dev, 1);
return 0;
}
}
@@ -3036,7 +3035,7 @@
hlist_for_each_entry_rcu(ifa, h, addr_lst) {
if (ifa->idev == idev) {
hlist_del_init_rcu(&ifa->addr_lst);
- addrconf_del_timer(ifa);
+ addrconf_del_dad_timer(ifa);
goto restart;
}
}
@@ -3045,6 +3044,8 @@
write_lock_bh(&idev->lock);
+ addrconf_del_rs_timer(idev);
+
/* Step 2: clear flags for stateless addrconf */
if (!how)
idev->if_flags &= ~(IF_RS_SENT|IF_RA_RCVD|IF_READY);
@@ -3074,7 +3075,7 @@
while (!list_empty(&idev->addr_list)) {
ifa = list_first_entry(&idev->addr_list,
struct inet6_ifaddr, if_list);
- addrconf_del_timer(ifa);
+ addrconf_del_dad_timer(ifa);
list_del(&ifa->if_list);
@@ -3116,10 +3117,10 @@
static void addrconf_rs_timer(unsigned long data)
{
- struct inet6_ifaddr *ifp = (struct inet6_ifaddr *) data;
- struct inet6_dev *idev = ifp->idev;
+ struct inet6_dev *idev = (struct inet6_dev *)data;
+ struct in6_addr lladdr;
- read_lock(&idev->lock);
+ write_lock(&idev->lock);
if (idev->dead || !(idev->if_flags & IF_READY))
goto out;
@@ -3130,18 +3131,19 @@
if (idev->if_flags & IF_RA_RCVD)
goto out;
- spin_lock(&ifp->lock);
- if (ifp->probes++ < idev->cnf.rtr_solicits) {
- /* The wait after the last probe can be shorter */
- addrconf_mod_timer(ifp, AC_RS,
- (ifp->probes == idev->cnf.rtr_solicits) ?
- idev->cnf.rtr_solicit_delay :
- idev->cnf.rtr_solicit_interval);
- spin_unlock(&ifp->lock);
+ if (idev->rs_probes++ < idev->cnf.rtr_solicits) {
+ if (!__ipv6_get_lladdr(idev, &lladdr, IFA_F_TENTATIVE))
+ ndisc_send_rs(idev->dev, &lladdr,
+ &in6addr_linklocal_allrouters);
+ else
+ goto out;
- ndisc_send_rs(idev->dev, &ifp->addr, &in6addr_linklocal_allrouters);
+ /* The wait after the last probe can be shorter */
+ addrconf_mod_rs_timer(idev, (idev->rs_probes ==
+ idev->cnf.rtr_solicits) ?
+ idev->cnf.rtr_solicit_delay :
+ idev->cnf.rtr_solicit_interval);
} else {
- spin_unlock(&ifp->lock);
/*
* Note: we do not support deprecated "all on-link"
* assumption any longer.
@@ -3150,8 +3152,8 @@
}
out:
- read_unlock(&idev->lock);
- in6_ifa_put(ifp);
+ write_unlock(&idev->lock);
+ in6_dev_put(idev);
}
/*
@@ -3167,8 +3169,8 @@
else
rand_num = net_random() % (idev->cnf.rtr_solicit_delay ? : 1);
- ifp->probes = idev->cnf.dad_transmits;
- addrconf_mod_timer(ifp, AC_DAD, rand_num);
+ ifp->dad_probes = idev->cnf.dad_transmits;
+ addrconf_mod_dad_timer(ifp, rand_num);
}
static void addrconf_dad_start(struct inet6_ifaddr *ifp)
@@ -3229,40 +3231,40 @@
struct inet6_dev *idev = ifp->idev;
struct in6_addr mcaddr;
- if (!ifp->probes && addrconf_dad_end(ifp))
+ if (!ifp->dad_probes && addrconf_dad_end(ifp))
goto out;
- read_lock(&idev->lock);
+ write_lock(&idev->lock);
if (idev->dead || !(idev->if_flags & IF_READY)) {
- read_unlock(&idev->lock);
+ write_unlock(&idev->lock);
goto out;
}
spin_lock(&ifp->lock);
if (ifp->state == INET6_IFADDR_STATE_DEAD) {
spin_unlock(&ifp->lock);
- read_unlock(&idev->lock);
+ write_unlock(&idev->lock);
goto out;
}
- if (ifp->probes == 0) {
+ if (ifp->dad_probes == 0) {
/*
* DAD was successful
*/
ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC|IFA_F_DADFAILED);
spin_unlock(&ifp->lock);
- read_unlock(&idev->lock);
+ write_unlock(&idev->lock);
addrconf_dad_completed(ifp);
goto out;
}
- ifp->probes--;
- addrconf_mod_timer(ifp, AC_DAD, ifp->idev->nd_parms->retrans_time);
+ ifp->dad_probes--;
+ addrconf_mod_dad_timer(ifp, ifp->idev->nd_parms->retrans_time);
spin_unlock(&ifp->lock);
- read_unlock(&idev->lock);
+ write_unlock(&idev->lock);
/* send a neighbour solicitation for our addr */
addrconf_addr_solict_mult(&ifp->addr, &mcaddr);
@@ -3274,6 +3276,9 @@
static void addrconf_dad_completed(struct inet6_ifaddr *ifp)
{
struct net_device *dev = ifp->idev->dev;
+ struct in6_addr lladdr;
+
+ addrconf_del_dad_timer(ifp);
/*
* Configure the address for reception. Now it is valid.
@@ -3294,13 +3299,20 @@
* [...] as part of DAD [...] there is no need
* to delay again before sending the first RS
*/
- ndisc_send_rs(ifp->idev->dev, &ifp->addr, &in6addr_linklocal_allrouters);
+ if (!ipv6_get_lladdr(dev, &lladdr, IFA_F_TENTATIVE))
+ ndisc_send_rs(dev, &lladdr,
+ &in6addr_linklocal_allrouters);
+ else
+ return;
- spin_lock_bh(&ifp->lock);
- ifp->probes = 1;
+ write_lock_bh(&ifp->idev->lock);
+ spin_lock(&ifp->lock);
+ ifp->idev->rs_probes = 1;
ifp->idev->if_flags |= IF_RS_SENT;
- addrconf_mod_timer(ifp, AC_RS, ifp->idev->cnf.rtr_solicit_interval);
- spin_unlock_bh(&ifp->lock);
+ addrconf_mod_rs_timer(ifp->idev,
+ ifp->idev->cnf.rtr_solicit_interval);
+ spin_unlock(&ifp->lock);
+ write_unlock_bh(&ifp->idev->lock);
}
}
@@ -4363,6 +4375,7 @@
}
write_unlock_bh(&idev->lock);
+ addrconf_verify(0);
return 0;
}
diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c
index 7210456..d2f8742 100644
--- a/net/ipv6/addrconf_core.c
+++ b/net/ipv6/addrconf_core.c
@@ -5,6 +5,7 @@
#include <linux/export.h>
#include <net/ipv6.h>
+#include <net/addrconf.h>
#define IPV6_ADDR_SCOPE_TYPE(scope) ((scope) << 16)
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 275d901..6967fbc 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -57,6 +57,7 @@
#include <linux/audit.h>
#include <linux/mutex.h>
#include <linux/vmalloc.h>
+#include <linux/if_arp.h>
#include <asm/cacheflush.h>
#include <net/net_namespace.h>
@@ -101,6 +102,9 @@
static ATOMIC_NOTIFIER_HEAD(netlink_chain);
+static DEFINE_SPINLOCK(netlink_tap_lock);
+static struct list_head netlink_tap_all __read_mostly;
+
static inline u32 netlink_group_mask(u32 group)
{
return group ? 1 << (group - 1) : 0;
@@ -111,6 +115,100 @@
return &hash->table[jhash_1word(portid, hash->rnd) & hash->mask];
}
+int netlink_add_tap(struct netlink_tap *nt)
+{
+ if (unlikely(nt->dev->type != ARPHRD_NETLINK))
+ return -EINVAL;
+
+ spin_lock(&netlink_tap_lock);
+ list_add_rcu(&nt->list, &netlink_tap_all);
+ spin_unlock(&netlink_tap_lock);
+
+ if (nt->module)
+ __module_get(nt->module);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(netlink_add_tap);
+
+int __netlink_remove_tap(struct netlink_tap *nt)
+{
+ bool found = false;
+ struct netlink_tap *tmp;
+
+ spin_lock(&netlink_tap_lock);
+
+ list_for_each_entry(tmp, &netlink_tap_all, list) {
+ if (nt == tmp) {
+ list_del_rcu(&nt->list);
+ found = true;
+ goto out;
+ }
+ }
+
+ pr_warn("__netlink_remove_tap: %p not found\n", nt);
+out:
+ spin_unlock(&netlink_tap_lock);
+
+ if (found && nt->module)
+ module_put(nt->module);
+
+ return found ? 0 : -ENODEV;
+}
+EXPORT_SYMBOL_GPL(__netlink_remove_tap);
+
+int netlink_remove_tap(struct netlink_tap *nt)
+{
+ int ret;
+
+ ret = __netlink_remove_tap(nt);
+ synchronize_net();
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(netlink_remove_tap);
+
+static int __netlink_deliver_tap_skb(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ struct sk_buff *nskb;
+ int ret = -ENOMEM;
+
+ dev_hold(dev);
+ nskb = skb_clone(skb, GFP_ATOMIC);
+ if (nskb) {
+ nskb->dev = dev;
+ ret = dev_queue_xmit(nskb);
+ if (unlikely(ret > 0))
+ ret = net_xmit_errno(ret);
+ }
+
+ dev_put(dev);
+ return ret;
+}
+
+static void __netlink_deliver_tap(struct sk_buff *skb)
+{
+ int ret;
+ struct netlink_tap *tmp;
+
+ list_for_each_entry_rcu(tmp, &netlink_tap_all, list) {
+ ret = __netlink_deliver_tap_skb(skb, tmp->dev);
+ if (unlikely(ret))
+ break;
+ }
+}
+
+static void netlink_deliver_tap(struct sk_buff *skb)
+{
+ rcu_read_lock();
+
+ if (unlikely(!list_empty(&netlink_tap_all)))
+ __netlink_deliver_tap(skb);
+
+ rcu_read_unlock();
+}
+
static void netlink_overrun(struct sock *sk)
{
struct netlink_sock *nlk = nlk_sk(sk);
@@ -1518,6 +1616,8 @@
{
int len = skb->len;
+ netlink_deliver_tap(skb);
+
#ifdef CONFIG_NETLINK_MMAP
if (netlink_skb_is_mmaped(skb))
netlink_queue_mmaped_skb(sk, skb);
@@ -1578,6 +1678,11 @@
ret = -ECONNREFUSED;
if (nlk->netlink_rcv != NULL) {
+ /* We could do a netlink_deliver_tap(skb) here as well
+ * but since this is intended for the kernel only, we
+ * should rather let it stay under the hood.
+ */
+
ret = skb->len;
netlink_skb_set_owner_r(skb, sk);
NETLINK_CB(skb).sk = ssk;
@@ -2975,6 +3080,8 @@
nl_table[i].compare = netlink_compare;
}
+ INIT_LIST_HEAD(&netlink_tap_all);
+
netlink_add_usersock_entry();
sock_register(&netlink_family_ops);
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index bf6e6bd..9a383a8 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -102,13 +102,7 @@
sctp_bind_addr_init(&asoc->base.bind_addr, ep->base.bind_addr.port);
asoc->state = SCTP_STATE_CLOSED;
-
- /* Set these values from the socket values, a conversion between
- * millsecons to seconds/microseconds must also be done.
- */
- asoc->cookie_life.tv_sec = sp->assocparams.sasoc_cookie_life / 1000;
- asoc->cookie_life.tv_usec = (sp->assocparams.sasoc_cookie_life % 1000)
- * 1000;
+ asoc->cookie_life = ms_to_ktime(sp->assocparams.sasoc_cookie_life);
asoc->frag_point = 0;
asoc->user_frag = sp->user_frag;
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index a8b2674..b26999d 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -247,10 +247,9 @@
/* Final destructor for endpoint. */
static void sctp_endpoint_destroy(struct sctp_endpoint *ep)
{
- SCTP_ASSERT(ep->base.dead, "Endpoint is not dead", return);
+ struct sock *sk;
- /* Free up the HMAC transform. */
- crypto_free_hash(sctp_sk(ep->base.sk)->hmac);
+ SCTP_ASSERT(ep->base.dead, "Endpoint is not dead", return);
/* Free the digest buffer */
kfree(ep->digest);
@@ -271,13 +270,15 @@
memset(ep->secret_key, 0, sizeof(ep->secret_key));
- /* Remove and free the port */
- if (sctp_sk(ep->base.sk)->bind_hash)
- sctp_put_port(ep->base.sk);
-
/* Give up our hold on the sock. */
- if (ep->base.sk)
- sock_put(ep->base.sk);
+ sk = ep->base.sk;
+ if (sk != NULL) {
+ /* Remove and free the port */
+ if (sctp_sk(sk)->bind_hash)
+ sctp_put_port(sk);
+
+ sock_put(sk);
+ }
kfree(ep);
SCTP_DBG_OBJCNT_DEC(ep);
diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index 0c83162..62526c4 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -138,7 +138,7 @@
peer = asoc->peer.primary_path;
if (unlikely(peer == NULL)) {
- WARN(1, "Association %p with NULL primary path!", asoc);
+ WARN(1, "Association %p with NULL primary path!\n", asoc);
return;
}
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index fc85487..dd71f1f 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -1630,8 +1630,8 @@
cookie->c.adaptation_ind = asoc->peer.adaptation_ind;
/* Set an expiration time for the cookie. */
- do_gettimeofday(&cookie->c.expiration);
- TIMEVAL_ADD(asoc->cookie_life, cookie->c.expiration);
+ cookie->c.expiration = ktime_add(asoc->cookie_life,
+ ktime_get());
/* Copy the peer's init packet. */
memcpy(&cookie->c.peer_init[0], init_chunk->chunk_hdr,
@@ -1680,7 +1680,7 @@
unsigned int len;
sctp_scope_t scope;
struct sk_buff *skb = chunk->skb;
- struct timeval tv;
+ ktime_t kt;
struct hash_desc desc;
/* Header size is static data prior to the actual cookie, including
@@ -1757,11 +1757,11 @@
* down the new association establishment instead of every packet.
*/
if (sock_flag(ep->base.sk, SOCK_TIMESTAMP))
- skb_get_timestamp(skb, &tv);
+ kt = skb_get_ktime(skb);
else
- do_gettimeofday(&tv);
+ kt = ktime_get();
- if (!asoc && tv_lt(bear_cookie->expiration, tv)) {
+ if (!asoc && ktime_compare(bear_cookie->expiration, kt) < 0) {
/*
* Section 3.3.10.3 Stale Cookie Error (3)
*
@@ -1773,9 +1773,7 @@
len = ntohs(chunk->chunk_hdr->length);
*errp = sctp_make_op_error_space(asoc, chunk, len);
if (*errp) {
- suseconds_t usecs = (tv.tv_sec -
- bear_cookie->expiration.tv_sec) * 1000000L +
- tv.tv_usec - bear_cookie->expiration.tv_usec;
+ suseconds_t usecs = ktime_to_us(ktime_sub(kt, bear_cookie->expiration));
__be32 n = htonl(usecs);
sctp_init_cause(*errp, SCTP_ERROR_STALE_COOKIE,
@@ -2514,8 +2512,7 @@
/* Suggested Cookie Life span increment's unit is msec,
* (1/1000sec).
*/
- asoc->cookie_life.tv_sec += stale / 1000;
- asoc->cookie_life.tv_usec += (stale % 1000) * 1000;
+ asoc->cookie_life = ktime_add_ms(asoc->cookie_life, stale);
break;
case SCTP_PARAM_HOST_NAME_ADDRESS:
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 32db19b..66fcdcf 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -93,6 +93,7 @@
static int sctp_wait_for_connect(struct sctp_association *, long *timeo_p);
static int sctp_wait_for_accept(struct sock *sk, long timeo);
static void sctp_wait_for_close(struct sock *sk, long timeo);
+static void sctp_destruct_sock(struct sock *sk);
static struct sctp_af *sctp_sockaddr_af(struct sctp_sock *opt,
union sctp_addr *addr, int len);
static int sctp_bindx_add(struct sock *, struct sockaddr *, int);
@@ -2910,13 +2911,8 @@
asoc->max_retrans = assocparams.sasoc_asocmaxrxt;
}
- if (assocparams.sasoc_cookie_life != 0) {
- asoc->cookie_life.tv_sec =
- assocparams.sasoc_cookie_life / 1000;
- asoc->cookie_life.tv_usec =
- (assocparams.sasoc_cookie_life % 1000)
- * 1000;
- }
+ if (assocparams.sasoc_cookie_life != 0)
+ asoc->cookie_life = ms_to_ktime(assocparams.sasoc_cookie_life);
} else {
/* Set the values to the endpoint */
struct sctp_sock *sp = sctp_sk(sk);
@@ -3971,6 +3967,8 @@
sp->hmac = NULL;
+ sk->sk_destruct = sctp_destruct_sock;
+
SCTP_DBG_OBJCNT_INC(sock);
local_bh_disable();
@@ -4013,6 +4011,17 @@
local_bh_enable();
}
+/* Triggered when there are no references on the socket anymore */
+static void sctp_destruct_sock(struct sock *sk)
+{
+ struct sctp_sock *sp = sctp_sk(sk);
+
+ /* Free up the HMAC transform. */
+ crypto_free_hash(sp->hmac);
+
+ inet_sock_destruct(sk);
+}
+
/* API 4.1.7 shutdown() - TCP Style Syntax
* int shutdown(int socket, int how);
*
@@ -5074,10 +5083,7 @@
assocparams.sasoc_asocmaxrxt = asoc->max_retrans;
assocparams.sasoc_peer_rwnd = asoc->peer.rwnd;
assocparams.sasoc_local_rwnd = asoc->a_rwnd;
- assocparams.sasoc_cookie_life = (asoc->cookie_life.tv_sec
- * 1000) +
- (asoc->cookie_life.tv_usec
- / 1000);
+ assocparams.sasoc_cookie_life = ktime_to_ms(asoc->cookie_life);
list_for_each(pos, &asoc->peer.transport_addr_list) {
cnt ++;
@@ -6030,7 +6036,6 @@
*/
static int sctp_get_port(struct sock *sk, unsigned short snum)
{
- long ret;
union sctp_addr addr;
struct sctp_af *af = sctp_sk(sk)->pf->af;
@@ -6039,9 +6044,7 @@
addr.v4.sin_port = htons(snum);
/* Note: sk->sk_num gets filled in if ephemeral port request. */
- ret = sctp_get_port_local(sk, &addr);
-
- return ret ? 1 : 0;
+ return !!sctp_get_port_local(sk, &addr);
}
/*
@@ -6856,7 +6859,7 @@
newsk->sk_reuse = sk->sk_reuse;
newsk->sk_shutdown = sk->sk_shutdown;
- newsk->sk_destruct = inet_sock_destruct;
+ newsk->sk_destruct = sctp_destruct_sock;
newsk->sk_family = sk->sk_family;
newsk->sk_protocol = IPPROTO_SCTP;
newsk->sk_backlog_rcv = sk->sk_prot->backlog_rcv;
diff --git a/net/socket.c b/net/socket.c
index 3eec3f7..4da14cb 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -107,6 +107,7 @@
#include <net/ll_poll.h>
#ifdef CONFIG_NET_LL_RX_POLL
+unsigned int sysctl_net_ll_read __read_mostly;
unsigned int sysctl_net_ll_poll __read_mostly;
#endif
@@ -1147,13 +1148,24 @@
/* No kernel lock held - perfect */
static unsigned int sock_poll(struct file *file, poll_table *wait)
{
+ unsigned int ll_flag = 0;
struct socket *sock;
/*
* We can't return errors to poll, so it's either yes or no.
*/
sock = file->private_data;
- return sock->ops->poll(file, sock, wait);
+
+ if (sk_valid_ll(sock->sk)) {
+ /* this socket can poll_ll so tell the system call */
+ ll_flag = POLL_LL;
+
+ /* once, only if requested by syscall */
+ if (wait && (wait->_key & POLL_LL))
+ sk_poll_ll(sock->sk, 1);
+ }
+
+ return ll_flag | sock->ops->poll(file, sock, wait);
}
static int sock_mmap(struct file *file, struct vm_area_struct *vma)