Merge tag 'perf-tools-2020-12-24' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux
Pull more perf tools updates from Arnaldo Carvalho de Melo:
- Refactor 'perf stat' per CPU/socket/die/thread aggregation fixing use
cases in ARM machines.
- Fix memory leak when synthesizing SDT probes in 'perf probe'.
- Update kernel header copies related to KVM, epol_pwait. msr-index and
powerpc and s390 syscall tables.
* tag 'perf-tools-2020-12-24' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux: (24 commits)
perf probe: Fix memory leak when synthesizing SDT probes
perf stat aggregation: Add separate thread member
perf stat aggregation: Add separate core member
perf stat aggregation: Add separate die member
perf stat aggregation: Add separate socket member
perf stat aggregation: Add separate node member
perf stat aggregation: Start using cpu_aggr_id in map
perf cpumap: Drop in cpu_aggr_map struct
perf cpumap: Add new map type for aggregation
perf stat: Replace aggregation ID with a struct
perf cpumap: Add new struct for cpu aggregation
perf cpumap: Use existing allocator to avoid using malloc
perf tests: Improve topology test to check all aggregation types
perf tools: Update s390's syscall.tbl copy from the kernel sources
perf tools: Update powerpc's syscall.tbl copy from the kernel sources
perf s390: Move syscall.tbl check into check-headers.sh
perf powerpc: Move syscall.tbl check to check-headers.sh
tools headers UAPI: Synch KVM's svm.h header with the kernel
tools kvm headers: Update KVM headers from the kernel sources
tools headers UAPI: Sync KVM's vmx.h header with the kernel sources
...
diff --git a/Documentation/admin-guide/sysctl/vm.rst b/Documentation/admin-guide/sysctl/vm.rst
index e972caa..e35a3f2 100644
--- a/Documentation/admin-guide/sysctl/vm.rst
+++ b/Documentation/admin-guide/sysctl/vm.rst
@@ -428,7 +428,7 @@
programs, particularly malloc debuggers, may consume lots of them,
e.g., up to one or two maps per allocation.
-The default value is 65536.
+The default value is 65530.
memory_failure_early_kill:
diff --git a/Documentation/devicetree/bindings/arm/idle-states.yaml b/Documentation/devicetree/bindings/arm/idle-states.yaml
index ea805c1..52bce5d 100644
--- a/Documentation/devicetree/bindings/arm/idle-states.yaml
+++ b/Documentation/devicetree/bindings/arm/idle-states.yaml
@@ -313,7 +313,7 @@
wakeup-latency-us by this duration.
idle-state-name:
- $ref: /schemas/types.yaml#definitions/string
+ $ref: /schemas/types.yaml#/definitions/string
description:
A string used as a descriptive name for the idle state.
diff --git a/Documentation/devicetree/bindings/bus/allwinner,sun50i-a64-de2.yaml b/Documentation/devicetree/bindings/bus/allwinner,sun50i-a64-de2.yaml
index 0503651..863a287 100644
--- a/Documentation/devicetree/bindings/bus/allwinner,sun50i-a64-de2.yaml
+++ b/Documentation/devicetree/bindings/bus/allwinner,sun50i-a64-de2.yaml
@@ -34,7 +34,7 @@
description:
The SRAM that needs to be claimed to access the display engine
bus.
- $ref: /schemas/types.yaml#definitions/phandle-array
+ $ref: /schemas/types.yaml#/definitions/phandle-array
maxItems: 1
ranges: true
diff --git a/Documentation/devicetree/bindings/bus/baikal,bt1-axi.yaml b/Documentation/devicetree/bindings/bus/baikal,bt1-axi.yaml
index 0bee469..4ac78b4 100644
--- a/Documentation/devicetree/bindings/bus/baikal,bt1-axi.yaml
+++ b/Documentation/devicetree/bindings/bus/baikal,bt1-axi.yaml
@@ -46,7 +46,7 @@
const: 1
syscon:
- $ref: /schemas/types.yaml#definitions/phandle
+ $ref: /schemas/types.yaml#/definitions/phandle
description: Phandle to the Baikal-T1 System Controller DT node
interrupts:
diff --git a/Documentation/devicetree/bindings/clock/imx8qxp-lpcg.yaml b/Documentation/devicetree/bindings/clock/imx8qxp-lpcg.yaml
index e709e53..940486e 100644
--- a/Documentation/devicetree/bindings/clock/imx8qxp-lpcg.yaml
+++ b/Documentation/devicetree/bindings/clock/imx8qxp-lpcg.yaml
@@ -29,18 +29,18 @@
- const: fsl,imx8qxp-lpcg
- items:
- enum:
- - fsl,imx8qm-lpcg
+ - fsl,imx8qm-lpcg
- const: fsl,imx8qxp-lpcg
- enum:
- - fsl,imx8qxp-lpcg-adma
- - fsl,imx8qxp-lpcg-conn
- - fsl,imx8qxp-lpcg-dc
- - fsl,imx8qxp-lpcg-dsp
- - fsl,imx8qxp-lpcg-gpu
- - fsl,imx8qxp-lpcg-hsio
- - fsl,imx8qxp-lpcg-img
- - fsl,imx8qxp-lpcg-lsio
- - fsl,imx8qxp-lpcg-vpu
+ - fsl,imx8qxp-lpcg-adma
+ - fsl,imx8qxp-lpcg-conn
+ - fsl,imx8qxp-lpcg-dc
+ - fsl,imx8qxp-lpcg-dsp
+ - fsl,imx8qxp-lpcg-gpu
+ - fsl,imx8qxp-lpcg-hsio
+ - fsl,imx8qxp-lpcg-img
+ - fsl,imx8qxp-lpcg-lsio
+ - fsl,imx8qxp-lpcg-vpu
deprecated: true
reg:
maxItems: 1
diff --git a/Documentation/devicetree/bindings/connector/usb-connector.yaml b/Documentation/devicetree/bindings/connector/usb-connector.yaml
index a84464b..4286ed7 100644
--- a/Documentation/devicetree/bindings/connector/usb-connector.yaml
+++ b/Documentation/devicetree/bindings/connector/usb-connector.yaml
@@ -37,7 +37,7 @@
description: Size of the connector, should be specified in case of
non-fullsize 'usb-a-connector' or 'usb-b-connector' compatible
connectors.
- $ref: /schemas/types.yaml#definitions/string
+ $ref: /schemas/types.yaml#/definitions/string
enum:
- mini
@@ -67,7 +67,7 @@
power-role:
description: Determines the power role that the Type C connector will
support. "dual" refers to Dual Role Port (DRP).
- $ref: /schemas/types.yaml#definitions/string
+ $ref: /schemas/types.yaml#/definitions/string
enum:
- source
@@ -76,7 +76,7 @@
try-power-role:
description: Preferred power role.
- $ref: /schemas/types.yaml#definitions/string
+ $ref: /schemas/types.yaml#/definitions/string
enum:
- source
@@ -86,7 +86,7 @@
data-role:
description: Data role if Type C connector supports USB data. "dual" refers
Dual Role Device (DRD).
- $ref: /schemas/types.yaml#definitions/string
+ $ref: /schemas/types.yaml#/definitions/string
enum:
- host
@@ -105,7 +105,7 @@
Type-C Cable and Connector specification, when Power Delivery is not
supported.
allOf:
- - $ref: /schemas/types.yaml#definitions/string
+ - $ref: /schemas/types.yaml#/definitions/string
enum:
- default
- 1.5A
diff --git a/Documentation/devicetree/bindings/display/bridge/analogix,anx7625.yaml b/Documentation/devicetree/bindings/display/bridge/analogix,anx7625.yaml
index 60585a4..9392b55 100644
--- a/Documentation/devicetree/bindings/display/bridge/analogix,anx7625.yaml
+++ b/Documentation/devicetree/bindings/display/bridge/analogix,anx7625.yaml
@@ -49,8 +49,8 @@
Video port for panel or connector.
required:
- - port@0
- - port@1
+ - port@0
+ - port@1
required:
- compatible
diff --git a/Documentation/devicetree/bindings/display/bridge/anx6345.yaml b/Documentation/devicetree/bindings/display/bridge/anx6345.yaml
index 8c0e4f28..fccd635 100644
--- a/Documentation/devicetree/bindings/display/bridge/anx6345.yaml
+++ b/Documentation/devicetree/bindings/display/bridge/anx6345.yaml
@@ -26,11 +26,9 @@
description: GPIO connected to active low reset
dvdd12-supply:
- maxItems: 1
description: Regulator for 1.2V digital core power.
dvdd25-supply:
- maxItems: 1
description: Regulator for 2.5V digital core power.
ports:
diff --git a/Documentation/devicetree/bindings/display/bridge/intel,keembay-dsi.yaml b/Documentation/devicetree/bindings/display/bridge/intel,keembay-dsi.yaml
index ab5be26..35c9dfd 100644
--- a/Documentation/devicetree/bindings/display/bridge/intel,keembay-dsi.yaml
+++ b/Documentation/devicetree/bindings/display/bridge/intel,keembay-dsi.yaml
@@ -39,10 +39,10 @@
properties:
'#address-cells':
- const: 1
+ const: 1
'#size-cells':
- const: 0
+ const: 0
port@0:
type: object
diff --git a/Documentation/devicetree/bindings/display/bridge/ite,it6505.yaml b/Documentation/devicetree/bindings/display/bridge/ite,it6505.yaml
index efbb3d0..02cfc0a 100644
--- a/Documentation/devicetree/bindings/display/bridge/ite,it6505.yaml
+++ b/Documentation/devicetree/bindings/display/bridge/ite,it6505.yaml
@@ -35,11 +35,9 @@
maxItems: 1
ovdd-supply:
- maxItems: 1
description: I/O voltage
pwr18-supply:
- maxItems: 1
description: core voltage
interrupts:
diff --git a/Documentation/devicetree/bindings/display/bridge/lvds-codec.yaml b/Documentation/devicetree/bindings/display/bridge/lvds-codec.yaml
index e5e3c72..66a14d6 100644
--- a/Documentation/devicetree/bindings/display/bridge/lvds-codec.yaml
+++ b/Documentation/devicetree/bindings/display/bridge/lvds-codec.yaml
@@ -79,8 +79,7 @@
The GPIO used to control the power down line of this device.
maxItems: 1
- power-supply:
- maxItems: 1
+ power-supply: true
required:
- compatible
diff --git a/Documentation/devicetree/bindings/display/bridge/ps8640.yaml b/Documentation/devicetree/bindings/display/bridge/ps8640.yaml
index 7e27cfc..763c790 100644
--- a/Documentation/devicetree/bindings/display/bridge/ps8640.yaml
+++ b/Documentation/devicetree/bindings/display/bridge/ps8640.yaml
@@ -35,11 +35,9 @@
description: GPIO connected to active low reset.
vdd12-supply:
- maxItems: 1
description: Regulator for 1.2V digital core power.
vdd33-supply:
- maxItems: 1
description: Regulator for 3.3V digital core power.
ports:
diff --git a/Documentation/devicetree/bindings/display/bridge/simple-bridge.yaml b/Documentation/devicetree/bindings/display/bridge/simple-bridge.yaml
index 3ddb35f..64e8a1c 100644
--- a/Documentation/devicetree/bindings/display/bridge/simple-bridge.yaml
+++ b/Documentation/devicetree/bindings/display/bridge/simple-bridge.yaml
@@ -60,7 +60,6 @@
description: GPIO controlling bridge enable
vdd-supply:
- maxItems: 1
description: Power supply for the bridge
required:
diff --git a/Documentation/devicetree/bindings/display/bridge/thine,thc63lvd1024.yaml b/Documentation/devicetree/bindings/display/bridge/thine,thc63lvd1024.yaml
index 469ac4a..3d5ce08 100644
--- a/Documentation/devicetree/bindings/display/bridge/thine,thc63lvd1024.yaml
+++ b/Documentation/devicetree/bindings/display/bridge/thine,thc63lvd1024.yaml
@@ -74,7 +74,6 @@
description: Power down GPIO signal, pin name "/PDWN", active low.
vcc-supply:
- maxItems: 1
description:
Power supply for the TTL output, TTL CLOCKOUT signal, LVDS input, PLL and
digital circuitry.
diff --git a/Documentation/devicetree/bindings/display/bridge/toshiba,tc358775.yaml b/Documentation/devicetree/bindings/display/bridge/toshiba,tc358775.yaml
index fd3113a..b5959cc 100644
--- a/Documentation/devicetree/bindings/display/bridge/toshiba,tc358775.yaml
+++ b/Documentation/devicetree/bindings/display/bridge/toshiba,tc358775.yaml
@@ -28,11 +28,9 @@
description: i2c address of the bridge, 0x0f
vdd-supply:
- maxItems: 1
description: 1.2V LVDS Power Supply
vddio-supply:
- maxItems: 1
description: 1.8V IO Power Supply
stby-gpios:
diff --git a/Documentation/devicetree/bindings/display/intel,keembay-msscam.yaml b/Documentation/devicetree/bindings/display/intel,keembay-msscam.yaml
index 40caa61..a222b52 100644
--- a/Documentation/devicetree/bindings/display/intel,keembay-msscam.yaml
+++ b/Documentation/devicetree/bindings/display/intel,keembay-msscam.yaml
@@ -18,8 +18,8 @@
properties:
compatible:
items:
- - const: intel,keembay-msscam
- - const: syscon
+ - const: intel,keembay-msscam
+ - const: syscon
reg:
maxItems: 1
diff --git a/Documentation/devicetree/bindings/display/panel/abt,y030xx067a.yaml b/Documentation/devicetree/bindings/display/panel/abt,y030xx067a.yaml
index 91cb4c3..a108029 100644
--- a/Documentation/devicetree/bindings/display/panel/abt,y030xx067a.yaml
+++ b/Documentation/devicetree/bindings/display/panel/abt,y030xx067a.yaml
@@ -32,7 +32,7 @@
- power-supply
- reset-gpios
-additionalProperties: false
+unevaluatedProperties: false
examples:
- |
diff --git a/Documentation/devicetree/bindings/display/panel/novatek,nt36672a.yaml b/Documentation/devicetree/bindings/display/panel/novatek,nt36672a.yaml
index d2170de..2f5df1d 100644
--- a/Documentation/devicetree/bindings/display/panel/novatek,nt36672a.yaml
+++ b/Documentation/devicetree/bindings/display/panel/novatek,nt36672a.yaml
@@ -22,7 +22,7 @@
compatible:
items:
- enum:
- - tianma,fhd-video
+ - tianma,fhd-video
- const: novatek,nt36672a
description: This indicates the panel manufacturer of the panel that is
in turn using the NT36672A panel driver. This compatible string
diff --git a/Documentation/devicetree/bindings/display/xlnx/xlnx,zynqmp-dpsub.yaml b/Documentation/devicetree/bindings/display/xlnx/xlnx,zynqmp-dpsub.yaml
index 7b9d468..403d579 100644
--- a/Documentation/devicetree/bindings/display/xlnx/xlnx,zynqmp-dpsub.yaml
+++ b/Documentation/devicetree/bindings/display/xlnx/xlnx,zynqmp-dpsub.yaml
@@ -98,7 +98,6 @@
maxItems: 1
dmas:
- maxItems: 4
items:
- description: Video layer, plane 0 (RGB or luma)
- description: Video layer, plane 1 (U/V or U)
diff --git a/Documentation/devicetree/bindings/dma/dma-common.yaml b/Documentation/devicetree/bindings/dma/dma-common.yaml
index 307b499..ad06d36 100644
--- a/Documentation/devicetree/bindings/dma/dma-common.yaml
+++ b/Documentation/devicetree/bindings/dma/dma-common.yaml
@@ -38,12 +38,12 @@
maxItems: 255
dma-channels:
- $ref: /schemas/types.yaml#definitions/uint32
+ $ref: /schemas/types.yaml#/definitions/uint32
description:
Number of DMA channels supported by the controller.
dma-requests:
- $ref: /schemas/types.yaml#definitions/uint32
+ $ref: /schemas/types.yaml#/definitions/uint32
description:
Number of DMA request signals supported by the controller.
diff --git a/Documentation/devicetree/bindings/dma/dma-router.yaml b/Documentation/devicetree/bindings/dma/dma-router.yaml
index 4cee566..e727484 100644
--- a/Documentation/devicetree/bindings/dma/dma-router.yaml
+++ b/Documentation/devicetree/bindings/dma/dma-router.yaml
@@ -23,7 +23,7 @@
pattern: "^dma-router(@.*)?$"
dma-masters:
- $ref: /schemas/types.yaml#definitions/phandle-array
+ $ref: /schemas/types.yaml#/definitions/phandle-array
description:
Array of phandles to the DMA controllers the router can direct
the signal to.
diff --git a/Documentation/devicetree/bindings/dma/ingenic,dma.yaml b/Documentation/devicetree/bindings/dma/ingenic,dma.yaml
index 00f19b3..6a20437 100644
--- a/Documentation/devicetree/bindings/dma/ingenic,dma.yaml
+++ b/Documentation/devicetree/bindings/dma/ingenic,dma.yaml
@@ -48,7 +48,7 @@
ingenic,reserved-channels property.
ingenic,reserved-channels:
- $ref: /schemas/types.yaml#definitions/uint32
+ $ref: /schemas/types.yaml#/definitions/uint32
description: >
Bitmask of channels to reserve for devices that need a specific
channel. These channels will only be assigned when explicitely
diff --git a/Documentation/devicetree/bindings/dma/renesas,rcar-dmac.yaml b/Documentation/devicetree/bindings/dma/renesas,rcar-dmac.yaml
index b548e47..c07eb6f 100644
--- a/Documentation/devicetree/bindings/dma/renesas,rcar-dmac.yaml
+++ b/Documentation/devicetree/bindings/dma/renesas,rcar-dmac.yaml
@@ -73,7 +73,6 @@
maxItems: 1
clock-names:
- maxItems: 1
items:
- const: fck
diff --git a/Documentation/devicetree/bindings/dma/snps,dma-spear1340.yaml b/Documentation/devicetree/bindings/dma/snps,dma-spear1340.yaml
index ef1d687..6b35089 100644
--- a/Documentation/devicetree/bindings/dma/snps,dma-spear1340.yaml
+++ b/Documentation/devicetree/bindings/dma/snps,dma-spear1340.yaml
@@ -54,7 +54,7 @@
maximum: 16
dma-masters:
- $ref: /schemas/types.yaml#definitions/uint32
+ $ref: /schemas/types.yaml#/definitions/uint32
description: |
Number of DMA masters supported by the controller. In case if
not specified the driver will try to auto-detect this and
@@ -63,7 +63,7 @@
maximum: 4
chan_allocation_order:
- $ref: /schemas/types.yaml#definitions/uint32
+ $ref: /schemas/types.yaml#/definitions/uint32
description: |
DMA channels allocation order specifier. Zero means ascending order
(first free allocated), while one - descending (last free allocated).
@@ -71,7 +71,7 @@
enum: [0, 1]
chan_priority:
- $ref: /schemas/types.yaml#definitions/uint32
+ $ref: /schemas/types.yaml#/definitions/uint32
description: |
DMA channels priority order. Zero means ascending channels priority
so the very first channel has the highest priority. While 1 means
@@ -80,7 +80,7 @@
enum: [0, 1]
block_size:
- $ref: /schemas/types.yaml#definitions/uint32
+ $ref: /schemas/types.yaml#/definitions/uint32
description: Maximum block size supported by the DMA controller.
enum: [3, 7, 15, 31, 63, 127, 255, 511, 1023, 2047, 4095]
@@ -139,7 +139,7 @@
default: 256
snps,dma-protection-control:
- $ref: /schemas/types.yaml#definitions/uint32
+ $ref: /schemas/types.yaml#/definitions/uint32
description: |
Bits one-to-one passed to the AHB HPROT[3:1] bus. Each bit setting
indicates the following features: bit 0 - privileged mode,
diff --git a/Documentation/devicetree/bindings/eeprom/at24.yaml b/Documentation/devicetree/bindings/eeprom/at24.yaml
index 6edfa70..d5117c6 100644
--- a/Documentation/devicetree/bindings/eeprom/at24.yaml
+++ b/Documentation/devicetree/bindings/eeprom/at24.yaml
@@ -131,7 +131,7 @@
default: 1
read-only:
- $ref: /schemas/types.yaml#definitions/flag
+ $ref: /schemas/types.yaml#/definitions/flag
description:
Disables writes to the eeprom.
@@ -141,7 +141,7 @@
Total eeprom size in bytes.
no-read-rollover:
- $ref: /schemas/types.yaml#definitions/flag
+ $ref: /schemas/types.yaml#/definitions/flag
description:
Indicates that the multi-address eeprom does not automatically roll
over reads to the next slave address. Please consult the manual of
diff --git a/Documentation/devicetree/bindings/eeprom/at25.yaml b/Documentation/devicetree/bindings/eeprom/at25.yaml
index 7449736..121a601 100644
--- a/Documentation/devicetree/bindings/eeprom/at25.yaml
+++ b/Documentation/devicetree/bindings/eeprom/at25.yaml
@@ -45,13 +45,13 @@
spi-max-frequency: true
pagesize:
- $ref: /schemas/types.yaml#definitions/uint32
+ $ref: /schemas/types.yaml#/definitions/uint32
enum: [1, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, 131072]
description:
Size of the eeprom page.
size:
- $ref: /schemas/types.yaml#definitions/uint32
+ $ref: /schemas/types.yaml#/definitions/uint32
description:
Total eeprom size in bytes.
diff --git a/Documentation/devicetree/bindings/hwmon/moortec,mr75203.yaml b/Documentation/devicetree/bindings/hwmon/moortec,mr75203.yaml
index 6f3e3c0..b79f069 100644
--- a/Documentation/devicetree/bindings/hwmon/moortec,mr75203.yaml
+++ b/Documentation/devicetree/bindings/hwmon/moortec,mr75203.yaml
@@ -32,7 +32,7 @@
PVT controller has 5 VM (voltage monitor) sensors.
vm-map defines CPU core to VM instance mapping. A
value of 0xff means that VM sensor is unused.
- $ref: /schemas/types.yaml#definitions/uint8-array
+ $ref: /schemas/types.yaml#/definitions/uint8-array
maxItems: 5
clocks:
diff --git a/Documentation/devicetree/bindings/hwmon/sensirion,shtc1.yaml b/Documentation/devicetree/bindings/hwmon/sensirion,shtc1.yaml
index c523a1b..7d49478 100644
--- a/Documentation/devicetree/bindings/hwmon/sensirion,shtc1.yaml
+++ b/Documentation/devicetree/bindings/hwmon/sensirion,shtc1.yaml
@@ -29,12 +29,12 @@
const: 0x70
sensirion,blocking-io:
- $ref: /schemas/types.yaml#definitions/flag
+ $ref: /schemas/types.yaml#/definitions/flag
description:
If set, the driver hold the i2c bus until measurement is finished.
sensirion,low-precision:
- $ref: /schemas/types.yaml#definitions/flag
+ $ref: /schemas/types.yaml#/definitions/flag
description:
If set, the sensor aquire data with low precision (not recommended).
The driver aquire data with high precision by default.
diff --git a/Documentation/devicetree/bindings/hwmon/ti,tmp513.yaml b/Documentation/devicetree/bindings/hwmon/ti,tmp513.yaml
index c17e5d3..8020d73 100644
--- a/Documentation/devicetree/bindings/hwmon/ti,tmp513.yaml
+++ b/Documentation/devicetree/bindings/hwmon/ti,tmp513.yaml
@@ -61,7 +61,7 @@
Array of three(TMP513) or two(TMP512) n-Factor value for each remote
temperature channel.
See datasheet Table 11 for n-Factor range list and value interpretation.
- $ref: /schemas/types.yaml#definitions/uint32-array
+ $ref: /schemas/types.yaml#/definitions/uint32-array
minItems: 2
maxItems: 3
items:
diff --git a/Documentation/devicetree/bindings/iio/adc/lltc,ltc2496.yaml b/Documentation/devicetree/bindings/iio/adc/lltc,ltc2496.yaml
index 6a991e9..2716d4e 100644
--- a/Documentation/devicetree/bindings/iio/adc/lltc,ltc2496.yaml
+++ b/Documentation/devicetree/bindings/iio/adc/lltc,ltc2496.yaml
@@ -17,8 +17,7 @@
- lltc,ltc2496
vref-supply:
- description: phandle to an external regulator providing the reference voltage
- $ref: /schemas/types.yaml#/definitions/phandle
+ description: Power supply for the reference voltage
reg:
description: spi chipselect number according to the usual spi bindings
diff --git a/Documentation/devicetree/bindings/iio/humidity/ti,hdc2010.yaml b/Documentation/devicetree/bindings/iio/humidity/ti,hdc2010.yaml
index 7037f82..88384b6 100644
--- a/Documentation/devicetree/bindings/iio/humidity/ti,hdc2010.yaml
+++ b/Documentation/devicetree/bindings/iio/humidity/ti,hdc2010.yaml
@@ -22,8 +22,7 @@
- ti,hdc2010
- ti,hdc2080
- vdd-supply:
- maxItems: 1
+ vdd-supply: true
reg:
maxItems: 1
diff --git a/Documentation/devicetree/bindings/iio/light/upisemi,us5182.yaml b/Documentation/devicetree/bindings/iio/light/upisemi,us5182.yaml
index 4a9b282..de5882c 100644
--- a/Documentation/devicetree/bindings/iio/light/upisemi,us5182.yaml
+++ b/Documentation/devicetree/bindings/iio/light/upisemi,us5182.yaml
@@ -45,7 +45,7 @@
default: 0x16
upisemi,continuous:
- $ref: /schemas/types.yaml#definitions/flag
+ $ref: /schemas/types.yaml#/definitions/flag
description: |
This chip has two power modes: one-shot (chip takes one measurement and
then shuts itself down) and continuous (chip takes continuous
diff --git a/Documentation/devicetree/bindings/iio/proximity/semtech,sx9310.yaml b/Documentation/devicetree/bindings/iio/proximity/semtech,sx9310.yaml
index ccfb163..5de0bb2 100644
--- a/Documentation/devicetree/bindings/iio/proximity/semtech,sx9310.yaml
+++ b/Documentation/devicetree/bindings/iio/proximity/semtech,sx9310.yaml
@@ -72,7 +72,7 @@
- finest
semtech,startup-sensor:
- $ref: /schemas/types.yaml#definitions/uint32
+ $ref: /schemas/types.yaml#/definitions/uint32
enum: [0, 1, 2, 3]
default: 0
description:
@@ -81,7 +81,7 @@
compensation.
semtech,proxraw-strength:
- $ref: /schemas/types.yaml#definitions/uint32
+ $ref: /schemas/types.yaml#/definitions/uint32
enum: [0, 2, 4, 8]
default: 2
description:
@@ -89,7 +89,7 @@
represent 1-1/N.
semtech,avg-pos-strength:
- $ref: /schemas/types.yaml#definitions/uint32
+ $ref: /schemas/types.yaml#/definitions/uint32
enum: [0, 16, 64, 128, 256, 512, 1024, 4294967295]
default: 16
description:
diff --git a/Documentation/devicetree/bindings/input/fsl,mpr121-touchkey.yaml b/Documentation/devicetree/bindings/input/fsl,mpr121-touchkey.yaml
index 378a85c..878464f 100644
--- a/Documentation/devicetree/bindings/input/fsl,mpr121-touchkey.yaml
+++ b/Documentation/devicetree/bindings/input/fsl,mpr121-touchkey.yaml
@@ -31,8 +31,7 @@
interrupts:
maxItems: 1
- vdd-supply:
- maxItems: 1
+ vdd-supply: true
linux,keycodes:
minItems: 1
diff --git a/Documentation/devicetree/bindings/input/gpio-keys.yaml b/Documentation/devicetree/bindings/input/gpio-keys.yaml
index 6966ab0..060a309 100644
--- a/Documentation/devicetree/bindings/input/gpio-keys.yaml
+++ b/Documentation/devicetree/bindings/input/gpio-keys.yaml
@@ -34,13 +34,13 @@
linux,code:
description: Key / Axis code to emit.
- $ref: /schemas/types.yaml#definitions/uint32
+ $ref: /schemas/types.yaml#/definitions/uint32
linux,input-type:
description:
Specify event type this button/key generates. If not specified defaults to
<1> == EV_KEY.
- $ref: /schemas/types.yaml#definitions/uint32
+ $ref: /schemas/types.yaml#/definitions/uint32
default: 1
@@ -56,12 +56,12 @@
linux,input-value = <0xffffffff>; /* -1 */
- $ref: /schemas/types.yaml#definitions/uint32
+ $ref: /schemas/types.yaml#/definitions/uint32
debounce-interval:
description:
Debouncing interval time in milliseconds. If not specified defaults to 5.
- $ref: /schemas/types.yaml#definitions/uint32
+ $ref: /schemas/types.yaml#/definitions/uint32
default: 5
@@ -79,7 +79,7 @@
EV_ACT_ANY - both asserted and deasserted
EV_ACT_ASSERTED - asserted
EV_ACT_DEASSERTED - deasserted
- $ref: /schemas/types.yaml#definitions/uint32
+ $ref: /schemas/types.yaml#/definitions/uint32
enum: [0, 1, 2]
linux,can-disable:
@@ -118,7 +118,7 @@
poll-interval:
description:
Poll interval time in milliseconds
- $ref: /schemas/types.yaml#definitions/uint32
+ $ref: /schemas/types.yaml#/definitions/uint32
required:
- poll-interval
diff --git a/Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.yaml b/Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.yaml
index 4ce1094..bfc3a8b 100644
--- a/Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.yaml
+++ b/Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.yaml
@@ -55,8 +55,7 @@
wakeup-source: true
- vcc-supply:
- maxItems: 1
+ vcc-supply: true
gain:
description: Allows setting the sensitivity in the range from 0 to 31.
diff --git a/Documentation/devicetree/bindings/interrupt-controller/mti,gic.yaml b/Documentation/devicetree/bindings/interrupt-controller/mti,gic.yaml
index 039e08a..91bb3c2 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/mti,gic.yaml
+++ b/Documentation/devicetree/bindings/interrupt-controller/mti,gic.yaml
@@ -42,7 +42,7 @@
Specifies the list of CPU interrupt vectors to which the GIC may not
route interrupts. This property is ignored if the CPU is started in EIC
mode.
- $ref: /schemas/types.yaml#definitions/uint32-array
+ $ref: /schemas/types.yaml#/definitions/uint32-array
minItems: 1
maxItems: 6
uniqueItems: true
@@ -56,7 +56,7 @@
It accepts two values: the 1st is the starting interrupt and the 2nd is
the size of the reserved range. If not specified, the driver will
allocate the last (2 * number of VPEs in the system).
- $ref: /schemas/types.yaml#definitions/uint32-array
+ $ref: /schemas/types.yaml#/definitions/uint32-array
items:
- minimum: 0
maximum: 254
diff --git a/Documentation/devicetree/bindings/interrupt-controller/ti,pruss-intc.yaml b/Documentation/devicetree/bindings/interrupt-controller/ti,pruss-intc.yaml
index 1c4c009..c2ce215 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/ti,pruss-intc.yaml
+++ b/Documentation/devicetree/bindings/interrupt-controller/ti,pruss-intc.yaml
@@ -80,7 +80,7 @@
mapping is provided.
ti,irqs-reserved:
- $ref: /schemas/types.yaml#definitions/uint8
+ $ref: /schemas/types.yaml#/definitions/uint8
description: |
Bitmask of host interrupts between 0 and 7 (corresponding to PRUSS INTC
output interrupts 2 through 9) that are not connected to the Arm interrupt
diff --git a/Documentation/devicetree/bindings/interrupt-controller/ti,sci-inta.yaml b/Documentation/devicetree/bindings/interrupt-controller/ti,sci-inta.yaml
index b5af120..3d89668 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/ti,sci-inta.yaml
+++ b/Documentation/devicetree/bindings/interrupt-controller/ti,sci-inta.yaml
@@ -76,7 +76,7 @@
"limit" specifies the limit for translation
ti,unmapped-event-sources:
- $ref: /schemas/types.yaml#definitions/phandle-array
+ $ref: /schemas/types.yaml#/definitions/phandle-array
description:
Array of phandles to DMA controllers where the unmapped events originate.
diff --git a/Documentation/devicetree/bindings/leds/backlight/common.yaml b/Documentation/devicetree/bindings/leds/backlight/common.yaml
index bc817f7..702ba35 100644
--- a/Documentation/devicetree/bindings/leds/backlight/common.yaml
+++ b/Documentation/devicetree/bindings/leds/backlight/common.yaml
@@ -22,7 +22,7 @@
The default brightness that should be applied to the LED by the operating
system on start-up. The brightness should not exceed the brightness the
LED can provide.
- $ref: /schemas/types.yaml#definitions/uint32
+ $ref: /schemas/types.yaml#/definitions/uint32
max-brightness:
description:
@@ -31,6 +31,6 @@
on the brightness apart from what the driver says, as it could happen
that a LED can be made so bright that it gets damaged or causes damage
due to restrictions in a specific system, such as mounting conditions.
- $ref: /schemas/types.yaml#definitions/uint32
+ $ref: /schemas/types.yaml#/definitions/uint32
additionalProperties: true
diff --git a/Documentation/devicetree/bindings/leds/common.yaml b/Documentation/devicetree/bindings/leds/common.yaml
index f1211e7..b1f3637 100644
--- a/Documentation/devicetree/bindings/leds/common.yaml
+++ b/Documentation/devicetree/bindings/leds/common.yaml
@@ -27,21 +27,21 @@
List of device current outputs the LED is connected to. The outputs are
identified by the numbers that must be defined in the LED device binding
documentation.
- $ref: /schemas/types.yaml#definitions/uint32-array
+ $ref: /schemas/types.yaml#/definitions/uint32-array
function:
description:
LED function. Use one of the LED_FUNCTION_* prefixed definitions
from the header include/dt-bindings/leds/common.h. If there is no
matching LED_FUNCTION available, add a new one.
- $ref: /schemas/types.yaml#definitions/string
+ $ref: /schemas/types.yaml#/definitions/string
color:
description:
Color of the LED. Use one of the LED_COLOR_ID_* prefixed definitions from
the header include/dt-bindings/leds/common.h. If there is no matching
LED_COLOR_ID available, add a new one.
- $ref: /schemas/types.yaml#definitions/uint32
+ $ref: /schemas/types.yaml#/definitions/uint32
minimum: 0
maximum: 9
@@ -49,7 +49,7 @@
description:
Integer to be used when more than one instance of the same function is
needed, differing only with an ordinal number.
- $ref: /schemas/types.yaml#definitions/uint32
+ $ref: /schemas/types.yaml#/definitions/uint32
label:
description:
@@ -66,7 +66,7 @@
produced where the LED momentarily turns off (or on). The "keep" setting
will keep the LED at whatever its current state is, without producing a
glitch.
- $ref: /schemas/types.yaml#definitions/string
+ $ref: /schemas/types.yaml#/definitions/string
enum:
- on
- off
@@ -77,7 +77,7 @@
description:
This parameter, if present, is a string defining the trigger assigned to
the LED.
- $ref: /schemas/types.yaml#definitions/string
+ $ref: /schemas/types.yaml#/definitions/string
enum:
# LED will act as a back-light, controlled by the framebuffer system
@@ -109,7 +109,7 @@
brightness and duration (in ms). The exact format is
described in:
Documentation/devicetree/bindings/leds/leds-trigger-pattern.txt
- $ref: /schemas/types.yaml#definitions/uint32-matrix
+ $ref: /schemas/types.yaml#/definitions/uint32-matrix
items:
minItems: 2
maxItems: 2
@@ -143,7 +143,7 @@
the device tree and be referenced by a phandle and a set of phandle
arguments. A length of arguments should be specified by the
#trigger-source-cells property in the source node.
- $ref: /schemas/types.yaml#definitions/phandle-array
+ $ref: /schemas/types.yaml#/definitions/phandle-array
# Required properties for flash LED child nodes:
flash-max-microamp:
diff --git a/Documentation/devicetree/bindings/leds/leds-lp55xx.yaml b/Documentation/devicetree/bindings/leds/leds-lp55xx.yaml
index 58e9747..f552cd1 100644
--- a/Documentation/devicetree/bindings/leds/leds-lp55xx.yaml
+++ b/Documentation/devicetree/bindings/leds/leds-lp55xx.yaml
@@ -35,7 +35,7 @@
description: I2C slave address
clock-mode:
- $ref: /schemas/types.yaml#definitions/uint8
+ $ref: /schemas/types.yaml#/definitions/uint8
description: |
Input clock mode
enum:
@@ -49,7 +49,7 @@
GPIO attached to the chip's enable pin
pwr-sel:
- $ref: /schemas/types.yaml#definitions/uint8
+ $ref: /schemas/types.yaml#/definitions/uint8
description: |
LP8501 specific property. Power selection for output channels.
enum:
@@ -70,14 +70,14 @@
$ref: common.yaml#
properties:
led-cur:
- $ref: /schemas/types.yaml#definitions/uint8
+ $ref: /schemas/types.yaml#/definitions/uint8
description: |
Current setting at each LED channel (mA x10, 0 if LED is not connected)
minimum: 0
maximum: 255
max-cur:
- $ref: /schemas/types.yaml#definitions/uint8
+ $ref: /schemas/types.yaml#/definitions/uint8
description: Maximun current at each LED channel.
reg:
@@ -97,7 +97,7 @@
- 8 # LED output D9
chan-name:
- $ref: /schemas/types.yaml#definitions/string
+ $ref: /schemas/types.yaml#/definitions/string
description: name of channel
required:
diff --git a/Documentation/devicetree/bindings/mailbox/arm,mhu.yaml b/Documentation/devicetree/bindings/mailbox/arm,mhu.yaml
index d43791a..d07eb00 100644
--- a/Documentation/devicetree/bindings/mailbox/arm,mhu.yaml
+++ b/Documentation/devicetree/bindings/mailbox/arm,mhu.yaml
@@ -61,7 +61,6 @@
- description: low-priority non-secure
- description: high-priority non-secure
- description: Secure
- maxItems: 3
clocks:
maxItems: 1
diff --git a/Documentation/devicetree/bindings/media/coda.yaml b/Documentation/devicetree/bindings/media/coda.yaml
index 7bac005..36781ee 100644
--- a/Documentation/devicetree/bindings/media/coda.yaml
+++ b/Documentation/devicetree/bindings/media/coda.yaml
@@ -44,6 +44,21 @@
- const: per
- const: ahb
+ interrupts:
+ minItems: 1
+ items:
+ - description: BIT processor interrupt
+ - description: JPEG unit interrupt
+
+ interrupt-names:
+ minItems: 1
+ items:
+ - const: bit
+ - const: jpeg
+
+ power-domains:
+ maxItems: 1
+
resets:
maxItems: 1
@@ -59,6 +74,8 @@
- clocks
- clock-names
+additionalProperties: false
+
allOf:
- if:
properties:
@@ -68,34 +85,17 @@
then:
properties:
interrupts:
- items:
- - description: BIT processor interrupt
- - description: JPEG unit interrupt
+ minItems: 2
interrupt-names:
- items:
- - const: bit
- - const: jpeg
+ minItems: 2
else:
properties:
interrupts:
- items:
- - description: BIT processor interrupt
-
- - if:
- properties:
- compatible:
- contains:
- enum:
- - fsl,imx6dl-vpu
- - fsl,imx6q-vpu
- then:
- properties:
- power-domains:
- $ref: /schemas/types.yaml#/definitions/phandle
- description: phandle pointing to the PU power domain
maxItems: 1
+ power-domains: false
+
examples:
- |
vpu: video-codec@63ff4000 {
diff --git a/Documentation/devicetree/bindings/media/i2c/maxim,max9286.yaml b/Documentation/devicetree/bindings/media/i2c/maxim,max9286.yaml
index 9ea8270..68ee8c7 100644
--- a/Documentation/devicetree/bindings/media/i2c/maxim,max9286.yaml
+++ b/Documentation/devicetree/bindings/media/i2c/maxim,max9286.yaml
@@ -40,7 +40,6 @@
poc-supply:
description: Regulator providing Power over Coax to the cameras
- maxItems: 1
enable-gpios:
description: GPIO connected to the \#PWDN pin with inverted polarity
diff --git a/Documentation/devicetree/bindings/media/i2c/mipi-ccs.yaml b/Documentation/devicetree/bindings/media/i2c/mipi-ccs.yaml
index d94bd67..bb35283 100644
--- a/Documentation/devicetree/bindings/media/i2c/mipi-ccs.yaml
+++ b/Documentation/devicetree/bindings/media/i2c/mipi-ccs.yaml
@@ -1,6 +1,7 @@
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
# Copyright (C) 2014--2020 Intel Corporation
-
+%YAML 1.2
+---
$id: http://devicetree.org/schemas/media/i2c/mipi-ccs.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
@@ -26,11 +27,11 @@
compatible:
oneOf:
- items:
- - const: mipi-ccs-1.1
- - const: mipi-ccs
+ - const: mipi-ccs-1.1
+ - const: mipi-ccs
- items:
- - const: mipi-ccs-1.0
- - const: mipi-ccs
+ - const: mipi-ccs-1.0
+ - const: mipi-ccs
- const: nokia,smia
reg:
@@ -38,15 +39,12 @@
vana-supply:
description: Analogue voltage supply (VANA), sensor dependent.
- maxItems: 1
vcore-supply:
description: Core voltage supply (VCore), sensor dependent.
- maxItems: 1
vio-supply:
description: I/O voltage supply (VIO), sensor dependent.
- maxItems: 1
clocks:
description: External clock to the sensor.
diff --git a/Documentation/devicetree/bindings/media/i2c/sony,imx214.yaml b/Documentation/devicetree/bindings/media/i2c/sony,imx214.yaml
index 1a3590d..eb12526 100644
--- a/Documentation/devicetree/bindings/media/i2c/sony,imx214.yaml
+++ b/Documentation/devicetree/bindings/media/i2c/sony,imx214.yaml
@@ -37,15 +37,12 @@
vdddo-supply:
description: Chip digital IO regulator (1.8V).
- maxItems: 1
vdda-supply:
description: Chip analog regulator (2.7V).
- maxItems: 1
vddd-supply:
description: Chip digital core regulator (1.12V).
- maxItems: 1
flash-leds:
description: See ../video-interfaces.txt
diff --git a/Documentation/devicetree/bindings/media/i2c/sony,imx274.yaml b/Documentation/devicetree/bindings/media/i2c/sony,imx274.yaml
index f697e1a..a66acb2 100644
--- a/Documentation/devicetree/bindings/media/i2c/sony,imx274.yaml
+++ b/Documentation/devicetree/bindings/media/i2c/sony,imx274.yaml
@@ -33,15 +33,12 @@
vana-supply:
description: Sensor 2.8 V analog supply.
- maxItems: 1
vdig-supply:
description: Sensor 1.8 V digital core supply.
- maxItems: 1
vddl-supply:
description: Sensor digital IO 1.2 V supply.
- maxItems: 1
port:
type: object
diff --git a/Documentation/devicetree/bindings/mfd/st,stmfx.yaml b/Documentation/devicetree/bindings/mfd/st,stmfx.yaml
index 888ab4b..19e9afb 100644
--- a/Documentation/devicetree/bindings/mfd/st,stmfx.yaml
+++ b/Documentation/devicetree/bindings/mfd/st,stmfx.yaml
@@ -26,8 +26,7 @@
drive-open-drain: true
- vdd-supply:
- maxItems: 1
+ vdd-supply: true
pinctrl:
type: object
diff --git a/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml b/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml
index c7c9ad4..7f2578d 100644
--- a/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml
+++ b/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml
@@ -38,7 +38,7 @@
const: stmmaceth
syscon:
- $ref: /schemas/types.yaml#definitions/phandle
+ $ref: /schemas/types.yaml#/definitions/phandle
description:
Phandle to the device containing the EMAC or GMAC clock
register
@@ -114,7 +114,7 @@
then:
properties:
allwinner,leds-active-low:
- $ref: /schemas/types.yaml#definitions/flag
+ $ref: /schemas/types.yaml#/definitions/flag
description:
EPHY LEDs are active low.
@@ -126,7 +126,7 @@
const: allwinner,sun8i-h3-mdio-mux
mdio-parent-bus:
- $ref: /schemas/types.yaml#definitions/phandle
+ $ref: /schemas/types.yaml#/definitions/phandle
description:
Phandle to EMAC MDIO.
diff --git a/Documentation/devicetree/bindings/net/amlogic,meson-dwmac.yaml b/Documentation/devicetree/bindings/net/amlogic,meson-dwmac.yaml
index 6b057b1..1f133f4 100644
--- a/Documentation/devicetree/bindings/net/amlogic,meson-dwmac.yaml
+++ b/Documentation/devicetree/bindings/net/amlogic,meson-dwmac.yaml
@@ -60,7 +60,7 @@
- const: timing-adjustment
amlogic,tx-delay-ns:
- $ref: /schemas/types.yaml#definitions/uint32
+ $ref: /schemas/types.yaml#/definitions/uint32
description:
The internal RGMII TX clock delay (provided by this driver) in
nanoseconds. Allowed values are 0ns, 2ns, 4ns, 6ns.
diff --git a/Documentation/devicetree/bindings/net/dsa/dsa.yaml b/Documentation/devicetree/bindings/net/dsa/dsa.yaml
index 8e04463..8a3494d 100644
--- a/Documentation/devicetree/bindings/net/dsa/dsa.yaml
+++ b/Documentation/devicetree/bindings/net/dsa/dsa.yaml
@@ -54,7 +54,7 @@
description:
Describes the label associated with this port, which will become
the netdev name
- $ref: /schemas/types.yaml#definitions/string
+ $ref: /schemas/types.yaml#/definitions/string
link:
description:
@@ -62,13 +62,13 @@
port is used as the outgoing port towards the phandle ports. The
full routing information must be given, not just the one hop
routes to neighbouring switches
- $ref: /schemas/types.yaml#definitions/phandle-array
+ $ref: /schemas/types.yaml#/definitions/phandle-array
ethernet:
description:
Should be a phandle to a valid Ethernet device node. This host
device is what the switch port is connected to
- $ref: /schemas/types.yaml#definitions/phandle
+ $ref: /schemas/types.yaml#/definitions/phandle
phy-handle: true
diff --git a/Documentation/devicetree/bindings/net/ethernet-controller.yaml b/Documentation/devicetree/bindings/net/ethernet-controller.yaml
index cc93063..0965f65 100644
--- a/Documentation/devicetree/bindings/net/ethernet-controller.yaml
+++ b/Documentation/devicetree/bindings/net/ethernet-controller.yaml
@@ -16,7 +16,7 @@
local-mac-address:
description:
Specifies the MAC address that was assigned to the network device.
- $ref: /schemas/types.yaml#definitions/uint8-array
+ $ref: /schemas/types.yaml#/definitions/uint8-array
items:
- minItems: 6
maxItems: 6
@@ -27,20 +27,20 @@
program; should be used in cases where the MAC address assigned
to the device by the boot program is different from the
local-mac-address property.
- $ref: /schemas/types.yaml#definitions/uint8-array
+ $ref: /schemas/types.yaml#/definitions/uint8-array
items:
- minItems: 6
maxItems: 6
max-frame-size:
- $ref: /schemas/types.yaml#definitions/uint32
+ $ref: /schemas/types.yaml#/definitions/uint32
description:
Maximum transfer unit (IEEE defined MTU), rather than the
maximum frame size (there\'s contradiction in the Devicetree
Specification).
max-speed:
- $ref: /schemas/types.yaml#definitions/uint32
+ $ref: /schemas/types.yaml#/definitions/uint32
description:
Specifies maximum speed in Mbit/s supported by the device.
@@ -101,7 +101,7 @@
$ref: "#/properties/phy-connection-type"
phy-handle:
- $ref: /schemas/types.yaml#definitions/phandle
+ $ref: /schemas/types.yaml#/definitions/phandle
description:
Specifies a reference to a node representing a PHY device.
@@ -114,7 +114,7 @@
deprecated: true
rx-fifo-depth:
- $ref: /schemas/types.yaml#definitions/uint32
+ $ref: /schemas/types.yaml#/definitions/uint32
description:
The size of the controller\'s receive fifo in bytes. This is used
for components that can have configurable receive fifo sizes,
@@ -129,12 +129,12 @@
If this property is present then the MAC applies the RX delay.
sfp:
- $ref: /schemas/types.yaml#definitions/phandle
+ $ref: /schemas/types.yaml#/definitions/phandle
description:
Specifies a reference to a node representing a SFP cage.
tx-fifo-depth:
- $ref: /schemas/types.yaml#definitions/uint32
+ $ref: /schemas/types.yaml#/definitions/uint32
description:
The size of the controller\'s transmit fifo in bytes. This
is used for components that can have configurable fifo sizes.
@@ -150,7 +150,7 @@
description:
Specifies the PHY management type. If auto is set and fixed-link
is not specified, it uses MDIO for management.
- $ref: /schemas/types.yaml#definitions/string
+ $ref: /schemas/types.yaml#/definitions/string
default: auto
enum:
- auto
@@ -198,17 +198,17 @@
speed:
description:
Link speed.
- $ref: /schemas/types.yaml#definitions/uint32
+ $ref: /schemas/types.yaml#/definitions/uint32
enum: [10, 100, 1000]
full-duplex:
- $ref: /schemas/types.yaml#definitions/flag
+ $ref: /schemas/types.yaml#/definitions/flag
description:
Indicates that full-duplex is used. When absent, half
duplex is assumed.
asym-pause:
- $ref: /schemas/types.yaml#definitions/flag
+ $ref: /schemas/types.yaml#/definitions/flag
description:
Indicates that asym_pause should be enabled.
diff --git a/Documentation/devicetree/bindings/net/ethernet-phy.yaml b/Documentation/devicetree/bindings/net/ethernet-phy.yaml
index 6dd72fa..2766fe4 100644
--- a/Documentation/devicetree/bindings/net/ethernet-phy.yaml
+++ b/Documentation/devicetree/bindings/net/ethernet-phy.yaml
@@ -78,57 +78,57 @@
Maximum PHY supported speed in Mbits / seconds.
broken-turn-around:
- $ref: /schemas/types.yaml#definitions/flag
+ $ref: /schemas/types.yaml#/definitions/flag
description:
If set, indicates the PHY device does not correctly release
the turn around line low at end of the control phase of the
MDIO transaction.
enet-phy-lane-swap:
- $ref: /schemas/types.yaml#definitions/flag
+ $ref: /schemas/types.yaml#/definitions/flag
description:
If set, indicates the PHY will swap the TX/RX lanes to
compensate for the board being designed with the lanes
swapped.
eee-broken-100tx:
- $ref: /schemas/types.yaml#definitions/flag
+ $ref: /schemas/types.yaml#/definitions/flag
description:
Mark the corresponding energy efficient ethernet mode as
broken and request the ethernet to stop advertising it.
eee-broken-1000t:
- $ref: /schemas/types.yaml#definitions/flag
+ $ref: /schemas/types.yaml#/definitions/flag
description:
Mark the corresponding energy efficient ethernet mode as
broken and request the ethernet to stop advertising it.
eee-broken-10gt:
- $ref: /schemas/types.yaml#definitions/flag
+ $ref: /schemas/types.yaml#/definitions/flag
description:
Mark the corresponding energy efficient ethernet mode as
broken and request the ethernet to stop advertising it.
eee-broken-1000kx:
- $ref: /schemas/types.yaml#definitions/flag
+ $ref: /schemas/types.yaml#/definitions/flag
description:
Mark the corresponding energy efficient ethernet mode as
broken and request the ethernet to stop advertising it.
eee-broken-10gkx4:
- $ref: /schemas/types.yaml#definitions/flag
+ $ref: /schemas/types.yaml#/definitions/flag
description:
Mark the corresponding energy efficient ethernet mode as
broken and request the ethernet to stop advertising it.
eee-broken-10gkr:
- $ref: /schemas/types.yaml#definitions/flag
+ $ref: /schemas/types.yaml#/definitions/flag
description:
Mark the corresponding energy efficient ethernet mode as
broken and request the ethernet to stop advertising it.
phy-is-integrated:
- $ref: /schemas/types.yaml#definitions/flag
+ $ref: /schemas/types.yaml#/definitions/flag
description:
If set, indicates that the PHY is integrated into the same
physical package as the Ethernet MAC. If needed, muxers
@@ -158,7 +158,7 @@
this property is missing the delay will be skipped.
sfp:
- $ref: /schemas/types.yaml#definitions/phandle
+ $ref: /schemas/types.yaml#/definitions/phandle
description:
Specifies a reference to a node representing a SFP cage.
diff --git a/Documentation/devicetree/bindings/net/fsl,qoriq-mc-dpmac.yaml b/Documentation/devicetree/bindings/net/fsl,qoriq-mc-dpmac.yaml
index 2159b7d..7f620a7 100644
--- a/Documentation/devicetree/bindings/net/fsl,qoriq-mc-dpmac.yaml
+++ b/Documentation/devicetree/bindings/net/fsl,qoriq-mc-dpmac.yaml
@@ -31,7 +31,7 @@
phy-mode: true
pcs-handle:
- $ref: /schemas/types.yaml#definitions/phandle
+ $ref: /schemas/types.yaml#/definitions/phandle
description:
A reference to a node representing a PCS PHY device found on
the internal MDIO bus.
diff --git a/Documentation/devicetree/bindings/net/mdio.yaml b/Documentation/devicetree/bindings/net/mdio.yaml
index e811e0f..08e15fb 100644
--- a/Documentation/devicetree/bindings/net/mdio.yaml
+++ b/Documentation/devicetree/bindings/net/mdio.yaml
@@ -70,7 +70,7 @@
The ID number for the device.
broken-turn-around:
- $ref: /schemas/types.yaml#definitions/flag
+ $ref: /schemas/types.yaml#/definitions/flag
description:
If set, indicates the MDIO device does not correctly release
the turn around line low at end of the control phase of the
diff --git a/Documentation/devicetree/bindings/net/mediatek,star-emac.yaml b/Documentation/devicetree/bindings/net/mediatek,star-emac.yaml
index 0bbd598..e6a5ff2 100644
--- a/Documentation/devicetree/bindings/net/mediatek,star-emac.yaml
+++ b/Documentation/devicetree/bindings/net/mediatek,star-emac.yaml
@@ -42,7 +42,7 @@
- const: trans
mediatek,pericfg:
- $ref: /schemas/types.yaml#definitions/phandle
+ $ref: /schemas/types.yaml#/definitions/phandle
description:
Phandle to the device containing the PERICFG register range. This is used
to control the MII mode.
diff --git a/Documentation/devicetree/bindings/net/qcom,ipa.yaml b/Documentation/devicetree/bindings/net/qcom,ipa.yaml
index 4d8464b..8a2d126 100644
--- a/Documentation/devicetree/bindings/net/qcom,ipa.yaml
+++ b/Documentation/devicetree/bindings/net/qcom,ipa.yaml
@@ -114,14 +114,13 @@
validating firwmare used by the GSI.
modem-remoteproc:
- $ref: /schemas/types.yaml#definitions/phandle
+ $ref: /schemas/types.yaml#/definitions/phandle
description:
This defines the phandle to the remoteproc node representing
the modem subsystem. This is requied so the IPA driver can
receive and act on notifications of modem up/down events.
memory-region:
- $ref: /schemas/types.yaml#/definitions/phandle-array
maxItems: 1
description:
If present, a phandle for a reserved memory area that holds
diff --git a/Documentation/devicetree/bindings/net/snps,dwmac.yaml b/Documentation/devicetree/bindings/net/snps,dwmac.yaml
index 11a6fdb..b2f6083 100644
--- a/Documentation/devicetree/bindings/net/snps,dwmac.yaml
+++ b/Documentation/devicetree/bindings/net/snps,dwmac.yaml
@@ -126,7 +126,7 @@
in a different mode than the PHY in order to function.
snps,axi-config:
- $ref: /schemas/types.yaml#definitions/phandle
+ $ref: /schemas/types.yaml#/definitions/phandle
description:
AXI BUS Mode parameters. Phandle to a node that can contain the
following properties
@@ -141,7 +141,7 @@
* snps,rb, rebuild INCRx Burst
snps,mtl-rx-config:
- $ref: /schemas/types.yaml#definitions/phandle
+ $ref: /schemas/types.yaml#/definitions/phandle
description:
Multiple RX Queues parameters. Phandle to a node that can
contain the following properties
@@ -164,7 +164,7 @@
* snps,priority, RX queue priority (Range 0x0 to 0xF)
snps,mtl-tx-config:
- $ref: /schemas/types.yaml#definitions/phandle
+ $ref: /schemas/types.yaml#/definitions/phandle
description:
Multiple TX Queues parameters. Phandle to a node that can
contain the following properties
@@ -198,7 +198,7 @@
snps,reset-active-low:
deprecated: true
- $ref: /schemas/types.yaml#definitions/flag
+ $ref: /schemas/types.yaml#/definitions/flag
description:
Indicates that the PHY Reset is active low
@@ -208,55 +208,55 @@
Triplet of delays. The 1st cell is reset pre-delay in micro
seconds. The 2nd cell is reset pulse in micro seconds. The 3rd
cell is reset post-delay in micro seconds.
- $ref: /schemas/types.yaml#definitions/uint32-array
+ $ref: /schemas/types.yaml#/definitions/uint32-array
minItems: 3
maxItems: 3
snps,aal:
- $ref: /schemas/types.yaml#definitions/flag
+ $ref: /schemas/types.yaml#/definitions/flag
description:
Use Address-Aligned Beats
snps,fixed-burst:
- $ref: /schemas/types.yaml#definitions/flag
+ $ref: /schemas/types.yaml#/definitions/flag
description:
Program the DMA to use the fixed burst mode
snps,mixed-burst:
- $ref: /schemas/types.yaml#definitions/flag
+ $ref: /schemas/types.yaml#/definitions/flag
description:
Program the DMA to use the mixed burst mode
snps,force_thresh_dma_mode:
- $ref: /schemas/types.yaml#definitions/flag
+ $ref: /schemas/types.yaml#/definitions/flag
description:
Force DMA to use the threshold mode for both tx and rx
snps,force_sf_dma_mode:
- $ref: /schemas/types.yaml#definitions/flag
+ $ref: /schemas/types.yaml#/definitions/flag
description:
Force DMA to use the Store and Forward mode for both tx and
rx. This flag is ignored if force_thresh_dma_mode is set.
snps,en-tx-lpi-clockgating:
- $ref: /schemas/types.yaml#definitions/flag
+ $ref: /schemas/types.yaml#/definitions/flag
description:
Enable gating of the MAC TX clock during TX low-power mode
snps,multicast-filter-bins:
- $ref: /schemas/types.yaml#definitions/uint32
+ $ref: /schemas/types.yaml#/definitions/uint32
description:
Number of multicast filter hash bins supported by this device
instance
snps,perfect-filter-entries:
- $ref: /schemas/types.yaml#definitions/uint32
+ $ref: /schemas/types.yaml#/definitions/uint32
description:
Number of perfect filter entries supported by this device
instance
snps,ps-speed:
- $ref: /schemas/types.yaml#definitions/uint32
+ $ref: /schemas/types.yaml#/definitions/uint32
description:
Port selection speed that can be passed to the core when PCS
is supported. For example, this is used in case of SGMII and
@@ -307,25 +307,25 @@
snps,pbl:
description:
Programmable Burst Length (tx and rx)
- $ref: /schemas/types.yaml#definitions/uint32
+ $ref: /schemas/types.yaml#/definitions/uint32
enum: [2, 4, 8]
snps,txpbl:
description:
Tx Programmable Burst Length. If set, DMA tx will use this
value rather than snps,pbl.
- $ref: /schemas/types.yaml#definitions/uint32
+ $ref: /schemas/types.yaml#/definitions/uint32
enum: [2, 4, 8]
snps,rxpbl:
description:
Rx Programmable Burst Length. If set, DMA rx will use this
value rather than snps,pbl.
- $ref: /schemas/types.yaml#definitions/uint32
+ $ref: /schemas/types.yaml#/definitions/uint32
enum: [2, 4, 8]
snps,no-pbl-x8:
- $ref: /schemas/types.yaml#definitions/flag
+ $ref: /schemas/types.yaml#/definitions/flag
description:
Don\'t multiply the pbl/txpbl/rxpbl values by 8. For core
rev < 3.50, don\'t multiply the values by 4.
@@ -351,7 +351,7 @@
then:
properties:
snps,tso:
- $ref: /schemas/types.yaml#definitions/flag
+ $ref: /schemas/types.yaml#/definitions/flag
description:
Enables the TSO feature otherwise it will be managed by
MAC HW capability register.
diff --git a/Documentation/devicetree/bindings/net/socionext,uniphier-ave4.yaml b/Documentation/devicetree/bindings/net/socionext,uniphier-ave4.yaml
index cbacc04..8a03a24 100644
--- a/Documentation/devicetree/bindings/net/socionext,uniphier-ave4.yaml
+++ b/Documentation/devicetree/bindings/net/socionext,uniphier-ave4.yaml
@@ -64,7 +64,7 @@
- const: ether # for others
socionext,syscon-phy-mode:
- $ref: /schemas/types.yaml#definitions/phandle-array
+ $ref: /schemas/types.yaml#/definitions/phandle-array
description:
A phandle to syscon with one argument that configures phy mode.
The argument is the ID of MAC instance.
diff --git a/Documentation/devicetree/bindings/net/ti,cpsw-switch.yaml b/Documentation/devicetree/bindings/net/ti,cpsw-switch.yaml
index dadeb8f..07a00f5 100644
--- a/Documentation/devicetree/bindings/net/ti,cpsw-switch.yaml
+++ b/Documentation/devicetree/bindings/net/ti,cpsw-switch.yaml
@@ -70,7 +70,7 @@
pinctrl-names: true
syscon:
- $ref: /schemas/types.yaml#definitions/phandle
+ $ref: /schemas/types.yaml#/definitions/phandle
description:
Phandle to the system control device node which provides access to
efuse IO range with MAC addresses
diff --git a/Documentation/devicetree/bindings/net/ti,dp83867.yaml b/Documentation/devicetree/bindings/net/ti,dp83867.yaml
index 4050a36..047d757 100644
--- a/Documentation/devicetree/bindings/net/ti,dp83867.yaml
+++ b/Documentation/devicetree/bindings/net/ti,dp83867.yaml
@@ -47,31 +47,31 @@
takes precedence.
tx-fifo-depth:
- $ref: /schemas/types.yaml#definitions/uint32
+ $ref: /schemas/types.yaml#/definitions/uint32
description: |
Transmitt FIFO depth see dt-bindings/net/ti-dp83867.h for values
rx-fifo-depth:
- $ref: /schemas/types.yaml#definitions/uint32
+ $ref: /schemas/types.yaml#/definitions/uint32
description: |
Receive FIFO depth see dt-bindings/net/ti-dp83867.h for values
ti,clk-output-sel:
- $ref: /schemas/types.yaml#definitions/uint32
+ $ref: /schemas/types.yaml#/definitions/uint32
description: |
Muxing option for CLK_OUT pin. See dt-bindings/net/ti-dp83867.h
for applicable values. The CLK_OUT pin can also be disabled by this
property. When omitted, the PHY's default will be left as is.
ti,rx-internal-delay:
- $ref: /schemas/types.yaml#definitions/uint32
+ $ref: /schemas/types.yaml#/definitions/uint32
description: |
RGMII Receive Clock Delay - see dt-bindings/net/ti-dp83867.h
for applicable values. Required only if interface type is
PHY_INTERFACE_MODE_RGMII_ID or PHY_INTERFACE_MODE_RGMII_RXID.
ti,tx-internal-delay:
- $ref: /schemas/types.yaml#definitions/uint32
+ $ref: /schemas/types.yaml#/definitions/uint32
description: |
RGMII Transmit Clock Delay - see dt-bindings/net/ti-dp83867.h
for applicable values. Required only if interface type is
@@ -101,7 +101,7 @@
ti,fifo-depth:
deprecated: true
- $ref: /schemas/types.yaml#definitions/uint32
+ $ref: /schemas/types.yaml#/definitions/uint32
description: |
Transmitt FIFO depth- see dt-bindings/net/ti-dp83867.h for applicable
values.
diff --git a/Documentation/devicetree/bindings/net/ti,dp83869.yaml b/Documentation/devicetree/bindings/net/ti,dp83869.yaml
index c3235f0..70a1209 100644
--- a/Documentation/devicetree/bindings/net/ti,dp83869.yaml
+++ b/Documentation/devicetree/bindings/net/ti,dp83869.yaml
@@ -44,22 +44,22 @@
to a maximum value (70 ohms).
tx-fifo-depth:
- $ref: /schemas/types.yaml#definitions/uint32
+ $ref: /schemas/types.yaml#/definitions/uint32
description: |
Transmitt FIFO depth see dt-bindings/net/ti-dp83869.h for values
rx-fifo-depth:
- $ref: /schemas/types.yaml#definitions/uint32
+ $ref: /schemas/types.yaml#/definitions/uint32
description: |
Receive FIFO depth see dt-bindings/net/ti-dp83869.h for values
ti,clk-output-sel:
- $ref: /schemas/types.yaml#definitions/uint32
+ $ref: /schemas/types.yaml#/definitions/uint32
description: |
Muxing option for CLK_OUT pin see dt-bindings/net/ti-dp83869.h for values.
ti,op-mode:
- $ref: /schemas/types.yaml#definitions/uint32
+ $ref: /schemas/types.yaml#/definitions/uint32
description: |
Operational mode for the PHY. If this is not set then the operational
mode is set by the straps. see dt-bindings/net/ti-dp83869.h for values
diff --git a/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml b/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml
index 227270c..c47b58f 100644
--- a/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml
+++ b/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml
@@ -119,12 +119,12 @@
description: label associated with this port
ti,mac-only:
- $ref: /schemas/types.yaml#definitions/flag
+ $ref: /schemas/types.yaml#/definitions/flag
description:
Specifies the port works in mac-only mode.
ti,syscon-efuse:
- $ref: /schemas/types.yaml#definitions/phandle-array
+ $ref: /schemas/types.yaml#/definitions/phandle-array
description:
Phandle to the system control device node which provides access
to efuse IO range with MAC addresses
diff --git a/Documentation/devicetree/bindings/net/wireless/qcom,ath11k.yaml b/Documentation/devicetree/bindings/net/wireless/qcom,ath11k.yaml
index 6af9991..85c2f69 100644
--- a/Documentation/devicetree/bindings/net/wireless/qcom,ath11k.yaml
+++ b/Documentation/devicetree/bindings/net/wireless/qcom,ath11k.yaml
@@ -136,7 +136,7 @@
- const: tcl2host-status-ring
qcom,rproc:
- $ref: /schemas/types.yaml#definitions/phandle
+ $ref: /schemas/types.yaml#/definitions/phandle
description:
DT entry of q6v5-wcss remoteproc driver.
Phandle to a node that can contain the following properties
diff --git a/Documentation/devicetree/bindings/phy/ti,omap-usb2.yaml b/Documentation/devicetree/bindings/phy/ti,omap-usb2.yaml
index 83d5d0a..cbbf5e8 100644
--- a/Documentation/devicetree/bindings/phy/ti,omap-usb2.yaml
+++ b/Documentation/devicetree/bindings/phy/ti,omap-usb2.yaml
@@ -44,13 +44,13 @@
- const: refclk
syscon-phy-power:
- $ref: /schemas/types.yaml#definitions/phandle-array
+ $ref: /schemas/types.yaml#/definitions/phandle-array
description:
phandle/offset pair. Phandle to the system control module and
register offset to power on/off the PHY.
ctrl-module:
- $ref: /schemas/types.yaml#definitions/phandle
+ $ref: /schemas/types.yaml#/definitions/phandle
description:
(deprecated) phandle of the control module used by PHY driver
to power on the PHY. Use syscon-phy-power instead.
diff --git a/Documentation/devicetree/bindings/power/mediatek,power-controller.yaml b/Documentation/devicetree/bindings/power/mediatek,power-controller.yaml
index fd12baf..d14cb9b 100644
--- a/Documentation/devicetree/bindings/power/mediatek,power-controller.yaml
+++ b/Documentation/devicetree/bindings/power/mediatek,power-controller.yaml
@@ -83,11 +83,11 @@
SUSBSYS clocks.
mediatek,infracfg:
- $ref: /schemas/types.yaml#definitions/phandle
+ $ref: /schemas/types.yaml#/definitions/phandle
description: phandle to the device containing the INFRACFG register range.
mediatek,smi:
- $ref: /schemas/types.yaml#definitions/phandle
+ $ref: /schemas/types.yaml#/definitions/phandle
description: phandle to the device containing the SMI register range.
patternProperties:
@@ -131,11 +131,11 @@
SUSBSYS clocks.
mediatek,infracfg:
- $ref: /schemas/types.yaml#definitions/phandle
+ $ref: /schemas/types.yaml#/definitions/phandle
description: phandle to the device containing the INFRACFG register range.
mediatek,smi:
- $ref: /schemas/types.yaml#definitions/phandle
+ $ref: /schemas/types.yaml#/definitions/phandle
description: phandle to the device containing the SMI register range.
patternProperties:
@@ -179,11 +179,11 @@
SUSBSYS clocks.
mediatek,infracfg:
- $ref: /schemas/types.yaml#definitions/phandle
+ $ref: /schemas/types.yaml#/definitions/phandle
description: phandle to the device containing the INFRACFG register range.
mediatek,smi:
- $ref: /schemas/types.yaml#definitions/phandle
+ $ref: /schemas/types.yaml#/definitions/phandle
description: phandle to the device containing the SMI register range.
required:
diff --git a/Documentation/devicetree/bindings/power/supply/cw2015_battery.yaml b/Documentation/devicetree/bindings/power/supply/cw2015_battery.yaml
index ee92e6a..5fcdf58 100644
--- a/Documentation/devicetree/bindings/power/supply/cw2015_battery.yaml
+++ b/Documentation/devicetree/bindings/power/supply/cw2015_battery.yaml
@@ -27,7 +27,7 @@
of this binary blob is kept secret by CellWise. The only way to obtain
it is to mail two batteries to a test facility of CellWise and receive
back a test report with the binary blob.
- $ref: /schemas/types.yaml#definitions/uint8-array
+ $ref: /schemas/types.yaml#/definitions/uint8-array
minItems: 64
maxItems: 64
diff --git a/Documentation/devicetree/bindings/powerpc/sleep.yaml b/Documentation/devicetree/bindings/powerpc/sleep.yaml
index 6494c7d..1b0936a 100644
--- a/Documentation/devicetree/bindings/powerpc/sleep.yaml
+++ b/Documentation/devicetree/bindings/powerpc/sleep.yaml
@@ -42,6 +42,6 @@
properties:
sleep:
- $ref: /schemas/types.yaml#definitions/phandle-array
+ $ref: /schemas/types.yaml#/definitions/phandle-array
additionalProperties: true
diff --git a/Documentation/devicetree/bindings/regulator/anatop-regulator.yaml b/Documentation/devicetree/bindings/regulator/anatop-regulator.yaml
index e7b3abe..0a66338 100644
--- a/Documentation/devicetree/bindings/regulator/anatop-regulator.yaml
+++ b/Documentation/devicetree/bindings/regulator/anatop-regulator.yaml
@@ -59,7 +59,6 @@
description: u32 value representing regulator enable bit offset.
vin-supply:
- $ref: '/schemas/types.yaml#/definitions/phandle'
description: input supply phandle.
required:
diff --git a/Documentation/devicetree/bindings/serial/8250.yaml b/Documentation/devicetree/bindings/serial/8250.yaml
index c1d4c19..f54cae9 100644
--- a/Documentation/devicetree/bindings/serial/8250.yaml
+++ b/Documentation/devicetree/bindings/serial/8250.yaml
@@ -126,7 +126,7 @@
maxItems: 1
current-speed:
- $ref: /schemas/types.yaml#definitions/uint32
+ $ref: /schemas/types.yaml#/definitions/uint32
description: The current active speed of the UART.
reg-offset:
@@ -154,7 +154,7 @@
Set to indicate that the port does not implement loopback test mode.
fifo-size:
- $ref: /schemas/types.yaml#definitions/uint32
+ $ref: /schemas/types.yaml#/definitions/uint32
description: The fifo size of the UART.
auto-flow-control:
@@ -165,7 +165,7 @@
property.
tx-threshold:
- $ref: /schemas/types.yaml#definitions/uint32
+ $ref: /schemas/types.yaml#/definitions/uint32
description: |
Specify the TX FIFO low water indication for parts with programmable
TX FIFO thresholds.
diff --git a/Documentation/devicetree/bindings/serial/litex,liteuart.yaml b/Documentation/devicetree/bindings/serial/litex,liteuart.yaml
index bc79b3c..c4f1f48 100644
--- a/Documentation/devicetree/bindings/serial/litex,liteuart.yaml
+++ b/Documentation/devicetree/bindings/serial/litex,liteuart.yaml
@@ -29,6 +29,8 @@
- compatible
- reg
+additionalProperties: false
+
examples:
- |
uart0: serial@e0001800 {
diff --git a/Documentation/devicetree/bindings/soc/litex/litex,soc-controller.yaml b/Documentation/devicetree/bindings/soc/litex/litex,soc-controller.yaml
index e2b7887..c8b57c7 100644
--- a/Documentation/devicetree/bindings/soc/litex/litex,soc-controller.yaml
+++ b/Documentation/devicetree/bindings/soc/litex/litex,soc-controller.yaml
@@ -28,6 +28,8 @@
- compatible
- reg
+additionalProperties: false
+
examples:
- |
soc_ctrl0: soc-controller@f0000000 {
diff --git a/Documentation/devicetree/bindings/soc/mediatek/devapc.yaml b/Documentation/devicetree/bindings/soc/mediatek/devapc.yaml
index 6c763f8..31e4d3c 100644
--- a/Documentation/devicetree/bindings/soc/mediatek/devapc.yaml
+++ b/Documentation/devicetree/bindings/soc/mediatek/devapc.yaml
@@ -44,6 +44,8 @@
- clocks
- clock-names
+additionalProperties: false
+
examples:
- |
#include <dt-bindings/interrupt-controller/arm-gic.h>
diff --git a/Documentation/devicetree/bindings/soc/ti/k3-ringacc.yaml b/Documentation/devicetree/bindings/soc/ti/k3-ringacc.yaml
index c3c595e2..ddea3d4 100644
--- a/Documentation/devicetree/bindings/soc/ti/k3-ringacc.yaml
+++ b/Documentation/devicetree/bindings/soc/ti/k3-ringacc.yaml
@@ -55,7 +55,7 @@
description: TI-SCI RM subtype for GP ring range
ti,sci:
- $ref: /schemas/types.yaml#definitions/phandle-array
+ $ref: /schemas/types.yaml#/definitions/phandle-array
description: phandle on TI-SCI compatible System controller node
ti,sci-dev-id:
diff --git a/Documentation/devicetree/bindings/soc/xilinx/xlnx,vcu-settings.yaml b/Documentation/devicetree/bindings/soc/xilinx/xlnx,vcu-settings.yaml
index 378d0ce..cb245f4 100644
--- a/Documentation/devicetree/bindings/soc/xilinx/xlnx,vcu-settings.yaml
+++ b/Documentation/devicetree/bindings/soc/xilinx/xlnx,vcu-settings.yaml
@@ -26,9 +26,18 @@
- compatible
- reg
+additionalProperties: false
+
examples:
- |
- xlnx_vcu: vcu@a0041000 {
- compatible = "xlnx,vcu-settings", "syscon";
- reg = <0x0 0xa0041000 0x0 0x1000>;
+ fpga {
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ xlnx_vcu: vcu@a0041000 {
+ compatible = "xlnx,vcu-settings", "syscon";
+ reg = <0x0 0xa0041000 0x0 0x1000>;
+ };
};
+
+...
diff --git a/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-codec.yaml b/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-codec.yaml
index be390ac..dd47fef 100644
--- a/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-codec.yaml
+++ b/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-codec.yaml
@@ -57,7 +57,7 @@
A list of the connections between audio components. Each entry
is a pair of strings, the first being the connection's sink, the
second being the connection's source.
- $ref: /schemas/types.yaml#definitions/non-unique-string-array
+ $ref: /schemas/types.yaml#/definitions/non-unique-string-array
minItems: 2
maxItems: 18
items:
diff --git a/Documentation/devicetree/bindings/sound/nvidia,tegra30-hda.yaml b/Documentation/devicetree/bindings/sound/nvidia,tegra30-hda.yaml
index e543a61..b55775e 100644
--- a/Documentation/devicetree/bindings/sound/nvidia,tegra30-hda.yaml
+++ b/Documentation/devicetree/bindings/sound/nvidia,tegra30-hda.yaml
@@ -44,7 +44,6 @@
maxItems: 3
clock-names:
- maxItems: 3
items:
- const: hda
- const: hda2hdmi
@@ -54,7 +53,6 @@
maxItems: 3
reset-names:
- maxItems: 3
items:
- const: hda
- const: hda2hdmi
diff --git a/Documentation/devicetree/bindings/sound/st,stm32-sai.yaml b/Documentation/devicetree/bindings/sound/st,stm32-sai.yaml
index 6ad48c7..f2443b6 100644
--- a/Documentation/devicetree/bindings/sound/st,stm32-sai.yaml
+++ b/Documentation/devicetree/bindings/sound/st,stm32-sai.yaml
@@ -106,7 +106,7 @@
Must contain the phandle and index of the SAI sub-block providing
the synchronization.
allOf:
- - $ref: /schemas/types.yaml#definitions/phandle-array
+ - $ref: /schemas/types.yaml#/definitions/phandle-array
- maxItems: 1
st,iec60958:
@@ -117,7 +117,7 @@
configured according to protocol defined in related DAI link node,
such as i2s, left justified, right justified, dsp and pdm protocols.
allOf:
- - $ref: /schemas/types.yaml#definitions/flag
+ - $ref: /schemas/types.yaml#/definitions/flag
"#clock-cells":
description: Configure the SAI device as master clock provider.
diff --git a/Documentation/devicetree/bindings/usb/renesas,usb-xhci.yaml b/Documentation/devicetree/bindings/usb/renesas,usb-xhci.yaml
index 0f078bd..2260325 100644
--- a/Documentation/devicetree/bindings/usb/renesas,usb-xhci.yaml
+++ b/Documentation/devicetree/bindings/usb/renesas,usb-xhci.yaml
@@ -51,7 +51,6 @@
maxItems: 1
phy-names:
- maxItems: 1
items:
- const: usb
diff --git a/Documentation/devicetree/bindings/usb/renesas,usbhs.yaml b/Documentation/devicetree/bindings/usb/renesas,usbhs.yaml
index 737c1f4..54c361d 100644
--- a/Documentation/devicetree/bindings/usb/renesas,usbhs.yaml
+++ b/Documentation/devicetree/bindings/usb/renesas,usbhs.yaml
@@ -74,11 +74,8 @@
phys:
maxItems: 1
- items:
- - description: phandle + phy specifier pair.
phy-names:
- maxItems: 1
items:
- const: usb
diff --git a/Documentation/filesystems/ext4/journal.rst b/Documentation/filesystems/ext4/journal.rst
index 849d5b1..cdbfec4 100644
--- a/Documentation/filesystems/ext4/journal.rst
+++ b/Documentation/filesystems/ext4/journal.rst
@@ -681,3 +681,53 @@
- Stores the TID of the commit, CRC of the fast commit of which this tag
represents the end of
+Fast Commit Replay Idempotence
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Fast commits tags are idempotent in nature provided the recovery code follows
+certain rules. The guiding principle that the commit path follows while
+committing is that it stores the result of a particular operation instead of
+storing the procedure.
+
+Let's consider this rename operation: 'mv /a /b'. Let's assume dirent '/a'
+was associated with inode 10. During fast commit, instead of storing this
+operation as a procedure "rename a to b", we store the resulting file system
+state as a "series" of outcomes:
+
+- Link dirent b to inode 10
+- Unlink dirent a
+- Inode 10 with valid refcount
+
+Now when recovery code runs, it needs "enforce" this state on the file
+system. This is what guarantees idempotence of fast commit replay.
+
+Let's take an example of a procedure that is not idempotent and see how fast
+commits make it idempotent. Consider following sequence of operations:
+
+1) rm A
+2) mv B A
+3) read A
+
+If we store this sequence of operations as is then the replay is not idempotent.
+Let's say while in replay, we crash after (2). During the second replay,
+file A (which was actually created as a result of "mv B A" operation) would get
+deleted. Thus, file named A would be absent when we try to read A. So, this
+sequence of operations is not idempotent. However, as mentioned above, instead
+of storing the procedure fast commits store the outcome of each procedure. Thus
+the fast commit log for above procedure would be as follows:
+
+(Let's assume dirent A was linked to inode 10 and dirent B was linked to
+inode 11 before the replay)
+
+1) Unlink A
+2) Link A to inode 11
+3) Unlink B
+4) Inode 11
+
+If we crash after (3) we will have file A linked to inode 11. During the second
+replay, we will remove file A (inode 11). But we will create it back and make
+it point to inode 11. We won't find B, so we'll just skip that step. At this
+point, the refcount for inode 11 is not reliable, but that gets fixed by the
+replay of last inode 11 tag. Thus, by converting a non-idempotent procedure
+into a series of idempotent outcomes, fast commits ensured idempotence during
+the replay.
diff --git a/Documentation/process/submit-checklist.rst b/Documentation/process/submit-checklist.rst
index 1879f88..230ee42 100644
--- a/Documentation/process/submit-checklist.rst
+++ b/Documentation/process/submit-checklist.rst
@@ -75,44 +75,44 @@
13) Has been build- and runtime tested with and without ``CONFIG_SMP`` and
``CONFIG_PREEMPT.``
-16) All codepaths have been exercised with all lockdep features enabled.
+14) All codepaths have been exercised with all lockdep features enabled.
-17) All new ``/proc`` entries are documented under ``Documentation/``
+15) All new ``/proc`` entries are documented under ``Documentation/``
-18) All new kernel boot parameters are documented in
+16) All new kernel boot parameters are documented in
``Documentation/admin-guide/kernel-parameters.rst``.
-19) All new module parameters are documented with ``MODULE_PARM_DESC()``
+17) All new module parameters are documented with ``MODULE_PARM_DESC()``
-20) All new userspace interfaces are documented in ``Documentation/ABI/``.
+18) All new userspace interfaces are documented in ``Documentation/ABI/``.
See ``Documentation/ABI/README`` for more information.
Patches that change userspace interfaces should be CCed to
linux-api@vger.kernel.org.
-21) Check that it all passes ``make headers_check``.
+19) Check that it all passes ``make headers_check``.
-22) Has been checked with injection of at least slab and page-allocation
+20) Has been checked with injection of at least slab and page-allocation
failures. See ``Documentation/fault-injection/``.
If the new code is substantial, addition of subsystem-specific fault
injection might be appropriate.
-23) Newly-added code has been compiled with ``gcc -W`` (use
+21) Newly-added code has been compiled with ``gcc -W`` (use
``make EXTRA_CFLAGS=-W``). This will generate lots of noise, but is good
for finding bugs like "warning: comparison between signed and unsigned".
-24) Tested after it has been merged into the -mm patchset to make sure
+22) Tested after it has been merged into the -mm patchset to make sure
that it still works with all of the other queued patches and various
changes in the VM, VFS, and other subsystems.
-25) All memory barriers {e.g., ``barrier()``, ``rmb()``, ``wmb()``} need a
+23) All memory barriers {e.g., ``barrier()``, ``rmb()``, ``wmb()``} need a
comment in the source code that explains the logic of what they are doing
and why.
-26) If any ioctl's are added by the patch, then also update
+24) If any ioctl's are added by the patch, then also update
``Documentation/userspace-api/ioctl/ioctl-number.rst``.
-27) If your modified source code depends on or uses any of the kernel
+25) If your modified source code depends on or uses any of the kernel
APIs or features that are related to the following ``Kconfig`` symbols,
then test multiple builds with the related ``Kconfig`` symbols disabled
and/or ``=m`` (if that option is available) [not all of these at the
diff --git a/Documentation/process/submitting-patches.rst b/Documentation/process/submitting-patches.rst
index fb8261a..5ba5412 100644
--- a/Documentation/process/submitting-patches.rst
+++ b/Documentation/process/submitting-patches.rst
@@ -411,6 +411,12 @@
now, but you can do this to mark internal company procedures or just
point out some special detail about the sign-off.
+Any further SoBs (Signed-off-by:'s) following the author's SoB are from
+people handling and transporting the patch, but were not involved in its
+development. SoB chains should reflect the **real** route a patch took
+as it was propagated to the maintainers and ultimately to Linus, with
+the first SoB entry signalling primary authorship of a single author.
+
When to use Acked-by:, Cc:, and Co-developed-by:
------------------------------------------------
@@ -446,7 +452,7 @@
have been included in the discussion.
Co-developed-by: states that the patch was co-created by multiple developers;
-it is a used to give attribution to co-authors (in addition to the author
+it is used to give attribution to co-authors (in addition to the author
attributed by the From: tag) when several people work on a single patch. Since
Co-developed-by: denotes authorship, every Co-developed-by: must be immediately
followed by a Signed-off-by: of the associated co-author. Standard sign-off
diff --git a/MAINTAINERS b/MAINTAINERS
index ad0e34b..154c870 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3199,6 +3199,7 @@
T: git git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git
F: block/
F: drivers/block/
+F: fs/block_dev.c
F: include/linux/blk*
F: kernel/trace/blktrace.c
F: lib/sbitmap.c
@@ -4354,8 +4355,8 @@
F: drivers/media/pci/cobalt/
COCCINELLE/Semantic Patches (SmPL)
-M: Julia Lawall <Julia.Lawall@lip6.fr>
-M: Gilles Muller <Gilles.Muller@lip6.fr>
+M: Julia Lawall <Julia.Lawall@inria.fr>
+M: Gilles Muller <Gilles.Muller@inria.fr>
M: Nicolas Palix <nicolas.palix@imag.fr>
M: Michal Marek <michal.lkml@markovi.net>
L: cocci@systeme.lip6.fr (moderated for non-subscribers)
diff --git a/arch/alpha/kernel/sys_jensen.c b/arch/alpha/kernel/sys_jensen.c
index 0a2ab6c..e5d870f 100644
--- a/arch/alpha/kernel/sys_jensen.c
+++ b/arch/alpha/kernel/sys_jensen.c
@@ -7,7 +7,7 @@
*
* Code supporting the Jensen.
*/
-
+#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/mm.h>
diff --git a/arch/arm/include/asm/efi.h b/arch/arm/include/asm/efi.h
index 3ee4f43..9de7ab2 100644
--- a/arch/arm/include/asm/efi.h
+++ b/arch/arm/include/asm/efi.h
@@ -66,24 +66,17 @@
#define MAX_UNCOMP_KERNEL_SIZE SZ_32M
/*
- * phys-to-virt patching requires that the physical to virtual offset fits
- * into the immediate field of an add/sub instruction, which comes down to the
- * 24 least significant bits being zero, and so the offset should be a multiple
- * of 16 MB. Since PAGE_OFFSET itself is a multiple of 16 MB, the physical
- * base should be aligned to 16 MB as well.
+ * phys-to-virt patching requires that the physical to virtual offset is a
+ * multiple of 2 MiB. However, using an alignment smaller than TEXT_OFFSET
+ * here throws off the memory allocation logic, so let's use the lowest power
+ * of two greater than 2 MiB and greater than TEXT_OFFSET.
*/
-#define EFI_PHYS_ALIGN SZ_16M
-
-/* on ARM, the FDT should be located in a lowmem region */
-static inline unsigned long efi_get_max_fdt_addr(unsigned long image_addr)
-{
- return round_down(image_addr, EFI_PHYS_ALIGN) + SZ_512M;
-}
+#define EFI_PHYS_ALIGN max(UL(SZ_2M), roundup_pow_of_two(TEXT_OFFSET))
/* on ARM, the initrd should be loaded in a lowmem region */
static inline unsigned long efi_get_max_initrd_addr(unsigned long image_addr)
{
- return round_down(image_addr, EFI_PHYS_ALIGN) + SZ_512M;
+ return round_down(image_addr, SZ_4M) + SZ_512M;
}
struct efi_arm_entry_state {
@@ -93,4 +86,9 @@
u32 sctlr_after_ebs;
};
+static inline void efi_capsule_flush_cache_range(void *addr, int size)
+{
+ __cpuc_flush_dcache_area(addr, size);
+}
+
#endif /* _ASM_ARM_EFI_H */
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 6ab2b0a..5c48eb4 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -549,7 +549,7 @@
seq_printf(p, "%*s%u: ", prec - 1, "IPI", i);
for_each_online_cpu(cpu)
- seq_printf(p, "%10u ", kstat_irqs_cpu(irq, cpu));
+ seq_printf(p, "%10u ", irq_desc_kstat_cpu(ipi_desc[i], cpu));
seq_printf(p, " %s\n", ipi_types[i]);
}
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index d0d94f7..05e1735 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1877,6 +1877,7 @@
select EFI_RUNTIME_WRAPPERS
select EFI_STUB
select EFI_GENERIC_STUB
+ imply IMA_SECURE_AND_OR_TRUSTED_BOOT
default y
help
This option provides support for runtime services provided
diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h
index 973b144..3578aba 100644
--- a/arch/arm64/include/asm/efi.h
+++ b/arch/arm64/include/asm/efi.h
@@ -64,12 +64,6 @@
#define EFI_KIMG_ALIGN \
(SEGMENT_ALIGN > THREAD_ALIGN ? SEGMENT_ALIGN : THREAD_ALIGN)
-/* on arm64, the FDT may be located anywhere in system RAM */
-static inline unsigned long efi_get_max_fdt_addr(unsigned long image_addr)
-{
- return ULONG_MAX;
-}
-
/*
* On arm64, we have to ensure that the initrd ends up in the linear region,
* which is a 1 GB aligned region of size '1UL << (VA_BITS_MIN - 1)' that is
@@ -141,4 +135,9 @@
void efi_virtmap_load(void);
void efi_virtmap_unload(void);
+static inline void efi_capsule_flush_cache_range(void *addr, int size)
+{
+ __flush_dcache_area(addr, size);
+}
+
#endif /* _ASM_EFI_H */
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 19b1705..6bc3a36 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -811,7 +811,7 @@
seq_printf(p, "%*s%u:%s", prec - 1, "IPI", i,
prec >= 4 ? " " : "");
for_each_online_cpu(cpu)
- seq_printf(p, "%10u ", kstat_irqs_cpu(irq, cpu));
+ seq_printf(p, "%10u ", irq_desc_kstat_cpu(ipi_desc[i], cpu));
seq_printf(p, " %s\n", ipi_types[i]);
}
diff --git a/arch/parisc/kernel/irq.c b/arch/parisc/kernel/irq.c
index e76c866..49cd6d2 100644
--- a/arch/parisc/kernel/irq.c
+++ b/arch/parisc/kernel/irq.c
@@ -216,12 +216,9 @@
if (!action)
goto skip;
seq_printf(p, "%3d: ", i);
-#ifdef CONFIG_SMP
+
for_each_online_cpu(j)
- seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
-#else
- seq_printf(p, "%10u ", kstat_irqs(i));
-#endif
+ seq_printf(p, "%10u ", irq_desc_kstat_cpu(desc, j));
seq_printf(p, " %14s", irq_desc_get_chip(desc)->name);
#ifndef PARISC_IRQ_CR16_COUNTS
diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index ec0b218..2b8da92 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -373,6 +373,8 @@
targets += $(image-y) $(initrd-y)
targets += $(foreach x, dtbImage uImage cuImage simpleImage treeImage, \
$(patsubst $(x).%, dts/%.dtb, $(filter $(x).%, $(image-y))))
+targets += $(foreach x, dtbImage uImage cuImage simpleImage treeImage, \
+ $(patsubst $(x).%, dts/fsl/%.dtb, $(filter $(x).%, $(image-y))))
$(addprefix $(obj)/, $(initrd-y)): $(obj)/ramdisk.image.gz
diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
index cfa8148..cc1bca5 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -180,7 +180,12 @@
#define VCPU_GPR(n) __VCPU_GPR(__REG_##n)
#ifdef __KERNEL__
-#ifdef CONFIG_PPC64
+
+/*
+ * We use __powerpc64__ here because we want the compat VDSO to use the 32-bit
+ * version below in the else case of the ifdef.
+ */
+#ifdef __powerpc64__
#define STACKFRAMESIZE 256
#define __STK_REG(i) (112 + ((i)-14)*8)
diff --git a/arch/powerpc/include/asm/vdso/timebase.h b/arch/powerpc/include/asm/vdso/timebase.h
index b558b07..881f655 100644
--- a/arch/powerpc/include/asm/vdso/timebase.h
+++ b/arch/powerpc/include/asm/vdso/timebase.h
@@ -49,7 +49,7 @@
return mftb();
}
-static inline u64 get_tb(void)
+static __always_inline u64 get_tb(void)
{
unsigned int tbhi, tblo, tbhi2;
diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
index 541664d9..a2f72c9 100644
--- a/arch/powerpc/kernel/head_32.h
+++ b/arch/powerpc/kernel/head_32.h
@@ -121,18 +121,28 @@
#ifdef CONFIG_VMAP_STACK
mfspr r11, SPRN_SRR0
mtctr r11
-#endif
+ andi. r11, r9, MSR_PR
+ mr r11, r1
+ lwz r1,TASK_STACK-THREAD(r12)
+ beq- 99f
+ addi r1, r1, THREAD_SIZE - INT_FRAME_SIZE
+ li r10, MSR_KERNEL & ~(MSR_IR | MSR_RI) /* can take DTLB miss */
+ mtmsr r10
+ isync
+ tovirt(r12, r12)
+ stw r11,GPR1(r1)
+ stw r11,0(r1)
+ mr r11, r1
+#else
andi. r11, r9, MSR_PR
lwz r11,TASK_STACK-THREAD(r12)
beq- 99f
addi r11, r11, THREAD_SIZE - INT_FRAME_SIZE
-#ifdef CONFIG_VMAP_STACK
- li r10, MSR_KERNEL & ~(MSR_IR | MSR_RI) /* can take DTLB miss */
- mtmsr r10
- isync
+ tophys(r11, r11)
+ stw r1,GPR1(r11)
+ stw r1,0(r11)
+ tovirt(r1, r11) /* set new kernel sp */
#endif
- tovirt_vmstack r12, r12
- tophys_novmstack r11, r11
mflr r10
stw r10, _LINK(r11)
#ifdef CONFIG_VMAP_STACK
@@ -140,9 +150,6 @@
#else
mfspr r10,SPRN_SRR0
#endif
- stw r1,GPR1(r11)
- stw r1,0(r11)
- tovirt_novmstack r1, r11 /* set new kernel sp */
stw r10,_NIP(r11)
mfcr r10
rlwinm r10,r10,0,4,2 /* Clear SO bit in CR */
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 2b9b1bb..9e2246e 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -990,7 +990,7 @@
{ NULL, },
};
-static int init_big_cores(void)
+static int __init init_big_cores(void)
{
int cpu;
diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile
index 59aa294..9cb6f524 100644
--- a/arch/powerpc/kernel/vdso32/Makefile
+++ b/arch/powerpc/kernel/vdso32/Makefile
@@ -2,7 +2,7 @@
# List of files in the vdso, has to be asm only for now
-ARCH_REL_TYPE_ABS := R_PPC_JUMP_SLOT|R_PPC_GLOB_DAT|R_PPC_ADDR32|R_PPC_ADDR24|R_PPC_ADDR16|R_PPC_ADDR16_LO|R_PPC_ADDR16_HI|R_PPC_ADDR16_HA|R_PPC_ADDR14|R_PPC_ADDR14_BRTAKEN|R_PPC_ADDR14_BRNTAKEN
+ARCH_REL_TYPE_ABS := R_PPC_JUMP_SLOT|R_PPC_GLOB_DAT|R_PPC_ADDR32|R_PPC_ADDR24|R_PPC_ADDR16|R_PPC_ADDR16_LO|R_PPC_ADDR16_HI|R_PPC_ADDR16_HA|R_PPC_ADDR14|R_PPC_ADDR14_BRTAKEN|R_PPC_ADDR14_BRNTAKEN|R_PPC_REL24
include $(srctree)/lib/vdso/Makefile
obj-vdso32 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o getcpu.o
@@ -27,7 +27,7 @@
CC32FLAGS :=
ifdef CONFIG_PPC64
CC32FLAGS += -m32
-KBUILD_CFLAGS := $(filter-out -mcmodel=medium,$(KBUILD_CFLAGS))
+KBUILD_CFLAGS := $(filter-out -mcmodel=medium -mabi=elfv1 -mabi=elfv2 -mcall-aixdesc,$(KBUILD_CFLAGS))
endif
targets := $(obj-vdso32) vdso32.so.dbg
diff --git a/arch/powerpc/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile
index d365810..bf363ff 100644
--- a/arch/powerpc/kernel/vdso64/Makefile
+++ b/arch/powerpc/kernel/vdso64/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
# List of files in the vdso, has to be asm only for now
-ARCH_REL_TYPE_ABS := R_PPC_JUMP_SLOT|R_PPC_GLOB_DAT|R_PPC_ADDR32|R_PPC_ADDR24|R_PPC_ADDR16|R_PPC_ADDR16_LO|R_PPC_ADDR16_HI|R_PPC_ADDR16_HA|R_PPC_ADDR14|R_PPC_ADDR14_BRTAKEN|R_PPC_ADDR14_BRNTAKEN
+ARCH_REL_TYPE_ABS := R_PPC_JUMP_SLOT|R_PPC_GLOB_DAT|R_PPC_ADDR32|R_PPC_ADDR24|R_PPC_ADDR16|R_PPC_ADDR16_LO|R_PPC_ADDR16_HI|R_PPC_ADDR16_HA|R_PPC_ADDR14|R_PPC_ADDR14_BRTAKEN|R_PPC_ADDR14_BRNTAKEN|R_PPC_REL24
include $(srctree)/lib/vdso/Makefile
obj-vdso64 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o getcpu.o
diff --git a/arch/riscv/include/asm/efi.h b/arch/riscv/include/asm/efi.h
index 7542282..6d98cd9 100644
--- a/arch/riscv/include/asm/efi.h
+++ b/arch/riscv/include/asm/efi.h
@@ -27,12 +27,6 @@
#define ARCH_EFI_IRQ_FLAGS_MASK (SR_IE | SR_SPIE)
-/* on RISC-V, the FDT may be located anywhere in system RAM */
-static inline unsigned long efi_get_max_fdt_addr(unsigned long image_addr)
-{
- return ULONG_MAX;
-}
-
/* Load initrd at enough distance from DRAM start */
static inline unsigned long efi_get_max_initrd_addr(unsigned long image_addr)
{
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index 13ba533..bf53791 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -176,7 +176,7 @@
* Make sure that any memory beyond mem_start + (-PAGE_OFFSET) is removed
* as it is unusable by kernel.
*/
- memblock_enforce_memory_limit(mem_start - PAGE_OFFSET);
+ memblock_enforce_memory_limit(-PAGE_OFFSET);
/* Reserve from the start of the kernel to the end of the kernel */
memblock_reserve(vmlinux_start, vmlinux_end - vmlinux_start);
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index 3514420..f8a8b94 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -124,7 +124,7 @@
raw_spin_lock_irqsave(&desc->lock, flags);
seq_printf(p, "%3d: ", irq);
for_each_online_cpu(cpu)
- seq_printf(p, "%10u ", kstat_irqs_cpu(irq, cpu));
+ seq_printf(p, "%10u ", irq_desc_kstat_cpu(desc, cpu));
if (desc->irq_data.chip)
seq_printf(p, " %8s", desc->irq_data.chip->name);
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 40b8fd3..e0bc398 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -35,7 +35,7 @@
cflags-$(CONFIG_X86_64) := -mcmodel=small -mno-red-zone
KBUILD_CFLAGS += $(cflags-y)
KBUILD_CFLAGS += -mno-mmx -mno-sse
-KBUILD_CFLAGS += -ffreestanding
+KBUILD_CFLAGS += -ffreestanding -fshort-wchar
KBUILD_CFLAGS += -fno-stack-protector
KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member)
KBUILD_CFLAGS += $(call cc-disable-warning, gnu)
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index bc9758e..c98f783 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -213,8 +213,6 @@
static inline bool efi_is_native(void)
{
- if (!IS_ENABLED(CONFIG_X86_64))
- return true;
return efi_is_64bit();
}
@@ -382,4 +380,7 @@
}
#endif
+#define arch_ima_efi_boot_mode \
+ ({ extern struct boot_params boot_params; boot_params.secure_boot; })
+
#endif /* _ASM_X86_EFI_H */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 68608bd..5eeb808 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -161,5 +161,3 @@
obj-$(CONFIG_MMCONF_FAM10H) += mmconf-fam10h_64.o
obj-y += vsmp_64.o
endif
-
-obj-$(CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT) += ima_arch.o
diff --git a/arch/x86/kernel/ima_arch.c b/arch/x86/kernel/ima_arch.c
deleted file mode 100644
index 7dfb1e8..0000000
--- a/arch/x86/kernel/ima_arch.c
+++ /dev/null
@@ -1,94 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0+ */
-/*
- * Copyright (C) 2018 IBM Corporation
- */
-#include <linux/efi.h>
-#include <linux/module.h>
-#include <linux/ima.h>
-
-extern struct boot_params boot_params;
-
-static enum efi_secureboot_mode get_sb_mode(void)
-{
- efi_guid_t efi_variable_guid = EFI_GLOBAL_VARIABLE_GUID;
- efi_status_t status;
- unsigned long size;
- u8 secboot, setupmode;
-
- size = sizeof(secboot);
-
- if (!efi_rt_services_supported(EFI_RT_SUPPORTED_GET_VARIABLE)) {
- pr_info("ima: secureboot mode unknown, no efi\n");
- return efi_secureboot_mode_unknown;
- }
-
- /* Get variable contents into buffer */
- status = efi.get_variable(L"SecureBoot", &efi_variable_guid,
- NULL, &size, &secboot);
- if (status == EFI_NOT_FOUND) {
- pr_info("ima: secureboot mode disabled\n");
- return efi_secureboot_mode_disabled;
- }
-
- if (status != EFI_SUCCESS) {
- pr_info("ima: secureboot mode unknown\n");
- return efi_secureboot_mode_unknown;
- }
-
- size = sizeof(setupmode);
- status = efi.get_variable(L"SetupMode", &efi_variable_guid,
- NULL, &size, &setupmode);
-
- if (status != EFI_SUCCESS) /* ignore unknown SetupMode */
- setupmode = 0;
-
- if (secboot == 0 || setupmode == 1) {
- pr_info("ima: secureboot mode disabled\n");
- return efi_secureboot_mode_disabled;
- }
-
- pr_info("ima: secureboot mode enabled\n");
- return efi_secureboot_mode_enabled;
-}
-
-bool arch_ima_get_secureboot(void)
-{
- static enum efi_secureboot_mode sb_mode;
- static bool initialized;
-
- if (!initialized && efi_enabled(EFI_BOOT)) {
- sb_mode = boot_params.secure_boot;
-
- if (sb_mode == efi_secureboot_mode_unset)
- sb_mode = get_sb_mode();
- initialized = true;
- }
-
- if (sb_mode == efi_secureboot_mode_enabled)
- return true;
- else
- return false;
-}
-
-/* secureboot arch rules */
-static const char * const sb_arch_rules[] = {
-#if !IS_ENABLED(CONFIG_KEXEC_SIG)
- "appraise func=KEXEC_KERNEL_CHECK appraise_type=imasig",
-#endif /* CONFIG_KEXEC_SIG */
- "measure func=KEXEC_KERNEL_CHECK",
-#if !IS_ENABLED(CONFIG_MODULE_SIG)
- "appraise func=MODULE_CHECK appraise_type=imasig",
-#endif
- "measure func=MODULE_CHECK",
- NULL
-};
-
-const char * const *arch_get_ima_policy(void)
-{
- if (IS_ENABLED(CONFIG_IMA_ARCH_POLICY) && arch_ima_get_secureboot()) {
- if (IS_ENABLED(CONFIG_MODULE_SIG))
- set_module_sig_enforced();
- return sb_arch_rules;
- }
- return NULL;
-}
diff --git a/arch/x86/kernel/topology.c b/arch/x86/kernel/topology.c
index 0a2ec80..f5477ea 100644
--- a/arch/x86/kernel/topology.c
+++ b/arch/x86/kernel/topology.c
@@ -25,6 +25,7 @@
*
* Send feedback to <colpatch@us.ibm.com>
*/
+#include <linux/interrupt.h>
#include <linux/nodemask.h>
#include <linux/export.h>
#include <linux/mmzone.h>
diff --git a/arch/x86/xen/efi.c b/arch/x86/xen/efi.c
index 205a9bc..7d7ffb9 100644
--- a/arch/x86/xen/efi.c
+++ b/arch/x86/xen/efi.c
@@ -93,37 +93,22 @@
/*
* Determine whether we're in secure boot mode.
- *
- * Please keep the logic in sync with
- * drivers/firmware/efi/libstub/secureboot.c:efi_get_secureboot().
*/
static enum efi_secureboot_mode xen_efi_get_secureboot(void)
{
- static efi_guid_t efi_variable_guid = EFI_GLOBAL_VARIABLE_GUID;
static efi_guid_t shim_guid = EFI_SHIM_LOCK_GUID;
+ enum efi_secureboot_mode mode;
efi_status_t status;
- u8 moksbstate, secboot, setupmode;
+ u8 moksbstate;
unsigned long size;
- size = sizeof(secboot);
- status = efi.get_variable(L"SecureBoot", &efi_variable_guid,
- NULL, &size, &secboot);
-
- if (status == EFI_NOT_FOUND)
- return efi_secureboot_mode_disabled;
-
- if (status != EFI_SUCCESS)
- goto out_efi_err;
-
- size = sizeof(setupmode);
- status = efi.get_variable(L"SetupMode", &efi_variable_guid,
- NULL, &size, &setupmode);
-
- if (status != EFI_SUCCESS)
- goto out_efi_err;
-
- if (secboot == 0 || setupmode == 1)
- return efi_secureboot_mode_disabled;
+ mode = efi_get_secureboot_mode(efi.get_variable);
+ if (mode == efi_secureboot_mode_unknown) {
+ pr_err("Could not determine UEFI Secure Boot status.\n");
+ return efi_secureboot_mode_unknown;
+ }
+ if (mode != efi_secureboot_mode_enabled)
+ return mode;
/* See if a user has put the shim into insecure mode. */
size = sizeof(moksbstate);
@@ -140,10 +125,6 @@
secure_boot_enabled:
pr_info("UEFI Secure Boot is enabled.\n");
return efi_secureboot_mode_enabled;
-
- out_efi_err:
- pr_err("Could not determine UEFI Secure Boot status.\n");
- return efi_secureboot_mode_unknown;
}
void __init xen_efi_init(struct boot_params *boot_params)
diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index ffa418c..ac6078a 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -2185,6 +2185,9 @@
WEIGHT_ONE);
}
+ TRACE_IOCG_PATH(iocg_idle, iocg, now,
+ atomic64_read(&iocg->active_period),
+ atomic64_read(&ioc->cur_period), vtime);
__propagate_weights(iocg, 0, 0, false, now);
list_del_init(&iocg->active_list);
}
diff --git a/block/blk-mq.c b/block/blk-mq.c
index b09ce00..c338c9b 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -650,6 +650,14 @@
if (!IS_ENABLED(CONFIG_SMP) ||
!test_bit(QUEUE_FLAG_SAME_COMP, &rq->q->queue_flags))
return false;
+ /*
+ * With force threaded interrupts enabled, raising softirq from an SMP
+ * function call will always result in waking the ksoftirqd thread.
+ * This is probably worse than completing the request on a different
+ * cache domain.
+ */
+ if (force_irqthreads)
+ return false;
/* same CPU or cache domain? Complete locally */
if (cpu == rq->mq_ctx->cpu ||
@@ -1496,31 +1504,6 @@
int srcu_idx;
/*
- * We should be running this queue from one of the CPUs that
- * are mapped to it.
- *
- * There are at least two related races now between setting
- * hctx->next_cpu from blk_mq_hctx_next_cpu() and running
- * __blk_mq_run_hw_queue():
- *
- * - hctx->next_cpu is found offline in blk_mq_hctx_next_cpu(),
- * but later it becomes online, then this warning is harmless
- * at all
- *
- * - hctx->next_cpu is found online in blk_mq_hctx_next_cpu(),
- * but later it becomes offline, then the warning can't be
- * triggered, and we depend on blk-mq timeout handler to
- * handle dispatched requests to this hctx
- */
- if (!cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask) &&
- cpu_online(hctx->next_cpu)) {
- printk(KERN_WARNING "run queue from wrong CPU %d, hctx %s\n",
- raw_smp_processor_id(),
- cpumask_empty(hctx->cpumask) ? "inactive": "active");
- dump_stack();
- }
-
- /*
* We can't run the queue inline with ints disabled. Ensure that
* we catch bad users of this early.
*/
diff --git a/block/genhd.c b/block/genhd.c
index b84b8671..73faec43 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1,6 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
/*
* gendisk handling
+ *
+ * Portions Copyright (C) 2020 Christoph Hellwig
*/
#include <linux/module.h>
diff --git a/block/partitions/core.c b/block/partitions/core.c
index deca253..e7d776d 100644
--- a/block/partitions/core.c
+++ b/block/partitions/core.c
@@ -2,6 +2,7 @@
/*
* Copyright (C) 1991-1998 Linus Torvalds
* Re-organised Feb 1998 Russell King
+ * Copyright (C) 2020 Christoph Hellwig
*/
#include <linux/fs.h>
#include <linux/slab.h>
diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index 4426082..b11b08a 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -5,6 +5,7 @@
#include <linux/list_sort.h>
#include <linux/libnvdimm.h>
#include <linux/module.h>
+#include <linux/nospec.h>
#include <linux/mutex.h>
#include <linux/ndctl.h>
#include <linux/sysfs.h>
@@ -282,18 +283,19 @@
static union acpi_object *int_to_buf(union acpi_object *integer)
{
- union acpi_object *buf = ACPI_ALLOCATE(sizeof(*buf) + 4);
+ union acpi_object *buf = NULL;
void *dst = NULL;
- if (!buf)
- goto err;
-
if (integer->type != ACPI_TYPE_INTEGER) {
WARN_ONCE(1, "BIOS bug, unexpected element type: %d\n",
integer->type);
goto err;
}
+ buf = ACPI_ALLOCATE(sizeof(*buf) + 4);
+ if (!buf)
+ goto err;
+
dst = buf + 1;
buf->type = ACPI_TYPE_BUFFER;
buf->buffer.length = 4;
@@ -478,8 +480,11 @@
cmd_mask = nd_desc->cmd_mask;
if (cmd == ND_CMD_CALL && call_pkg->nd_family) {
family = call_pkg->nd_family;
- if (!test_bit(family, &nd_desc->bus_family_mask))
+ if (family > NVDIMM_BUS_FAMILY_MAX ||
+ !test_bit(family, &nd_desc->bus_family_mask))
return -EINVAL;
+ family = array_index_nospec(family,
+ NVDIMM_BUS_FAMILY_MAX + 1);
dsm_mask = acpi_desc->family_dsm_mask[family];
guid = to_nfit_bus_uuid(family);
} else {
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 92f84ed..6727358 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -318,7 +318,8 @@
blk_queue_logical_block_size(nbd->disk->queue, blksize);
blk_queue_physical_block_size(nbd->disk->queue, blksize);
- set_bit(GD_NEED_PART_SCAN, &nbd->disk->state);
+ if (max_part)
+ set_bit(GD_NEED_PART_SCAN, &nbd->disk->state);
if (!set_capacity_and_notify(nbd->disk, bytesize >> 9))
kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE);
return 0;
@@ -1476,9 +1477,11 @@
refcount_set(&nbd->config_refs, 1);
refcount_inc(&nbd->refs);
mutex_unlock(&nbd->config_lock);
- set_bit(GD_NEED_PART_SCAN, &bdev->bd_disk->state);
+ if (max_part)
+ set_bit(GD_NEED_PART_SCAN, &bdev->bd_disk->state);
} else if (nbd_disconnected(nbd->config)) {
- set_bit(GD_NEED_PART_SCAN, &bdev->bd_disk->state);
+ if (max_part)
+ set_bit(GD_NEED_PART_SCAN, &bdev->bd_disk->state);
}
out:
mutex_unlock(&nbd_index_mutex);
diff --git a/drivers/block/rnbd/rnbd-clt-sysfs.c b/drivers/block/rnbd/rnbd-clt-sysfs.c
index a7caeed..d4aa6bf 100644
--- a/drivers/block/rnbd/rnbd-clt-sysfs.c
+++ b/drivers/block/rnbd/rnbd-clt-sysfs.c
@@ -432,7 +432,7 @@
* i.e. rnbd_clt_unmap_dev_store() leading to a sysfs warning because
* of sysfs link already was removed already.
*/
- if (strlen(dev->blk_symlink_name) && try_module_get(THIS_MODULE)) {
+ if (dev->blk_symlink_name && try_module_get(THIS_MODULE)) {
sysfs_remove_link(rnbd_devs_kobj, dev->blk_symlink_name);
kfree(dev->blk_symlink_name);
module_put(THIS_MODULE);
@@ -521,7 +521,8 @@
return 0;
out_err:
- dev->blk_symlink_name[0] = '\0';
+ kfree(dev->blk_symlink_name);
+ dev->blk_symlink_name = NULL ;
return ret;
}
diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c
index a199b19..96e3f9f 100644
--- a/drivers/block/rnbd/rnbd-clt.c
+++ b/drivers/block/rnbd/rnbd-clt.c
@@ -88,6 +88,8 @@
dev->discard_alignment = le32_to_cpu(rsp->discard_alignment);
dev->secure_discard = le16_to_cpu(rsp->secure_discard);
dev->rotational = rsp->rotational;
+ dev->wc = !!(rsp->cache_policy & RNBD_WRITEBACK);
+ dev->fua = !!(rsp->cache_policy & RNBD_FUA);
dev->max_hw_sectors = sess->max_io_size / SECTOR_SIZE;
dev->max_segments = BMAX_SEGMENTS;
@@ -347,19 +349,26 @@
struct rnbd_iu *iu;
struct rtrs_permit *permit;
+ iu = kzalloc(sizeof(*iu), GFP_KERNEL);
+ if (!iu) {
+ return NULL;
+ }
+
permit = rnbd_get_permit(sess, con_type,
wait ? RTRS_PERMIT_WAIT :
RTRS_PERMIT_NOWAIT);
- if (unlikely(!permit))
+ if (unlikely(!permit)) {
+ kfree(iu);
return NULL;
- iu = rtrs_permit_to_pdu(permit);
+ }
+
iu->permit = permit;
/*
* 1st reference is dropped after finishing sending a "user" message,
* 2nd reference is dropped after confirmation with the response is
* returned.
* 1st and 2nd can happen in any order, so the rnbd_iu should be
- * released (rtrs_permit returned to ibbtrs) only leased after both
+ * released (rtrs_permit returned to rtrs) only after both
* are finished.
*/
atomic_set(&iu->refcount, 2);
@@ -371,8 +380,10 @@
static void rnbd_put_iu(struct rnbd_clt_session *sess, struct rnbd_iu *iu)
{
- if (atomic_dec_and_test(&iu->refcount))
+ if (atomic_dec_and_test(&iu->refcount)) {
rnbd_put_permit(sess, iu->permit);
+ kfree(iu);
+ }
}
static void rnbd_softirq_done_fn(struct request *rq)
@@ -382,6 +393,7 @@
struct rnbd_iu *iu;
iu = blk_mq_rq_to_pdu(rq);
+ sg_free_table_chained(&iu->sgt, RNBD_INLINE_SG_CNT);
rnbd_put_permit(sess, iu->permit);
blk_mq_end_request(rq, errno_to_blk_status(iu->errno));
}
@@ -475,7 +487,7 @@
iu->buf = NULL;
iu->dev = dev;
- sg_mark_end(&iu->sglist[0]);
+ sg_alloc_table(&iu->sgt, 1, GFP_KERNEL);
msg.hdr.type = cpu_to_le16(RNBD_MSG_CLOSE);
msg.device_id = cpu_to_le32(device_id);
@@ -490,6 +502,7 @@
err = errno;
}
+ sg_free_table(&iu->sgt);
rnbd_put_iu(sess, iu);
return err;
}
@@ -562,7 +575,8 @@
iu->buf = rsp;
iu->dev = dev;
- sg_init_one(iu->sglist, rsp, sizeof(*rsp));
+ sg_alloc_table(&iu->sgt, 1, GFP_KERNEL);
+ sg_init_one(iu->sgt.sgl, rsp, sizeof(*rsp));
msg.hdr.type = cpu_to_le16(RNBD_MSG_OPEN);
msg.access_mode = dev->access_mode;
@@ -570,7 +584,7 @@
WARN_ON(!rnbd_clt_get_dev(dev));
err = send_usr_msg(sess->rtrs, READ, iu,
- &vec, sizeof(*rsp), iu->sglist, 1,
+ &vec, sizeof(*rsp), iu->sgt.sgl, 1,
msg_open_conf, &errno, wait);
if (err) {
rnbd_clt_put_dev(dev);
@@ -580,6 +594,7 @@
err = errno;
}
+ sg_free_table(&iu->sgt);
rnbd_put_iu(sess, iu);
return err;
}
@@ -608,7 +623,8 @@
iu->buf = rsp;
iu->sess = sess;
- sg_init_one(iu->sglist, rsp, sizeof(*rsp));
+ sg_alloc_table(&iu->sgt, 1, GFP_KERNEL);
+ sg_init_one(iu->sgt.sgl, rsp, sizeof(*rsp));
msg.hdr.type = cpu_to_le16(RNBD_MSG_SESS_INFO);
msg.ver = RNBD_PROTO_VER_MAJOR;
@@ -624,7 +640,7 @@
goto put_iu;
}
err = send_usr_msg(sess->rtrs, READ, iu,
- &vec, sizeof(*rsp), iu->sglist, 1,
+ &vec, sizeof(*rsp), iu->sgt.sgl, 1,
msg_sess_info_conf, &errno, wait);
if (err) {
rnbd_clt_put_sess(sess);
@@ -634,7 +650,7 @@
} else {
err = errno;
}
-
+ sg_free_table(&iu->sgt);
rnbd_put_iu(sess, iu);
return err;
}
@@ -803,7 +819,7 @@
rnbd_init_cpu_qlists(sess->cpu_queues);
/*
- * That is simple percpu variable which stores cpu indeces, which are
+ * That is simple percpu variable which stores cpu indices, which are
* incremented on each access. We need that for the sake of fairness
* to wake up queues in a round-robin manner.
*/
@@ -1014,11 +1030,10 @@
* See queue limits.
*/
if (req_op(rq) != REQ_OP_DISCARD)
- sg_cnt = blk_rq_map_sg(dev->queue, rq, iu->sglist);
+ sg_cnt = blk_rq_map_sg(dev->queue, rq, iu->sgt.sgl);
if (sg_cnt == 0)
- /* Do not forget to mark the end */
- sg_mark_end(&iu->sglist[0]);
+ sg_mark_end(&iu->sgt.sgl[0]);
msg.hdr.type = cpu_to_le16(RNBD_MSG_IO);
msg.device_id = cpu_to_le32(dev->device_id);
@@ -1027,13 +1042,13 @@
.iov_base = &msg,
.iov_len = sizeof(msg)
};
- size = rnbd_clt_get_sg_size(iu->sglist, sg_cnt);
+ size = rnbd_clt_get_sg_size(iu->sgt.sgl, sg_cnt);
req_ops = (struct rtrs_clt_req_ops) {
.priv = iu,
.conf_fn = msg_io_conf,
};
err = rtrs_clt_request(rq_data_dir(rq), &req_ops, rtrs, permit,
- &vec, 1, size, iu->sglist, sg_cnt);
+ &vec, 1, size, iu->sgt.sgl, sg_cnt);
if (unlikely(err)) {
rnbd_clt_err_rl(dev, "RTRS failed to transfer IO, err: %d\n",
err);
@@ -1120,6 +1135,7 @@
struct rnbd_clt_dev *dev = rq->rq_disk->private_data;
struct rnbd_iu *iu = blk_mq_rq_to_pdu(rq);
int err;
+ blk_status_t ret = BLK_STS_IOERR;
if (unlikely(dev->dev_state != DEV_STATE_MAPPED))
return BLK_STS_IOERR;
@@ -1131,32 +1147,35 @@
return BLK_STS_RESOURCE;
}
+ iu->sgt.sgl = iu->first_sgl;
+ err = sg_alloc_table_chained(&iu->sgt,
+ /* Even-if the request has no segment,
+ * sglist must have one entry at least */
+ blk_rq_nr_phys_segments(rq) ? : 1,
+ iu->sgt.sgl,
+ RNBD_INLINE_SG_CNT);
+ if (err) {
+ rnbd_clt_err_rl(dev, "sg_alloc_table_chained ret=%d\n", err);
+ rnbd_clt_dev_kick_mq_queue(dev, hctx, 10/*ms*/);
+ rnbd_put_permit(dev->sess, iu->permit);
+ return BLK_STS_RESOURCE;
+ }
+
blk_mq_start_request(rq);
err = rnbd_client_xfer_request(dev, rq, iu);
if (likely(err == 0))
return BLK_STS_OK;
if (unlikely(err == -EAGAIN || err == -ENOMEM)) {
rnbd_clt_dev_kick_mq_queue(dev, hctx, 10/*ms*/);
- rnbd_put_permit(dev->sess, iu->permit);
- return BLK_STS_RESOURCE;
+ ret = BLK_STS_RESOURCE;
}
-
+ sg_free_table_chained(&iu->sgt, RNBD_INLINE_SG_CNT);
rnbd_put_permit(dev->sess, iu->permit);
- return BLK_STS_IOERR;
-}
-
-static int rnbd_init_request(struct blk_mq_tag_set *set, struct request *rq,
- unsigned int hctx_idx, unsigned int numa_node)
-{
- struct rnbd_iu *iu = blk_mq_rq_to_pdu(rq);
-
- sg_init_table(iu->sglist, BMAX_SEGMENTS);
- return 0;
+ return ret;
}
static struct blk_mq_ops rnbd_mq_ops = {
.queue_rq = rnbd_queue_rq,
- .init_request = rnbd_init_request,
.complete = rnbd_softirq_done_fn,
};
@@ -1170,7 +1189,7 @@
tag_set->numa_node = NUMA_NO_NODE;
tag_set->flags = BLK_MQ_F_SHOULD_MERGE |
BLK_MQ_F_TAG_QUEUE_SHARED;
- tag_set->cmd_size = sizeof(struct rnbd_iu);
+ tag_set->cmd_size = sizeof(struct rnbd_iu) + RNBD_RDMA_SGL_SIZE;
tag_set->nr_hw_queues = num_online_cpus();
return blk_mq_alloc_tag_set(tag_set);
@@ -1208,7 +1227,7 @@
*/
sess->rtrs = rtrs_clt_open(&rtrs_ops, sessname,
paths, path_cnt, port_nr,
- sizeof(struct rnbd_iu),
+ 0, /* Do not use pdu of rtrs */
RECONNECT_DELAY, BMAX_SEGMENTS,
BLK_MAX_SEGMENT_SIZE,
MAX_RECONNECTS);
@@ -1305,7 +1324,7 @@
blk_queue_max_segments(dev->queue, dev->max_segments);
blk_queue_io_opt(dev->queue, dev->sess->max_io_size);
blk_queue_virt_boundary(dev->queue, SZ_4K - 1);
- blk_queue_write_cache(dev->queue, true, true);
+ blk_queue_write_cache(dev->queue, dev->wc, dev->fua);
dev->queue->queuedata = dev;
}
@@ -1388,12 +1407,11 @@
goto out_queues;
}
- dev->pathname = kzalloc(strlen(pathname) + 1, GFP_KERNEL);
+ dev->pathname = kstrdup(pathname, GFP_KERNEL);
if (!dev->pathname) {
ret = -ENOMEM;
goto out_queues;
}
- strlcpy(dev->pathname, pathname, strlen(pathname) + 1);
dev->clt_device_id = ret;
dev->sess = sess;
@@ -1529,13 +1547,13 @@
}
rnbd_clt_info(dev,
- "map_device: Device mapped as %s (nsectors: %zu, logical_block_size: %d, physical_block_size: %d, max_write_same_sectors: %d, max_discard_sectors: %d, discard_granularity: %d, discard_alignment: %d, secure_discard: %d, max_segments: %d, max_hw_sectors: %d, rotational: %d)\n",
+ "map_device: Device mapped as %s (nsectors: %zu, logical_block_size: %d, physical_block_size: %d, max_write_same_sectors: %d, max_discard_sectors: %d, discard_granularity: %d, discard_alignment: %d, secure_discard: %d, max_segments: %d, max_hw_sectors: %d, rotational: %d, wc: %d, fua: %d)\n",
dev->gd->disk_name, dev->nsectors,
dev->logical_block_size, dev->physical_block_size,
dev->max_write_same_sectors, dev->max_discard_sectors,
dev->discard_granularity, dev->discard_alignment,
dev->secure_discard, dev->max_segments,
- dev->max_hw_sectors, dev->rotational);
+ dev->max_hw_sectors, dev->rotational, dev->wc, dev->fua);
mutex_unlock(&dev->lock);
@@ -1667,7 +1685,7 @@
/*
* Here at this point there is no any concurrent access to sessions
* list and devices list:
- * 1. New session or device can'be be created - session sysfs files
+ * 1. New session or device can't be created - session sysfs files
* are removed.
* 2. Device or session can't be removed - module reference is taken
* into account in unmap device sysfs callback.
diff --git a/drivers/block/rnbd/rnbd-clt.h b/drivers/block/rnbd/rnbd-clt.h
index b193d590..537d499 100644
--- a/drivers/block/rnbd/rnbd-clt.h
+++ b/drivers/block/rnbd/rnbd-clt.h
@@ -44,6 +44,13 @@
int errno;
};
+#ifdef CONFIG_ARCH_NO_SG_CHAIN
+#define RNBD_INLINE_SG_CNT 0
+#else
+#define RNBD_INLINE_SG_CNT 2
+#endif
+#define RNBD_RDMA_SGL_SIZE (sizeof(struct scatterlist) * RNBD_INLINE_SG_CNT)
+
struct rnbd_iu {
union {
struct request *rq; /* for block io */
@@ -56,11 +63,12 @@
/* use to send msg associated with a sess */
struct rnbd_clt_session *sess;
};
- struct scatterlist sglist[BMAX_SEGMENTS];
+ struct sg_table sgt;
struct work_struct work;
int errno;
struct rnbd_iu_comp comp;
atomic_t refcount;
+ struct scatterlist first_sgl[]; /* must be the last one */
};
struct rnbd_cpu_qlist {
@@ -112,6 +120,8 @@
enum rnbd_access_mode access_mode;
bool read_only;
bool rotational;
+ bool wc;
+ bool fua;
u32 max_hw_sectors;
u32 max_write_same_sectors;
u32 max_discard_sectors;
diff --git a/drivers/block/rnbd/rnbd-proto.h b/drivers/block/rnbd/rnbd-proto.h
index ca16624..c1bc5c0 100644
--- a/drivers/block/rnbd/rnbd-proto.h
+++ b/drivers/block/rnbd/rnbd-proto.h
@@ -108,6 +108,11 @@
__le32 device_id;
};
+enum rnbd_cache_policy {
+ RNBD_FUA = 1 << 0,
+ RNBD_WRITEBACK = 1 << 1,
+};
+
/**
* struct rnbd_msg_open_rsp - response message to RNBD_MSG_OPEN
* @hdr: message header
@@ -124,6 +129,7 @@
* @max_segments: max segments hardware support in one transfer
* @secure_discard: supports secure discard
* @rotation: is a rotational disc?
+ * @cache_policy: support write-back caching or FUA?
*/
struct rnbd_msg_open_rsp {
struct rnbd_msg_hdr hdr;
@@ -139,7 +145,8 @@
__le16 max_segments;
__le16 secure_discard;
u8 rotational;
- u8 reserved[11];
+ u8 cache_policy;
+ u8 reserved[10];
};
/**
diff --git a/drivers/block/rnbd/rnbd-srv.c b/drivers/block/rnbd/rnbd-srv.c
index d1ee72e..b8e4433 100644
--- a/drivers/block/rnbd/rnbd-srv.c
+++ b/drivers/block/rnbd/rnbd-srv.c
@@ -338,9 +338,10 @@
void rnbd_srv_sess_dev_force_close(struct rnbd_srv_sess_dev *sess_dev)
{
+ mutex_lock(&sess_dev->sess->lock);
rnbd_srv_destroy_dev_session_sysfs(sess_dev);
+ mutex_unlock(&sess_dev->sess->lock);
sess_dev->keep_id = true;
-
}
static int process_msg_close(struct rtrs_srv *rtrs,
@@ -549,6 +550,7 @@
struct rnbd_srv_sess_dev *sess_dev)
{
struct rnbd_dev *rnbd_dev = sess_dev->rnbd_dev;
+ struct request_queue *q = bdev_get_queue(rnbd_dev->bdev);
rsp->hdr.type = cpu_to_le16(RNBD_MSG_OPEN_RSP);
rsp->device_id =
@@ -573,8 +575,12 @@
cpu_to_le32(rnbd_dev_get_discard_alignment(rnbd_dev));
rsp->secure_discard =
cpu_to_le16(rnbd_dev_get_secure_discard(rnbd_dev));
- rsp->rotational =
- !blk_queue_nonrot(bdev_get_queue(rnbd_dev->bdev));
+ rsp->rotational = !blk_queue_nonrot(q);
+ rsp->cache_policy = 0;
+ if (test_bit(QUEUE_FLAG_WC, &q->queue_flags))
+ rsp->cache_policy |= RNBD_WRITEBACK;
+ if (blk_queue_fua(q))
+ rsp->cache_policy |= RNBD_FUA;
}
static struct rnbd_srv_sess_dev *
diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c
index 27513d3..737b207 100644
--- a/drivers/dax/bus.c
+++ b/drivers/dax/bus.c
@@ -367,19 +367,28 @@
}
EXPORT_SYMBOL_GPL(kill_dev_dax);
-static void free_dev_dax_ranges(struct dev_dax *dev_dax)
+static void trim_dev_dax_range(struct dev_dax *dev_dax)
{
+ int i = dev_dax->nr_range - 1;
+ struct range *range = &dev_dax->ranges[i].range;
struct dax_region *dax_region = dev_dax->region;
- int i;
device_lock_assert(dax_region->dev);
- for (i = 0; i < dev_dax->nr_range; i++) {
- struct range *range = &dev_dax->ranges[i].range;
+ dev_dbg(&dev_dax->dev, "delete range[%d]: %#llx:%#llx\n", i,
+ (unsigned long long)range->start,
+ (unsigned long long)range->end);
- __release_region(&dax_region->res, range->start,
- range_len(range));
+ __release_region(&dax_region->res, range->start, range_len(range));
+ if (--dev_dax->nr_range == 0) {
+ kfree(dev_dax->ranges);
+ dev_dax->ranges = NULL;
}
- dev_dax->nr_range = 0;
+}
+
+static void free_dev_dax_ranges(struct dev_dax *dev_dax)
+{
+ while (dev_dax->nr_range)
+ trim_dev_dax_range(dev_dax);
}
static void unregister_dev_dax(void *dev)
@@ -763,22 +772,14 @@
return 0;
}
- ranges = krealloc(dev_dax->ranges, sizeof(*ranges)
- * (dev_dax->nr_range + 1), GFP_KERNEL);
- if (!ranges)
+ alloc = __request_region(res, start, size, dev_name(dev), 0);
+ if (!alloc)
return -ENOMEM;
- alloc = __request_region(res, start, size, dev_name(dev), 0);
- if (!alloc) {
- /*
- * If this was an empty set of ranges nothing else
- * will release @ranges, so do it now.
- */
- if (!dev_dax->nr_range) {
- kfree(ranges);
- ranges = NULL;
- }
- dev_dax->ranges = ranges;
+ ranges = krealloc(dev_dax->ranges, sizeof(*ranges)
+ * (dev_dax->nr_range + 1), GFP_KERNEL);
+ if (!ranges) {
+ __release_region(res, alloc->start, resource_size(alloc));
return -ENOMEM;
}
@@ -804,15 +805,10 @@
return 0;
rc = devm_register_dax_mapping(dev_dax, dev_dax->nr_range - 1);
- if (rc) {
- dev_dbg(dev, "delete range[%d]: %pa:%pa\n", dev_dax->nr_range - 1,
- &alloc->start, &alloc->end);
- dev_dax->nr_range--;
- __release_region(res, alloc->start, resource_size(alloc));
- return rc;
- }
+ if (rc)
+ trim_dev_dax_range(dev_dax);
- return 0;
+ return rc;
}
static int adjust_dev_dax_range(struct dev_dax *dev_dax, struct resource *res, resource_size_t size)
@@ -885,12 +881,7 @@
if (shrink >= range_len(range)) {
devm_release_action(dax_region->dev,
unregister_dax_mapping, &mapping->dev);
- __release_region(&dax_region->res, range->start,
- range_len(range));
- dev_dax->nr_range--;
- dev_dbg(dev, "delete range[%d]: %#llx:%#llx\n", i,
- (unsigned long long) range->start,
- (unsigned long long) range->end);
+ trim_dev_dax_range(dev_dax);
to_shrink -= shrink;
if (!to_shrink)
break;
@@ -1114,16 +1105,9 @@
static ssize_t dev_dax_validate_align(struct dev_dax *dev_dax)
{
- resource_size_t dev_size = dev_dax_size(dev_dax);
struct device *dev = &dev_dax->dev;
int i;
- if (dev_size > 0 && !alloc_is_aligned(dev_dax, dev_size)) {
- dev_dbg(dev, "%s: align %u invalid for size %pa\n",
- __func__, dev_dax->align, &dev_size);
- return -EINVAL;
- }
-
for (i = 0; i < dev_dax->nr_range; i++) {
size_t len = range_len(&dev_dax->ranges[i].range);
@@ -1274,7 +1258,6 @@
put_dax(dax_dev);
free_dev_dax_id(dev_dax);
dax_region_put(dax_region);
- kfree(dev_dax->ranges);
kfree(dev_dax->pgmap);
kfree(dev_dax);
}
diff --git a/drivers/dax/pmem/core.c b/drivers/dax/pmem/core.c
index 62b26bf..062e8bc 100644
--- a/drivers/dax/pmem/core.c
+++ b/drivers/dax/pmem/core.c
@@ -52,7 +52,7 @@
/* adjust the dax_region range to the start of data */
range = pgmap.range;
- range.start += offset,
+ range.start += offset;
dax_region = alloc_dax_region(dev, region_id, &range,
nd_region->target_node, le32_to_cpu(pfn_sb->align),
IORESOURCE_DAX_STATIC);
diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index edc279b..cadbd0a 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -752,6 +752,7 @@
static void __exit dax_core_exit(void)
{
+ dax_bus_exit();
unregister_chrdev_region(dax_devt, MINORMASK+1);
ida_destroy(&dax_minor_ida);
dax_fs_exit();
diff --git a/drivers/dma-buf/heaps/cma_heap.c b/drivers/dma-buf/heaps/cma_heap.c
index 5e7c343..3c4e3430 100644
--- a/drivers/dma-buf/heaps/cma_heap.c
+++ b/drivers/dma-buf/heaps/cma_heap.c
@@ -20,6 +20,7 @@
#include <linux/module.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>
+#include <linux/vmalloc.h>
struct cma_heap {
diff --git a/drivers/firmware/efi/Kconfig b/drivers/firmware/efi/Kconfig
index d989549..2c3dac5 100644
--- a/drivers/firmware/efi/Kconfig
+++ b/drivers/firmware/efi/Kconfig
@@ -122,7 +122,7 @@
config EFI_GENERIC_STUB_INITRD_CMDLINE_LOADER
bool "Enable the command line initrd loader" if !X86
depends on EFI_STUB && (EFI_GENERIC_STUB || X86)
- default y
+ default y if X86
depends on !RISCV
help
Select this config option to add support for the initrd= command
@@ -147,7 +147,7 @@
config EFI_CAPSULE_LOADER
tristate "EFI capsule loader"
- depends on EFI
+ depends on EFI && !IA64
help
This option exposes a loader interface "/dev/efi_capsule_loader" for
users to load EFI capsules. This driver requires working runtime
diff --git a/drivers/firmware/efi/Makefile b/drivers/firmware/efi/Makefile
index d6ca2da..467e942 100644
--- a/drivers/firmware/efi/Makefile
+++ b/drivers/firmware/efi/Makefile
@@ -12,7 +12,10 @@
obj-$(CONFIG_ACPI_BGRT) += efi-bgrt.o
obj-$(CONFIG_EFI) += efi.o vars.o reboot.o memattr.o tpm.o
-obj-$(CONFIG_EFI) += capsule.o memmap.o
+obj-$(CONFIG_EFI) += memmap.o
+ifneq ($(CONFIG_EFI_CAPSULE_LOADER),)
+obj-$(CONFIG_EFI) += capsule.o
+endif
obj-$(CONFIG_EFI_PARAMS_FROM_FDT) += fdtparams.o
obj-$(CONFIG_EFI_VARS) += efivars.o
obj-$(CONFIG_EFI_ESRT) += esrt.o
diff --git a/drivers/firmware/efi/capsule.c b/drivers/firmware/efi/capsule.c
index 598b780..7684302 100644
--- a/drivers/firmware/efi/capsule.c
+++ b/drivers/firmware/efi/capsule.c
@@ -12,6 +12,7 @@
#include <linux/highmem.h>
#include <linux/efi.h>
#include <linux/vmalloc.h>
+#include <asm/efi.h>
#include <asm/io.h>
typedef struct {
@@ -244,7 +245,7 @@
for (i = 0; i < sg_count; i++) {
efi_capsule_block_desc_t *sglist;
- sglist = kmap(sg_pages[i]);
+ sglist = kmap_atomic(sg_pages[i]);
for (j = 0; j < SGLIST_PER_PAGE && count > 0; j++) {
u64 sz = min_t(u64, imagesize,
@@ -265,7 +266,18 @@
else
sglist[j].data = page_to_phys(sg_pages[i + 1]);
- kunmap(sg_pages[i]);
+#if defined(CONFIG_ARM) || defined(CONFIG_ARM64)
+ /*
+ * At runtime, the firmware has no way to find out where the
+ * sglist elements are mapped, if they are mapped in the first
+ * place. Therefore, on architectures that can only perform
+ * cache maintenance by virtual address, the firmware is unable
+ * to perform this maintenance, and so it is up to the OS to do
+ * it instead.
+ */
+ efi_capsule_flush_cache_range(sglist, PAGE_SIZE);
+#endif
+ kunmap_atomic(sglist);
}
mutex_lock(&capsule_mutex);
diff --git a/drivers/firmware/efi/libstub/efi-stub.c b/drivers/firmware/efi/libstub/efi-stub.c
index 914a343..ec2f398 100644
--- a/drivers/firmware/efi/libstub/efi-stub.c
+++ b/drivers/firmware/efi/libstub/efi-stub.c
@@ -273,7 +273,6 @@
install_memreserve_table();
status = allocate_new_fdt_and_exit_boot(handle, &fdt_addr,
- efi_get_max_fdt_addr(image_addr),
initrd_addr, initrd_size,
cmdline_ptr, fdt_addr, fdt_size);
if (status != EFI_SUCCESS)
diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h
index 2d7abcd..b50a6c6 100644
--- a/drivers/firmware/efi/libstub/efistub.h
+++ b/drivers/firmware/efi/libstub/efistub.h
@@ -750,7 +750,6 @@
efi_status_t allocate_new_fdt_and_exit_boot(void *handle,
unsigned long *new_fdt_addr,
- unsigned long max_addr,
u64 initrd_addr, u64 initrd_size,
char *cmdline_ptr,
unsigned long fdt_addr,
@@ -848,4 +847,6 @@
void efi_handle_post_ebs_state(void);
+enum efi_secureboot_mode efi_get_secureboot(void);
+
#endif
diff --git a/drivers/firmware/efi/libstub/fdt.c b/drivers/firmware/efi/libstub/fdt.c
index 368cd60..365c3a4 100644
--- a/drivers/firmware/efi/libstub/fdt.c
+++ b/drivers/firmware/efi/libstub/fdt.c
@@ -238,7 +238,6 @@
efi_status_t allocate_new_fdt_and_exit_boot(void *handle,
unsigned long *new_fdt_addr,
- unsigned long max_addr,
u64 initrd_addr, u64 initrd_size,
char *cmdline_ptr,
unsigned long fdt_addr,
@@ -275,7 +274,7 @@
efi_info("Exiting boot services and installing virtual address map...\n");
map.map = &memory_map;
- status = efi_allocate_pages(MAX_FDT_SIZE, new_fdt_addr, max_addr);
+ status = efi_allocate_pages(MAX_FDT_SIZE, new_fdt_addr, ULONG_MAX);
if (status != EFI_SUCCESS) {
efi_err("Unable to allocate memory for new device tree.\n");
goto fail;
diff --git a/drivers/firmware/efi/libstub/secureboot.c b/drivers/firmware/efi/libstub/secureboot.c
index 5efc524..8a18930 100644
--- a/drivers/firmware/efi/libstub/secureboot.c
+++ b/drivers/firmware/efi/libstub/secureboot.c
@@ -12,44 +12,34 @@
#include "efistub.h"
-/* BIOS variables */
-static const efi_guid_t efi_variable_guid = EFI_GLOBAL_VARIABLE_GUID;
-static const efi_char16_t efi_SecureBoot_name[] = L"SecureBoot";
-static const efi_char16_t efi_SetupMode_name[] = L"SetupMode";
-
/* SHIM variables */
static const efi_guid_t shim_guid = EFI_SHIM_LOCK_GUID;
static const efi_char16_t shim_MokSBState_name[] = L"MokSBState";
+static efi_status_t get_var(efi_char16_t *name, efi_guid_t *vendor, u32 *attr,
+ unsigned long *data_size, void *data)
+{
+ return get_efi_var(name, vendor, attr, data_size, data);
+}
+
/*
* Determine whether we're in secure boot mode.
- *
- * Please keep the logic in sync with
- * arch/x86/xen/efi.c:xen_efi_get_secureboot().
*/
enum efi_secureboot_mode efi_get_secureboot(void)
{
u32 attr;
- u8 secboot, setupmode, moksbstate;
unsigned long size;
+ enum efi_secureboot_mode mode;
efi_status_t status;
+ u8 moksbstate;
- size = sizeof(secboot);
- status = get_efi_var(efi_SecureBoot_name, &efi_variable_guid,
- NULL, &size, &secboot);
- if (status == EFI_NOT_FOUND)
- return efi_secureboot_mode_disabled;
- if (status != EFI_SUCCESS)
- goto out_efi_err;
-
- size = sizeof(setupmode);
- status = get_efi_var(efi_SetupMode_name, &efi_variable_guid,
- NULL, &size, &setupmode);
- if (status != EFI_SUCCESS)
- goto out_efi_err;
-
- if (secboot == 0 || setupmode == 1)
- return efi_secureboot_mode_disabled;
+ mode = efi_get_secureboot_mode(get_var);
+ if (mode == efi_secureboot_mode_unknown) {
+ efi_err("Could not determine UEFI Secure Boot status.\n");
+ return efi_secureboot_mode_unknown;
+ }
+ if (mode != efi_secureboot_mode_enabled)
+ return mode;
/*
* See if a user has put the shim into insecure mode. If so, and if the
@@ -69,8 +59,4 @@
secure_boot_enabled:
efi_info("UEFI Secure Boot is enabled.\n");
return efi_secureboot_mode_enabled;
-
-out_efi_err:
- efi_err("Could not determine UEFI Secure Boot status.\n");
- return efi_secureboot_mode_unknown;
}
diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c
index 3672539..f14c4ff 100644
--- a/drivers/firmware/efi/libstub/x86-stub.c
+++ b/drivers/firmware/efi/libstub/x86-stub.c
@@ -715,8 +715,11 @@
(IS_ENABLED(CONFIG_X86_32) && buffer_end > KERNEL_IMAGE_SIZE) ||
(IS_ENABLED(CONFIG_X86_64) && buffer_end > MAXMEM_X86_64_4LEVEL) ||
(image_offset == 0)) {
+ extern char _bss[];
+
status = efi_relocate_kernel(&bzimage_addr,
- hdr->init_size, hdr->init_size,
+ (unsigned long)_bss - bzimage_addr,
+ hdr->init_size,
hdr->pref_address,
hdr->kernel_alignment,
LOAD_PHYSICAL_ADDR);
diff --git a/drivers/firmware/efi/test/efi_test.c b/drivers/firmware/efi/test/efi_test.c
index ddf9eae..47d67bb 100644
--- a/drivers/firmware/efi/test/efi_test.c
+++ b/drivers/firmware/efi/test/efi_test.c
@@ -663,6 +663,19 @@
return rv;
}
+static long efi_runtime_get_supported_mask(unsigned long arg)
+{
+ unsigned int __user *supported_mask;
+ int rv = 0;
+
+ supported_mask = (unsigned int *)arg;
+
+ if (put_user(efi.runtime_supported_mask, supported_mask))
+ rv = -EFAULT;
+
+ return rv;
+}
+
static long efi_test_ioctl(struct file *file, unsigned int cmd,
unsigned long arg)
{
@@ -699,6 +712,9 @@
case EFI_RUNTIME_RESET_SYSTEM:
return efi_runtime_reset_system(arg);
+
+ case EFI_RUNTIME_GET_SUPPORTED_MASK:
+ return efi_runtime_get_supported_mask(arg);
}
return -ENOTTY;
diff --git a/drivers/firmware/efi/test/efi_test.h b/drivers/firmware/efi/test/efi_test.h
index f2446aa..117349e 100644
--- a/drivers/firmware/efi/test/efi_test.h
+++ b/drivers/firmware/efi/test/efi_test.h
@@ -118,4 +118,7 @@
#define EFI_RUNTIME_RESET_SYSTEM \
_IOW('p', 0x0B, struct efi_resetsystem)
+#define EFI_RUNTIME_GET_SUPPORTED_MASK \
+ _IOR('p', 0x0C, unsigned int)
+
#endif /* _DRIVERS_FIRMWARE_EFI_TEST_H_ */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
index 65d1b23..b9c11c2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
@@ -1414,10 +1414,12 @@
pm_runtime_put_autosuspend(connector->dev->dev);
}
- drm_dp_set_subconnector_property(&amdgpu_connector->base,
- ret,
- amdgpu_dig_connector->dpcd,
- amdgpu_dig_connector->downstream_ports);
+ if (connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort ||
+ connector->connector_type == DRM_MODE_CONNECTOR_eDP)
+ drm_dp_set_subconnector_property(&amdgpu_connector->base,
+ ret,
+ amdgpu_dig_connector->dpcd,
+ amdgpu_dig_connector->downstream_ports);
return ret;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 7d2f7a2..1cb7d73 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -5069,8 +5069,7 @@
* @pdev: pointer to PCI device
*
* Called when the error recovery driver tells us that its
- * OK to resume normal operation. Use completion to allow
- * halted scsi ops to resume.
+ * OK to resume normal operation.
*/
void amdgpu_pci_resume(struct pci_dev *pdev)
{
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index c2ced5b..6e679db5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -496,7 +496,8 @@
break;
}
- if (!amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_DCE)) {
+ if (amdgpu_sriov_vf(adev) ||
+ !amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_DCE)) {
size = 0;
} else {
size = amdgpu_gmc_get_vbios_fb_size(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index fc9bb94..5f4805e 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -1647,7 +1647,7 @@
}
/* No CPG in Arcturus */
- if (adev->asic_type != CHIP_ARCTURUS) {
+ if (adev->gfx.num_gfx_rings) {
r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name);
if (r)
return r;
@@ -2633,7 +2633,14 @@
static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
bool enable)
{
- u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
+ u32 tmp;
+
+ /* don't toggle interrupts that are only applicable
+ * to me0 pipe0 on AISCs that have me0 removed */
+ if (!adev->gfx.num_gfx_rings)
+ return;
+
+ tmp= RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
@@ -3822,7 +3829,7 @@
gfx_v9_0_enable_gui_idle_interrupt(adev, false);
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
- if (adev->asic_type != CHIP_ARCTURUS) {
+ if (adev->gfx.num_gfx_rings) {
/* legacy firmware loading */
r = gfx_v9_0_cp_gfx_load_microcode(adev);
if (r)
@@ -3838,7 +3845,7 @@
if (r)
return r;
- if (adev->asic_type != CHIP_ARCTURUS) {
+ if (adev->gfx.num_gfx_rings) {
r = gfx_v9_0_cp_gfx_resume(adev);
if (r)
return r;
@@ -3848,7 +3855,7 @@
if (r)
return r;
- if (adev->asic_type != CHIP_ARCTURUS) {
+ if (adev->gfx.num_gfx_rings) {
ring = &adev->gfx.gfx_ring[0];
r = amdgpu_ring_test_helper(ring);
if (r)
@@ -3884,7 +3891,7 @@
static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
- if (adev->asic_type != CHIP_ARCTURUS)
+ if (adev->gfx.num_gfx_rings)
gfx_v9_0_cp_gfx_enable(adev, enable);
gfx_v9_0_cp_compute_enable(adev, enable);
}
@@ -4025,7 +4032,7 @@
/* stop the rlc */
adev->gfx.rlc.funcs->stop(adev);
- if (adev->asic_type != CHIP_ARCTURUS)
+ if (adev->gfx.num_gfx_rings)
/* Disable GFX parsing/prefetching */
gfx_v9_0_cp_gfx_enable(adev, false);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index e1531d9..e22268f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -1577,13 +1577,10 @@
gmc_v9_0_init_golden_registers(adev);
if (adev->mode_info.num_crtc) {
- if (adev->asic_type != CHIP_ARCTURUS) {
- /* Lockout access through VGA aperture*/
- WREG32_FIELD15(DCE, 0, VGA_HDP_CONTROL, VGA_MEMORY_DISABLE, 1);
-
- /* disable VGA render */
- WREG32_FIELD15(DCE, 0, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 0);
- }
+ /* Lockout access through VGA aperture*/
+ WREG32_FIELD15(DCE, 0, VGA_HDP_CONTROL, VGA_MEMORY_DISABLE, 1);
+ /* disable VGA render */
+ WREG32_FIELD15(DCE, 0, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 0);
}
amdgpu_device_program_register_sequence(adev,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 50922ff..72c893f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -422,7 +422,7 @@
.mqd_size_aligned = MQD_SIZE_ALIGNED,
.needs_iommu_device = false,
.supports_cwsr = true,
- .needs_pci_atomics = false,
+ .needs_pci_atomics = true,
.num_sdma_engines = 2,
.num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 8,
@@ -440,7 +440,7 @@
.mqd_size_aligned = MQD_SIZE_ALIGNED,
.needs_iommu_device = false,
.supports_cwsr = true,
- .needs_pci_atomics = false,
+ .needs_pci_atomics = true,
.num_sdma_engines = 2,
.num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 8,
@@ -458,7 +458,7 @@
.mqd_size_aligned = MQD_SIZE_ALIGNED,
.needs_iommu_device = false,
.supports_cwsr = true,
- .needs_pci_atomics = false,
+ .needs_pci_atomics = true,
.num_sdma_engines = 2,
.num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 8,
@@ -476,7 +476,7 @@
.mqd_size_aligned = MQD_SIZE_ALIGNED,
.needs_iommu_device = false,
.supports_cwsr = true,
- .needs_pci_atomics = false,
+ .needs_pci_atomics = true,
.num_sdma_engines = 4,
.num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 8,
@@ -494,7 +494,7 @@
.mqd_size_aligned = MQD_SIZE_ALIGNED,
.needs_iommu_device = false,
.supports_cwsr = true,
- .needs_pci_atomics = false,
+ .needs_pci_atomics = true,
.num_sdma_engines = 2,
.num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 8,
@@ -530,7 +530,7 @@
.mqd_size_aligned = MQD_SIZE_ALIGNED,
.needs_iommu_device = false,
.supports_cwsr = true,
- .needs_pci_atomics = false,
+ .needs_pci_atomics = true,
.num_sdma_engines = 2,
.num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 8,
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 2c4dbde..519080e 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -2386,7 +2386,8 @@
drm_connector_update_edid_property(connector,
aconnector->edid);
- drm_add_edid_modes(connector, aconnector->edid);
+ aconnector->num_modes = drm_add_edid_modes(connector, aconnector->edid);
+ drm_connector_list_update(connector);
if (aconnector->dc_link->aux_mode)
drm_dp_cec_set_edid(&aconnector->dm_dp_aux.aux,
@@ -9367,7 +9368,7 @@
if (ret)
goto fail;
- if (dm_old_crtc_state->dsc_force_changed && new_crtc_state)
+ if (dm_old_crtc_state->dsc_force_changed)
new_crtc_state->mode_changed = true;
}
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c
index 3577785..26ed70e 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c
@@ -165,7 +165,10 @@
handler = list_entry(entry, struct amdgpu_dm_irq_handler_data,
list);
- if (ih == handler) {
+ if (handler == NULL)
+ continue;
+
+ if (ih == handler->handler) {
/* Found our handler. Remove it from the list. */
list_del(&handler->list);
handler_removed = true;
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c
index d00b025..01b1853 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c
@@ -75,15 +75,8 @@
for (i = 0; i < dc->link_count; i++) {
const struct dc_link *link = dc->links[i];
- /*
- * Only notify active stream or virtual stream.
- * Need to notify virtual stream to work around
- * headless case. HPD does not fire when system is in
- * S0i2.
- */
/* abusing the fact that the dig and phy are coupled to see if the phy is enabled */
- if (link->connector_signal == SIGNAL_TYPE_VIRTUAL ||
- link->link_enc->funcs->is_dig_enabled(link->link_enc))
+ if (link->link_enc->funcs->is_dig_enabled(link->link_enc))
display_count++;
}
@@ -234,12 +227,11 @@
rn_vbios_smu_set_dppclk(clk_mgr, clk_mgr_base->clks.dppclk_khz);
// always update dtos unless clock is lowered and not safe to lower
- if (new_clocks->dppclk_khz >= dc->current_state->bw_ctx.bw.dcn.clk.dppclk_khz)
- rn_update_clocks_update_dpp_dto(
- clk_mgr,
- context,
- clk_mgr_base->clks.actual_dppclk_khz,
- safe_to_lower);
+ rn_update_clocks_update_dpp_dto(
+ clk_mgr,
+ context,
+ clk_mgr_base->clks.actual_dppclk_khz,
+ safe_to_lower);
}
if (update_dispclk &&
@@ -738,32 +730,32 @@
.wm_inst = WM_A,
.wm_type = WM_TYPE_PSTATE_CHG,
.pstate_latency_us = 11.72,
- .sr_exit_time_us = 9.09,
- .sr_enter_plus_exit_time_us = 10.14,
+ .sr_exit_time_us = 11.90,
+ .sr_enter_plus_exit_time_us = 12.80,
.valid = true,
},
{
.wm_inst = WM_B,
.wm_type = WM_TYPE_PSTATE_CHG,
.pstate_latency_us = 11.72,
- .sr_exit_time_us = 11.12,
- .sr_enter_plus_exit_time_us = 12.48,
+ .sr_exit_time_us = 13.18,
+ .sr_enter_plus_exit_time_us = 14.30,
.valid = true,
},
{
.wm_inst = WM_C,
.wm_type = WM_TYPE_PSTATE_CHG,
.pstate_latency_us = 11.72,
- .sr_exit_time_us = 11.12,
- .sr_enter_plus_exit_time_us = 12.48,
+ .sr_exit_time_us = 13.18,
+ .sr_enter_plus_exit_time_us = 14.30,
.valid = true,
},
{
.wm_inst = WM_D,
.wm_type = WM_TYPE_PSTATE_CHG,
.pstate_latency_us = 11.72,
- .sr_exit_time_us = 11.12,
- .sr_enter_plus_exit_time_us = 12.48,
+ .sr_exit_time_us = 13.18,
+ .sr_enter_plus_exit_time_us = 14.30,
.valid = true,
},
}
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.c
index 11a7b58..7deeec9 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.c
@@ -99,7 +99,7 @@
/* Trigger the message transaction by writing the message ID */
REG_WRITE(MP1_SMN_C2PMSG_67, msg_id);
- result = rn_smu_wait_for_response(clk_mgr, 10, 1000);
+ result = rn_smu_wait_for_response(clk_mgr, 10, 200000);
ASSERT(result == VBIOSSMC_Result_OK || result == VBIOSSMC_Result_UnknownCmd);
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c
index 9a8e66b..991b9c5b 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c
@@ -74,15 +74,8 @@
for (i = 0; i < dc->link_count; i++) {
const struct dc_link *link = dc->links[i];
- /*
- * Only notify active stream or virtual stream.
- * Need to notify virtual stream to work around
- * headless case. HPD does not fire when system is in
- * S0i2.
- */
/* abusing the fact that the dig and phy are coupled to see if the phy is enabled */
- if (link->connector_signal == SIGNAL_TYPE_VIRTUAL ||
- link->link_enc->funcs->is_dig_enabled(link->link_enc))
+ if (link->link_enc->funcs->is_dig_enabled(link->link_enc))
display_count++;
}
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index 7339d98..58eb0d6 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -2625,26 +2625,6 @@
}
}
- if (update_type != UPDATE_TYPE_FAST) {
- // If changing VTG FP2: wait until back in vactive to program FP2
- // Need to ensure that pipe unlock happens soon after to minimize race condition
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
-
- if (pipe_ctx->top_pipe || pipe_ctx->stream != stream)
- continue;
-
- if (!pipe_ctx->update_flags.bits.global_sync)
- continue;
-
- pipe_ctx->stream_res.tg->funcs->wait_for_state(pipe_ctx->stream_res.tg, CRTC_STATE_VBLANK);
- pipe_ctx->stream_res.tg->funcs->wait_for_state(pipe_ctx->stream_res.tg, CRTC_STATE_VACTIVE);
-
- pipe_ctx->stream_res.tg->funcs->set_vtg_params(
- pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing, true);
- }
- }
-
if ((update_type != UPDATE_TYPE_FAST) && dc->hwss.interdependent_update_lock)
dc->hwss.interdependent_update_lock(dc, context, false);
else
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
index 6b11d4a..2fc1223 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
@@ -3173,13 +3173,7 @@
}
/* DPCD 0x5 bit 0 = 1, it indicate it's branch device */
- if (ds_port.fields.PORT_TYPE == DOWNSTREAM_DP) {
- link->dpcd_caps.is_branch_dev = false;
- }
-
- else {
- link->dpcd_caps.is_branch_dev = ds_port.fields.PORT_PRESENT;
- }
+ link->dpcd_caps.is_branch_dev = ds_port.fields.PORT_PRESENT;
switch (ds_port.fields.PORT_TYPE) {
case DOWNSTREAM_VGA:
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c
index 41679ad..9e796df 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c
@@ -1241,6 +1241,22 @@
REG_UPDATE(DCHUBP_CNTL, HUBP_VTG_SEL, otg_inst);
}
+bool hubp1_in_blank(struct hubp *hubp)
+{
+ uint32_t in_blank;
+ struct dcn10_hubp *hubp1 = TO_DCN10_HUBP(hubp);
+
+ REG_GET(DCHUBP_CNTL, HUBP_IN_BLANK, &in_blank);
+ return in_blank ? true : false;
+}
+
+void hubp1_soft_reset(struct hubp *hubp, bool reset)
+{
+ struct dcn10_hubp *hubp1 = TO_DCN10_HUBP(hubp);
+
+ REG_UPDATE(DCHUBP_CNTL, HUBP_DISABLE, reset ? 1 : 0);
+}
+
void hubp1_init(struct hubp *hubp)
{
//do nothing
@@ -1272,6 +1288,8 @@
.dmdata_set_attributes = NULL,
.dmdata_load = NULL,
+ .hubp_soft_reset = hubp1_soft_reset,
+ .hubp_in_blank = hubp1_in_blank,
};
/*****************************************/
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h
index 780af5b..a9a6ed7 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h
@@ -260,6 +260,7 @@
HUBP_SF(HUBP0_DCHUBP_CNTL, HUBP_NO_OUTSTANDING_REQ, mask_sh),\
HUBP_SF(HUBP0_DCHUBP_CNTL, HUBP_VTG_SEL, mask_sh),\
HUBP_SF(HUBP0_DCHUBP_CNTL, HUBP_DISABLE, mask_sh),\
+ HUBP_SF(HUBP0_DCHUBP_CNTL, HUBP_IN_BLANK, mask_sh),\
HUBP_SF(HUBP0_DCSURF_ADDR_CONFIG, NUM_PIPES, mask_sh),\
HUBP_SF(HUBP0_DCSURF_ADDR_CONFIG, NUM_BANKS, mask_sh),\
HUBP_SF(HUBP0_DCSURF_ADDR_CONFIG, PIPE_INTERLEAVE, mask_sh),\
@@ -455,6 +456,7 @@
type HUBP_VTG_SEL;\
type HUBP_UNDERFLOW_STATUS;\
type HUBP_UNDERFLOW_CLEAR;\
+ type HUBP_IN_BLANK;\
type NUM_PIPES;\
type NUM_BANKS;\
type PIPE_INTERLEAVE;\
@@ -772,5 +774,7 @@
void hubp1_init(struct hubp *hubp);
void hubp1_read_state_common(struct hubp *hubp);
+bool hubp1_in_blank(struct hubp *hubp);
+void hubp1_soft_reset(struct hubp *hubp, bool reset);
#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c
index 3fcd408..a46cb20 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c
@@ -467,6 +467,17 @@
REG_SET(CUR[opp_id], 0, CUR_VUPDATE_LOCK_SET, lock ? 1 : 0);
}
+unsigned int mpc1_get_mpc_out_mux(struct mpc *mpc, int opp_id)
+{
+ struct dcn10_mpc *mpc10 = TO_DCN10_MPC(mpc);
+ uint32_t val;
+
+ if (opp_id < MAX_OPP && REG(MUX[opp_id]))
+ REG_GET(MUX[opp_id], MPC_OUT_MUX, &val);
+
+ return val;
+}
+
static const struct mpc_funcs dcn10_mpc_funcs = {
.read_mpcc_state = mpc1_read_mpcc_state,
.insert_plane = mpc1_insert_plane,
@@ -483,6 +494,7 @@
.set_denorm_clamp = NULL,
.set_output_csc = NULL,
.set_output_gamma = NULL,
+ .get_mpc_out_mux = mpc1_get_mpc_out_mux,
};
void dcn10_mpc_construct(struct dcn10_mpc *mpc10,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.h
index 66a4719..dbfffc6 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.h
@@ -200,4 +200,5 @@
void mpc1_cursor_lock(struct mpc *mpc, int opp_id, bool lock);
+unsigned int mpc1_get_mpc_out_mux(struct mpc *mpc, int opp_id);
#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c
index b7e44e5..0df0da2 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c
@@ -1595,6 +1595,8 @@
.hubp_set_flip_control_surface_gsl = hubp2_set_flip_control_surface_gsl,
.hubp_init = hubp1_init,
.validate_dml_output = hubp2_validate_dml_output,
+ .hubp_in_blank = hubp1_in_blank,
+ .hubp_soft_reset = hubp1_soft_reset,
};
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
index 31a4771..cb822df 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
@@ -1586,7 +1586,10 @@
&& !pipe_ctx->top_pipe && !pipe_ctx->prev_odm_pipe)
hws->funcs.blank_pixel_data(dc, pipe_ctx, !pipe_ctx->plane_state->visible);
- if (pipe_ctx->update_flags.bits.global_sync) {
+ /* Only update TG on top pipe */
+ if (pipe_ctx->update_flags.bits.global_sync && !pipe_ctx->top_pipe
+ && !pipe_ctx->prev_odm_pipe) {
+
pipe_ctx->stream_res.tg->funcs->program_global_sync(
pipe_ctx->stream_res.tg,
pipe_ctx->pipe_dlg_param.vready_offset,
@@ -1594,8 +1597,11 @@
pipe_ctx->pipe_dlg_param.vupdate_offset,
pipe_ctx->pipe_dlg_param.vupdate_width);
+ pipe_ctx->stream_res.tg->funcs->wait_for_state(pipe_ctx->stream_res.tg, CRTC_STATE_VBLANK);
+ pipe_ctx->stream_res.tg->funcs->wait_for_state(pipe_ctx->stream_res.tg, CRTC_STATE_VACTIVE);
+
pipe_ctx->stream_res.tg->funcs->set_vtg_params(
- pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing, false);
+ pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing, true);
if (hws->funcs.setup_vupdate_interrupt)
hws->funcs.setup_vupdate_interrupt(dc, pipe_ctx);
@@ -2570,4 +2576,4 @@
{
pipe_ctx->stream_res.opp->funcs->opp_set_disp_pattern_generator(pipe_ctx->stream_res.opp, test_pattern,
color_space, color_depth, solid_color, width, height, offset);
-}
\ No newline at end of file
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c
index 99cc095..6a99fdd 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c
@@ -556,6 +556,7 @@
.set_ocsc_default = mpc2_set_ocsc_default,
.set_output_gamma = mpc2_set_output_gamma,
.power_on_mpc_mem_pwr = mpc20_power_on_ogam_lut,
+ .get_mpc_out_mux = mpc1_get_mpc_out_mux,
};
void dcn20_mpc_construct(struct dcn20_mpc *mpc20,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
index ff36db5..e04ecf0 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
@@ -1933,7 +1933,7 @@
next_odm_pipe->stream_res.opp = pool->opps[next_odm_pipe->pipe_idx];
else
next_odm_pipe->stream_res.opp = next_odm_pipe->top_pipe->stream_res.opp;
- if (next_odm_pipe->stream->timing.flags.DSC == 1) {
+ if (next_odm_pipe->stream->timing.flags.DSC == 1 && !next_odm_pipe->top_pipe) {
dcn20_acquire_dsc(dc, res_ctx, &next_odm_pipe->stream_res.dsc, next_odm_pipe->pipe_idx);
ASSERT(next_odm_pipe->stream_res.dsc);
if (next_odm_pipe->stream_res.dsc == NULL)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c
index af462fe..88ffa9f 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c
@@ -509,6 +509,8 @@
.hubp_clear_underflow = hubp2_clear_underflow,
.hubp_set_flip_control_surface_gsl = hubp2_set_flip_control_surface_gsl,
.hubp_init = hubp3_init,
+ .hubp_in_blank = hubp1_in_blank,
+ .hubp_soft_reset = hubp1_soft_reset,
};
bool hubp3_construct(
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c
index d7d053f..3e6f760 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c
@@ -1428,6 +1428,7 @@
.program_3dlut = mpc3_program_3dlut,
.release_rmu = mpcc3_release_rmu,
.power_on_mpc_mem_pwr = mpc20_power_on_ogam_lut,
+ .get_mpc_out_mux = mpc1_get_mpc_out_mux,
};
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h
index 315e306..22f3f64 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h
@@ -188,6 +188,8 @@
void (*set_unbounded_requesting)(
struct hubp *hubp,
bool enable);
+ bool (*hubp_in_blank)(struct hubp *hubp);
+ void (*hubp_soft_reset)(struct hubp *hubp, bool reset);
};
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h b/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h
index 879f502..75c77ad 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h
@@ -359,6 +359,10 @@
int (*release_rmu)(struct mpc *mpc, int mpcc_id);
+ unsigned int (*get_mpc_out_mux)(
+ struct mpc *mpc,
+ int opp_id);
+
};
#endif
diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
index f512bda..249a076 100644
--- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
+++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
@@ -47,10 +47,10 @@
/* Firmware versioning. */
#ifdef DMUB_EXPOSE_VERSION
-#define DMUB_FW_VERSION_GIT_HASH 0xa18e25995
+#define DMUB_FW_VERSION_GIT_HASH 0xf51b86a
#define DMUB_FW_VERSION_MAJOR 0
#define DMUB_FW_VERSION_MINOR 0
-#define DMUB_FW_VERSION_REVISION 46
+#define DMUB_FW_VERSION_REVISION 47
#define DMUB_FW_VERSION_TEST 0
#define DMUB_FW_VERSION_VBIOS 0
#define DMUB_FW_VERSION_HOTFIX 0
diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c
index f244b72..73ca49f 100644
--- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c
+++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c
@@ -128,8 +128,12 @@
static inline enum mod_hdcp_status check_device_count(struct mod_hdcp *hdcp)
{
- /* device count must be greater than or equal to tracked hdcp displays */
- return (get_device_count(hdcp) < get_active_display_count(hdcp)) ?
+ /* Some MST display may choose to report the internal panel as an HDCP RX.
+ * To update this condition with 1(because the immediate repeater's internal
+ * panel is possibly not included in DEVICE_COUNT) + get_device_count(hdcp).
+ * Device count must be greater than or equal to tracked hdcp displays.
+ */
+ return ((1 + get_device_count(hdcp)) < get_active_display_count(hdcp)) ?
MOD_HDCP_STATUS_HDCP1_DEVICE_COUNT_MISMATCH_FAILURE :
MOD_HDCP_STATUS_SUCCESS;
}
diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c
index 549c113..a0895a7 100644
--- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c
+++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c
@@ -207,8 +207,11 @@
static enum mod_hdcp_status check_device_count(struct mod_hdcp *hdcp)
{
- /* device count must be greater than or equal to tracked hdcp displays */
- return (get_device_count(hdcp) < get_active_display_count(hdcp)) ?
+ /* Some MST display may choose to report the internal panel as an HDCP RX. */
+ /* To update this condition with 1(because the immediate repeater's internal */
+ /* panel is possibly not included in DEVICE_COUNT) + get_device_count(hdcp). */
+ /* Device count must be greater than or equal to tracked hdcp displays. */
+ return ((1 + get_device_count(hdcp)) < get_active_display_count(hdcp)) ?
MOD_HDCP_STATUS_HDCP2_DEVICE_COUNT_MISMATCH_FAILURE :
MOD_HDCP_STATUS_SUCCESS;
}
diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c
index cc983f6..4fd8bce 100644
--- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c
+++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c
@@ -82,22 +82,24 @@
unsigned char deviation_gain;
unsigned char min_knee;
unsigned char max_knee;
+ unsigned short blRampReduction;
+ unsigned short blRampStart;
};
static const struct abm_parameters abm_settings_config0[abm_defines_max_level] = {
-// min_red max_red bright_pos dark_pos brightness_gain contrast deviation min_knee max_knee
- {0xff, 0xbf, 0x20, 0x00, 0xff, 0x99, 0xb3, 0x40, 0xe0},
- {0xde, 0x85, 0x20, 0x00, 0xff, 0x90, 0xa8, 0x40, 0xdf},
- {0xb0, 0x50, 0x20, 0x00, 0xc0, 0x88, 0x78, 0x70, 0xa0},
- {0x82, 0x40, 0x20, 0x00, 0x00, 0xff, 0xb3, 0x70, 0x70},
+// min_red max_red bright_pos dark_pos bright_gain contrast dev min_knee max_knee blStart blRed
+ {0xff, 0xbf, 0x20, 0x00, 0xff, 0x99, 0xb3, 0x40, 0xe0, 0xCCCC, 0xCCCC},
+ {0xde, 0x85, 0x20, 0x00, 0xff, 0x90, 0xa8, 0x40, 0xdf, 0xCCCC, 0xCCCC},
+ {0xb0, 0x50, 0x20, 0x00, 0xc0, 0x88, 0x78, 0x70, 0xa0, 0xCCCC, 0xCCCC},
+ {0x82, 0x40, 0x20, 0x00, 0x00, 0xff, 0xb3, 0x70, 0x70, 0xCCCC, 0xCCCC},
};
static const struct abm_parameters abm_settings_config1[abm_defines_max_level] = {
-// min_red max_red bright_pos dark_pos brightness_gain contrast deviation min_knee max_knee
- {0xf0, 0xd9, 0x20, 0x00, 0x00, 0xff, 0xb3, 0x70, 0x70},
- {0xcd, 0xa5, 0x20, 0x00, 0x00, 0xff, 0xb3, 0x70, 0x70},
- {0x99, 0x65, 0x20, 0x00, 0x00, 0xff, 0xb3, 0x70, 0x70},
- {0x82, 0x4d, 0x20, 0x00, 0x00, 0xff, 0xb3, 0x70, 0x70},
+// min_red max_red bright_pos dark_pos bright_gain contrast dev min_knee max_knee blStart blRed
+ {0xf0, 0xd9, 0x20, 0x00, 0x00, 0xff, 0xb3, 0x70, 0x70, 0xCCCC, 0xCCCC},
+ {0xcd, 0xa5, 0x20, 0x00, 0x00, 0xff, 0xb3, 0x70, 0x70, 0xCCCC, 0xCCCC},
+ {0x99, 0x65, 0x20, 0x00, 0x00, 0xff, 0xb3, 0x70, 0x70, 0xCCCC, 0xCCCC},
+ {0x82, 0x4d, 0x20, 0x00, 0x00, 0xff, 0xb3, 0x70, 0x70, 0xCCCC, 0xCCCC},
};
static const struct abm_parameters * const abm_settings[] = {
@@ -662,6 +664,7 @@
{
struct iram_table_v_2_2 ram_table;
struct abm_config_table config;
+ unsigned int set = params.set;
bool result = false;
uint32_t i, j = 0;
@@ -710,6 +713,18 @@
config.max_knee[i] = ram_table.max_knee[i];
}
+ if (params.backlight_ramping_override) {
+ for (i = 0; i < NUM_AGGR_LEVEL; i++) {
+ config.blRampReduction[i] = params.backlight_ramping_reduction;
+ config.blRampStart[i] = params.backlight_ramping_start;
+ }
+ } else {
+ for (i = 0; i < NUM_AGGR_LEVEL; i++) {
+ config.blRampReduction[i] = abm_settings[set][i].blRampReduction;
+ config.blRampStart[i] = abm_settings[set][i].blRampStart;
+ }
+ }
+
config.min_abm_backlight = ram_table.min_abm_backlight;
#if defined(CONFIG_DRM_AMD_DC_DCN)
diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.h b/drivers/gpu/drm/amd/display/modules/power/power_helpers.h
index fa4728d..6f2eecc 100644
--- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.h
+++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.h
@@ -39,6 +39,7 @@
struct dmcu_iram_parameters {
unsigned int *backlight_lut_array;
unsigned int backlight_lut_array_size;
+ bool backlight_ramping_override;
unsigned int backlight_ramping_reduction;
unsigned int backlight_ramping_start;
unsigned int min_abm_backlight;
diff --git a/drivers/gpu/drm/amd/pm/inc/smu_v11_0.h b/drivers/gpu/drm/amd/pm/inc/smu_v11_0.h
index e5aa072..13de692 100644
--- a/drivers/gpu/drm/amd/pm/inc/smu_v11_0.h
+++ b/drivers/gpu/drm/amd/pm/inc/smu_v11_0.h
@@ -30,7 +30,7 @@
#define SMU11_DRIVER_IF_VERSION_NV10 0x36
#define SMU11_DRIVER_IF_VERSION_NV12 0x36
#define SMU11_DRIVER_IF_VERSION_NV14 0x36
-#define SMU11_DRIVER_IF_VERSION_Sienna_Cichlid 0x3B
+#define SMU11_DRIVER_IF_VERSION_Sienna_Cichlid 0x3D
#define SMU11_DRIVER_IF_VERSION_Navy_Flounder 0xC
#define SMU11_DRIVER_IF_VERSION_VANGOGH 0x02
#define SMU11_DRIVER_IF_VERSION_Dimgrey_Cavefish 0xF
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
index 9bccf2a..8cb4fce 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
@@ -724,8 +724,13 @@
static int vangogh_system_features_control(struct smu_context *smu, bool en)
{
- return smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_RlcPowerNotify,
- en ? RLC_STATUS_NORMAL : RLC_STATUS_OFF, NULL);
+ struct amdgpu_device *adev = smu->adev;
+
+ if (adev->pm.fw_version >= 0x43f1700)
+ return smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_RlcPowerNotify,
+ en ? RLC_STATUS_NORMAL : RLC_STATUS_OFF, NULL);
+ else
+ return 0;
}
static const struct pptable_funcs vangogh_ppt_funcs = {
diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_dev.c b/drivers/gpu/drm/arm/display/komeda/komeda_dev.c
index 1f8195b..ca891ae 100644
--- a/drivers/gpu/drm/arm/display/komeda/komeda_dev.c
+++ b/drivers/gpu/drm/arm/display/komeda/komeda_dev.c
@@ -152,7 +152,6 @@
ret = of_reserved_mem_device_init(dev);
if (ret && ret != -ENODEV)
return ret;
- ret = 0;
for_each_available_child_of_node(np, child) {
if (of_node_name_eq(child, "pipeline")) {
diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_kms.c b/drivers/gpu/drm/arm/display/komeda/komeda_kms.c
index 6b99df6..034ee08 100644
--- a/drivers/gpu/drm/arm/display/komeda/komeda_kms.c
+++ b/drivers/gpu/drm/arm/display/komeda/komeda_kms.c
@@ -81,10 +81,10 @@
drm_atomic_helper_commit_modeset_enables(dev, old_state);
- drm_atomic_helper_wait_for_flip_done(dev, old_state);
-
drm_atomic_helper_commit_hw_done(old_state);
+ drm_atomic_helper_wait_for_flip_done(dev, old_state);
+
drm_atomic_helper_cleanup_planes(dev, old_state);
}
diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_pipeline.c b/drivers/gpu/drm/arm/display/komeda/komeda_pipeline.c
index 452e505..719a797 100644
--- a/drivers/gpu/drm/arm/display/komeda/komeda_pipeline.c
+++ b/drivers/gpu/drm/arm/display/komeda/komeda_pipeline.c
@@ -137,9 +137,10 @@
u32 comp_mask)
{
struct komeda_component *c = NULL;
+ unsigned long comp_mask_local = (unsigned long)comp_mask;
int id;
- id = find_first_bit((unsigned long *)&comp_mask, 32);
+ id = find_first_bit(&comp_mask_local, 32);
if (id < 32)
c = komeda_pipeline_get_component(pipe, id);
diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_pipeline_state.c b/drivers/gpu/drm/arm/display/komeda/komeda_pipeline_state.c
index 8f32ae7..5c08511 100644
--- a/drivers/gpu/drm/arm/display/komeda/komeda_pipeline_state.c
+++ b/drivers/gpu/drm/arm/display/komeda/komeda_pipeline_state.c
@@ -704,10 +704,10 @@
cin->layer_alpha = dflow->layer_alpha;
old_st = komeda_component_get_old_state(&compiz->base, drm_st);
- WARN_ON(!old_st);
/* compare with old to check if this input has been changed */
- if (memcmp(&(to_compiz_st(old_st)->cins[idx]), cin, sizeof(*cin)))
+ if (WARN_ON(!old_st) ||
+ memcmp(&(to_compiz_st(old_st)->cins[idx]), cin, sizeof(*cin)))
c_st->changed_active_inputs |= BIT(idx);
komeda_component_add_input(c_st, &dflow->input, idx);
diff --git a/drivers/gpu/drm/i915/display/intel_lpe_audio.c b/drivers/gpu/drm/i915/display/intel_lpe_audio.c
index ad5cc13..1c939f9 100644
--- a/drivers/gpu/drm/i915/display/intel_lpe_audio.c
+++ b/drivers/gpu/drm/i915/display/intel_lpe_audio.c
@@ -297,13 +297,9 @@
*/
void intel_lpe_audio_teardown(struct drm_i915_private *dev_priv)
{
- struct irq_desc *desc;
-
if (!HAS_LPE_AUDIO(dev_priv))
return;
- desc = irq_to_desc(dev_priv->lpe_audio.irq);
-
lpe_audio_platdev_destroy(dev_priv);
irq_free_desc(dev_priv->lpe_audio.irq);
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index c80eeac..6cdb052 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -60,6 +60,24 @@
* and related files, but that will be described in separate chapters.
*/
+/*
+ * Interrupt statistic for PMU. Increments the counter only if the
+ * interrupt originated from the the GPU so interrupts from a device which
+ * shares the interrupt line are not accounted.
+ */
+static inline void pmu_irq_stats(struct drm_i915_private *i915,
+ irqreturn_t res)
+{
+ if (unlikely(res != IRQ_HANDLED))
+ return;
+
+ /*
+ * A clever compiler translates that into INC. A not so clever one
+ * should at least prevent store tearing.
+ */
+ WRITE_ONCE(i915->pmu.irq_count, i915->pmu.irq_count + 1);
+}
+
typedef bool (*long_pulse_detect_func)(enum hpd_pin pin, u32 val);
typedef u32 (*hotplug_enables_func)(struct drm_i915_private *i915,
enum hpd_pin pin);
@@ -1668,6 +1686,8 @@
valleyview_pipestat_irq_handler(dev_priv, pipe_stats);
} while (0);
+ pmu_irq_stats(dev_priv, ret);
+
enable_rpm_wakeref_asserts(&dev_priv->runtime_pm);
return ret;
@@ -1745,6 +1765,8 @@
valleyview_pipestat_irq_handler(dev_priv, pipe_stats);
} while (0);
+ pmu_irq_stats(dev_priv, ret);
+
enable_rpm_wakeref_asserts(&dev_priv->runtime_pm);
return ret;
@@ -2155,6 +2177,8 @@
if (sde_ier)
raw_reg_write(regs, SDEIER, sde_ier);
+ pmu_irq_stats(i915, ret);
+
/* IRQs are synced during runtime_suspend, we don't require a wakeref */
enable_rpm_wakeref_asserts(&i915->runtime_pm);
@@ -2541,6 +2565,8 @@
gen8_master_intr_enable(regs);
+ pmu_irq_stats(dev_priv, IRQ_HANDLED);
+
return IRQ_HANDLED;
}
@@ -2636,6 +2662,8 @@
gen11_gu_misc_irq_handler(gt, gu_misc_iir);
+ pmu_irq_stats(i915, IRQ_HANDLED);
+
return IRQ_HANDLED;
}
@@ -3934,6 +3962,8 @@
i8xx_pipestat_irq_handler(dev_priv, iir, pipe_stats);
} while (0);
+ pmu_irq_stats(dev_priv, ret);
+
enable_rpm_wakeref_asserts(&dev_priv->runtime_pm);
return ret;
@@ -4043,6 +4073,8 @@
i915_pipestat_irq_handler(dev_priv, iir, pipe_stats);
} while (0);
+ pmu_irq_stats(dev_priv, ret);
+
enable_rpm_wakeref_asserts(&dev_priv->runtime_pm);
return ret;
@@ -4189,6 +4221,8 @@
i965_pipestat_irq_handler(dev_priv, iir, pipe_stats);
} while (0);
+ pmu_irq_stats(dev_priv, IRQ_HANDLED);
+
enable_rpm_wakeref_asserts(&dev_priv->runtime_pm);
return ret;
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index cd786ad..d76685ce 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -4,7 +4,6 @@
* Copyright © 2017-2018 Intel Corporation
*/
-#include <linux/irq.h>
#include <linux/pm_runtime.h>
#include "gt/intel_engine.h"
@@ -424,22 +423,6 @@
return HRTIMER_RESTART;
}
-static u64 count_interrupts(struct drm_i915_private *i915)
-{
- /* open-coded kstat_irqs() */
- struct irq_desc *desc = irq_to_desc(i915->drm.pdev->irq);
- u64 sum = 0;
- int cpu;
-
- if (!desc || !desc->kstat_irqs)
- return 0;
-
- for_each_possible_cpu(cpu)
- sum += *per_cpu_ptr(desc->kstat_irqs, cpu);
-
- return sum;
-}
-
static void i915_pmu_event_destroy(struct perf_event *event)
{
struct drm_i915_private *i915 =
@@ -590,7 +573,7 @@
USEC_PER_SEC /* to MHz */);
break;
case I915_PMU_INTERRUPTS:
- val = count_interrupts(i915);
+ val = READ_ONCE(pmu->irq_count);
break;
case I915_PMU_RC6_RESIDENCY:
val = get_rc6(&i915->gt);
diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h
index a24885a..8405d6d 100644
--- a/drivers/gpu/drm/i915/i915_pmu.h
+++ b/drivers/gpu/drm/i915/i915_pmu.h
@@ -112,6 +112,14 @@
*/
ktime_t sleep_last;
/**
+ * @irq_count: Number of interrupts
+ *
+ * Intentionally unsigned long to avoid atomics or heuristics on 32bit.
+ * 4e9 interrupts are a lot and postprocessing can really deal with an
+ * occasional wraparound easily. It's 32bit after all.
+ */
+ unsigned long irq_count;
+ /**
* @events_attr_group: Device events attribute group.
*/
struct attribute_group events_attr_group;
diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
index 5455b20..7b2f606 100644
--- a/drivers/gpu/drm/ttm/ttm_pool.c
+++ b/drivers/gpu/drm/ttm/ttm_pool.c
@@ -239,21 +239,6 @@
return p;
}
-/* Count the number of pages available in a pool_type */
-static unsigned int ttm_pool_type_count(struct ttm_pool_type *pt)
-{
- unsigned int count = 0;
- struct page *p;
-
- spin_lock(&pt->lock);
- /* Only used for debugfs, the overhead doesn't matter */
- list_for_each_entry(p, &pt->pages, lru)
- ++count;
- spin_unlock(&pt->lock);
-
- return count;
-}
-
/* Initialize and add a pool type to the global shrinker list */
static void ttm_pool_type_init(struct ttm_pool_type *pt, struct ttm_pool *pool,
enum ttm_caching caching, unsigned int order)
@@ -543,6 +528,20 @@
EXPORT_SYMBOL(ttm_pool_fini);
#ifdef CONFIG_DEBUG_FS
+/* Count the number of pages available in a pool_type */
+static unsigned int ttm_pool_type_count(struct ttm_pool_type *pt)
+{
+ unsigned int count = 0;
+ struct page *p;
+
+ spin_lock(&pt->lock);
+ /* Only used for debugfs, the overhead doesn't matter */
+ list_for_each_entry(p, &pt->pages, lru)
+ ++count;
+ spin_unlock(&pt->lock);
+
+ return count;
+}
/* Dump information about the different pool types */
static void ttm_pool_debugfs_orders(struct ttm_pool_type *pt,
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c
index 560865f..67f86c4 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c
+++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c
@@ -157,12 +157,6 @@
}
EXPORT_SYMBOL(rtrs_clt_put_permit);
-void *rtrs_permit_to_pdu(struct rtrs_permit *permit)
-{
- return permit + 1;
-}
-EXPORT_SYMBOL(rtrs_permit_to_pdu);
-
/**
* rtrs_permit_to_clt_con() - returns RDMA connection pointer by the permit
* @sess: client session pointer
diff --git a/drivers/infiniband/ulp/rtrs/rtrs.h b/drivers/infiniband/ulp/rtrs/rtrs.h
index 9af750f..8738e90 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs.h
+++ b/drivers/infiniband/ulp/rtrs/rtrs.h
@@ -63,13 +63,6 @@
void rtrs_clt_close(struct rtrs_clt *sess);
-/**
- * rtrs_permit_to_pdu() - converts rtrs_permit to opaque pdu pointer
- * @permit: RTRS permit pointer, it associates the memory allocation for future
- * RDMA operation.
- */
-void *rtrs_permit_to_pdu(struct rtrs_permit *permit);
-
enum {
RTRS_PERMIT_NOWAIT = 0,
RTRS_PERMIT_WAIT = 1,
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 0e06d72..a4752ac 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -2535,8 +2535,6 @@
else
err = "device busy";
mutex_unlock(&bch_register_lock);
- if (!IS_ERR(bdev))
- bdput(bdev);
if (attr == &ksysfs_register_quiet)
goto done;
}
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index 554e3af..00a520c 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -404,7 +404,7 @@
if (!env)
return -ENOMEM;
add_uevent_var(env, "DRIVER=bcache");
- add_uevent_var(env, "CACHED_UUID=%pU", dc->sb.uuid),
+ add_uevent_var(env, "CACHED_UUID=%pU", dc->sb.uuid);
add_uevent_var(env, "CACHED_LABEL=%s", buf);
kobject_uevent_env(&disk_to_dev(dc->disk.disk)->kobj,
KOBJ_CHANGE,
diff --git a/drivers/mfd/ab8500-debugfs.c b/drivers/mfd/ab8500-debugfs.c
index 6d1bf7c..a320393 100644
--- a/drivers/mfd/ab8500-debugfs.c
+++ b/drivers/mfd/ab8500-debugfs.c
@@ -1513,24 +1513,14 @@
{
int line;
- seq_puts(s, "name: number: number of: wake:\n");
+ seq_puts(s, "name: number: irq: number of: wake:\n");
for (line = 0; line < num_interrupt_lines; line++) {
- struct irq_desc *desc = irq_to_desc(line + irq_first);
-
- seq_printf(s, "%3i: %6i %4i",
+ seq_printf(s, "%3i: %4i %6i %4i\n",
line,
+ line + irq_first,
num_interrupts[line],
num_wake_interrupts[line]);
-
- if (desc && desc->name)
- seq_printf(s, "-%-8s", desc->name);
- if (desc && desc->action) {
- struct irqaction *action = desc->action;
-
- seq_printf(s, " %s", action->name);
- while ((action = action->next) != NULL)
- seq_printf(s, ", %s", action->name);
}
seq_putc(s, '\n');
}
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c
index 74d4667..d5fc72b 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c
@@ -90,7 +90,7 @@
int cq_idx)
{
struct mlx4_en_dev *mdev = priv->mdev;
- int err = 0;
+ int irq, err = 0;
int timestamp_en = 0;
bool assigned_eq = false;
@@ -116,10 +116,8 @@
assigned_eq = true;
}
-
- cq->irq_desc =
- irq_to_desc(mlx4_eq_get_irq(mdev->dev,
- cq->vector));
+ irq = mlx4_eq_get_irq(mdev->dev, cq->vector);
+ cq->aff_mask = irq_get_effective_affinity_mask(irq);
} else {
/* For TX we use the same irq per
ring we assigned for the RX */
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index 7954c1d..c1c9118 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -958,18 +958,14 @@
/* If we used up all the quota - we're probably not done yet... */
if (done == budget || !clean_complete) {
- const struct cpumask *aff;
- struct irq_data *idata;
int cpu_curr;
/* in case we got here because of !clean_complete */
done = budget;
cpu_curr = smp_processor_id();
- idata = irq_desc_get_irq_data(cq->irq_desc);
- aff = irq_data_get_affinity_mask(idata);
- if (likely(cpumask_test_cpu(cpu_curr, aff)))
+ if (likely(cpumask_test_cpu(cpu_curr, cq->aff_mask)))
return budget;
/* Current cpu is not according to smp_irq_affinity -
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index 17f2b19..e8ed2319 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -47,6 +47,7 @@
#endif
#include <linux/cpu_rmap.h>
#include <linux/ptp_clock_kernel.h>
+#include <linux/irq.h>
#include <net/xdp.h>
#include <linux/mlx4/device.h>
@@ -365,7 +366,7 @@
struct mlx4_cqe *buf;
#define MLX4_EN_OPCODE_ERROR 0x1e
- struct irq_desc *irq_desc;
+ const struct cpumask *aff_mask;
};
struct mlx4_en_port_profile {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index a1a81cf..055baf3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -684,7 +684,7 @@
spinlock_t async_icosq_lock;
/* data path - accessed per napi poll */
- struct irq_desc *irq_desc;
+ const struct cpumask *aff_mask;
struct mlx5e_ch_stats *stats;
/* control */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
index 3511189..2a2bac3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
@@ -479,7 +479,6 @@
c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key);
c->num_tc = params->num_tc;
c->stats = &priv->port_ptp_stats.ch;
- c->irq_desc = irq_to_desc(irq);
c->lag_port = lag_port;
netif_napi_add(netdev, &c->napi, mlx5e_ptp_napi_poll, 64);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h
index 28aa5ae..90c98ea 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h
@@ -28,7 +28,6 @@
u8 lag_port;
/* data path - accessed per napi poll */
- struct irq_desc *irq_desc;
struct mlx5e_ch_stats *stats;
/* control */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 0383165..7a79d33 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -1987,7 +1987,7 @@
c->num_tc = params->num_tc;
c->xdp = !!params->xdp_prog;
c->stats = &priv->channel_stats[ix].ch;
- c->irq_desc = irq_to_desc(irq);
+ c->aff_mask = irq_get_effective_affinity_mask(irq);
c->lag_port = mlx5e_enumerate_lag_port(priv->mdev, ix);
netif_napi_add(netdev, &c->napi, mlx5e_napi_poll, 64);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
index 1ec3d62..a3cfe06 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
@@ -40,12 +40,8 @@
static inline bool mlx5e_channel_no_affinity_change(struct mlx5e_channel *c)
{
int current_cpu = smp_processor_id();
- const struct cpumask *aff;
- struct irq_data *idata;
- idata = irq_desc_get_irq_data(c->irq_desc);
- aff = irq_data_get_affinity_mask(idata);
- return cpumask_test_cpu(current_cpu, aff);
+ return cpumask_test_cpu(current_cpu, c->aff_mask);
}
static void mlx5e_handle_tx_dim(struct mlx5e_txqsq *sq)
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 052975e..4c41df6 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -3072,6 +3072,7 @@
dev_err(&vdev->dev,
"device MTU appears to have changed it is now %d < %d",
mtu, dev->min_mtu);
+ err = -EINVAL;
goto free;
}
diff --git a/drivers/ntb/msi.c b/drivers/ntb/msi.c
index 0a5e884..3f05cfb 100644
--- a/drivers/ntb/msi.c
+++ b/drivers/ntb/msi.c
@@ -282,15 +282,13 @@
struct ntb_msi_desc *msi_desc)
{
struct msi_desc *entry;
- struct irq_desc *desc;
int ret;
if (!ntb->msi)
return -EINVAL;
for_each_pci_msi_entry(entry, ntb->pdev) {
- desc = irq_to_desc(entry->irq);
- if (desc->action)
+ if (irq_has_action(entry->irq))
continue;
ret = devm_request_threaded_irq(&ntb->dev, entry->irq, handler,
diff --git a/drivers/nvdimm/btt.h b/drivers/nvdimm/btt.h
index 2e258be..aa53e0b 100644
--- a/drivers/nvdimm/btt.h
+++ b/drivers/nvdimm/btt.h
@@ -7,7 +7,6 @@
#ifndef _LINUX_BTT_H
#define _LINUX_BTT_H
-#include <linux/badblocks.h>
#include <linux/types.h>
#define BTT_SIG_LEN 16
@@ -197,6 +196,8 @@
int log_index[2];
};
+struct badblocks;
+
/**
* struct btt - handle for a BTT instance
* @btt_disk: Pointer to the gendisk for BTT device
diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c
index 5a7c800..030dbde 100644
--- a/drivers/nvdimm/claim.c
+++ b/drivers/nvdimm/claim.c
@@ -4,6 +4,7 @@
*/
#include <linux/device.h>
#include <linux/sizes.h>
+#include <linux/badblocks.h>
#include "nd-core.h"
#include "pmem.h"
#include "pfn.h"
diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c
index c21ba06..7de592d 100644
--- a/drivers/nvdimm/core.c
+++ b/drivers/nvdimm/core.c
@@ -3,7 +3,6 @@
* Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
*/
#include <linux/libnvdimm.h>
-#include <linux/badblocks.h>
#include <linux/suspend.h>
#include <linux/export.h>
#include <linux/module.h>
diff --git a/drivers/nvdimm/label.c b/drivers/nvdimm/label.c
index 47a4828..9251441 100644
--- a/drivers/nvdimm/label.c
+++ b/drivers/nvdimm/label.c
@@ -980,6 +980,15 @@
}
}
+ /* release slots associated with any invalidated UUIDs */
+ mutex_lock(&nd_mapping->lock);
+ list_for_each_entry_safe(label_ent, e, &nd_mapping->labels, list)
+ if (test_and_clear_bit(ND_LABEL_REAP, &label_ent->flags)) {
+ reap_victim(nd_mapping, label_ent);
+ list_move(&label_ent->list, &list);
+ }
+ mutex_unlock(&nd_mapping->lock);
+
/*
* Find the resource associated with the first label in the set
* per the v1.2 namespace specification.
@@ -999,8 +1008,10 @@
if (is_old_resource(res, old_res_list, old_num_resources))
continue; /* carry-over */
slot = nd_label_alloc_slot(ndd);
- if (slot == UINT_MAX)
+ if (slot == UINT_MAX) {
+ rc = -ENXIO;
goto abort;
+ }
dev_dbg(ndd->dev, "allocated: %d\n", slot);
nd_label = to_label(ndd, slot);
diff --git a/drivers/pci/controller/mobiveil/pcie-mobiveil-host.c b/drivers/pci/controller/mobiveil/pcie-mobiveil-host.c
index a2632d0..c637de3 100644
--- a/drivers/pci/controller/mobiveil/pcie-mobiveil-host.c
+++ b/drivers/pci/controller/mobiveil/pcie-mobiveil-host.c
@@ -306,13 +306,11 @@
static void mobiveil_mask_intx_irq(struct irq_data *data)
{
- struct irq_desc *desc = irq_to_desc(data->irq);
- struct mobiveil_pcie *pcie;
+ struct mobiveil_pcie *pcie = irq_data_get_irq_chip_data(data);
struct mobiveil_root_port *rp;
unsigned long flags;
u32 mask, shifted_val;
- pcie = irq_desc_get_chip_data(desc);
rp = &pcie->rp;
mask = 1 << ((data->hwirq + PAB_INTX_START) - 1);
raw_spin_lock_irqsave(&rp->intx_mask_lock, flags);
@@ -324,13 +322,11 @@
static void mobiveil_unmask_intx_irq(struct irq_data *data)
{
- struct irq_desc *desc = irq_to_desc(data->irq);
- struct mobiveil_pcie *pcie;
+ struct mobiveil_pcie *pcie = irq_data_get_irq_chip_data(data);
struct mobiveil_root_port *rp;
unsigned long flags;
u32 shifted_val, mask;
- pcie = irq_desc_get_chip_data(desc);
rp = &pcie->rp;
mask = 1 << ((data->hwirq + PAB_INTX_START) - 1);
raw_spin_lock_irqsave(&rp->intx_mask_lock, flags);
diff --git a/drivers/pci/controller/pcie-xilinx-nwl.c b/drivers/pci/controller/pcie-xilinx-nwl.c
index 7f29c2f..07e3666 100644
--- a/drivers/pci/controller/pcie-xilinx-nwl.c
+++ b/drivers/pci/controller/pcie-xilinx-nwl.c
@@ -374,13 +374,11 @@
static void nwl_mask_leg_irq(struct irq_data *data)
{
- struct irq_desc *desc = irq_to_desc(data->irq);
- struct nwl_pcie *pcie;
+ struct nwl_pcie *pcie = irq_data_get_irq_chip_data(data);
unsigned long flags;
u32 mask;
u32 val;
- pcie = irq_desc_get_chip_data(desc);
mask = 1 << (data->hwirq - 1);
raw_spin_lock_irqsave(&pcie->leg_mask_lock, flags);
val = nwl_bridge_readl(pcie, MSGF_LEG_MASK);
@@ -390,13 +388,11 @@
static void nwl_unmask_leg_irq(struct irq_data *data)
{
- struct irq_desc *desc = irq_to_desc(data->irq);
- struct nwl_pcie *pcie;
+ struct nwl_pcie *pcie = irq_data_get_irq_chip_data(data);
unsigned long flags;
u32 mask;
u32 val;
- pcie = irq_desc_get_chip_data(desc);
mask = 1 << (data->hwirq - 1);
raw_spin_lock_irqsave(&pcie->leg_mask_lock, flags);
val = nwl_bridge_readl(pcie, MSGF_LEG_MASK);
diff --git a/drivers/pinctrl/nomadik/pinctrl-nomadik.c b/drivers/pinctrl/nomadik/pinctrl-nomadik.c
index 657e35a..d4ea108 100644
--- a/drivers/pinctrl/nomadik/pinctrl-nomadik.c
+++ b/drivers/pinctrl/nomadik/pinctrl-nomadik.c
@@ -948,8 +948,8 @@
(mode < 0) ? "unknown" : modes[mode]);
} else {
int irq = chip->to_irq(chip, offset);
- struct irq_desc *desc = irq_to_desc(irq);
const int pullidx = pull ? 1 : 0;
+ bool wake;
int val;
static const char * const pulls[] = {
"none ",
@@ -969,8 +969,9 @@
* This races with request_irq(), set_irq_type(),
* and set_irq_wake() ... but those are "rare".
*/
- if (irq > 0 && desc && desc->action) {
+ if (irq > 0 && irq_has_action(irq)) {
char *trigger;
+ bool wake;
if (nmk_chip->edge_rising & BIT(offset))
trigger = "edge-rising";
@@ -979,10 +980,10 @@
else
trigger = "edge-undefined";
+ wake = !!(nmk_chip->real_wake & BIT(offset));
+
seq_printf(s, " irq-%d %s%s",
- irq, trigger,
- irqd_is_wakeup_set(&desc->irq_data)
- ? " wakeup" : "");
+ irq, trigger, wake ? " wakeup" : "");
}
}
clk_disable(nmk_chip->clk);
diff --git a/drivers/s390/block/dasd_alias.c b/drivers/s390/block/dasd_alias.c
index 99f8661..dc78a52 100644
--- a/drivers/s390/block/dasd_alias.c
+++ b/drivers/s390/block/dasd_alias.c
@@ -256,7 +256,6 @@
return;
device->discipline->get_uid(device, &uid);
spin_lock_irqsave(&lcu->lock, flags);
- list_del_init(&device->alias_list);
/* make sure that the workers don't use this device */
if (device == lcu->suc_data.device) {
spin_unlock_irqrestore(&lcu->lock, flags);
@@ -283,6 +282,7 @@
spin_lock_irqsave(&aliastree.lock, flags);
spin_lock(&lcu->lock);
+ list_del_init(&device->alias_list);
if (list_empty(&lcu->grouplist) &&
list_empty(&lcu->active_devices) &&
list_empty(&lcu->inactive_devices)) {
@@ -462,11 +462,19 @@
spin_unlock_irqrestore(&lcu->lock, flags);
rc = dasd_sleep_on(cqr);
- if (rc && !suborder_not_supported(cqr)) {
+ if (!rc)
+ goto out;
+
+ if (suborder_not_supported(cqr)) {
+ /* suborder not supported or device unusable for IO */
+ rc = -EOPNOTSUPP;
+ } else {
+ /* IO failed but should be retried */
spin_lock_irqsave(&lcu->lock, flags);
lcu->flags |= NEED_UAC_UPDATE;
spin_unlock_irqrestore(&lcu->lock, flags);
}
+out:
dasd_sfree_request(cqr, cqr->memdev);
return rc;
}
@@ -503,6 +511,14 @@
return rc;
spin_lock_irqsave(&lcu->lock, flags);
+ /*
+ * there is another update needed skip the remaining handling
+ * the data might already be outdated
+ * but especially do not add the device to an LCU with pending
+ * update
+ */
+ if (lcu->flags & NEED_UAC_UPDATE)
+ goto out;
lcu->pav = NO_PAV;
for (i = 0; i < MAX_DEVICES_PER_LCU; ++i) {
switch (lcu->uac->unit[i].ua_type) {
@@ -521,6 +537,7 @@
alias_list) {
_add_device_to_lcu(lcu, device, refdev);
}
+out:
spin_unlock_irqrestore(&lcu->lock, flags);
return 0;
}
@@ -625,6 +642,7 @@
}
if (lcu->flags & UPDATE_PENDING) {
list_move(&device->alias_list, &lcu->active_devices);
+ private->pavgroup = NULL;
_schedule_lcu_update(lcu, device);
}
spin_unlock_irqrestore(&lcu->lock, flags);
diff --git a/drivers/vdpa/Kconfig b/drivers/vdpa/Kconfig
index 6caf539..92a6396 100644
--- a/drivers/vdpa/Kconfig
+++ b/drivers/vdpa/Kconfig
@@ -9,21 +9,24 @@
if VDPA
config VDPA_SIM
- tristate "vDPA device simulator"
+ tristate "vDPA device simulator core"
depends on RUNTIME_TESTING_MENU && HAS_DMA
select DMA_OPS
select VHOST_RING
- select GENERIC_NET_UTILS
- default n
help
- vDPA networking device simulator which loop TX traffic back
- to RX. This device is used for testing, prototyping and
- development of vDPA.
+ Enable this module to support vDPA device simulators. These devices
+ are used for testing, prototyping and development of vDPA.
+
+config VDPA_SIM_NET
+ tristate "vDPA simulator for networking device"
+ depends on VDPA_SIM
+ select GENERIC_NET_UTILS
+ help
+ vDPA networking device simulator which loops TX traffic back to RX.
config IFCVF
tristate "Intel IFC VF vDPA driver"
depends on PCI_MSI
- default n
help
This kernel module can drive Intel IFC VF NIC to offload
virtio dataplane traffic to hardware.
@@ -42,7 +45,6 @@
tristate "vDPA driver for ConnectX devices"
select MLX5_VDPA
depends on MLX5_CORE
- default n
help
VDPA network driver for ConnectX6 and newer. Provides offloading
of virtio net datapath such that descriptors put on the ring will
diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c
index 8b40285..fa1af30 100644
--- a/drivers/vdpa/ifcvf/ifcvf_main.c
+++ b/drivers/vdpa/ifcvf/ifcvf_main.c
@@ -417,16 +417,9 @@
return ret;
}
- ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
+ ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
if (ret) {
- IFCVF_ERR(pdev, "No usable DMA confiugration\n");
- return ret;
- }
-
- ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
- if (ret) {
- IFCVF_ERR(pdev,
- "No usable coherent DMA confiugration\n");
+ IFCVF_ERR(pdev, "No usable DMA configuration\n");
return ret;
}
diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
index f1d5481..88dde34 100644
--- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
+++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
@@ -479,6 +479,11 @@
static void mlx5_vdpa_handle_completions(struct mlx5_vdpa_virtqueue *mvq, int num)
{
mlx5_cq_set_ci(&mvq->cq.mcq);
+
+ /* make sure CQ cosumer update is visible to the hardware before updating
+ * RX doorbell record.
+ */
+ dma_wmb();
rx_post(&mvq->vqqp, num);
if (mvq->event_cb.callback)
mvq->event_cb.callback(mvq->event_cb.private);
diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c
index a69ffc9..c082565 100644
--- a/drivers/vdpa/vdpa.c
+++ b/drivers/vdpa/vdpa.c
@@ -89,7 +89,7 @@
if (!vdev)
goto err;
- err = ida_simple_get(&vdpa_index_ida, 0, 0, GFP_KERNEL);
+ err = ida_alloc(&vdpa_index_ida, GFP_KERNEL);
if (err < 0)
goto err_ida;
diff --git a/drivers/vdpa/vdpa_sim/Makefile b/drivers/vdpa/vdpa_sim/Makefile
index b40278f..79d4536 100644
--- a/drivers/vdpa/vdpa_sim/Makefile
+++ b/drivers/vdpa/vdpa_sim/Makefile
@@ -1,2 +1,3 @@
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_VDPA_SIM) += vdpa_sim.o
+obj-$(CONFIG_VDPA_SIM_NET) += vdpa_sim_net.o
diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c
index 6a90fdb..b3fcc67 100644
--- a/drivers/vdpa/vdpa_sim/vdpa_sim.c
+++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * VDPA networking device simulator.
+ * VDPA device simulator core.
*
* Copyright (c) 2020, Red Hat Inc. All rights reserved.
* Author: Jason Wang <jasowang@redhat.com>
@@ -11,97 +11,32 @@
#include <linux/module.h>
#include <linux/device.h>
#include <linux/kernel.h>
-#include <linux/fs.h>
-#include <linux/poll.h>
#include <linux/slab.h>
#include <linux/sched.h>
-#include <linux/wait.h>
-#include <linux/uuid.h>
-#include <linux/iommu.h>
#include <linux/dma-map-ops.h>
-#include <linux/sysfs.h>
-#include <linux/file.h>
-#include <linux/etherdevice.h>
#include <linux/vringh.h>
#include <linux/vdpa.h>
-#include <linux/virtio_byteorder.h>
#include <linux/vhost_iotlb.h>
-#include <uapi/linux/virtio_config.h>
-#include <uapi/linux/virtio_net.h>
+
+#include "vdpa_sim.h"
#define DRV_VERSION "0.1"
#define DRV_AUTHOR "Jason Wang <jasowang@redhat.com>"
-#define DRV_DESC "vDPA Device Simulator"
+#define DRV_DESC "vDPA Device Simulator core"
#define DRV_LICENSE "GPL v2"
static int batch_mapping = 1;
module_param(batch_mapping, int, 0444);
MODULE_PARM_DESC(batch_mapping, "Batched mapping 1 -Enable; 0 - Disable");
-static char *macaddr;
-module_param(macaddr, charp, 0);
-MODULE_PARM_DESC(macaddr, "Ethernet MAC address");
-
-struct vdpasim_virtqueue {
- struct vringh vring;
- struct vringh_kiov iov;
- unsigned short head;
- bool ready;
- u64 desc_addr;
- u64 device_addr;
- u64 driver_addr;
- u32 num;
- void *private;
- irqreturn_t (*cb)(void *data);
-};
+static int max_iotlb_entries = 2048;
+module_param(max_iotlb_entries, int, 0444);
+MODULE_PARM_DESC(max_iotlb_entries,
+ "Maximum number of iotlb entries. 0 means unlimited. (default: 2048)");
#define VDPASIM_QUEUE_ALIGN PAGE_SIZE
#define VDPASIM_QUEUE_MAX 256
-#define VDPASIM_DEVICE_ID 0x1
#define VDPASIM_VENDOR_ID 0
-#define VDPASIM_VQ_NUM 0x2
-#define VDPASIM_NAME "vdpasim-netdev"
-
-static u64 vdpasim_features = (1ULL << VIRTIO_F_ANY_LAYOUT) |
- (1ULL << VIRTIO_F_VERSION_1) |
- (1ULL << VIRTIO_F_ACCESS_PLATFORM) |
- (1ULL << VIRTIO_NET_F_MAC);
-
-/* State of each vdpasim device */
-struct vdpasim {
- struct vdpa_device vdpa;
- struct vdpasim_virtqueue vqs[VDPASIM_VQ_NUM];
- struct work_struct work;
- /* spinlock to synchronize virtqueue state */
- spinlock_t lock;
- struct virtio_net_config config;
- struct vhost_iotlb *iommu;
- void *buffer;
- u32 status;
- u32 generation;
- u64 features;
- /* spinlock to synchronize iommu table */
- spinlock_t iommu_lock;
-};
-
-/* TODO: cross-endian support */
-static inline bool vdpasim_is_little_endian(struct vdpasim *vdpasim)
-{
- return virtio_legacy_is_little_endian() ||
- (vdpasim->features & (1ULL << VIRTIO_F_VERSION_1));
-}
-
-static inline u16 vdpasim16_to_cpu(struct vdpasim *vdpasim, __virtio16 val)
-{
- return __virtio16_to_cpu(vdpasim_is_little_endian(vdpasim), val);
-}
-
-static inline __virtio16 cpu_to_vdpasim16(struct vdpasim *vdpasim, u16 val)
-{
- return __cpu_to_virtio16(vdpasim_is_little_endian(vdpasim), val);
-}
-
-static struct vdpasim *vdpasim_dev;
static struct vdpasim *vdpa_to_sim(struct vdpa_device *vdpa)
{
@@ -115,20 +50,34 @@
return vdpa_to_sim(vdpa);
}
+static void vdpasim_vq_notify(struct vringh *vring)
+{
+ struct vdpasim_virtqueue *vq =
+ container_of(vring, struct vdpasim_virtqueue, vring);
+
+ if (!vq->cb)
+ return;
+
+ vq->cb(vq->private);
+}
+
static void vdpasim_queue_ready(struct vdpasim *vdpasim, unsigned int idx)
{
struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx];
- vringh_init_iotlb(&vq->vring, vdpasim_features,
+ vringh_init_iotlb(&vq->vring, vdpasim->dev_attr.supported_features,
VDPASIM_QUEUE_MAX, false,
(struct vring_desc *)(uintptr_t)vq->desc_addr,
(struct vring_avail *)
(uintptr_t)vq->driver_addr,
(struct vring_used *)
(uintptr_t)vq->device_addr);
+
+ vq->vring.notify = vdpasim_vq_notify;
}
-static void vdpasim_vq_reset(struct vdpasim_virtqueue *vq)
+static void vdpasim_vq_reset(struct vdpasim *vdpasim,
+ struct vdpasim_virtqueue *vq)
{
vq->ready = false;
vq->desc_addr = 0;
@@ -136,16 +85,18 @@
vq->device_addr = 0;
vq->cb = NULL;
vq->private = NULL;
- vringh_init_iotlb(&vq->vring, vdpasim_features, VDPASIM_QUEUE_MAX,
- false, NULL, NULL, NULL);
+ vringh_init_iotlb(&vq->vring, vdpasim->dev_attr.supported_features,
+ VDPASIM_QUEUE_MAX, false, NULL, NULL, NULL);
+
+ vq->vring.notify = NULL;
}
static void vdpasim_reset(struct vdpasim *vdpasim)
{
int i;
- for (i = 0; i < VDPASIM_VQ_NUM; i++)
- vdpasim_vq_reset(&vdpasim->vqs[i]);
+ for (i = 0; i < vdpasim->dev_attr.nvqs; i++)
+ vdpasim_vq_reset(vdpasim, &vdpasim->vqs[i]);
spin_lock(&vdpasim->iommu_lock);
vhost_iotlb_reset(vdpasim->iommu);
@@ -156,80 +107,6 @@
++vdpasim->generation;
}
-static void vdpasim_work(struct work_struct *work)
-{
- struct vdpasim *vdpasim = container_of(work, struct
- vdpasim, work);
- struct vdpasim_virtqueue *txq = &vdpasim->vqs[1];
- struct vdpasim_virtqueue *rxq = &vdpasim->vqs[0];
- ssize_t read, write;
- size_t total_write;
- int pkts = 0;
- int err;
-
- spin_lock(&vdpasim->lock);
-
- if (!(vdpasim->status & VIRTIO_CONFIG_S_DRIVER_OK))
- goto out;
-
- if (!txq->ready || !rxq->ready)
- goto out;
-
- while (true) {
- total_write = 0;
- err = vringh_getdesc_iotlb(&txq->vring, &txq->iov, NULL,
- &txq->head, GFP_ATOMIC);
- if (err <= 0)
- break;
-
- err = vringh_getdesc_iotlb(&rxq->vring, NULL, &rxq->iov,
- &rxq->head, GFP_ATOMIC);
- if (err <= 0) {
- vringh_complete_iotlb(&txq->vring, txq->head, 0);
- break;
- }
-
- while (true) {
- read = vringh_iov_pull_iotlb(&txq->vring, &txq->iov,
- vdpasim->buffer,
- PAGE_SIZE);
- if (read <= 0)
- break;
-
- write = vringh_iov_push_iotlb(&rxq->vring, &rxq->iov,
- vdpasim->buffer, read);
- if (write <= 0)
- break;
-
- total_write += write;
- }
-
- /* Make sure data is wrote before advancing index */
- smp_wmb();
-
- vringh_complete_iotlb(&txq->vring, txq->head, 0);
- vringh_complete_iotlb(&rxq->vring, rxq->head, total_write);
-
- /* Make sure used is visible before rasing the interrupt. */
- smp_wmb();
-
- local_bh_disable();
- if (txq->cb)
- txq->cb(txq->private);
- if (rxq->cb)
- rxq->cb(rxq->private);
- local_bh_enable();
-
- if (++pkts > 4) {
- schedule_work(&vdpasim->work);
- goto out;
- }
- }
-
-out:
- spin_unlock(&vdpasim->lock);
-}
-
static int dir_to_perm(enum dma_data_direction dir)
{
int perm = -EFAULT;
@@ -342,26 +219,28 @@
.free = vdpasim_free_coherent,
};
-static const struct vdpa_config_ops vdpasim_net_config_ops;
-static const struct vdpa_config_ops vdpasim_net_batch_config_ops;
+static const struct vdpa_config_ops vdpasim_config_ops;
+static const struct vdpa_config_ops vdpasim_batch_config_ops;
-static struct vdpasim *vdpasim_create(void)
+struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr)
{
const struct vdpa_config_ops *ops;
struct vdpasim *vdpasim;
struct device *dev;
- int ret = -ENOMEM;
+ int i, ret = -ENOMEM;
if (batch_mapping)
- ops = &vdpasim_net_batch_config_ops;
+ ops = &vdpasim_batch_config_ops;
else
- ops = &vdpasim_net_config_ops;
+ ops = &vdpasim_config_ops;
- vdpasim = vdpa_alloc_device(struct vdpasim, vdpa, NULL, ops, VDPASIM_VQ_NUM);
+ vdpasim = vdpa_alloc_device(struct vdpasim, vdpa, NULL, ops,
+ dev_attr->nvqs);
if (!vdpasim)
goto err_alloc;
- INIT_WORK(&vdpasim->work, vdpasim_work);
+ vdpasim->dev_attr = *dev_attr;
+ INIT_WORK(&vdpasim->work, dev_attr->work_fn);
spin_lock_init(&vdpasim->lock);
spin_lock_init(&vdpasim->iommu_lock);
@@ -371,31 +250,27 @@
goto err_iommu;
set_dma_ops(dev, &vdpasim_dma_ops);
- vdpasim->iommu = vhost_iotlb_alloc(2048, 0);
+ vdpasim->config = kzalloc(dev_attr->config_size, GFP_KERNEL);
+ if (!vdpasim->config)
+ goto err_iommu;
+
+ vdpasim->vqs = kcalloc(dev_attr->nvqs, sizeof(struct vdpasim_virtqueue),
+ GFP_KERNEL);
+ if (!vdpasim->vqs)
+ goto err_iommu;
+
+ vdpasim->iommu = vhost_iotlb_alloc(max_iotlb_entries, 0);
if (!vdpasim->iommu)
goto err_iommu;
- vdpasim->buffer = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ vdpasim->buffer = kvmalloc(dev_attr->buffer_size, GFP_KERNEL);
if (!vdpasim->buffer)
goto err_iommu;
- if (macaddr) {
- mac_pton(macaddr, vdpasim->config.mac);
- if (!is_valid_ether_addr(vdpasim->config.mac)) {
- ret = -EADDRNOTAVAIL;
- goto err_iommu;
- }
- } else {
- eth_random_addr(vdpasim->config.mac);
- }
-
- vringh_set_iotlb(&vdpasim->vqs[0].vring, vdpasim->iommu);
- vringh_set_iotlb(&vdpasim->vqs[1].vring, vdpasim->iommu);
+ for (i = 0; i < dev_attr->nvqs; i++)
+ vringh_set_iotlb(&vdpasim->vqs[i].vring, vdpasim->iommu);
vdpasim->vdpa.dma_dev = dev;
- ret = vdpa_register_device(&vdpasim->vdpa);
- if (ret)
- goto err_iommu;
return vdpasim;
@@ -404,6 +279,7 @@
err_alloc:
return ERR_PTR(ret);
}
+EXPORT_SYMBOL_GPL(vdpasim_create);
static int vdpasim_set_vq_address(struct vdpa_device *vdpa, u16 idx,
u64 desc_area, u64 driver_area,
@@ -498,28 +374,21 @@
static u64 vdpasim_get_features(struct vdpa_device *vdpa)
{
- return vdpasim_features;
+ struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
+
+ return vdpasim->dev_attr.supported_features;
}
static int vdpasim_set_features(struct vdpa_device *vdpa, u64 features)
{
struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
- struct virtio_net_config *config = &vdpasim->config;
/* DMA mapping must be done by driver */
if (!(features & (1ULL << VIRTIO_F_ACCESS_PLATFORM)))
return -EINVAL;
- vdpasim->features = features & vdpasim_features;
+ vdpasim->features = features & vdpasim->dev_attr.supported_features;
- /* We generally only know whether guest is using the legacy interface
- * here, so generally that's the earliest we can set config fields.
- * Note: We actually require VIRTIO_F_ACCESS_PLATFORM above which
- * implies VIRTIO_F_VERSION_1, but let's not try to be clever here.
- */
-
- config->mtu = cpu_to_vdpasim16(vdpasim, 1500);
- config->status = cpu_to_vdpasim16(vdpasim, VIRTIO_NET_S_LINK_UP);
return 0;
}
@@ -536,7 +405,9 @@
static u32 vdpasim_get_device_id(struct vdpa_device *vdpa)
{
- return VDPASIM_DEVICE_ID;
+ struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
+
+ return vdpasim->dev_attr.id;
}
static u32 vdpasim_get_vendor_id(struct vdpa_device *vdpa)
@@ -572,14 +443,27 @@
{
struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
- if (offset + len < sizeof(struct virtio_net_config))
- memcpy(buf, (u8 *)&vdpasim->config + offset, len);
+ if (offset + len > vdpasim->dev_attr.config_size)
+ return;
+
+ if (vdpasim->dev_attr.get_config)
+ vdpasim->dev_attr.get_config(vdpasim, vdpasim->config);
+
+ memcpy(buf, vdpasim->config + offset, len);
}
static void vdpasim_set_config(struct vdpa_device *vdpa, unsigned int offset,
const void *buf, unsigned int len)
{
- /* No writable config supportted by vdpasim */
+ struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
+
+ if (offset + len > vdpasim->dev_attr.config_size)
+ return;
+
+ memcpy(vdpasim->config + offset, buf, len);
+
+ if (vdpasim->dev_attr.set_config)
+ vdpasim->dev_attr.set_config(vdpasim, vdpasim->config);
}
static u32 vdpasim_get_generation(struct vdpa_device *vdpa)
@@ -656,12 +540,14 @@
struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
cancel_work_sync(&vdpasim->work);
- kfree(vdpasim->buffer);
+ kvfree(vdpasim->buffer);
if (vdpasim->iommu)
vhost_iotlb_free(vdpasim->iommu);
+ kfree(vdpasim->vqs);
+ kfree(vdpasim->config);
}
-static const struct vdpa_config_ops vdpasim_net_config_ops = {
+static const struct vdpa_config_ops vdpasim_config_ops = {
.set_vq_address = vdpasim_set_vq_address,
.set_vq_num = vdpasim_set_vq_num,
.kick_vq = vdpasim_kick_vq,
@@ -688,7 +574,7 @@
.free = vdpasim_free,
};
-static const struct vdpa_config_ops vdpasim_net_batch_config_ops = {
+static const struct vdpa_config_ops vdpasim_batch_config_ops = {
.set_vq_address = vdpasim_set_vq_address,
.set_vq_num = vdpasim_set_vq_num,
.kick_vq = vdpasim_kick_vq,
@@ -714,26 +600,6 @@
.free = vdpasim_free,
};
-static int __init vdpasim_dev_init(void)
-{
- vdpasim_dev = vdpasim_create();
-
- if (!IS_ERR(vdpasim_dev))
- return 0;
-
- return PTR_ERR(vdpasim_dev);
-}
-
-static void __exit vdpasim_dev_exit(void)
-{
- struct vdpa_device *vdpa = &vdpasim_dev->vdpa;
-
- vdpa_unregister_device(vdpa);
-}
-
-module_init(vdpasim_dev_init)
-module_exit(vdpasim_dev_exit)
-
MODULE_VERSION(DRV_VERSION);
MODULE_LICENSE(DRV_LICENSE);
MODULE_AUTHOR(DRV_AUTHOR);
diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.h b/drivers/vdpa/vdpa_sim/vdpa_sim.h
new file mode 100644
index 0000000..b021422
--- /dev/null
+++ b/drivers/vdpa/vdpa_sim/vdpa_sim.h
@@ -0,0 +1,105 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2020, Red Hat Inc. All rights reserved.
+ */
+
+#ifndef _VDPA_SIM_H
+#define _VDPA_SIM_H
+
+#include <linux/vringh.h>
+#include <linux/vdpa.h>
+#include <linux/virtio_byteorder.h>
+#include <linux/vhost_iotlb.h>
+#include <uapi/linux/virtio_config.h>
+
+#define VDPASIM_FEATURES ((1ULL << VIRTIO_F_ANY_LAYOUT) | \
+ (1ULL << VIRTIO_F_VERSION_1) | \
+ (1ULL << VIRTIO_F_ACCESS_PLATFORM))
+
+struct vdpasim;
+
+struct vdpasim_virtqueue {
+ struct vringh vring;
+ struct vringh_kiov in_iov;
+ struct vringh_kiov out_iov;
+ unsigned short head;
+ bool ready;
+ u64 desc_addr;
+ u64 device_addr;
+ u64 driver_addr;
+ u32 num;
+ void *private;
+ irqreturn_t (*cb)(void *data);
+};
+
+struct vdpasim_dev_attr {
+ u64 supported_features;
+ size_t config_size;
+ size_t buffer_size;
+ int nvqs;
+ u32 id;
+
+ work_func_t work_fn;
+ void (*get_config)(struct vdpasim *vdpasim, void *config);
+ void (*set_config)(struct vdpasim *vdpasim, const void *config);
+};
+
+/* State of each vdpasim device */
+struct vdpasim {
+ struct vdpa_device vdpa;
+ struct vdpasim_virtqueue *vqs;
+ struct work_struct work;
+ struct vdpasim_dev_attr dev_attr;
+ /* spinlock to synchronize virtqueue state */
+ spinlock_t lock;
+ /* virtio config according to device type */
+ void *config;
+ struct vhost_iotlb *iommu;
+ void *buffer;
+ u32 status;
+ u32 generation;
+ u64 features;
+ /* spinlock to synchronize iommu table */
+ spinlock_t iommu_lock;
+};
+
+struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *attr);
+
+/* TODO: cross-endian support */
+static inline bool vdpasim_is_little_endian(struct vdpasim *vdpasim)
+{
+ return virtio_legacy_is_little_endian() ||
+ (vdpasim->features & (1ULL << VIRTIO_F_VERSION_1));
+}
+
+static inline u16 vdpasim16_to_cpu(struct vdpasim *vdpasim, __virtio16 val)
+{
+ return __virtio16_to_cpu(vdpasim_is_little_endian(vdpasim), val);
+}
+
+static inline __virtio16 cpu_to_vdpasim16(struct vdpasim *vdpasim, u16 val)
+{
+ return __cpu_to_virtio16(vdpasim_is_little_endian(vdpasim), val);
+}
+
+static inline u32 vdpasim32_to_cpu(struct vdpasim *vdpasim, __virtio32 val)
+{
+ return __virtio32_to_cpu(vdpasim_is_little_endian(vdpasim), val);
+}
+
+static inline __virtio32 cpu_to_vdpasim32(struct vdpasim *vdpasim, u32 val)
+{
+ return __cpu_to_virtio32(vdpasim_is_little_endian(vdpasim), val);
+}
+
+static inline u64 vdpasim64_to_cpu(struct vdpasim *vdpasim, __virtio64 val)
+{
+ return __virtio64_to_cpu(vdpasim_is_little_endian(vdpasim), val);
+}
+
+static inline __virtio64 cpu_to_vdpasim64(struct vdpasim *vdpasim, u64 val)
+{
+ return __cpu_to_virtio64(vdpasim_is_little_endian(vdpasim), val);
+}
+
+#endif
diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c
new file mode 100644
index 0000000..c10b698
--- /dev/null
+++ b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c
@@ -0,0 +1,177 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * VDPA simulator for networking device.
+ *
+ * Copyright (c) 2020, Red Hat Inc. All rights reserved.
+ * Author: Jason Wang <jasowang@redhat.com>
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/etherdevice.h>
+#include <linux/vringh.h>
+#include <linux/vdpa.h>
+#include <uapi/linux/virtio_net.h>
+
+#include "vdpa_sim.h"
+
+#define DRV_VERSION "0.1"
+#define DRV_AUTHOR "Jason Wang <jasowang@redhat.com>"
+#define DRV_DESC "vDPA Device Simulator for networking device"
+#define DRV_LICENSE "GPL v2"
+
+#define VDPASIM_NET_FEATURES (VDPASIM_FEATURES | \
+ (1ULL << VIRTIO_NET_F_MAC))
+
+#define VDPASIM_NET_VQ_NUM 2
+
+static char *macaddr;
+module_param(macaddr, charp, 0);
+MODULE_PARM_DESC(macaddr, "Ethernet MAC address");
+
+u8 macaddr_buf[ETH_ALEN];
+
+static struct vdpasim *vdpasim_net_dev;
+
+static void vdpasim_net_work(struct work_struct *work)
+{
+ struct vdpasim *vdpasim = container_of(work, struct vdpasim, work);
+ struct vdpasim_virtqueue *txq = &vdpasim->vqs[1];
+ struct vdpasim_virtqueue *rxq = &vdpasim->vqs[0];
+ ssize_t read, write;
+ size_t total_write;
+ int pkts = 0;
+ int err;
+
+ spin_lock(&vdpasim->lock);
+
+ if (!(vdpasim->status & VIRTIO_CONFIG_S_DRIVER_OK))
+ goto out;
+
+ if (!txq->ready || !rxq->ready)
+ goto out;
+
+ while (true) {
+ total_write = 0;
+ err = vringh_getdesc_iotlb(&txq->vring, &txq->out_iov, NULL,
+ &txq->head, GFP_ATOMIC);
+ if (err <= 0)
+ break;
+
+ err = vringh_getdesc_iotlb(&rxq->vring, NULL, &rxq->in_iov,
+ &rxq->head, GFP_ATOMIC);
+ if (err <= 0) {
+ vringh_complete_iotlb(&txq->vring, txq->head, 0);
+ break;
+ }
+
+ while (true) {
+ read = vringh_iov_pull_iotlb(&txq->vring, &txq->out_iov,
+ vdpasim->buffer,
+ PAGE_SIZE);
+ if (read <= 0)
+ break;
+
+ write = vringh_iov_push_iotlb(&rxq->vring, &rxq->in_iov,
+ vdpasim->buffer, read);
+ if (write <= 0)
+ break;
+
+ total_write += write;
+ }
+
+ /* Make sure data is wrote before advancing index */
+ smp_wmb();
+
+ vringh_complete_iotlb(&txq->vring, txq->head, 0);
+ vringh_complete_iotlb(&rxq->vring, rxq->head, total_write);
+
+ /* Make sure used is visible before rasing the interrupt. */
+ smp_wmb();
+
+ local_bh_disable();
+ if (vringh_need_notify_iotlb(&txq->vring) > 0)
+ vringh_notify(&txq->vring);
+ if (vringh_need_notify_iotlb(&rxq->vring) > 0)
+ vringh_notify(&rxq->vring);
+ local_bh_enable();
+
+ if (++pkts > 4) {
+ schedule_work(&vdpasim->work);
+ goto out;
+ }
+ }
+
+out:
+ spin_unlock(&vdpasim->lock);
+}
+
+static void vdpasim_net_get_config(struct vdpasim *vdpasim, void *config)
+{
+ struct virtio_net_config *net_config =
+ (struct virtio_net_config *)config;
+
+ net_config->mtu = cpu_to_vdpasim16(vdpasim, 1500);
+ net_config->status = cpu_to_vdpasim16(vdpasim, VIRTIO_NET_S_LINK_UP);
+ memcpy(net_config->mac, macaddr_buf, ETH_ALEN);
+}
+
+static int __init vdpasim_net_init(void)
+{
+ struct vdpasim_dev_attr dev_attr = {};
+ int ret;
+
+ if (macaddr) {
+ mac_pton(macaddr, macaddr_buf);
+ if (!is_valid_ether_addr(macaddr_buf)) {
+ ret = -EADDRNOTAVAIL;
+ goto out;
+ }
+ } else {
+ eth_random_addr(macaddr_buf);
+ }
+
+ dev_attr.id = VIRTIO_ID_NET;
+ dev_attr.supported_features = VDPASIM_NET_FEATURES;
+ dev_attr.nvqs = VDPASIM_NET_VQ_NUM;
+ dev_attr.config_size = sizeof(struct virtio_net_config);
+ dev_attr.get_config = vdpasim_net_get_config;
+ dev_attr.work_fn = vdpasim_net_work;
+ dev_attr.buffer_size = PAGE_SIZE;
+
+ vdpasim_net_dev = vdpasim_create(&dev_attr);
+ if (IS_ERR(vdpasim_net_dev)) {
+ ret = PTR_ERR(vdpasim_net_dev);
+ goto out;
+ }
+
+ ret = vdpa_register_device(&vdpasim_net_dev->vdpa);
+ if (ret)
+ goto put_dev;
+
+ return 0;
+
+put_dev:
+ put_device(&vdpasim_net_dev->vdpa.dev);
+out:
+ return ret;
+}
+
+static void __exit vdpasim_net_exit(void)
+{
+ struct vdpa_device *vdpa = &vdpasim_net_dev->vdpa;
+
+ vdpa_unregister_device(vdpa);
+}
+
+module_init(vdpasim_net_init);
+module_exit(vdpasim_net_exit);
+
+MODULE_VERSION(DRV_VERSION);
+MODULE_LICENSE(DRV_LICENSE);
+MODULE_AUTHOR(DRV_AUTHOR);
+MODULE_DESCRIPTION(DRV_DESC);
diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
index 6ff8a5096..4ce9f00 100644
--- a/drivers/vhost/scsi.c
+++ b/drivers/vhost/scsi.c
@@ -1643,7 +1643,8 @@
if (!vhost_vq_is_setup(vq))
continue;
- if (vhost_scsi_setup_vq_cmds(vq, vq->num))
+ ret = vhost_scsi_setup_vq_cmds(vq, vq->num);
+ if (ret)
goto destroy_vq_cmds;
}
diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
index 29ed417..ef688c8 100644
--- a/drivers/vhost/vdpa.c
+++ b/drivers/vhost/vdpa.c
@@ -245,14 +245,10 @@
return -EFAULT;
if (vhost_vdpa_config_validate(v, &config))
return -EINVAL;
- buf = kvzalloc(config.len, GFP_KERNEL);
- if (!buf)
- return -ENOMEM;
- if (copy_from_user(buf, c->buf, config.len)) {
- kvfree(buf);
- return -EFAULT;
- }
+ buf = vmemdup_user(c->buf, config.len);
+ if (IS_ERR(buf))
+ return PTR_ERR(buf);
ops->set_config(vdpa, config.off, buf, config.len);
diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c
index 181e2f1..9fc9ec4 100644
--- a/drivers/virtio/virtio_mem.c
+++ b/drivers/virtio/virtio_mem.c
@@ -27,20 +27,74 @@
module_param(unplug_online, bool, 0644);
MODULE_PARM_DESC(unplug_online, "Try to unplug online memory");
-enum virtio_mem_mb_state {
+static bool force_bbm;
+module_param(force_bbm, bool, 0444);
+MODULE_PARM_DESC(force_bbm,
+ "Force Big Block Mode. Default is 0 (auto-selection)");
+
+static unsigned long bbm_block_size;
+module_param(bbm_block_size, ulong, 0444);
+MODULE_PARM_DESC(bbm_block_size,
+ "Big Block size in bytes. Default is 0 (auto-detection).");
+
+static bool bbm_safe_unplug = true;
+module_param(bbm_safe_unplug, bool, 0444);
+MODULE_PARM_DESC(bbm_safe_unplug,
+ "Use a safe unplug mechanism in BBM, avoiding long/endless loops");
+
+/*
+ * virtio-mem currently supports the following modes of operation:
+ *
+ * * Sub Block Mode (SBM): A Linux memory block spans 2..X subblocks (SB). The
+ * size of a Sub Block (SB) is determined based on the device block size, the
+ * pageblock size, and the maximum allocation granularity of the buddy.
+ * Subblocks within a Linux memory block might either be plugged or unplugged.
+ * Memory is added/removed to Linux MM in Linux memory block granularity.
+ *
+ * * Big Block Mode (BBM): A Big Block (BB) spans 1..X Linux memory blocks.
+ * Memory is added/removed to Linux MM in Big Block granularity.
+ *
+ * The mode is determined automatically based on the Linux memory block size
+ * and the device block size.
+ *
+ * User space / core MM (auto onlining) is responsible for onlining added
+ * Linux memory blocks - and for selecting a zone. Linux Memory Blocks are
+ * always onlined separately, and all memory within a Linux memory block is
+ * onlined to the same zone - virtio-mem relies on this behavior.
+ */
+
+/*
+ * State of a Linux memory block in SBM.
+ */
+enum virtio_mem_sbm_mb_state {
/* Unplugged, not added to Linux. Can be reused later. */
- VIRTIO_MEM_MB_STATE_UNUSED = 0,
+ VIRTIO_MEM_SBM_MB_UNUSED = 0,
/* (Partially) plugged, not added to Linux. Error on add_memory(). */
- VIRTIO_MEM_MB_STATE_PLUGGED,
+ VIRTIO_MEM_SBM_MB_PLUGGED,
/* Fully plugged, fully added to Linux, offline. */
- VIRTIO_MEM_MB_STATE_OFFLINE,
+ VIRTIO_MEM_SBM_MB_OFFLINE,
/* Partially plugged, fully added to Linux, offline. */
- VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL,
+ VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL,
/* Fully plugged, fully added to Linux, online. */
- VIRTIO_MEM_MB_STATE_ONLINE,
+ VIRTIO_MEM_SBM_MB_ONLINE,
/* Partially plugged, fully added to Linux, online. */
- VIRTIO_MEM_MB_STATE_ONLINE_PARTIAL,
- VIRTIO_MEM_MB_STATE_COUNT
+ VIRTIO_MEM_SBM_MB_ONLINE_PARTIAL,
+ VIRTIO_MEM_SBM_MB_COUNT
+};
+
+/*
+ * State of a Big Block (BB) in BBM, covering 1..X Linux memory blocks.
+ */
+enum virtio_mem_bbm_bb_state {
+ /* Unplugged, not added to Linux. Can be reused later. */
+ VIRTIO_MEM_BBM_BB_UNUSED = 0,
+ /* Plugged, not added to Linux. Error on add_memory(). */
+ VIRTIO_MEM_BBM_BB_PLUGGED,
+ /* Plugged and added to Linux. */
+ VIRTIO_MEM_BBM_BB_ADDED,
+ /* All online parts are fake-offline, ready to remove. */
+ VIRTIO_MEM_BBM_BB_FAKE_OFFLINE,
+ VIRTIO_MEM_BBM_BB_COUNT
};
struct virtio_mem {
@@ -51,6 +105,7 @@
/* Workqueue that processes the plug/unplug requests. */
struct work_struct wq;
+ atomic_t wq_active;
atomic_t config_changed;
/* Virtqueue for guest->host requests. */
@@ -70,27 +125,13 @@
/* The device block size (for communicating with the device). */
uint64_t device_block_size;
- /* The translated node id. NUMA_NO_NODE in case not specified. */
+ /* The determined node id for all memory of the device. */
int nid;
/* Physical start address of the memory region. */
uint64_t addr;
/* Maximum region size in bytes. */
uint64_t region_size;
- /* The subblock size. */
- uint64_t subblock_size;
- /* The number of subblocks per memory block. */
- uint32_t nb_sb_per_mb;
-
- /* Id of the first memory block of this device. */
- unsigned long first_mb_id;
- /* Id of the last memory block of this device. */
- unsigned long last_mb_id;
- /* Id of the last usable memory block of this device. */
- unsigned long last_usable_mb_id;
- /* Id of the next memory bock to prepare when needed. */
- unsigned long next_mb_id;
-
/* The parent resource for all memory added via this device. */
struct resource *parent_resource;
/*
@@ -99,31 +140,79 @@
*/
const char *resource_name;
- /* Summary of all memory block states. */
- unsigned long nb_mb_state[VIRTIO_MEM_MB_STATE_COUNT];
-#define VIRTIO_MEM_NB_OFFLINE_THRESHOLD 10
-
/*
- * One byte state per memory block.
- *
- * Allocated via vmalloc(). When preparing new blocks, resized
- * (alloc+copy+free) when needed (crossing pages with the next mb).
- * (when crossing pages).
- *
- * With 128MB memory blocks, we have states for 512GB of memory in one
- * page.
+ * We don't want to add too much memory if it's not getting onlined,
+ * to avoid running OOM. Besides this threshold, we allow to have at
+ * least two offline blocks at a time (whatever is bigger).
*/
- uint8_t *mb_state;
+#define VIRTIO_MEM_DEFAULT_OFFLINE_THRESHOLD (1024 * 1024 * 1024)
+ atomic64_t offline_size;
+ uint64_t offline_threshold;
+
+ /* If set, the driver is in SBM, otherwise in BBM. */
+ bool in_sbm;
+
+ union {
+ struct {
+ /* Id of the first memory block of this device. */
+ unsigned long first_mb_id;
+ /* Id of the last usable memory block of this device. */
+ unsigned long last_usable_mb_id;
+ /* Id of the next memory bock to prepare when needed. */
+ unsigned long next_mb_id;
+
+ /* The subblock size. */
+ uint64_t sb_size;
+ /* The number of subblocks per Linux memory block. */
+ uint32_t sbs_per_mb;
+
+ /* Summary of all memory block states. */
+ unsigned long mb_count[VIRTIO_MEM_SBM_MB_COUNT];
+
+ /*
+ * One byte state per memory block. Allocated via
+ * vmalloc(). Resized (alloc+copy+free) on demand.
+ *
+ * With 128 MiB memory blocks, we have states for 512
+ * GiB of memory in one 4 KiB page.
+ */
+ uint8_t *mb_states;
+
+ /*
+ * Bitmap: one bit per subblock. Allocated similar to
+ * sbm.mb_states.
+ *
+ * A set bit means the corresponding subblock is
+ * plugged, otherwise it's unblocked.
+ *
+ * With 4 MiB subblocks, we manage 128 GiB of memory
+ * in one 4 KiB page.
+ */
+ unsigned long *sb_states;
+ } sbm;
+
+ struct {
+ /* Id of the first big block of this device. */
+ unsigned long first_bb_id;
+ /* Id of the last usable big block of this device. */
+ unsigned long last_usable_bb_id;
+ /* Id of the next device bock to prepare when needed. */
+ unsigned long next_bb_id;
+
+ /* Summary of all big block states. */
+ unsigned long bb_count[VIRTIO_MEM_BBM_BB_COUNT];
+
+ /* One byte state per big block. See sbm.mb_states. */
+ uint8_t *bb_states;
+
+ /* The block size used for plugging/adding/removing. */
+ uint64_t bb_size;
+ } bbm;
+ };
/*
- * $nb_sb_per_mb bit per memory block. Handled similar to mb_state.
- *
- * With 4MB subblocks, we manage 128GB of memory in one page.
- */
- unsigned long *sb_bitmap;
-
- /*
- * Mutex that protects the nb_mb_state, mb_state, and sb_bitmap.
+ * Mutex that protects the sbm.mb_count, sbm.mb_states,
+ * sbm.sb_states, bbm.bb_count, and bbm.bb_states
*
* When this lock is held the pointers can't change, ONLINE and
* OFFLINE blocks can't change the state and no subblocks will get
@@ -160,6 +249,11 @@
static LIST_HEAD(virtio_mem_devices);
static void virtio_mem_online_page_cb(struct page *page, unsigned int order);
+static void virtio_mem_fake_offline_going_offline(unsigned long pfn,
+ unsigned long nr_pages);
+static void virtio_mem_fake_offline_cancel_offline(unsigned long pfn,
+ unsigned long nr_pages);
+static void virtio_mem_retry(struct virtio_mem *vm);
/*
* Register a virtio-mem device so it will be considered for the online_page
@@ -213,6 +307,24 @@
}
/*
+ * Calculate the big block id of a given address.
+ */
+static unsigned long virtio_mem_phys_to_bb_id(struct virtio_mem *vm,
+ uint64_t addr)
+{
+ return addr / vm->bbm.bb_size;
+}
+
+/*
+ * Calculate the physical start address of a given big block id.
+ */
+static uint64_t virtio_mem_bb_id_to_phys(struct virtio_mem *vm,
+ unsigned long bb_id)
+{
+ return bb_id * vm->bbm.bb_size;
+}
+
+/*
* Calculate the subblock id of a given address.
*/
static unsigned long virtio_mem_phys_to_sb_id(struct virtio_mem *vm,
@@ -221,89 +333,164 @@
const unsigned long mb_id = virtio_mem_phys_to_mb_id(addr);
const unsigned long mb_addr = virtio_mem_mb_id_to_phys(mb_id);
- return (addr - mb_addr) / vm->subblock_size;
+ return (addr - mb_addr) / vm->sbm.sb_size;
}
/*
- * Set the state of a memory block, taking care of the state counter.
+ * Set the state of a big block, taking care of the state counter.
*/
-static void virtio_mem_mb_set_state(struct virtio_mem *vm, unsigned long mb_id,
- enum virtio_mem_mb_state state)
+static void virtio_mem_bbm_set_bb_state(struct virtio_mem *vm,
+ unsigned long bb_id,
+ enum virtio_mem_bbm_bb_state state)
{
- const unsigned long idx = mb_id - vm->first_mb_id;
- enum virtio_mem_mb_state old_state;
+ const unsigned long idx = bb_id - vm->bbm.first_bb_id;
+ enum virtio_mem_bbm_bb_state old_state;
- old_state = vm->mb_state[idx];
- vm->mb_state[idx] = state;
+ old_state = vm->bbm.bb_states[idx];
+ vm->bbm.bb_states[idx] = state;
- BUG_ON(vm->nb_mb_state[old_state] == 0);
- vm->nb_mb_state[old_state]--;
- vm->nb_mb_state[state]++;
+ BUG_ON(vm->bbm.bb_count[old_state] == 0);
+ vm->bbm.bb_count[old_state]--;
+ vm->bbm.bb_count[state]++;
}
/*
- * Get the state of a memory block.
+ * Get the state of a big block.
*/
-static enum virtio_mem_mb_state virtio_mem_mb_get_state(struct virtio_mem *vm,
- unsigned long mb_id)
+static enum virtio_mem_bbm_bb_state virtio_mem_bbm_get_bb_state(struct virtio_mem *vm,
+ unsigned long bb_id)
{
- const unsigned long idx = mb_id - vm->first_mb_id;
-
- return vm->mb_state[idx];
+ return vm->bbm.bb_states[bb_id - vm->bbm.first_bb_id];
}
/*
- * Prepare the state array for the next memory block.
+ * Prepare the big block state array for the next big block.
*/
-static int virtio_mem_mb_state_prepare_next_mb(struct virtio_mem *vm)
+static int virtio_mem_bbm_bb_states_prepare_next_bb(struct virtio_mem *vm)
{
- unsigned long old_bytes = vm->next_mb_id - vm->first_mb_id + 1;
- unsigned long new_bytes = vm->next_mb_id - vm->first_mb_id + 2;
+ unsigned long old_bytes = vm->bbm.next_bb_id - vm->bbm.first_bb_id;
+ unsigned long new_bytes = old_bytes + 1;
int old_pages = PFN_UP(old_bytes);
int new_pages = PFN_UP(new_bytes);
- uint8_t *new_mb_state;
+ uint8_t *new_array;
- if (vm->mb_state && old_pages == new_pages)
+ if (vm->bbm.bb_states && old_pages == new_pages)
return 0;
- new_mb_state = vzalloc(new_pages * PAGE_SIZE);
- if (!new_mb_state)
+ new_array = vzalloc(new_pages * PAGE_SIZE);
+ if (!new_array)
return -ENOMEM;
mutex_lock(&vm->hotplug_mutex);
- if (vm->mb_state)
- memcpy(new_mb_state, vm->mb_state, old_pages * PAGE_SIZE);
- vfree(vm->mb_state);
- vm->mb_state = new_mb_state;
+ if (vm->bbm.bb_states)
+ memcpy(new_array, vm->bbm.bb_states, old_pages * PAGE_SIZE);
+ vfree(vm->bbm.bb_states);
+ vm->bbm.bb_states = new_array;
mutex_unlock(&vm->hotplug_mutex);
return 0;
}
-#define virtio_mem_for_each_mb_state(_vm, _mb_id, _state) \
- for (_mb_id = _vm->first_mb_id; \
- _mb_id < _vm->next_mb_id && _vm->nb_mb_state[_state]; \
- _mb_id++) \
- if (virtio_mem_mb_get_state(_vm, _mb_id) == _state)
+#define virtio_mem_bbm_for_each_bb(_vm, _bb_id, _state) \
+ for (_bb_id = vm->bbm.first_bb_id; \
+ _bb_id < vm->bbm.next_bb_id && _vm->bbm.bb_count[_state]; \
+ _bb_id++) \
+ if (virtio_mem_bbm_get_bb_state(_vm, _bb_id) == _state)
-#define virtio_mem_for_each_mb_state_rev(_vm, _mb_id, _state) \
- for (_mb_id = _vm->next_mb_id - 1; \
- _mb_id >= _vm->first_mb_id && _vm->nb_mb_state[_state]; \
+#define virtio_mem_bbm_for_each_bb_rev(_vm, _bb_id, _state) \
+ for (_bb_id = vm->bbm.next_bb_id - 1; \
+ _bb_id >= vm->bbm.first_bb_id && _vm->bbm.bb_count[_state]; \
+ _bb_id--) \
+ if (virtio_mem_bbm_get_bb_state(_vm, _bb_id) == _state)
+
+/*
+ * Set the state of a memory block, taking care of the state counter.
+ */
+static void virtio_mem_sbm_set_mb_state(struct virtio_mem *vm,
+ unsigned long mb_id, uint8_t state)
+{
+ const unsigned long idx = mb_id - vm->sbm.first_mb_id;
+ uint8_t old_state;
+
+ old_state = vm->sbm.mb_states[idx];
+ vm->sbm.mb_states[idx] = state;
+
+ BUG_ON(vm->sbm.mb_count[old_state] == 0);
+ vm->sbm.mb_count[old_state]--;
+ vm->sbm.mb_count[state]++;
+}
+
+/*
+ * Get the state of a memory block.
+ */
+static uint8_t virtio_mem_sbm_get_mb_state(struct virtio_mem *vm,
+ unsigned long mb_id)
+{
+ const unsigned long idx = mb_id - vm->sbm.first_mb_id;
+
+ return vm->sbm.mb_states[idx];
+}
+
+/*
+ * Prepare the state array for the next memory block.
+ */
+static int virtio_mem_sbm_mb_states_prepare_next_mb(struct virtio_mem *vm)
+{
+ int old_pages = PFN_UP(vm->sbm.next_mb_id - vm->sbm.first_mb_id);
+ int new_pages = PFN_UP(vm->sbm.next_mb_id - vm->sbm.first_mb_id + 1);
+ uint8_t *new_array;
+
+ if (vm->sbm.mb_states && old_pages == new_pages)
+ return 0;
+
+ new_array = vzalloc(new_pages * PAGE_SIZE);
+ if (!new_array)
+ return -ENOMEM;
+
+ mutex_lock(&vm->hotplug_mutex);
+ if (vm->sbm.mb_states)
+ memcpy(new_array, vm->sbm.mb_states, old_pages * PAGE_SIZE);
+ vfree(vm->sbm.mb_states);
+ vm->sbm.mb_states = new_array;
+ mutex_unlock(&vm->hotplug_mutex);
+
+ return 0;
+}
+
+#define virtio_mem_sbm_for_each_mb(_vm, _mb_id, _state) \
+ for (_mb_id = _vm->sbm.first_mb_id; \
+ _mb_id < _vm->sbm.next_mb_id && _vm->sbm.mb_count[_state]; \
+ _mb_id++) \
+ if (virtio_mem_sbm_get_mb_state(_vm, _mb_id) == _state)
+
+#define virtio_mem_sbm_for_each_mb_rev(_vm, _mb_id, _state) \
+ for (_mb_id = _vm->sbm.next_mb_id - 1; \
+ _mb_id >= _vm->sbm.first_mb_id && _vm->sbm.mb_count[_state]; \
_mb_id--) \
- if (virtio_mem_mb_get_state(_vm, _mb_id) == _state)
+ if (virtio_mem_sbm_get_mb_state(_vm, _mb_id) == _state)
+
+/*
+ * Calculate the bit number in the subblock bitmap for the given subblock
+ * inside the given memory block.
+ */
+static int virtio_mem_sbm_sb_state_bit_nr(struct virtio_mem *vm,
+ unsigned long mb_id, int sb_id)
+{
+ return (mb_id - vm->sbm.first_mb_id) * vm->sbm.sbs_per_mb + sb_id;
+}
/*
* Mark all selected subblocks plugged.
*
* Will not modify the state of the memory block.
*/
-static void virtio_mem_mb_set_sb_plugged(struct virtio_mem *vm,
- unsigned long mb_id, int sb_id,
- int count)
+static void virtio_mem_sbm_set_sb_plugged(struct virtio_mem *vm,
+ unsigned long mb_id, int sb_id,
+ int count)
{
- const int bit = (mb_id - vm->first_mb_id) * vm->nb_sb_per_mb + sb_id;
+ const int bit = virtio_mem_sbm_sb_state_bit_nr(vm, mb_id, sb_id);
- __bitmap_set(vm->sb_bitmap, bit, count);
+ __bitmap_set(vm->sbm.sb_states, bit, count);
}
/*
@@ -311,105 +498,114 @@
*
* Will not modify the state of the memory block.
*/
-static void virtio_mem_mb_set_sb_unplugged(struct virtio_mem *vm,
- unsigned long mb_id, int sb_id,
- int count)
+static void virtio_mem_sbm_set_sb_unplugged(struct virtio_mem *vm,
+ unsigned long mb_id, int sb_id,
+ int count)
{
- const int bit = (mb_id - vm->first_mb_id) * vm->nb_sb_per_mb + sb_id;
+ const int bit = virtio_mem_sbm_sb_state_bit_nr(vm, mb_id, sb_id);
- __bitmap_clear(vm->sb_bitmap, bit, count);
+ __bitmap_clear(vm->sbm.sb_states, bit, count);
}
/*
* Test if all selected subblocks are plugged.
*/
-static bool virtio_mem_mb_test_sb_plugged(struct virtio_mem *vm,
- unsigned long mb_id, int sb_id,
- int count)
+static bool virtio_mem_sbm_test_sb_plugged(struct virtio_mem *vm,
+ unsigned long mb_id, int sb_id,
+ int count)
{
- const int bit = (mb_id - vm->first_mb_id) * vm->nb_sb_per_mb + sb_id;
+ const int bit = virtio_mem_sbm_sb_state_bit_nr(vm, mb_id, sb_id);
if (count == 1)
- return test_bit(bit, vm->sb_bitmap);
+ return test_bit(bit, vm->sbm.sb_states);
/* TODO: Helper similar to bitmap_set() */
- return find_next_zero_bit(vm->sb_bitmap, bit + count, bit) >=
+ return find_next_zero_bit(vm->sbm.sb_states, bit + count, bit) >=
bit + count;
}
/*
* Test if all selected subblocks are unplugged.
*/
-static bool virtio_mem_mb_test_sb_unplugged(struct virtio_mem *vm,
- unsigned long mb_id, int sb_id,
- int count)
+static bool virtio_mem_sbm_test_sb_unplugged(struct virtio_mem *vm,
+ unsigned long mb_id, int sb_id,
+ int count)
{
- const int bit = (mb_id - vm->first_mb_id) * vm->nb_sb_per_mb + sb_id;
+ const int bit = virtio_mem_sbm_sb_state_bit_nr(vm, mb_id, sb_id);
/* TODO: Helper similar to bitmap_set() */
- return find_next_bit(vm->sb_bitmap, bit + count, bit) >= bit + count;
+ return find_next_bit(vm->sbm.sb_states, bit + count, bit) >=
+ bit + count;
}
/*
- * Find the first unplugged subblock. Returns vm->nb_sb_per_mb in case there is
+ * Find the first unplugged subblock. Returns vm->sbm.sbs_per_mb in case there is
* none.
*/
-static int virtio_mem_mb_first_unplugged_sb(struct virtio_mem *vm,
+static int virtio_mem_sbm_first_unplugged_sb(struct virtio_mem *vm,
unsigned long mb_id)
{
- const int bit = (mb_id - vm->first_mb_id) * vm->nb_sb_per_mb;
+ const int bit = virtio_mem_sbm_sb_state_bit_nr(vm, mb_id, 0);
- return find_next_zero_bit(vm->sb_bitmap, bit + vm->nb_sb_per_mb, bit) -
- bit;
+ return find_next_zero_bit(vm->sbm.sb_states,
+ bit + vm->sbm.sbs_per_mb, bit) - bit;
}
/*
* Prepare the subblock bitmap for the next memory block.
*/
-static int virtio_mem_sb_bitmap_prepare_next_mb(struct virtio_mem *vm)
+static int virtio_mem_sbm_sb_states_prepare_next_mb(struct virtio_mem *vm)
{
- const unsigned long old_nb_mb = vm->next_mb_id - vm->first_mb_id;
- const unsigned long old_nb_bits = old_nb_mb * vm->nb_sb_per_mb;
- const unsigned long new_nb_bits = (old_nb_mb + 1) * vm->nb_sb_per_mb;
+ const unsigned long old_nb_mb = vm->sbm.next_mb_id - vm->sbm.first_mb_id;
+ const unsigned long old_nb_bits = old_nb_mb * vm->sbm.sbs_per_mb;
+ const unsigned long new_nb_bits = (old_nb_mb + 1) * vm->sbm.sbs_per_mb;
int old_pages = PFN_UP(BITS_TO_LONGS(old_nb_bits) * sizeof(long));
int new_pages = PFN_UP(BITS_TO_LONGS(new_nb_bits) * sizeof(long));
- unsigned long *new_sb_bitmap, *old_sb_bitmap;
+ unsigned long *new_bitmap, *old_bitmap;
- if (vm->sb_bitmap && old_pages == new_pages)
+ if (vm->sbm.sb_states && old_pages == new_pages)
return 0;
- new_sb_bitmap = vzalloc(new_pages * PAGE_SIZE);
- if (!new_sb_bitmap)
+ new_bitmap = vzalloc(new_pages * PAGE_SIZE);
+ if (!new_bitmap)
return -ENOMEM;
mutex_lock(&vm->hotplug_mutex);
- if (new_sb_bitmap)
- memcpy(new_sb_bitmap, vm->sb_bitmap, old_pages * PAGE_SIZE);
+ if (new_bitmap)
+ memcpy(new_bitmap, vm->sbm.sb_states, old_pages * PAGE_SIZE);
- old_sb_bitmap = vm->sb_bitmap;
- vm->sb_bitmap = new_sb_bitmap;
+ old_bitmap = vm->sbm.sb_states;
+ vm->sbm.sb_states = new_bitmap;
mutex_unlock(&vm->hotplug_mutex);
- vfree(old_sb_bitmap);
+ vfree(old_bitmap);
return 0;
}
/*
- * Try to add a memory block to Linux. This will usually only fail
- * if out of memory.
+ * Test if we could add memory without creating too much offline memory -
+ * to avoid running OOM if memory is getting onlined deferred.
+ */
+static bool virtio_mem_could_add_memory(struct virtio_mem *vm, uint64_t size)
+{
+ if (WARN_ON_ONCE(size > vm->offline_threshold))
+ return false;
+
+ return atomic64_read(&vm->offline_size) + size <= vm->offline_threshold;
+}
+
+/*
+ * Try adding memory to Linux. Will usually only fail if out of memory.
*
* Must not be called with the vm->hotplug_mutex held (possible deadlock with
* onlining code).
*
- * Will not modify the state of the memory block.
+ * Will not modify the state of memory blocks in virtio-mem.
*/
-static int virtio_mem_mb_add(struct virtio_mem *vm, unsigned long mb_id)
+static int virtio_mem_add_memory(struct virtio_mem *vm, uint64_t addr,
+ uint64_t size)
{
- const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id);
- int nid = vm->nid;
-
- if (nid == NUMA_NO_NODE)
- nid = memory_add_physaddr_to_nid(addr);
+ int rc;
/*
* When force-unloading the driver and we still have memory added to
@@ -422,53 +618,155 @@
return -ENOMEM;
}
- dev_dbg(&vm->vdev->dev, "adding memory block: %lu\n", mb_id);
- return add_memory_driver_managed(nid, addr, memory_block_size_bytes(),
- vm->resource_name,
- MEMHP_MERGE_RESOURCE);
+ dev_dbg(&vm->vdev->dev, "adding memory: 0x%llx - 0x%llx\n", addr,
+ addr + size - 1);
+ /* Memory might get onlined immediately. */
+ atomic64_add(size, &vm->offline_size);
+ rc = add_memory_driver_managed(vm->nid, addr, size, vm->resource_name,
+ MEMHP_MERGE_RESOURCE);
+ if (rc) {
+ atomic64_sub(size, &vm->offline_size);
+ dev_warn(&vm->vdev->dev, "adding memory failed: %d\n", rc);
+ /*
+ * TODO: Linux MM does not properly clean up yet in all cases
+ * where adding of memory failed - especially on -ENOMEM.
+ */
+ }
+ return rc;
}
/*
- * Try to remove a memory block from Linux. Will only fail if the memory block
- * is not offline.
+ * See virtio_mem_add_memory(): Try adding a single Linux memory block.
+ */
+static int virtio_mem_sbm_add_mb(struct virtio_mem *vm, unsigned long mb_id)
+{
+ const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id);
+ const uint64_t size = memory_block_size_bytes();
+
+ return virtio_mem_add_memory(vm, addr, size);
+}
+
+/*
+ * See virtio_mem_add_memory(): Try adding a big block.
+ */
+static int virtio_mem_bbm_add_bb(struct virtio_mem *vm, unsigned long bb_id)
+{
+ const uint64_t addr = virtio_mem_bb_id_to_phys(vm, bb_id);
+ const uint64_t size = vm->bbm.bb_size;
+
+ return virtio_mem_add_memory(vm, addr, size);
+}
+
+/*
+ * Try removing memory from Linux. Will only fail if memory blocks aren't
+ * offline.
*
* Must not be called with the vm->hotplug_mutex held (possible deadlock with
* onlining code).
*
- * Will not modify the state of the memory block.
+ * Will not modify the state of memory blocks in virtio-mem.
*/
-static int virtio_mem_mb_remove(struct virtio_mem *vm, unsigned long mb_id)
+static int virtio_mem_remove_memory(struct virtio_mem *vm, uint64_t addr,
+ uint64_t size)
{
- const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id);
- int nid = vm->nid;
+ int rc;
- if (nid == NUMA_NO_NODE)
- nid = memory_add_physaddr_to_nid(addr);
-
- dev_dbg(&vm->vdev->dev, "removing memory block: %lu\n", mb_id);
- return remove_memory(nid, addr, memory_block_size_bytes());
+ dev_dbg(&vm->vdev->dev, "removing memory: 0x%llx - 0x%llx\n", addr,
+ addr + size - 1);
+ rc = remove_memory(vm->nid, addr, size);
+ if (!rc) {
+ atomic64_sub(size, &vm->offline_size);
+ /*
+ * We might have freed up memory we can now unplug, retry
+ * immediately instead of waiting.
+ */
+ virtio_mem_retry(vm);
+ } else {
+ dev_dbg(&vm->vdev->dev, "removing memory failed: %d\n", rc);
+ }
+ return rc;
}
/*
- * Try to offline and remove a memory block from Linux.
+ * See virtio_mem_remove_memory(): Try removing a single Linux memory block.
+ */
+static int virtio_mem_sbm_remove_mb(struct virtio_mem *vm, unsigned long mb_id)
+{
+ const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id);
+ const uint64_t size = memory_block_size_bytes();
+
+ return virtio_mem_remove_memory(vm, addr, size);
+}
+
+/*
+ * See virtio_mem_remove_memory(): Try to remove all Linux memory blocks covered
+ * by the big block.
+ */
+static int virtio_mem_bbm_remove_bb(struct virtio_mem *vm, unsigned long bb_id)
+{
+ const uint64_t addr = virtio_mem_bb_id_to_phys(vm, bb_id);
+ const uint64_t size = vm->bbm.bb_size;
+
+ return virtio_mem_remove_memory(vm, addr, size);
+}
+
+/*
+ * Try offlining and removing memory from Linux.
*
* Must not be called with the vm->hotplug_mutex held (possible deadlock with
* onlining code).
*
- * Will not modify the state of the memory block.
+ * Will not modify the state of memory blocks in virtio-mem.
*/
-static int virtio_mem_mb_offline_and_remove(struct virtio_mem *vm,
- unsigned long mb_id)
+static int virtio_mem_offline_and_remove_memory(struct virtio_mem *vm,
+ uint64_t addr,
+ uint64_t size)
+{
+ int rc;
+
+ dev_dbg(&vm->vdev->dev,
+ "offlining and removing memory: 0x%llx - 0x%llx\n", addr,
+ addr + size - 1);
+
+ rc = offline_and_remove_memory(vm->nid, addr, size);
+ if (!rc) {
+ atomic64_sub(size, &vm->offline_size);
+ /*
+ * We might have freed up memory we can now unplug, retry
+ * immediately instead of waiting.
+ */
+ virtio_mem_retry(vm);
+ } else {
+ dev_dbg(&vm->vdev->dev,
+ "offlining and removing memory failed: %d\n", rc);
+ }
+ return rc;
+}
+
+/*
+ * See virtio_mem_offline_and_remove_memory(): Try offlining and removing
+ * a single Linux memory block.
+ */
+static int virtio_mem_sbm_offline_and_remove_mb(struct virtio_mem *vm,
+ unsigned long mb_id)
{
const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id);
- int nid = vm->nid;
+ const uint64_t size = memory_block_size_bytes();
- if (nid == NUMA_NO_NODE)
- nid = memory_add_physaddr_to_nid(addr);
+ return virtio_mem_offline_and_remove_memory(vm, addr, size);
+}
- dev_dbg(&vm->vdev->dev, "offlining and removing memory block: %lu\n",
- mb_id);
- return offline_and_remove_memory(nid, addr, memory_block_size_bytes());
+/*
+ * See virtio_mem_offline_and_remove_memory(): Try to offline and remove a
+ * all Linux memory blocks covered by the big block.
+ */
+static int virtio_mem_bbm_offline_and_remove_bb(struct virtio_mem *vm,
+ unsigned long bb_id)
+{
+ const uint64_t addr = virtio_mem_bb_id_to_phys(vm, bb_id);
+ const uint64_t size = vm->bbm.bb_size;
+
+ return virtio_mem_offline_and_remove_memory(vm, addr, size);
}
/*
@@ -499,31 +797,28 @@
* Test if a virtio-mem device overlaps with the given range. Can be called
* from (notifier) callbacks lockless.
*/
-static bool virtio_mem_overlaps_range(struct virtio_mem *vm,
- unsigned long start, unsigned long size)
+static bool virtio_mem_overlaps_range(struct virtio_mem *vm, uint64_t start,
+ uint64_t size)
{
- unsigned long dev_start = virtio_mem_mb_id_to_phys(vm->first_mb_id);
- unsigned long dev_end = virtio_mem_mb_id_to_phys(vm->last_mb_id) +
- memory_block_size_bytes();
-
- return start < dev_end && dev_start < start + size;
+ return start < vm->addr + vm->region_size && vm->addr < start + size;
}
/*
- * Test if a virtio-mem device owns a memory block. Can be called from
+ * Test if a virtio-mem device contains a given range. Can be called from
* (notifier) callbacks lockless.
*/
-static bool virtio_mem_owned_mb(struct virtio_mem *vm, unsigned long mb_id)
+static bool virtio_mem_contains_range(struct virtio_mem *vm, uint64_t start,
+ uint64_t size)
{
- return mb_id >= vm->first_mb_id && mb_id <= vm->last_mb_id;
+ return start >= vm->addr && start + size <= vm->addr + vm->region_size;
}
-static int virtio_mem_notify_going_online(struct virtio_mem *vm,
- unsigned long mb_id)
+static int virtio_mem_sbm_notify_going_online(struct virtio_mem *vm,
+ unsigned long mb_id)
{
- switch (virtio_mem_mb_get_state(vm, mb_id)) {
- case VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL:
- case VIRTIO_MEM_MB_STATE_OFFLINE:
+ switch (virtio_mem_sbm_get_mb_state(vm, mb_id)) {
+ case VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL:
+ case VIRTIO_MEM_SBM_MB_OFFLINE:
return NOTIFY_OK;
default:
break;
@@ -533,106 +828,98 @@
return NOTIFY_BAD;
}
-static void virtio_mem_notify_offline(struct virtio_mem *vm,
- unsigned long mb_id)
+static void virtio_mem_sbm_notify_offline(struct virtio_mem *vm,
+ unsigned long mb_id)
{
- switch (virtio_mem_mb_get_state(vm, mb_id)) {
- case VIRTIO_MEM_MB_STATE_ONLINE_PARTIAL:
- virtio_mem_mb_set_state(vm, mb_id,
- VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL);
+ switch (virtio_mem_sbm_get_mb_state(vm, mb_id)) {
+ case VIRTIO_MEM_SBM_MB_ONLINE_PARTIAL:
+ virtio_mem_sbm_set_mb_state(vm, mb_id,
+ VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL);
break;
- case VIRTIO_MEM_MB_STATE_ONLINE:
- virtio_mem_mb_set_state(vm, mb_id,
- VIRTIO_MEM_MB_STATE_OFFLINE);
+ case VIRTIO_MEM_SBM_MB_ONLINE:
+ virtio_mem_sbm_set_mb_state(vm, mb_id,
+ VIRTIO_MEM_SBM_MB_OFFLINE);
break;
default:
BUG();
break;
}
+}
+static void virtio_mem_sbm_notify_online(struct virtio_mem *vm,
+ unsigned long mb_id)
+{
+ switch (virtio_mem_sbm_get_mb_state(vm, mb_id)) {
+ case VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL:
+ virtio_mem_sbm_set_mb_state(vm, mb_id,
+ VIRTIO_MEM_SBM_MB_ONLINE_PARTIAL);
+ break;
+ case VIRTIO_MEM_SBM_MB_OFFLINE:
+ virtio_mem_sbm_set_mb_state(vm, mb_id,
+ VIRTIO_MEM_SBM_MB_ONLINE);
+ break;
+ default:
+ BUG();
+ break;
+ }
+}
+
+static void virtio_mem_sbm_notify_going_offline(struct virtio_mem *vm,
+ unsigned long mb_id)
+{
+ const unsigned long nr_pages = PFN_DOWN(vm->sbm.sb_size);
+ unsigned long pfn;
+ int sb_id;
+
+ for (sb_id = 0; sb_id < vm->sbm.sbs_per_mb; sb_id++) {
+ if (virtio_mem_sbm_test_sb_plugged(vm, mb_id, sb_id, 1))
+ continue;
+ pfn = PFN_DOWN(virtio_mem_mb_id_to_phys(mb_id) +
+ sb_id * vm->sbm.sb_size);
+ virtio_mem_fake_offline_going_offline(pfn, nr_pages);
+ }
+}
+
+static void virtio_mem_sbm_notify_cancel_offline(struct virtio_mem *vm,
+ unsigned long mb_id)
+{
+ const unsigned long nr_pages = PFN_DOWN(vm->sbm.sb_size);
+ unsigned long pfn;
+ int sb_id;
+
+ for (sb_id = 0; sb_id < vm->sbm.sbs_per_mb; sb_id++) {
+ if (virtio_mem_sbm_test_sb_plugged(vm, mb_id, sb_id, 1))
+ continue;
+ pfn = PFN_DOWN(virtio_mem_mb_id_to_phys(mb_id) +
+ sb_id * vm->sbm.sb_size);
+ virtio_mem_fake_offline_cancel_offline(pfn, nr_pages);
+ }
+}
+
+static void virtio_mem_bbm_notify_going_offline(struct virtio_mem *vm,
+ unsigned long bb_id,
+ unsigned long pfn,
+ unsigned long nr_pages)
+{
/*
- * Trigger the workqueue, maybe we can now unplug memory. Also,
- * when we offline and remove a memory block, this will re-trigger
- * us immediately - which is often nice because the removal of
- * the memory block (e.g., memmap) might have freed up memory
- * on other memory blocks we manage.
+ * When marked as "fake-offline", all online memory of this device block
+ * is allocated by us. Otherwise, we don't have any memory allocated.
*/
- virtio_mem_retry(vm);
+ if (virtio_mem_bbm_get_bb_state(vm, bb_id) !=
+ VIRTIO_MEM_BBM_BB_FAKE_OFFLINE)
+ return;
+ virtio_mem_fake_offline_going_offline(pfn, nr_pages);
}
-static void virtio_mem_notify_online(struct virtio_mem *vm, unsigned long mb_id)
+static void virtio_mem_bbm_notify_cancel_offline(struct virtio_mem *vm,
+ unsigned long bb_id,
+ unsigned long pfn,
+ unsigned long nr_pages)
{
- unsigned long nb_offline;
-
- switch (virtio_mem_mb_get_state(vm, mb_id)) {
- case VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL:
- virtio_mem_mb_set_state(vm, mb_id,
- VIRTIO_MEM_MB_STATE_ONLINE_PARTIAL);
- break;
- case VIRTIO_MEM_MB_STATE_OFFLINE:
- virtio_mem_mb_set_state(vm, mb_id, VIRTIO_MEM_MB_STATE_ONLINE);
- break;
- default:
- BUG();
- break;
- }
- nb_offline = vm->nb_mb_state[VIRTIO_MEM_MB_STATE_OFFLINE] +
- vm->nb_mb_state[VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL];
-
- /* see if we can add new blocks now that we onlined one block */
- if (nb_offline == VIRTIO_MEM_NB_OFFLINE_THRESHOLD - 1)
- virtio_mem_retry(vm);
-}
-
-static void virtio_mem_notify_going_offline(struct virtio_mem *vm,
- unsigned long mb_id)
-{
- const unsigned long nr_pages = PFN_DOWN(vm->subblock_size);
- struct page *page;
- unsigned long pfn;
- int sb_id, i;
-
- for (sb_id = 0; sb_id < vm->nb_sb_per_mb; sb_id++) {
- if (virtio_mem_mb_test_sb_plugged(vm, mb_id, sb_id, 1))
- continue;
- /*
- * Drop our reference to the pages so the memory can get
- * offlined and add the unplugged pages to the managed
- * page counters (so offlining code can correctly subtract
- * them again).
- */
- pfn = PFN_DOWN(virtio_mem_mb_id_to_phys(mb_id) +
- sb_id * vm->subblock_size);
- adjust_managed_page_count(pfn_to_page(pfn), nr_pages);
- for (i = 0; i < nr_pages; i++) {
- page = pfn_to_page(pfn + i);
- if (WARN_ON(!page_ref_dec_and_test(page)))
- dump_page(page, "unplugged page referenced");
- }
- }
-}
-
-static void virtio_mem_notify_cancel_offline(struct virtio_mem *vm,
- unsigned long mb_id)
-{
- const unsigned long nr_pages = PFN_DOWN(vm->subblock_size);
- unsigned long pfn;
- int sb_id, i;
-
- for (sb_id = 0; sb_id < vm->nb_sb_per_mb; sb_id++) {
- if (virtio_mem_mb_test_sb_plugged(vm, mb_id, sb_id, 1))
- continue;
- /*
- * Get the reference we dropped when going offline and
- * subtract the unplugged pages from the managed page
- * counters.
- */
- pfn = PFN_DOWN(virtio_mem_mb_id_to_phys(mb_id) +
- sb_id * vm->subblock_size);
- adjust_managed_page_count(pfn_to_page(pfn), -nr_pages);
- for (i = 0; i < nr_pages; i++)
- page_ref_inc(pfn_to_page(pfn + i));
- }
+ if (virtio_mem_bbm_get_bb_state(vm, bb_id) !=
+ VIRTIO_MEM_BBM_BB_FAKE_OFFLINE)
+ return;
+ virtio_mem_fake_offline_cancel_offline(pfn, nr_pages);
}
/*
@@ -648,20 +935,33 @@
struct memory_notify *mhp = arg;
const unsigned long start = PFN_PHYS(mhp->start_pfn);
const unsigned long size = PFN_PHYS(mhp->nr_pages);
- const unsigned long mb_id = virtio_mem_phys_to_mb_id(start);
int rc = NOTIFY_OK;
+ unsigned long id;
if (!virtio_mem_overlaps_range(vm, start, size))
return NOTIFY_DONE;
- /*
- * Memory is onlined/offlined in memory block granularity. We cannot
- * cross virtio-mem device boundaries and memory block boundaries. Bail
- * out if this ever changes.
- */
- if (WARN_ON_ONCE(size != memory_block_size_bytes() ||
- !IS_ALIGNED(start, memory_block_size_bytes())))
- return NOTIFY_BAD;
+ if (vm->in_sbm) {
+ id = virtio_mem_phys_to_mb_id(start);
+ /*
+ * In SBM, we add memory in separate memory blocks - we expect
+ * it to be onlined/offlined in the same granularity. Bail out
+ * if this ever changes.
+ */
+ if (WARN_ON_ONCE(size != memory_block_size_bytes() ||
+ !IS_ALIGNED(start, memory_block_size_bytes())))
+ return NOTIFY_BAD;
+ } else {
+ id = virtio_mem_phys_to_bb_id(vm, start);
+ /*
+ * In BBM, we only care about onlining/offlining happening
+ * within a single big block, we don't care about the
+ * actual granularity as we don't track individual Linux
+ * memory blocks.
+ */
+ if (WARN_ON_ONCE(id != virtio_mem_phys_to_bb_id(vm, start + size - 1)))
+ return NOTIFY_BAD;
+ }
/*
* Avoid circular locking lockdep warnings. We lock the mutex
@@ -680,7 +980,12 @@
break;
}
vm->hotplug_active = true;
- virtio_mem_notify_going_offline(vm, mb_id);
+ if (vm->in_sbm)
+ virtio_mem_sbm_notify_going_offline(vm, id);
+ else
+ virtio_mem_bbm_notify_going_offline(vm, id,
+ mhp->start_pfn,
+ mhp->nr_pages);
break;
case MEM_GOING_ONLINE:
mutex_lock(&vm->hotplug_mutex);
@@ -690,22 +995,51 @@
break;
}
vm->hotplug_active = true;
- rc = virtio_mem_notify_going_online(vm, mb_id);
+ if (vm->in_sbm)
+ rc = virtio_mem_sbm_notify_going_online(vm, id);
break;
case MEM_OFFLINE:
- virtio_mem_notify_offline(vm, mb_id);
+ if (vm->in_sbm)
+ virtio_mem_sbm_notify_offline(vm, id);
+
+ atomic64_add(size, &vm->offline_size);
+ /*
+ * Trigger the workqueue. Now that we have some offline memory,
+ * maybe we can handle pending unplug requests.
+ */
+ if (!unplug_online)
+ virtio_mem_retry(vm);
+
vm->hotplug_active = false;
mutex_unlock(&vm->hotplug_mutex);
break;
case MEM_ONLINE:
- virtio_mem_notify_online(vm, mb_id);
+ if (vm->in_sbm)
+ virtio_mem_sbm_notify_online(vm, id);
+
+ atomic64_sub(size, &vm->offline_size);
+ /*
+ * Start adding more memory once we onlined half of our
+ * threshold. Don't trigger if it's possibly due to our actipn
+ * (e.g., us adding memory which gets onlined immediately from
+ * the core).
+ */
+ if (!atomic_read(&vm->wq_active) &&
+ virtio_mem_could_add_memory(vm, vm->offline_threshold / 2))
+ virtio_mem_retry(vm);
+
vm->hotplug_active = false;
mutex_unlock(&vm->hotplug_mutex);
break;
case MEM_CANCEL_OFFLINE:
if (!vm->hotplug_active)
break;
- virtio_mem_notify_cancel_offline(vm, mb_id);
+ if (vm->in_sbm)
+ virtio_mem_sbm_notify_cancel_offline(vm, id);
+ else
+ virtio_mem_bbm_notify_cancel_offline(vm, id,
+ mhp->start_pfn,
+ mhp->nr_pages);
vm->hotplug_active = false;
mutex_unlock(&vm->hotplug_mutex);
break;
@@ -729,7 +1063,7 @@
* (via generic_online_page()) using PageDirty().
*/
static void virtio_mem_set_fake_offline(unsigned long pfn,
- unsigned int nr_pages, bool onlined)
+ unsigned long nr_pages, bool onlined)
{
for (; nr_pages--; pfn++) {
struct page *page = pfn_to_page(pfn);
@@ -748,7 +1082,7 @@
* (via generic_online_page()), clear PageDirty().
*/
static void virtio_mem_clear_fake_offline(unsigned long pfn,
- unsigned int nr_pages, bool onlined)
+ unsigned long nr_pages, bool onlined)
{
for (; nr_pages--; pfn++) {
struct page *page = pfn_to_page(pfn);
@@ -763,16 +1097,17 @@
* Release a range of fake-offline pages to the buddy, effectively
* fake-onlining them.
*/
-static void virtio_mem_fake_online(unsigned long pfn, unsigned int nr_pages)
+static void virtio_mem_fake_online(unsigned long pfn, unsigned long nr_pages)
{
- const int order = MAX_ORDER - 1;
- int i;
+ const unsigned long max_nr_pages = MAX_ORDER_NR_PAGES;
+ unsigned long i;
/*
- * We are always called with subblock granularity, which is at least
- * aligned to MAX_ORDER - 1.
+ * We are always called at least with MAX_ORDER_NR_PAGES
+ * granularity/alignment (e.g., the way subblocks work). All pages
+ * inside such a block are alike.
*/
- for (i = 0; i < nr_pages; i += 1 << order) {
+ for (i = 0; i < nr_pages; i += max_nr_pages) {
struct page *page = pfn_to_page(pfn + i);
/*
@@ -782,42 +1117,128 @@
* alike.
*/
if (PageDirty(page)) {
- virtio_mem_clear_fake_offline(pfn + i, 1 << order,
+ virtio_mem_clear_fake_offline(pfn + i, max_nr_pages,
false);
- generic_online_page(page, order);
+ generic_online_page(page, MAX_ORDER - 1);
} else {
- virtio_mem_clear_fake_offline(pfn + i, 1 << order,
+ virtio_mem_clear_fake_offline(pfn + i, max_nr_pages,
true);
- free_contig_range(pfn + i, 1 << order);
- adjust_managed_page_count(page, 1 << order);
+ free_contig_range(pfn + i, max_nr_pages);
+ adjust_managed_page_count(page, max_nr_pages);
}
}
}
+/*
+ * Try to allocate a range, marking pages fake-offline, effectively
+ * fake-offlining them.
+ */
+static int virtio_mem_fake_offline(unsigned long pfn, unsigned long nr_pages)
+{
+ const bool is_movable = zone_idx(page_zone(pfn_to_page(pfn))) ==
+ ZONE_MOVABLE;
+ int rc, retry_count;
+
+ /*
+ * TODO: We want an alloc_contig_range() mode that tries to allocate
+ * harder (e.g., dealing with temporarily pinned pages, PCP), especially
+ * with ZONE_MOVABLE. So for now, retry a couple of times with
+ * ZONE_MOVABLE before giving up - because that zone is supposed to give
+ * some guarantees.
+ */
+ for (retry_count = 0; retry_count < 5; retry_count++) {
+ rc = alloc_contig_range(pfn, pfn + nr_pages, MIGRATE_MOVABLE,
+ GFP_KERNEL);
+ if (rc == -ENOMEM)
+ /* whoops, out of memory */
+ return rc;
+ else if (rc && !is_movable)
+ break;
+ else if (rc)
+ continue;
+
+ virtio_mem_set_fake_offline(pfn, nr_pages, true);
+ adjust_managed_page_count(pfn_to_page(pfn), -nr_pages);
+ return 0;
+ }
+
+ return -EBUSY;
+}
+
+/*
+ * Handle fake-offline pages when memory is going offline - such that the
+ * pages can be skipped by mm-core when offlining.
+ */
+static void virtio_mem_fake_offline_going_offline(unsigned long pfn,
+ unsigned long nr_pages)
+{
+ struct page *page;
+ unsigned long i;
+
+ /*
+ * Drop our reference to the pages so the memory can get offlined
+ * and add the unplugged pages to the managed page counters (so
+ * offlining code can correctly subtract them again).
+ */
+ adjust_managed_page_count(pfn_to_page(pfn), nr_pages);
+ /* Drop our reference to the pages so the memory can get offlined. */
+ for (i = 0; i < nr_pages; i++) {
+ page = pfn_to_page(pfn + i);
+ if (WARN_ON(!page_ref_dec_and_test(page)))
+ dump_page(page, "fake-offline page referenced");
+ }
+}
+
+/*
+ * Handle fake-offline pages when memory offlining is canceled - to undo
+ * what we did in virtio_mem_fake_offline_going_offline().
+ */
+static void virtio_mem_fake_offline_cancel_offline(unsigned long pfn,
+ unsigned long nr_pages)
+{
+ unsigned long i;
+
+ /*
+ * Get the reference we dropped when going offline and subtract the
+ * unplugged pages from the managed page counters.
+ */
+ adjust_managed_page_count(pfn_to_page(pfn), -nr_pages);
+ for (i = 0; i < nr_pages; i++)
+ page_ref_inc(pfn_to_page(pfn + i));
+}
+
static void virtio_mem_online_page_cb(struct page *page, unsigned int order)
{
const unsigned long addr = page_to_phys(page);
- const unsigned long mb_id = virtio_mem_phys_to_mb_id(addr);
+ unsigned long id, sb_id;
struct virtio_mem *vm;
- int sb_id;
+ bool do_online;
- /*
- * We exploit here that subblocks have at least MAX_ORDER - 1
- * size/alignment and that this callback is is called with such a
- * size/alignment. So we cannot cross subblocks and therefore
- * also not memory blocks.
- */
rcu_read_lock();
list_for_each_entry_rcu(vm, &virtio_mem_devices, next) {
- if (!virtio_mem_owned_mb(vm, mb_id))
+ if (!virtio_mem_contains_range(vm, addr, PFN_PHYS(1 << order)))
continue;
- sb_id = virtio_mem_phys_to_sb_id(vm, addr);
- /*
- * If plugged, online the pages, otherwise, set them fake
- * offline (PageOffline).
- */
- if (virtio_mem_mb_test_sb_plugged(vm, mb_id, sb_id, 1))
+ if (vm->in_sbm) {
+ /*
+ * We exploit here that subblocks have at least
+ * MAX_ORDER_NR_PAGES size/alignment - so we cannot
+ * cross subblocks within one call.
+ */
+ id = virtio_mem_phys_to_mb_id(addr);
+ sb_id = virtio_mem_phys_to_sb_id(vm, addr);
+ do_online = virtio_mem_sbm_test_sb_plugged(vm, id,
+ sb_id, 1);
+ } else {
+ /*
+ * If the whole block is marked fake offline, keep
+ * everything that way.
+ */
+ id = virtio_mem_phys_to_bb_id(vm, addr);
+ do_online = virtio_mem_bbm_get_bb_state(vm, id) !=
+ VIRTIO_MEM_BBM_BB_FAKE_OFFLINE;
+ }
+ if (do_online)
generic_online_page(page, order);
else
virtio_mem_set_fake_offline(PFN_DOWN(addr), 1 << order,
@@ -870,23 +1291,33 @@
.u.plug.addr = cpu_to_virtio64(vm->vdev, addr),
.u.plug.nb_blocks = cpu_to_virtio16(vm->vdev, nb_vm_blocks),
};
+ int rc = -ENOMEM;
if (atomic_read(&vm->config_changed))
return -EAGAIN;
+ dev_dbg(&vm->vdev->dev, "plugging memory: 0x%llx - 0x%llx\n", addr,
+ addr + size - 1);
+
switch (virtio_mem_send_request(vm, &req)) {
case VIRTIO_MEM_RESP_ACK:
vm->plugged_size += size;
return 0;
case VIRTIO_MEM_RESP_NACK:
- return -EAGAIN;
+ rc = -EAGAIN;
+ break;
case VIRTIO_MEM_RESP_BUSY:
- return -ETXTBSY;
+ rc = -ETXTBSY;
+ break;
case VIRTIO_MEM_RESP_ERROR:
- return -EINVAL;
+ rc = -EINVAL;
+ break;
default:
- return -ENOMEM;
+ break;
}
+
+ dev_dbg(&vm->vdev->dev, "plugging memory failed: %d\n", rc);
+ return rc;
}
static int virtio_mem_send_unplug_request(struct virtio_mem *vm, uint64_t addr,
@@ -898,21 +1329,30 @@
.u.unplug.addr = cpu_to_virtio64(vm->vdev, addr),
.u.unplug.nb_blocks = cpu_to_virtio16(vm->vdev, nb_vm_blocks),
};
+ int rc = -ENOMEM;
if (atomic_read(&vm->config_changed))
return -EAGAIN;
+ dev_dbg(&vm->vdev->dev, "unplugging memory: 0x%llx - 0x%llx\n", addr,
+ addr + size - 1);
+
switch (virtio_mem_send_request(vm, &req)) {
case VIRTIO_MEM_RESP_ACK:
vm->plugged_size -= size;
return 0;
case VIRTIO_MEM_RESP_BUSY:
- return -ETXTBSY;
+ rc = -ETXTBSY;
+ break;
case VIRTIO_MEM_RESP_ERROR:
- return -EINVAL;
+ rc = -EINVAL;
+ break;
default:
- return -ENOMEM;
+ break;
}
+
+ dev_dbg(&vm->vdev->dev, "unplugging memory failed: %d\n", rc);
+ return rc;
}
static int virtio_mem_send_unplug_all_request(struct virtio_mem *vm)
@@ -920,6 +1360,9 @@
const struct virtio_mem_req req = {
.type = cpu_to_virtio16(vm->vdev, VIRTIO_MEM_REQ_UNPLUG_ALL),
};
+ int rc = -ENOMEM;
+
+ dev_dbg(&vm->vdev->dev, "unplugging all memory");
switch (virtio_mem_send_request(vm, &req)) {
case VIRTIO_MEM_RESP_ACK:
@@ -929,30 +1372,31 @@
atomic_set(&vm->config_changed, 1);
return 0;
case VIRTIO_MEM_RESP_BUSY:
- return -ETXTBSY;
+ rc = -ETXTBSY;
+ break;
default:
- return -ENOMEM;
+ break;
}
+
+ dev_dbg(&vm->vdev->dev, "unplugging all memory failed: %d\n", rc);
+ return rc;
}
/*
* Plug selected subblocks. Updates the plugged state, but not the state
* of the memory block.
*/
-static int virtio_mem_mb_plug_sb(struct virtio_mem *vm, unsigned long mb_id,
- int sb_id, int count)
+static int virtio_mem_sbm_plug_sb(struct virtio_mem *vm, unsigned long mb_id,
+ int sb_id, int count)
{
const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id) +
- sb_id * vm->subblock_size;
- const uint64_t size = count * vm->subblock_size;
+ sb_id * vm->sbm.sb_size;
+ const uint64_t size = count * vm->sbm.sb_size;
int rc;
- dev_dbg(&vm->vdev->dev, "plugging memory block: %lu : %i - %i\n", mb_id,
- sb_id, sb_id + count - 1);
-
rc = virtio_mem_send_plug_request(vm, addr, size);
if (!rc)
- virtio_mem_mb_set_sb_plugged(vm, mb_id, sb_id, count);
+ virtio_mem_sbm_set_sb_plugged(vm, mb_id, sb_id, count);
return rc;
}
@@ -960,24 +1404,47 @@
* Unplug selected subblocks. Updates the plugged state, but not the state
* of the memory block.
*/
-static int virtio_mem_mb_unplug_sb(struct virtio_mem *vm, unsigned long mb_id,
- int sb_id, int count)
+static int virtio_mem_sbm_unplug_sb(struct virtio_mem *vm, unsigned long mb_id,
+ int sb_id, int count)
{
const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id) +
- sb_id * vm->subblock_size;
- const uint64_t size = count * vm->subblock_size;
+ sb_id * vm->sbm.sb_size;
+ const uint64_t size = count * vm->sbm.sb_size;
int rc;
- dev_dbg(&vm->vdev->dev, "unplugging memory block: %lu : %i - %i\n",
- mb_id, sb_id, sb_id + count - 1);
-
rc = virtio_mem_send_unplug_request(vm, addr, size);
if (!rc)
- virtio_mem_mb_set_sb_unplugged(vm, mb_id, sb_id, count);
+ virtio_mem_sbm_set_sb_unplugged(vm, mb_id, sb_id, count);
return rc;
}
/*
+ * Request to unplug a big block.
+ *
+ * Will not modify the state of the big block.
+ */
+static int virtio_mem_bbm_unplug_bb(struct virtio_mem *vm, unsigned long bb_id)
+{
+ const uint64_t addr = virtio_mem_bb_id_to_phys(vm, bb_id);
+ const uint64_t size = vm->bbm.bb_size;
+
+ return virtio_mem_send_unplug_request(vm, addr, size);
+}
+
+/*
+ * Request to plug a big block.
+ *
+ * Will not modify the state of the big block.
+ */
+static int virtio_mem_bbm_plug_bb(struct virtio_mem *vm, unsigned long bb_id)
+{
+ const uint64_t addr = virtio_mem_bb_id_to_phys(vm, bb_id);
+ const uint64_t size = vm->bbm.bb_size;
+
+ return virtio_mem_send_plug_request(vm, addr, size);
+}
+
+/*
* Unplug the desired number of plugged subblocks of a offline or not-added
* memory block. Will fail if any subblock cannot get unplugged (instead of
* skipping it).
@@ -986,29 +1453,29 @@
*
* Note: can fail after some subblocks were unplugged.
*/
-static int virtio_mem_mb_unplug_any_sb(struct virtio_mem *vm,
- unsigned long mb_id, uint64_t *nb_sb)
+static int virtio_mem_sbm_unplug_any_sb(struct virtio_mem *vm,
+ unsigned long mb_id, uint64_t *nb_sb)
{
int sb_id, count;
int rc;
- sb_id = vm->nb_sb_per_mb - 1;
+ sb_id = vm->sbm.sbs_per_mb - 1;
while (*nb_sb) {
/* Find the next candidate subblock */
while (sb_id >= 0 &&
- virtio_mem_mb_test_sb_unplugged(vm, mb_id, sb_id, 1))
+ virtio_mem_sbm_test_sb_unplugged(vm, mb_id, sb_id, 1))
sb_id--;
if (sb_id < 0)
break;
/* Try to unplug multiple subblocks at a time */
count = 1;
while (count < *nb_sb && sb_id > 0 &&
- virtio_mem_mb_test_sb_plugged(vm, mb_id, sb_id - 1, 1)) {
+ virtio_mem_sbm_test_sb_plugged(vm, mb_id, sb_id - 1, 1)) {
count++;
sb_id--;
}
- rc = virtio_mem_mb_unplug_sb(vm, mb_id, sb_id, count);
+ rc = virtio_mem_sbm_unplug_sb(vm, mb_id, sb_id, count);
if (rc)
return rc;
*nb_sb -= count;
@@ -1025,63 +1492,50 @@
*
* Note: can fail after some subblocks were unplugged.
*/
-static int virtio_mem_mb_unplug(struct virtio_mem *vm, unsigned long mb_id)
+static int virtio_mem_sbm_unplug_mb(struct virtio_mem *vm, unsigned long mb_id)
{
- uint64_t nb_sb = vm->nb_sb_per_mb;
+ uint64_t nb_sb = vm->sbm.sbs_per_mb;
- return virtio_mem_mb_unplug_any_sb(vm, mb_id, &nb_sb);
+ return virtio_mem_sbm_unplug_any_sb(vm, mb_id, &nb_sb);
}
/*
* Prepare tracking data for the next memory block.
*/
-static int virtio_mem_prepare_next_mb(struct virtio_mem *vm,
- unsigned long *mb_id)
+static int virtio_mem_sbm_prepare_next_mb(struct virtio_mem *vm,
+ unsigned long *mb_id)
{
int rc;
- if (vm->next_mb_id > vm->last_usable_mb_id)
+ if (vm->sbm.next_mb_id > vm->sbm.last_usable_mb_id)
return -ENOSPC;
/* Resize the state array if required. */
- rc = virtio_mem_mb_state_prepare_next_mb(vm);
+ rc = virtio_mem_sbm_mb_states_prepare_next_mb(vm);
if (rc)
return rc;
/* Resize the subblock bitmap if required. */
- rc = virtio_mem_sb_bitmap_prepare_next_mb(vm);
+ rc = virtio_mem_sbm_sb_states_prepare_next_mb(vm);
if (rc)
return rc;
- vm->nb_mb_state[VIRTIO_MEM_MB_STATE_UNUSED]++;
- *mb_id = vm->next_mb_id++;
+ vm->sbm.mb_count[VIRTIO_MEM_SBM_MB_UNUSED]++;
+ *mb_id = vm->sbm.next_mb_id++;
return 0;
}
/*
- * Don't add too many blocks that are not onlined yet to avoid running OOM.
- */
-static bool virtio_mem_too_many_mb_offline(struct virtio_mem *vm)
-{
- unsigned long nb_offline;
-
- nb_offline = vm->nb_mb_state[VIRTIO_MEM_MB_STATE_OFFLINE] +
- vm->nb_mb_state[VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL];
- return nb_offline >= VIRTIO_MEM_NB_OFFLINE_THRESHOLD;
-}
-
-/*
* Try to plug the desired number of subblocks and add the memory block
* to Linux.
*
* Will modify the state of the memory block.
*/
-static int virtio_mem_mb_plug_and_add(struct virtio_mem *vm,
- unsigned long mb_id,
- uint64_t *nb_sb)
+static int virtio_mem_sbm_plug_and_add_mb(struct virtio_mem *vm,
+ unsigned long mb_id, uint64_t *nb_sb)
{
- const int count = min_t(int, *nb_sb, vm->nb_sb_per_mb);
- int rc, rc2;
+ const int count = min_t(int, *nb_sb, vm->sbm.sbs_per_mb);
+ int rc;
if (WARN_ON_ONCE(!count))
return -EINVAL;
@@ -1090,7 +1544,7 @@
* Plug the requested number of subblocks before adding it to linux,
* so that onlining will directly online all plugged subblocks.
*/
- rc = virtio_mem_mb_plug_sb(vm, mb_id, 0, count);
+ rc = virtio_mem_sbm_plug_sb(vm, mb_id, 0, count);
if (rc)
return rc;
@@ -1098,29 +1552,21 @@
* Mark the block properly offline before adding it to Linux,
* so the memory notifiers will find the block in the right state.
*/
- if (count == vm->nb_sb_per_mb)
- virtio_mem_mb_set_state(vm, mb_id,
- VIRTIO_MEM_MB_STATE_OFFLINE);
+ if (count == vm->sbm.sbs_per_mb)
+ virtio_mem_sbm_set_mb_state(vm, mb_id,
+ VIRTIO_MEM_SBM_MB_OFFLINE);
else
- virtio_mem_mb_set_state(vm, mb_id,
- VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL);
+ virtio_mem_sbm_set_mb_state(vm, mb_id,
+ VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL);
/* Add the memory block to linux - if that fails, try to unplug. */
- rc = virtio_mem_mb_add(vm, mb_id);
+ rc = virtio_mem_sbm_add_mb(vm, mb_id);
if (rc) {
- enum virtio_mem_mb_state new_state = VIRTIO_MEM_MB_STATE_UNUSED;
+ int new_state = VIRTIO_MEM_SBM_MB_UNUSED;
- dev_err(&vm->vdev->dev,
- "adding memory block %lu failed with %d\n", mb_id, rc);
- rc2 = virtio_mem_mb_unplug_sb(vm, mb_id, 0, count);
-
- /*
- * TODO: Linux MM does not properly clean up yet in all cases
- * where adding of memory failed - especially on -ENOMEM.
- */
- if (rc2)
- new_state = VIRTIO_MEM_MB_STATE_PLUGGED;
- virtio_mem_mb_set_state(vm, mb_id, new_state);
+ if (virtio_mem_sbm_unplug_sb(vm, mb_id, 0, count))
+ new_state = VIRTIO_MEM_SBM_MB_PLUGGED;
+ virtio_mem_sbm_set_mb_state(vm, mb_id, new_state);
return rc;
}
@@ -1136,8 +1582,9 @@
*
* Note: Can fail after some subblocks were successfully plugged.
*/
-static int virtio_mem_mb_plug_any_sb(struct virtio_mem *vm, unsigned long mb_id,
- uint64_t *nb_sb, bool online)
+static int virtio_mem_sbm_plug_any_sb(struct virtio_mem *vm,
+ unsigned long mb_id, uint64_t *nb_sb,
+ bool online)
{
unsigned long pfn, nr_pages;
int sb_id, count;
@@ -1147,17 +1594,16 @@
return -EINVAL;
while (*nb_sb) {
- sb_id = virtio_mem_mb_first_unplugged_sb(vm, mb_id);
- if (sb_id >= vm->nb_sb_per_mb)
+ sb_id = virtio_mem_sbm_first_unplugged_sb(vm, mb_id);
+ if (sb_id >= vm->sbm.sbs_per_mb)
break;
count = 1;
while (count < *nb_sb &&
- sb_id + count < vm->nb_sb_per_mb &&
- !virtio_mem_mb_test_sb_plugged(vm, mb_id, sb_id + count,
- 1))
+ sb_id + count < vm->sbm.sbs_per_mb &&
+ !virtio_mem_sbm_test_sb_plugged(vm, mb_id, sb_id + count, 1))
count++;
- rc = virtio_mem_mb_plug_sb(vm, mb_id, sb_id, count);
+ rc = virtio_mem_sbm_plug_sb(vm, mb_id, sb_id, count);
if (rc)
return rc;
*nb_sb -= count;
@@ -1166,29 +1612,26 @@
/* fake-online the pages if the memory block is online */
pfn = PFN_DOWN(virtio_mem_mb_id_to_phys(mb_id) +
- sb_id * vm->subblock_size);
- nr_pages = PFN_DOWN(count * vm->subblock_size);
+ sb_id * vm->sbm.sb_size);
+ nr_pages = PFN_DOWN(count * vm->sbm.sb_size);
virtio_mem_fake_online(pfn, nr_pages);
}
- if (virtio_mem_mb_test_sb_plugged(vm, mb_id, 0, vm->nb_sb_per_mb)) {
+ if (virtio_mem_sbm_test_sb_plugged(vm, mb_id, 0, vm->sbm.sbs_per_mb)) {
if (online)
- virtio_mem_mb_set_state(vm, mb_id,
- VIRTIO_MEM_MB_STATE_ONLINE);
+ virtio_mem_sbm_set_mb_state(vm, mb_id,
+ VIRTIO_MEM_SBM_MB_ONLINE);
else
- virtio_mem_mb_set_state(vm, mb_id,
- VIRTIO_MEM_MB_STATE_OFFLINE);
+ virtio_mem_sbm_set_mb_state(vm, mb_id,
+ VIRTIO_MEM_SBM_MB_OFFLINE);
}
return 0;
}
-/*
- * Try to plug the requested amount of memory.
- */
-static int virtio_mem_plug_request(struct virtio_mem *vm, uint64_t diff)
+static int virtio_mem_sbm_plug_request(struct virtio_mem *vm, uint64_t diff)
{
- uint64_t nb_sb = diff / vm->subblock_size;
+ uint64_t nb_sb = diff / vm->sbm.sb_size;
unsigned long mb_id;
int rc;
@@ -1199,18 +1642,18 @@
mutex_lock(&vm->hotplug_mutex);
/* Try to plug subblocks of partially plugged online blocks. */
- virtio_mem_for_each_mb_state(vm, mb_id,
- VIRTIO_MEM_MB_STATE_ONLINE_PARTIAL) {
- rc = virtio_mem_mb_plug_any_sb(vm, mb_id, &nb_sb, true);
+ virtio_mem_sbm_for_each_mb(vm, mb_id,
+ VIRTIO_MEM_SBM_MB_ONLINE_PARTIAL) {
+ rc = virtio_mem_sbm_plug_any_sb(vm, mb_id, &nb_sb, true);
if (rc || !nb_sb)
goto out_unlock;
cond_resched();
}
/* Try to plug subblocks of partially plugged offline blocks. */
- virtio_mem_for_each_mb_state(vm, mb_id,
- VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL) {
- rc = virtio_mem_mb_plug_any_sb(vm, mb_id, &nb_sb, false);
+ virtio_mem_sbm_for_each_mb(vm, mb_id,
+ VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL) {
+ rc = virtio_mem_sbm_plug_any_sb(vm, mb_id, &nb_sb, false);
if (rc || !nb_sb)
goto out_unlock;
cond_resched();
@@ -1223,11 +1666,11 @@
mutex_unlock(&vm->hotplug_mutex);
/* Try to plug and add unused blocks */
- virtio_mem_for_each_mb_state(vm, mb_id, VIRTIO_MEM_MB_STATE_UNUSED) {
- if (virtio_mem_too_many_mb_offline(vm))
+ virtio_mem_sbm_for_each_mb(vm, mb_id, VIRTIO_MEM_SBM_MB_UNUSED) {
+ if (!virtio_mem_could_add_memory(vm, memory_block_size_bytes()))
return -ENOSPC;
- rc = virtio_mem_mb_plug_and_add(vm, mb_id, &nb_sb);
+ rc = virtio_mem_sbm_plug_and_add_mb(vm, mb_id, &nb_sb);
if (rc || !nb_sb)
return rc;
cond_resched();
@@ -1235,13 +1678,13 @@
/* Try to prepare, plug and add new blocks */
while (nb_sb) {
- if (virtio_mem_too_many_mb_offline(vm))
+ if (!virtio_mem_could_add_memory(vm, memory_block_size_bytes()))
return -ENOSPC;
- rc = virtio_mem_prepare_next_mb(vm, &mb_id);
+ rc = virtio_mem_sbm_prepare_next_mb(vm, &mb_id);
if (rc)
return rc;
- rc = virtio_mem_mb_plug_and_add(vm, mb_id, &nb_sb);
+ rc = virtio_mem_sbm_plug_and_add_mb(vm, mb_id, &nb_sb);
if (rc)
return rc;
cond_resched();
@@ -1254,6 +1697,112 @@
}
/*
+ * Plug a big block and add it to Linux.
+ *
+ * Will modify the state of the big block.
+ */
+static int virtio_mem_bbm_plug_and_add_bb(struct virtio_mem *vm,
+ unsigned long bb_id)
+{
+ int rc;
+
+ if (WARN_ON_ONCE(virtio_mem_bbm_get_bb_state(vm, bb_id) !=
+ VIRTIO_MEM_BBM_BB_UNUSED))
+ return -EINVAL;
+
+ rc = virtio_mem_bbm_plug_bb(vm, bb_id);
+ if (rc)
+ return rc;
+ virtio_mem_bbm_set_bb_state(vm, bb_id, VIRTIO_MEM_BBM_BB_ADDED);
+
+ rc = virtio_mem_bbm_add_bb(vm, bb_id);
+ if (rc) {
+ if (!virtio_mem_bbm_unplug_bb(vm, bb_id))
+ virtio_mem_bbm_set_bb_state(vm, bb_id,
+ VIRTIO_MEM_BBM_BB_UNUSED);
+ else
+ /* Retry from the main loop. */
+ virtio_mem_bbm_set_bb_state(vm, bb_id,
+ VIRTIO_MEM_BBM_BB_PLUGGED);
+ return rc;
+ }
+ return 0;
+}
+
+/*
+ * Prepare tracking data for the next big block.
+ */
+static int virtio_mem_bbm_prepare_next_bb(struct virtio_mem *vm,
+ unsigned long *bb_id)
+{
+ int rc;
+
+ if (vm->bbm.next_bb_id > vm->bbm.last_usable_bb_id)
+ return -ENOSPC;
+
+ /* Resize the big block state array if required. */
+ rc = virtio_mem_bbm_bb_states_prepare_next_bb(vm);
+ if (rc)
+ return rc;
+
+ vm->bbm.bb_count[VIRTIO_MEM_BBM_BB_UNUSED]++;
+ *bb_id = vm->bbm.next_bb_id;
+ vm->bbm.next_bb_id++;
+ return 0;
+}
+
+static int virtio_mem_bbm_plug_request(struct virtio_mem *vm, uint64_t diff)
+{
+ uint64_t nb_bb = diff / vm->bbm.bb_size;
+ unsigned long bb_id;
+ int rc;
+
+ if (!nb_bb)
+ return 0;
+
+ /* Try to plug and add unused big blocks */
+ virtio_mem_bbm_for_each_bb(vm, bb_id, VIRTIO_MEM_BBM_BB_UNUSED) {
+ if (!virtio_mem_could_add_memory(vm, vm->bbm.bb_size))
+ return -ENOSPC;
+
+ rc = virtio_mem_bbm_plug_and_add_bb(vm, bb_id);
+ if (!rc)
+ nb_bb--;
+ if (rc || !nb_bb)
+ return rc;
+ cond_resched();
+ }
+
+ /* Try to prepare, plug and add new big blocks */
+ while (nb_bb) {
+ if (!virtio_mem_could_add_memory(vm, vm->bbm.bb_size))
+ return -ENOSPC;
+
+ rc = virtio_mem_bbm_prepare_next_bb(vm, &bb_id);
+ if (rc)
+ return rc;
+ rc = virtio_mem_bbm_plug_and_add_bb(vm, bb_id);
+ if (!rc)
+ nb_bb--;
+ if (rc)
+ return rc;
+ cond_resched();
+ }
+
+ return 0;
+}
+
+/*
+ * Try to plug the requested amount of memory.
+ */
+static int virtio_mem_plug_request(struct virtio_mem *vm, uint64_t diff)
+{
+ if (vm->in_sbm)
+ return virtio_mem_sbm_plug_request(vm, diff);
+ return virtio_mem_bbm_plug_request(vm, diff);
+}
+
+/*
* Unplug the desired number of plugged subblocks of an offline memory block.
* Will fail if any subblock cannot get unplugged (instead of skipping it).
*
@@ -1262,33 +1811,33 @@
*
* Note: Can fail after some subblocks were successfully unplugged.
*/
-static int virtio_mem_mb_unplug_any_sb_offline(struct virtio_mem *vm,
- unsigned long mb_id,
- uint64_t *nb_sb)
+static int virtio_mem_sbm_unplug_any_sb_offline(struct virtio_mem *vm,
+ unsigned long mb_id,
+ uint64_t *nb_sb)
{
int rc;
- rc = virtio_mem_mb_unplug_any_sb(vm, mb_id, nb_sb);
+ rc = virtio_mem_sbm_unplug_any_sb(vm, mb_id, nb_sb);
/* some subblocks might have been unplugged even on failure */
- if (!virtio_mem_mb_test_sb_plugged(vm, mb_id, 0, vm->nb_sb_per_mb))
- virtio_mem_mb_set_state(vm, mb_id,
- VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL);
+ if (!virtio_mem_sbm_test_sb_plugged(vm, mb_id, 0, vm->sbm.sbs_per_mb))
+ virtio_mem_sbm_set_mb_state(vm, mb_id,
+ VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL);
if (rc)
return rc;
- if (virtio_mem_mb_test_sb_unplugged(vm, mb_id, 0, vm->nb_sb_per_mb)) {
+ if (virtio_mem_sbm_test_sb_unplugged(vm, mb_id, 0, vm->sbm.sbs_per_mb)) {
/*
* Remove the block from Linux - this should never fail.
* Hinder the block from getting onlined by marking it
* unplugged. Temporarily drop the mutex, so
* any pending GOING_ONLINE requests can be serviced/rejected.
*/
- virtio_mem_mb_set_state(vm, mb_id,
- VIRTIO_MEM_MB_STATE_UNUSED);
+ virtio_mem_sbm_set_mb_state(vm, mb_id,
+ VIRTIO_MEM_SBM_MB_UNUSED);
mutex_unlock(&vm->hotplug_mutex);
- rc = virtio_mem_mb_remove(vm, mb_id);
+ rc = virtio_mem_sbm_remove_mb(vm, mb_id);
BUG_ON(rc);
mutex_lock(&vm->hotplug_mutex);
}
@@ -1300,38 +1849,31 @@
*
* Will modify the state of the memory block.
*/
-static int virtio_mem_mb_unplug_sb_online(struct virtio_mem *vm,
- unsigned long mb_id, int sb_id,
- int count)
+static int virtio_mem_sbm_unplug_sb_online(struct virtio_mem *vm,
+ unsigned long mb_id, int sb_id,
+ int count)
{
- const unsigned long nr_pages = PFN_DOWN(vm->subblock_size) * count;
+ const unsigned long nr_pages = PFN_DOWN(vm->sbm.sb_size) * count;
unsigned long start_pfn;
int rc;
start_pfn = PFN_DOWN(virtio_mem_mb_id_to_phys(mb_id) +
- sb_id * vm->subblock_size);
- rc = alloc_contig_range(start_pfn, start_pfn + nr_pages,
- MIGRATE_MOVABLE, GFP_KERNEL);
- if (rc == -ENOMEM)
- /* whoops, out of memory */
- return rc;
- if (rc)
- return -EBUSY;
+ sb_id * vm->sbm.sb_size);
- /* Mark it as fake-offline before unplugging it */
- virtio_mem_set_fake_offline(start_pfn, nr_pages, true);
- adjust_managed_page_count(pfn_to_page(start_pfn), -nr_pages);
+ rc = virtio_mem_fake_offline(start_pfn, nr_pages);
+ if (rc)
+ return rc;
/* Try to unplug the allocated memory */
- rc = virtio_mem_mb_unplug_sb(vm, mb_id, sb_id, count);
+ rc = virtio_mem_sbm_unplug_sb(vm, mb_id, sb_id, count);
if (rc) {
/* Return the memory to the buddy. */
virtio_mem_fake_online(start_pfn, nr_pages);
return rc;
}
- virtio_mem_mb_set_state(vm, mb_id,
- VIRTIO_MEM_MB_STATE_ONLINE_PARTIAL);
+ virtio_mem_sbm_set_mb_state(vm, mb_id,
+ VIRTIO_MEM_SBM_MB_ONLINE_PARTIAL);
return 0;
}
@@ -1345,34 +1887,34 @@
* Note: Can fail after some subblocks were successfully unplugged. Can
* return 0 even if subblocks were busy and could not get unplugged.
*/
-static int virtio_mem_mb_unplug_any_sb_online(struct virtio_mem *vm,
- unsigned long mb_id,
- uint64_t *nb_sb)
+static int virtio_mem_sbm_unplug_any_sb_online(struct virtio_mem *vm,
+ unsigned long mb_id,
+ uint64_t *nb_sb)
{
int rc, sb_id;
/* If possible, try to unplug the complete block in one shot. */
- if (*nb_sb >= vm->nb_sb_per_mb &&
- virtio_mem_mb_test_sb_plugged(vm, mb_id, 0, vm->nb_sb_per_mb)) {
- rc = virtio_mem_mb_unplug_sb_online(vm, mb_id, 0,
- vm->nb_sb_per_mb);
+ if (*nb_sb >= vm->sbm.sbs_per_mb &&
+ virtio_mem_sbm_test_sb_plugged(vm, mb_id, 0, vm->sbm.sbs_per_mb)) {
+ rc = virtio_mem_sbm_unplug_sb_online(vm, mb_id, 0,
+ vm->sbm.sbs_per_mb);
if (!rc) {
- *nb_sb -= vm->nb_sb_per_mb;
+ *nb_sb -= vm->sbm.sbs_per_mb;
goto unplugged;
} else if (rc != -EBUSY)
return rc;
}
/* Fallback to single subblocks. */
- for (sb_id = vm->nb_sb_per_mb - 1; sb_id >= 0 && *nb_sb; sb_id--) {
+ for (sb_id = vm->sbm.sbs_per_mb - 1; sb_id >= 0 && *nb_sb; sb_id--) {
/* Find the next candidate subblock */
while (sb_id >= 0 &&
- !virtio_mem_mb_test_sb_plugged(vm, mb_id, sb_id, 1))
+ !virtio_mem_sbm_test_sb_plugged(vm, mb_id, sb_id, 1))
sb_id--;
if (sb_id < 0)
break;
- rc = virtio_mem_mb_unplug_sb_online(vm, mb_id, sb_id, 1);
+ rc = virtio_mem_sbm_unplug_sb_online(vm, mb_id, sb_id, 1);
if (rc == -EBUSY)
continue;
else if (rc)
@@ -1386,24 +1928,21 @@
* remove it. This will usually not fail, as no memory is in use
* anymore - however some other notifiers might NACK the request.
*/
- if (virtio_mem_mb_test_sb_unplugged(vm, mb_id, 0, vm->nb_sb_per_mb)) {
+ if (virtio_mem_sbm_test_sb_unplugged(vm, mb_id, 0, vm->sbm.sbs_per_mb)) {
mutex_unlock(&vm->hotplug_mutex);
- rc = virtio_mem_mb_offline_and_remove(vm, mb_id);
+ rc = virtio_mem_sbm_offline_and_remove_mb(vm, mb_id);
mutex_lock(&vm->hotplug_mutex);
if (!rc)
- virtio_mem_mb_set_state(vm, mb_id,
- VIRTIO_MEM_MB_STATE_UNUSED);
+ virtio_mem_sbm_set_mb_state(vm, mb_id,
+ VIRTIO_MEM_SBM_MB_UNUSED);
}
return 0;
}
-/*
- * Try to unplug the requested amount of memory.
- */
-static int virtio_mem_unplug_request(struct virtio_mem *vm, uint64_t diff)
+static int virtio_mem_sbm_unplug_request(struct virtio_mem *vm, uint64_t diff)
{
- uint64_t nb_sb = diff / vm->subblock_size;
+ uint64_t nb_sb = diff / vm->sbm.sb_size;
unsigned long mb_id;
int rc;
@@ -1418,20 +1957,17 @@
mutex_lock(&vm->hotplug_mutex);
/* Try to unplug subblocks of partially plugged offline blocks. */
- virtio_mem_for_each_mb_state_rev(vm, mb_id,
- VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL) {
- rc = virtio_mem_mb_unplug_any_sb_offline(vm, mb_id,
- &nb_sb);
+ virtio_mem_sbm_for_each_mb_rev(vm, mb_id,
+ VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL) {
+ rc = virtio_mem_sbm_unplug_any_sb_offline(vm, mb_id, &nb_sb);
if (rc || !nb_sb)
goto out_unlock;
cond_resched();
}
/* Try to unplug subblocks of plugged offline blocks. */
- virtio_mem_for_each_mb_state_rev(vm, mb_id,
- VIRTIO_MEM_MB_STATE_OFFLINE) {
- rc = virtio_mem_mb_unplug_any_sb_offline(vm, mb_id,
- &nb_sb);
+ virtio_mem_sbm_for_each_mb_rev(vm, mb_id, VIRTIO_MEM_SBM_MB_OFFLINE) {
+ rc = virtio_mem_sbm_unplug_any_sb_offline(vm, mb_id, &nb_sb);
if (rc || !nb_sb)
goto out_unlock;
cond_resched();
@@ -1443,10 +1979,9 @@
}
/* Try to unplug subblocks of partially plugged online blocks. */
- virtio_mem_for_each_mb_state_rev(vm, mb_id,
- VIRTIO_MEM_MB_STATE_ONLINE_PARTIAL) {
- rc = virtio_mem_mb_unplug_any_sb_online(vm, mb_id,
- &nb_sb);
+ virtio_mem_sbm_for_each_mb_rev(vm, mb_id,
+ VIRTIO_MEM_SBM_MB_ONLINE_PARTIAL) {
+ rc = virtio_mem_sbm_unplug_any_sb_online(vm, mb_id, &nb_sb);
if (rc || !nb_sb)
goto out_unlock;
mutex_unlock(&vm->hotplug_mutex);
@@ -1455,10 +1990,8 @@
}
/* Try to unplug subblocks of plugged online blocks. */
- virtio_mem_for_each_mb_state_rev(vm, mb_id,
- VIRTIO_MEM_MB_STATE_ONLINE) {
- rc = virtio_mem_mb_unplug_any_sb_online(vm, mb_id,
- &nb_sb);
+ virtio_mem_sbm_for_each_mb_rev(vm, mb_id, VIRTIO_MEM_SBM_MB_ONLINE) {
+ rc = virtio_mem_sbm_unplug_any_sb_online(vm, mb_id, &nb_sb);
if (rc || !nb_sb)
goto out_unlock;
mutex_unlock(&vm->hotplug_mutex);
@@ -1474,19 +2007,211 @@
}
/*
+ * Try to offline and remove a big block from Linux and unplug it. Will fail
+ * with -EBUSY if some memory is busy and cannot get unplugged.
+ *
+ * Will modify the state of the memory block. Might temporarily drop the
+ * hotplug_mutex.
+ */
+static int virtio_mem_bbm_offline_remove_and_unplug_bb(struct virtio_mem *vm,
+ unsigned long bb_id)
+{
+ const unsigned long start_pfn = PFN_DOWN(virtio_mem_bb_id_to_phys(vm, bb_id));
+ const unsigned long nr_pages = PFN_DOWN(vm->bbm.bb_size);
+ unsigned long end_pfn = start_pfn + nr_pages;
+ unsigned long pfn;
+ struct page *page;
+ int rc;
+
+ if (WARN_ON_ONCE(virtio_mem_bbm_get_bb_state(vm, bb_id) !=
+ VIRTIO_MEM_BBM_BB_ADDED))
+ return -EINVAL;
+
+ if (bbm_safe_unplug) {
+ /*
+ * Start by fake-offlining all memory. Once we marked the device
+ * block as fake-offline, all newly onlined memory will
+ * automatically be kept fake-offline. Protect from concurrent
+ * onlining/offlining until we have a consistent state.
+ */
+ mutex_lock(&vm->hotplug_mutex);
+ virtio_mem_bbm_set_bb_state(vm, bb_id,
+ VIRTIO_MEM_BBM_BB_FAKE_OFFLINE);
+
+ for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
+ page = pfn_to_online_page(pfn);
+ if (!page)
+ continue;
+
+ rc = virtio_mem_fake_offline(pfn, PAGES_PER_SECTION);
+ if (rc) {
+ end_pfn = pfn;
+ goto rollback_safe_unplug;
+ }
+ }
+ mutex_unlock(&vm->hotplug_mutex);
+ }
+
+ rc = virtio_mem_bbm_offline_and_remove_bb(vm, bb_id);
+ if (rc) {
+ if (bbm_safe_unplug) {
+ mutex_lock(&vm->hotplug_mutex);
+ goto rollback_safe_unplug;
+ }
+ return rc;
+ }
+
+ rc = virtio_mem_bbm_unplug_bb(vm, bb_id);
+ if (rc)
+ virtio_mem_bbm_set_bb_state(vm, bb_id,
+ VIRTIO_MEM_BBM_BB_PLUGGED);
+ else
+ virtio_mem_bbm_set_bb_state(vm, bb_id,
+ VIRTIO_MEM_BBM_BB_UNUSED);
+ return rc;
+
+rollback_safe_unplug:
+ for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
+ page = pfn_to_online_page(pfn);
+ if (!page)
+ continue;
+ virtio_mem_fake_online(pfn, PAGES_PER_SECTION);
+ }
+ virtio_mem_bbm_set_bb_state(vm, bb_id, VIRTIO_MEM_BBM_BB_ADDED);
+ mutex_unlock(&vm->hotplug_mutex);
+ return rc;
+}
+
+/*
+ * Try to remove a big block from Linux and unplug it. Will fail with
+ * -EBUSY if some memory is online.
+ *
+ * Will modify the state of the memory block.
+ */
+static int virtio_mem_bbm_remove_and_unplug_bb(struct virtio_mem *vm,
+ unsigned long bb_id)
+{
+ int rc;
+
+ if (WARN_ON_ONCE(virtio_mem_bbm_get_bb_state(vm, bb_id) !=
+ VIRTIO_MEM_BBM_BB_ADDED))
+ return -EINVAL;
+
+ rc = virtio_mem_bbm_remove_bb(vm, bb_id);
+ if (rc)
+ return -EBUSY;
+
+ rc = virtio_mem_bbm_unplug_bb(vm, bb_id);
+ if (rc)
+ virtio_mem_bbm_set_bb_state(vm, bb_id,
+ VIRTIO_MEM_BBM_BB_PLUGGED);
+ else
+ virtio_mem_bbm_set_bb_state(vm, bb_id,
+ VIRTIO_MEM_BBM_BB_UNUSED);
+ return rc;
+}
+
+/*
+ * Test if a big block is completely offline.
+ */
+static bool virtio_mem_bbm_bb_is_offline(struct virtio_mem *vm,
+ unsigned long bb_id)
+{
+ const unsigned long start_pfn = PFN_DOWN(virtio_mem_bb_id_to_phys(vm, bb_id));
+ const unsigned long nr_pages = PFN_DOWN(vm->bbm.bb_size);
+ unsigned long pfn;
+
+ for (pfn = start_pfn; pfn < start_pfn + nr_pages;
+ pfn += PAGES_PER_SECTION) {
+ if (pfn_to_online_page(pfn))
+ return false;
+ }
+
+ return true;
+}
+
+static int virtio_mem_bbm_unplug_request(struct virtio_mem *vm, uint64_t diff)
+{
+ uint64_t nb_bb = diff / vm->bbm.bb_size;
+ uint64_t bb_id;
+ int rc;
+
+ if (!nb_bb)
+ return 0;
+
+ /* Try to unplug completely offline big blocks first. */
+ virtio_mem_bbm_for_each_bb_rev(vm, bb_id, VIRTIO_MEM_BBM_BB_ADDED) {
+ cond_resched();
+ /*
+ * As we're holding no locks, this check is racy as memory
+ * can get onlined in the meantime - but we'll fail gracefully.
+ */
+ if (!virtio_mem_bbm_bb_is_offline(vm, bb_id))
+ continue;
+ rc = virtio_mem_bbm_remove_and_unplug_bb(vm, bb_id);
+ if (rc == -EBUSY)
+ continue;
+ if (!rc)
+ nb_bb--;
+ if (rc || !nb_bb)
+ return rc;
+ }
+
+ if (!unplug_online)
+ return 0;
+
+ /* Try to unplug any big blocks. */
+ virtio_mem_bbm_for_each_bb_rev(vm, bb_id, VIRTIO_MEM_BBM_BB_ADDED) {
+ cond_resched();
+ rc = virtio_mem_bbm_offline_remove_and_unplug_bb(vm, bb_id);
+ if (rc == -EBUSY)
+ continue;
+ if (!rc)
+ nb_bb--;
+ if (rc || !nb_bb)
+ return rc;
+ }
+
+ return nb_bb ? -EBUSY : 0;
+}
+
+/*
+ * Try to unplug the requested amount of memory.
+ */
+static int virtio_mem_unplug_request(struct virtio_mem *vm, uint64_t diff)
+{
+ if (vm->in_sbm)
+ return virtio_mem_sbm_unplug_request(vm, diff);
+ return virtio_mem_bbm_unplug_request(vm, diff);
+}
+
+/*
* Try to unplug all blocks that couldn't be unplugged before, for example,
* because the hypervisor was busy.
*/
static int virtio_mem_unplug_pending_mb(struct virtio_mem *vm)
{
- unsigned long mb_id;
+ unsigned long id;
int rc;
- virtio_mem_for_each_mb_state(vm, mb_id, VIRTIO_MEM_MB_STATE_PLUGGED) {
- rc = virtio_mem_mb_unplug(vm, mb_id);
+ if (!vm->in_sbm) {
+ virtio_mem_bbm_for_each_bb(vm, id,
+ VIRTIO_MEM_BBM_BB_PLUGGED) {
+ rc = virtio_mem_bbm_unplug_bb(vm, id);
+ if (rc)
+ return rc;
+ virtio_mem_bbm_set_bb_state(vm, id,
+ VIRTIO_MEM_BBM_BB_UNUSED);
+ }
+ return 0;
+ }
+
+ virtio_mem_sbm_for_each_mb(vm, id, VIRTIO_MEM_SBM_MB_PLUGGED) {
+ rc = virtio_mem_sbm_unplug_mb(vm, id);
if (rc)
return rc;
- virtio_mem_mb_set_state(vm, mb_id, VIRTIO_MEM_MB_STATE_UNUSED);
+ virtio_mem_sbm_set_mb_state(vm, id,
+ VIRTIO_MEM_SBM_MB_UNUSED);
}
return 0;
@@ -1511,7 +2236,13 @@
usable_region_size, &usable_region_size);
end_addr = vm->addr + usable_region_size;
end_addr = min(end_addr, phys_limit);
- vm->last_usable_mb_id = virtio_mem_phys_to_mb_id(end_addr) - 1;
+
+ if (vm->in_sbm)
+ vm->sbm.last_usable_mb_id =
+ virtio_mem_phys_to_mb_id(end_addr) - 1;
+ else
+ vm->bbm.last_usable_bb_id =
+ virtio_mem_phys_to_bb_id(vm, end_addr) - 1;
/* see if there is a request to change the size */
virtio_cread_le(vm->vdev, struct virtio_mem_config, requested_size,
@@ -1535,6 +2266,7 @@
if (vm->broken)
return;
+ atomic_set(&vm->wq_active, 1);
retry:
rc = 0;
@@ -1595,6 +2327,8 @@
"unknown error, marking device broken: %d\n", rc);
vm->broken = true;
}
+
+ atomic_set(&vm->wq_active, 0);
}
static enum hrtimer_restart virtio_mem_timer_expired(struct hrtimer *timer)
@@ -1631,6 +2365,7 @@
static int virtio_mem_init(struct virtio_mem *vm)
{
const uint64_t phys_limit = 1UL << MAX_PHYSMEM_BITS;
+ uint64_t sb_size, addr;
uint16_t node_id;
if (!vm->vdev->config->get) {
@@ -1659,15 +2394,9 @@
virtio_cread_le(vm->vdev, struct virtio_mem_config, region_size,
&vm->region_size);
- /*
- * We always hotplug memory in memory block granularity. This way,
- * we have to wait for exactly one memory block to online.
- */
- if (vm->device_block_size > memory_block_size_bytes()) {
- dev_err(&vm->vdev->dev,
- "The block size is not supported (too big).\n");
- return -EINVAL;
- }
+ /* Determine the nid for the device based on the lowest address. */
+ if (vm->nid == NUMA_NO_NODE)
+ vm->nid = memory_add_physaddr_to_nid(vm->addr);
/* bad device setup - warn only */
if (!IS_ALIGNED(vm->addr, memory_block_size_bytes()))
@@ -1681,23 +2410,57 @@
"Some memory is not addressable. This can make some memory unusable.\n");
/*
- * Calculate the subblock size:
- * - At least MAX_ORDER - 1 / pageblock_order.
- * - At least the device block size.
- * In the worst case, a single subblock per memory block.
+ * We want subblocks to span at least MAX_ORDER_NR_PAGES and
+ * pageblock_nr_pages pages. This:
+ * - Simplifies our page onlining code (virtio_mem_online_page_cb)
+ * and fake page onlining code (virtio_mem_fake_online).
+ * - Is required for now for alloc_contig_range() to work reliably -
+ * it doesn't properly handle smaller granularity on ZONE_NORMAL.
*/
- vm->subblock_size = PAGE_SIZE * 1ul << max_t(uint32_t, MAX_ORDER - 1,
- pageblock_order);
- vm->subblock_size = max_t(uint64_t, vm->device_block_size,
- vm->subblock_size);
- vm->nb_sb_per_mb = memory_block_size_bytes() / vm->subblock_size;
+ sb_size = max_t(uint64_t, MAX_ORDER_NR_PAGES,
+ pageblock_nr_pages) * PAGE_SIZE;
+ sb_size = max_t(uint64_t, vm->device_block_size, sb_size);
- /* Round up to the next full memory block */
- vm->first_mb_id = virtio_mem_phys_to_mb_id(vm->addr - 1 +
- memory_block_size_bytes());
- vm->next_mb_id = vm->first_mb_id;
- vm->last_mb_id = virtio_mem_phys_to_mb_id(vm->addr +
- vm->region_size) - 1;
+ if (sb_size < memory_block_size_bytes() && !force_bbm) {
+ /* SBM: At least two subblocks per Linux memory block. */
+ vm->in_sbm = true;
+ vm->sbm.sb_size = sb_size;
+ vm->sbm.sbs_per_mb = memory_block_size_bytes() /
+ vm->sbm.sb_size;
+
+ /* Round up to the next full memory block */
+ addr = vm->addr + memory_block_size_bytes() - 1;
+ vm->sbm.first_mb_id = virtio_mem_phys_to_mb_id(addr);
+ vm->sbm.next_mb_id = vm->sbm.first_mb_id;
+ } else {
+ /* BBM: At least one Linux memory block. */
+ vm->bbm.bb_size = max_t(uint64_t, vm->device_block_size,
+ memory_block_size_bytes());
+
+ if (bbm_block_size) {
+ if (!is_power_of_2(bbm_block_size)) {
+ dev_warn(&vm->vdev->dev,
+ "bbm_block_size is not a power of 2");
+ } else if (bbm_block_size < vm->bbm.bb_size) {
+ dev_warn(&vm->vdev->dev,
+ "bbm_block_size is too small");
+ } else {
+ vm->bbm.bb_size = bbm_block_size;
+ }
+ }
+
+ /* Round up to the next aligned big block */
+ addr = vm->addr + vm->bbm.bb_size - 1;
+ vm->bbm.first_bb_id = virtio_mem_phys_to_bb_id(vm, addr);
+ vm->bbm.next_bb_id = vm->bbm.first_bb_id;
+ }
+
+ /* Prepare the offline threshold - make sure we can add two blocks. */
+ vm->offline_threshold = max_t(uint64_t, 2 * memory_block_size_bytes(),
+ VIRTIO_MEM_DEFAULT_OFFLINE_THRESHOLD);
+ /* In BBM, we also want at least two big blocks. */
+ vm->offline_threshold = max_t(uint64_t, 2 * vm->bbm.bb_size,
+ vm->offline_threshold);
dev_info(&vm->vdev->dev, "start address: 0x%llx", vm->addr);
dev_info(&vm->vdev->dev, "region size: 0x%llx", vm->region_size);
@@ -1705,9 +2468,13 @@
(unsigned long long)vm->device_block_size);
dev_info(&vm->vdev->dev, "memory block size: 0x%lx",
memory_block_size_bytes());
- dev_info(&vm->vdev->dev, "subblock size: 0x%llx",
- (unsigned long long)vm->subblock_size);
- if (vm->nid != NUMA_NO_NODE)
+ if (vm->in_sbm)
+ dev_info(&vm->vdev->dev, "subblock size: 0x%llx",
+ (unsigned long long)vm->sbm.sb_size);
+ else
+ dev_info(&vm->vdev->dev, "big block size: 0x%llx",
+ (unsigned long long)vm->bbm.bb_size);
+ if (vm->nid != NUMA_NO_NODE && IS_ENABLED(CONFIG_NUMA))
dev_info(&vm->vdev->dev, "nid: %d", vm->nid);
return 0;
@@ -1753,6 +2520,20 @@
vm->parent_resource = NULL;
}
+static int virtio_mem_range_has_system_ram(struct resource *res, void *arg)
+{
+ return 1;
+}
+
+static bool virtio_mem_has_memory_added(struct virtio_mem *vm)
+{
+ const unsigned long flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
+
+ return walk_iomem_res_desc(IORES_DESC_NONE, flags, vm->addr,
+ vm->addr + vm->region_size, NULL,
+ virtio_mem_range_has_system_ram) == 1;
+}
+
static int virtio_mem_probe(struct virtio_device *vdev)
{
struct virtio_mem *vm;
@@ -1849,21 +2630,24 @@
cancel_work_sync(&vm->wq);
hrtimer_cancel(&vm->retry_timer);
- /*
- * After we unregistered our callbacks, user space can online partially
- * plugged offline blocks. Make sure to remove them.
- */
- virtio_mem_for_each_mb_state(vm, mb_id,
- VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL) {
- rc = virtio_mem_mb_remove(vm, mb_id);
- BUG_ON(rc);
- virtio_mem_mb_set_state(vm, mb_id, VIRTIO_MEM_MB_STATE_UNUSED);
+ if (vm->in_sbm) {
+ /*
+ * After we unregistered our callbacks, user space can online
+ * partially plugged offline blocks. Make sure to remove them.
+ */
+ virtio_mem_sbm_for_each_mb(vm, mb_id,
+ VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL) {
+ rc = virtio_mem_sbm_remove_mb(vm, mb_id);
+ BUG_ON(rc);
+ virtio_mem_sbm_set_mb_state(vm, mb_id,
+ VIRTIO_MEM_SBM_MB_UNUSED);
+ }
+ /*
+ * After we unregistered our callbacks, user space can no longer
+ * offline partially plugged online memory blocks. No need to
+ * worry about them.
+ */
}
- /*
- * After we unregistered our callbacks, user space can no longer
- * offline partially plugged online memory blocks. No need to worry
- * about them.
- */
/* unregister callbacks */
unregister_virtio_mem_device(vm);
@@ -1874,10 +2658,7 @@
* the system. And there is no way to stop the driver/device from going
* away. Warn at least.
*/
- if (vm->nb_mb_state[VIRTIO_MEM_MB_STATE_OFFLINE] ||
- vm->nb_mb_state[VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL] ||
- vm->nb_mb_state[VIRTIO_MEM_MB_STATE_ONLINE] ||
- vm->nb_mb_state[VIRTIO_MEM_MB_STATE_ONLINE_PARTIAL]) {
+ if (virtio_mem_has_memory_added(vm)) {
dev_warn(&vdev->dev, "device still has system memory added\n");
} else {
virtio_mem_delete_resource(vm);
@@ -1885,8 +2666,12 @@
}
/* remove all tracking data - no locking needed */
- vfree(vm->mb_state);
- vfree(vm->sb_bitmap);
+ if (vm->in_sbm) {
+ vfree(vm->sbm.mb_states);
+ vfree(vm->sbm.sb_states);
+ } else {
+ vfree(vm->bbm.bb_states);
+ }
/* reset the device and cleanup the queues */
vdev->config->reset(vdev);
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index becc776..71e16b5 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -1608,7 +1608,6 @@
vq->num_added = 0;
vq->packed_ring = true;
vq->use_dma_api = vring_use_dma_api(vdev);
- list_add_tail(&vq->vq.list, &vdev->vqs);
#ifdef DEBUG
vq->in_use = false;
vq->last_add_time_valid = false;
@@ -1669,6 +1668,7 @@
cpu_to_le16(vq->packed.event_flags_shadow);
}
+ list_add_tail(&vq->vq.list, &vdev->vqs);
return &vq->vq;
err_desc_extra:
@@ -1676,9 +1676,9 @@
err_desc_state:
kfree(vq);
err_vq:
- vring_free_queue(vdev, event_size_in_bytes, device, ring_dma_addr);
+ vring_free_queue(vdev, event_size_in_bytes, device, device_event_dma_addr);
err_device:
- vring_free_queue(vdev, event_size_in_bytes, driver, ring_dma_addr);
+ vring_free_queue(vdev, event_size_in_bytes, driver, driver_event_dma_addr);
err_driver:
vring_free_queue(vdev, ring_size_in_bytes, ring, ring_dma_addr);
err_ring:
@@ -2085,7 +2085,6 @@
vq->last_used_idx = 0;
vq->num_added = 0;
vq->use_dma_api = vring_use_dma_api(vdev);
- list_add_tail(&vq->vq.list, &vdev->vqs);
#ifdef DEBUG
vq->in_use = false;
vq->last_add_time_valid = false;
@@ -2127,6 +2126,7 @@
memset(vq->split.desc_state, 0, vring.num *
sizeof(struct vring_desc_state_split));
+ list_add_tail(&vq->vq.list, &vdev->vqs);
return &vq->vq;
}
EXPORT_SYMBOL_GPL(__vring_new_virtqueue);
diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index 6038c4c..a803033 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -95,7 +95,8 @@
struct list_head list;
struct list_head eoi_list;
short refcnt;
- short spurious_cnt;
+ u8 spurious_cnt;
+ u8 is_accounted;
enum xen_irq_type type; /* type */
unsigned irq;
evtchn_port_t evtchn; /* event channel */
@@ -161,6 +162,9 @@
/* IRQ <-> IPI mapping */
static DEFINE_PER_CPU(int [XEN_NR_IPIS], ipi_to_irq) = {[0 ... XEN_NR_IPIS-1] = -1};
+/* Event channel distribution data */
+static atomic_t channels_on_cpu[NR_CPUS];
+
static int **evtchn_to_irq;
#ifdef CONFIG_X86
static unsigned long *pirq_eoi_map;
@@ -257,6 +261,32 @@
irq_set_chip_data(irq, info);
}
+/* Per CPU channel accounting */
+static void channels_on_cpu_dec(struct irq_info *info)
+{
+ if (!info->is_accounted)
+ return;
+
+ info->is_accounted = 0;
+
+ if (WARN_ON_ONCE(info->cpu >= nr_cpu_ids))
+ return;
+
+ WARN_ON_ONCE(!atomic_add_unless(&channels_on_cpu[info->cpu], -1 , 0));
+}
+
+static void channels_on_cpu_inc(struct irq_info *info)
+{
+ if (WARN_ON_ONCE(info->cpu >= nr_cpu_ids))
+ return;
+
+ if (WARN_ON_ONCE(!atomic_add_unless(&channels_on_cpu[info->cpu], 1,
+ INT_MAX)))
+ return;
+
+ info->is_accounted = 1;
+}
+
/* Constructors for packed IRQ information. */
static int xen_irq_info_common_setup(struct irq_info *info,
unsigned irq,
@@ -339,6 +369,7 @@
{
set_evtchn_to_irq(info->evtchn, -1);
info->evtchn = 0;
+ channels_on_cpu_dec(info);
}
/*
@@ -433,18 +464,25 @@
return info->u.pirq.flags & PIRQ_NEEDS_EOI;
}
-static void bind_evtchn_to_cpu(evtchn_port_t evtchn, unsigned int cpu)
+static void bind_evtchn_to_cpu(evtchn_port_t evtchn, unsigned int cpu,
+ bool force_affinity)
{
int irq = get_evtchn_to_irq(evtchn);
struct irq_info *info = info_for_irq(irq);
BUG_ON(irq == -1);
-#ifdef CONFIG_SMP
- cpumask_copy(irq_get_affinity_mask(irq), cpumask_of(cpu));
-#endif
+
+ if (IS_ENABLED(CONFIG_SMP) && force_affinity) {
+ cpumask_copy(irq_get_affinity_mask(irq), cpumask_of(cpu));
+ cpumask_copy(irq_get_effective_affinity_mask(irq),
+ cpumask_of(cpu));
+ }
+
xen_evtchn_port_bind_to_cpu(evtchn, cpu, info->cpu);
+ channels_on_cpu_dec(info);
info->cpu = cpu;
+ channels_on_cpu_inc(info);
}
/**
@@ -523,8 +561,10 @@
return;
if (spurious) {
- if ((1 << info->spurious_cnt) < (HZ << 2))
- info->spurious_cnt++;
+ if ((1 << info->spurious_cnt) < (HZ << 2)) {
+ if (info->spurious_cnt != 0xFF)
+ info->spurious_cnt++;
+ }
if (info->spurious_cnt > 1) {
delay = 1 << (info->spurious_cnt - 2);
if (delay > HZ)
@@ -615,11 +655,6 @@
{
struct irq_info *info;
-#ifdef CONFIG_SMP
- /* By default all event channels notify CPU#0. */
- cpumask_copy(irq_get_affinity_mask(irq), cpumask_of(0));
-#endif
-
info = kzalloc(sizeof(*info), GFP_KERNEL);
if (info == NULL)
panic("Unable to allocate metadata for IRQ%d\n", irq);
@@ -628,6 +663,11 @@
info->refcnt = -1;
set_info_for_irq(irq, info);
+ /*
+ * Interrupt affinity setting can be immediate. No point
+ * in delaying it until an interrupt is handled.
+ */
+ irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
INIT_LIST_HEAD(&info->eoi_list);
list_add_tail(&info->list, &xen_irq_list_head);
@@ -739,18 +779,7 @@
if (!VALID_EVTCHN(evtchn))
return;
- if (unlikely(irqd_is_setaffinity_pending(data)) &&
- likely(!irqd_irq_disabled(data))) {
- int masked = test_and_set_mask(evtchn);
-
- clear_evtchn(evtchn);
-
- irq_move_masked_irq(data);
-
- if (!masked)
- unmask_evtchn(evtchn);
- } else
- clear_evtchn(evtchn);
+ clear_evtchn(evtchn);
if (pirq_needs_eoi(data->irq)) {
rc = HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi);
@@ -794,7 +823,7 @@
goto err;
info->evtchn = evtchn;
- bind_evtchn_to_cpu(evtchn, 0);
+ bind_evtchn_to_cpu(evtchn, 0, false);
rc = xen_evtchn_port_setup(evtchn);
if (rc)
@@ -1113,8 +1142,14 @@
irq = ret;
goto out;
}
- /* New interdomain events are bound to VCPU 0. */
- bind_evtchn_to_cpu(evtchn, 0);
+ /*
+ * New interdomain events are initially bound to vCPU0 This
+ * is required to setup the event channel in the first
+ * place and also important for UP guests because the
+ * affinity setting is not invoked on them so nothing would
+ * bind the channel.
+ */
+ bind_evtchn_to_cpu(evtchn, 0, false);
} else {
struct irq_info *info = info_for_irq(irq);
WARN_ON(info == NULL || info->type != IRQT_EVTCHN);
@@ -1132,12 +1167,6 @@
}
EXPORT_SYMBOL_GPL(bind_evtchn_to_irq);
-int bind_evtchn_to_irq_lateeoi(evtchn_port_t evtchn)
-{
- return bind_evtchn_to_irq_chip(evtchn, &xen_lateeoi_chip);
-}
-EXPORT_SYMBOL_GPL(bind_evtchn_to_irq_lateeoi);
-
static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
{
struct evtchn_bind_ipi bind_ipi;
@@ -1168,7 +1197,11 @@
irq = ret;
goto out;
}
- bind_evtchn_to_cpu(evtchn, cpu);
+ /*
+ * Force the affinity mask to the target CPU so proc shows
+ * the correct target.
+ */
+ bind_evtchn_to_cpu(evtchn, cpu, true);
} else {
struct irq_info *info = info_for_irq(irq);
WARN_ON(info == NULL || info->type != IRQT_IPI);
@@ -1281,7 +1314,11 @@
goto out;
}
- bind_evtchn_to_cpu(evtchn, cpu);
+ /*
+ * Force the affinity mask for percpu interrupts so proc
+ * shows the correct target.
+ */
+ bind_evtchn_to_cpu(evtchn, cpu, percpu);
} else {
struct irq_info *info = info_for_irq(irq);
WARN_ON(info == NULL || info->type != IRQT_VIRQ);
@@ -1646,9 +1683,7 @@
mutex_unlock(&irq_mapping_update_lock);
- bind_evtchn_to_cpu(evtchn, info->cpu);
- /* This will be deferred until interrupt is processed */
- irq_set_affinity(irq, cpumask_of(info->cpu));
+ bind_evtchn_to_cpu(evtchn, info->cpu, false);
/* Unmask the event channel. */
enable_irq(irq);
@@ -1682,7 +1717,7 @@
* it, but don't do the xenlinux-level rebind in that case.
*/
if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0)
- bind_evtchn_to_cpu(evtchn, tcpu);
+ bind_evtchn_to_cpu(evtchn, tcpu, false);
if (!masked)
unmask_evtchn(evtchn);
@@ -1690,27 +1725,47 @@
return 0;
}
+/*
+ * Find the CPU within @dest mask which has the least number of channels
+ * assigned. This is not precise as the per cpu counts can be modified
+ * concurrently.
+ */
+static unsigned int select_target_cpu(const struct cpumask *dest)
+{
+ unsigned int cpu, best_cpu = UINT_MAX, minch = UINT_MAX;
+
+ for_each_cpu_and(cpu, dest, cpu_online_mask) {
+ unsigned int curch = atomic_read(&channels_on_cpu[cpu]);
+
+ if (curch < minch) {
+ minch = curch;
+ best_cpu = cpu;
+ }
+ }
+
+ /*
+ * Catch the unlikely case that dest contains no online CPUs. Can't
+ * recurse.
+ */
+ if (best_cpu == UINT_MAX)
+ return select_target_cpu(cpu_online_mask);
+
+ return best_cpu;
+}
+
static int set_affinity_irq(struct irq_data *data, const struct cpumask *dest,
bool force)
{
- unsigned tcpu = cpumask_first_and(dest, cpu_online_mask);
- int ret = xen_rebind_evtchn_to_cpu(evtchn_from_irq(data->irq), tcpu);
+ unsigned int tcpu = select_target_cpu(dest);
+ int ret;
+ ret = xen_rebind_evtchn_to_cpu(evtchn_from_irq(data->irq), tcpu);
if (!ret)
irq_data_update_effective_affinity(data, cpumask_of(tcpu));
return ret;
}
-/* To be called with desc->lock held. */
-int xen_set_affinity_evtchn(struct irq_desc *desc, unsigned int tcpu)
-{
- struct irq_data *d = irq_desc_get_irq_data(desc);
-
- return set_affinity_irq(d, cpumask_of(tcpu), false);
-}
-EXPORT_SYMBOL_GPL(xen_set_affinity_evtchn);
-
static void enable_dynirq(struct irq_data *data)
{
evtchn_port_t evtchn = evtchn_from_irq(data->irq);
@@ -1734,18 +1789,7 @@
if (!VALID_EVTCHN(evtchn))
return;
- if (unlikely(irqd_is_setaffinity_pending(data)) &&
- likely(!irqd_irq_disabled(data))) {
- int masked = test_and_set_mask(evtchn);
-
- clear_evtchn(evtchn);
-
- irq_move_masked_irq(data);
-
- if (!masked)
- unmask_evtchn(evtchn);
- } else
- clear_evtchn(evtchn);
+ clear_evtchn(evtchn);
}
static void mask_ack_dynirq(struct irq_data *data)
@@ -1830,7 +1874,8 @@
/* Record the new mapping. */
(void)xen_irq_info_virq_setup(cpu, irq, evtchn, virq);
- bind_evtchn_to_cpu(evtchn, cpu);
+ /* The affinity mask is still valid */
+ bind_evtchn_to_cpu(evtchn, cpu, false);
}
}
@@ -1855,7 +1900,8 @@
/* Record the new mapping. */
(void)xen_irq_info_ipi_setup(cpu, irq, evtchn, ipi);
- bind_evtchn_to_cpu(evtchn, cpu);
+ /* The affinity mask is still valid */
+ bind_evtchn_to_cpu(evtchn, cpu, false);
}
}
@@ -1938,8 +1984,12 @@
xen_evtchn_resume();
/* No IRQ <-> event-channel mappings. */
- list_for_each_entry(info, &xen_irq_list_head, list)
- info->evtchn = 0; /* zap event-channel binding */
+ list_for_each_entry(info, &xen_irq_list_head, list) {
+ /* Zap event-channel binding */
+ info->evtchn = 0;
+ /* Adjust accounting */
+ channels_on_cpu_dec(info);
+ }
clear_evtchn_to_irq_all();
diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c
index 5dc016d..a7a8571 100644
--- a/drivers/xen/evtchn.c
+++ b/drivers/xen/evtchn.c
@@ -421,36 +421,6 @@
del_evtchn(u, evtchn);
}
-static DEFINE_PER_CPU(int, bind_last_selected_cpu);
-
-static void evtchn_bind_interdom_next_vcpu(evtchn_port_t evtchn)
-{
- unsigned int selected_cpu, irq;
- struct irq_desc *desc;
- unsigned long flags;
-
- irq = irq_from_evtchn(evtchn);
- desc = irq_to_desc(irq);
-
- if (!desc)
- return;
-
- raw_spin_lock_irqsave(&desc->lock, flags);
- selected_cpu = this_cpu_read(bind_last_selected_cpu);
- selected_cpu = cpumask_next_and(selected_cpu,
- desc->irq_common_data.affinity, cpu_online_mask);
-
- if (unlikely(selected_cpu >= nr_cpu_ids))
- selected_cpu = cpumask_first_and(desc->irq_common_data.affinity,
- cpu_online_mask);
-
- this_cpu_write(bind_last_selected_cpu, selected_cpu);
-
- /* unmask expects irqs to be disabled */
- xen_set_affinity_evtchn(desc, selected_cpu);
- raw_spin_unlock_irqrestore(&desc->lock, flags);
-}
-
static long evtchn_ioctl(struct file *file,
unsigned int cmd, unsigned long arg)
{
@@ -508,10 +478,8 @@
break;
rc = evtchn_bind_to_user(u, bind_interdomain.local_port);
- if (rc == 0) {
+ if (rc == 0)
rc = bind_interdomain.local_port;
- evtchn_bind_interdom_next_vcpu(rc);
- }
break;
}
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 9e56ee1..9293045 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1,9 +1,8 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * linux/fs/block_dev.c
- *
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE
+ * Copyright (C) 2016 - 2020 Christoph Hellwig
*/
#include <linux/init.h>
diff --git a/fs/dcache.c b/fs/dcache.c
index ea04858..97e81a8 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -793,10 +793,17 @@
* a reference to the dentry and change that, but
* our work is done - we can leave the dentry
* around with a zero refcount.
+ *
+ * Nevertheless, there are two cases that we should kill
+ * the dentry anyway.
+ * 1. free disconnected dentries as soon as their refcount
+ * reached zero.
+ * 2. free dentries if they should not be cached.
*/
smp_rmb();
d_flags = READ_ONCE(dentry->d_flags);
- d_flags &= DCACHE_REFERENCED | DCACHE_LRU_LIST | DCACHE_DISCONNECTED;
+ d_flags &= DCACHE_REFERENCED | DCACHE_LRU_LIST |
+ DCACHE_DISCONNECTED | DCACHE_DONTCACHE;
/* Nothing to do? Dropping the reference was all we needed? */
if (d_flags == (DCACHE_REFERENCED | DCACHE_LRU_LIST) && !d_unhashed(dentry))
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 1d640b1..f45f9fe 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -185,7 +185,7 @@
struct ext4_sb_info *sbi = EXT4_SB(sb);
ext4_fsblk_t start, tmp;
- J_ASSERT_BH(bh, buffer_locked(bh));
+ ASSERT(buffer_locked(bh));
/* If checksum is bad mark all blocks used to prevent allocation
* essentially implementing a per-group read-only flag. */
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c
index 8e6ca23..4666b55 100644
--- a/fs/ext4/block_validity.c
+++ b/fs/ext4/block_validity.c
@@ -176,12 +176,10 @@
err = add_system_zone(system_blks, map.m_pblk, n, ino);
if (err < 0) {
if (err == -EFSCORRUPTED) {
- __ext4_error(sb, __func__, __LINE__,
- -err, map.m_pblk,
- "blocks %llu-%llu from inode %u overlap system zone",
- map.m_pblk,
- map.m_pblk + map.m_len - 1,
- ino);
+ EXT4_ERROR_INODE_ERR(inode, -err,
+ "blocks %llu-%llu from inode overlap system zone",
+ map.m_pblk,
+ map.m_pblk + map.m_len - 1);
}
break;
}
@@ -206,7 +204,7 @@
*
* The update of system_blks pointer in this function is protected by
* sb->s_umount semaphore. However we have to be careful as we can be
- * racing with ext4_data_block_valid() calls reading system_blks rbtree
+ * racing with ext4_inode_block_valid() calls reading system_blks rbtree
* protected only by RCU. That's why we first build the rbtree and then
* swap it in place.
*/
@@ -258,7 +256,7 @@
/*
* System blks rbtree complete, announce it once to prevent racing
- * with ext4_data_block_valid() accessing the rbtree at the same
+ * with ext4_inode_block_valid() accessing the rbtree at the same
* time.
*/
rcu_assign_pointer(sbi->s_system_blks, system_blks);
@@ -278,7 +276,7 @@
*
* The update of system_blks pointer in this function is protected by
* sb->s_umount semaphore. However we have to be careful as we can be
- * racing with ext4_data_block_valid() calls reading system_blks rbtree
+ * racing with ext4_inode_block_valid() calls reading system_blks rbtree
* protected only by RCU. So we first clear the system_blks pointer and
* then free the rbtree only after RCU grace period expires.
*/
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index c64ea8f5..2866d24 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -98,6 +98,16 @@
#define ext_debug(ino, fmt, ...) no_printk(fmt, ##__VA_ARGS__)
#endif
+#define ASSERT(assert) \
+do { \
+ if (unlikely(!(assert))) { \
+ printk(KERN_EMERG \
+ "Assertion failure in %s() at %s:%d: '%s'\n", \
+ __func__, __FILE__, __LINE__, #assert); \
+ BUG(); \
+ } \
+} while (0)
+
/* data type for block offset of block group */
typedef int ext4_grpblk_t;
@@ -1619,6 +1629,27 @@
errseq_t s_bdev_wb_err;
spinlock_t s_bdev_wb_lock;
+ /* Information about errors that happened during this mount */
+ spinlock_t s_error_lock;
+ int s_add_error_count;
+ int s_first_error_code;
+ __u32 s_first_error_line;
+ __u32 s_first_error_ino;
+ __u64 s_first_error_block;
+ const char *s_first_error_func;
+ time64_t s_first_error_time;
+ int s_last_error_code;
+ __u32 s_last_error_line;
+ __u32 s_last_error_ino;
+ __u64 s_last_error_block;
+ const char *s_last_error_func;
+ time64_t s_last_error_time;
+ /*
+ * If we are in a context where we cannot update error information in
+ * the on-disk superblock, we queue this work to do it.
+ */
+ struct work_struct s_error_work;
+
/* Ext4 fast commit stuff */
atomic_t s_fc_subtid;
atomic_t s_fc_ineligible_updates;
@@ -1858,7 +1889,6 @@
#define EXT4_GOOD_OLD_REV 0 /* The good old (original) format */
#define EXT4_DYNAMIC_REV 1 /* V2 format w/ dynamic inode sizes */
-#define EXT4_CURRENT_REV EXT4_GOOD_OLD_REV
#define EXT4_MAX_SUPP_REV EXT4_DYNAMIC_REV
#define EXT4_GOOD_OLD_INODE_SIZE 128
@@ -2952,9 +2982,9 @@
ext4_group_t block_group,
unsigned int flags);
-extern __printf(6, 7)
-void __ext4_error(struct super_block *, const char *, unsigned int, int, __u64,
- const char *, ...);
+extern __printf(7, 8)
+void __ext4_error(struct super_block *, const char *, unsigned int, bool,
+ int, __u64, const char *, ...);
extern __printf(6, 7)
void __ext4_error_inode(struct inode *, const char *, unsigned int,
ext4_fsblk_t, int, const char *, ...);
@@ -2963,9 +2993,6 @@
const char *, ...);
extern void __ext4_std_error(struct super_block *, const char *,
unsigned int, int);
-extern __printf(5, 6)
-void __ext4_abort(struct super_block *, const char *, unsigned int, int,
- const char *, ...);
extern __printf(4, 5)
void __ext4_warning(struct super_block *, const char *, unsigned int,
const char *, ...);
@@ -2995,6 +3022,9 @@
#define EXT4_ERROR_FILE(file, block, fmt, a...) \
ext4_error_file((file), __func__, __LINE__, (block), (fmt), ## a)
+#define ext4_abort(sb, err, fmt, a...) \
+ __ext4_error((sb), __func__, __LINE__, true, (err), 0, (fmt), ## a)
+
#ifdef CONFIG_PRINTK
#define ext4_error_inode(inode, func, line, block, fmt, ...) \
@@ -3005,11 +3035,11 @@
#define ext4_error_file(file, func, line, block, fmt, ...) \
__ext4_error_file(file, func, line, block, fmt, ##__VA_ARGS__)
#define ext4_error(sb, fmt, ...) \
- __ext4_error((sb), __func__, __LINE__, 0, 0, (fmt), ##__VA_ARGS__)
+ __ext4_error((sb), __func__, __LINE__, false, 0, 0, (fmt), \
+ ##__VA_ARGS__)
#define ext4_error_err(sb, err, fmt, ...) \
- __ext4_error((sb), __func__, __LINE__, (err), 0, (fmt), ##__VA_ARGS__)
-#define ext4_abort(sb, err, fmt, ...) \
- __ext4_abort((sb), __func__, __LINE__, (err), (fmt), ##__VA_ARGS__)
+ __ext4_error((sb), __func__, __LINE__, false, (err), 0, (fmt), \
+ ##__VA_ARGS__)
#define ext4_warning(sb, fmt, ...) \
__ext4_warning(sb, __func__, __LINE__, fmt, ##__VA_ARGS__)
#define ext4_warning_inode(inode, fmt, ...) \
@@ -3042,17 +3072,12 @@
#define ext4_error(sb, fmt, ...) \
do { \
no_printk(fmt, ##__VA_ARGS__); \
- __ext4_error(sb, "", 0, 0, 0, " "); \
+ __ext4_error(sb, "", 0, false, 0, 0, " "); \
} while (0)
#define ext4_error_err(sb, err, fmt, ...) \
do { \
no_printk(fmt, ##__VA_ARGS__); \
- __ext4_error(sb, "", 0, err, 0, " "); \
-} while (0)
-#define ext4_abort(sb, err, fmt, ...) \
-do { \
- no_printk(fmt, ##__VA_ARGS__); \
- __ext4_abort(sb, "", 0, err, " "); \
+ __ext4_error(sb, "", 0, false, err, 0, " "); \
} while (0)
#define ext4_warning(sb, fmt, ...) \
do { \
@@ -3361,6 +3386,21 @@
spin_unlock(ext4_group_lock_ptr(sb, group));
}
+#ifdef CONFIG_QUOTA
+static inline bool ext4_quota_capable(struct super_block *sb)
+{
+ return (test_opt(sb, QUOTA) || ext4_has_feature_quota(sb));
+}
+
+static inline bool ext4_is_quota_journalled(struct super_block *sb)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+
+ return (ext4_has_feature_quota(sb) ||
+ sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]);
+}
+#endif
+
/*
* Block validity checking
*/
@@ -3609,7 +3649,6 @@
extern int ext4_bio_write_page(struct ext4_io_submit *io,
struct page *page,
int len,
- struct writeback_control *wbc,
bool keep_towrite);
extern struct ext4_io_end_vec *ext4_alloc_io_end_vec(ext4_io_end_t *io_end);
extern struct ext4_io_end_vec *ext4_last_io_end_vec(ext4_io_end_t *io_end);
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index 0fd0c42..1a0a827 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -296,8 +296,8 @@
if (err) {
ext4_journal_abort_handle(where, line, __func__,
bh, handle, err);
- __ext4_abort(inode->i_sb, where, line, -err,
- "error %d when attempting revoke", err);
+ __ext4_error(inode->i_sb, where, line, true, -err, 0,
+ "error %d when attempting revoke", err);
}
BUFFER_TRACE(bh, "exit");
return err;
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index 00dc668..a124c68 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -86,17 +86,14 @@
#ifdef CONFIG_QUOTA
/* Amount of blocks needed for quota update - we know that the structure was
* allocated so we need to update only data block */
-#define EXT4_QUOTA_TRANS_BLOCKS(sb) ((test_opt(sb, QUOTA) ||\
- ext4_has_feature_quota(sb)) ? 1 : 0)
+#define EXT4_QUOTA_TRANS_BLOCKS(sb) ((ext4_quota_capable(sb)) ? 1 : 0)
/* Amount of blocks needed for quota insert/delete - we do some block writes
* but inode, sb and group updates are done only once */
-#define EXT4_QUOTA_INIT_BLOCKS(sb) ((test_opt(sb, QUOTA) ||\
- ext4_has_feature_quota(sb)) ?\
+#define EXT4_QUOTA_INIT_BLOCKS(sb) ((ext4_quota_capable(sb)) ?\
(DQUOT_INIT_ALLOC*(EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)\
+3+DQUOT_INIT_REWRITE) : 0)
-#define EXT4_QUOTA_DEL_BLOCKS(sb) ((test_opt(sb, QUOTA) ||\
- ext4_has_feature_quota(sb)) ?\
+#define EXT4_QUOTA_DEL_BLOCKS(sb) ((ext4_quota_capable(sb)) ?\
(DQUOT_DEL_ALLOC*(EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)\
+3+DQUOT_DEL_REWRITE) : 0)
#else
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 17d7096..3960b7e 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -5815,8 +5815,8 @@
int ret;
path = ext4_find_extent(inode, start, NULL, 0);
- if (!path)
- return -EINVAL;
+ if (IS_ERR(path))
+ return PTR_ERR(path);
ex = path[path->p_depth].p_ext;
if (!ex) {
ret = -EFSCORRUPTED;
@@ -5988,7 +5988,6 @@
kfree(path);
break;
}
- ex = path2[path2->p_depth].p_ext;
for (i = 0; i <= max(path->p_depth, path2->p_depth); i++) {
cmp1 = cmp2 = 0;
if (i <= path->p_depth)
diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c
index f2033e1..4fcc21c 100644
--- a/fs/ext4/fast_commit.c
+++ b/fs/ext4/fast_commit.c
@@ -103,8 +103,69 @@
*
* Replay code should thus check for all the valid tails in the FC area.
*
+ * Fast Commit Replay Idempotence
+ * ------------------------------
+ *
+ * Fast commits tags are idempotent in nature provided the recovery code follows
+ * certain rules. The guiding principle that the commit path follows while
+ * committing is that it stores the result of a particular operation instead of
+ * storing the procedure.
+ *
+ * Let's consider this rename operation: 'mv /a /b'. Let's assume dirent '/a'
+ * was associated with inode 10. During fast commit, instead of storing this
+ * operation as a procedure "rename a to b", we store the resulting file system
+ * state as a "series" of outcomes:
+ *
+ * - Link dirent b to inode 10
+ * - Unlink dirent a
+ * - Inode <10> with valid refcount
+ *
+ * Now when recovery code runs, it needs "enforce" this state on the file
+ * system. This is what guarantees idempotence of fast commit replay.
+ *
+ * Let's take an example of a procedure that is not idempotent and see how fast
+ * commits make it idempotent. Consider following sequence of operations:
+ *
+ * rm A; mv B A; read A
+ * (x) (y) (z)
+ *
+ * (x), (y) and (z) are the points at which we can crash. If we store this
+ * sequence of operations as is then the replay is not idempotent. Let's say
+ * while in replay, we crash at (z). During the second replay, file A (which was
+ * actually created as a result of "mv B A" operation) would get deleted. Thus,
+ * file named A would be absent when we try to read A. So, this sequence of
+ * operations is not idempotent. However, as mentioned above, instead of storing
+ * the procedure fast commits store the outcome of each procedure. Thus the fast
+ * commit log for above procedure would be as follows:
+ *
+ * (Let's assume dirent A was linked to inode 10 and dirent B was linked to
+ * inode 11 before the replay)
+ *
+ * [Unlink A] [Link A to inode 11] [Unlink B] [Inode 11]
+ * (w) (x) (y) (z)
+ *
+ * If we crash at (z), we will have file A linked to inode 11. During the second
+ * replay, we will remove file A (inode 11). But we will create it back and make
+ * it point to inode 11. We won't find B, so we'll just skip that step. At this
+ * point, the refcount for inode 11 is not reliable, but that gets fixed by the
+ * replay of last inode 11 tag. Crashes at points (w), (x) and (y) get handled
+ * similarly. Thus, by converting a non-idempotent procedure into a series of
+ * idempotent outcomes, fast commits ensured idempotence during the replay.
+ *
* TODOs
* -----
+ *
+ * 0) Fast commit replay path hardening: Fast commit replay code should use
+ * journal handles to make sure all the updates it does during the replay
+ * path are atomic. With that if we crash during fast commit replay, after
+ * trying to do recovery again, we will find a file system where fast commit
+ * area is invalid (because new full commit would be found). In order to deal
+ * with that, fast commit replay code should ensure that the "FC_REPLAY"
+ * superblock state is persisted before starting the replay, so that after
+ * the crash, fast commit recovery code can look at that flag and perform
+ * fast commit recovery even if that area is invalidated by later full
+ * commits.
+ *
* 1) Make fast commit atomic updates more fine grained. Today, a fast commit
* eligible update must be protected within ext4_fc_start_update() and
* ext4_fc_stop_update(). These routines are called at much higher
@@ -1220,18 +1281,6 @@
/* Ext4 Replay Path Routines */
-/* Get length of a particular tlv */
-static inline int ext4_fc_tag_len(struct ext4_fc_tl *tl)
-{
- return le16_to_cpu(tl->fc_len);
-}
-
-/* Get a pointer to "value" of a tlv */
-static inline u8 *ext4_fc_tag_val(struct ext4_fc_tl *tl)
-{
- return (u8 *)tl + sizeof(*tl);
-}
-
/* Helper struct for dentry replay routines */
struct dentry_info_args {
int parent_ino, dname_len, ino, inode_len;
@@ -1770,32 +1819,6 @@
return 0;
}
-static inline const char *tag2str(u16 tag)
-{
- switch (tag) {
- case EXT4_FC_TAG_LINK:
- return "TAG_ADD_ENTRY";
- case EXT4_FC_TAG_UNLINK:
- return "TAG_DEL_ENTRY";
- case EXT4_FC_TAG_ADD_RANGE:
- return "TAG_ADD_RANGE";
- case EXT4_FC_TAG_CREAT:
- return "TAG_CREAT_DENTRY";
- case EXT4_FC_TAG_DEL_RANGE:
- return "TAG_DEL_RANGE";
- case EXT4_FC_TAG_INODE:
- return "TAG_INODE";
- case EXT4_FC_TAG_PAD:
- return "TAG_PAD";
- case EXT4_FC_TAG_TAIL:
- return "TAG_TAIL";
- case EXT4_FC_TAG_HEAD:
- return "TAG_HEAD";
- default:
- return "TAG_ERROR";
- }
-}
-
static void ext4_fc_set_bitmaps_and_counters(struct super_block *sb)
{
struct ext4_fc_replay_state *state;
diff --git a/fs/ext4/fast_commit.h b/fs/ext4/fast_commit.h
index 3a6e5a1..b77f70f 100644
--- a/fs/ext4/fast_commit.h
+++ b/fs/ext4/fast_commit.h
@@ -3,6 +3,11 @@
#ifndef __FAST_COMMIT_H__
#define __FAST_COMMIT_H__
+/*
+ * Note this file is present in e2fsprogs/lib/ext2fs/fast_commit.h and
+ * linux/fs/ext4/fast_commit.h. These file should always be byte identical.
+ */
+
/* Fast commit tags */
#define EXT4_FC_TAG_ADD_RANGE 0x0001
#define EXT4_FC_TAG_DEL_RANGE 0x0002
@@ -50,7 +55,7 @@
struct ext4_fc_dentry_info {
__le32 fc_parent_ino;
__le32 fc_ino;
- u8 fc_dname[0];
+ __u8 fc_dname[0];
};
/* Value structure for EXT4_FC_TAG_INODE and EXT4_FC_TAG_INODE_PARTIAL. */
@@ -66,19 +71,6 @@
};
/*
- * In memory list of dentry updates that are performed on the file
- * system used by fast commit code.
- */
-struct ext4_fc_dentry_update {
- int fcd_op; /* Type of update create / unlink / link */
- int fcd_parent; /* Parent inode number */
- int fcd_ino; /* Inode number */
- struct qstr fcd_name; /* Dirent name */
- unsigned char fcd_iname[DNAME_INLINE_LEN]; /* Dirent name string */
- struct list_head fcd_list;
-};
-
-/*
* Fast commit reason codes
*/
enum {
@@ -107,6 +99,20 @@
EXT4_FC_REASON_MAX
};
+#ifdef __KERNEL__
+/*
+ * In memory list of dentry updates that are performed on the file
+ * system used by fast commit code.
+ */
+struct ext4_fc_dentry_update {
+ int fcd_op; /* Type of update create / unlink / link */
+ int fcd_parent; /* Parent inode number */
+ int fcd_ino; /* Inode number */
+ struct qstr fcd_name; /* Dirent name */
+ unsigned char fcd_iname[DNAME_INLINE_LEN]; /* Dirent name string */
+ struct list_head fcd_list;
+};
+
struct ext4_fc_stats {
unsigned int fc_ineligible_reason_count[EXT4_FC_REASON_MAX];
unsigned long fc_num_commits;
@@ -145,13 +151,51 @@
};
#define region_last(__region) (((__region)->lblk) + ((__region)->len) - 1)
+#endif
#define fc_for_each_tl(__start, __end, __tl) \
- for (tl = (struct ext4_fc_tl *)start; \
- (u8 *)tl < (u8 *)end; \
- tl = (struct ext4_fc_tl *)((u8 *)tl + \
+ for (tl = (struct ext4_fc_tl *)(__start); \
+ (__u8 *)tl < (__u8 *)(__end); \
+ tl = (struct ext4_fc_tl *)((__u8 *)tl + \
sizeof(struct ext4_fc_tl) + \
+ le16_to_cpu(tl->fc_len)))
+static inline const char *tag2str(__u16 tag)
+{
+ switch (tag) {
+ case EXT4_FC_TAG_LINK:
+ return "ADD_ENTRY";
+ case EXT4_FC_TAG_UNLINK:
+ return "DEL_ENTRY";
+ case EXT4_FC_TAG_ADD_RANGE:
+ return "ADD_RANGE";
+ case EXT4_FC_TAG_CREAT:
+ return "CREAT_DENTRY";
+ case EXT4_FC_TAG_DEL_RANGE:
+ return "DEL_RANGE";
+ case EXT4_FC_TAG_INODE:
+ return "INODE";
+ case EXT4_FC_TAG_PAD:
+ return "PAD";
+ case EXT4_FC_TAG_TAIL:
+ return "TAIL";
+ case EXT4_FC_TAG_HEAD:
+ return "HEAD";
+ default:
+ return "ERROR";
+ }
+}
+
+/* Get length of a particular tlv */
+static inline int ext4_fc_tag_len(struct ext4_fc_tl *tl)
+{
+ return le16_to_cpu(tl->fc_len);
+}
+
+/* Get a pointer to "value" of a tlv */
+static inline __u8 *ext4_fc_tag_val(struct ext4_fc_tl *tl)
+{
+ return (__u8 *)tl + sizeof(*tl);
+}
#endif /* __FAST_COMMIT_H__ */
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index a42ca95..113bfb0 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -136,7 +136,7 @@
if (unlikely(ext4_forced_shutdown(sbi)))
return -EIO;
- J_ASSERT(ext4_journal_current_handle() == NULL);
+ ASSERT(ext4_journal_current_handle() == NULL);
trace_ext4_sync_file_enter(file, datasync);
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index 05efa682..1223a18 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -534,8 +534,8 @@
ext4_fsblk_t first_block = 0;
trace_ext4_ind_map_blocks_enter(inode, map->m_lblk, map->m_len, flags);
- J_ASSERT(!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)));
- J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0);
+ ASSERT(!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)));
+ ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0);
depth = ext4_block_to_path(inode, map->m_lblk, offsets,
&blocks_to_boundary);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 0d8385a..2794688 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -175,6 +175,7 @@
*/
int extra_credits = 6;
struct ext4_xattr_inode_array *ea_inode_array = NULL;
+ bool freeze_protected = false;
trace_ext4_evict_inode(inode);
@@ -232,9 +233,14 @@
/*
* Protect us against freezing - iput() caller didn't have to have any
- * protection against it
+ * protection against it. When we are in a running transaction though,
+ * we are already protected against freezing and we cannot grab further
+ * protection due to lock ordering constraints.
*/
- sb_start_intwrite(inode->i_sb);
+ if (!ext4_journal_current_handle()) {
+ sb_start_intwrite(inode->i_sb);
+ freeze_protected = true;
+ }
if (!IS_NOQUOTA(inode))
extra_credits += EXT4_MAXQUOTAS_DEL_BLOCKS(inode->i_sb);
@@ -253,7 +259,8 @@
* cleaned up.
*/
ext4_orphan_del(NULL, inode);
- sb_end_intwrite(inode->i_sb);
+ if (freeze_protected)
+ sb_end_intwrite(inode->i_sb);
goto no_delete;
}
@@ -294,7 +301,8 @@
stop_handle:
ext4_journal_stop(handle);
ext4_orphan_del(NULL, inode);
- sb_end_intwrite(inode->i_sb);
+ if (freeze_protected)
+ sb_end_intwrite(inode->i_sb);
ext4_xattr_inode_array_free(ea_inode_array);
goto no_delete;
}
@@ -323,7 +331,8 @@
else
ext4_free_inode(handle, inode);
ext4_journal_stop(handle);
- sb_end_intwrite(inode->i_sb);
+ if (freeze_protected)
+ sb_end_intwrite(inode->i_sb);
ext4_xattr_inode_array_free(ea_inode_array);
return;
no_delete:
@@ -830,8 +839,8 @@
int create = map_flags & EXT4_GET_BLOCKS_CREATE;
int err;
- J_ASSERT((EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
- || handle != NULL || create == 0);
+ ASSERT((EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
+ || handle != NULL || create == 0);
map.m_lblk = block;
map.m_len = 1;
@@ -846,9 +855,9 @@
if (unlikely(!bh))
return ERR_PTR(-ENOMEM);
if (map.m_flags & EXT4_MAP_NEW) {
- J_ASSERT(create != 0);
- J_ASSERT((EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
- || (handle != NULL));
+ ASSERT(create != 0);
+ ASSERT((EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
+ || (handle != NULL));
/*
* Now that we do not always journal data, we should
@@ -2055,7 +2064,7 @@
unlock_page(page);
return -ENOMEM;
}
- ret = ext4_bio_write_page(&io_submit, page, len, wbc, keep_towrite);
+ ret = ext4_bio_write_page(&io_submit, page, len, keep_towrite);
ext4_io_submit(&io_submit);
/* Drop io_end reference we got from init */
ext4_put_io_end_defer(io_submit.io_end);
@@ -2089,7 +2098,7 @@
len = size & ~PAGE_MASK;
else
len = PAGE_SIZE;
- err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc, false);
+ err = ext4_bio_write_page(&mpd->io_submit, page, len, false);
if (!err)
mpd->wbc->nr_to_write--;
mpd->first_page++;
@@ -4610,7 +4619,7 @@
(ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count))) {
if (flags & EXT4_IGET_HANDLE)
return ERR_PTR(-ESTALE);
- __ext4_error(sb, function, line, EFSCORRUPTED, 0,
+ __ext4_error(sb, function, line, false, EFSCORRUPTED, 0,
"inode #%lu: comm %s: iget: illegal inode #",
ino, current->comm);
return ERR_PTR(-EFSCORRUPTED);
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 24af9ed..99bf091 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -822,24 +822,6 @@
spin_unlock(&sbi->s_bal_lock);
}
-static void mb_regenerate_buddy(struct ext4_buddy *e4b)
-{
- int count;
- int order = 1;
- void *buddy;
-
- while ((buddy = mb_find_buddy(e4b, order++, &count))) {
- ext4_set_bits(buddy, 0, count);
- }
- e4b->bd_info->bb_fragments = 0;
- memset(e4b->bd_info->bb_counters, 0,
- sizeof(*e4b->bd_info->bb_counters) *
- (e4b->bd_sb->s_blocksize_bits + 2));
-
- ext4_mb_generate_buddy(e4b->bd_sb, e4b->bd_buddy,
- e4b->bd_bitmap, e4b->bd_group);
-}
-
/* The buddy information is attached the buddy cache inode
* for convenience. The information regarding each group
* is loaded via ext4_mb_load_buddy. The information involve
@@ -1307,22 +1289,18 @@
static int mb_find_order_for_block(struct ext4_buddy *e4b, int block)
{
- int order = 1;
- int bb_incr = 1 << (e4b->bd_blkbits - 1);
+ int order = 1, max;
void *bb;
BUG_ON(e4b->bd_bitmap == e4b->bd_buddy);
BUG_ON(block >= (1 << (e4b->bd_blkbits + 3)));
- bb = e4b->bd_buddy;
while (order <= e4b->bd_blkbits + 1) {
- block = block >> 1;
- if (!mb_test_bit(block, bb)) {
+ bb = mb_find_buddy(e4b, order, &max);
+ if (!mb_test_bit(block >> order, bb)) {
/* this block is part of buddy of order 'order' */
return order;
}
- bb += bb_incr;
- bb_incr >>= 1;
order++;
}
return 0;
@@ -1512,7 +1490,6 @@
sb, e4b->bd_group,
EXT4_GROUP_INFO_BBITMAP_CORRUPT);
}
- mb_regenerate_buddy(e4b);
goto done;
}
@@ -2395,9 +2372,9 @@
nr = sbi->s_mb_prefetch;
if (ext4_has_feature_flex_bg(sb)) {
- nr = (group / sbi->s_mb_prefetch) *
- sbi->s_mb_prefetch;
- nr = nr + sbi->s_mb_prefetch - group;
+ nr = 1 << sbi->s_log_groups_per_flex;
+ nr -= group & (nr - 1);
+ nr = min(nr, sbi->s_mb_prefetch);
}
prefetch_grp = ext4_mb_prefetch(sb, group,
nr, &prefetch_ios);
@@ -2733,7 +2710,8 @@
if (ext4_has_feature_flex_bg(sb)) {
/* a single flex group is supposed to be read by a single IO */
- sbi->s_mb_prefetch = 1 << sbi->s_es->s_log_groups_per_flex;
+ sbi->s_mb_prefetch = min(1 << sbi->s_es->s_log_groups_per_flex,
+ BLK_MAX_SEGMENT_SIZE >> (sb->s_blocksize_bits - 9));
sbi->s_mb_prefetch *= 8; /* 8 prefetch IOs in flight at most */
} else {
sbi->s_mb_prefetch = 32;
@@ -5126,6 +5104,7 @@
ext4_group_first_block_no(sb, group) +
EXT4_C2B(sbi, cluster),
"Block already on to-be-freed list");
+ kmem_cache_free(ext4_free_data_cachep, new_entry);
return 0;
}
}
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 326fe40..b17a082 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -182,10 +182,6 @@
return bh;
}
-#ifndef assert
-#define assert(test) J_ASSERT(test)
-#endif
-
#ifdef DX_DEBUG
#define dxtrace(command) command
#else
@@ -843,7 +839,7 @@
break;
}
}
- assert (at == p - 1);
+ ASSERT(at == p - 1);
}
at = p - 1;
@@ -1259,8 +1255,8 @@
struct dx_entry *old = frame->at, *new = old + 1;
int count = dx_get_count(entries);
- assert(count < dx_get_limit(entries));
- assert(old < entries + count);
+ ASSERT(count < dx_get_limit(entries));
+ ASSERT(old < entries + count);
memmove(new + 1, new, (char *)(entries + count) - (char *)(new));
dx_set_hash(new, hash);
dx_set_block(new, block);
@@ -2959,7 +2955,7 @@
* hold i_mutex, or the inode can not be referenced from outside,
* so i_nlink should not be bumped due to race
*/
- J_ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
+ ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
S_ISLNK(inode->i_mode)) || inode->i_nlink == 0);
BUFFER_TRACE(sbi->s_sbh, "get_write_access");
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index defd2e1..03a44a0 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -111,9 +111,6 @@
unsigned under_io = 0;
unsigned long flags;
- if (!page)
- continue;
-
if (fscrypt_is_bounce_page(page)) {
bounce_page = page;
page = fscrypt_pagecache_page(bounce_page);
@@ -438,7 +435,6 @@
int ext4_bio_write_page(struct ext4_io_submit *io,
struct page *page,
int len,
- struct writeback_control *wbc,
bool keep_towrite)
{
struct page *bounce_page = NULL;
@@ -448,6 +444,7 @@
int ret = 0;
int nr_submitted = 0;
int nr_to_submit = 0;
+ struct writeback_control *wbc = io->io_wbc;
BUG_ON(!PageLocked(page));
BUG_ON(PageWriteback(page));
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 830c196..2112178 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -404,10 +404,8 @@
bg->bg_itable_unused_hi = cpu_to_le16(count >> 16);
}
-static void __ext4_update_tstamp(__le32 *lo, __u8 *hi)
+static void __ext4_update_tstamp(__le32 *lo, __u8 *hi, time64_t now)
{
- time64_t now = ktime_get_real_seconds();
-
now = clamp_val(now, 0, (1ull << 40) - 1);
*lo = cpu_to_le32(lower_32_bits(now));
@@ -419,108 +417,11 @@
return ((time64_t)(*hi) << 32) + le32_to_cpu(*lo);
}
#define ext4_update_tstamp(es, tstamp) \
- __ext4_update_tstamp(&(es)->tstamp, &(es)->tstamp ## _hi)
+ __ext4_update_tstamp(&(es)->tstamp, &(es)->tstamp ## _hi, \
+ ktime_get_real_seconds())
#define ext4_get_tstamp(es, tstamp) \
__ext4_get_tstamp(&(es)->tstamp, &(es)->tstamp ## _hi)
-static void __save_error_info(struct super_block *sb, int error,
- __u32 ino, __u64 block,
- const char *func, unsigned int line)
-{
- struct ext4_super_block *es = EXT4_SB(sb)->s_es;
- int err;
-
- EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
- if (bdev_read_only(sb->s_bdev))
- return;
- es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
- ext4_update_tstamp(es, s_last_error_time);
- strncpy(es->s_last_error_func, func, sizeof(es->s_last_error_func));
- es->s_last_error_line = cpu_to_le32(line);
- es->s_last_error_ino = cpu_to_le32(ino);
- es->s_last_error_block = cpu_to_le64(block);
- switch (error) {
- case EIO:
- err = EXT4_ERR_EIO;
- break;
- case ENOMEM:
- err = EXT4_ERR_ENOMEM;
- break;
- case EFSBADCRC:
- err = EXT4_ERR_EFSBADCRC;
- break;
- case 0:
- case EFSCORRUPTED:
- err = EXT4_ERR_EFSCORRUPTED;
- break;
- case ENOSPC:
- err = EXT4_ERR_ENOSPC;
- break;
- case ENOKEY:
- err = EXT4_ERR_ENOKEY;
- break;
- case EROFS:
- err = EXT4_ERR_EROFS;
- break;
- case EFBIG:
- err = EXT4_ERR_EFBIG;
- break;
- case EEXIST:
- err = EXT4_ERR_EEXIST;
- break;
- case ERANGE:
- err = EXT4_ERR_ERANGE;
- break;
- case EOVERFLOW:
- err = EXT4_ERR_EOVERFLOW;
- break;
- case EBUSY:
- err = EXT4_ERR_EBUSY;
- break;
- case ENOTDIR:
- err = EXT4_ERR_ENOTDIR;
- break;
- case ENOTEMPTY:
- err = EXT4_ERR_ENOTEMPTY;
- break;
- case ESHUTDOWN:
- err = EXT4_ERR_ESHUTDOWN;
- break;
- case EFAULT:
- err = EXT4_ERR_EFAULT;
- break;
- default:
- err = EXT4_ERR_UNKNOWN;
- }
- es->s_last_error_errcode = err;
- if (!es->s_first_error_time) {
- es->s_first_error_time = es->s_last_error_time;
- es->s_first_error_time_hi = es->s_last_error_time_hi;
- strncpy(es->s_first_error_func, func,
- sizeof(es->s_first_error_func));
- es->s_first_error_line = cpu_to_le32(line);
- es->s_first_error_ino = es->s_last_error_ino;
- es->s_first_error_block = es->s_last_error_block;
- es->s_first_error_errcode = es->s_last_error_errcode;
- }
- /*
- * Start the daily error reporting function if it hasn't been
- * started already
- */
- if (!es->s_error_count)
- mod_timer(&EXT4_SB(sb)->s_err_report, jiffies + 24*60*60*HZ);
- le32_add_cpu(&es->s_error_count, 1);
-}
-
-static void save_error_info(struct super_block *sb, int error,
- __u32 ino, __u64 block,
- const char *func, unsigned int line)
-{
- __save_error_info(sb, error, ino, block, func, line);
- if (!bdev_read_only(sb->s_bdev))
- ext4_commit_super(sb, 1);
-}
-
/*
* The del_gendisk() function uninitializes the disk-specific data
* structures, including the bdi structure, without telling anyone
@@ -649,6 +550,83 @@
|| system_state == SYSTEM_RESTART;
}
+struct ext4_err_translation {
+ int code;
+ int errno;
+};
+
+#define EXT4_ERR_TRANSLATE(err) { .code = EXT4_ERR_##err, .errno = err }
+
+static struct ext4_err_translation err_translation[] = {
+ EXT4_ERR_TRANSLATE(EIO),
+ EXT4_ERR_TRANSLATE(ENOMEM),
+ EXT4_ERR_TRANSLATE(EFSBADCRC),
+ EXT4_ERR_TRANSLATE(EFSCORRUPTED),
+ EXT4_ERR_TRANSLATE(ENOSPC),
+ EXT4_ERR_TRANSLATE(ENOKEY),
+ EXT4_ERR_TRANSLATE(EROFS),
+ EXT4_ERR_TRANSLATE(EFBIG),
+ EXT4_ERR_TRANSLATE(EEXIST),
+ EXT4_ERR_TRANSLATE(ERANGE),
+ EXT4_ERR_TRANSLATE(EOVERFLOW),
+ EXT4_ERR_TRANSLATE(EBUSY),
+ EXT4_ERR_TRANSLATE(ENOTDIR),
+ EXT4_ERR_TRANSLATE(ENOTEMPTY),
+ EXT4_ERR_TRANSLATE(ESHUTDOWN),
+ EXT4_ERR_TRANSLATE(EFAULT),
+};
+
+static int ext4_errno_to_code(int errno)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(err_translation); i++)
+ if (err_translation[i].errno == errno)
+ return err_translation[i].code;
+ return EXT4_ERR_UNKNOWN;
+}
+
+static void __save_error_info(struct super_block *sb, int error,
+ __u32 ino, __u64 block,
+ const char *func, unsigned int line)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+
+ EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
+ if (bdev_read_only(sb->s_bdev))
+ return;
+ /* We default to EFSCORRUPTED error... */
+ if (error == 0)
+ error = EFSCORRUPTED;
+
+ spin_lock(&sbi->s_error_lock);
+ sbi->s_add_error_count++;
+ sbi->s_last_error_code = error;
+ sbi->s_last_error_line = line;
+ sbi->s_last_error_ino = ino;
+ sbi->s_last_error_block = block;
+ sbi->s_last_error_func = func;
+ sbi->s_last_error_time = ktime_get_real_seconds();
+ if (!sbi->s_first_error_time) {
+ sbi->s_first_error_code = error;
+ sbi->s_first_error_line = line;
+ sbi->s_first_error_ino = ino;
+ sbi->s_first_error_block = block;
+ sbi->s_first_error_func = func;
+ sbi->s_first_error_time = sbi->s_last_error_time;
+ }
+ spin_unlock(&sbi->s_error_lock);
+}
+
+static void save_error_info(struct super_block *sb, int error,
+ __u32 ino, __u64 block,
+ const char *func, unsigned int line)
+{
+ __save_error_info(sb, error, ino, block, func, line);
+ if (!bdev_read_only(sb->s_bdev))
+ ext4_commit_super(sb, 1);
+}
+
/* Deal with the reporting of failure conditions on a filesystem such as
* inconsistencies detected or read IO failures.
*
@@ -662,40 +640,50 @@
* We'll just use the jbd2_journal_abort() error code to record an error in
* the journal instead. On recovery, the journal will complain about
* that error until we've noted it down and cleared it.
+ *
+ * If force_ro is set, we unconditionally force the filesystem into an
+ * ABORT|READONLY state, unless the error response on the fs has been set to
+ * panic in which case we take the easy way out and panic immediately. This is
+ * used to deal with unrecoverable failures such as journal IO errors or ENOMEM
+ * at a critical moment in log management.
*/
-
-static void ext4_handle_error(struct super_block *sb)
+static void ext4_handle_error(struct super_block *sb, bool force_ro)
{
+ journal_t *journal = EXT4_SB(sb)->s_journal;
+
if (test_opt(sb, WARN_ON_ERROR))
WARN_ON_ONCE(1);
- if (sb_rdonly(sb))
+ if (sb_rdonly(sb) || (!force_ro && test_opt(sb, ERRORS_CONT)))
return;
- if (!test_opt(sb, ERRORS_CONT)) {
- journal_t *journal = EXT4_SB(sb)->s_journal;
-
- ext4_set_mount_flag(sb, EXT4_MF_FS_ABORTED);
- if (journal)
- jbd2_journal_abort(journal, -EIO);
- }
+ ext4_set_mount_flag(sb, EXT4_MF_FS_ABORTED);
+ if (journal)
+ jbd2_journal_abort(journal, -EIO);
/*
* We force ERRORS_RO behavior when system is rebooting. Otherwise we
* could panic during 'reboot -f' as the underlying device got already
* disabled.
*/
- if (test_opt(sb, ERRORS_RO) || system_going_down()) {
- ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
- /*
- * Make sure updated value of ->s_mount_flags will be visible
- * before ->s_flags update
- */
- smp_wmb();
- sb->s_flags |= SB_RDONLY;
- } else if (test_opt(sb, ERRORS_PANIC)) {
+ if (test_opt(sb, ERRORS_PANIC) && !system_going_down()) {
panic("EXT4-fs (device %s): panic forced after error\n",
sb->s_id);
}
+ ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
+ /*
+ * Make sure updated value of ->s_mount_flags will be visible before
+ * ->s_flags update
+ */
+ smp_wmb();
+ sb->s_flags |= SB_RDONLY;
+}
+
+static void flush_stashed_error_work(struct work_struct *work)
+{
+ struct ext4_sb_info *sbi = container_of(work, struct ext4_sb_info,
+ s_error_work);
+
+ ext4_commit_super(sbi->s_sb, 1);
}
#define ext4_error_ratelimit(sb) \
@@ -703,7 +691,7 @@
"EXT4-fs error")
void __ext4_error(struct super_block *sb, const char *function,
- unsigned int line, int error, __u64 block,
+ unsigned int line, bool force_ro, int error, __u64 block,
const char *fmt, ...)
{
struct va_format vaf;
@@ -723,7 +711,7 @@
va_end(args);
}
save_error_info(sb, error, 0, block, function, line);
- ext4_handle_error(sb);
+ ext4_handle_error(sb, force_ro);
}
void __ext4_error_inode(struct inode *inode, const char *function,
@@ -755,7 +743,7 @@
}
save_error_info(inode->i_sb, error, inode->i_ino, block,
function, line);
- ext4_handle_error(inode->i_sb);
+ ext4_handle_error(inode->i_sb, false);
}
void __ext4_error_file(struct file *file, const char *function,
@@ -794,7 +782,7 @@
}
save_error_info(inode->i_sb, EFSCORRUPTED, inode->i_ino, block,
function, line);
- ext4_handle_error(inode->i_sb);
+ ext4_handle_error(inode->i_sb, false);
}
const char *ext4_decode_error(struct super_block *sb, int errno,
@@ -862,51 +850,7 @@
}
save_error_info(sb, -errno, 0, 0, function, line);
- ext4_handle_error(sb);
-}
-
-/*
- * ext4_abort is a much stronger failure handler than ext4_error. The
- * abort function may be used to deal with unrecoverable failures such
- * as journal IO errors or ENOMEM at a critical moment in log management.
- *
- * We unconditionally force the filesystem into an ABORT|READONLY state,
- * unless the error response on the fs has been set to panic in which
- * case we take the easy way out and panic immediately.
- */
-
-void __ext4_abort(struct super_block *sb, const char *function,
- unsigned int line, int error, const char *fmt, ...)
-{
- struct va_format vaf;
- va_list args;
-
- if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
- return;
-
- save_error_info(sb, error, 0, 0, function, line);
- va_start(args, fmt);
- vaf.fmt = fmt;
- vaf.va = &args;
- printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: %pV\n",
- sb->s_id, function, line, &vaf);
- va_end(args);
-
- if (sb_rdonly(sb) == 0) {
- ext4_set_mount_flag(sb, EXT4_MF_FS_ABORTED);
- if (EXT4_SB(sb)->s_journal)
- jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
-
- ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
- /*
- * Make sure updated value of ->s_mount_flags will be visible
- * before ->s_flags update
- */
- smp_wmb();
- sb->s_flags |= SB_RDONLY;
- }
- if (test_opt(sb, ERRORS_PANIC) && !system_going_down())
- panic("EXT4-fs panic from previous error\n");
+ ext4_handle_error(sb, false);
}
void __ext4_msg(struct super_block *sb,
@@ -982,8 +926,6 @@
return;
trace_ext4_error(sb, function, line);
- __save_error_info(sb, EFSCORRUPTED, ino, block, function, line);
-
if (ext4_error_ratelimit(sb)) {
va_start(args, fmt);
vaf.fmt = fmt;
@@ -999,17 +941,16 @@
va_end(args);
}
- if (test_opt(sb, WARN_ON_ERROR))
- WARN_ON_ONCE(1);
-
if (test_opt(sb, ERRORS_CONT)) {
- ext4_commit_super(sb, 0);
+ if (test_opt(sb, WARN_ON_ERROR))
+ WARN_ON_ONCE(1);
+ __save_error_info(sb, EFSCORRUPTED, ino, block, function, line);
+ schedule_work(&EXT4_SB(sb)->s_error_work);
return;
}
-
ext4_unlock_group(sb, grp);
- ext4_commit_super(sb, 1);
- ext4_handle_error(sb);
+ save_error_info(sb, EFSCORRUPTED, ino, block, function, line);
+ ext4_handle_error(sb, false);
/*
* We only get here in the ERRORS_RO case; relocking the group
* may be dangerous, but nothing bad will happen since the
@@ -1181,6 +1122,7 @@
ext4_unregister_li_request(sb);
ext4_quota_off_umount(sb);
+ flush_work(&sbi->s_error_work);
destroy_workqueue(sbi->rsv_conversion_wq);
/*
@@ -1240,7 +1182,7 @@
* in-memory list had better be clean by this point. */
if (!list_empty(&sbi->s_orphan))
dump_orphan_list(sb, sbi);
- J_ASSERT(list_empty(&sbi->s_orphan));
+ ASSERT(list_empty(&sbi->s_orphan));
sync_blockdev(sb->s_bdev);
invalidate_bdev(sb->s_bdev);
@@ -4005,6 +3947,21 @@
atomic64_set(&sbi->s_resv_clusters, resv_clusters);
}
+static const char *ext4_quota_mode(struct super_block *sb)
+{
+#ifdef CONFIG_QUOTA
+ if (!ext4_quota_capable(sb))
+ return "none";
+
+ if (EXT4_SB(sb)->s_journal && ext4_is_quota_journalled(sb))
+ return "journalled";
+ else
+ return "writeback";
+#else
+ return "disabled";
+#endif
+}
+
static int ext4_fill_super(struct super_block *sb, void *data, int silent)
{
struct dax_device *dax_dev = fs_dax_get_by_bdev(sb->s_bdev);
@@ -4073,7 +4030,6 @@
if (IS_ERR(bh)) {
ext4_msg(sb, KERN_ERR, "unable to read superblock");
ret = PTR_ERR(bh);
- bh = NULL;
goto out_fail;
}
/*
@@ -4187,19 +4143,26 @@
*/
sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT;
- blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
+ if (le32_to_cpu(es->s_log_block_size) >
+ (EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) {
+ ext4_msg(sb, KERN_ERR,
+ "Invalid log block size: %u",
+ le32_to_cpu(es->s_log_block_size));
+ goto failed_mount;
+ }
+ if (le32_to_cpu(es->s_log_cluster_size) >
+ (EXT4_MAX_CLUSTER_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) {
+ ext4_msg(sb, KERN_ERR,
+ "Invalid log cluster size: %u",
+ le32_to_cpu(es->s_log_cluster_size));
+ goto failed_mount;
+ }
+
+ blocksize = EXT4_MIN_BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
if (blocksize == PAGE_SIZE)
set_opt(sb, DIOREAD_NOLOCK);
- if (blocksize < EXT4_MIN_BLOCK_SIZE ||
- blocksize > EXT4_MAX_BLOCK_SIZE) {
- ext4_msg(sb, KERN_ERR,
- "Unsupported filesystem blocksize %d (%d log_block_size)",
- blocksize, le32_to_cpu(es->s_log_block_size));
- goto failed_mount;
- }
-
if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) {
sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE;
sbi->s_first_ino = EXT4_GOOD_OLD_FIRST_INO;
@@ -4417,21 +4380,6 @@
if (!ext4_feature_set_ok(sb, (sb_rdonly(sb))))
goto failed_mount;
- if (le32_to_cpu(es->s_log_block_size) >
- (EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) {
- ext4_msg(sb, KERN_ERR,
- "Invalid log block size: %u",
- le32_to_cpu(es->s_log_block_size));
- goto failed_mount;
- }
- if (le32_to_cpu(es->s_log_cluster_size) >
- (EXT4_MAX_CLUSTER_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) {
- ext4_msg(sb, KERN_ERR,
- "Invalid log cluster size: %u",
- le32_to_cpu(es->s_log_cluster_size));
- goto failed_mount;
- }
-
if (le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) > (blocksize / 4)) {
ext4_msg(sb, KERN_ERR,
"Number of reserved GDT blocks insanely large: %d",
@@ -4702,7 +4650,6 @@
"can't read group descriptor %d", i);
db_count = i;
ret = PTR_ERR(bh);
- bh = NULL;
goto failed_mount2;
}
rcu_read_lock();
@@ -4717,6 +4664,8 @@
}
timer_setup(&sbi->s_err_report, print_daily_error_info, 0);
+ spin_lock_init(&sbi->s_error_lock);
+ INIT_WORK(&sbi->s_error_work, flush_stashed_error_work);
/* Register extent status tree shrinker */
if (ext4_es_register_shrinker(sbi))
@@ -4872,6 +4821,7 @@
"requested data journaling mode");
goto failed_mount_wq;
}
+ break;
default:
break;
}
@@ -5000,13 +4950,11 @@
block = ext4_count_free_clusters(sb);
ext4_free_blocks_count_set(sbi->s_es,
EXT4_C2B(sbi, block));
- ext4_superblock_csum_set(sb);
err = percpu_counter_init(&sbi->s_freeclusters_counter, block,
GFP_KERNEL);
if (!err) {
unsigned long freei = ext4_count_free_inodes(sb);
sbi->s_es->s_free_inodes_count = cpu_to_le32(freei);
- ext4_superblock_csum_set(sb);
err = percpu_counter_init(&sbi->s_freeinodes_counter, freei,
GFP_KERNEL);
}
@@ -5086,10 +5034,11 @@
if (___ratelimit(&ext4_mount_msg_ratelimit, "EXT4-fs mount"))
ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. "
- "Opts: %.*s%s%s", descr,
+ "Opts: %.*s%s%s. Quota mode: %s.", descr,
(int) sizeof(sbi->s_es->s_mount_opts),
sbi->s_es->s_mount_opts,
- *sbi->s_es->s_mount_opts ? "; " : "", orig_data);
+ *sbi->s_es->s_mount_opts ? "; " : "", orig_data,
+ ext4_quota_mode(sb));
if (es->s_error_count)
mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */
@@ -5154,6 +5103,7 @@
ext4_es_unregister_shrinker(sbi);
failed_mount3:
del_timer_sync(&sbi->s_err_report);
+ flush_work(&sbi->s_error_work);
if (sbi->s_mmp_tsk)
kthread_stop(sbi->s_mmp_tsk);
failed_mount2:
@@ -5480,6 +5430,7 @@
static int ext4_commit_super(struct super_block *sb, int sync)
{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_super_block *es = EXT4_SB(sb)->s_es;
struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
int error = 0;
@@ -5511,6 +5462,46 @@
es->s_free_inodes_count =
cpu_to_le32(percpu_counter_sum_positive(
&EXT4_SB(sb)->s_freeinodes_counter));
+ /* Copy error information to the on-disk superblock */
+ spin_lock(&sbi->s_error_lock);
+ if (sbi->s_add_error_count > 0) {
+ es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
+ if (!es->s_first_error_time && !es->s_first_error_time_hi) {
+ __ext4_update_tstamp(&es->s_first_error_time,
+ &es->s_first_error_time_hi,
+ sbi->s_first_error_time);
+ strncpy(es->s_first_error_func, sbi->s_first_error_func,
+ sizeof(es->s_first_error_func));
+ es->s_first_error_line =
+ cpu_to_le32(sbi->s_first_error_line);
+ es->s_first_error_ino =
+ cpu_to_le32(sbi->s_first_error_ino);
+ es->s_first_error_block =
+ cpu_to_le64(sbi->s_first_error_block);
+ es->s_first_error_errcode =
+ ext4_errno_to_code(sbi->s_first_error_code);
+ }
+ __ext4_update_tstamp(&es->s_last_error_time,
+ &es->s_last_error_time_hi,
+ sbi->s_last_error_time);
+ strncpy(es->s_last_error_func, sbi->s_last_error_func,
+ sizeof(es->s_last_error_func));
+ es->s_last_error_line = cpu_to_le32(sbi->s_last_error_line);
+ es->s_last_error_ino = cpu_to_le32(sbi->s_last_error_ino);
+ es->s_last_error_block = cpu_to_le64(sbi->s_last_error_block);
+ es->s_last_error_errcode =
+ ext4_errno_to_code(sbi->s_last_error_code);
+ /*
+ * Start the daily error reporting function if it hasn't been
+ * started already
+ */
+ if (!es->s_error_count)
+ mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ);
+ le32_add_cpu(&es->s_error_count, sbi->s_add_error_count);
+ sbi->s_add_error_count = 0;
+ }
+ spin_unlock(&sbi->s_error_lock);
+
BUFFER_TRACE(sbh, "marking dirty");
ext4_superblock_csum_set(sb);
if (sync)
@@ -5864,6 +5855,9 @@
set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
}
+ /* Flush outstanding errors before changing fs state */
+ flush_work(&sbi->s_error_work);
+
if ((bool)(*flags & SB_RDONLY) != sb_rdonly(sb)) {
if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED)) {
err = -EROFS;
@@ -6022,7 +6016,8 @@
*/
*flags = (*flags & ~vfs_flags) | (sb->s_flags & vfs_flags);
- ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s", orig_data);
+ ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s. Quota mode: %s.",
+ orig_data, ext4_quota_mode(sb));
kfree(orig_data);
return 0;
@@ -6201,11 +6196,8 @@
static int ext4_mark_dquot_dirty(struct dquot *dquot)
{
struct super_block *sb = dquot->dq_sb;
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- /* Are we journaling quotas? */
- if (ext4_has_feature_quota(sb) ||
- sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
+ if (ext4_is_quota_journalled(sb)) {
dquot_mark_dquot_dirty(dquot);
return ext4_write_dquot(dquot);
} else {
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 6127e94..4e3b1f8 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -1927,7 +1927,6 @@
} else {
/* Allocate a buffer where we construct the new block. */
s->base = kzalloc(sb->s_blocksize, GFP_NOFS);
- /* assert(header == s->base) */
error = -ENOMEM;
if (s->base == NULL)
goto cleanup;
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index c070c0d..aea3545 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -315,7 +315,7 @@
if (mode & FMODE_WRITE)
r = w = 1;
- name = dentry_name(file->f_path.dentry);
+ name = dentry_name(d_real(file->f_path.dentry, file->f_inode));
if (name == NULL)
return -ENOMEM;
diff --git a/fs/inode.c b/fs/inode.c
index cb008ac..6442d97 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1624,7 +1624,9 @@
else
drop = generic_drop_inode(inode);
- if (!drop && (sb->s_flags & SB_ACTIVE)) {
+ if (!drop &&
+ !(inode->i_state & I_DONTCACHE) &&
+ (sb->s_flags & SB_ACTIVE)) {
inode_add_lru(inode);
spin_unlock(&inode->i_lock);
return;
diff --git a/fs/io-wq.c b/fs/io-wq.c
index f72d538..a564f36 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -36,8 +36,7 @@
enum {
IO_WQ_BIT_EXIT = 0, /* wq exiting */
- IO_WQ_BIT_CANCEL = 1, /* cancel work on list */
- IO_WQ_BIT_ERROR = 2, /* error on setup */
+ IO_WQ_BIT_ERROR = 1, /* error on setup */
};
enum {
@@ -561,12 +560,6 @@
next_hashed = wq_next_work(work);
io_impersonate_work(worker, work);
- /*
- * OK to set IO_WQ_WORK_CANCEL even for uncancellable
- * work, the worker function will do the right thing.
- */
- if (test_bit(IO_WQ_BIT_CANCEL, &wq->state))
- work->flags |= IO_WQ_WORK_CANCEL;
old_work = work;
linked = wq->do_work(work);
@@ -732,12 +725,6 @@
return acct->nr_workers < acct->max_workers;
}
-static bool io_wqe_worker_send_sig(struct io_worker *worker, void *data)
-{
- send_sig(SIGINT, worker->task, 1);
- return false;
-}
-
/*
* Iterate the passed in list and call the specific function for each
* worker that isn't exiting
@@ -938,21 +925,6 @@
work->flags |= (IO_WQ_WORK_HASHED | (bit << IO_WQ_HASH_SHIFT));
}
-void io_wq_cancel_all(struct io_wq *wq)
-{
- int node;
-
- set_bit(IO_WQ_BIT_CANCEL, &wq->state);
-
- rcu_read_lock();
- for_each_node(node) {
- struct io_wqe *wqe = wq->wqes[node];
-
- io_wq_for_each_worker(wqe, io_wqe_worker_send_sig, NULL);
- }
- rcu_read_unlock();
-}
-
struct io_cb_cancel_data {
work_cancel_fn *fn;
void *data;
diff --git a/fs/io-wq.h b/fs/io-wq.h
index 069496c..b158f8a 100644
--- a/fs/io-wq.h
+++ b/fs/io-wq.h
@@ -59,6 +59,7 @@
list->last->next = node;
list->last = node;
}
+ node->next = NULL;
}
static inline void wq_list_cut(struct io_wq_work_list *list,
@@ -128,8 +129,6 @@
return work->flags & IO_WQ_WORK_HASHED;
}
-void io_wq_cancel_all(struct io_wq *wq);
-
typedef bool (work_cancel_fn)(struct io_wq_work *, void *);
enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel,
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 6f9392c..7e35283 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1693,6 +1693,11 @@
return io_wq_current_is_worker();
}
+static inline unsigned __io_cqring_events(struct io_ring_ctx *ctx)
+{
+ return ctx->cached_cq_tail - READ_ONCE(ctx->rings->cq.head);
+}
+
static void io_cqring_ev_posted(struct io_ring_ctx *ctx)
{
if (waitqueue_active(&ctx->wait))
@@ -1703,15 +1708,6 @@
eventfd_signal(ctx->cq_ev_fd, 1);
}
-static void io_cqring_mark_overflow(struct io_ring_ctx *ctx)
-{
- if (list_empty(&ctx->cq_overflow_list)) {
- clear_bit(0, &ctx->sq_check_overflow);
- clear_bit(0, &ctx->cq_check_overflow);
- ctx->rings->sq_flags &= ~IORING_SQ_CQ_OVERFLOW;
- }
-}
-
/* Returns true if there are no backlogged entries after the flush */
static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force,
struct task_struct *tsk,
@@ -1721,23 +1717,13 @@
struct io_kiocb *req, *tmp;
struct io_uring_cqe *cqe;
unsigned long flags;
+ bool all_flushed;
LIST_HEAD(list);
- if (!force) {
- if (list_empty_careful(&ctx->cq_overflow_list))
- return true;
- if ((ctx->cached_cq_tail - READ_ONCE(rings->cq.head) ==
- rings->cq_ring_entries))
- return false;
- }
+ if (!force && __io_cqring_events(ctx) == rings->cq_ring_entries)
+ return false;
spin_lock_irqsave(&ctx->completion_lock, flags);
-
- /* if force is set, the ring is going away. always drop after that */
- if (force)
- ctx->cq_overflow_flushed = 1;
-
- cqe = NULL;
list_for_each_entry_safe(req, tmp, &ctx->cq_overflow_list, compl.list) {
if (!io_match_task(req, tsk, files))
continue;
@@ -1758,9 +1744,14 @@
}
}
- io_commit_cqring(ctx);
- io_cqring_mark_overflow(ctx);
+ all_flushed = list_empty(&ctx->cq_overflow_list);
+ if (all_flushed) {
+ clear_bit(0, &ctx->sq_check_overflow);
+ clear_bit(0, &ctx->cq_check_overflow);
+ ctx->rings->sq_flags &= ~IORING_SQ_CQ_OVERFLOW;
+ }
+ io_commit_cqring(ctx);
spin_unlock_irqrestore(&ctx->completion_lock, flags);
io_cqring_ev_posted(ctx);
@@ -1770,7 +1761,7 @@
io_put_req(req);
}
- return cqe != NULL;
+ return all_flushed;
}
static void __io_cqring_fill_event(struct io_kiocb *req, long res, long cflags)
@@ -2320,8 +2311,6 @@
static unsigned io_cqring_events(struct io_ring_ctx *ctx, bool noflush)
{
- struct io_rings *rings = ctx->rings;
-
if (test_bit(0, &ctx->cq_check_overflow)) {
/*
* noflush == true is from the waitqueue handler, just ensure
@@ -2336,7 +2325,7 @@
/* See comment at the top of this file */
smp_rmb();
- return ctx->cached_cq_tail - READ_ONCE(rings->cq.head);
+ return __io_cqring_events(ctx);
}
static inline unsigned int io_sqring_entries(struct io_ring_ctx *ctx)
@@ -3136,9 +3125,7 @@
iov[0].iov_len = kbuf->len;
return 0;
}
- if (!req->rw.len)
- return 0;
- else if (req->rw.len > 1)
+ if (req->rw.len != 1)
return -EINVAL;
#ifdef CONFIG_COMPAT
@@ -3784,6 +3771,8 @@
return -ENOTSOCK;
ret = __sys_shutdown_sock(sock, req->shutdown.how);
+ if (ret < 0)
+ req_set_fail_links(req);
io_req_complete(req, ret);
return 0;
#else
@@ -6107,15 +6096,15 @@
struct io_uring_task *tctx = req->task->io_uring;
unsigned long flags;
- spin_lock_irqsave(&ctx->inflight_lock, flags);
- list_del(&req->inflight_entry);
- if (atomic_read(&tctx->in_idle))
- wake_up(&tctx->wait);
- spin_unlock_irqrestore(&ctx->inflight_lock, flags);
- req->flags &= ~REQ_F_INFLIGHT;
put_files_struct(req->work.identity->files);
put_nsproxy(req->work.identity->nsproxy);
+ spin_lock_irqsave(&ctx->inflight_lock, flags);
+ list_del(&req->inflight_entry);
+ spin_unlock_irqrestore(&ctx->inflight_lock, flags);
+ req->flags &= ~REQ_F_INFLIGHT;
req->work.flags &= ~IO_WQ_WORK_FILES;
+ if (atomic_read(&tctx->in_idle))
+ wake_up(&tctx->wait);
}
static void __io_clean_op(struct io_kiocb *req)
@@ -6343,19 +6332,28 @@
}
if (ret) {
- /*
- * io_iopoll_complete() does not hold completion_lock to complete
- * polled io, so here for polled io, just mark it done and still let
- * io_iopoll_complete() complete it.
- */
- if (req->ctx->flags & IORING_SETUP_IOPOLL) {
- struct kiocb *kiocb = &req->rw.kiocb;
+ struct io_ring_ctx *lock_ctx = NULL;
- kiocb_done(kiocb, ret, NULL);
- } else {
- req_set_fail_links(req);
- io_req_complete(req, ret);
- }
+ if (req->ctx->flags & IORING_SETUP_IOPOLL)
+ lock_ctx = req->ctx;
+
+ /*
+ * io_iopoll_complete() does not hold completion_lock to
+ * complete polled io, so here for polled io, we can not call
+ * io_req_complete() directly, otherwise there maybe concurrent
+ * access to cqring, defer_list, etc, which is not safe. Given
+ * that io_iopoll_complete() is always called under uring_lock,
+ * so here for polled io, we also get uring_lock to complete
+ * it.
+ */
+ if (lock_ctx)
+ mutex_lock(&lock_ctx->uring_lock);
+
+ req_set_fail_links(req);
+ io_req_complete(req, ret);
+
+ if (lock_ctx)
+ mutex_unlock(&lock_ctx->uring_lock);
}
return io_steal_work(req);
@@ -6824,8 +6822,7 @@
/* if we have a backlog and couldn't flush it all, return BUSY */
if (test_bit(0, &ctx->sq_check_overflow)) {
- if (!list_empty(&ctx->cq_overflow_list) &&
- !io_cqring_overflow_flush(ctx, false, NULL, NULL))
+ if (!io_cqring_overflow_flush(ctx, false, NULL, NULL))
return -EBUSY;
}
@@ -8155,10 +8152,13 @@
__io_unaccount_mem(ctx->user, nr_pages);
if (ctx->mm_account) {
- if (acct == ACCT_LOCKED)
+ if (acct == ACCT_LOCKED) {
+ mmap_write_lock(ctx->mm_account);
ctx->mm_account->locked_vm -= nr_pages;
- else if (acct == ACCT_PINNED)
+ mmap_write_unlock(ctx->mm_account);
+ }else if (acct == ACCT_PINNED) {
atomic64_sub(nr_pages, &ctx->mm_account->pinned_vm);
+ }
}
}
@@ -8174,10 +8174,13 @@
}
if (ctx->mm_account) {
- if (acct == ACCT_LOCKED)
+ if (acct == ACCT_LOCKED) {
+ mmap_write_lock(ctx->mm_account);
ctx->mm_account->locked_vm += nr_pages;
- else if (acct == ACCT_PINNED)
+ mmap_write_unlock(ctx->mm_account);
+ } else if (acct == ACCT_PINNED) {
atomic64_add(nr_pages, &ctx->mm_account->pinned_vm);
+ }
}
return 0;
@@ -8643,10 +8646,19 @@
io_ring_ctx_free(ctx);
}
+static bool io_cancel_ctx_cb(struct io_wq_work *work, void *data)
+{
+ struct io_kiocb *req = container_of(work, struct io_kiocb, work);
+
+ return req->ctx == data;
+}
+
static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
{
mutex_lock(&ctx->uring_lock);
percpu_ref_kill(&ctx->refs);
+ /* if force is set, the ring is going away. always drop after that */
+ ctx->cq_overflow_flushed = 1;
if (ctx->rings)
io_cqring_overflow_flush(ctx, true, NULL, NULL);
mutex_unlock(&ctx->uring_lock);
@@ -8655,7 +8667,7 @@
io_poll_remove_all(ctx, NULL, NULL);
if (ctx->io_wq)
- io_wq_cancel_all(ctx->io_wq);
+ io_wq_cancel_cb(ctx->io_wq, io_cancel_ctx_cb, ctx, true);
/* if we failed setting up the ctx, we might not have any rings */
io_iopoll_try_reap_events(ctx);
@@ -8798,9 +8810,9 @@
ret |= io_poll_remove_all(ctx, task, NULL);
ret |= io_kill_timeouts(ctx, task, NULL);
+ ret |= io_run_task_work();
if (!ret)
break;
- io_run_task_work();
cond_resched();
}
}
@@ -8849,10 +8861,9 @@
static int io_uring_add_task_file(struct io_ring_ctx *ctx, struct file *file)
{
struct io_uring_task *tctx = current->io_uring;
+ int ret;
if (unlikely(!tctx)) {
- int ret;
-
ret = io_uring_alloc_task_context(current);
if (unlikely(ret))
return ret;
@@ -8863,7 +8874,12 @@
if (!old) {
get_file(file);
- xa_store(&tctx->xa, (unsigned long)file, file, GFP_KERNEL);
+ ret = xa_err(xa_store(&tctx->xa, (unsigned long)file,
+ file, GFP_KERNEL));
+ if (ret) {
+ fput(file);
+ return ret;
+ }
}
tctx->last = file;
}
@@ -8986,9 +9002,9 @@
if (inflight != tctx_inflight(tctx))
continue;
schedule();
+ finish_wait(&tctx->wait, &wait);
} while (1);
- finish_wait(&tctx->wait, &wait);
atomic_dec(&tctx->in_idle);
}
@@ -9156,10 +9172,13 @@
*/
ret = 0;
if (ctx->flags & IORING_SETUP_SQPOLL) {
- io_ring_submit_lock(ctx, (ctx->flags & IORING_SETUP_IOPOLL));
- if (!list_empty_careful(&ctx->cq_overflow_list))
+ if (!list_empty_careful(&ctx->cq_overflow_list)) {
+ bool needs_lock = ctx->flags & IORING_SETUP_IOPOLL;
+
+ io_ring_submit_lock(ctx, needs_lock);
io_cqring_overflow_flush(ctx, false, NULL, NULL);
- io_ring_submit_unlock(ctx, (ctx->flags & IORING_SETUP_IOPOLL));
+ io_ring_submit_unlock(ctx, needs_lock);
+ }
if (flags & IORING_ENTER_SQ_WAKEUP)
wake_up(&ctx->sq_data->wait);
if (flags & IORING_ENTER_SQ_WAIT)
@@ -9369,55 +9388,52 @@
return 0;
}
+static int io_uring_install_fd(struct io_ring_ctx *ctx, struct file *file)
+{
+ int ret, fd;
+
+ fd = get_unused_fd_flags(O_RDWR | O_CLOEXEC);
+ if (fd < 0)
+ return fd;
+
+ ret = io_uring_add_task_file(ctx, file);
+ if (ret) {
+ put_unused_fd(fd);
+ return ret;
+ }
+ fd_install(fd, file);
+ return fd;
+}
+
/*
* Allocate an anonymous fd, this is what constitutes the application
* visible backing of an io_uring instance. The application mmaps this
* fd to gain access to the SQ/CQ ring details. If UNIX sockets are enabled,
* we have to tie this fd to a socket for file garbage collection purposes.
*/
-static int io_uring_get_fd(struct io_ring_ctx *ctx)
+static struct file *io_uring_get_file(struct io_ring_ctx *ctx)
{
struct file *file;
- int ret;
- int fd;
-
#if defined(CONFIG_UNIX)
+ int ret;
+
ret = sock_create_kern(&init_net, PF_UNIX, SOCK_RAW, IPPROTO_IP,
&ctx->ring_sock);
if (ret)
- return ret;
+ return ERR_PTR(ret);
#endif
- ret = get_unused_fd_flags(O_RDWR | O_CLOEXEC);
- if (ret < 0)
- goto err;
- fd = ret;
-
file = anon_inode_getfile("[io_uring]", &io_uring_fops, ctx,
O_RDWR | O_CLOEXEC);
+#if defined(CONFIG_UNIX)
if (IS_ERR(file)) {
- put_unused_fd(fd);
- ret = PTR_ERR(file);
- goto err;
+ sock_release(ctx->ring_sock);
+ ctx->ring_sock = NULL;
+ } else {
+ ctx->ring_sock->file = file;
}
-
-#if defined(CONFIG_UNIX)
- ctx->ring_sock->file = file;
#endif
- ret = io_uring_add_task_file(ctx, file);
- if (ret) {
- fput(file);
- put_unused_fd(fd);
- goto err;
- }
- fd_install(fd, file);
- return fd;
-err:
-#if defined(CONFIG_UNIX)
- sock_release(ctx->ring_sock);
- ctx->ring_sock = NULL;
-#endif
- return ret;
+ return file;
}
static int io_uring_create(unsigned entries, struct io_uring_params *p,
@@ -9425,6 +9441,7 @@
{
struct user_struct *user = NULL;
struct io_ring_ctx *ctx;
+ struct file *file;
bool limit_mem;
int ret;
@@ -9572,13 +9589,22 @@
goto err;
}
+ file = io_uring_get_file(ctx);
+ if (IS_ERR(file)) {
+ ret = PTR_ERR(file);
+ goto err;
+ }
+
/*
* Install ring fd as the very last thing, so we don't risk someone
* having closed it before we finish setup
*/
- ret = io_uring_get_fd(ctx);
- if (ret < 0)
- goto err;
+ ret = io_uring_install_fd(ctx, file);
+ if (ret < 0) {
+ /* fput will clean it up */
+ fput(file);
+ return ret;
+ }
trace_io_uring_create(ret, ctx, p->sq_entries, p->cq_entries, p->flags);
return ret;
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 188f79d..2dc9444 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1869,9 +1869,7 @@
if (jbd2_has_feature_fast_commit(journal)) {
journal->j_fc_last = be32_to_cpu(sb->s_maxlen);
- num_fc_blocks = be32_to_cpu(sb->s_num_fc_blks);
- if (!num_fc_blocks)
- num_fc_blocks = JBD2_MIN_FC_BLOCKS;
+ num_fc_blocks = jbd2_journal_get_num_fc_blks(sb);
if (journal->j_last - num_fc_blocks >= JBD2_MIN_JOURNAL_BLOCKS)
journal->j_last = journal->j_fc_last - num_fc_blocks;
journal->j_fc_first = journal->j_last + 1;
@@ -2102,9 +2100,7 @@
journal_superblock_t *sb = journal->j_superblock;
unsigned long long num_fc_blks;
- num_fc_blks = be32_to_cpu(sb->s_num_fc_blks);
- if (num_fc_blks == 0)
- num_fc_blks = JBD2_MIN_FC_BLOCKS;
+ num_fc_blks = jbd2_journal_get_num_fc_blks(sb);
if (journal->j_last - num_fc_blks < JBD2_MIN_JOURNAL_BLOCKS)
return -ENOSPC;
diff --git a/fs/namei.c b/fs/namei.c
index 03d0e11..78443a8 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2114,8 +2114,10 @@
return PTR_ERR(name);
while (*name=='/')
name++;
- if (!*name)
+ if (!*name) {
+ nd->dir_mode = 0; // short-circuit the 'hardening' idiocy
return 0;
+ }
/* At this point we know we have a real path component. */
for(;;) {
diff --git a/fs/namespace.c b/fs/namespace.c
index 2b681f65..d2db7df 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -156,10 +156,10 @@
/*
* vfsmount lock must be held for write
*/
-unsigned int mnt_get_count(struct mount *mnt)
+int mnt_get_count(struct mount *mnt)
{
#ifdef CONFIG_SMP
- unsigned int count = 0;
+ int count = 0;
int cpu;
for_each_possible_cpu(cpu) {
@@ -1139,6 +1139,7 @@
static void mntput_no_expire(struct mount *mnt)
{
LIST_HEAD(list);
+ int count;
rcu_read_lock();
if (likely(READ_ONCE(mnt->mnt_ns))) {
@@ -1162,7 +1163,9 @@
*/
smp_mb();
mnt_add_count(mnt, -1);
- if (mnt_get_count(mnt)) {
+ count = mnt_get_count(mnt);
+ if (count != 0) {
+ WARN_ON(count < 0);
rcu_read_unlock();
unlock_mount_hash();
return;
diff --git a/fs/pnode.h b/fs/pnode.h
index 49a058c..26f74e0 100644
--- a/fs/pnode.h
+++ b/fs/pnode.h
@@ -44,7 +44,7 @@
void propagate_mount_unlock(struct mount *);
void mnt_release_group_id(struct mount *);
int get_dominating_id(struct mount *mnt, const struct path *root);
-unsigned int mnt_get_count(struct mount *mnt);
+int mnt_get_count(struct mount *mnt);
void mnt_set_mountpoint(struct mount *, struct mountpoint *,
struct mount *);
void mnt_change_mountpoint(struct mount *parent, struct mountpoint *mp,
diff --git a/include/linux/efi.h b/include/linux/efi.h
index d7c0e73..763b816 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -817,12 +817,6 @@
static inline void
efi_reboot(enum reboot_mode reboot_mode, const char *__unused) {}
-static inline bool
-efi_capsule_pending(int *reset_type)
-{
- return false;
-}
-
static inline bool efi_soft_reserve_enabled(void)
{
return false;
@@ -1038,6 +1032,7 @@
bool efivar_variable_is_removable(efi_guid_t vendor, const char *name,
size_t len);
+#if IS_ENABLED(CONFIG_EFI_CAPSULE_LOADER)
extern bool efi_capsule_pending(int *reset_type);
extern int efi_capsule_supported(efi_guid_t guid, u32 flags,
@@ -1045,6 +1040,9 @@
extern int efi_capsule_update(efi_capsule_header_t *capsule,
phys_addr_t *pages);
+#else
+static inline bool efi_capsule_pending(int *reset_type) { return false; }
+#endif
#ifdef CONFIG_EFI_RUNTIME_MAP
int efi_runtime_map_init(struct kobject *);
@@ -1089,7 +1087,28 @@
efi_secureboot_mode_disabled,
efi_secureboot_mode_enabled,
};
-enum efi_secureboot_mode efi_get_secureboot(void);
+
+static inline
+enum efi_secureboot_mode efi_get_secureboot_mode(efi_get_variable_t *get_var)
+{
+ u8 secboot, setupmode = 0;
+ efi_status_t status;
+ unsigned long size;
+
+ size = sizeof(secboot);
+ status = get_var(L"SecureBoot", &EFI_GLOBAL_VARIABLE_GUID, NULL, &size,
+ &secboot);
+ if (status == EFI_NOT_FOUND)
+ return efi_secureboot_mode_disabled;
+ if (status != EFI_SUCCESS)
+ return efi_secureboot_mode_unknown;
+
+ size = sizeof(setupmode);
+ get_var(L"SetupMode", &EFI_GLOBAL_VARIABLE_GUID, NULL, &size, &setupmode);
+ if (secboot == 0 || setupmode == 1)
+ return efi_secureboot_mode_disabled;
+ return efi_secureboot_mode_enabled;
+}
#ifdef CONFIG_RESET_ATTACK_MITIGATION
void efi_enable_reset_attack_mitigation(void);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index ad4cf1b..fd47dee 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2876,8 +2876,7 @@
extern int generic_delete_inode(struct inode *inode);
static inline int generic_drop_inode(struct inode *inode)
{
- return !inode->i_nlink || inode_unhashed(inode) ||
- (inode->i_state & I_DONTCACHE);
+ return !inode->i_nlink || inode_unhashed(inode);
}
extern void d_mark_dontcache(struct inode *inode);
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 870b325..bb8ff90 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -232,6 +232,7 @@
# define local_irq_enable_in_hardirq() local_irq_enable()
#endif
+bool irq_has_action(unsigned int irq);
extern void disable_irq_nosync(unsigned int irq);
extern bool disable_hardirq(unsigned int irq);
extern void disable_irq(unsigned int irq);
diff --git a/include/linux/irq.h b/include/linux/irq.h
index c332871..4aeb1c4 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -906,6 +906,13 @@
}
#endif
+static inline struct cpumask *irq_get_effective_affinity_mask(unsigned int irq)
+{
+ struct irq_data *d = irq_get_irq_data(irq);
+
+ return d ? irq_data_get_effective_affinity_mask(d) : NULL;
+}
+
unsigned int arch_dynirq_lower_bound(unsigned int from);
int __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node,
diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index 5745491..891b323 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -113,6 +113,12 @@
extern struct irq_desc irq_desc[NR_IRQS];
#endif
+static inline unsigned int irq_desc_kstat_cpu(struct irq_desc *desc,
+ unsigned int cpu)
+{
+ return desc->kstat_irqs ? *per_cpu_ptr(desc->kstat_irqs, cpu) : 0;
+}
+
static inline struct irq_desc *irq_data_to_desc(struct irq_data *data)
{
return container_of(data->common, struct irq_desc, irq_common_data);
@@ -179,12 +185,7 @@
/* Test to see if a driver has successfully requested an irq */
static inline int irq_desc_has_action(struct irq_desc *desc)
{
- return desc->action != NULL;
-}
-
-static inline int irq_has_action(unsigned int irq)
-{
- return irq_desc_has_action(irq_to_desc(irq));
+ return desc && desc->action != NULL;
}
/**
@@ -228,40 +229,31 @@
data->chip = chip;
}
+bool irq_check_status_bit(unsigned int irq, unsigned int bitmask);
+
static inline bool irq_balancing_disabled(unsigned int irq)
{
- struct irq_desc *desc;
-
- desc = irq_to_desc(irq);
- return desc->status_use_accessors & IRQ_NO_BALANCING_MASK;
+ return irq_check_status_bit(irq, IRQ_NO_BALANCING_MASK);
}
static inline bool irq_is_percpu(unsigned int irq)
{
- struct irq_desc *desc;
-
- desc = irq_to_desc(irq);
- return desc->status_use_accessors & IRQ_PER_CPU;
+ return irq_check_status_bit(irq, IRQ_PER_CPU);
}
static inline bool irq_is_percpu_devid(unsigned int irq)
{
- struct irq_desc *desc;
-
- desc = irq_to_desc(irq);
- return desc->status_use_accessors & IRQ_PER_CPU_DEVID;
+ return irq_check_status_bit(irq, IRQ_PER_CPU_DEVID);
}
+void __irq_set_lockdep_class(unsigned int irq, struct lock_class_key *lock_class,
+ struct lock_class_key *request_class);
static inline void
irq_set_lockdep_class(unsigned int irq, struct lock_class_key *lock_class,
struct lock_class_key *request_class)
{
- struct irq_desc *desc = irq_to_desc(irq);
-
- if (desc) {
- lockdep_set_class(&desc->lock, lock_class);
- lockdep_set_class(&desc->request_mutex, request_class);
- }
+ if (IS_ENABLED(CONFIG_LOCKDEP))
+ __irq_set_lockdep_class(irq, lock_class, request_class);
}
#endif
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 578ff19..99d3cd0 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -68,7 +68,7 @@
extern void jbd2_free(void *ptr, size_t size);
#define JBD2_MIN_JOURNAL_BLOCKS 1024
-#define JBD2_MIN_FC_BLOCKS 256
+#define JBD2_DEFAULT_FAST_COMMIT_BLOCKS 256
#ifdef __KERNEL__
@@ -538,6 +538,7 @@
* The transaction keeps track of all of the buffers modified by a
* running transaction, and all of the buffers committed but not yet
* flushed to home for finished transactions.
+ * (Locking Documentation improved by LockDoc)
*/
/*
@@ -658,12 +659,12 @@
unsigned long t_start;
/*
- * When commit was requested
+ * When commit was requested [j_state_lock]
*/
unsigned long t_requested;
/*
- * Checkpointing stats [j_checkpoint_sem]
+ * Checkpointing stats [j_list_lock]
*/
struct transaction_chp_stats_s t_chp_stats;
@@ -1691,6 +1692,13 @@
return journal->j_chksum_driver != NULL;
}
+static inline int jbd2_journal_get_num_fc_blks(journal_superblock_t *jsb)
+{
+ int num_fc_blocks = be32_to_cpu(jsb->s_num_fc_blks);
+
+ return num_fc_blocks ? num_fc_blocks : JBD2_DEFAULT_FAST_COMMIT_BLOCKS;
+}
+
/*
* Return number of free blocks in the log. Must be called under j_state_lock.
*/
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index 89f0745..44ae1a7 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -67,7 +67,6 @@
/*
* Number of interrupts per specific IRQ source, since bootup
*/
-extern unsigned int kstat_irqs(unsigned int irq);
extern unsigned int kstat_irqs_usr(unsigned int irq);
/*
diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h
index 30bc7a7..0fefeb9 100644
--- a/include/linux/vdpa.h
+++ b/include/linux/vdpa.h
@@ -42,6 +42,7 @@
* @config: the configuration ops for this device.
* @index: device index
* @features_valid: were features initialized? for legacy guests
+ * @nvqs: maximum number of supported virtqueues
*/
struct vdpa_device {
struct device dev;
diff --git a/include/trace/events/iocost.h b/include/trace/events/iocost.h
index 0b68699..e282ce0 100644
--- a/include/trace/events/iocost.h
+++ b/include/trace/events/iocost.h
@@ -11,7 +11,7 @@
#include <linux/tracepoint.h>
-TRACE_EVENT(iocost_iocg_activate,
+DECLARE_EVENT_CLASS(iocost_iocg_state,
TP_PROTO(struct ioc_gq *iocg, const char *path, struct ioc_now *now,
u64 last_period, u64 cur_period, u64 vtime),
@@ -59,6 +59,20 @@
)
);
+DEFINE_EVENT(iocost_iocg_state, iocost_iocg_activate,
+ TP_PROTO(struct ioc_gq *iocg, const char *path, struct ioc_now *now,
+ u64 last_period, u64 cur_period, u64 vtime),
+
+ TP_ARGS(iocg, path, now, last_period, cur_period, vtime)
+);
+
+DEFINE_EVENT(iocost_iocg_state, iocost_iocg_idle,
+ TP_PROTO(struct ioc_gq *iocg, const char *path, struct ioc_now *now,
+ u64 last_period, u64 cur_period, u64 vtime),
+
+ TP_ARGS(iocg, path, now, last_period, cur_period, vtime)
+);
+
DECLARE_EVENT_CLASS(iocg_inuse_update,
TP_PROTO(struct ioc_gq *iocg, const char *path, struct ioc_now *now,
diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h
index b052355..bc1c062 100644
--- a/include/uapi/linux/virtio_ids.h
+++ b/include/uapi/linux/virtio_ids.h
@@ -29,24 +29,30 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE. */
-#define VIRTIO_ID_NET 1 /* virtio net */
-#define VIRTIO_ID_BLOCK 2 /* virtio block */
-#define VIRTIO_ID_CONSOLE 3 /* virtio console */
-#define VIRTIO_ID_RNG 4 /* virtio rng */
-#define VIRTIO_ID_BALLOON 5 /* virtio balloon */
-#define VIRTIO_ID_RPMSG 7 /* virtio remote processor messaging */
-#define VIRTIO_ID_SCSI 8 /* virtio scsi */
-#define VIRTIO_ID_9P 9 /* 9p virtio console */
-#define VIRTIO_ID_RPROC_SERIAL 11 /* virtio remoteproc serial link */
-#define VIRTIO_ID_CAIF 12 /* Virtio caif */
-#define VIRTIO_ID_GPU 16 /* virtio GPU */
-#define VIRTIO_ID_INPUT 18 /* virtio input */
-#define VIRTIO_ID_VSOCK 19 /* virtio vsock transport */
-#define VIRTIO_ID_CRYPTO 20 /* virtio crypto */
-#define VIRTIO_ID_IOMMU 23 /* virtio IOMMU */
-#define VIRTIO_ID_MEM 24 /* virtio mem */
-#define VIRTIO_ID_FS 26 /* virtio filesystem */
-#define VIRTIO_ID_PMEM 27 /* virtio pmem */
-#define VIRTIO_ID_MAC80211_HWSIM 29 /* virtio mac80211-hwsim */
+#define VIRTIO_ID_NET 1 /* virtio net */
+#define VIRTIO_ID_BLOCK 2 /* virtio block */
+#define VIRTIO_ID_CONSOLE 3 /* virtio console */
+#define VIRTIO_ID_RNG 4 /* virtio rng */
+#define VIRTIO_ID_BALLOON 5 /* virtio balloon */
+#define VIRTIO_ID_IOMEM 6 /* virtio ioMemory */
+#define VIRTIO_ID_RPMSG 7 /* virtio remote processor messaging */
+#define VIRTIO_ID_SCSI 8 /* virtio scsi */
+#define VIRTIO_ID_9P 9 /* 9p virtio console */
+#define VIRTIO_ID_MAC80211_WLAN 10 /* virtio WLAN MAC */
+#define VIRTIO_ID_RPROC_SERIAL 11 /* virtio remoteproc serial link */
+#define VIRTIO_ID_CAIF 12 /* Virtio caif */
+#define VIRTIO_ID_MEMORY_BALLOON 13 /* virtio memory balloon */
+#define VIRTIO_ID_GPU 16 /* virtio GPU */
+#define VIRTIO_ID_CLOCK 17 /* virtio clock/timer */
+#define VIRTIO_ID_INPUT 18 /* virtio input */
+#define VIRTIO_ID_VSOCK 19 /* virtio vsock transport */
+#define VIRTIO_ID_CRYPTO 20 /* virtio crypto */
+#define VIRTIO_ID_SIGNAL_DIST 21 /* virtio signal distribution device */
+#define VIRTIO_ID_PSTORE 22 /* virtio pstore device */
+#define VIRTIO_ID_IOMMU 23 /* virtio IOMMU */
+#define VIRTIO_ID_MEM 24 /* virtio mem */
+#define VIRTIO_ID_FS 26 /* virtio filesystem */
+#define VIRTIO_ID_PMEM 27 /* virtio pmem */
+#define VIRTIO_ID_MAC80211_HWSIM 29 /* virtio mac80211-hwsim */
#endif /* _LINUX_VIRTIO_IDS_H */
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index e810eb9..cc1a094 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -147,12 +147,12 @@
struct kobj_attribute *attr, char *buf)
{
struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj);
- int cpu, irq = desc->irq_data.irq;
ssize_t ret = 0;
char *p = "";
+ int cpu;
for_each_possible_cpu(cpu) {
- unsigned int c = kstat_irqs_cpu(irq, cpu);
+ unsigned int c = irq_desc_kstat_cpu(desc, cpu);
ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s%u", p, c);
p = ",";
@@ -352,7 +352,9 @@
{
return radix_tree_lookup(&irq_desc_tree, irq);
}
-EXPORT_SYMBOL(irq_to_desc);
+#ifdef CONFIG_KVM_BOOK3S_64_HV_MODULE
+EXPORT_SYMBOL_GPL(irq_to_desc);
+#endif
static void delete_irq_desc(unsigned int irq)
{
@@ -924,15 +926,7 @@
return desc->istate & IRQS_NMI;
}
-/**
- * kstat_irqs - Get the statistics for an interrupt
- * @irq: The interrupt number
- *
- * Returns the sum of interrupt counts on all cpus since boot for
- * @irq. The caller must ensure that the interrupt is not removed
- * concurrently.
- */
-unsigned int kstat_irqs(unsigned int irq)
+static unsigned int kstat_irqs(unsigned int irq)
{
struct irq_desc *desc = irq_to_desc(irq);
unsigned int sum = 0;
@@ -943,21 +937,22 @@
if (!irq_settings_is_per_cpu_devid(desc) &&
!irq_settings_is_per_cpu(desc) &&
!irq_is_nmi(desc))
- return desc->tot_count;
+ return data_race(desc->tot_count);
for_each_possible_cpu(cpu)
- sum += *per_cpu_ptr(desc->kstat_irqs, cpu);
+ sum += data_race(*per_cpu_ptr(desc->kstat_irqs, cpu));
return sum;
}
/**
- * kstat_irqs_usr - Get the statistics for an interrupt
+ * kstat_irqs_usr - Get the statistics for an interrupt from thread context
* @irq: The interrupt number
*
* Returns the sum of interrupt counts on all cpus since boot for @irq.
- * Contrary to kstat_irqs() this can be called from any context.
- * It uses rcu since a concurrent removal of an interrupt descriptor is
- * observing an rcu grace period before delayed_free_desc()/irq_kobj_release().
+ *
+ * It uses rcu to protect the access since a concurrent removal of an
+ * interrupt descriptor is observing an rcu grace period before
+ * delayed_free_desc()/irq_kobj_release().
*/
unsigned int kstat_irqs_usr(unsigned int irq)
{
@@ -968,3 +963,17 @@
rcu_read_unlock();
return sum;
}
+
+#ifdef CONFIG_LOCKDEP
+void __irq_set_lockdep_class(unsigned int irq, struct lock_class_key *lock_class,
+ struct lock_class_key *request_class)
+{
+ struct irq_desc *desc = irq_to_desc(irq);
+
+ if (desc) {
+ lockdep_set_class(&desc->lock, lock_class);
+ lockdep_set_class(&desc->request_mutex, request_class);
+ }
+}
+EXPORT_SYMBOL_GPL(__irq_set_lockdep_class);
+#endif
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index c826ba4..ab8567f 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -2822,3 +2822,40 @@
return err;
}
EXPORT_SYMBOL_GPL(irq_set_irqchip_state);
+
+/**
+ * irq_has_action - Check whether an interrupt is requested
+ * @irq: The linux irq number
+ *
+ * Returns: A snapshot of the current state
+ */
+bool irq_has_action(unsigned int irq)
+{
+ bool res;
+
+ rcu_read_lock();
+ res = irq_desc_has_action(irq_to_desc(irq));
+ rcu_read_unlock();
+ return res;
+}
+EXPORT_SYMBOL_GPL(irq_has_action);
+
+/**
+ * irq_check_status_bit - Check whether bits in the irq descriptor status are set
+ * @irq: The linux irq number
+ * @bitmask: The bitmask to evaluate
+ *
+ * Returns: True if one of the bits in @bitmask is set
+ */
+bool irq_check_status_bit(unsigned int irq, unsigned int bitmask)
+{
+ struct irq_desc *desc;
+ bool res = false;
+
+ rcu_read_lock();
+ desc = irq_to_desc(irq);
+ if (desc)
+ res = !!(desc->status_use_accessors & bitmask);
+ rcu_read_unlock();
+ return res;
+}
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index 72513ed..9813878 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -488,9 +488,10 @@
if (!desc || irq_settings_is_hidden(desc))
goto outsparse;
- if (desc->kstat_irqs)
+ if (desc->kstat_irqs) {
for_each_online_cpu(j)
- any_count |= *per_cpu_ptr(desc->kstat_irqs, j);
+ any_count |= data_race(*per_cpu_ptr(desc->kstat_irqs, j));
+ }
if ((!desc->action || irq_desc_is_chained(desc)) && !any_count)
goto outsparse;
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index c016042..af41fb9 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1784,39 +1784,112 @@
}
EXPORT_SYMBOL_GPL(remove_memory);
+static int try_offline_memory_block(struct memory_block *mem, void *arg)
+{
+ uint8_t online_type = MMOP_ONLINE_KERNEL;
+ uint8_t **online_types = arg;
+ struct page *page;
+ int rc;
+
+ /*
+ * Sense the online_type via the zone of the memory block. Offlining
+ * with multiple zones within one memory block will be rejected
+ * by offlining code ... so we don't care about that.
+ */
+ page = pfn_to_online_page(section_nr_to_pfn(mem->start_section_nr));
+ if (page && zone_idx(page_zone(page)) == ZONE_MOVABLE)
+ online_type = MMOP_ONLINE_MOVABLE;
+
+ rc = device_offline(&mem->dev);
+ /*
+ * Default is MMOP_OFFLINE - change it only if offlining succeeded,
+ * so try_reonline_memory_block() can do the right thing.
+ */
+ if (!rc)
+ **online_types = online_type;
+
+ (*online_types)++;
+ /* Ignore if already offline. */
+ return rc < 0 ? rc : 0;
+}
+
+static int try_reonline_memory_block(struct memory_block *mem, void *arg)
+{
+ uint8_t **online_types = arg;
+ int rc;
+
+ if (**online_types != MMOP_OFFLINE) {
+ mem->online_type = **online_types;
+ rc = device_online(&mem->dev);
+ if (rc < 0)
+ pr_warn("%s: Failed to re-online memory: %d",
+ __func__, rc);
+ }
+
+ /* Continue processing all remaining memory blocks. */
+ (*online_types)++;
+ return 0;
+}
+
/*
- * Try to offline and remove a memory block. Might take a long time to
- * finish in case memory is still in use. Primarily useful for memory devices
- * that logically unplugged all memory (so it's no longer in use) and want to
- * offline + remove the memory block.
+ * Try to offline and remove memory. Might take a long time to finish in case
+ * memory is still in use. Primarily useful for memory devices that logically
+ * unplugged all memory (so it's no longer in use) and want to offline + remove
+ * that memory.
*/
int offline_and_remove_memory(int nid, u64 start, u64 size)
{
- struct memory_block *mem;
- int rc = -EINVAL;
+ const unsigned long mb_count = size / memory_block_size_bytes();
+ uint8_t *online_types, *tmp;
+ int rc;
if (!IS_ALIGNED(start, memory_block_size_bytes()) ||
- size != memory_block_size_bytes())
- return rc;
-
- lock_device_hotplug();
- mem = find_memory_block(__pfn_to_section(PFN_DOWN(start)));
- if (mem)
- rc = device_offline(&mem->dev);
- /* Ignore if the device is already offline. */
- if (rc > 0)
- rc = 0;
+ !IS_ALIGNED(size, memory_block_size_bytes()) || !size)
+ return -EINVAL;
/*
- * In case we succeeded to offline the memory block, remove it.
+ * We'll remember the old online type of each memory block, so we can
+ * try to revert whatever we did when offlining one memory block fails
+ * after offlining some others succeeded.
+ */
+ online_types = kmalloc_array(mb_count, sizeof(*online_types),
+ GFP_KERNEL);
+ if (!online_types)
+ return -ENOMEM;
+ /*
+ * Initialize all states to MMOP_OFFLINE, so when we abort processing in
+ * try_offline_memory_block(), we'll skip all unprocessed blocks in
+ * try_reonline_memory_block().
+ */
+ memset(online_types, MMOP_OFFLINE, mb_count);
+
+ lock_device_hotplug();
+
+ tmp = online_types;
+ rc = walk_memory_blocks(start, size, &tmp, try_offline_memory_block);
+
+ /*
+ * In case we succeeded to offline all memory, remove it.
* This cannot fail as it cannot get onlined in the meantime.
*/
if (!rc) {
rc = try_remove_memory(nid, start, size);
- WARN_ON_ONCE(rc);
+ if (rc)
+ pr_err("%s: Failed to remove memory: %d", __func__, rc);
+ }
+
+ /*
+ * Rollback what we did. While memory onlining might theoretically fail
+ * (nacked by a notifier), it barely ever happens.
+ */
+ if (rc) {
+ tmp = online_types;
+ walk_memory_blocks(start, size, &tmp,
+ try_reonline_memory_block);
}
unlock_device_hotplug();
+ kfree(online_types);
return rc;
}
EXPORT_SYMBOL_GPL(offline_and_remove_memory);
diff --git a/scripts/coccicheck b/scripts/coccicheck
index 209bb04..65fee63 100755
--- a/scripts/coccicheck
+++ b/scripts/coccicheck
@@ -16,7 +16,6 @@
fi
SPATCH_VERSION=$($SPATCH --version | head -1 | awk '{print $3}')
-SPATCH_VERSION_NUM=$(echo $SPATCH_VERSION | ${DIR}/scripts/ld-version.sh)
USE_JOBS="no"
$SPATCH --help | grep "\-\-jobs" > /dev/null && USE_JOBS="yes"
@@ -61,6 +60,18 @@
if [ "$C" = "1" -o "$C" = "2" ]; then
ONLINE=1
+ if [[ $# -le 0 ]]; then
+ echo ''
+ echo 'Specifying both the variable "C" and rule "coccicheck" in the make
+command results in a shift count error.'
+ echo ''
+ echo 'Try specifying "scripts/coccicheck" as a value for the CHECK variable instead.'
+ echo ''
+ echo 'Example: make C=2 CHECK=scripts/coccicheck drivers/net/ethernet/ethoc.o'
+ echo ''
+ exit 1
+ fi
+
# Take only the last argument, which is the C file to test
shift $(( $# - 1 ))
OPTIONS="$COCCIINCLUDE $1"
@@ -186,14 +197,11 @@
OPT=`grep "Options:" $COCCI | cut -d':' -f2`
REQ=`grep "Requires:" $COCCI | cut -d':' -f2 | sed "s| ||"`
- REQ_NUM=$(echo $REQ | ${DIR}/scripts/ld-version.sh)
- if [ "$REQ_NUM" != "0" ] ; then
- if [ "$SPATCH_VERSION_NUM" -lt "$REQ_NUM" ] ; then
- echo "Skipping coccinelle SmPL patch: $COCCI"
- echo "You have coccinelle: $SPATCH_VERSION"
- echo "This SmPL patch requires: $REQ"
- return
- fi
+ if [ -n "$REQ" ] && ! { echo "$REQ"; echo "$SPATCH_VERSION"; } | sort -CV ; then
+ echo "Skipping coccinelle SmPL patch: $COCCI"
+ echo "You have coccinelle: $SPATCH_VERSION"
+ echo "This SmPL patch requires: $REQ"
+ return
fi
# The option '--parse-cocci' can be used to syntactically check the SmPL files.
diff --git a/scripts/coccinelle/api/ptr_ret.cocci b/scripts/coccinelle/api/ptr_ret.cocci
deleted file mode 100644
index e76cd5d..0000000
--- a/scripts/coccinelle/api/ptr_ret.cocci
+++ /dev/null
@@ -1,97 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-///
-/// Use PTR_ERR_OR_ZERO rather than if(IS_ERR(...)) + PTR_ERR
-///
-// Confidence: High
-// Copyright: (C) 2012 Julia Lawall, INRIA/LIP6.
-// Copyright: (C) 2012 Gilles Muller, INRIA/LiP6.
-// URL: http://coccinelle.lip6.fr/
-// Options: --no-includes --include-headers
-//
-// Keywords: ERR_PTR, PTR_ERR, PTR_ERR_OR_ZERO
-// Version min: 2.6.39
-//
-
-virtual context
-virtual patch
-virtual org
-virtual report
-
-@depends on patch@
-expression ptr;
-@@
-
-- if (IS_ERR(ptr)) return PTR_ERR(ptr); else return 0;
-+ return PTR_ERR_OR_ZERO(ptr);
-
-@depends on patch@
-expression ptr;
-@@
-
-- if (IS_ERR(ptr)) return PTR_ERR(ptr); return 0;
-+ return PTR_ERR_OR_ZERO(ptr);
-
-@depends on patch@
-expression ptr;
-@@
-
-- (IS_ERR(ptr) ? PTR_ERR(ptr) : 0)
-+ PTR_ERR_OR_ZERO(ptr)
-
-@r1 depends on !patch@
-expression ptr;
-position p1;
-@@
-
-* if@p1 (IS_ERR(ptr)) return PTR_ERR(ptr); else return 0;
-
-@r2 depends on !patch@
-expression ptr;
-position p2;
-@@
-
-* if@p2 (IS_ERR(ptr)) return PTR_ERR(ptr); return 0;
-
-@r3 depends on !patch@
-expression ptr;
-position p3;
-@@
-
-* IS_ERR@p3(ptr) ? PTR_ERR(ptr) : 0
-
-@script:python depends on org@
-p << r1.p1;
-@@
-
-coccilib.org.print_todo(p[0], "WARNING: PTR_ERR_OR_ZERO can be used")
-
-
-@script:python depends on org@
-p << r2.p2;
-@@
-
-coccilib.org.print_todo(p[0], "WARNING: PTR_ERR_OR_ZERO can be used")
-
-@script:python depends on org@
-p << r3.p3;
-@@
-
-coccilib.org.print_todo(p[0], "WARNING: PTR_ERR_OR_ZERO can be used")
-
-@script:python depends on report@
-p << r1.p1;
-@@
-
-coccilib.report.print_report(p[0], "WARNING: PTR_ERR_OR_ZERO can be used")
-
-@script:python depends on report@
-p << r2.p2;
-@@
-
-coccilib.report.print_report(p[0], "WARNING: PTR_ERR_OR_ZERO can be used")
-
-@script:python depends on report@
-p << r3.p3;
-@@
-
-coccilib.report.print_report(p[0], "WARNING: PTR_ERR_OR_ZERO can be used")
diff --git a/scripts/coccinelle/misc/boolinit.cocci b/scripts/coccinelle/misc/boolinit.cocci
deleted file mode 100644
index fed6126..0000000
--- a/scripts/coccinelle/misc/boolinit.cocci
+++ /dev/null
@@ -1,195 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/// Bool initializations should use true and false. Bool tests don't need
-/// comparisons. Based on contributions from Joe Perches, Rusty Russell
-/// and Bruce W Allan.
-///
-// Confidence: High
-// Copyright: (C) 2012 Julia Lawall, INRIA/LIP6.
-// Copyright: (C) 2012 Gilles Muller, INRIA/LiP6.
-// URL: http://coccinelle.lip6.fr/
-// Options: --include-headers
-
-virtual patch
-virtual context
-virtual org
-virtual report
-
-@boolok@
-symbol true,false;
-@@
-(
-true
-|
-false
-)
-
-@depends on patch@
-bool t;
-@@
-
-(
-- t == true
-+ t
-|
-- true == t
-+ t
-|
-- t != true
-+ !t
-|
-- true != t
-+ !t
-|
-- t == false
-+ !t
-|
-- false == t
-+ !t
-|
-- t != false
-+ t
-|
-- false != t
-+ t
-)
-
-@depends on patch disable is_zero, isnt_zero@
-bool t;
-@@
-
-(
-- t == 1
-+ t
-|
-- t != 1
-+ !t
-|
-- t == 0
-+ !t
-|
-- t != 0
-+ t
-)
-
-@depends on patch && boolok@
-bool b;
-@@
-(
- b =
-- 0
-+ false
-|
- b =
-- 1
-+ true
-)
-
-// ---------------------------------------------------------------------
-
-@r1 depends on !patch@
-bool t;
-position p;
-@@
-
-(
-* t@p == true
-|
-* true == t@p
-|
-* t@p != true
-|
-* true != t@p
-|
-* t@p == false
-|
-* false == t@p
-|
-* t@p != false
-|
-* false != t@p
-)
-
-@r2 depends on !patch disable is_zero, isnt_zero@
-bool t;
-position p;
-@@
-
-(
-* t@p == 1
-|
-* t@p != 1
-|
-* t@p == 0
-|
-* t@p != 0
-)
-
-@r3 depends on !patch && boolok@
-bool b;
-position p1;
-@@
-(
-*b@p1 = 0
-|
-*b@p1 = 1
-)
-
-@r4 depends on !patch@
-bool b;
-position p2;
-identifier i;
-constant c != {0,1};
-@@
-(
- b = i
-|
-*b@p2 = c
-)
-
-@script:python depends on org@
-p << r1.p;
-@@
-
-cocci.print_main("WARNING: Comparison to bool",p)
-
-@script:python depends on org@
-p << r2.p;
-@@
-
-cocci.print_main("WARNING: Comparison of 0/1 to bool variable",p)
-
-@script:python depends on org@
-p1 << r3.p1;
-@@
-
-cocci.print_main("WARNING: Assignment of 0/1 to bool variable",p1)
-
-@script:python depends on org@
-p2 << r4.p2;
-@@
-
-cocci.print_main("ERROR: Assignment of non-0/1 constant to bool variable",p2)
-
-@script:python depends on report@
-p << r1.p;
-@@
-
-coccilib.report.print_report(p[0],"WARNING: Comparison to bool")
-
-@script:python depends on report@
-p << r2.p;
-@@
-
-coccilib.report.print_report(p[0],"WARNING: Comparison of 0/1 to bool variable")
-
-@script:python depends on report@
-p1 << r3.p1;
-@@
-
-coccilib.report.print_report(p1[0],"WARNING: Assignment of 0/1 to bool variable")
-
-@script:python depends on report@
-p2 << r4.p2;
-@@
-
-coccilib.report.print_report(p2[0],"ERROR: Assignment of non-0/1 constant to bool variable")
diff --git a/scripts/nsdeps b/scripts/nsdeps
index dab4c1a..e8ce2a4 100644
--- a/scripts/nsdeps
+++ b/scripts/nsdeps
@@ -12,11 +12,9 @@
exit 1
fi
-SPATCH_REQ_VERSION_NUM=$(echo $SPATCH_REQ_VERSION | ${DIR}/scripts/ld-version.sh)
SPATCH_VERSION=$($SPATCH --version | head -1 | awk '{print $3}')
-SPATCH_VERSION_NUM=$(echo $SPATCH_VERSION | ${DIR}/scripts/ld-version.sh)
-if [ "$SPATCH_VERSION_NUM" -lt "$SPATCH_REQ_VERSION_NUM" ] ; then
+if ! { echo "$SPATCH_REQ_VERSION"; echo "$SPATCH_VERSION"; } | sort -CV ; then
echo "spatch needs to be version $SPATCH_REQ_VERSION or higher"
exit 1
fi
diff --git a/security/integrity/ima/Makefile b/security/integrity/ima/Makefile
index 67dabca..2499f24 100644
--- a/security/integrity/ima/Makefile
+++ b/security/integrity/ima/Makefile
@@ -14,3 +14,7 @@
ima-$(CONFIG_IMA_BLACKLIST_KEYRING) += ima_mok.o
ima-$(CONFIG_IMA_MEASURE_ASYMMETRIC_KEYS) += ima_asymmetric_keys.o
ima-$(CONFIG_IMA_QUEUE_EARLY_BOOT_KEYS) += ima_queue_keys.o
+
+ifeq ($(CONFIG_EFI),y)
+ima-$(CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT) += ima_efi.o
+endif
diff --git a/security/integrity/ima/ima_efi.c b/security/integrity/ima/ima_efi.c
new file mode 100644
index 0000000..71786d0
--- /dev/null
+++ b/security/integrity/ima/ima_efi.c
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Copyright (C) 2018 IBM Corporation
+ */
+#include <linux/efi.h>
+#include <linux/module.h>
+#include <linux/ima.h>
+#include <asm/efi.h>
+
+#ifndef arch_ima_efi_boot_mode
+#define arch_ima_efi_boot_mode efi_secureboot_mode_unset
+#endif
+
+static enum efi_secureboot_mode get_sb_mode(void)
+{
+ enum efi_secureboot_mode mode;
+
+ if (!efi_rt_services_supported(EFI_RT_SUPPORTED_GET_VARIABLE)) {
+ pr_info("ima: secureboot mode unknown, no efi\n");
+ return efi_secureboot_mode_unknown;
+ }
+
+ mode = efi_get_secureboot_mode(efi.get_variable);
+ if (mode == efi_secureboot_mode_disabled)
+ pr_info("ima: secureboot mode disabled\n");
+ else if (mode == efi_secureboot_mode_unknown)
+ pr_info("ima: secureboot mode unknown\n");
+ else
+ pr_info("ima: secureboot mode enabled\n");
+ return mode;
+}
+
+bool arch_ima_get_secureboot(void)
+{
+ static enum efi_secureboot_mode sb_mode;
+ static bool initialized;
+
+ if (!initialized && efi_enabled(EFI_BOOT)) {
+ sb_mode = arch_ima_efi_boot_mode;
+
+ if (sb_mode == efi_secureboot_mode_unset)
+ sb_mode = get_sb_mode();
+ initialized = true;
+ }
+
+ if (sb_mode == efi_secureboot_mode_enabled)
+ return true;
+ else
+ return false;
+}
+
+/* secureboot arch rules */
+static const char * const sb_arch_rules[] = {
+#if !IS_ENABLED(CONFIG_KEXEC_SIG)
+ "appraise func=KEXEC_KERNEL_CHECK appraise_type=imasig",
+#endif /* CONFIG_KEXEC_SIG */
+ "measure func=KEXEC_KERNEL_CHECK",
+#if !IS_ENABLED(CONFIG_MODULE_SIG)
+ "appraise func=MODULE_CHECK appraise_type=imasig",
+#endif
+ "measure func=MODULE_CHECK",
+ NULL
+};
+
+const char * const *arch_get_ima_policy(void)
+{
+ if (IS_ENABLED(CONFIG_IMA_ARCH_POLICY) && arch_ima_get_secureboot()) {
+ if (IS_ENABLED(CONFIG_MODULE_SIG))
+ set_module_sig_enforced();
+ return sb_arch_rules;
+ }
+ return NULL;
+}
diff --git a/security/smack/smack_access.c b/security/smack/smack_access.c
index efe2406..7eabb44 100644
--- a/security/smack/smack_access.c
+++ b/security/smack/smack_access.c
@@ -688,9 +688,10 @@
bool smack_privileged(int cap)
{
/*
- * All kernel tasks are privileged
+ * Kernel threads may not have credentials we can use.
+ * The io_uring kernel threads do have reliable credentials.
*/
- if (unlikely(current->flags & PF_KTHREAD))
+ if ((current->flags & (PF_KTHREAD | PF_IO_WORKER)) == PF_KTHREAD)
return true;
return smack_privileged_cred(cap, current_cred());
diff --git a/tools/virtio/asm/barrier.h b/tools/virtio/asm/barrier.h
index 04d563f..468435e 100644
--- a/tools/virtio/asm/barrier.h
+++ b/tools/virtio/asm/barrier.h
@@ -16,6 +16,16 @@
# define mb() abort()
# define dma_rmb() abort()
# define dma_wmb() abort()
+#elif defined(__aarch64__)
+#define dmb(opt) asm volatile("dmb " #opt : : : "memory")
+#define virt_mb() __sync_synchronize()
+#define virt_rmb() dmb(ishld)
+#define virt_wmb() dmb(ishst)
+#define virt_store_mb(var, value) do { WRITE_ONCE(var, value); dmb(ish); } while (0)
+/* Weak barriers should be used. If not - it's a bug */
+# define mb() abort()
+# define dma_rmb() abort()
+# define dma_wmb() abort()
#else
#error Please fill in barrier macros
#endif
diff --git a/tools/virtio/linux/bug.h b/tools/virtio/linux/bug.h
index b14c2c3..813baf1 100644
--- a/tools/virtio/linux/bug.h
+++ b/tools/virtio/linux/bug.h
@@ -2,6 +2,8 @@
#ifndef BUG_H
#define BUG_H
+#include <asm/bug.h>
+
#define BUG_ON(__BUG_ON_cond) assert(!(__BUG_ON_cond))
#define BUILD_BUG_ON(x)
diff --git a/tools/virtio/linux/kernel.h b/tools/virtio/linux/kernel.h
index 315e85c..0b49354 100644
--- a/tools/virtio/linux/kernel.h
+++ b/tools/virtio/linux/kernel.h
@@ -11,6 +11,7 @@
#include <linux/compiler.h>
#include <linux/types.h>
+#include <linux/overflow.h>
#include <linux/list.h>
#include <linux/printk.h>
#include <linux/bug.h>
@@ -117,6 +118,16 @@
# define unlikely(x) (__builtin_expect(!!(x), 0))
# endif
+static inline void *krealloc_array(void *p, size_t new_n, size_t new_size, gfp_t gfp)
+{
+ size_t bytes;
+
+ if (unlikely(check_mul_overflow(new_n, new_size, &bytes)))
+ return NULL;
+
+ return krealloc(p, bytes, gfp);
+}
+
#define pr_err(format, ...) fprintf (stderr, format, ## __VA_ARGS__)
#ifdef DEBUG
#define pr_debug(format, ...) fprintf (stderr, format, ## __VA_ARGS__)
@@ -126,8 +137,6 @@
#define dev_err(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__)
#define dev_warn(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__)
-#define WARN_ON_ONCE(cond) (unlikely(cond) ? fprintf (stderr, "WARNING\n") : 0)
-
#define min(x, y) ({ \
typeof(x) _min1 = (x); \
typeof(y) _min2 = (y); \