diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index 2416b03ff2837..137f16feee084 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -430,6 +430,7 @@ Description: Show status of f2fs superblock in real time. 0x800 SBI_QUOTA_SKIP_FLUSH skip flushing quota in current CP 0x1000 SBI_QUOTA_NEED_REPAIR quota file may be corrupted 0x2000 SBI_IS_RESIZEFS resizefs is in process + 0x4000 SBI_IS_FREEZING freezefs is in process ====== ===================== ================================= What: /sys/fs/f2fs/<disk>/ckpt_thread_ioprio diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 7123524a86b8b..bdc733c2561d1 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3485,8 +3485,7 @@ difficult since unequal pointers can no longer be compared. However, if this command-line option is specified, then all normal pointers will have their true - value printed. Pointers printed via %pK may still be - hashed. This option should only be specified when + value printed. This option should only be specified when debugging the kernel. Please do not use on production kernels. @@ -4356,6 +4355,12 @@ fully seed the kernel's CRNG. Default is controlled by CONFIG_RANDOM_TRUST_CPU. + random.trust_bootloader={on,off} + [KNL] Enable or disable trusting the use of + a seed passed by the bootloader (if available) to + fully seed the kernel's CRNG. Default is controlled + by CONFIG_RANDOM_TRUST_BOOTLOADER. 
+ randomize_kstack_offset= [KNL] Enable or disable kernel stack offset randomization, which provides roughly 5 bits of diff --git a/Documentation/admin-guide/mm/index.rst b/Documentation/admin-guide/mm/index.rst index c21b5823f1261..2cf5bae620367 100644 --- a/Documentation/admin-guide/mm/index.rst +++ b/Documentation/admin-guide/mm/index.rst @@ -32,6 +32,7 @@ the Linux memory management. idle_page_tracking ksm memory-hotplug + multigen_lru nommu-mmap numa_memory_policy numaperf diff --git a/Documentation/admin-guide/mm/multigen_lru.rst b/Documentation/admin-guide/mm/multigen_lru.rst new file mode 100644 index 0000000000000..4ea6a801dc56d --- /dev/null +++ b/Documentation/admin-guide/mm/multigen_lru.rst @@ -0,0 +1,146 @@ +.. SPDX-License-Identifier: GPL-2.0 + +============= +Multi-Gen LRU +============= +Quick start +=========== +Build the kernel with the following configurations. + +* ``CONFIG_LRU_GEN=y`` +* ``CONFIG_LRU_GEN_ENABLED=y`` + +All set! + +Runtime options +=============== +``/sys/kernel/mm/lru_gen/`` contains stable ABIs described in the +following subsections. + +Kill switch +----------- +``enabled`` accepts different values to enable or disable the +following components. The default value of this file depends on +``CONFIG_LRU_GEN_ENABLED``. All the components should be enabled +unless some of them have unforeseen side effects. Writing to +``enabled`` has no effect when a component is not supported by the +hardware, and valid values will be accepted even when the main switch +is off. + +====== =============================================================== +Values Components +====== =============================================================== +0x0001 The main switch for the multi-gen LRU. +0x0002 Clearing the accessed bit in leaf page table entries in large + batches, when MMU sets it (e.g., on x86). This behavior can + theoretically worsen lock contention (mmap_lock). 
If it is + disabled, the multi-gen LRU will suffer a minor performance + degradation. +0x0004 Clearing the accessed bit in non-leaf page table entries as + well, when MMU sets it (e.g., on x86). This behavior was not + verified on x86 varieties other than Intel and AMD. If it is + disabled, the multi-gen LRU will suffer a negligible + performance degradation. +[yYnN] Apply to all the components above. +====== =============================================================== + +E.g., +:: + + echo y >/sys/kernel/mm/lru_gen/enabled + cat /sys/kernel/mm/lru_gen/enabled + 0x0007 + echo 5 >/sys/kernel/mm/lru_gen/enabled + cat /sys/kernel/mm/lru_gen/enabled + 0x0005 + +Thrashing prevention +-------------------- +Personal computers are more sensitive to thrashing because it can +cause janks (lags when rendering UI) and negatively impact user +experience. The multi-gen LRU offers thrashing prevention to the +majority of laptop and desktop users who do not have ``oomd``. + +Users can write ``N`` to ``min_ttl_ms`` to prevent the working set of +``N`` milliseconds from getting evicted. The OOM killer is triggered +if this working set cannot be kept in memory. In other words, this +option works as an adjustable pressure relief valve, and when open, it +terminates applications that are hopefully not being used. + +Based on the average human detectable lag (~100ms), ``N=1000`` usually +eliminates intolerable janks due to thrashing. Larger values like +``N=3000`` make janks less noticeable at the risk of premature OOM +kills. + +Experimental features +===================== +``/sys/kernel/debug/lru_gen`` accepts commands described in the +following subsections. Multiple command lines are supported, as is +concatenation with delimiters ``,`` and ``;``. + +``/sys/kernel/debug/lru_gen_full`` provides additional stats for +debugging. ``CONFIG_LRU_GEN_STATS=y`` keeps historical stats from +evicted generations in this file. 
+ +Working set estimation +---------------------- +Working set estimation measures how much memory an application +requires in a given time interval, and it is usually done with little +impact on the performance of the application. E.g., data centers want +to optimize job scheduling (bin packing) to improve memory +utilizations. When a new job comes in, the job scheduler needs to find +out whether each server it manages can allocate a certain amount of +memory for this new job before it can pick a candidate. To do so, this +job scheduler needs to estimate the working sets of the existing jobs. + +When it is read, ``lru_gen`` returns a histogram of numbers of pages +accessed over different time intervals for each memcg and node. +``MAX_NR_GENS`` decides the number of bins for each histogram. +:: + + memcg memcg_id memcg_path + node node_id + min_gen_nr age_in_ms nr_anon_pages nr_file_pages + ... + max_gen_nr age_in_ms nr_anon_pages nr_file_pages + +Each generation contains an estimated number of pages that have been +accessed within ``age_in_ms`` non-cumulatively. E.g., ``min_gen_nr`` +contains the coldest pages and ``max_gen_nr`` contains the hottest +pages, since ``age_in_ms`` of the former is the largest and that of +the latter is the smallest. + +Users can write ``+ memcg_id node_id max_gen_nr +[can_swap [full_scan]]`` to ``lru_gen`` to create a new generation +``max_gen_nr+1``. ``can_swap`` defaults to the swap setting and, if it +is set to ``1``, it forces the scan of anon pages when swap is off. +``full_scan`` defaults to ``1`` and, if it is set to ``0``, it reduces +the overhead as well as the coverage when scanning page tables. + +A typical use case is that a job scheduler writes to ``lru_gen`` at a +certain time interval to create new generations, and it ranks the +servers it manages based on the sizes of their cold memory defined by +this time interval. 
+ +Proactive reclaim +----------------- +Proactive reclaim induces memory reclaim when there is no memory +pressure and usually targets cold memory only. E.g., when a new job +comes in, the job scheduler wants to proactively reclaim memory on the +server it has selected to improve the chance of successfully landing +this new job. + +Users can write ``- memcg_id node_id min_gen_nr [swappiness +[nr_to_reclaim]]`` to ``lru_gen`` to evict generations less than or +equal to ``min_gen_nr``. Note that ``min_gen_nr`` should be less than +``max_gen_nr-1`` as ``max_gen_nr`` and ``max_gen_nr-1`` are not fully +aged and therefore cannot be evicted. ``swappiness`` overrides the +default value in ``/proc/sys/vm/swappiness``. ``nr_to_reclaim`` limits +the number of pages to evict. + +A typical use case is that a job scheduler writes to ``lru_gen`` +before it tries to land a new job on a server, and if it fails to +materialize the cold memory without impacting the existing jobs on +this server, it retries on the next server according to the ranking +result obtained from the working set estimation step described +earlier. 
diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index d359bcfadd39a..0f86e9f931293 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -795,6 +795,7 @@ bit 1 print system memory info bit 2 print timer info bit 3 print locks info if ``CONFIG_LOCKDEP`` is on bit 4 print ftrace buffer +bit 5 print all printk messages in buffer ===== ============================================ So for example to print tasks and memory info on panic, user can:: diff --git a/Documentation/devicetree/bindings/iio/adc/xlnx,zynqmp-ams.yaml b/Documentation/devicetree/bindings/iio/adc/xlnx,zynqmp-ams.yaml index 87992db389b28..3698b4b0900f5 100644 --- a/Documentation/devicetree/bindings/iio/adc/xlnx,zynqmp-ams.yaml +++ b/Documentation/devicetree/bindings/iio/adc/xlnx,zynqmp-ams.yaml @@ -92,6 +92,10 @@ properties: description: AMS Controller register space maxItems: 1 + clocks: + items: + - description: AMS reference clock + ranges: description: Maps the child address space for PS and/or PL. @@ -181,12 +185,15 @@ properties: required: - compatible - reg + - clocks - ranges additionalProperties: false examples: - | + #include <dt-bindings/clock/xlnx-zynqmp-clk.h> + bus { #address-cells = <2>; #size-cells = <2>; @@ -196,6 +203,7 @@ examples: interrupt-parent = <&gic>; interrupts = <0 56 4>; reg = <0x0 0xffa50000 0x0 0x800>; + clocks = <&zynqmp_clk AMS_REF>; #address-cells = <1>; #size-cells = <1>; #io-channel-cells = <1>; diff --git a/Documentation/devicetree/bindings/media/i2c/hynix,hi846.yaml b/Documentation/devicetree/bindings/media/i2c/hynix,hi846.yaml index 85a8877c2f387..1e2df8cf2937b 100644 --- a/Documentation/devicetree/bindings/media/i2c/hynix,hi846.yaml +++ b/Documentation/devicetree/bindings/media/i2c/hynix,hi846.yaml @@ -49,7 +49,8 @@ properties: description: Definition of the regulator used for the VDDD power supply. 
port: - $ref: /schemas/graph.yaml#/properties/port + $ref: /schemas/graph.yaml#/$defs/port-base + unevaluatedProperties: false properties: endpoint: @@ -68,8 +69,11 @@ properties: - const: 1 - const: 2 + link-frequencies: true + required: - data-lanes + - link-frequencies required: - compatible diff --git a/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-common.yaml b/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-common.yaml index 3a82b0b27fa0a..4fca71f343109 100644 --- a/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-common.yaml +++ b/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-common.yaml @@ -88,10 +88,9 @@ allOf: - mediatek,mt2701-smi-common then: properties: - clock: - items: - minItems: 3 - maxItems: 3 + clocks: + minItems: 3 + maxItems: 3 clock-names: items: - const: apb @@ -108,10 +107,9 @@ allOf: required: - mediatek,smi properties: - clock: - items: - minItems: 3 - maxItems: 3 + clocks: + minItems: 3 + maxItems: 3 clock-names: items: - const: apb @@ -133,10 +131,9 @@ allOf: then: properties: - clock: - items: - minItems: 4 - maxItems: 4 + clocks: + minItems: 4 + maxItems: 4 clock-names: items: - const: apb @@ -146,10 +143,9 @@ allOf: else: # for gen2 HW that don't have gals properties: - clock: - items: - minItems: 2 - maxItems: 2 + clocks: + minItems: 2 + maxItems: 2 clock-names: items: - const: apb diff --git a/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-larb.yaml b/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-larb.yaml index eaeff1ada7f89..c5c32c9100457 100644 --- a/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-larb.yaml +++ b/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-larb.yaml @@ -79,11 +79,11 @@ allOf: then: properties: - clock: - items: - minItems: 3 - maxItems: 3 + clocks: + minItems: 2 + maxItems: 3 clock-names: + minItems: 2 items: - const: apb - const: smi @@ -91,10 +91,9 @@ allOf: 
else: properties: - clock: - items: - minItems: 2 - maxItems: 2 + clocks: + minItems: 2 + maxItems: 2 clock-names: items: - const: apb @@ -108,7 +107,6 @@ allOf: - mediatek,mt2701-smi-larb - mediatek,mt2712-smi-larb - mediatek,mt6779-smi-larb - - mediatek,mt8167-smi-larb - mediatek,mt8192-smi-larb - mediatek,mt8195-smi-larb diff --git a/Documentation/devicetree/bindings/memory-controllers/synopsys,ddrc-ecc.yaml b/Documentation/devicetree/bindings/memory-controllers/synopsys,ddrc-ecc.yaml index fb7ae38a9c866..e3bc6ebce0904 100644 --- a/Documentation/devicetree/bindings/memory-controllers/synopsys,ddrc-ecc.yaml +++ b/Documentation/devicetree/bindings/memory-controllers/synopsys,ddrc-ecc.yaml @@ -24,9 +24,9 @@ description: | properties: compatible: enum: + - snps,ddrc-3.80a - xlnx,zynq-ddrc-a05 - xlnx,zynqmp-ddrc-2.40a - - snps,ddrc-3.80a interrupts: maxItems: 1 @@ -43,7 +43,9 @@ allOf: properties: compatible: contains: - const: xlnx,zynqmp-ddrc-2.40a + enum: + - snps,ddrc-3.80a + - xlnx,zynqmp-ddrc-2.40a then: required: - interrupts diff --git a/Documentation/devicetree/bindings/mtd/nand-controller.yaml b/Documentation/devicetree/bindings/mtd/nand-controller.yaml index bd217e6f5018a..5cd144a9ec992 100644 --- a/Documentation/devicetree/bindings/mtd/nand-controller.yaml +++ b/Documentation/devicetree/bindings/mtd/nand-controller.yaml @@ -55,7 +55,7 @@ patternProperties: properties: reg: description: - Contains the native Ready/Busy IDs. + Contains the chip-select IDs. 
nand-ecc-engine: allOf: @@ -184,7 +184,7 @@ examples: nand-use-soft-ecc-engine; nand-ecc-algo = "bch"; - /* controller specific properties */ + /* NAND chip specific properties */ }; nand@1 { diff --git a/Documentation/devicetree/bindings/net/snps,dwmac.yaml b/Documentation/devicetree/bindings/net/snps,dwmac.yaml index 7eb43707e601d..c421e4e306a1b 100644 --- a/Documentation/devicetree/bindings/net/snps,dwmac.yaml +++ b/Documentation/devicetree/bindings/net/snps,dwmac.yaml @@ -53,20 +53,18 @@ properties: - allwinner,sun8i-r40-gmac - allwinner,sun8i-v3s-emac - allwinner,sun50i-a64-emac - - loongson,ls2k-dwmac - - loongson,ls7a-dwmac - amlogic,meson6-dwmac - amlogic,meson8b-dwmac - amlogic,meson8m2-dwmac - amlogic,meson-gxbb-dwmac - amlogic,meson-axg-dwmac - - loongson,ls2k-dwmac - - loongson,ls7a-dwmac - ingenic,jz4775-mac - ingenic,x1000-mac - ingenic,x1600-mac - ingenic,x1830-mac - ingenic,x2000-mac + - loongson,ls2k-dwmac + - loongson,ls7a-dwmac - rockchip,px30-gmac - rockchip,rk3128-gmac - rockchip,rk3228-gmac diff --git a/Documentation/devicetree/bindings/pinctrl/microchip,sparx5-sgpio.yaml b/Documentation/devicetree/bindings/pinctrl/microchip,sparx5-sgpio.yaml index cb554084bdf11..0df4e114fdd69 100644 --- a/Documentation/devicetree/bindings/pinctrl/microchip,sparx5-sgpio.yaml +++ b/Documentation/devicetree/bindings/pinctrl/microchip,sparx5-sgpio.yaml @@ -145,7 +145,7 @@ examples: clocks = <&sys_clk>; pinctrl-0 = <&sgpio2_pins>; pinctrl-names = "default"; - reg = <0x1101059c 0x100>; + reg = <0x1101059c 0x118>; microchip,sgpio-port-ranges = <0 0>, <16 18>, <28 31>; bus-frequency = <25000000>; sgpio_in2: gpio@0 { diff --git a/Documentation/devicetree/bindings/pinctrl/pinctrl-mt8195.yaml b/Documentation/devicetree/bindings/pinctrl/pinctrl-mt8195.yaml index 328ea59c5466f..8299662c2c096 100644 --- a/Documentation/devicetree/bindings/pinctrl/pinctrl-mt8195.yaml +++ b/Documentation/devicetree/bindings/pinctrl/pinctrl-mt8195.yaml @@ -99,6 +99,14 @@ patternProperties: 
enum: [2, 4, 6, 8, 10, 12, 14, 16] bias-pull-down: + oneOf: + - type: boolean + - enum: [100, 101, 102, 103] + description: mt8195 pull down PUPD/R0/R1 type define value. + - enum: [200, 201, 202, 203, 204, 205, 206, 207] + description: mt8195 pull down RSEL type define value. + - enum: [75000, 5000] + description: mt8195 pull down RSEL type si unit value(ohm). description: | For pull down type is normal, it don't need add RSEL & R1R0 define and resistance value. @@ -115,13 +123,6 @@ patternProperties: & "MTK_PULL_SET_RSEL_110" & "MTK_PULL_SET_RSEL_111" define in mt8195. It can also support resistance value(ohm) "75000" & "5000" in mt8195. - oneOf: - - enum: [100, 101, 102, 103] - - description: mt8195 pull down PUPD/R0/R1 type define value. - - enum: [200, 201, 202, 203, 204, 205, 206, 207] - - description: mt8195 pull down RSEL type define value. - - enum: [75000, 5000] - - description: mt8195 pull down RSEL type si unit value(ohm). An example of using RSEL define: pincontroller { @@ -146,6 +147,14 @@ patternProperties: }; bias-pull-up: + oneOf: + - type: boolean + - enum: [100, 101, 102, 103] + description: mt8195 pull up PUPD/R0/R1 type define value. + - enum: [200, 201, 202, 203, 204, 205, 206, 207] + description: mt8195 pull up RSEL type define value. + - enum: [1000, 1500, 2000, 3000, 4000, 5000, 10000, 75000] + description: mt8195 pull up RSEL type si unit value(ohm). description: | For pull up type is normal, it don't need add RSEL & R1R0 define and resistance value. @@ -163,13 +172,6 @@ patternProperties: define in mt8195. It can also support resistance value(ohm) "1000" & "1500" & "2000" & "3000" & "4000" & "5000" & "10000" & "75000" in mt8195. - oneOf: - - enum: [100, 101, 102, 103] - - description: mt8195 pull up PUPD/R0/R1 type define value. - - enum: [200, 201, 202, 203, 204, 205, 206, 207] - - description: mt8195 pull up RSEL type define value. 
- - enum: [1000, 1500, 2000, 3000, 4000, 5000, 10000, 75000] - - description: mt8195 pull up RSEL type si unit value(ohm). An example of using RSEL define: pincontroller { i2c0-pins { diff --git a/Documentation/devicetree/bindings/spi/nvidia,tegra210-quad.yaml b/Documentation/devicetree/bindings/spi/nvidia,tegra210-quad.yaml index 35a8045b2c70d..53627c6e2ae32 100644 --- a/Documentation/devicetree/bindings/spi/nvidia,tegra210-quad.yaml +++ b/Documentation/devicetree/bindings/spi/nvidia,tegra210-quad.yaml @@ -106,7 +106,7 @@ examples: dma-names = "rx", "tx"; flash@0 { - compatible = "spi-nor"; + compatible = "jedec,spi-nor"; reg = <0>; spi-max-frequency = <104000000>; spi-tx-bus-width = <2>; diff --git a/Documentation/devicetree/bindings/spi/spi-mxic.txt b/Documentation/devicetree/bindings/spi/spi-mxic.txt index 529f2dab2648a..7bcbb229b78bb 100644 --- a/Documentation/devicetree/bindings/spi/spi-mxic.txt +++ b/Documentation/devicetree/bindings/spi/spi-mxic.txt @@ -8,11 +8,13 @@ Required properties: - reg: should contain 2 entries, one for the registers and one for the direct mapping area - reg-names: should contain "regs" and "dirmap" -- interrupts: interrupt line connected to the SPI controller - clock-names: should contain "ps_clk", "send_clk" and "send_dly_clk" - clocks: should contain 3 entries for the "ps_clk", "send_clk" and "send_dly_clk" clocks +Optional properties: +- interrupts: interrupt line connected to the SPI controller + Example: spi@43c30000 { diff --git a/Documentation/devicetree/bindings/usb/usb-hcd.yaml b/Documentation/devicetree/bindings/usb/usb-hcd.yaml index 56853c17af667..1dc3d5d7b44fe 100644 --- a/Documentation/devicetree/bindings/usb/usb-hcd.yaml +++ b/Documentation/devicetree/bindings/usb/usb-hcd.yaml @@ -33,7 +33,7 @@ patternProperties: "^.*@[0-9a-f]{1,2}$": description: The hard wired USB devices type: object - $ref: /usb/usb-device.yaml + $ref: /schemas/usb/usb-device.yaml additionalProperties: true diff --git 
a/Documentation/driver-api/cxl/memory-devices.rst b/Documentation/driver-api/cxl/memory-devices.rst index 3b8f41395f6b5..c8f7a16cd0e3c 100644 --- a/Documentation/driver-api/cxl/memory-devices.rst +++ b/Documentation/driver-api/cxl/memory-devices.rst @@ -36,10 +36,10 @@ CXL Core .. kernel-doc:: drivers/cxl/cxl.h :internal: -.. kernel-doc:: drivers/cxl/core/bus.c +.. kernel-doc:: drivers/cxl/core/port.c :doc: cxl core -.. kernel-doc:: drivers/cxl/core/bus.c +.. kernel-doc:: drivers/cxl/core/port.c :identifiers: .. kernel-doc:: drivers/cxl/core/pmem.c diff --git a/Documentation/hwmon/asus_ec_sensors.rst b/Documentation/hwmon/asus_ec_sensors.rst new file mode 100644 index 0000000000000..e7e8f1640f457 --- /dev/null +++ b/Documentation/hwmon/asus_ec_sensors.rst @@ -0,0 +1,54 @@ +.. SPDX-License-Identifier: GPL-2.0-or-later + +Kernel driver asus_ec_sensors +================================= + +Supported boards: + * PRIME X570-PRO, + * Pro WS X570-ACE, + * ROG CROSSHAIR VIII DARK HERO, + * ROG CROSSHAIR VIII HERO (WI-FI) + * ROG CROSSHAIR VIII FORMULA, + * ROG CROSSHAIR VIII HERO, + * ROG CROSSHAIR VIII IMPACT, + * ROG STRIX B550-E GAMING, + * ROG STRIX B550-I GAMING, + * ROG STRIX X570-E GAMING, + * ROG STRIX X570-F GAMING, + * ROG STRIX X570-I GAMING + +Authors: + - Eugene Shalygin + +Description: +------------ +ASUS mainboards publish hardware monitoring information via Super I/O +chip and the ACPI embedded controller (EC) registers. Some of the sensors +are only available via the EC. + +The driver is aware of and reads the following sensors: + +1. Chipset (PCH) temperature +2. CPU package temperature +3. Motherboard temperature +4. Readings from the T_Sensor header +5. VRM temperature +6. CPU_Opt fan RPM +7. VRM heatsink fan RPM +8. Chipset fan RPM +9. Readings from the "Water flow meter" header (RPM) +10. Readings from the "Water In" and "Water Out" temperature headers +11. CPU current +12. 
CPU core voltage + +Sensor values are read from EC registers, and to avoid a race with the board +firmware, the driver acquires the ACPI mutex, the one used by the WMI when its +methods access the EC. + +Module Parameters +----------------- + * mutex_path: string + The driver holds the path to the ACPI mutex for each board (actually, + the path is mostly identical for them). If ASUS changes this path + in a future BIOS update, this parameter can be used to override + the value stored in the driver until it gets updated. diff --git a/Documentation/hwmon/asus_wmi_ec_sensors.rst b/Documentation/hwmon/asus_wmi_ec_sensors.rst deleted file mode 100644 index 1b287f229e86c..0000000000000 --- a/Documentation/hwmon/asus_wmi_ec_sensors.rst +++ /dev/null @@ -1,38 +0,0 @@ -.. SPDX-License-Identifier: GPL-2.0-or-later - -Kernel driver asus_wmi_ec_sensors -================================= - -Supported boards: - * PRIME X570-PRO, - * Pro WS X570-ACE, - * ROG CROSSHAIR VIII DARK HERO, - * ROG CROSSHAIR VIII FORMULA, - * ROG CROSSHAIR VIII HERO, - * ROG STRIX B550-E GAMING, - * ROG STRIX B550-I GAMING, - * ROG STRIX X570-E GAMING. - -Authors: - - Eugene Shalygin - -Description: ------------- -ASUS mainboards publish hardware monitoring information via Super I/O -chip and the ACPI embedded controller (EC) registers. Some of the sensors -are only available via the EC. - -ASUS WMI interface provides a method (BREC) to read data from EC registers, -which is utilized by this driver to publish those sensor readings to the -HWMON system. The driver is aware of and reads the following sensors: - -1. Chipset (PCH) temperature -2. CPU package temperature -3. Motherboard temperature -4. Readings from the T_Sensor header -5. VRM temperature -6. CPU_Opt fan RPM -7. Chipset fan RPM -8. Readings from the "Water flow meter" header (RPM) -9. Readings from the "Water In" and "Water Out" temperature headers -10. 
CPU current diff --git a/Documentation/process/stable-kernel-rules.rst b/Documentation/process/stable-kernel-rules.rst index 003c865e9c212..fbcb48bc2a903 100644 --- a/Documentation/process/stable-kernel-rules.rst +++ b/Documentation/process/stable-kernel-rules.rst @@ -168,7 +168,16 @@ Trees - The finalized and tagged releases of all stable kernels can be found in separate branches per version at: - https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git + https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git + + - The release candidate of all stable kernel versions can be found at: + + https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git/ + + .. warning:: + The -stable-rc tree is a snapshot in time of the stable-queue tree and + will change frequently, hence will be rebased often. It should only be + used for testing purposes (e.g. to be consumed by CI systems). Review committee diff --git a/Documentation/security/SCTP.rst b/Documentation/security/SCTP.rst index d5fd6ccc3dcbd..b73eb764a0017 100644 --- a/Documentation/security/SCTP.rst +++ b/Documentation/security/SCTP.rst @@ -15,10 +15,7 @@ For security module support, three SCTP specific hooks have been implemented:: security_sctp_assoc_request() security_sctp_bind_connect() security_sctp_sk_clone() - -Also the following security hook has been utilised:: - - security_inet_conn_established() + security_sctp_assoc_established() The usage of these hooks are described below with the SELinux implementation described in the `SCTP SELinux Support`_ chapter. @@ -122,11 +119,12 @@ calls **sctp_peeloff**\(3). @newsk - pointer to new sock structure. -security_inet_conn_established() -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Called when a COOKIE ACK is received:: +security_sctp_assoc_established() +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Called when a COOKIE ACK is received, and the peer secid will be +saved into ``@asoc->peer_secid`` for client:: - @sk - pointer to sock structure. 
+ @asoc - pointer to sctp association structure. @skb - pointer to skbuff of the COOKIE ACK packet. @@ -134,7 +132,7 @@ Security Hooks used for Association Establishment ------------------------------------------------- The following diagram shows the use of ``security_sctp_bind_connect()``, -``security_sctp_assoc_request()``, ``security_inet_conn_established()`` when +``security_sctp_assoc_request()``, ``security_sctp_assoc_established()`` when establishing an association. :: @@ -172,7 +170,7 @@ establishing an association. <------------------------------------------- COOKIE ACK | | sctp_sf_do_5_1E_ca | - Call security_inet_conn_established() | + Call security_sctp_assoc_established() | to set the peer label. | | | | If SCTP_SOCKET_TCP or peeled off @@ -198,7 +196,7 @@ hooks with the SELinux specifics expanded below:: security_sctp_assoc_request() security_sctp_bind_connect() security_sctp_sk_clone() - security_inet_conn_established() + security_sctp_assoc_established() security_sctp_assoc_request() @@ -271,12 +269,12 @@ sockets sid and peer sid to that contained in the ``@asoc sid`` and @newsk - pointer to new sock structure. -security_inet_conn_established() -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +security_sctp_assoc_established() +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Called when a COOKIE ACK is received where it sets the connection's peer sid to that in ``@skb``:: - @sk - pointer to sock structure. + @asoc - pointer to sctp association structure. @skb - pointer to skbuff of the COOKIE ACK packet. 
diff --git a/Documentation/sound/hd-audio/models.rst b/Documentation/sound/hd-audio/models.rst index d25335993e553..9b52f50a68542 100644 --- a/Documentation/sound/hd-audio/models.rst +++ b/Documentation/sound/hd-audio/models.rst @@ -261,6 +261,10 @@ alc-sense-combo huawei-mbx-stereo Enable initialization verbs for Huawei MBX stereo speakers; might be risky, try this at your own risk +alc298-samsung-headphone + Samsung laptops with ALC298 +alc256-samsung-headphone + Samsung laptops with ALC256 ALC66x/67x/892 ============== diff --git a/Documentation/sphinx/requirements.txt b/Documentation/sphinx/requirements.txt index 9a35f50798a65..2c573541ab712 100644 --- a/Documentation/sphinx/requirements.txt +++ b/Documentation/sphinx/requirements.txt @@ -1,2 +1,4 @@ +# jinja2>=3.1 is not compatible with Sphinx<4.0 +jinja2<3.1 sphinx_rtd_theme Sphinx==2.4.4 diff --git a/Documentation/virt/kvm/devices/vcpu.rst b/Documentation/virt/kvm/devices/vcpu.rst index 60a29972d3f1b..d063aaee5bb73 100644 --- a/Documentation/virt/kvm/devices/vcpu.rst +++ b/Documentation/virt/kvm/devices/vcpu.rst @@ -70,7 +70,7 @@ irqchip. -ENODEV PMUv3 not supported or GIC not initialized -ENXIO PMUv3 not properly configured or in-kernel irqchip not configured as required prior to calling this attribute - -EBUSY PMUv3 already initialized + -EBUSY PMUv3 already initialized or a VCPU has already run -EINVAL Invalid filter range ======= ====================================================== diff --git a/Documentation/vm/index.rst b/Documentation/vm/index.rst index 44365c4574a37..b484343002269 100644 --- a/Documentation/vm/index.rst +++ b/Documentation/vm/index.rst @@ -25,6 +25,7 @@ algorithms. 
If you are looking for advice on simply allocating memory, see the ksm memory-model mmu_notifier + multigen_lru numa overcommit-accounting page_migration diff --git a/Documentation/vm/multigen_lru.rst b/Documentation/vm/multigen_lru.rst new file mode 100644 index 0000000000000..cde60de16621b --- /dev/null +++ b/Documentation/vm/multigen_lru.rst @@ -0,0 +1,156 @@ +.. SPDX-License-Identifier: GPL-2.0 + +============= +Multi-Gen LRU +============= + +Design overview +=============== +Objectives +---------- +The design objectives are: + +* Good representation of access recency +* Try to profit from spatial locality +* Fast paths to make obvious choices +* Simple self-correcting heuristics + +The representation of access recency is at the core of all LRU +implementations. In the multi-gen LRU, each generation represents a +group of pages with similar access recency. Generations establish a +common frame of reference and therefore help make better choices, +e.g., between different memcgs on a computer or different computers in +a data center (for job scheduling). + +Exploiting spatial locality improves efficiency when gathering the +accessed bit. A rmap walk targets a single page and does not try to +profit from discovering a young PTE. A page table walk can sweep all +the young PTEs in an address space, but the address space can be too +large to make a profit. The key is to optimize both methods and use +them in combination. + +Fast paths reduce code complexity and runtime overhead. Unmapped pages +do not require TLB flushes; clean pages do not require writeback. +These facts are only helpful when other conditions, e.g., access +recency, are similar. With generations as a common frame of reference, +additional factors stand out. But obvious choices might not be good +choices; thus self-correction is required. + +The benefits of simple self-correcting heuristics are self-evident. +Again, with generations as a common frame of reference, this becomes +attainable. 
Specifically, pages in the same generation can be +categorized based on additional factors, and a feedback loop can +statistically compare the refault percentages across those categories +and infer which of them are better choices. + +Assumptions +----------- +The protection of hot pages and the selection of cold pages are based +on page access channels and patterns. There are two access channels: + +* Accesses through page tables +* Accesses through file descriptors + +The protection of the former channel is by design stronger because: + +1. The uncertainty in determining the access patterns of the former + channel is higher due to the approximation of the accessed bit. +2. The cost of evicting the former channel is higher due to the TLB + flushes required and the likelihood of encountering the dirty bit. +3. The penalty of underprotecting the former channel is higher because + applications usually do not prepare themselves for major page + faults like they do for blocked I/O. E.g., GUI applications + commonly use dedicated I/O threads to avoid blocking the rendering + threads. + +There are also two access patterns: + +* Accesses exhibiting temporal locality +* Accesses not exhibiting temporal locality + +For the reasons listed above, the former channel is assumed to follow +the former pattern unless ``VM_SEQ_READ`` or ``VM_RAND_READ`` is +present, and the latter channel is assumed to follow the latter +pattern unless outlying refaults have been observed. + +Workflow overview +================= +Evictable pages are divided into multiple generations for each +``lruvec``. The youngest generation number is stored in +``lrugen->max_seq`` for both anon and file types as they are aged on +an equal footing. The oldest generation numbers are stored in +``lrugen->min_seq[]`` separately for anon and file types as clean file +pages can be evicted regardless of swap constraints. These three +variables are monotonically increasing. 
+ +Generation numbers are truncated into ``order_base_2(MAX_NR_GENS+1)`` +bits in order to fit into the gen counter in ``folio->flags``. Each +truncated generation number is an index to ``lrugen->lists[]``. The +sliding window technique is used to track at least ``MIN_NR_GENS`` and +at most ``MAX_NR_GENS`` generations. The gen counter stores a value +within ``[1, MAX_NR_GENS]`` while a page is on one of +``lrugen->lists[]``; otherwise it stores zero. + +Each generation is divided into multiple tiers. Tiers represent +different ranges of numbers of accesses through file descriptors. A +page accessed ``N`` times through file descriptors is in tier +``order_base_2(N)``. In contrast to moving across generations, which +requires the LRU lock, moving across tiers only requires operations on +``folio->flags`` and therefore has a negligible cost. A feedback loop +modeled after the PID controller monitors refaults over all the tiers +from anon and file types and decides which tiers from which types to +evict or protect. + +There are two conceptually independent procedures: the aging and the +eviction. They form a closed-loop system, i.e., the page reclaim. + +Aging +----- +The aging produces young generations. Given an ``lruvec``, it +increments ``max_seq`` when ``max_seq-min_seq+1`` approaches +``MIN_NR_GENS``. The aging promotes hot pages to the youngest +generation when it finds them accessed through page tables; the +demotion of cold pages happens consequently when it increments +``max_seq``. The aging uses page table walks and rmap walks to find +young PTEs. For the former, it iterates ``lruvec_memcg()->mm_list`` +and calls ``walk_page_range()`` with each ``mm_struct`` on this list +to scan PTEs. On finding a young PTE, it clears the accessed bit and +updates the gen counter of the page mapped by this PTE to +``(max_seq%MAX_NR_GENS)+1``. After each iteration of this list, it +increments ``max_seq``. 
For the latter, when the eviction walks the +rmap and finds a young PTE, the aging scans the adjacent PTEs and +follows the same steps just described. + +Eviction +-------- +The eviction consumes old generations. Given an ``lruvec``, it +increments ``min_seq`` when ``lrugen->lists[]`` indexed by +``min_seq%MAX_NR_GENS`` becomes empty. To select a type and a tier to +evict from, it first compares ``min_seq[]`` to select the older type. +If both types are equally old, it selects the one whose first tier has +a lower refault percentage. The first tier contains single-use +unmapped clean pages, which are the best bet. The eviction sorts a +page according to the gen counter if the aging has found this page +accessed through page tables and updated the gen counter. It also +moves a page to the next generation, i.e., ``min_seq+1``, if this page +was accessed multiple times through file descriptors and the feedback +loop has detected outlying refaults from the tier this page is in. To +do this, the feedback loop uses the first tier as the baseline, for +the reason stated earlier. + +Summary +------- +The multi-gen LRU can be disassembled into the following parts: + +* Generations +* Page table walks +* Rmap walks +* Bloom filters +* The PID controller + +The aging and the eviction form a producer-consumer model; specifically, +the latter drives the former by the sliding window over generations. +Within the aging, rmap walks drive page table walks by inserting hot +densely populated page tables to the Bloom filters. Within the +eviction, the PID controller uses refaults as the feedback to select +types to evict and tiers to protect.
diff --git a/MAINTAINERS b/MAINTAINERS index cd0f68d4a34a6..c79ea7056d472 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3056,6 +3056,12 @@ L: linux-hwmon@vger.kernel.org S: Maintained F: drivers/hwmon/asus_wmi_ec_sensors.c +ASUS EC HARDWARE MONITOR DRIVER +M: Eugene Shalygin +L: linux-hwmon@vger.kernel.org +S: Maintained +F: drivers/hwmon/asus-ec-sensors.c + ASUS WIRELESS RADIO CONTROL DRIVER M: João Paulo Rechi Vita L: platform-driver-x86@vger.kernel.org @@ -11113,6 +11119,13 @@ F: Documentation/litmus-tests/ F: Documentation/memory-barriers.txt F: tools/memory-model/ +LINUX RANDOM NUMBER GENERATOR (LRNG) DRIVER +M: Stephan Mueller +S: Maintained +W: https://www.chronox.de/lrng.html +F: drivers/char/lrng/ +F: include/linux/lrng.h + LIS3LV02D ACCELEROMETER DRIVER M: Eric Piel S: Maintained @@ -16373,8 +16386,7 @@ M: Linus Walleij M: Alvin Šipraga S: Maintained F: Documentation/devicetree/bindings/net/dsa/realtek-smi.txt -F: drivers/net/dsa/realtek-smi* -F: drivers/net/dsa/rtl83* +F: drivers/net/dsa/realtek/* REALTEK WIRELESS DRIVER (rtlwifi family) M: Ping-Ke Shih diff --git a/Makefile b/Makefile index 7214f075e1f06..607a090241194 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 17 SUBLEVEL = 0 -EXTRAVERSION = +EXTRAVERSION = -pf3 NAME = Superb Owl # *DOCUMENTATION* diff --git a/arch/Kconfig b/arch/Kconfig index 678a80713b213..b682d6bf2eb87 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -1162,6 +1162,7 @@ config HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET config RANDOMIZE_KSTACK_OFFSET_DEFAULT bool "Randomize kernel stack offset on syscall entry" depends on HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET + depends on INIT_STACK_NONE || !CC_IS_CLANG || CLANG_VERSION >= 140000 help The kernel stack offset can be randomized (after pt_regs) by roughly 5 bits of entropy, frustrating memory corruption @@ -1322,6 +1323,15 @@ config DYNAMIC_SIGFRAME config HAVE_ARCH_NODE_DEV_GROUP bool +config ARCH_HAS_NONLEAF_PMD_YOUNG + bool + depends on PGTABLE_LEVELS > 2 + help 
+ Architectures that select this option are capable of setting the + accessed bit in non-leaf PMD entries when using them as part of linear + address translations. Page table walkers that clear the accessed bit + may use this capability to reduce their search space. + source "kernel/gcov/Kconfig" source "scripts/gcc-plugins/Kconfig" diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl index 3515bc4f16a4f..00ff721da300e 100644 --- a/arch/alpha/kernel/syscalls/syscall.tbl +++ b/arch/alpha/kernel/syscalls/syscall.tbl @@ -490,3 +490,4 @@ 558 common process_mrelease sys_process_mrelease 559 common futex_waitv sys_futex_waitv 560 common set_mempolicy_home_node sys_ni_syscall +561 common pmadv_ksm sys_pmadv_ksm diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c index 8e90052f6f056..5f7f5aab361f1 100644 --- a/arch/arc/kernel/process.c +++ b/arch/arc/kernel/process.c @@ -43,7 +43,7 @@ SYSCALL_DEFINE0(arc_gettls) return task_thread_info(current)->thr_ptr; } -SYSCALL_DEFINE3(arc_usr_cmpxchg, int *, uaddr, int, expected, int, new) +SYSCALL_DEFINE3(arc_usr_cmpxchg, int __user *, uaddr, int, expected, int, new) { struct pt_regs *regs = current_pt_regs(); u32 uval; diff --git a/arch/arm/boot/dts/bcm2711.dtsi b/arch/arm/boot/dts/bcm2711.dtsi index 21294f775a20f..89af57482bc8f 100644 --- a/arch/arm/boot/dts/bcm2711.dtsi +++ b/arch/arm/boot/dts/bcm2711.dtsi @@ -459,12 +459,26 @@ #size-cells = <0>; enable-method = "brcm,bcm2836-smp"; // for ARM 32-bit + /* Source for d/i-cache-line-size and d/i-cache-sets + * https://developer.arm.com/documentation/100095/0003 + * /Level-1-Memory-System/About-the-L1-memory-system?lang=en + * Source for d/i-cache-size + * https://www.raspberrypi.com/documentation/computers + * /processors.html#bcm2711 + */ cpu0: cpu@0 { device_type = "cpu"; compatible = "arm,cortex-a72"; reg = <0>; enable-method = "spin-table"; cpu-release-addr = <0x0 0x000000d8>; + d-cache-size = <0x8000>; + d-cache-line-size = 
<64>; + d-cache-sets = <256>; // 32KiB(size)/64(line-size)=512ways/2-way set + i-cache-size = <0xc000>; + i-cache-line-size = <64>; + i-cache-sets = <256>; // 48KiB(size)/64(line-size)=768ways/3-way set + next-level-cache = <&l2>; }; cpu1: cpu@1 { @@ -473,6 +487,13 @@ reg = <1>; enable-method = "spin-table"; cpu-release-addr = <0x0 0x000000e0>; + d-cache-size = <0x8000>; + d-cache-line-size = <64>; + d-cache-sets = <256>; // 32KiB(size)/64(line-size)=512ways/2-way set + i-cache-size = <0xc000>; + i-cache-line-size = <64>; + i-cache-sets = <256>; // 48KiB(size)/64(line-size)=768ways/3-way set + next-level-cache = <&l2>; }; cpu2: cpu@2 { @@ -481,6 +502,13 @@ reg = <2>; enable-method = "spin-table"; cpu-release-addr = <0x0 0x000000e8>; + d-cache-size = <0x8000>; + d-cache-line-size = <64>; + d-cache-sets = <256>; // 32KiB(size)/64(line-size)=512ways/2-way set + i-cache-size = <0xc000>; + i-cache-line-size = <64>; + i-cache-sets = <256>; // 48KiB(size)/64(line-size)=768ways/3-way set + next-level-cache = <&l2>; }; cpu3: cpu@3 { @@ -489,6 +517,28 @@ reg = <3>; enable-method = "spin-table"; cpu-release-addr = <0x0 0x000000f0>; + d-cache-size = <0x8000>; + d-cache-line-size = <64>; + d-cache-sets = <256>; // 32KiB(size)/64(line-size)=512ways/2-way set + i-cache-size = <0xc000>; + i-cache-line-size = <64>; + i-cache-sets = <256>; // 48KiB(size)/64(line-size)=768ways/3-way set + next-level-cache = <&l2>; + }; + + /* Source for d/i-cache-line-size and d/i-cache-sets + * https://developer.arm.com/documentation/100095/0003 + * /Level-2-Memory-System/About-the-L2-memory-system?lang=en + * Source for d/i-cache-size + * https://www.raspberrypi.com/documentation/computers + * /processors.html#bcm2711 + */ + l2: l2-cache0 { + compatible = "cache"; + cache-size = <0x100000>; + cache-line-size = <64>; + cache-sets = <1024>; // 1MiB(size)/64(line-size)=16384ways/16-way set + cache-level = <2>; }; }; diff --git a/arch/arm/boot/dts/bcm2837.dtsi b/arch/arm/boot/dts/bcm2837.dtsi index 
0199ec98cd616..5dbdebc462594 100644 --- a/arch/arm/boot/dts/bcm2837.dtsi +++ b/arch/arm/boot/dts/bcm2837.dtsi @@ -40,12 +40,26 @@ #size-cells = <0>; enable-method = "brcm,bcm2836-smp"; // for ARM 32-bit + /* Source for d/i-cache-line-size and d/i-cache-sets + * https://developer.arm.com/documentation/ddi0500/e/level-1-memory-system + * /about-the-l1-memory-system?lang=en + * + * Source for d/i-cache-size + * https://magpi.raspberrypi.com/articles/raspberry-pi-3-specs-benchmarks + */ cpu0: cpu@0 { device_type = "cpu"; compatible = "arm,cortex-a53"; reg = <0>; enable-method = "spin-table"; cpu-release-addr = <0x0 0x000000d8>; + d-cache-size = <0x8000>; + d-cache-line-size = <64>; + d-cache-sets = <128>; // 32KiB(size)/64(line-size)=512ways/4-way set + i-cache-size = <0x8000>; + i-cache-line-size = <64>; + i-cache-sets = <256>; // 32KiB(size)/64(line-size)=512ways/2-way set + next-level-cache = <&l2>; }; cpu1: cpu@1 { @@ -54,6 +68,13 @@ reg = <1>; enable-method = "spin-table"; cpu-release-addr = <0x0 0x000000e0>; + d-cache-size = <0x8000>; + d-cache-line-size = <64>; + d-cache-sets = <128>; // 32KiB(size)/64(line-size)=512ways/4-way set + i-cache-size = <0x8000>; + i-cache-line-size = <64>; + i-cache-sets = <256>; // 32KiB(size)/64(line-size)=512ways/2-way set + next-level-cache = <&l2>; }; cpu2: cpu@2 { @@ -62,6 +83,13 @@ reg = <2>; enable-method = "spin-table"; cpu-release-addr = <0x0 0x000000e8>; + d-cache-size = <0x8000>; + d-cache-line-size = <64>; + d-cache-sets = <128>; // 32KiB(size)/64(line-size)=512ways/4-way set + i-cache-size = <0x8000>; + i-cache-line-size = <64>; + i-cache-sets = <256>; // 32KiB(size)/64(line-size)=512ways/2-way set + next-level-cache = <&l2>; }; cpu3: cpu@3 { @@ -70,6 +98,27 @@ reg = <3>; enable-method = "spin-table"; cpu-release-addr = <0x0 0x000000f0>; + d-cache-size = <0x8000>; + d-cache-line-size = <64>; + d-cache-sets = <128>; // 32KiB(size)/64(line-size)=512ways/4-way set + i-cache-size = <0x8000>; + i-cache-line-size = <64>; + 
i-cache-sets = <256>; // 32KiB(size)/64(line-size)=512ways/2-way set + next-level-cache = <&l2>; + }; + + /* Source for cache-line-size + cache-sets + * https://developer.arm.com/documentation/ddi0500 + * /e/level-2-memory-system/about-the-l2-memory-system?lang=en + * Source for cache-size + * https://datasheets.raspberrypi.com/cm/cm1-and-cm3-datasheet.pdf + */ + l2: l2-cache0 { + compatible = "cache"; + cache-size = <0x80000>; + cache-line-size = <64>; + cache-sets = <512>; // 512KiB(size)/64(line-size)=8192ways/16-way set + cache-level = <2>; }; }; }; diff --git a/arch/arm/boot/dts/dra7-l4.dtsi b/arch/arm/boot/dts/dra7-l4.dtsi index 956a26d52a4c3..0a11bacffc1f1 100644 --- a/arch/arm/boot/dts/dra7-l4.dtsi +++ b/arch/arm/boot/dts/dra7-l4.dtsi @@ -3482,8 +3482,7 @@ ti,timer-pwm; }; }; - - target-module@2c000 { /* 0x4882c000, ap 17 02.0 */ + timer15_target: target-module@2c000 { /* 0x4882c000, ap 17 02.0 */ compatible = "ti,sysc-omap4-timer", "ti,sysc"; reg = <0x2c000 0x4>, <0x2c010 0x4>; @@ -3511,7 +3510,7 @@ }; }; - target-module@2e000 { /* 0x4882e000, ap 19 14.0 */ + timer16_target: target-module@2e000 { /* 0x4882e000, ap 19 14.0 */ compatible = "ti,sysc-omap4-timer", "ti,sysc"; reg = <0x2e000 0x4>, <0x2e010 0x4>; diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi index 42bff117656cf..97ce0c4f1df7e 100644 --- a/arch/arm/boot/dts/dra7.dtsi +++ b/arch/arm/boot/dts/dra7.dtsi @@ -1339,20 +1339,20 @@ }; /* Local timers, see ARM architected timer wrap erratum i940 */ -&timer3_target { +&timer15_target { ti,no-reset-on-init; ti,no-idle; timer@0 { - assigned-clocks = <&l4per_clkctrl DRA7_L4PER_TIMER3_CLKCTRL 24>; + assigned-clocks = <&l4per3_clkctrl DRA7_L4PER3_TIMER15_CLKCTRL 24>; assigned-clock-parents = <&timer_sys_clk_div>; }; }; -&timer4_target { +&timer16_target { ti,no-reset-on-init; ti,no-idle; timer@0 { - assigned-clocks = <&l4per_clkctrl DRA7_L4PER_TIMER4_CLKCTRL 24>; + assigned-clocks = <&l4per3_clkctrl DRA7_L4PER3_TIMER16_CLKCTRL 24>; 
assigned-clock-parents = <&timer_sys_clk_div>; }; }; diff --git a/arch/arm/boot/dts/exynos5250-pinctrl.dtsi b/arch/arm/boot/dts/exynos5250-pinctrl.dtsi index d31a68672bfac..d7d756614edd1 100644 --- a/arch/arm/boot/dts/exynos5250-pinctrl.dtsi +++ b/arch/arm/boot/dts/exynos5250-pinctrl.dtsi @@ -260,7 +260,7 @@ }; uart3_data: uart3-data { - samsung,pins = "gpa1-4", "gpa1-4"; + samsung,pins = "gpa1-4", "gpa1-5"; samsung,pin-function = ; samsung,pin-pud = ; samsung,pin-drv = ; diff --git a/arch/arm/boot/dts/exynos5250-smdk5250.dts b/arch/arm/boot/dts/exynos5250-smdk5250.dts index 39bbe18145cf2..f042954bdfa5d 100644 --- a/arch/arm/boot/dts/exynos5250-smdk5250.dts +++ b/arch/arm/boot/dts/exynos5250-smdk5250.dts @@ -118,6 +118,9 @@ status = "okay"; ddc = <&i2c_2>; hpd-gpios = <&gpx3 7 GPIO_ACTIVE_HIGH>; + vdd-supply = <&ldo8_reg>; + vdd_osc-supply = <&ldo10_reg>; + vdd_pll-supply = <&ldo8_reg>; }; &i2c_0 { diff --git a/arch/arm/boot/dts/exynos5420-smdk5420.dts b/arch/arm/boot/dts/exynos5420-smdk5420.dts index a4f0e3ffedbd3..07f65213aae65 100644 --- a/arch/arm/boot/dts/exynos5420-smdk5420.dts +++ b/arch/arm/boot/dts/exynos5420-smdk5420.dts @@ -124,6 +124,9 @@ hpd-gpios = <&gpx3 7 GPIO_ACTIVE_HIGH>; pinctrl-names = "default"; pinctrl-0 = <&hdmi_hpd_irq>; + vdd-supply = <&ldo6_reg>; + vdd_osc-supply = <&ldo7_reg>; + vdd_pll-supply = <&ldo6_reg>; }; &hsi2c_4 { diff --git a/arch/arm/boot/dts/imx53-m53menlo.dts b/arch/arm/boot/dts/imx53-m53menlo.dts index 4f88e96d81ddb..d5c68d1ea707c 100644 --- a/arch/arm/boot/dts/imx53-m53menlo.dts +++ b/arch/arm/boot/dts/imx53-m53menlo.dts @@ -53,6 +53,31 @@ }; }; + lvds-decoder { + compatible = "ti,ds90cf364a", "lvds-decoder"; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + + lvds_decoder_in: endpoint { + remote-endpoint = <&lvds0_out>; + }; + }; + + port@1 { + reg = <1>; + + lvds_decoder_out: endpoint { + remote-endpoint = <&panel_in>; + }; + }; + }; + }; + panel { compatible = "edt,etm0700g0dh6"; 
pinctrl-0 = <&pinctrl_display_gpio>; @@ -61,7 +86,7 @@ port { panel_in: endpoint { - remote-endpoint = <&lvds0_out>; + remote-endpoint = <&lvds_decoder_out>; }; }; }; @@ -450,7 +475,7 @@ reg = <2>; lvds0_out: endpoint { - remote-endpoint = <&panel_in>; + remote-endpoint = <&lvds_decoder_in>; }; }; }; diff --git a/arch/arm/boot/dts/imx7-colibri.dtsi b/arch/arm/boot/dts/imx7-colibri.dtsi index 62b771c1d5a9a..f1c60b0cb143e 100644 --- a/arch/arm/boot/dts/imx7-colibri.dtsi +++ b/arch/arm/boot/dts/imx7-colibri.dtsi @@ -40,7 +40,7 @@ dailink_master: simple-audio-card,codec { sound-dai = <&codec>; - clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_CLK>; + clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_DIV>; }; }; }; @@ -293,7 +293,7 @@ compatible = "fsl,sgtl5000"; #sound-dai-cells = <0>; reg = <0x0a>; - clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_CLK>; + clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_DIV>; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_sai1_mclk>; VDDA-supply = <&reg_module_3v3_avdd>; diff --git a/arch/arm/boot/dts/imx7-mba7.dtsi b/arch/arm/boot/dts/imx7-mba7.dtsi index 49086c6b6a0a2..3df6dff7734ae 100644 --- a/arch/arm/boot/dts/imx7-mba7.dtsi +++ b/arch/arm/boot/dts/imx7-mba7.dtsi @@ -302,7 +302,7 @@ tlv320aic32x4: audio-codec@18 { compatible = "ti,tlv320aic32x4"; reg = <0x18>; - clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_CLK>; + clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_DIV>; clock-names = "mclk"; ldoin-supply = <&reg_audio_3v3>; iov-supply = <&reg_audio_3v3>; diff --git a/arch/arm/boot/dts/imx7d-nitrogen7.dts b/arch/arm/boot/dts/imx7d-nitrogen7.dts index e0751e6ba3c0f..a31de900139d6 100644 --- a/arch/arm/boot/dts/imx7d-nitrogen7.dts +++ b/arch/arm/boot/dts/imx7d-nitrogen7.dts @@ -288,7 +288,7 @@ codec: wm8960@1a { compatible = "wlf,wm8960"; reg = <0x1a>; - clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_CLK>; + clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_DIV>; clock-names = "mclk"; wlf,shared-lrclk; }; diff --git a/arch/arm/boot/dts/imx7d-pico-hobbit.dts b/arch/arm/boot/dts/imx7d-pico-hobbit.dts index
7b2198a9372c6..d917dc4f2f227 100644 --- a/arch/arm/boot/dts/imx7d-pico-hobbit.dts +++ b/arch/arm/boot/dts/imx7d-pico-hobbit.dts @@ -31,7 +31,7 @@ dailink_master: simple-audio-card,codec { sound-dai = <&sgtl5000>; - clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_CLK>; + clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_DIV>; }; }; }; @@ -41,7 +41,7 @@ #sound-dai-cells = <0>; reg = <0x0a>; compatible = "fsl,sgtl5000"; - clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_CLK>; + clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_DIV>; VDDA-supply = <&reg_2p5v>; VDDIO-supply = <&reg_vref_1v8>; }; diff --git a/arch/arm/boot/dts/imx7d-pico-pi.dts b/arch/arm/boot/dts/imx7d-pico-pi.dts index 70bea95c06d83..f263e391e24cb 100644 --- a/arch/arm/boot/dts/imx7d-pico-pi.dts +++ b/arch/arm/boot/dts/imx7d-pico-pi.dts @@ -31,7 +31,7 @@ dailink_master: simple-audio-card,codec { sound-dai = <&sgtl5000>; - clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_CLK>; + clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_DIV>; }; }; }; @@ -41,7 +41,7 @@ #sound-dai-cells = <0>; reg = <0x0a>; compatible = "fsl,sgtl5000"; - clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_CLK>; + clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_DIV>; VDDA-supply = <&reg_2p5v>; VDDIO-supply = <&reg_vref_1v8>; }; diff --git a/arch/arm/boot/dts/imx7d-sdb.dts b/arch/arm/boot/dts/imx7d-sdb.dts index 7813ef960f6ee..f053f51227417 100644 --- a/arch/arm/boot/dts/imx7d-sdb.dts +++ b/arch/arm/boot/dts/imx7d-sdb.dts @@ -385,14 +385,14 @@ codec: wm8960@1a { compatible = "wlf,wm8960"; reg = <0x1a>; - clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_CLK>; + clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_DIV>; clock-names = "mclk"; wlf,shared-lrclk; wlf,hp-cfg = <2 2 3>; wlf,gpio-cfg = <1 3>; assigned-clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_SRC>, <&clks IMX7D_PLL_AUDIO_POST_DIV>, - <&clks IMX7D_AUDIO_MCLK_ROOT_CLK>; + <&clks IMX7D_AUDIO_MCLK_ROOT_DIV>; assigned-clock-parents = <&clks IMX7D_PLL_AUDIO_POST_DIV>; assigned-clock-rates = <0>, <884736000>, <12288000>; }; diff --git a/arch/arm/boot/dts/imx7s-warp.dts b/arch/arm/boot/dts/imx7s-warp.dts index
4f1edef06c922..e8734d218b9de 100644 --- a/arch/arm/boot/dts/imx7s-warp.dts +++ b/arch/arm/boot/dts/imx7s-warp.dts @@ -75,7 +75,7 @@ dailink_master: simple-audio-card,codec { sound-dai = <&codec>; - clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_CLK>; + clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_DIV>; }; }; }; @@ -232,7 +232,7 @@ #sound-dai-cells = <0>; reg = <0x0a>; compatible = "fsl,sgtl5000"; - clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_CLK>; + clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_DIV>; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_sai1_mclk>; VDDA-supply = <&vgen4_reg>; diff --git a/arch/arm/boot/dts/openbmc-flash-layout-64.dtsi b/arch/arm/boot/dts/openbmc-flash-layout-64.dtsi index 31f59de5190b8..7af41361c4800 100644 --- a/arch/arm/boot/dts/openbmc-flash-layout-64.dtsi +++ b/arch/arm/boot/dts/openbmc-flash-layout-64.dtsi @@ -28,7 +28,7 @@ partitions { label = "rofs"; }; - rwfs@6000000 { + rwfs@2a00000 { reg = <0x2a00000 0x1600000>; // 22MB label = "rwfs"; }; diff --git a/arch/arm/boot/dts/openbmc-flash-layout.dtsi b/arch/arm/boot/dts/openbmc-flash-layout.dtsi index 6c26524e93e11..b47e14063c380 100644 --- a/arch/arm/boot/dts/openbmc-flash-layout.dtsi +++ b/arch/arm/boot/dts/openbmc-flash-layout.dtsi @@ -20,7 +20,7 @@ partitions { label = "kernel"; }; - rofs@c0000 { + rofs@4c0000 { reg = <0x4c0000 0x1740000>; label = "rofs"; }; diff --git a/arch/arm/boot/dts/qcom-ipq4019.dtsi b/arch/arm/boot/dts/qcom-ipq4019.dtsi index 7dec0553636e5..51c365fdf3bfd 100644 --- a/arch/arm/boot/dts/qcom-ipq4019.dtsi +++ b/arch/arm/boot/dts/qcom-ipq4019.dtsi @@ -142,7 +142,8 @@ clocks { sleep_clk: sleep_clk { compatible = "fixed-clock"; - clock-frequency = <32768>; + clock-frequency = <32000>; + clock-output-names = "gcc_sleep_clk_src"; #clock-cells = <0>; }; diff --git a/arch/arm/boot/dts/qcom-msm8960.dtsi b/arch/arm/boot/dts/qcom-msm8960.dtsi index 2a0ec97a264f2..a0f9ab7f08f34 100644 --- a/arch/arm/boot/dts/qcom-msm8960.dtsi +++ b/arch/arm/boot/dts/qcom-msm8960.dtsi @@ -146,7 +146,9 @@ reg = 
<0x108000 0x1000>; qcom,ipc = <&l2cc 0x8 2>; - interrupts = <0 19 0>, <0 21 0>, <0 22 0>; + interrupts = , + , + ; interrupt-names = "ack", "err", "wakeup"; regulators { @@ -192,7 +194,7 @@ compatible = "qcom,msm-uartdm-v1.3", "qcom,msm-uartdm"; reg = <0x16440000 0x1000>, <0x16400000 0x1000>; - interrupts = <0 154 0x0>; + interrupts = ; clocks = <&gcc GSBI5_UART_CLK>, <&gcc GSBI5_H_CLK>; clock-names = "core", "iface"; status = "disabled"; @@ -318,7 +320,7 @@ #address-cells = <1>; #size-cells = <0>; reg = <0x16080000 0x1000>; - interrupts = <0 147 0>; + interrupts = ; spi-max-frequency = <24000000>; cs-gpios = <&msmgpio 8 0>; diff --git a/arch/arm/boot/dts/qcom-sdx55.dtsi b/arch/arm/boot/dts/qcom-sdx55.dtsi index 8ac0492c76595..40f11159f061e 100644 --- a/arch/arm/boot/dts/qcom-sdx55.dtsi +++ b/arch/arm/boot/dts/qcom-sdx55.dtsi @@ -413,7 +413,7 @@ <0x40000000 0xf1d>, <0x40000f20 0xc8>, <0x40001000 0x1000>, - <0x40002000 0x10000>, + <0x40200000 0x100000>, <0x01c03000 0x3000>; reg-names = "parf", "dbi", "elbi", "atu", "addr_space", "mmio"; diff --git a/arch/arm/boot/dts/sama5d2.dtsi b/arch/arm/boot/dts/sama5d2.dtsi index 09c741e8ecb87..c700c3b19e4c4 100644 --- a/arch/arm/boot/dts/sama5d2.dtsi +++ b/arch/arm/boot/dts/sama5d2.dtsi @@ -415,7 +415,7 @@ pmecc: ecc-engine@f8014070 { compatible = "atmel,sama5d2-pmecc"; reg = <0xf8014070 0x490>, - <0xf8014500 0x100>; + <0xf8014500 0x200>; }; }; diff --git a/arch/arm/boot/dts/sama7g5.dtsi b/arch/arm/boot/dts/sama7g5.dtsi index eddcfbf4d2233..22520cdd37fc5 100644 --- a/arch/arm/boot/dts/sama7g5.dtsi +++ b/arch/arm/boot/dts/sama7g5.dtsi @@ -382,8 +382,6 @@ dmas = <&dma0 AT91_XDMAC_DT_PERID(7)>, <&dma0 AT91_XDMAC_DT_PERID(8)>; dma-names = "rx", "tx"; - atmel,use-dma-rx; - atmel,use-dma-tx; status = "disabled"; }; }; @@ -558,8 +556,6 @@ dmas = <&dma0 AT91_XDMAC_DT_PERID(21)>, <&dma0 AT91_XDMAC_DT_PERID(22)>; dma-names = "rx", "tx"; - atmel,use-dma-rx; - atmel,use-dma-tx; status = "disabled"; }; }; @@ -584,8 +580,6 @@ dmas = <&dma0 
AT91_XDMAC_DT_PERID(23)>, <&dma0 AT91_XDMAC_DT_PERID(24)>; dma-names = "rx", "tx"; - atmel,use-dma-rx; - atmel,use-dma-tx; status = "disabled"; }; }; diff --git a/arch/arm/boot/dts/spear1340.dtsi b/arch/arm/boot/dts/spear1340.dtsi index 827e887afbda4..13e1bdb3ddbf1 100644 --- a/arch/arm/boot/dts/spear1340.dtsi +++ b/arch/arm/boot/dts/spear1340.dtsi @@ -134,9 +134,9 @@ reg = <0xb4100000 0x1000>; interrupts = <0 105 0x4>; status = "disabled"; - dmas = <&dwdma0 12 0 1>, - <&dwdma0 13 1 0>; - dma-names = "tx", "rx"; + dmas = <&dwdma0 13 0 1>, + <&dwdma0 12 1 0>; + dma-names = "rx", "tx"; }; thermal@e07008c4 { diff --git a/arch/arm/boot/dts/spear13xx.dtsi b/arch/arm/boot/dts/spear13xx.dtsi index c87b881b2c8bb..9135533676879 100644 --- a/arch/arm/boot/dts/spear13xx.dtsi +++ b/arch/arm/boot/dts/spear13xx.dtsi @@ -284,9 +284,9 @@ #size-cells = <0>; interrupts = <0 31 0x4>; status = "disabled"; - dmas = <&dwdma0 4 0 0>, - <&dwdma0 5 0 0>; - dma-names = "tx", "rx"; + dmas = <&dwdma0 5 0 0>, + <&dwdma0 4 0 0>; + dma-names = "rx", "tx"; }; rtc@e0580000 { diff --git a/arch/arm/boot/dts/stm32mp15-pinctrl.dtsi b/arch/arm/boot/dts/stm32mp15-pinctrl.dtsi index 3b65130affec8..6161f5906ec11 100644 --- a/arch/arm/boot/dts/stm32mp15-pinctrl.dtsi +++ b/arch/arm/boot/dts/stm32mp15-pinctrl.dtsi @@ -1190,7 +1190,7 @@ }; }; - sai2a_sleep_pins_c: sai2a-2 { + sai2a_sleep_pins_c: sai2a-sleep-2 { pins { pinmux = , /* SAI2_SCK_A */ , /* SAI2_SD_A */ diff --git a/arch/arm/boot/dts/sun8i-v3s.dtsi b/arch/arm/boot/dts/sun8i-v3s.dtsi index b30bc1a25ebb9..084323d5c61cb 100644 --- a/arch/arm/boot/dts/sun8i-v3s.dtsi +++ b/arch/arm/boot/dts/sun8i-v3s.dtsi @@ -593,6 +593,17 @@ #size-cells = <0>; }; + gic: interrupt-controller@1c81000 { + compatible = "arm,gic-400"; + reg = <0x01c81000 0x1000>, + <0x01c82000 0x2000>, + <0x01c84000 0x2000>, + <0x01c86000 0x2000>; + interrupt-controller; + #interrupt-cells = <3>; + interrupts = ; + }; + csi1: camera@1cb4000 { compatible = "allwinner,sun8i-v3s-csi"; reg = 
<0x01cb4000 0x3000>; @@ -604,16 +615,5 @@ resets = <&ccu RST_BUS_CSI>; status = "disabled"; }; - - gic: interrupt-controller@1c81000 { - compatible = "arm,gic-400"; - reg = <0x01c81000 0x1000>, - <0x01c82000 0x2000>, - <0x01c84000 0x2000>, - <0x01c86000 0x2000>; - interrupt-controller; - #interrupt-cells = <3>; - interrupts = ; - }; }; }; diff --git a/arch/arm/boot/dts/tegra20-asus-tf101.dts b/arch/arm/boot/dts/tegra20-asus-tf101.dts index 020172ee7340e..e3267cda15cc9 100644 --- a/arch/arm/boot/dts/tegra20-asus-tf101.dts +++ b/arch/arm/boot/dts/tegra20-asus-tf101.dts @@ -442,11 +442,13 @@ serial@70006040 { compatible = "nvidia,tegra20-hsuart"; + /delete-property/ reg-shift; /* GPS BCM4751 */ }; serial@70006200 { compatible = "nvidia,tegra20-hsuart"; + /delete-property/ reg-shift; status = "okay"; /* Azurewave AW-NH615 BCM4329B1 */ diff --git a/arch/arm/boot/dts/tegra20-tamonten.dtsi b/arch/arm/boot/dts/tegra20-tamonten.dtsi index de39c5465c0a9..0e19bd0a847c8 100644 --- a/arch/arm/boot/dts/tegra20-tamonten.dtsi +++ b/arch/arm/boot/dts/tegra20-tamonten.dtsi @@ -183,8 +183,8 @@ }; conf_ata { nvidia,pins = "ata", "atb", "atc", "atd", "ate", - "cdev1", "cdev2", "dap1", "dtb", "gma", - "gmb", "gmc", "gmd", "gme", "gpu7", + "cdev1", "cdev2", "dap1", "dtb", "dtf", + "gma", "gmb", "gmc", "gmd", "gme", "gpu7", "gpv", "i2cp", "irrx", "irtx", "pta", "rm", "slxa", "slxk", "spia", "spib", "uac"; @@ -203,7 +203,7 @@ }; conf_crtp { nvidia,pins = "crtp", "dap2", "dap3", "dap4", - "dtc", "dte", "dtf", "gpu", "sdio1", + "dtc", "dte", "gpu", "sdio1", "slxc", "slxd", "spdi", "spdo", "spig", "uda"; nvidia,pull = ; diff --git a/arch/arm/boot/dts/tegra30-asus-transformer-common.dtsi b/arch/arm/boot/dts/tegra30-asus-transformer-common.dtsi index 85b43a86a26d9..c662ab261ed5f 100644 --- a/arch/arm/boot/dts/tegra30-asus-transformer-common.dtsi +++ b/arch/arm/boot/dts/tegra30-asus-transformer-common.dtsi @@ -1080,6 +1080,7 @@ serial@70006040 { compatible = "nvidia,tegra30-hsuart"; + 
/delete-property/ reg-shift; status = "okay"; /* Broadcom GPS BCM47511 */ @@ -1087,6 +1088,7 @@ serial@70006200 { compatible = "nvidia,tegra30-hsuart"; + /delete-property/ reg-shift; status = "okay"; nvidia,adjust-baud-rates = <0 9600 100>, diff --git a/arch/arm/boot/dts/tegra30-pegatron-chagall.dts b/arch/arm/boot/dts/tegra30-pegatron-chagall.dts index f4b2d4218849c..8ce61035290b5 100644 --- a/arch/arm/boot/dts/tegra30-pegatron-chagall.dts +++ b/arch/arm/boot/dts/tegra30-pegatron-chagall.dts @@ -1103,6 +1103,7 @@ uartb: serial@70006040 { compatible = "nvidia,tegra30-hsuart"; + /delete-property/ reg-shift; status = "okay"; /* Broadcom GPS BCM47511 */ @@ -1110,6 +1111,7 @@ uartc: serial@70006200 { compatible = "nvidia,tegra30-hsuart"; + /delete-property/ reg-shift; status = "okay"; nvidia,adjust-baud-rates = <0 9600 100>, diff --git a/arch/arm/configs/multi_v5_defconfig b/arch/arm/configs/multi_v5_defconfig index fe8d760256a4c..3e3beb0cc33de 100644 --- a/arch/arm/configs/multi_v5_defconfig +++ b/arch/arm/configs/multi_v5_defconfig @@ -188,6 +188,7 @@ CONFIG_REGULATOR=y CONFIG_REGULATOR_FIXED_VOLTAGE=y CONFIG_MEDIA_SUPPORT=y CONFIG_MEDIA_CAMERA_SUPPORT=y +CONFIG_MEDIA_PLATFORM_SUPPORT=y CONFIG_V4L_PLATFORM_DRIVERS=y CONFIG_VIDEO_ASPEED=m CONFIG_VIDEO_ATMEL_ISI=m @@ -196,6 +197,7 @@ CONFIG_DRM_ATMEL_HLCDC=m CONFIG_DRM_PANEL_SIMPLE=y CONFIG_DRM_PANEL_EDP=y CONFIG_DRM_ASPEED_GFX=m +CONFIG_FB=y CONFIG_FB_IMX=y CONFIG_FB_ATMEL=y CONFIG_BACKLIGHT_ATMEL_LCDC=y diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig index 2b575792363e5..e4dba5461cb3e 100644 --- a/arch/arm/crypto/Kconfig +++ b/arch/arm/crypto/Kconfig @@ -102,6 +102,8 @@ config CRYPTO_AES_ARM_BS depends on KERNEL_MODE_NEON select CRYPTO_SKCIPHER select CRYPTO_LIB_AES + select CRYPTO_AES + select CRYPTO_CBC select CRYPTO_SIMD help Use a faster and more secure NEON based implementation of AES in CBC, diff --git a/arch/arm/kernel/entry-ftrace.S b/arch/arm/kernel/entry-ftrace.S index 
a74289ebc8036..5f1b1ce10473a 100644 --- a/arch/arm/kernel/entry-ftrace.S +++ b/arch/arm/kernel/entry-ftrace.S @@ -22,10 +22,7 @@ * mcount can be thought of as a function called in the middle of a subroutine * call. As such, it needs to be transparent for both the caller and the * callee: the original lr needs to be restored when leaving mcount, and no - * registers should be clobbered. (In the __gnu_mcount_nc implementation, we - * clobber the ip register. This is OK because the ARM calling convention - * allows it to be clobbered in subroutines and doesn't use it to hold - * parameters.) + * registers should be clobbered. * * When using dynamic ftrace, we patch out the mcount call by a "pop {lr}" * instead of the __gnu_mcount_nc call (see arch/arm/kernel/ftrace.c). @@ -70,26 +67,25 @@ .macro __ftrace_regs_caller - sub sp, sp, #8 @ space for PC and CPSR OLD_R0, + str lr, [sp, #-8]! @ store LR as PC and make space for CPSR/OLD_R0, @ OLD_R0 will overwrite previous LR - add ip, sp, #12 @ move in IP the value of SP as it was - @ before the push {lr} of the mcount mechanism + ldr lr, [sp, #8] @ get previous LR - str lr, [sp, #0] @ store LR instead of PC + str r0, [sp, #8] @ write r0 as OLD_R0 over previous LR - ldr lr, [sp, #8] @ get previous LR + str lr, [sp, #-4]! @ store previous LR as LR - str r0, [sp, #8] @ write r0 as OLD_R0 over previous LR + add lr, sp, #16 @ move in LR the value of SP as it was + @ before the push {lr} of the mcount mechanism - stmdb sp!, {ip, lr} - stmdb sp!, {r0-r11, lr} + push {r0-r11, ip, lr} @ stack content at this point: @ 0 4 48 52 56 60 64 68 72 - @ R0 | R1 | ... | LR | SP + 4 | previous LR | LR | PSR | OLD_R0 | + @ R0 | R1 | ... 
| IP | SP + 4 | previous LR | LR | PSR | OLD_R0 | - mov r3, sp @ struct pt_regs* + mov r3, sp @ struct pt_regs* ldr r2, =function_trace_op ldr r2, [r2] @ pointer to the current @@ -112,11 +108,9 @@ ftrace_graph_regs_call: #endif @ pop saved regs - ldmia sp!, {r0-r12} @ restore r0 through r12 - ldr ip, [sp, #8] @ restore PC - ldr lr, [sp, #4] @ restore LR - ldr sp, [sp, #0] @ restore SP - mov pc, ip @ return + pop {r0-r11, ip, lr} @ restore r0 through r12 + ldr lr, [sp], #4 @ restore LR + ldr pc, [sp], #12 .endm #ifdef CONFIG_FUNCTION_GRAPH_TRACER @@ -132,11 +126,9 @@ ftrace_graph_regs_call: bl prepare_ftrace_return @ pop registers saved in ftrace_regs_caller - ldmia sp!, {r0-r12} @ restore r0 through r12 - ldr ip, [sp, #8] @ restore PC - ldr lr, [sp, #4] @ restore LR - ldr sp, [sp, #0] @ restore SP - mov pc, ip @ return + pop {r0-r11, ip, lr} @ restore r0 through r12 + ldr lr, [sp], #4 @ restore LR + ldr pc, [sp], #12 .endm #endif @@ -202,16 +194,17 @@ ftrace_graph_call\suffix: .endm .macro mcount_exit - ldmia sp!, {r0-r3, ip, lr} - ret ip + ldmia sp!, {r0-r3} + ldr lr, [sp, #4] + ldr pc, [sp], #8 .endm ENTRY(__gnu_mcount_nc) UNWIND(.fnstart) #ifdef CONFIG_DYNAMIC_FTRACE - mov ip, lr - ldmia sp!, {lr} - ret ip + push {lr} + ldr lr, [sp, #4] + ldr pc, [sp], #8 #else __mcount #endif diff --git a/arch/arm/kernel/swp_emulate.c b/arch/arm/kernel/swp_emulate.c index 6166ba38bf994..b74bfcf94fb1a 100644 --- a/arch/arm/kernel/swp_emulate.c +++ b/arch/arm/kernel/swp_emulate.c @@ -195,7 +195,7 @@ static int swp_handler(struct pt_regs *regs, unsigned int instr) destreg, EXTRACT_REG_NUM(instr, RT2_OFFSET), data); /* Check access in reasonable access range for both SWP and SWPB */ - if (!access_ok((address & ~3), 4)) { + if (!access_ok((void __user *)(address & ~3), 4)) { pr_debug("SWP{B} emulation: access to %p not allowed!\n", (void *)address); res = -EFAULT; diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index cae4a748811f8..5d58aee24087f 100644 --- 
a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -577,7 +577,7 @@ do_cache_op(unsigned long start, unsigned long end, int flags) if (end < start || flags) return -EINVAL; - if (!access_ok(start, end - start)) + if (!access_ok((void __user *)start, end - start)) return -EFAULT; return __do_cache_op(start, end); diff --git a/arch/arm/mach-davinci/board-da850-evm.c b/arch/arm/mach-davinci/board-da850-evm.c index 428012687a802..7f7f6bae21c2d 100644 --- a/arch/arm/mach-davinci/board-da850-evm.c +++ b/arch/arm/mach-davinci/board-da850-evm.c @@ -1101,11 +1101,13 @@ static int __init da850_evm_config_emac(void) int ret; u32 val; struct davinci_soc_info *soc_info = &davinci_soc_info; - u8 rmii_en = soc_info->emac_pdata->rmii_en; + u8 rmii_en; if (!machine_is_davinci_da850_evm()) return 0; + rmii_en = soc_info->emac_pdata->rmii_en; + cfg_chip3_base = DA8XX_SYSCFG0_VIRT(DA8XX_CFGCHIP3_REG); val = __raw_readl(cfg_chip3_base); diff --git a/arch/arm/mach-ep93xx/clock.c b/arch/arm/mach-ep93xx/clock.c index cc75087134d38..28e0ae6e890e5 100644 --- a/arch/arm/mach-ep93xx/clock.c +++ b/arch/arm/mach-ep93xx/clock.c @@ -148,8 +148,10 @@ static struct clk_hw *ep93xx_clk_register_gate(const char *name, psc->lock = &clk_lock; clk = clk_register(NULL, &psc->hw); - if (IS_ERR(clk)) + if (IS_ERR(clk)) { kfree(psc); + return ERR_CAST(clk); + } return &psc->hw; } diff --git a/arch/arm/mach-iop32x/include/mach/entry-macro.S b/arch/arm/mach-iop32x/include/mach/entry-macro.S index 8e6766d4621eb..341e5d9a6616d 100644 --- a/arch/arm/mach-iop32x/include/mach/entry-macro.S +++ b/arch/arm/mach-iop32x/include/mach/entry-macro.S @@ -20,7 +20,7 @@ mrc p6, 0, \irqstat, c8, c0, 0 @ Read IINTSRC cmp \irqstat, #0 clzne \irqnr, \irqstat - rsbne \irqnr, \irqnr, #31 + rsbne \irqnr, \irqnr, #32 .endm .macro arch_ret_to_user, tmp1, tmp2 diff --git a/arch/arm/mach-iop32x/include/mach/irqs.h b/arch/arm/mach-iop32x/include/mach/irqs.h index c4e78df428e86..e09ae5f48aec5 100644 --- 
a/arch/arm/mach-iop32x/include/mach/irqs.h +++ b/arch/arm/mach-iop32x/include/mach/irqs.h @@ -9,6 +9,6 @@ #ifndef __IRQS_H #define __IRQS_H -#define NR_IRQS 32 +#define NR_IRQS 33 #endif diff --git a/arch/arm/mach-iop32x/irq.c b/arch/arm/mach-iop32x/irq.c index 2d48bf1398c10..d1e8824cbd824 100644 --- a/arch/arm/mach-iop32x/irq.c +++ b/arch/arm/mach-iop32x/irq.c @@ -32,14 +32,14 @@ static void intstr_write(u32 val) static void iop32x_irq_mask(struct irq_data *d) { - iop32x_mask &= ~(1 << d->irq); + iop32x_mask &= ~(1 << (d->irq - 1)); intctl_write(iop32x_mask); } static void iop32x_irq_unmask(struct irq_data *d) { - iop32x_mask |= 1 << d->irq; + iop32x_mask |= 1 << (d->irq - 1); intctl_write(iop32x_mask); } @@ -65,7 +65,7 @@ void __init iop32x_init_irq(void) machine_is_em7210()) *IOP3XX_PCIIRSR = 0x0f; - for (i = 0; i < NR_IRQS; i++) { + for (i = 1; i < NR_IRQS; i++) { irq_set_chip_and_handler(i, &ext_chip, handle_level_irq); irq_clear_status_flags(i, IRQ_NOREQUEST | IRQ_NOPROBE); } diff --git a/arch/arm/mach-iop32x/irqs.h b/arch/arm/mach-iop32x/irqs.h index 69858e4e905d1..e1dfc8b4e7d7e 100644 --- a/arch/arm/mach-iop32x/irqs.h +++ b/arch/arm/mach-iop32x/irqs.h @@ -7,36 +7,40 @@ #ifndef __IOP32X_IRQS_H #define __IOP32X_IRQS_H +/* Interrupts in Linux start at 1, hardware starts at 0 */ + +#define IOP_IRQ(x) ((x) + 1) + /* * IOP80321 chipset interrupts */ -#define IRQ_IOP32X_DMA0_EOT 0 -#define IRQ_IOP32X_DMA0_EOC 1 -#define IRQ_IOP32X_DMA1_EOT 2 -#define IRQ_IOP32X_DMA1_EOC 3 -#define IRQ_IOP32X_AA_EOT 6 -#define IRQ_IOP32X_AA_EOC 7 -#define IRQ_IOP32X_CORE_PMON 8 -#define IRQ_IOP32X_TIMER0 9 -#define IRQ_IOP32X_TIMER1 10 -#define IRQ_IOP32X_I2C_0 11 -#define IRQ_IOP32X_I2C_1 12 -#define IRQ_IOP32X_MESSAGING 13 -#define IRQ_IOP32X_ATU_BIST 14 -#define IRQ_IOP32X_PERFMON 15 -#define IRQ_IOP32X_CORE_PMU 16 -#define IRQ_IOP32X_BIU_ERR 17 -#define IRQ_IOP32X_ATU_ERR 18 -#define IRQ_IOP32X_MCU_ERR 19 -#define IRQ_IOP32X_DMA0_ERR 20 -#define IRQ_IOP32X_DMA1_ERR 21 -#define 
IRQ_IOP32X_AA_ERR 23 -#define IRQ_IOP32X_MSG_ERR 24 -#define IRQ_IOP32X_SSP 25 -#define IRQ_IOP32X_XINT0 27 -#define IRQ_IOP32X_XINT1 28 -#define IRQ_IOP32X_XINT2 29 -#define IRQ_IOP32X_XINT3 30 -#define IRQ_IOP32X_HPI 31 +#define IRQ_IOP32X_DMA0_EOT IOP_IRQ(0) +#define IRQ_IOP32X_DMA0_EOC IOP_IRQ(1) +#define IRQ_IOP32X_DMA1_EOT IOP_IRQ(2) +#define IRQ_IOP32X_DMA1_EOC IOP_IRQ(3) +#define IRQ_IOP32X_AA_EOT IOP_IRQ(6) +#define IRQ_IOP32X_AA_EOC IOP_IRQ(7) +#define IRQ_IOP32X_CORE_PMON IOP_IRQ(8) +#define IRQ_IOP32X_TIMER0 IOP_IRQ(9) +#define IRQ_IOP32X_TIMER1 IOP_IRQ(10) +#define IRQ_IOP32X_I2C_0 IOP_IRQ(11) +#define IRQ_IOP32X_I2C_1 IOP_IRQ(12) +#define IRQ_IOP32X_MESSAGING IOP_IRQ(13) +#define IRQ_IOP32X_ATU_BIST IOP_IRQ(14) +#define IRQ_IOP32X_PERFMON IOP_IRQ(15) +#define IRQ_IOP32X_CORE_PMU IOP_IRQ(16) +#define IRQ_IOP32X_BIU_ERR IOP_IRQ(17) +#define IRQ_IOP32X_ATU_ERR IOP_IRQ(18) +#define IRQ_IOP32X_MCU_ERR IOP_IRQ(19) +#define IRQ_IOP32X_DMA0_ERR IOP_IRQ(20) +#define IRQ_IOP32X_DMA1_ERR IOP_IRQ(21) +#define IRQ_IOP32X_AA_ERR IOP_IRQ(23) +#define IRQ_IOP32X_MSG_ERR IOP_IRQ(24) +#define IRQ_IOP32X_SSP IOP_IRQ(25) +#define IRQ_IOP32X_XINT0 IOP_IRQ(27) +#define IRQ_IOP32X_XINT1 IOP_IRQ(28) +#define IRQ_IOP32X_XINT2 IOP_IRQ(29) +#define IRQ_IOP32X_XINT3 IOP_IRQ(30) +#define IRQ_IOP32X_HPI IOP_IRQ(31) #endif diff --git a/arch/arm/mach-mmp/sram.c b/arch/arm/mach-mmp/sram.c index 6794e2db1ad5f..ecc46c31004f6 100644 --- a/arch/arm/mach-mmp/sram.c +++ b/arch/arm/mach-mmp/sram.c @@ -72,6 +72,8 @@ static int sram_probe(struct platform_device *pdev) if (!info) return -ENOMEM; + platform_set_drvdata(pdev, info); + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (res == NULL) { dev_err(&pdev->dev, "no memory resource defined\n"); @@ -107,8 +109,6 @@ static int sram_probe(struct platform_device *pdev) list_add(&info->node, &sram_bank_list); mutex_unlock(&sram_lock); - platform_set_drvdata(pdev, info); - dev_info(&pdev->dev, "initialized\n"); return 0; @@ -127,17 
+127,19 @@ static int sram_remove(struct platform_device *pdev) struct sram_bank_info *info; info = platform_get_drvdata(pdev); - if (info == NULL) - return -ENODEV; - mutex_lock(&sram_lock); - list_del(&info->node); - mutex_unlock(&sram_lock); + if (info->sram_size) { + mutex_lock(&sram_lock); + list_del(&info->node); + mutex_unlock(&sram_lock); + + gen_pool_destroy(info->gpool); + iounmap(info->sram_virt); + kfree(info->pool_name); + } - gen_pool_destroy(info->gpool); - iounmap(info->sram_virt); - kfree(info->pool_name); kfree(info); + return 0; } diff --git a/arch/arm/mach-s3c/mach-jive.c b/arch/arm/mach-s3c/mach-jive.c index 285e1f0f4145a..0d7d408c37291 100644 --- a/arch/arm/mach-s3c/mach-jive.c +++ b/arch/arm/mach-s3c/mach-jive.c @@ -236,11 +236,11 @@ static int __init jive_mtdset(char *options) unsigned long set; if (options == NULL || options[0] == '\0') - return 0; + return 1; if (kstrtoul(options, 10, &set)) { printk(KERN_ERR "failed to parse mtdset=%s\n", options); - return 0; + return 1; } switch (set) { @@ -256,7 +256,7 @@ static int __init jive_mtdset(char *options) "using default.", set); } - return 0; + return 1; } /* parse the mtdset= option given to the kernel command line */ diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index ac964612d8b07..90933eabe1156 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -464,3 +464,4 @@ 448 common process_mrelease sys_process_mrelease 449 common futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node +451 common pmadv_ksm sys_pmadv_ksm diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index c842878f81331..baa0e9bbe7547 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -683,6 +683,7 @@ config ARM64_ERRATUM_2051678 config ARM64_ERRATUM_2077057 bool "Cortex-A510: 2077057: workaround software-step corrupting SPSR_EL2" + default y help This option adds the workaround for ARM Cortex-A510 erratum 2077057. 
Affected Cortex-A510 may corrupt SPSR_EL2 when the a step exception is diff --git a/arch/arm64/boot/dts/broadcom/bcm4908/bcm4908.dtsi b/arch/arm64/boot/dts/broadcom/bcm4908/bcm4908.dtsi index 984c737fa627a..6e738f2a37013 100644 --- a/arch/arm64/boot/dts/broadcom/bcm4908/bcm4908.dtsi +++ b/arch/arm64/boot/dts/broadcom/bcm4908/bcm4908.dtsi @@ -273,9 +273,9 @@ #size-cells = <1>; ranges = <0x00 0x00 0xff800000 0x3000>; - timer: timer@400 { - compatible = "brcm,bcm6328-timer", "syscon"; - reg = <0x400 0x3c>; + twd: timer-mfd@400 { + compatible = "brcm,bcm4908-twd", "simple-mfd", "syscon"; + reg = <0x400 0x4c>; }; gpio0: gpio-controller@500 { @@ -330,7 +330,7 @@ reboot { compatible = "syscon-reboot"; - regmap = <&timer>; + regmap = <&twd>; offset = <0x34>; mask = <1>; }; diff --git a/arch/arm64/boot/dts/broadcom/northstar2/ns2-svk.dts b/arch/arm64/boot/dts/broadcom/northstar2/ns2-svk.dts index ec19fbf928a14..12a4b1c03390c 100644 --- a/arch/arm64/boot/dts/broadcom/northstar2/ns2-svk.dts +++ b/arch/arm64/boot/dts/broadcom/northstar2/ns2-svk.dts @@ -111,8 +111,8 @@ compatible = "silabs,si3226x"; reg = <0>; spi-max-frequency = <5000000>; - spi-cpha = <1>; - spi-cpol = <1>; + spi-cpha; + spi-cpol; pl022,hierarchy = <0>; pl022,interface = <0>; pl022,slave-tx-disable = <0>; @@ -135,8 +135,8 @@ at25,byte-len = <0x8000>; at25,addr-mode = <2>; at25,page-size = <64>; - spi-cpha = <1>; - spi-cpol = <1>; + spi-cpha; + spi-cpol; pl022,hierarchy = <0>; pl022,interface = <0>; pl022,slave-tx-disable = <0>; diff --git a/arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi b/arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi index 2cfeaf3b0a876..8c218689fef70 100644 --- a/arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi +++ b/arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi @@ -687,7 +687,7 @@ }; }; - sata: ahci@663f2000 { + sata: sata@663f2000 { compatible = "brcm,iproc-ahci", "generic-ahci"; reg = <0x663f2000 0x1000>; dma-coherent; diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi 
b/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi index 01b01e3204118..35d1939e690b0 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi @@ -536,9 +536,9 @@ clock-names = "i2c"; clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL QORIQ_CLK_PLL_DIV(1)>; - dmas = <&edma0 1 39>, - <&edma0 1 38>; - dma-names = "tx", "rx"; + dmas = <&edma0 1 38>, + <&edma0 1 39>; + dma-names = "rx", "tx"; status = "disabled"; }; diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi index 687fea6d8afa4..4e7bd04d97984 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi @@ -499,9 +499,9 @@ interrupts = ; clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL QORIQ_CLK_PLL_DIV(2)>; - dmas = <&edma0 1 39>, - <&edma0 1 38>; - dma-names = "tx", "rx"; + dmas = <&edma0 1 38>, + <&edma0 1 39>; + dma-names = "rx", "tx"; status = "disabled"; }; diff --git a/arch/arm64/boot/dts/qcom/ipq6018.dtsi b/arch/arm64/boot/dts/qcom/ipq6018.dtsi index 66ec5615651d4..5dea37651adfc 100644 --- a/arch/arm64/boot/dts/qcom/ipq6018.dtsi +++ b/arch/arm64/boot/dts/qcom/ipq6018.dtsi @@ -748,7 +748,7 @@ snps,hird-threshold = /bits/ 8 <0x0>; snps,dis_u2_susphy_quirk; snps,dis_u3_susphy_quirk; - snps,ref-clock-period-ns = <0x32>; + snps,ref-clock-period-ns = <0x29>; dr_mode = "host"; }; }; diff --git a/arch/arm64/boot/dts/qcom/msm8916-samsung-j5.dts b/arch/arm64/boot/dts/qcom/msm8916-samsung-j5.dts index 687bea438a571..6c408d61de75a 100644 --- a/arch/arm64/boot/dts/qcom/msm8916-samsung-j5.dts +++ b/arch/arm64/boot/dts/qcom/msm8916-samsung-j5.dts @@ -41,7 +41,7 @@ }; home-key { - lable = "Home Key"; + label = "Home Key"; gpios = <&msmgpio 109 GPIO_ACTIVE_LOW>; linux,code = ; }; diff --git a/arch/arm64/boot/dts/qcom/msm8994.dtsi b/arch/arm64/boot/dts/qcom/msm8994.dtsi index 5a9a5ed0565f6..215f56daa26c2 100644 --- a/arch/arm64/boot/dts/qcom/msm8994.dtsi +++ 
b/arch/arm64/boot/dts/qcom/msm8994.dtsi @@ -713,6 +713,9 @@ #reset-cells = <1>; #power-domain-cells = <1>; reg = <0xfc400000 0x2000>; + + clock-names = "xo", "sleep_clk"; + clocks = <&xo_board>, <&sleep_clk>; }; rpm_msg_ram: sram@fc428000 { diff --git a/arch/arm64/boot/dts/qcom/sc7280.dtsi b/arch/arm64/boot/dts/qcom/sc7280.dtsi index 937c2e0e93eb9..eab7a85050531 100644 --- a/arch/arm64/boot/dts/qcom/sc7280.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7280.dtsi @@ -1790,7 +1790,7 @@ }; }; - gmu: gmu@3d69000 { + gmu: gmu@3d6a000 { compatible="qcom,adreno-gmu-635.0", "qcom,adreno-gmu"; reg = <0 0x03d6a000 0 0x34000>, <0 0x3de0000 0 0x10000>, diff --git a/arch/arm64/boot/dts/qcom/sdm845.dtsi b/arch/arm64/boot/dts/qcom/sdm845.dtsi index cfdeaa81f1bbc..1bb4d98db96fa 100644 --- a/arch/arm64/boot/dts/qcom/sdm845.dtsi +++ b/arch/arm64/boot/dts/qcom/sdm845.dtsi @@ -3613,10 +3613,10 @@ #clock-cells = <0>; clock-frequency = <9600000>; clock-output-names = "mclk"; - qcom,micbias1-millivolt = <1800>; - qcom,micbias2-millivolt = <1800>; - qcom,micbias3-millivolt = <1800>; - qcom,micbias4-millivolt = <1800>; + qcom,micbias1-microvolt = <1800000>; + qcom,micbias2-microvolt = <1800000>; + qcom,micbias3-microvolt = <1800000>; + qcom,micbias4-microvolt = <1800000>; #address-cells = <1>; #size-cells = <1>; diff --git a/arch/arm64/boot/dts/qcom/sm8150.dtsi b/arch/arm64/boot/dts/qcom/sm8150.dtsi index 6012322a59846..78265646feff7 100644 --- a/arch/arm64/boot/dts/qcom/sm8150.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8150.dtsi @@ -3556,9 +3556,9 @@ qcom,tcs-offset = <0xd00>; qcom,drv-id = <2>; qcom,tcs-config = , - , - , - ; + , + , + ; rpmhcc: clock-controller { compatible = "qcom,sm8150-rpmh-clk"; diff --git a/arch/arm64/boot/dts/qcom/sm8250.dtsi b/arch/arm64/boot/dts/qcom/sm8250.dtsi index 5617a46e5ccdd..a92230bec1ddb 100644 --- a/arch/arm64/boot/dts/qcom/sm8250.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8250.dtsi @@ -1740,8 +1740,8 @@ phys = <&pcie0_lane>; phy-names = "pciephy"; - perst-gpio = <&tlmm 
79 GPIO_ACTIVE_LOW>; - enable-gpio = <&tlmm 81 GPIO_ACTIVE_HIGH>; + perst-gpios = <&tlmm 79 GPIO_ACTIVE_LOW>; + wake-gpios = <&tlmm 81 GPIO_ACTIVE_HIGH>; pinctrl-names = "default"; pinctrl-0 = <&pcie0_default_state>; @@ -1801,7 +1801,7 @@ ranges = <0x01000000 0x0 0x40200000 0x0 0x40200000 0x0 0x100000>, <0x02000000 0x0 0x40300000 0x0 0x40300000 0x0 0x1fd00000>; - interrupts = ; + interrupts = ; interrupt-names = "msi"; #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 0x7>; @@ -1844,8 +1844,8 @@ phys = <&pcie1_lane>; phy-names = "pciephy"; - perst-gpio = <&tlmm 82 GPIO_ACTIVE_LOW>; - enable-gpio = <&tlmm 84 GPIO_ACTIVE_HIGH>; + perst-gpios = <&tlmm 82 GPIO_ACTIVE_LOW>; + wake-gpios = <&tlmm 84 GPIO_ACTIVE_HIGH>; pinctrl-names = "default"; pinctrl-0 = <&pcie1_default_state>; @@ -1907,7 +1907,7 @@ ranges = <0x01000000 0x0 0x64200000 0x0 0x64200000 0x0 0x100000>, <0x02000000 0x0 0x64300000 0x0 0x64300000 0x0 0x3d00000>; - interrupts = ; + interrupts = ; interrupt-names = "msi"; #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 0x7>; @@ -1950,8 +1950,8 @@ phys = <&pcie2_lane>; phy-names = "pciephy"; - perst-gpio = <&tlmm 85 GPIO_ACTIVE_LOW>; - enable-gpio = <&tlmm 87 GPIO_ACTIVE_HIGH>; + perst-gpios = <&tlmm 85 GPIO_ACTIVE_LOW>; + wake-gpios = <&tlmm 87 GPIO_ACTIVE_HIGH>; pinctrl-names = "default"; pinctrl-0 = <&pcie2_default_state>; diff --git a/arch/arm64/boot/dts/qcom/sm8350.dtsi b/arch/arm64/boot/dts/qcom/sm8350.dtsi index 4b19744bcfb34..765d018e6306c 100644 --- a/arch/arm64/boot/dts/qcom/sm8350.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8350.dtsi @@ -1820,7 +1820,7 @@ qcom,tcs-offset = <0xd00>; qcom,drv-id = <2>; qcom,tcs-config = , , - , ; + , ; rpmhcc: clock-controller { compatible = "qcom,sm8350-rpmh-clk"; diff --git a/arch/arm64/boot/dts/qcom/sm8450.dtsi b/arch/arm64/boot/dts/qcom/sm8450.dtsi index 02b97e838c474..9ee055143f8a8 100644 --- a/arch/arm64/boot/dts/qcom/sm8450.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8450.dtsi @@ -203,9 +203,9 @@ compatible = 
"arm,idle-state"; idle-state-name = "silver-rail-power-collapse"; arm,psci-suspend-param = <0x40000004>; - entry-latency-us = <274>; - exit-latency-us = <480>; - min-residency-us = <3934>; + entry-latency-us = <800>; + exit-latency-us = <750>; + min-residency-us = <4090>; local-timer-stop; }; @@ -213,9 +213,9 @@ compatible = "arm,idle-state"; idle-state-name = "gold-rail-power-collapse"; arm,psci-suspend-param = <0x40000004>; - entry-latency-us = <327>; - exit-latency-us = <1502>; - min-residency-us = <4488>; + entry-latency-us = <600>; + exit-latency-us = <1550>; + min-residency-us = <4791>; local-timer-stop; }; }; @@ -224,10 +224,10 @@ CLUSTER_SLEEP_0: cluster-sleep-0 { compatible = "domain-idle-state"; idle-state-name = "cluster-l3-off"; - arm,psci-suspend-param = <0x4100c344>; - entry-latency-us = <584>; - exit-latency-us = <2332>; - min-residency-us = <6118>; + arm,psci-suspend-param = <0x41000044>; + entry-latency-us = <1050>; + exit-latency-us = <2500>; + min-residency-us = <5309>; local-timer-stop; }; @@ -235,9 +235,9 @@ compatible = "domain-idle-state"; idle-state-name = "cluster-power-collapse"; arm,psci-suspend-param = <0x4100c344>; - entry-latency-us = <2893>; - exit-latency-us = <4023>; - min-residency-us = <9987>; + entry-latency-us = <2700>; + exit-latency-us = <3500>; + min-residency-us = <13959>; local-timer-stop; }; }; @@ -315,7 +315,7 @@ CLUSTER_PD: cpu-cluster0 { #power-domain-cells = <0>; - domain-idle-states = <&CLUSTER_SLEEP_0>; + domain-idle-states = <&CLUSTER_SLEEP_0>, <&CLUSTER_SLEEP_1>; }; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-firefly.dts b/arch/arm64/boot/dts/rockchip/rk3399-firefly.dts index c4dd2a6b48368..f81ce3240342c 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-firefly.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-firefly.dts @@ -770,8 +770,8 @@ sd-uhs-sdr104; /* Power supply */ - vqmmc-supply = &vcc1v8_s3; /* IO line */ - vmmc-supply = &vcc_sdio; /* card's power */ + vqmmc-supply = <&vcc1v8_s3>; /* IO line */ + 
vmmc-supply = <&vcc_sdio>; /* card's power */ #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm64/boot/dts/ti/k3-am64-main.dtsi b/arch/arm64/boot/dts/ti/k3-am64-main.dtsi index 012011dc619a5..ce4daff758e7e 100644 --- a/arch/arm64/boot/dts/ti/k3-am64-main.dtsi +++ b/arch/arm64/boot/dts/ti/k3-am64-main.dtsi @@ -59,7 +59,10 @@ #interrupt-cells = <3>; interrupt-controller; reg = <0x00 0x01800000 0x00 0x10000>, /* GICD */ - <0x00 0x01840000 0x00 0xC0000>; /* GICR */ + <0x00 0x01840000 0x00 0xC0000>, /* GICR */ + <0x01 0x00000000 0x00 0x2000>, /* GICC */ + <0x01 0x00010000 0x00 0x1000>, /* GICH */ + <0x01 0x00020000 0x00 0x2000>; /* GICV */ /* * vcpumntirq: * virtual CPU interface maintenance interrupt diff --git a/arch/arm64/boot/dts/ti/k3-am64.dtsi b/arch/arm64/boot/dts/ti/k3-am64.dtsi index 120974726be81..19684865d0d68 100644 --- a/arch/arm64/boot/dts/ti/k3-am64.dtsi +++ b/arch/arm64/boot/dts/ti/k3-am64.dtsi @@ -87,6 +87,7 @@ <0x00 0x68000000 0x00 0x68000000 0x00 0x08000000>, /* PCIe DAT0 */ <0x00 0x70000000 0x00 0x70000000 0x00 0x00200000>, /* OC SRAM */ <0x00 0x78000000 0x00 0x78000000 0x00 0x00800000>, /* Main R5FSS */ + <0x01 0x00000000 0x01 0x00000000 0x00 0x00310000>, /* A53 PERIPHBASE */ <0x06 0x00000000 0x06 0x00000000 0x01 0x00000000>, /* PCIe DAT1 */ <0x05 0x00000000 0x05 0x00000000 0x01 0x00000000>, /* FSS0 DAT3 */ diff --git a/arch/arm64/boot/dts/ti/k3-am65-main.dtsi b/arch/arm64/boot/dts/ti/k3-am65-main.dtsi index ce8bb4a61011e..e749343accedd 100644 --- a/arch/arm64/boot/dts/ti/k3-am65-main.dtsi +++ b/arch/arm64/boot/dts/ti/k3-am65-main.dtsi @@ -35,7 +35,10 @@ #interrupt-cells = <3>; interrupt-controller; reg = <0x00 0x01800000 0x00 0x10000>, /* GICD */ - <0x00 0x01880000 0x00 0x90000>; /* GICR */ + <0x00 0x01880000 0x00 0x90000>, /* GICR */ + <0x00 0x6f000000 0x00 0x2000>, /* GICC */ + <0x00 0x6f010000 0x00 0x1000>, /* GICH */ + <0x00 0x6f020000 0x00 0x2000>; /* GICV */ /* * vcpumntirq: * virtual CPU interface maintenance interrupt diff 
--git a/arch/arm64/boot/dts/ti/k3-am65.dtsi b/arch/arm64/boot/dts/ti/k3-am65.dtsi index a58a39fa42dbc..c538a0bf3cdda 100644 --- a/arch/arm64/boot/dts/ti/k3-am65.dtsi +++ b/arch/arm64/boot/dts/ti/k3-am65.dtsi @@ -86,6 +86,7 @@ <0x00 0x46000000 0x00 0x46000000 0x00 0x00200000>, <0x00 0x47000000 0x00 0x47000000 0x00 0x00068400>, <0x00 0x50000000 0x00 0x50000000 0x00 0x8000000>, + <0x00 0x6f000000 0x00 0x6f000000 0x00 0x00310000>, /* A53 PERIPHBASE */ <0x00 0x70000000 0x00 0x70000000 0x00 0x200000>, <0x05 0x00000000 0x05 0x00000000 0x01 0x0000000>, <0x07 0x00000000 0x07 0x00000000 0x01 0x0000000>; diff --git a/arch/arm64/boot/dts/ti/k3-j7200-main.dtsi b/arch/arm64/boot/dts/ti/k3-j7200-main.dtsi index 05a627ad6cdc4..16684a2f054d9 100644 --- a/arch/arm64/boot/dts/ti/k3-j7200-main.dtsi +++ b/arch/arm64/boot/dts/ti/k3-j7200-main.dtsi @@ -54,7 +54,10 @@ #interrupt-cells = <3>; interrupt-controller; reg = <0x00 0x01800000 0x00 0x10000>, /* GICD */ - <0x00 0x01900000 0x00 0x100000>; /* GICR */ + <0x00 0x01900000 0x00 0x100000>, /* GICR */ + <0x00 0x6f000000 0x00 0x2000>, /* GICC */ + <0x00 0x6f010000 0x00 0x1000>, /* GICH */ + <0x00 0x6f020000 0x00 0x2000>; /* GICV */ /* vcpumntirq: virtual CPU interface maintenance interrupt */ interrupts = ; diff --git a/arch/arm64/boot/dts/ti/k3-j7200.dtsi b/arch/arm64/boot/dts/ti/k3-j7200.dtsi index 64fef4e67d76a..b6da0454cc5bd 100644 --- a/arch/arm64/boot/dts/ti/k3-j7200.dtsi +++ b/arch/arm64/boot/dts/ti/k3-j7200.dtsi @@ -129,6 +129,7 @@ <0x00 0x00a40000 0x00 0x00a40000 0x00 0x00000800>, /* timesync router */ <0x00 0x01000000 0x00 0x01000000 0x00 0x0d000000>, /* Most peripherals */ <0x00 0x30000000 0x00 0x30000000 0x00 0x0c400000>, /* MAIN NAVSS */ + <0x00 0x6f000000 0x00 0x6f000000 0x00 0x00310000>, /* A72 PERIPHBASE */ <0x00 0x70000000 0x00 0x70000000 0x00 0x00800000>, /* MSMC RAM */ <0x00 0x18000000 0x00 0x18000000 0x00 0x08000000>, /* PCIe1 DAT0 */ <0x41 0x00000000 0x41 0x00000000 0x01 0x00000000>, /* PCIe1 DAT1 */ diff --git 
a/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi b/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi index 599861259a30f..db0669985e42a 100644 --- a/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi +++ b/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi @@ -76,7 +76,10 @@ #interrupt-cells = <3>; interrupt-controller; reg = <0x00 0x01800000 0x00 0x10000>, /* GICD */ - <0x00 0x01900000 0x00 0x100000>; /* GICR */ + <0x00 0x01900000 0x00 0x100000>, /* GICR */ + <0x00 0x6f000000 0x00 0x2000>, /* GICC */ + <0x00 0x6f010000 0x00 0x1000>, /* GICH */ + <0x00 0x6f020000 0x00 0x2000>; /* GICV */ /* vcpumntirq: virtual CPU interface maintenance interrupt */ interrupts = ; diff --git a/arch/arm64/boot/dts/ti/k3-j721e.dtsi b/arch/arm64/boot/dts/ti/k3-j721e.dtsi index 4a3872fce5339..0e23886c9fd1d 100644 --- a/arch/arm64/boot/dts/ti/k3-j721e.dtsi +++ b/arch/arm64/boot/dts/ti/k3-j721e.dtsi @@ -139,6 +139,7 @@ <0x00 0x0e000000 0x00 0x0e000000 0x00 0x01800000>, /* PCIe Core*/ <0x00 0x10000000 0x00 0x10000000 0x00 0x10000000>, /* PCIe DAT */ <0x00 0x64800000 0x00 0x64800000 0x00 0x00800000>, /* C71 */ + <0x00 0x6f000000 0x00 0x6f000000 0x00 0x00310000>, /* A72 PERIPHBASE */ <0x44 0x00000000 0x44 0x00000000 0x00 0x08000000>, /* PCIe2 DAT */ <0x44 0x10000000 0x44 0x10000000 0x00 0x08000000>, /* PCIe3 DAT */ <0x4d 0x80800000 0x4d 0x80800000 0x00 0x00800000>, /* C66_0 */ diff --git a/arch/arm64/boot/dts/ti/k3-j721s2-main.dtsi b/arch/arm64/boot/dts/ti/k3-j721s2-main.dtsi index b04db1d3ab617..be7f39299894e 100644 --- a/arch/arm64/boot/dts/ti/k3-j721s2-main.dtsi +++ b/arch/arm64/boot/dts/ti/k3-j721s2-main.dtsi @@ -34,7 +34,10 @@ #interrupt-cells = <3>; interrupt-controller; reg = <0x00 0x01800000 0x00 0x200000>, /* GICD */ - <0x00 0x01900000 0x00 0x100000>; /* GICR */ + <0x00 0x01900000 0x00 0x100000>, /* GICR */ + <0x00 0x6f000000 0x00 0x2000>, /* GICC */ + <0x00 0x6f010000 0x00 0x1000>, /* GICH */ + <0x00 0x6f020000 0x00 0x2000>; /* GICV */ /* vcpumntirq: virtual CPU interface maintenance interrupt */ interrupts = ; 
diff --git a/arch/arm64/boot/dts/ti/k3-j721s2-mcu-wakeup.dtsi b/arch/arm64/boot/dts/ti/k3-j721s2-mcu-wakeup.dtsi index 7521963719ff9..6c5c02edb375d 100644 --- a/arch/arm64/boot/dts/ti/k3-j721s2-mcu-wakeup.dtsi +++ b/arch/arm64/boot/dts/ti/k3-j721s2-mcu-wakeup.dtsi @@ -108,7 +108,7 @@ reg = <0x00 0x42110000 0x00 0x100>; gpio-controller; #gpio-cells = <2>; - interrupt-parent = <&main_gpio_intr>; + interrupt-parent = <&wkup_gpio_intr>; interrupts = <103>, <104>, <105>, <106>, <107>, <108>; interrupt-controller; #interrupt-cells = <2>; @@ -124,7 +124,7 @@ reg = <0x00 0x42100000 0x00 0x100>; gpio-controller; #gpio-cells = <2>; - interrupt-parent = <&main_gpio_intr>; + interrupt-parent = <&wkup_gpio_intr>; interrupts = <112>, <113>, <114>, <115>, <116>, <117>; interrupt-controller; #interrupt-cells = <2>; diff --git a/arch/arm64/boot/dts/ti/k3-j721s2.dtsi b/arch/arm64/boot/dts/ti/k3-j721s2.dtsi index fe5234c40f6ce..7b930a85a29d6 100644 --- a/arch/arm64/boot/dts/ti/k3-j721s2.dtsi +++ b/arch/arm64/boot/dts/ti/k3-j721s2.dtsi @@ -119,6 +119,7 @@ <0x00 0x18000000 0x00 0x18000000 0x00 0x08000000>, /* PCIe1 DAT0 */ <0x00 0x64800000 0x00 0x64800000 0x00 0x0070c000>, /* C71_1 */ <0x00 0x65800000 0x00 0x65800000 0x00 0x0070c000>, /* C71_2 */ + <0x00 0x6f000000 0x00 0x6f000000 0x00 0x00310000>, /* A72 PERIPHBASE */ <0x00 0x70000000 0x00 0x70000000 0x00 0x00400000>, /* MSMC RAM */ <0x00 0x30000000 0x00 0x30000000 0x00 0x0c400000>, /* MAIN NAVSS */ <0x41 0x00000000 0x41 0x00000000 0x01 0x00000000>, /* PCIe1 DAT1 */ diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 30516dc0b70ec..7411e4f9b5545 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -939,7 +939,7 @@ CONFIG_DMADEVICES=y CONFIG_DMA_BCM2835=y CONFIG_DMA_SUN6I=m CONFIG_FSL_EDMA=y -CONFIG_IMX_SDMA=y +CONFIG_IMX_SDMA=m CONFIG_K3_DMA=y CONFIG_MV_XOR=y CONFIG_MV_XOR_V2=y diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 
bfbf0c4c7c5e5..39f5c1672f480 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -75,6 +75,7 @@ #define ARM_CPU_PART_CORTEX_A77 0xD0D #define ARM_CPU_PART_NEOVERSE_V1 0xD40 #define ARM_CPU_PART_CORTEX_A78 0xD41 +#define ARM_CPU_PART_CORTEX_A78AE 0xD42 #define ARM_CPU_PART_CORTEX_X1 0xD44 #define ARM_CPU_PART_CORTEX_A510 0xD46 #define ARM_CPU_PART_CORTEX_A710 0xD47 @@ -123,6 +124,7 @@ #define MIDR_CORTEX_A77 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A77) #define MIDR_NEOVERSE_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V1) #define MIDR_CORTEX_A78 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78) +#define MIDR_CORTEX_A78AE MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78AE) #define MIDR_CORTEX_X1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1) #define MIDR_CORTEX_A510 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A510) #define MIDR_CORTEX_A710 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A710) diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index d62405ce3e6de..7496deab025ad 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -43,10 +43,22 @@ void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr); void kvm_vcpu_wfi(struct kvm_vcpu *vcpu); +#if defined(__KVM_VHE_HYPERVISOR__) || defined(__KVM_NVHE_HYPERVISOR__) static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu) { return !(vcpu->arch.hcr_el2 & HCR_RW); } +#else +static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = vcpu->kvm; + + WARN_ON_ONCE(!test_bit(KVM_ARCH_FLAG_REG_WIDTH_CONFIGURED, + &kvm->arch.flags)); + + return test_bit(KVM_ARCH_FLAG_EL1_32BIT, &kvm->arch.flags); +} +#endif static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu) { @@ -72,15 +84,14 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu) vcpu->arch.hcr_el2 |= HCR_TVM; } - if 
(test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features)) + if (vcpu_el1_is_32bit(vcpu)) vcpu->arch.hcr_el2 &= ~HCR_RW; - - /* - * TID3: trap feature register accesses that we virtualise. - * For now this is conditional, since no AArch32 feature regs - * are currently virtualised. - */ - if (!vcpu_el1_is_32bit(vcpu)) + else + /* + * TID3: trap feature register accesses that we virtualise. + * For now this is conditional, since no AArch32 feature regs + * are currently virtualised. + */ vcpu->arch.hcr_el2 |= HCR_TID3; if (cpus_have_const_cap(ARM64_MISMATCHED_CACHE_TYPE) || diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 031e3a2537fc8..b5ae92f77c616 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -122,7 +122,22 @@ struct kvm_arch { * should) opt in to this feature if KVM_CAP_ARM_NISV_TO_USER is * supported. */ - bool return_nisv_io_abort_to_user; +#define KVM_ARCH_FLAG_RETURN_NISV_IO_ABORT_TO_USER 0 + /* Memory Tagging Extension enabled for the guest */ +#define KVM_ARCH_FLAG_MTE_ENABLED 1 + /* At least one vCPU has ran in the VM */ +#define KVM_ARCH_FLAG_HAS_RAN_ONCE 2 + /* + * The following two bits are used to indicate the guest's EL1 + * register width configuration. A value of KVM_ARCH_FLAG_EL1_32BIT + * bit is valid only when KVM_ARCH_FLAG_REG_WIDTH_CONFIGURED is set. + * Otherwise, the guest's EL1 register width has not yet been + * determined yet. 
+ */ +#define KVM_ARCH_FLAG_REG_WIDTH_CONFIGURED 3 +#define KVM_ARCH_FLAG_EL1_32BIT 4 + + unsigned long flags; /* * VM-wide PMU filter, implemented as a bitmap and big enough for @@ -133,9 +148,6 @@ struct kvm_arch { u8 pfr0_csv2; u8 pfr0_csv3; - - /* Memory Tagging Extension enabled for the guest */ - bool mte_enabled; }; struct kvm_vcpu_fault_info { @@ -791,7 +803,9 @@ bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu); #define kvm_arm_vcpu_sve_finalized(vcpu) \ ((vcpu)->arch.flags & KVM_ARM64_VCPU_SVE_FINALIZED) -#define kvm_has_mte(kvm) (system_supports_mte() && (kvm)->arch.mte_enabled) +#define kvm_has_mte(kvm) \ + (system_supports_mte() && \ + test_bit(KVM_ARCH_FLAG_MTE_ENABLED, &(kvm)->arch.flags)) #define kvm_vcpu_has_pmu(vcpu) \ (test_bit(KVM_ARM_VCPU_PMU_V3, (vcpu)->arch.features)) diff --git a/arch/arm64/include/asm/module.lds.h b/arch/arm64/include/asm/module.lds.h index a11ccadd47d29..094701ec5500b 100644 --- a/arch/arm64/include/asm/module.lds.h +++ b/arch/arm64/include/asm/module.lds.h @@ -1,8 +1,8 @@ SECTIONS { #ifdef CONFIG_ARM64_MODULE_PLTS - .plt 0 (NOLOAD) : { BYTE(0) } - .init.plt 0 (NOLOAD) : { BYTE(0) } - .text.ftrace_trampoline 0 (NOLOAD) : { BYTE(0) } + .plt 0 : { BYTE(0) } + .init.plt 0 : { BYTE(0) } + .text.ftrace_trampoline 0 : { BYTE(0) } #endif #ifdef CONFIG_KASAN_SW_TAGS diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 94e147e5456ca..85d509a08ce36 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -999,23 +999,13 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, * page after fork() + CoW for pfn mappings. We don't always have a * hardware-managed access flag on arm64. 
*/ -static inline bool arch_faults_on_old_pte(void) -{ - WARN_ON(preemptible()); - - return !cpu_has_hw_af(); -} -#define arch_faults_on_old_pte arch_faults_on_old_pte +#define arch_has_hw_pte_young cpu_has_hw_af /* * Experimentally, it's cheap to set the access flag in hardware and we * benefit from prefaulting mappings as 'old' to start with. */ -static inline bool arch_wants_old_prefaulted_pte(void) -{ - return !arch_faults_on_old_pte(); -} -#define arch_wants_old_prefaulted_pte arch_wants_old_prefaulted_pte +#define arch_wants_old_prefaulted_pte cpu_has_hw_af static inline bool pud_sect_supported(void) { diff --git a/arch/arm64/include/asm/spectre.h b/arch/arm64/include/asm/spectre.h index 86e0cc9b9c685..aa3d3607d5c8d 100644 --- a/arch/arm64/include/asm/spectre.h +++ b/arch/arm64/include/asm/spectre.h @@ -67,7 +67,8 @@ struct bp_hardening_data { DECLARE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data); -static inline void arm64_apply_bp_hardening(void) +/* Called during entry so must be __always_inline */ +static __always_inline void arm64_apply_bp_hardening(void) { struct bp_hardening_data *d; diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index 4e65da3445c7a..14c95844f6c84 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -38,7 +38,7 @@ #define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5) #define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800) -#define __NR_compat_syscalls 451 +#define __NR_compat_syscalls 452 #endif #define __ARCH_WANT_SYS_CLONE diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index 604a2053d0067..91f2bb7199af9 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -907,6 +907,8 @@ __SYSCALL(__NR_process_mrelease, sys_process_mrelease) __SYSCALL(__NR_futex_waitv, sys_futex_waitv) #define __NR_set_mempolicy_home_node 450 __SYSCALL(__NR_set_mempolicy_home_node, 
sys_set_mempolicy_home_node) +#define __NR_pmadv_ksm 451 +__SYSCALL(__NR_pmadv_ksm, sys_pmadv_ksm) /* * Please add new compat syscalls above this comment and update diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c index 3fb79b76e9d96..7bbf5104b7b7b 100644 --- a/arch/arm64/kernel/alternative.c +++ b/arch/arm64/kernel/alternative.c @@ -42,7 +42,7 @@ bool alternative_is_applied(u16 cpufeature) /* * Check if the target PC is within an alternative block. */ -static bool branch_insn_requires_update(struct alt_instr *alt, unsigned long pc) +static __always_inline bool branch_insn_requires_update(struct alt_instr *alt, unsigned long pc) { unsigned long replptr = (unsigned long)ALT_REPL_PTR(alt); return !(pc >= replptr && pc <= (replptr + alt->alt_len)); @@ -50,7 +50,7 @@ static bool branch_insn_requires_update(struct alt_instr *alt, unsigned long pc) #define align_down(x, a) ((unsigned long)(x) & ~(((unsigned long)(a)) - 1)) -static u32 get_alt_insn(struct alt_instr *alt, __le32 *insnptr, __le32 *altinsnptr) +static __always_inline u32 get_alt_insn(struct alt_instr *alt, __le32 *insnptr, __le32 *altinsnptr) { u32 insn; @@ -95,7 +95,7 @@ static u32 get_alt_insn(struct alt_instr *alt, __le32 *insnptr, __le32 *altinsnp return insn; } -static void patch_alternative(struct alt_instr *alt, +static noinstr void patch_alternative(struct alt_instr *alt, __le32 *origptr, __le32 *updptr, int nr_inst) { __le32 *replptr; diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c index 03991eeff6430..3006f43248084 100644 --- a/arch/arm64/kernel/cpuidle.c +++ b/arch/arm64/kernel/cpuidle.c @@ -54,6 +54,9 @@ static int psci_acpi_cpu_init_idle(unsigned int cpu) struct acpi_lpi_state *lpi; struct acpi_processor *pr = per_cpu(processors, cpu); + if (unlikely(!pr || !pr->flags.has_lpi)) + return -EINVAL; + /* * If the PSCI cpu_suspend function hook has not been initialized * idle states must not be enabled, so bail out @@ -61,9 +64,6 @@ static int 
psci_acpi_cpu_init_idle(unsigned int cpu) if (!psci_ops.cpu_suspend) return -EOPNOTSUPP; - if (unlikely(!pr || !pr->flags.has_lpi)) - return -EINVAL; - count = pr->power.count - 1; if (count <= 0) return -ENODEV; diff --git a/arch/arm64/kernel/patching.c b/arch/arm64/kernel/patching.c index 771f543464e06..33e0fabc0b79b 100644 --- a/arch/arm64/kernel/patching.c +++ b/arch/arm64/kernel/patching.c @@ -117,8 +117,8 @@ static int __kprobes aarch64_insn_patch_text_cb(void *arg) int i, ret = 0; struct aarch64_insn_patch *pp = arg; - /* The first CPU becomes master */ - if (atomic_inc_return(&pp->cpu_count) == 1) { + /* The last CPU becomes master */ + if (atomic_inc_return(&pp->cpu_count) == num_online_cpus()) { for (i = 0; ret == 0 && i < pp->insn_cnt; i++) ret = aarch64_insn_patch_text_nosync(pp->text_addrs[i], pp->new_insns[i]); diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c index 6d45c63c64548..40be3a7c2c531 100644 --- a/arch/arm64/kernel/proton-pack.c +++ b/arch/arm64/kernel/proton-pack.c @@ -233,17 +233,20 @@ static void install_bp_hardening_cb(bp_hardening_cb_t fn) __this_cpu_write(bp_hardening_data.slot, HYP_VECTOR_SPECTRE_DIRECT); } -static void call_smc_arch_workaround_1(void) +/* Called during entry so must be noinstr */ +static noinstr void call_smc_arch_workaround_1(void) { arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL); } -static void call_hvc_arch_workaround_1(void) +/* Called during entry so must be noinstr */ +static noinstr void call_hvc_arch_workaround_1(void) { arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL); } -static void qcom_link_stack_sanitisation(void) +/* Called during entry so must be noinstr */ +static noinstr void qcom_link_stack_sanitisation(void) { u64 tmp; @@ -850,6 +853,7 @@ u8 spectre_bhb_loop_affected(int scope) if (scope == SCOPE_LOCAL_CPU) { static const struct midr_range spectre_bhb_k32_list[] = { MIDR_ALL_VERSIONS(MIDR_CORTEX_A78), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A78AE), 
MIDR_ALL_VERSIONS(MIDR_CORTEX_A78C), MIDR_ALL_VERSIONS(MIDR_CORTEX_X1), MIDR_ALL_VERSIONS(MIDR_CORTEX_A710), diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index d8aaf4b6f4320..3d66fba69016f 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -577,10 +577,12 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user, { int err; - err = sigframe_alloc(user, &user->fpsimd_offset, - sizeof(struct fpsimd_context)); - if (err) - return err; + if (system_supports_fpsimd()) { + err = sigframe_alloc(user, &user->fpsimd_offset, + sizeof(struct fpsimd_context)); + if (err) + return err; + } /* fault information, if valid */ if (add_all || current->thread.fault_code) { diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 27df5c1e6baad..3b46041f2b978 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -234,6 +234,7 @@ asmlinkage notrace void secondary_start_kernel(void) * Log the CPU info before it is marked online and might get read. */ cpuinfo_store_cpu(); + store_cpu_topology(cpu); /* * Enable GIC and timers. 
@@ -242,7 +243,6 @@ asmlinkage notrace void secondary_start_kernel(void) ipi_setup(cpu); - store_cpu_topology(cpu); numa_add_cpu(cpu); /* diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 4dca6ffd03d42..25d8aff273a10 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -89,7 +89,8 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, switch (cap->cap) { case KVM_CAP_ARM_NISV_TO_USER: r = 0; - kvm->arch.return_nisv_io_abort_to_user = true; + set_bit(KVM_ARCH_FLAG_RETURN_NISV_IO_ABORT_TO_USER, + &kvm->arch.flags); break; case KVM_CAP_ARM_MTE: mutex_lock(&kvm->lock); @@ -97,7 +98,7 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, r = -EINVAL; } else { r = 0; - kvm->arch.mte_enabled = true; + set_bit(KVM_ARCH_FLAG_MTE_ENABLED, &kvm->arch.flags); } mutex_unlock(&kvm->lock); break; @@ -634,6 +635,10 @@ int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu) if (kvm_vm_is_protected(kvm)) kvm_call_hyp_nvhe(__pkvm_vcpu_init_traps, vcpu); + mutex_lock(&kvm->lock); + set_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &kvm->arch.flags); + mutex_unlock(&kvm->lock); + return ret; } diff --git a/arch/arm64/kvm/mmio.c b/arch/arm64/kvm/mmio.c index 3e2d8ba11a027..3dd38a151d2a6 100644 --- a/arch/arm64/kvm/mmio.c +++ b/arch/arm64/kvm/mmio.c @@ -135,7 +135,8 @@ int io_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa) * volunteered to do so, and bail out otherwise. 
*/ if (!kvm_vcpu_dabt_isvalid(vcpu)) { - if (vcpu->kvm->arch.return_nisv_io_abort_to_user) { + if (test_bit(KVM_ARCH_FLAG_RETURN_NISV_IO_ABORT_TO_USER, + &vcpu->kvm->arch.flags)) { run->exit_reason = KVM_EXIT_ARM_NISV; run->arm_nisv.esr_iss = kvm_vcpu_dabt_iss_nisv_sanitized(vcpu); run->arm_nisv.fault_ipa = fault_ipa; diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index fbcfd4ec6f926..fc6ee6f02fec4 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -924,6 +924,8 @@ static bool pmu_irq_is_valid(struct kvm *kvm, int irq) int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) { + struct kvm *kvm = vcpu->kvm; + if (!kvm_vcpu_has_pmu(vcpu)) return -ENODEV; @@ -941,7 +943,7 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) int __user *uaddr = (int __user *)(long)attr->addr; int irq; - if (!irqchip_in_kernel(vcpu->kvm)) + if (!irqchip_in_kernel(kvm)) return -EINVAL; if (get_user(irq, uaddr)) @@ -951,7 +953,7 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) if (!(irq_is_ppi(irq) || irq_is_spi(irq))) return -EINVAL; - if (!pmu_irq_is_valid(vcpu->kvm, irq)) + if (!pmu_irq_is_valid(kvm, irq)) return -EINVAL; if (kvm_arm_pmu_irq_initialized(vcpu)) @@ -966,7 +968,7 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) struct kvm_pmu_event_filter filter; int nr_events; - nr_events = kvm_pmu_event_mask(vcpu->kvm) + 1; + nr_events = kvm_pmu_event_mask(kvm) + 1; uaddr = (struct kvm_pmu_event_filter __user *)(long)attr->addr; @@ -978,12 +980,17 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) filter.action != KVM_PMU_EVENT_DENY)) return -EINVAL; - mutex_lock(&vcpu->kvm->lock); + mutex_lock(&kvm->lock); + + if (test_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &kvm->arch.flags)) { + mutex_unlock(&kvm->lock); + return -EBUSY; + } - if (!vcpu->kvm->arch.pmu_filter) { - vcpu->kvm->arch.pmu_filter = 
bitmap_alloc(nr_events, GFP_KERNEL_ACCOUNT); - if (!vcpu->kvm->arch.pmu_filter) { - mutex_unlock(&vcpu->kvm->lock); + if (!kvm->arch.pmu_filter) { + kvm->arch.pmu_filter = bitmap_alloc(nr_events, GFP_KERNEL_ACCOUNT); + if (!kvm->arch.pmu_filter) { + mutex_unlock(&kvm->lock); return -ENOMEM; } @@ -994,17 +1001,17 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) * events, the default is to allow. */ if (filter.action == KVM_PMU_EVENT_ALLOW) - bitmap_zero(vcpu->kvm->arch.pmu_filter, nr_events); + bitmap_zero(kvm->arch.pmu_filter, nr_events); else - bitmap_fill(vcpu->kvm->arch.pmu_filter, nr_events); + bitmap_fill(kvm->arch.pmu_filter, nr_events); } if (filter.action == KVM_PMU_EVENT_ALLOW) - bitmap_set(vcpu->kvm->arch.pmu_filter, filter.base_event, filter.nevents); + bitmap_set(kvm->arch.pmu_filter, filter.base_event, filter.nevents); else - bitmap_clear(vcpu->kvm->arch.pmu_filter, filter.base_event, filter.nevents); + bitmap_clear(kvm->arch.pmu_filter, filter.base_event, filter.nevents); - mutex_unlock(&vcpu->kvm->lock); + mutex_unlock(&kvm->lock); return 0; } diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index ecc40c8cd6f64..6c70c6f61c703 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -181,27 +181,51 @@ static int kvm_vcpu_enable_ptrauth(struct kvm_vcpu *vcpu) return 0; } -static bool vcpu_allowed_register_width(struct kvm_vcpu *vcpu) +/** + * kvm_set_vm_width() - set the register width for the guest + * @vcpu: Pointer to the vcpu being configured + * + * Set both KVM_ARCH_FLAG_EL1_32BIT and KVM_ARCH_FLAG_REG_WIDTH_CONFIGURED + * in the VM flags based on the vcpu's requested register width, the HW + * capabilities and other options (such as MTE). + * When REG_WIDTH_CONFIGURED is already set, the vcpu settings must be + * consistent with the value of the FLAG_EL1_32BIT bit in the flags. + * + * Return: 0 on success, negative error code on failure. 
+ */ +static int kvm_set_vm_width(struct kvm_vcpu *vcpu) { - struct kvm_vcpu *tmp; + struct kvm *kvm = vcpu->kvm; bool is32bit; - unsigned long i; is32bit = vcpu_has_feature(vcpu, KVM_ARM_VCPU_EL1_32BIT); + + lockdep_assert_held(&kvm->lock); + + if (test_bit(KVM_ARCH_FLAG_REG_WIDTH_CONFIGURED, &kvm->arch.flags)) { + /* + * The guest's register width is already configured. + * Make sure that the vcpu is consistent with it. + */ + if (is32bit == test_bit(KVM_ARCH_FLAG_EL1_32BIT, &kvm->arch.flags)) + return 0; + + return -EINVAL; + } + if (!cpus_have_const_cap(ARM64_HAS_32BIT_EL1) && is32bit) - return false; + return -EINVAL; /* MTE is incompatible with AArch32 */ - if (kvm_has_mte(vcpu->kvm) && is32bit) - return false; + if (kvm_has_mte(kvm) && is32bit) + return -EINVAL; - /* Check that the vcpus are either all 32bit or all 64bit */ - kvm_for_each_vcpu(i, tmp, vcpu->kvm) { - if (vcpu_has_feature(tmp, KVM_ARM_VCPU_EL1_32BIT) != is32bit) - return false; - } + if (is32bit) + set_bit(KVM_ARCH_FLAG_EL1_32BIT, &kvm->arch.flags); - return true; + set_bit(KVM_ARCH_FLAG_REG_WIDTH_CONFIGURED, &kvm->arch.flags); + + return 0; } /** @@ -230,10 +254,16 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) u32 pstate; mutex_lock(&vcpu->kvm->lock); - reset_state = vcpu->arch.reset_state; - WRITE_ONCE(vcpu->arch.reset_state.reset, false); + ret = kvm_set_vm_width(vcpu); + if (!ret) { + reset_state = vcpu->arch.reset_state; + WRITE_ONCE(vcpu->arch.reset_state.reset, false); + } mutex_unlock(&vcpu->kvm->lock); + if (ret) + return ret; + /* Reset PMU outside of the non-preemptible section */ kvm_pmu_vcpu_reset(vcpu); @@ -260,14 +290,9 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) } } - if (!vcpu_allowed_register_width(vcpu)) { - ret = -EINVAL; - goto out; - } - switch (vcpu->arch.target) { default: - if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features)) { + if (vcpu_el1_is_32bit(vcpu)) { pstate = VCPU_RESET_PSTATE_SVC; } else { pstate = VCPU_RESET_PSTATE_EL1; diff --git 
a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index db63cc885771a..9e26ec80d3175 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -61,8 +61,34 @@ EXPORT_SYMBOL(memstart_addr); * unless restricted on specific platforms (e.g. 30-bit on Raspberry Pi 4). * In such case, ZONE_DMA32 covers the rest of the 32-bit addressable memory, * otherwise it is empty. + * + * Memory reservation for crash kernel either done early or deferred + * depending on DMA memory zones configs (ZONE_DMA) -- + * + * In absence of ZONE_DMA configs arm64_dma_phys_limit initialized + * here instead of max_zone_phys(). This lets early reservation of + * crash kernel memory which has a dependency on arm64_dma_phys_limit. + * Reserving memory early for crash kernel allows linear creation of block + * mappings (greater than page-granularity) for all the memory bank ranges. + * In this scheme a comparatively quicker boot is observed. + * + * If ZONE_DMA configs are defined, crash kernel memory reservation + * is delayed until DMA zone memory range size initialization performed in + * zone_sizes_init(). The defer is necessary to steer clear of DMA zone + * memory range to avoid overlap allocation. So crash kernel memory boundaries + * are not known when mapping all bank memory ranges, which otherwise means + * not possible to exclude crash kernel range from creating block mappings + * so page-granularity mappings are created for the entire memory range. + * Hence a slightly slower boot is observed. + * + * Note: Page-granularity mappings are necessary for crash kernel memory + * range for shrinking its size via /sys/kernel/kexec_crash_size interface.
*/ -phys_addr_t arm64_dma_phys_limit __ro_after_init; +#if IS_ENABLED(CONFIG_ZONE_DMA) || IS_ENABLED(CONFIG_ZONE_DMA32) +phys_addr_t __ro_after_init arm64_dma_phys_limit; +#else +phys_addr_t __ro_after_init arm64_dma_phys_limit = PHYS_MASK + 1; +#endif #ifdef CONFIG_KEXEC_CORE /* @@ -153,8 +179,6 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) if (!arm64_dma_phys_limit) arm64_dma_phys_limit = dma32_phys_limit; #endif - if (!arm64_dma_phys_limit) - arm64_dma_phys_limit = PHYS_MASK + 1; max_zone_pfns[ZONE_NORMAL] = max; free_area_init(max_zone_pfns); @@ -315,6 +339,9 @@ void __init arm64_memblock_init(void) early_init_fdt_scan_reserved_mem(); + if (!IS_ENABLED(CONFIG_ZONE_DMA) && !IS_ENABLED(CONFIG_ZONE_DMA32)) + reserve_crashkernel(); + high_memory = __va(memblock_end_of_DRAM() - 1) + 1; } @@ -361,7 +388,8 @@ void __init bootmem_init(void) * request_standard_resources() depends on crashkernel's memory being * reserved, so do it here. */ - reserve_crashkernel(); + if (IS_ENABLED(CONFIG_ZONE_DMA) || IS_ENABLED(CONFIG_ZONE_DMA32)) + reserve_crashkernel(); memblock_dump_all(); } diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 49abbf43bf355..37b8230cda6a8 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -63,6 +63,7 @@ static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss __maybe_unused; static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss __maybe_unused; static DEFINE_SPINLOCK(swapper_pgdir_lock); +static DEFINE_MUTEX(fixmap_lock); void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd) { @@ -329,6 +330,12 @@ static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end, } BUG_ON(p4d_bad(p4d)); + /* + * No need for locking during early boot. And it doesn't work as + * expected with KASLR enabled. 
+ */ + if (system_state != SYSTEM_BOOTING) + mutex_lock(&fixmap_lock); pudp = pud_set_fixmap_offset(p4dp, addr); do { pud_t old_pud = READ_ONCE(*pudp); @@ -359,6 +366,8 @@ static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end, } while (pudp++, addr = next, addr != end); pud_clear_fixmap(); + if (system_state != SYSTEM_BOOTING) + mutex_unlock(&fixmap_lock); } static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, @@ -517,7 +526,7 @@ static void __init map_mem(pgd_t *pgdp) */ BUILD_BUG_ON(pgd_index(direct_map_end - 1) == pgd_index(direct_map_end)); - if (can_set_direct_map() || crash_mem_map || IS_ENABLED(CONFIG_KFENCE)) + if (can_set_direct_map() || IS_ENABLED(CONFIG_KFENCE)) flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; /* @@ -528,6 +537,17 @@ static void __init map_mem(pgd_t *pgdp) */ memblock_mark_nomap(kernel_start, kernel_end - kernel_start); +#ifdef CONFIG_KEXEC_CORE + if (crash_mem_map) { + if (IS_ENABLED(CONFIG_ZONE_DMA) || + IS_ENABLED(CONFIG_ZONE_DMA32)) + flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; + else if (crashk_res.end) + memblock_mark_nomap(crashk_res.start, + resource_size(&crashk_res)); + } +#endif + /* map all the memory banks */ for_each_mem_range(i, &start, &end) { if (start >= end) @@ -554,6 +574,25 @@ static void __init map_mem(pgd_t *pgdp) __map_memblock(pgdp, kernel_start, kernel_end, PAGE_KERNEL, NO_CONT_MAPPINGS); memblock_clear_nomap(kernel_start, kernel_end - kernel_start); + + /* + * Use page-level mappings here so that we can shrink the region + * in page granularity and put back unused memory to buddy system + * through /sys/kernel/kexec_crash_size interface. 
+ */ +#ifdef CONFIG_KEXEC_CORE + if (crash_mem_map && + !IS_ENABLED(CONFIG_ZONE_DMA) && !IS_ENABLED(CONFIG_ZONE_DMA32)) { + if (crashk_res.end) { + __map_memblock(pgdp, crashk_res.start, + crashk_res.end + 1, + PAGE_KERNEL, + NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS); + memblock_clear_nomap(crashk_res.start, + resource_size(&crashk_res)); + } + } +#endif } void mark_rodata_ro(void) diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index e96d4d87291f3..cbc41e261f1e7 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -1049,15 +1049,18 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) goto out_off; } - /* 1. Initial fake pass to compute ctx->idx. */ - - /* Fake pass to fill in ctx->offset. */ - if (build_body(&ctx, extra_pass)) { + /* + * 1. Initial fake pass to compute ctx->idx and ctx->offset. + * + * BPF line info needs ctx->offset[i] to be the offset of + * instruction[i] in jited image, so build prologue first. + */ + if (build_prologue(&ctx, was_classic)) { prog = orig_prog; goto out_off; } - if (build_prologue(&ctx, was_classic)) { + if (build_body(&ctx, extra_pass)) { prog = orig_prog; goto out_off; } @@ -1130,6 +1133,11 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) prog->jited_len = prog_size; if (!prog->is_func || extra_pass) { + int i; + + /* offset[prog->len] is the size of program */ + for (i = 0; i <= prog->len; i++) + ctx.offset[i] *= AARCH64_INSN_SIZE; bpf_prog_fill_jited_linfo(prog, ctx.offset + 1); out_off: kfree(ctx.offset); diff --git a/arch/csky/include/asm/uaccess.h b/arch/csky/include/asm/uaccess.h index c40f06ee8d3ef..ac5a54f57d407 100644 --- a/arch/csky/include/asm/uaccess.h +++ b/arch/csky/include/asm/uaccess.h @@ -3,14 +3,13 @@ #ifndef __ASM_CSKY_UACCESS_H #define __ASM_CSKY_UACCESS_H -#define user_addr_max() \ - (uaccess_kernel() ? 
KERNEL_DS.seg : get_fs().seg) +#define user_addr_max() (current_thread_info()->addr_limit.seg) static inline int __access_ok(unsigned long addr, unsigned long size) { - unsigned long limit = current_thread_info()->addr_limit.seg; + unsigned long limit = user_addr_max(); - return ((addr < limit) && ((addr + size) < limit)); + return (size <= limit) && (addr <= (limit - size)); } #define __access_ok __access_ok diff --git a/arch/csky/kernel/perf_callchain.c b/arch/csky/kernel/perf_callchain.c index 92057de08f4f0..1612f43540877 100644 --- a/arch/csky/kernel/perf_callchain.c +++ b/arch/csky/kernel/perf_callchain.c @@ -49,7 +49,7 @@ static unsigned long user_backtrace(struct perf_callchain_entry_ctx *entry, { struct stackframe buftail; unsigned long lr = 0; - unsigned long *user_frame_tail = (unsigned long *)fp; + unsigned long __user *user_frame_tail = (unsigned long __user *)fp; /* Check accessibility of one struct frame_tail beyond */ if (!access_ok(user_frame_tail, sizeof(buftail))) diff --git a/arch/csky/kernel/signal.c b/arch/csky/kernel/signal.c index c7b763d2f526e..8867ddf3e6c77 100644 --- a/arch/csky/kernel/signal.c +++ b/arch/csky/kernel/signal.c @@ -136,7 +136,7 @@ static inline void __user *get_sigframe(struct ksignal *ksig, static int setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs) { - struct rt_sigframe *frame; + struct rt_sigframe __user *frame; int err = 0; frame = get_sigframe(ksig, regs, sizeof(*frame)); diff --git a/arch/hexagon/include/asm/uaccess.h b/arch/hexagon/include/asm/uaccess.h index ef5bfef8d490c..719ba3f3c45cd 100644 --- a/arch/hexagon/include/asm/uaccess.h +++ b/arch/hexagon/include/asm/uaccess.h @@ -25,17 +25,17 @@ * Returns true (nonzero) if the memory block *may* be valid, false (zero) * if it is definitely invalid. * - * User address space in Hexagon, like x86, goes to 0xbfffffff, so the - * simple MSB-based tests used by MIPS won't work. 
Some further - * optimization is probably possible here, but for now, keep it - * reasonably simple and not *too* slow. After all, we've got the - * MMU for backup. */ +#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) +#define user_addr_max() (uaccess_kernel() ? ~0UL : TASK_SIZE) -#define __access_ok(addr, size) \ - ((get_fs().seg == KERNEL_DS.seg) || \ - (((unsigned long)addr < get_fs().seg) && \ - (unsigned long)size < (get_fs().seg - (unsigned long)addr))) +static inline int __access_ok(unsigned long addr, unsigned long size) +{ + unsigned long limit = TASK_SIZE; + + return (size <= limit) && (addr <= (limit - size)); +} +#define __access_ok __access_ok /* * When a kernel-mode page fault is taken, the faulting instruction diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl index 78b1d03e86e1d..79ad5a5682b30 100644 --- a/arch/ia64/kernel/syscalls/syscall.tbl +++ b/arch/ia64/kernel/syscalls/syscall.tbl @@ -371,3 +371,4 @@ 448 common process_mrelease sys_process_mrelease 449 common futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node +451 common pmadv_ksm sys_pmadv_ksm diff --git a/arch/m68k/coldfire/device.c b/arch/m68k/coldfire/device.c index 0386252e9d043..4218750414bbf 100644 --- a/arch/m68k/coldfire/device.c +++ b/arch/m68k/coldfire/device.c @@ -480,7 +480,7 @@ static struct platform_device mcf_i2c5 = { #endif /* MCFI2C_BASE5 */ #endif /* IS_ENABLED(CONFIG_I2C_IMX) */ -#if IS_ENABLED(CONFIG_MCF_EDMA) +#ifdef MCFEDMA_BASE static const struct dma_slave_map mcf_edma_map[] = { { "dreq0", "rx-tx", MCF_EDMA_FILTER_PARAM(0) }, @@ -552,7 +552,7 @@ static struct platform_device mcf_edma = { .platform_data = &mcf_edma_data, } }; -#endif /* IS_ENABLED(CONFIG_MCF_EDMA) */ +#endif /* MCFEDMA_BASE */ #ifdef MCFSDHC_BASE static struct mcf_esdhc_platform_data mcf_esdhc_data = { @@ -651,7 +651,7 @@ static struct platform_device *mcf_devices[] __initdata = { &mcf_i2c5, #endif #endif -#if 
IS_ENABLED(CONFIG_MCF_EDMA) +#ifdef MCFEDMA_BASE &mcf_edma, #endif #ifdef MCFSDHC_BASE diff --git a/arch/m68k/include/asm/uaccess.h b/arch/m68k/include/asm/uaccess.h index ba670523885c8..60b786eb2254e 100644 --- a/arch/m68k/include/asm/uaccess.h +++ b/arch/m68k/include/asm/uaccess.h @@ -12,14 +12,17 @@ #include /* We let the MMU do all checking */ -static inline int access_ok(const void __user *addr, +static inline int access_ok(const void __user *ptr, unsigned long size) { - /* - * XXX: for !CONFIG_CPU_HAS_ADDRESS_SPACES this really needs to check - * for TASK_SIZE! - */ - return 1; + unsigned long limit = TASK_SIZE; + unsigned long addr = (unsigned long)ptr; + + if (IS_ENABLED(CONFIG_CPU_HAS_ADDRESS_SPACES) || + !IS_ENABLED(CONFIG_MMU)) + return 1; + + return (size <= limit) && (addr <= (limit - size)); } /* diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl index b1f3940bc2981..5ccf925567da0 100644 --- a/arch/m68k/kernel/syscalls/syscall.tbl +++ b/arch/m68k/kernel/syscalls/syscall.tbl @@ -450,3 +450,4 @@ 448 common process_mrelease sys_process_mrelease 449 common futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node +451 common pmadv_ksm sys_pmadv_ksm diff --git a/arch/microblaze/include/asm/uaccess.h b/arch/microblaze/include/asm/uaccess.h index d2a8ef9f89787..3fe96979d2c62 100644 --- a/arch/microblaze/include/asm/uaccess.h +++ b/arch/microblaze/include/asm/uaccess.h @@ -39,24 +39,13 @@ # define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) -static inline int access_ok(const void __user *addr, unsigned long size) +static inline int __access_ok(unsigned long addr, unsigned long size) { - if (!size) - goto ok; + unsigned long limit = user_addr_max(); - if ((get_fs().seg < ((unsigned long)addr)) || - (get_fs().seg < ((unsigned long)addr + size - 1))) { - pr_devel("ACCESS fail at 0x%08x (size 0x%x), seg 0x%08x\n", - (__force u32)addr, (u32)size, - (u32)get_fs().seg); - return 0; - } 
-ok: - pr_devel("ACCESS OK at 0x%08x (size 0x%x), seg 0x%08x\n", - (__force u32)addr, (u32)size, - (u32)get_fs().seg); - return 1; + return (size <= limit) && (addr <= (limit - size)); } +#define access_ok(addr, size) __access_ok((unsigned long)addr, size) # define __FIXUP_SECTION ".section .fixup,\"ax\"\n" # define __EX_TABLE_SECTION ".section __ex_table,\"a\"\n" @@ -141,27 +130,27 @@ extern long __user_bad(void); #define __get_user(x, ptr) \ ({ \ - unsigned long __gu_val = 0; \ long __gu_err; \ switch (sizeof(*(ptr))) { \ case 1: \ - __get_user_asm("lbu", (ptr), __gu_val, __gu_err); \ + __get_user_asm("lbu", (ptr), x, __gu_err); \ break; \ case 2: \ - __get_user_asm("lhu", (ptr), __gu_val, __gu_err); \ + __get_user_asm("lhu", (ptr), x, __gu_err); \ break; \ case 4: \ - __get_user_asm("lw", (ptr), __gu_val, __gu_err); \ + __get_user_asm("lw", (ptr), x, __gu_err); \ break; \ - case 8: \ - __gu_err = __copy_from_user(&__gu_val, ptr, 8); \ - if (__gu_err) \ - __gu_err = -EFAULT; \ + case 8: { \ + __u64 __x = 0; \ + __gu_err = raw_copy_from_user(&__x, ptr, 8) ? 
\ + -EFAULT : 0; \ + (x) = (typeof(x))(typeof((x) - (x)))__x; \ break; \ + } \ default: \ /* __gu_val = 0; __gu_err = -EINVAL;*/ __gu_err = __user_bad();\ } \ - x = (__force __typeof__(*(ptr))) __gu_val; \ __gu_err; \ }) diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl index 820145e473501..6b76208597f3c 100644 --- a/arch/microblaze/kernel/syscalls/syscall.tbl +++ b/arch/microblaze/kernel/syscalls/syscall.tbl @@ -456,3 +456,4 @@ 448 common process_mrelease sys_process_mrelease 449 common futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node +451 common pmadv_ksm sys_pmadv_ksm diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 058446f01487c..651d4fe355da6 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -101,6 +101,7 @@ config MIPS select TRACE_IRQFLAGS_SUPPORT select VIRT_TO_BUS select ARCH_HAS_ELFCORE_COMPAT + select HAVE_ARCH_KCSAN if 64BIT config MIPS_FIXUP_BIGPHYS_ADDR bool diff --git a/arch/mips/Makefile b/arch/mips/Makefile index e036fc025cccb..4478c5661d61d 100644 --- a/arch/mips/Makefile +++ b/arch/mips/Makefile @@ -340,14 +340,12 @@ drivers-$(CONFIG_PM) += arch/mips/power/ boot-y := vmlinux.bin boot-y += vmlinux.ecoff boot-y += vmlinux.srec -ifeq ($(shell expr $(load-y) \< 0xffffffff80000000 2> /dev/null), 0) boot-y += uImage boot-y += uImage.bin boot-y += uImage.bz2 boot-y += uImage.gz boot-y += uImage.lzma boot-y += uImage.lzo -endif boot-y += vmlinux.itb boot-y += vmlinux.gz.itb boot-y += vmlinux.bz2.itb @@ -359,9 +357,7 @@ bootz-y := vmlinuz bootz-y += vmlinuz.bin bootz-y += vmlinuz.ecoff bootz-y += vmlinuz.srec -ifeq ($(shell expr $(zload-y) \< 0xffffffff80000000 2> /dev/null), 0) bootz-y += uzImage.bin -endif bootz-y += vmlinuz.itb # diff --git a/arch/mips/boot/compressed/Makefile b/arch/mips/boot/compressed/Makefile index 5a15d51e88841..6cc28173bee89 100644 --- a/arch/mips/boot/compressed/Makefile +++ b/arch/mips/boot/compressed/Makefile @@ -38,6 
+38,7 @@ KBUILD_AFLAGS := $(KBUILD_AFLAGS) -D__ASSEMBLY__ \ KCOV_INSTRUMENT := n GCOV_PROFILE := n UBSAN_SANITIZE := n +KCSAN_SANITIZE := n # decompressor objects (linked with vmlinuz) vmlinuzobjs-y := $(obj)/head.o $(obj)/decompress.o $(obj)/string.o $(obj)/bswapsi.o diff --git a/arch/mips/boot/dts/ingenic/jz4780.dtsi b/arch/mips/boot/dts/ingenic/jz4780.dtsi index 3f9ea47a10cd2..b998301f179ce 100644 --- a/arch/mips/boot/dts/ingenic/jz4780.dtsi +++ b/arch/mips/boot/dts/ingenic/jz4780.dtsi @@ -510,7 +510,7 @@ #address-cells = <1>; #size-cells = <1>; - eth0_addr: eth-mac-addr@0x22 { + eth0_addr: eth-mac-addr@22 { reg = <0x22 0x6>; }; }; diff --git a/arch/mips/crypto/crc32-mips.c b/arch/mips/crypto/crc32-mips.c index 0a03529cf3178..3e4f5ba104f89 100644 --- a/arch/mips/crypto/crc32-mips.c +++ b/arch/mips/crypto/crc32-mips.c @@ -28,7 +28,7 @@ enum crc_type { }; #ifndef TOOLCHAIN_SUPPORTS_CRC -#define _ASM_MACRO_CRC32(OP, SZ, TYPE) \ +#define _ASM_SET_CRC(OP, SZ, TYPE) \ _ASM_MACRO_3R(OP, rt, rs, rt2, \ ".ifnc \\rt, \\rt2\n\t" \ ".error \"invalid operands \\\"" #OP " \\rt,\\rs,\\rt2\\\"\"\n\t" \ @@ -37,30 +37,36 @@ _ASM_MACRO_3R(OP, rt, rs, rt2, \ ((SZ) << 6) | ((TYPE) << 8)) \ _ASM_INSN32_IF_MM(0x00000030 | (__rs << 16) | (__rt << 21) | \ ((SZ) << 14) | ((TYPE) << 3))) -_ASM_MACRO_CRC32(crc32b, 0, 0); -_ASM_MACRO_CRC32(crc32h, 1, 0); -_ASM_MACRO_CRC32(crc32w, 2, 0); -_ASM_MACRO_CRC32(crc32d, 3, 0); -_ASM_MACRO_CRC32(crc32cb, 0, 1); -_ASM_MACRO_CRC32(crc32ch, 1, 1); -_ASM_MACRO_CRC32(crc32cw, 2, 1); -_ASM_MACRO_CRC32(crc32cd, 3, 1); -#define _ASM_SET_CRC "" +#define _ASM_UNSET_CRC(op, SZ, TYPE) ".purgem " #op "\n\t" #else /* !TOOLCHAIN_SUPPORTS_CRC */ -#define _ASM_SET_CRC ".set\tcrc\n\t" +#define _ASM_SET_CRC(op, SZ, TYPE) ".set\tcrc\n\t" +#define _ASM_UNSET_CRC(op, SZ, TYPE) #endif -#define _CRC32(crc, value, size, type) \ -do { \ - __asm__ __volatile__( \ - ".set push\n\t" \ - _ASM_SET_CRC \ - #type #size " %0, %1, %0\n\t" \ - ".set pop" \ - : "+r" (crc) \ - : "r" 
(value)); \ +#define __CRC32(crc, value, op, SZ, TYPE) \ +do { \ + __asm__ __volatile__( \ + ".set push\n\t" \ + _ASM_SET_CRC(op, SZ, TYPE) \ + #op " %0, %1, %0\n\t" \ + _ASM_UNSET_CRC(op, SZ, TYPE) \ + ".set pop" \ + : "+r" (crc) \ + : "r" (value)); \ } while (0) +#define _CRC32_crc32b(crc, value) __CRC32(crc, value, crc32b, 0, 0) +#define _CRC32_crc32h(crc, value) __CRC32(crc, value, crc32h, 1, 0) +#define _CRC32_crc32w(crc, value) __CRC32(crc, value, crc32w, 2, 0) +#define _CRC32_crc32d(crc, value) __CRC32(crc, value, crc32d, 3, 0) +#define _CRC32_crc32cb(crc, value) __CRC32(crc, value, crc32cb, 0, 1) +#define _CRC32_crc32ch(crc, value) __CRC32(crc, value, crc32ch, 1, 1) +#define _CRC32_crc32cw(crc, value) __CRC32(crc, value, crc32cw, 2, 1) +#define _CRC32_crc32cd(crc, value) __CRC32(crc, value, crc32cd, 3, 1) + +#define _CRC32(crc, value, size, op) \ + _CRC32_##op##size(crc, value) + #define CRC32(crc, value, size) \ _CRC32(crc, value, size, crc32) diff --git a/arch/mips/dec/int-handler.S b/arch/mips/dec/int-handler.S index ea5b5a83f1e11..011d1d678840a 100644 --- a/arch/mips/dec/int-handler.S +++ b/arch/mips/dec/int-handler.S @@ -131,7 +131,7 @@ */ mfc0 t0,CP0_CAUSE # get pending interrupts mfc0 t1,CP0_STATUS -#ifdef CONFIG_32BIT +#if defined(CONFIG_32BIT) && defined(CONFIG_MIPS_FP_SUPPORT) lw t2,cpu_fpu_mask #endif andi t0,ST0_IM # CAUSE.CE may be non-zero! 
@@ -139,7 +139,7 @@ beqz t0,spurious -#ifdef CONFIG_32BIT +#if defined(CONFIG_32BIT) && defined(CONFIG_MIPS_FP_SUPPORT) and t2,t0 bnez t2,fpu # handle FPU immediately #endif @@ -280,7 +280,7 @@ handle_it: j dec_irq_dispatch nop -#ifdef CONFIG_32BIT +#if defined(CONFIG_32BIT) && defined(CONFIG_MIPS_FP_SUPPORT) fpu: lw t0,fpu_kstat_irq nop diff --git a/arch/mips/dec/prom/Makefile b/arch/mips/dec/prom/Makefile index d95016016b42b..2bad87551203b 100644 --- a/arch/mips/dec/prom/Makefile +++ b/arch/mips/dec/prom/Makefile @@ -6,4 +6,4 @@ lib-y += init.o memory.o cmdline.o identify.o console.o -lib-$(CONFIG_32BIT) += locore.o +lib-$(CONFIG_CPU_R3000) += locore.o diff --git a/arch/mips/dec/setup.c b/arch/mips/dec/setup.c index a8a30bb1dee8c..82b00e45ce50a 100644 --- a/arch/mips/dec/setup.c +++ b/arch/mips/dec/setup.c @@ -746,7 +746,8 @@ void __init arch_init_irq(void) dec_interrupt[DEC_IRQ_HALT] = -1; /* Register board interrupts: FPU and cascade. */ - if (dec_interrupt[DEC_IRQ_FPU] >= 0 && cpu_has_fpu) { + if (IS_ENABLED(CONFIG_MIPS_FP_SUPPORT) && + dec_interrupt[DEC_IRQ_FPU] >= 0 && cpu_has_fpu) { struct irq_desc *desc_fpu; int irq_fpu; diff --git a/arch/mips/include/asm/dec/prom.h b/arch/mips/include/asm/dec/prom.h index 62c7dfb90e06c..1e1247add1cf8 100644 --- a/arch/mips/include/asm/dec/prom.h +++ b/arch/mips/include/asm/dec/prom.h @@ -43,16 +43,11 @@ */ #define REX_PROM_MAGIC 0x30464354 -#ifdef CONFIG_64BIT - -#define prom_is_rex(magic) 1 /* KN04 and KN05 are REX PROMs. */ - -#else /* !CONFIG_64BIT */ - -#define prom_is_rex(magic) ((magic) == REX_PROM_MAGIC) - -#endif /* !CONFIG_64BIT */ - +/* KN04 and KN05 are REX PROMs, so only do the check for R3k systems. 
*/ +static inline bool prom_is_rex(u32 magic) +{ + return !IS_ENABLED(CONFIG_CPU_R3000) || magic == REX_PROM_MAGIC; +} /* * 3MIN/MAXINE PROM entry points for DS5000/1xx's, DS5000/xx's and diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h index c7925d0e98746..867e9c3db76e9 100644 --- a/arch/mips/include/asm/pgalloc.h +++ b/arch/mips/include/asm/pgalloc.h @@ -15,6 +15,7 @@ #define __HAVE_ARCH_PMD_ALLOC_ONE #define __HAVE_ARCH_PUD_ALLOC_ONE +#define __HAVE_ARCH_PGD_FREE #include static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, @@ -48,6 +49,11 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) extern void pgd_init(unsigned long page); extern pgd_t *pgd_alloc(struct mm_struct *mm); +static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) +{ + free_pages((unsigned long)pgd, PGD_ORDER); +} + #define __pte_free_tlb(tlb,pte,address) \ do { \ pgtable_pte_page_dtor(pte); \ diff --git a/arch/mips/include/asm/setup.h b/arch/mips/include/asm/setup.h index bb36a400203df..8c56b862fd9c2 100644 --- a/arch/mips/include/asm/setup.h +++ b/arch/mips/include/asm/setup.h @@ -16,7 +16,7 @@ static inline void setup_8250_early_printk_port(unsigned long base, unsigned int reg_shift, unsigned int timeout) {} #endif -extern void set_handler(unsigned long offset, void *addr, unsigned long len); +void set_handler(unsigned long offset, const void *addr, unsigned long len); extern void set_uncached_handler(unsigned long offset, void *addr, unsigned long len); typedef void (*vi_handler_t)(void); diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl index 253ff994ed2ec..e4aeedb17c383 100644 --- a/arch/mips/kernel/syscalls/syscall_n32.tbl +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl @@ -389,3 +389,4 @@ 448 n32 process_mrelease sys_process_mrelease 449 n32 futex_waitv sys_futex_waitv 450 n32 set_mempolicy_home_node sys_set_mempolicy_home_node +451 n32 pmadv_ksm 
sys_pmadv_ksm diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl index 3f1886ad9d806..fe88db51efa00 100644 --- a/arch/mips/kernel/syscalls/syscall_n64.tbl +++ b/arch/mips/kernel/syscalls/syscall_n64.tbl @@ -365,3 +365,4 @@ 448 n64 process_mrelease sys_process_mrelease 449 n64 futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node +451 n64 pmadv_ksm sys_pmadv_ksm diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl index 8f243e35a7b20..674cb940bd153 100644 --- a/arch/mips/kernel/syscalls/syscall_o32.tbl +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl @@ -438,3 +438,4 @@ 448 o32 process_mrelease sys_process_mrelease 449 o32 futex_waitv sys_futex_waitv 450 o32 set_mempolicy_home_node sys_set_mempolicy_home_node +451 o32 pmadv_ksm sys_pmadv_ksm diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index a486486b2355c..246c6a6b02614 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -2091,19 +2091,19 @@ static void *set_vi_srs_handler(int n, vi_handler_t addr, int srs) * If no shadow set is selected then use the default handler * that does normal register saving and standard interrupt exit */ - extern char except_vec_vi, except_vec_vi_lui; - extern char except_vec_vi_ori, except_vec_vi_end; - extern char rollback_except_vec_vi; - char *vec_start = using_rollback_handler() ? - &rollback_except_vec_vi : &except_vec_vi; + extern const u8 except_vec_vi[], except_vec_vi_lui[]; + extern const u8 except_vec_vi_ori[], except_vec_vi_end[]; + extern const u8 rollback_except_vec_vi[]; + const u8 *vec_start = using_rollback_handler() ? 
+ rollback_except_vec_vi : except_vec_vi; #if defined(CONFIG_CPU_MICROMIPS) || defined(CONFIG_CPU_BIG_ENDIAN) - const int lui_offset = &except_vec_vi_lui - vec_start + 2; - const int ori_offset = &except_vec_vi_ori - vec_start + 2; + const int lui_offset = except_vec_vi_lui - vec_start + 2; + const int ori_offset = except_vec_vi_ori - vec_start + 2; #else - const int lui_offset = &except_vec_vi_lui - vec_start; - const int ori_offset = &except_vec_vi_ori - vec_start; + const int lui_offset = except_vec_vi_lui - vec_start; + const int ori_offset = except_vec_vi_ori - vec_start; #endif - const int handler_len = &except_vec_vi_end - vec_start; + const int handler_len = except_vec_vi_end - vec_start; if (handler_len > VECTORSPACING) { /* @@ -2311,7 +2311,7 @@ void per_cpu_trap_init(bool is_boot_cpu) } /* Install CPU exception handler */ -void set_handler(unsigned long offset, void *addr, unsigned long size) +void set_handler(unsigned long offset, const void *addr, unsigned long size) { #ifdef CONFIG_CPU_MICROMIPS memcpy((void *)(ebase + offset), ((unsigned char *)addr - 1), size); diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c index b131e6a773832..5cda07688f67a 100644 --- a/arch/mips/mm/tlbex.c +++ b/arch/mips/mm/tlbex.c @@ -2160,16 +2160,14 @@ static void build_r4000_tlb_load_handler(void) uasm_i_tlbr(&p); switch (current_cpu_type()) { - default: - if (cpu_has_mips_r2_exec_hazard) { - uasm_i_ehb(&p); - fallthrough; - case CPU_CAVIUM_OCTEON: case CPU_CAVIUM_OCTEON_PLUS: case CPU_CAVIUM_OCTEON2: - break; - } + break; + default: + if (cpu_has_mips_r2_exec_hazard) + uasm_i_ehb(&p); + break; } /* Examine entrylo 0 or 1 based on ptr. 
*/ @@ -2236,15 +2234,14 @@ static void build_r4000_tlb_load_handler(void) uasm_i_tlbr(&p); switch (current_cpu_type()) { - default: - if (cpu_has_mips_r2_exec_hazard) { - uasm_i_ehb(&p); - case CPU_CAVIUM_OCTEON: case CPU_CAVIUM_OCTEON_PLUS: case CPU_CAVIUM_OCTEON2: - break; - } + break; + default: + if (cpu_has_mips_r2_exec_hazard) + uasm_i_ehb(&p); + break; } /* Examine entrylo 0 or 1 based on ptr. */ diff --git a/arch/mips/ralink/ill_acc.c b/arch/mips/ralink/ill_acc.c index 115a69fc20caa..f395ae218470f 100644 --- a/arch/mips/ralink/ill_acc.c +++ b/arch/mips/ralink/ill_acc.c @@ -61,6 +61,7 @@ static int __init ill_acc_of_setup(void) pdev = of_find_device_by_node(np); if (!pdev) { pr_err("%pOFn: failed to lookup pdev\n", np); + of_node_put(np); return -EINVAL; } diff --git a/arch/mips/rb532/devices.c b/arch/mips/rb532/devices.c index 04684990e28ef..b7f6f782d9a13 100644 --- a/arch/mips/rb532/devices.c +++ b/arch/mips/rb532/devices.c @@ -301,11 +301,9 @@ static int __init plat_setup_devices(void) static int __init setup_kmac(char *s) { printk(KERN_INFO "korina mac = %s\n", s); - if (!mac_pton(s, korina_dev0_data.mac)) { + if (!mac_pton(s, korina_dev0_data.mac)) printk(KERN_ERR "Invalid mac\n"); - return -EINVAL; - } - return 0; + return 1; } __setup("kmac=", setup_kmac); diff --git a/arch/mips/vdso/Makefile b/arch/mips/vdso/Makefile index d65f55f67e19b..f72658b3a53f7 100644 --- a/arch/mips/vdso/Makefile +++ b/arch/mips/vdso/Makefile @@ -1,6 +1,9 @@ # SPDX-License-Identifier: GPL-2.0 # Objects to go into the VDSO. +# Sanitizer runtimes are unavailable and cannot be linked here. + KCSAN_SANITIZE := n + # Absolute relocation type $(ARCH_REL_TYPE_ABS) needs to be defined before # the inclusion of generic Makefile. 
ARCH_REL_TYPE_ABS := R_MIPS_JUMP_SLOT|R_MIPS_GLOB_DAT diff --git a/arch/nds32/include/asm/uaccess.h b/arch/nds32/include/asm/uaccess.h index d4cbf069dc224..37a40981deb3b 100644 --- a/arch/nds32/include/asm/uaccess.h +++ b/arch/nds32/include/asm/uaccess.h @@ -70,9 +70,7 @@ static inline void set_fs(mm_segment_t fs) * versions are void (ie, don't return a value as such). */ -#define get_user __get_user \ - -#define __get_user(x, ptr) \ +#define get_user(x, ptr) \ ({ \ long __gu_err = 0; \ __get_user_check((x), (ptr), __gu_err); \ @@ -85,6 +83,14 @@ static inline void set_fs(mm_segment_t fs) (void)0; \ }) +#define __get_user(x, ptr) \ +({ \ + long __gu_err = 0; \ + const __typeof__(*(ptr)) __user *__p = (ptr); \ + __get_user_err((x), __p, (__gu_err)); \ + __gu_err; \ +}) + #define __get_user_check(x, ptr, err) \ ({ \ const __typeof__(*(ptr)) __user *__p = (ptr); \ @@ -165,12 +171,18 @@ do { \ : "r"(addr), "i"(-EFAULT) \ : "cc") -#define put_user __put_user \ +#define put_user(x, ptr) \ +({ \ + long __pu_err = 0; \ + __put_user_check((x), (ptr), __pu_err); \ + __pu_err; \ +}) #define __put_user(x, ptr) \ ({ \ long __pu_err = 0; \ - __put_user_err((x), (ptr), __pu_err); \ + __typeof__(*(ptr)) __user *__p = (ptr); \ + __put_user_err((x), __p, __pu_err); \ __pu_err; \ }) diff --git a/arch/nios2/include/asm/uaccess.h b/arch/nios2/include/asm/uaccess.h index ba9340e96fd4c..ca9285a915efa 100644 --- a/arch/nios2/include/asm/uaccess.h +++ b/arch/nios2/include/asm/uaccess.h @@ -88,6 +88,7 @@ extern __must_check long strnlen_user(const char __user *s, long n); /* Optimized macros */ #define __get_user_asm(val, insn, addr, err) \ { \ + unsigned long __gu_val; \ __asm__ __volatile__( \ " movi %0, %3\n" \ "1: " insn " %1, 0(%2)\n" \ @@ -96,14 +97,20 @@ extern __must_check long strnlen_user(const char __user *s, long n); " .section __ex_table,\"a\"\n" \ " .word 1b, 2b\n" \ " .previous" \ - : "=&r" (err), "=r" (val) \ + : "=&r" (err), "=r" (__gu_val) \ : "r" (addr), "i" (-EFAULT)); \ 
+ val = (__force __typeof__(*(addr)))__gu_val; \ } -#define __get_user_unknown(val, size, ptr, err) do { \ +extern void __get_user_unknown(void); + +#define __get_user_8(val, ptr, err) do { \ + u64 __val = 0; \ err = 0; \ - if (__copy_from_user(&(val), ptr, size)) { \ + if (raw_copy_from_user(&(__val), ptr, sizeof(val))) { \ err = -EFAULT; \ + } else { \ + val = (typeof(val))(typeof((val) - (val)))__val; \ } \ } while (0) @@ -119,8 +126,11 @@ do { \ case 4: \ __get_user_asm(val, "ldw", ptr, err); \ break; \ + case 8: \ + __get_user_8(val, ptr, err); \ + break; \ default: \ - __get_user_unknown(val, size, ptr, err); \ + __get_user_unknown(); \ break; \ } \ } while (0) @@ -129,9 +139,7 @@ do { \ ({ \ long __gu_err = -EFAULT; \ const __typeof__(*(ptr)) __user *__gu_ptr = (ptr); \ - unsigned long __gu_val = 0; \ - __get_user_common(__gu_val, sizeof(*(ptr)), __gu_ptr, __gu_err);\ - (x) = (__force __typeof__(x))__gu_val; \ + __get_user_common(x, sizeof(*(ptr)), __gu_ptr, __gu_err); \ __gu_err; \ }) @@ -139,11 +147,9 @@ do { \ ({ \ long __gu_err = -EFAULT; \ const __typeof__(*(ptr)) __user *__gu_ptr = (ptr); \ - unsigned long __gu_val = 0; \ if (access_ok( __gu_ptr, sizeof(*__gu_ptr))) \ - __get_user_common(__gu_val, sizeof(*__gu_ptr), \ + __get_user_common(x, sizeof(*__gu_ptr), \ __gu_ptr, __gu_err); \ - (x) = (__force __typeof__(x))__gu_val; \ __gu_err; \ }) diff --git a/arch/nios2/kernel/signal.c b/arch/nios2/kernel/signal.c index 2009ae2d3c3bb..386e46443b605 100644 --- a/arch/nios2/kernel/signal.c +++ b/arch/nios2/kernel/signal.c @@ -36,10 +36,10 @@ struct rt_sigframe { static inline int rt_restore_ucontext(struct pt_regs *regs, struct switch_stack *sw, - struct ucontext *uc, int *pr2) + struct ucontext __user *uc, int *pr2) { int temp; - unsigned long *gregs = uc->uc_mcontext.gregs; + unsigned long __user *gregs = uc->uc_mcontext.gregs; int err; /* Always make any pending restarted system calls return -EINTR */ @@ -102,10 +102,11 @@ asmlinkage int 
do_rt_sigreturn(struct switch_stack *sw) { struct pt_regs *regs = (struct pt_regs *)(sw + 1); /* Verify, can we follow the stack back */ - struct rt_sigframe *frame = (struct rt_sigframe *) regs->sp; + struct rt_sigframe __user *frame; sigset_t set; int rval; + frame = (struct rt_sigframe __user *) regs->sp; if (!access_ok(frame, sizeof(*frame))) goto badframe; @@ -124,10 +125,10 @@ asmlinkage int do_rt_sigreturn(struct switch_stack *sw) return 0; } -static inline int rt_setup_ucontext(struct ucontext *uc, struct pt_regs *regs) +static inline int rt_setup_ucontext(struct ucontext __user *uc, struct pt_regs *regs) { struct switch_stack *sw = (struct switch_stack *)regs - 1; - unsigned long *gregs = uc->uc_mcontext.gregs; + unsigned long __user *gregs = uc->uc_mcontext.gregs; int err = 0; err |= __put_user(MCONTEXT_VERSION, &uc->uc_mcontext.version); @@ -162,8 +163,9 @@ static inline int rt_setup_ucontext(struct ucontext *uc, struct pt_regs *regs) return err; } -static inline void *get_sigframe(struct ksignal *ksig, struct pt_regs *regs, - size_t frame_size) +static inline void __user *get_sigframe(struct ksignal *ksig, + struct pt_regs *regs, + size_t frame_size) { unsigned long usp; @@ -174,13 +176,13 @@ static inline void *get_sigframe(struct ksignal *ksig, struct pt_regs *regs, usp = sigsp(usp, ksig); /* Verify, is it 32 or 64 bit aligned */ - return (void *)((usp - frame_size) & -8UL); + return (void __user *)((usp - frame_size) & -8UL); } static int setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs) { - struct rt_sigframe *frame; + struct rt_sigframe __user *frame; int err = 0; frame = get_sigframe(ksig, regs, sizeof(*frame)); diff --git a/arch/parisc/include/asm/traps.h b/arch/parisc/include/asm/traps.h index 34619f010c631..0ccdb738a9a36 100644 --- a/arch/parisc/include/asm/traps.h +++ b/arch/parisc/include/asm/traps.h @@ -18,6 +18,7 @@ unsigned long parisc_acctyp(unsigned long code, unsigned int inst); const char *trap_name(unsigned 
long code); void do_page_fault(struct pt_regs *regs, unsigned long code, unsigned long address); +int handle_nadtlb_fault(struct pt_regs *regs); #endif #endif diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index 94150b91c96fb..bce71cefe5724 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -558,15 +558,6 @@ static void flush_cache_pages(struct vm_area_struct *vma, struct mm_struct *mm, } } -static void flush_user_cache_tlb(struct vm_area_struct *vma, - unsigned long start, unsigned long end) -{ - flush_user_dcache_range_asm(start, end); - if (vma->vm_flags & VM_EXEC) - flush_user_icache_range_asm(start, end); - flush_tlb_range(vma, start, end); -} - void flush_cache_mm(struct mm_struct *mm) { struct vm_area_struct *vma; @@ -581,17 +572,8 @@ void flush_cache_mm(struct mm_struct *mm) return; } - preempt_disable(); - if (mm->context == mfsp(3)) { - for (vma = mm->mmap; vma; vma = vma->vm_next) - flush_user_cache_tlb(vma, vma->vm_start, vma->vm_end); - preempt_enable(); - return; - } - for (vma = mm->mmap; vma; vma = vma->vm_next) flush_cache_pages(vma, mm, vma->vm_start, vma->vm_end); - preempt_enable(); } void flush_cache_range(struct vm_area_struct *vma, @@ -605,15 +587,7 @@ void flush_cache_range(struct vm_area_struct *vma, return; } - preempt_disable(); - if (vma->vm_mm->context == mfsp(3)) { - flush_user_cache_tlb(vma, start, end); - preempt_enable(); - return; - } - - flush_cache_pages(vma, vma->vm_mm, vma->vm_start, vma->vm_end); - preempt_enable(); + flush_cache_pages(vma, vma->vm_mm, start, end); } void diff --git a/arch/parisc/kernel/patch.c b/arch/parisc/kernel/patch.c index 80a0ab372802d..e59574f65e641 100644 --- a/arch/parisc/kernel/patch.c +++ b/arch/parisc/kernel/patch.c @@ -40,10 +40,7 @@ static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags, *need_unmap = 1; set_fixmap(fixmap, page_to_phys(page)); - if (flags) - raw_spin_lock_irqsave(&patch_lock, *flags); - else - 
__acquire(&patch_lock); + raw_spin_lock_irqsave(&patch_lock, *flags); return (void *) (__fix_to_virt(fixmap) + (uintaddr & ~PAGE_MASK)); } @@ -52,10 +49,7 @@ static void __kprobes patch_unmap(int fixmap, unsigned long *flags) { clear_fixmap(fixmap); - if (flags) - raw_spin_unlock_irqrestore(&patch_lock, *flags); - else - __release(&patch_lock); + raw_spin_unlock_irqrestore(&patch_lock, *flags); } void __kprobes __patch_text_multiple(void *addr, u32 *insn, unsigned int len) @@ -67,8 +61,9 @@ void __kprobes __patch_text_multiple(void *addr, u32 *insn, unsigned int len) int mapped; /* Make sure we don't have any aliases in cache */ - flush_kernel_vmap_range(addr, len); - flush_icache_range(start, end); + flush_kernel_dcache_range_asm(start, end); + flush_kernel_icache_range_asm(start, end); + flush_tlb_kernel_range(start, end); p = fixmap = patch_map(addr, FIX_TEXT_POKE0, &flags, &mapped); @@ -81,8 +76,10 @@ void __kprobes __patch_text_multiple(void *addr, u32 *insn, unsigned int len) * We're crossing a page boundary, so * need to remap */ - flush_kernel_vmap_range((void *)fixmap, - (p-fixmap) * sizeof(*p)); + flush_kernel_dcache_range_asm((unsigned long)fixmap, + (unsigned long)p); + flush_tlb_kernel_range((unsigned long)fixmap, + (unsigned long)p); if (mapped) patch_unmap(FIX_TEXT_POKE0, &flags); p = fixmap = patch_map(addr, FIX_TEXT_POKE0, &flags, @@ -90,10 +87,10 @@ void __kprobes __patch_text_multiple(void *addr, u32 *insn, unsigned int len) } } - flush_kernel_vmap_range((void *)fixmap, (p-fixmap) * sizeof(*p)); + flush_kernel_dcache_range_asm((unsigned long)fixmap, (unsigned long)p); + flush_tlb_kernel_range((unsigned long)fixmap, (unsigned long)p); if (mapped) patch_unmap(FIX_TEXT_POKE0, &flags); - flush_icache_range(start, end); } void __kprobes __patch_text(void *addr, u32 insn) diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl index 68b46fe2f17c5..e19e8deb4c322 100644 --- a/arch/parisc/kernel/syscalls/syscall.tbl 
+++ b/arch/parisc/kernel/syscalls/syscall.tbl @@ -448,3 +448,4 @@ 448 common process_mrelease sys_process_mrelease 449 common futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node +451 common pmadv_ksm sys_pmadv_ksm diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c index b6fdebddc8e99..39576a9245c7f 100644 --- a/arch/parisc/kernel/traps.c +++ b/arch/parisc/kernel/traps.c @@ -662,6 +662,8 @@ void notrace handle_interruption(int code, struct pt_regs *regs) by hand. Technically we need to emulate: fdc,fdce,pdc,"fic,4f",prober,probeir,probew, probeiw */ + if (code == 17 && handle_nadtlb_fault(regs)) + return; fault_address = regs->ior; fault_space = regs->isr; break; diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c index e9eabf8f14d7e..f114e102aaf21 100644 --- a/arch/parisc/mm/fault.c +++ b/arch/parisc/mm/fault.c @@ -425,3 +425,92 @@ void do_page_fault(struct pt_regs *regs, unsigned long code, } pagefault_out_of_memory(); } + +/* Handle non-access data TLB miss faults. + * + * For probe instructions, accesses to userspace are considered allowed + * if they lie in a valid VMA and the access type matches. We are not + * allowed to handle MM faults here so there may be situations where an + * actual access would fail even though a probe was successful. 
+ */ +int +handle_nadtlb_fault(struct pt_regs *regs) +{ + unsigned long insn = regs->iir; + int breg, treg, xreg, val = 0; + struct vm_area_struct *vma, *prev_vma; + struct task_struct *tsk; + struct mm_struct *mm; + unsigned long address; + unsigned long acc_type; + + switch (insn & 0x380) { + case 0x280: + /* FDC instruction */ + fallthrough; + case 0x380: + /* PDC and FIC instructions */ + if (printk_ratelimit()) { + pr_warn("BUG: nullifying cache flush/purge instruction\n"); + show_regs(regs); + } + if (insn & 0x20) { + /* Base modification */ + breg = (insn >> 21) & 0x1f; + xreg = (insn >> 16) & 0x1f; + if (breg && xreg) + regs->gr[breg] += regs->gr[xreg]; + } + regs->gr[0] |= PSW_N; + return 1; + + case 0x180: + /* PROBE instruction */ + treg = insn & 0x1f; + if (regs->isr) { + tsk = current; + mm = tsk->mm; + if (mm) { + /* Search for VMA */ + address = regs->ior; + mmap_read_lock(mm); + vma = find_vma_prev(mm, address, &prev_vma); + mmap_read_unlock(mm); + + /* + * Check if access to the VMA is okay. + * We don't allow for stack expansion. + */ + acc_type = (insn & 0x40) ? 
VM_WRITE : VM_READ; + if (vma + && address >= vma->vm_start + && (vma->vm_flags & acc_type) == acc_type) + val = 1; + } + } + if (treg) + regs->gr[treg] = val; + regs->gr[0] |= PSW_N; + return 1; + + case 0x300: + /* LPA instruction */ + if (insn & 0x20) { + /* Base modification */ + breg = (insn >> 21) & 0x1f; + xreg = (insn >> 16) & 0x1f; + if (breg && xreg) + regs->gr[breg] += regs->gr[xreg]; + } + treg = insn & 0x1f; + if (treg) + regs->gr[treg] = 0; + regs->gr[0] |= PSW_N; + return 1; + + default: + break; + } + + return 0; +} diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 5f16ac1583c5d..887efa31f60ab 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -171,7 +171,7 @@ else CFLAGS-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=power7,$(call cc-option,-mtune=power5)) CFLAGS-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mcpu=power5,-mcpu=power4) endif -else +else ifdef CONFIG_PPC_BOOK3E_64 CFLAGS-$(CONFIG_GENERIC_CPU) += -mcpu=powerpc64 endif diff --git a/arch/powerpc/boot/dts/fsl/t1040rdb-rev-a.dts b/arch/powerpc/boot/dts/fsl/t1040rdb-rev-a.dts new file mode 100644 index 0000000000000..73f8c998c64df --- /dev/null +++ b/arch/powerpc/boot/dts/fsl/t1040rdb-rev-a.dts @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * T1040RDB-REV-A Device Tree Source + * + * Copyright 2014 - 2015 Freescale Semiconductor Inc. 
+ * + */ + +#include "t1040rdb.dts" + +/ { + model = "fsl,T1040RDB-REV-A"; + compatible = "fsl,T1040RDB-REV-A"; +}; + +&seville_port0 { + label = "ETH5"; +}; + +&seville_port2 { + label = "ETH7"; +}; + +&seville_port4 { + label = "ETH9"; +}; + +&seville_port6 { + label = "ETH11"; +}; diff --git a/arch/powerpc/boot/dts/fsl/t1040rdb.dts b/arch/powerpc/boot/dts/fsl/t1040rdb.dts index af0c8a6f56138..b6733e7e65805 100644 --- a/arch/powerpc/boot/dts/fsl/t1040rdb.dts +++ b/arch/powerpc/boot/dts/fsl/t1040rdb.dts @@ -119,7 +119,7 @@ managed = "in-band-status"; phy-handle = <&phy_qsgmii_0>; phy-mode = "qsgmii"; - label = "ETH5"; + label = "ETH3"; status = "okay"; }; @@ -135,7 +135,7 @@ managed = "in-band-status"; phy-handle = <&phy_qsgmii_2>; phy-mode = "qsgmii"; - label = "ETH7"; + label = "ETH5"; status = "okay"; }; @@ -151,7 +151,7 @@ managed = "in-band-status"; phy-handle = <&phy_qsgmii_4>; phy-mode = "qsgmii"; - label = "ETH9"; + label = "ETH7"; status = "okay"; }; @@ -167,7 +167,7 @@ managed = "in-band-status"; phy-handle = <&phy_qsgmii_6>; phy-mode = "qsgmii"; - label = "ETH11"; + label = "ETH9"; status = "okay"; }; diff --git a/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi b/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi index 099a598c74c00..bfe1ed5be3374 100644 --- a/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi +++ b/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi @@ -139,12 +139,12 @@ fman@400000 { ethernet@e6000 { phy-handle = <&phy_rgmii_0>; - phy-connection-type = "rgmii"; + phy-connection-type = "rgmii-id"; }; ethernet@e8000 { phy-handle = <&phy_rgmii_1>; - phy-connection-type = "rgmii"; + phy-connection-type = "rgmii-id"; }; mdio0: mdio@fc000 { diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index fc28f46d2f9dc..5404f7abbcf8d 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -612,7 +612,7 @@ DECLARE_INTERRUPT_HANDLER_RAW(do_slb_fault); DECLARE_INTERRUPT_HANDLER(do_bad_segment_interrupt); /* 
hash_utils.c */ -DECLARE_INTERRUPT_HANDLER_RAW(do_hash_fault); +DECLARE_INTERRUPT_HANDLER(do_hash_fault); /* fault.c */ DECLARE_INTERRUPT_HANDLER(do_page_fault); diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h index beba4979bff93..fee979d3a1aa4 100644 --- a/arch/powerpc/include/asm/io.h +++ b/arch/powerpc/include/asm/io.h @@ -359,25 +359,37 @@ static inline void __raw_writeq_be(unsigned long v, volatile void __iomem *addr) */ static inline void __raw_rm_writeb(u8 val, volatile void __iomem *paddr) { - __asm__ __volatile__("stbcix %0,0,%1" + __asm__ __volatile__(".machine push; \ + .machine power6; \ + stbcix %0,0,%1; \ + .machine pop;" : : "r" (val), "r" (paddr) : "memory"); } static inline void __raw_rm_writew(u16 val, volatile void __iomem *paddr) { - __asm__ __volatile__("sthcix %0,0,%1" + __asm__ __volatile__(".machine push; \ + .machine power6; \ + sthcix %0,0,%1; \ + .machine pop;" : : "r" (val), "r" (paddr) : "memory"); } static inline void __raw_rm_writel(u32 val, volatile void __iomem *paddr) { - __asm__ __volatile__("stwcix %0,0,%1" + __asm__ __volatile__(".machine push; \ + .machine power6; \ + stwcix %0,0,%1; \ + .machine pop;" : : "r" (val), "r" (paddr) : "memory"); } static inline void __raw_rm_writeq(u64 val, volatile void __iomem *paddr) { - __asm__ __volatile__("stdcix %0,0,%1" + __asm__ __volatile__(".machine push; \ + .machine power6; \ + stdcix %0,0,%1; \ + .machine pop;" : : "r" (val), "r" (paddr) : "memory"); } @@ -389,7 +401,10 @@ static inline void __raw_rm_writeq_be(u64 val, volatile void __iomem *paddr) static inline u8 __raw_rm_readb(volatile void __iomem *paddr) { u8 ret; - __asm__ __volatile__("lbzcix %0,0, %1" + __asm__ __volatile__(".machine push; \ + .machine power6; \ + lbzcix %0,0, %1; \ + .machine pop;" : "=r" (ret) : "r" (paddr) : "memory"); return ret; } @@ -397,7 +412,10 @@ static inline u8 __raw_rm_readb(volatile void __iomem *paddr) static inline u16 __raw_rm_readw(volatile void __iomem *paddr) { 
u16 ret; - __asm__ __volatile__("lhzcix %0,0, %1" + __asm__ __volatile__(".machine push; \ + .machine power6; \ + lhzcix %0,0, %1; \ + .machine pop;" : "=r" (ret) : "r" (paddr) : "memory"); return ret; } @@ -405,7 +423,10 @@ static inline u16 __raw_rm_readw(volatile void __iomem *paddr) static inline u32 __raw_rm_readl(volatile void __iomem *paddr) { u32 ret; - __asm__ __volatile__("lwzcix %0,0, %1" + __asm__ __volatile__(".machine push; \ + .machine power6; \ + lwzcix %0,0, %1; \ + .machine pop;" : "=r" (ret) : "r" (paddr) : "memory"); return ret; } @@ -413,7 +434,10 @@ static inline u32 __raw_rm_readl(volatile void __iomem *paddr) static inline u64 __raw_rm_readq(volatile void __iomem *paddr) { u64 ret; - __asm__ __volatile__("ldcix %0,0, %1" + __asm__ __volatile__(".machine push; \ + .machine power6; \ + ldcix %0,0, %1; \ + .machine pop;" : "=r" (ret) : "r" (paddr) : "memory"); return ret; } diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index 254687258f42b..f2c5c26869f1a 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -132,7 +132,11 @@ static inline bool pfn_valid(unsigned long pfn) #define virt_to_page(kaddr) pfn_to_page(virt_to_pfn(kaddr)) #define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) -#define virt_addr_valid(kaddr) pfn_valid(virt_to_pfn(kaddr)) +#define virt_addr_valid(vaddr) ({ \ + unsigned long _addr = (unsigned long)vaddr; \ + _addr >= PAGE_OFFSET && _addr < (unsigned long)high_memory && \ + pfn_valid(virt_to_pfn(_addr)); \ +}) /* * On Book-E parts we need __va to parse the device tree and we can't diff --git a/arch/powerpc/include/asm/set_memory.h b/arch/powerpc/include/asm/set_memory.h index b040094f79202..7ebc807aa8cc8 100644 --- a/arch/powerpc/include/asm/set_memory.h +++ b/arch/powerpc/include/asm/set_memory.h @@ -6,6 +6,8 @@ #define SET_MEMORY_RW 1 #define SET_MEMORY_NX 2 #define SET_MEMORY_X 3 +#define SET_MEMORY_NP 4 /* Set memory non present */ +#define SET_MEMORY_P 5 /* 
Set memory present */ int change_memory_attr(unsigned long addr, int numpages, long action); @@ -29,6 +31,14 @@ static inline int set_memory_x(unsigned long addr, int numpages) return change_memory_attr(addr, numpages, SET_MEMORY_X); } -int set_memory_attr(unsigned long addr, int numpages, pgprot_t prot); +static inline int set_memory_np(unsigned long addr, int numpages) +{ + return change_memory_attr(addr, numpages, SET_MEMORY_NP); +} + +static inline int set_memory_p(unsigned long addr, int numpages) +{ + return change_memory_attr(addr, numpages, SET_MEMORY_P); +} #endif diff --git a/arch/powerpc/include/asm/static_call.h b/arch/powerpc/include/asm/static_call.h index 0a0bc79bd1fa9..de1018cc522b3 100644 --- a/arch/powerpc/include/asm/static_call.h +++ b/arch/powerpc/include/asm/static_call.h @@ -24,5 +24,6 @@ #define ARCH_DEFINE_STATIC_CALL_TRAMP(name, func) __PPC_SCT(name, "b " #func) #define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) __PPC_SCT(name, "blr") +#define ARCH_DEFINE_STATIC_CALL_RET0_TRAMP(name) __PPC_SCT(name, "b .+20") #endif /* _ASM_POWERPC_STATIC_CALL_H */ diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 63316100080c1..4a35423f766db 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -125,8 +125,11 @@ do { \ */ #define __get_user_atomic_128_aligned(kaddr, uaddr, err) \ __asm__ __volatile__( \ + ".machine push\n" \ + ".machine altivec\n" \ "1: lvx 0,0,%1 # get user\n" \ " stvx 0,0,%2 # put kernel\n" \ + ".machine pop\n" \ "2:\n" \ ".section .fixup,\"ax\"\n" \ "3: li %0,%3\n" \ diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 733e6ef367589..1f42aabbbab3a 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -1313,6 +1313,12 @@ int __init early_init_dt_scan_rtas(unsigned long node, entryp = of_get_flat_dt_prop(node, "linux,rtas-entry", NULL); sizep = of_get_flat_dt_prop(node, "rtas-size", NULL); +#ifdef CONFIG_PPC64 + /* 
need this feature to decide the crashkernel offset */ + if (of_get_flat_dt_prop(node, "ibm,hypertas-functions", NULL)) + powerpc_firmware_features |= FW_FEATURE_LPAR; +#endif + if (basep && entryp && sizep) { rtas.base = *basep; rtas.entry = *entryp; diff --git a/arch/powerpc/kernel/secvar-sysfs.c b/arch/powerpc/kernel/secvar-sysfs.c index a0a78aba2083e..1ee4640a26413 100644 --- a/arch/powerpc/kernel/secvar-sysfs.c +++ b/arch/powerpc/kernel/secvar-sysfs.c @@ -26,15 +26,18 @@ static ssize_t format_show(struct kobject *kobj, struct kobj_attribute *attr, const char *format; node = of_find_compatible_node(NULL, NULL, "ibm,secvar-backend"); - if (!of_device_is_available(node)) - return -ENODEV; + if (!of_device_is_available(node)) { + rc = -ENODEV; + goto out; + } rc = of_property_read_string(node, "format", &format); if (rc) - return rc; + goto out; rc = sprintf(buf, "%s\n", format); +out: of_node_put(node); return rc; diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index 2600b4237292c..bb2f71a369415 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -530,3 +530,4 @@ 448 common process_mrelease sys_process_mrelease 449 common futex_waitv sys_futex_waitv 450 nospu set_mempolicy_home_node sys_set_mempolicy_home_node +451 common pmadv_ksm sys_pmadv_ksm diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index cd0b8b71ecddc..384f58a3f373f 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -582,8 +582,9 @@ void timer_rearm_host_dec(u64 now) local_paca->irq_happened |= PACA_IRQ_DEC; } else { now = *next_tb - now; - if (now <= decrementer_max) - set_dec_or_work(now); + if (now > decrementer_max) + now = decrementer_max; + set_dec_or_work(now); } } EXPORT_SYMBOL_GPL(timer_rearm_host_dec); diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S index 3beecc32940bc..5a0f023a26e90 100644 --- a/arch/powerpc/kernel/tm.S +++ 
b/arch/powerpc/kernel/tm.S @@ -443,7 +443,8 @@ restore_gprs: REST_GPR(0, r7) /* GPR0 */ REST_GPRS(2, 4, r7) /* GPR2-4 */ - REST_GPRS(8, 31, r7) /* GPR8-31 */ + REST_GPRS(8, 12, r7) /* GPR8-12 */ + REST_GPRS(14, 31, r7) /* GPR14-31 */ /* Load up PPR and DSCR here so we don't run with user values for long */ mtspr SPRN_DSCR, r5 @@ -479,18 +480,24 @@ restore_gprs: REST_GPR(6, r7) /* - * Store r1 and r5 on the stack so that we can access them after we - * clear MSR RI. + * Store user r1 and r5 and r13 on the stack (in the unused save + * areas / compiler reserved areas), so that we can access them after + * we clear MSR RI. */ REST_GPR(5, r7) std r5, -8(r1) - ld r5, GPR1(r7) + ld r5, GPR13(r7) std r5, -16(r1) + ld r5, GPR1(r7) + std r5, -24(r1) REST_GPR(7, r7) - /* Clear MSR RI since we are about to use SCRATCH0. EE is already off */ + /* Stash the stack pointer away for use after recheckpoint */ + std r1, PACAR1(r13) + + /* Clear MSR RI since we are about to clobber r13. EE is already off */ li r5, 0 mtmsrd r5, 1 @@ -501,9 +508,9 @@ restore_gprs: * until we turn MSR RI back on. */ - SET_SCRATCH0(r1) ld r5, -8(r1) - ld r1, -16(r1) + ld r13, -16(r1) + ld r1, -24(r1) /* Commit register state as checkpointed state: */ TRECHKPT @@ -519,9 +526,9 @@ restore_gprs: */ GET_PACA(r13) - GET_SCRATCH0(r1) + ld r1, PACAR1(r13) - /* R1 is restored, so we are recoverable again. EE is still off */ + /* R13, R1 is restored, so we are recoverable again. EE is still off */ li r4, MSR_RI mtmsrd r4, 1 diff --git a/arch/powerpc/kexec/core.c b/arch/powerpc/kexec/core.c index 8b68d9f91a03b..abf5897ae88c8 100644 --- a/arch/powerpc/kexec/core.c +++ b/arch/powerpc/kexec/core.c @@ -134,11 +134,18 @@ void __init reserve_crashkernel(void) if (!crashk_res.start) { #ifdef CONFIG_PPC64 /* - * On 64bit we split the RMO in half but cap it at half of - * a small SLB (128MB) since the crash kernel needs to place - * itself and some stacks to be in the first segment. 
+ * On the LPAR platform place the crash kernel to mid of + * RMA size (512MB or more) to ensure the crash kernel + * gets enough space to place itself and some stack to be + * in the first segment. At the same time normal kernel + * also get enough space to allocate memory for essential + * system resource in the first segment. Keep the crash + * kernel starts at 128MB offset on other platforms. */ - crashk_res.start = min(0x8000000ULL, (ppc64_rma_size / 2)); + if (firmware_has_feature(FW_FEATURE_LPAR)) + crashk_res.start = ppc64_rma_size / 2; + else + crashk_res.start = min(0x8000000ULL, (ppc64_rma_size / 2)); #else crashk_res.start = KDUMP_KERNELBASE; #endif diff --git a/arch/powerpc/kvm/book3s_64_entry.S b/arch/powerpc/kvm/book3s_64_entry.S index 05e003eb5d906..e42d1c609e476 100644 --- a/arch/powerpc/kvm/book3s_64_entry.S +++ b/arch/powerpc/kvm/book3s_64_entry.S @@ -414,10 +414,16 @@ END_FTR_SECTION_IFSET(CPU_FTR_DAWR1) */ ld r10,HSTATE_SCRATCH0(r13) cmpwi r10,BOOK3S_INTERRUPT_MACHINE_CHECK - beq machine_check_common + beq .Lcall_machine_check_common cmpwi r10,BOOK3S_INTERRUPT_SYSTEM_RESET - beq system_reset_common + beq .Lcall_system_reset_common b . + +.Lcall_machine_check_common: + b machine_check_common + +.Lcall_system_reset_common: + b system_reset_common #endif diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 84c89f08ae9aa..316f61a4cb599 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -225,6 +225,13 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu) int cpu; struct rcuwait *waitp; + /* + * rcuwait_wake_up contains smp_mb() which orders prior stores that + * create pending work vs below loads of cpu fields. The other side + * is the barrier in vcpu run that orders setting the cpu fields vs + * testing for pending work. 
+ */ + waitp = kvm_arch_vcpu_get_wait(vcpu); if (rcuwait_wake_up(waitp)) ++vcpu->stat.generic.halt_wakeup; @@ -1089,7 +1096,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) break; } tvcpu->arch.prodded = 1; - smp_mb(); + smp_mb(); /* This orders prodded store vs ceded load */ if (tvcpu->arch.ceded) kvmppc_fast_vcpu_kick_hv(tvcpu); break; @@ -3771,6 +3778,14 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) pvc = core_info.vc[sub]; pvc->pcpu = pcpu + thr; for_each_runnable_thread(i, vcpu, pvc) { + /* + * XXX: is kvmppc_start_thread called too late here? + * It updates vcpu->cpu and vcpu->arch.thread_cpu + * which are used by kvmppc_fast_vcpu_kick_hv(), but + * kick is called after new exceptions become available + * and exceptions are checked earlier than here, by + * kvmppc_core_prepare_to_enter. + */ kvmppc_start_thread(vcpu, pvc); kvmppc_create_dtl_entry(vcpu, pvc); trace_kvm_guest_enter(vcpu); @@ -4492,6 +4507,21 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, if (need_resched() || !kvm->arch.mmu_ready) goto out; + vcpu->cpu = pcpu; + vcpu->arch.thread_cpu = pcpu; + vc->pcpu = pcpu; + local_paca->kvm_hstate.kvm_vcpu = vcpu; + local_paca->kvm_hstate.ptid = 0; + local_paca->kvm_hstate.fake_suspend = 0; + + /* + * Orders set cpu/thread_cpu vs testing for pending interrupts and + * doorbells below. The other side is when these fields are set vs + * kvmppc_fast_vcpu_kick_hv reading the cpu/thread_cpu fields to + * kick a vCPU to notice the pending interrupt. 
+ */ + smp_mb(); + if (!nested) { kvmppc_core_prepare_to_enter(vcpu); if (test_bit(BOOK3S_IRQPRIO_EXTERNAL, @@ -4511,13 +4541,6 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, tb = mftb(); - vcpu->cpu = pcpu; - vcpu->arch.thread_cpu = pcpu; - vc->pcpu = pcpu; - local_paca->kvm_hstate.kvm_vcpu = vcpu; - local_paca->kvm_hstate.ptid = 0; - local_paca->kvm_hstate.fake_suspend = 0; - __kvmppc_create_dtl_entry(vcpu, pcpu, tb + vc->tb_offset, 0); trace_kvm_guest_enter(vcpu); @@ -4619,6 +4642,8 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, run->exit_reason = KVM_EXIT_INTR; vcpu->arch.ret = -EINTR; out: + vcpu->cpu = -1; + vcpu->arch.thread_cpu = -1; powerpc_local_irq_pmu_restore(flags); preempt_enable(); goto done; @@ -6137,8 +6162,11 @@ static int kvmppc_book3s_init_hv(void) if (r) return r; - if (kvmppc_radix_possible()) + if (kvmppc_radix_possible()) { r = kvmppc_radix_init(); + if (r) + return r; + } r = kvmppc_uvmem_init(); if (r < 0) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 2ad0ccd202d5d..f0c4545dc3ab8 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -1499,7 +1499,7 @@ int kvmppc_handle_vmx_load(struct kvm_vcpu *vcpu, { enum emulation_result emulated = EMULATE_DONE; - if (vcpu->arch.mmio_vsx_copy_nums > 2) + if (vcpu->arch.mmio_vmx_copy_nums > 2) return EMULATE_FAIL; while (vcpu->arch.mmio_vmx_copy_nums) { @@ -1596,7 +1596,7 @@ int kvmppc_handle_vmx_store(struct kvm_vcpu *vcpu, unsigned int index = rs & KVM_MMIO_REG_MASK; enum emulation_result emulated = EMULATE_DONE; - if (vcpu->arch.mmio_vsx_copy_nums > 2) + if (vcpu->arch.mmio_vmx_copy_nums > 2) return EMULATE_FAIL; vcpu->arch.io_gpr = rs; diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index 906d434633667..00c68e7fb11e4 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -43,9 +43,14 @@ int raw_patch_instruction(u32 *addr, ppc_inst_t instr) 
#ifdef CONFIG_STRICT_KERNEL_RWX static DEFINE_PER_CPU(struct vm_struct *, text_poke_area); +static int map_patch_area(void *addr, unsigned long text_poke_addr); +static void unmap_patch_area(unsigned long addr); + static int text_area_cpu_up(unsigned int cpu) { struct vm_struct *area; + unsigned long addr; + int err; area = get_vm_area(PAGE_SIZE, VM_ALLOC); if (!area) { @@ -53,6 +58,15 @@ static int text_area_cpu_up(unsigned int cpu) cpu); return -1; } + + // Map/unmap the area to ensure all page tables are pre-allocated + addr = (unsigned long)area->addr; + err = map_patch_area(empty_zero_page, addr); + if (err) + return err; + + unmap_patch_area(addr); + this_cpu_write(text_poke_area, area); return 0; diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index bd3734d5be892..bf755a7be5147 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -112,9 +112,9 @@ static nokprobe_inline long address_ok(struct pt_regs *regs, { if (!user_mode(regs)) return 1; - if (__access_ok(ea, nb)) + if (access_ok((void __user *)ea, nb)) return 1; - if (__access_ok(ea, 1)) + if (access_ok((void __user *)ea, 1)) /* Access overlaps the end of the user region */ regs->dar = TASK_SIZE_MAX - 1; else @@ -1097,7 +1097,10 @@ NOKPROBE_SYMBOL(emulate_dcbz); #define __put_user_asmx(x, addr, err, op, cr) \ __asm__ __volatile__( \ + ".machine push\n" \ + ".machine power8\n" \ "1: " op " %2,0,%3\n" \ + ".machine pop\n" \ " mfcr %1\n" \ "2:\n" \ ".section .fixup,\"ax\"\n" \ @@ -1110,7 +1113,10 @@ NOKPROBE_SYMBOL(emulate_dcbz); #define __get_user_asmx(x, addr, err, op) \ __asm__ __volatile__( \ + ".machine push\n" \ + ".machine power8\n" \ "1: "op" %1,0,%2\n" \ + ".machine pop\n" \ "2:\n" \ ".section .fixup,\"ax\"\n" \ "3: li %0,%3\n" \ @@ -3389,7 +3395,7 @@ int emulate_loadstore(struct pt_regs *regs, struct instruction_op *op) __put_user_asmx(op->val, ea, err, "stbcx.", cr); break; case 2: - __put_user_asmx(op->val, ea, err, "stbcx.", cr); + __put_user_asmx(op->val, ea, 
err, "sthcx.", cr); break; #endif case 4: diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index 7abf82a698d32..985cabdd7f679 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -1621,8 +1621,7 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap, } EXPORT_SYMBOL_GPL(hash_page); -DECLARE_INTERRUPT_HANDLER(__do_hash_fault); -DEFINE_INTERRUPT_HANDLER(__do_hash_fault) +DEFINE_INTERRUPT_HANDLER(do_hash_fault) { unsigned long ea = regs->dar; unsigned long dsisr = regs->dsisr; @@ -1681,35 +1680,6 @@ DEFINE_INTERRUPT_HANDLER(__do_hash_fault) } } -/* - * The _RAW interrupt entry checks for the in_nmi() case before - * running the full handler. - */ -DEFINE_INTERRUPT_HANDLER_RAW(do_hash_fault) -{ - /* - * If we are in an "NMI" (e.g., an interrupt when soft-disabled), then - * don't call hash_page, just fail the fault. This is required to - * prevent re-entrancy problems in the hash code, namely perf - * interrupts hitting while something holds H_PAGE_BUSY, and taking a - * hash fault. See the comment in hash_preload(). - * - * We come here as a result of a DSI at a point where we don't want - * to call hash_page, such as when we are accessing memory (possibly - * user memory) inside a PMU interrupt that occurred while interrupts - * were soft-disabled. We want to invoke the exception handler for - * the access, or panic if there isn't a handler. - */ - if (unlikely(in_nmi())) { - do_bad_page_fault_segv(regs); - return 0; - } - - __do_hash_fault(regs); - - return 0; -} - #ifdef CONFIG_PPC_MM_SLICES static bool should_hash_preload(struct mm_struct *mm, unsigned long ea) { @@ -1776,26 +1746,18 @@ static void hash_preload(struct mm_struct *mm, pte_t *ptep, unsigned long ea, #endif /* CONFIG_PPC_64K_PAGES */ /* - * __hash_page_* must run with interrupts off, as it sets the - * H_PAGE_BUSY bit. 
It's possible for perf interrupts to hit at any - * time and may take a hash fault reading the user stack, see - * read_user_stack_slow() in the powerpc/perf code. - * - * If that takes a hash fault on the same page as we lock here, it - * will bail out when seeing H_PAGE_BUSY set, and retry the access - * leading to an infinite loop. + * __hash_page_* must run with interrupts off, including PMI interrupts + * off, as it sets the H_PAGE_BUSY bit. * - * Disabling interrupts here does not prevent perf interrupts, but it - * will prevent them taking hash faults (see the NMI test in - * do_hash_page), then read_user_stack's copy_from_user_nofault will - * fail and perf will fall back to read_user_stack_slow(), which - * walks the Linux page tables. + * It's otherwise possible for perf interrupts to hit at any time and + * may take a hash fault reading the user stack, which could take a + * hash miss and deadlock on the same H_PAGE_BUSY bit. * * Interrupts must also be off for the duration of the * mm_is_thread_local test and update, to prevent preempt running the * mm on another CPU (XXX: this may be racy vs kthread_use_mm). */ - local_irq_save(flags); + powerpc_local_irq_pmu_save(flags); /* Is that local to this CPU ? 
*/ if (mm_is_thread_local(mm)) @@ -1820,7 +1782,7 @@ static void hash_preload(struct mm_struct *mm, pte_t *ptep, unsigned long ea, mm_ctx_user_psize(&mm->context), pte_val(*ptep)); - local_irq_restore(flags); + powerpc_local_irq_pmu_restore(flags); } /* diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index eb8ecd7343a99..7ba6d3eff636d 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -567,18 +567,24 @@ NOKPROBE_SYMBOL(hash__do_page_fault); static void __bad_page_fault(struct pt_regs *regs, int sig) { int is_write = page_fault_is_write(regs->dsisr); + const char *msg; /* kernel has accessed a bad area */ + if (regs->dar < PAGE_SIZE) + msg = "Kernel NULL pointer dereference"; + else + msg = "Unable to handle kernel data access"; + switch (TRAP(regs)) { case INTERRUPT_DATA_STORAGE: - case INTERRUPT_DATA_SEGMENT: case INTERRUPT_H_DATA_STORAGE: - pr_alert("BUG: %s on %s at 0x%08lx\n", - regs->dar < PAGE_SIZE ? "Kernel NULL pointer dereference" : - "Unable to handle kernel data access", + pr_alert("BUG: %s on %s at 0x%08lx\n", msg, is_write ? 
"write" : "read", regs->dar); break; + case INTERRUPT_DATA_SEGMENT: + pr_alert("BUG: %s at 0x%08lx\n", msg, regs->dar); + break; case INTERRUPT_INST_STORAGE: case INTERRUPT_INST_SEGMENT: pr_alert("BUG: Unable to handle kernel instruction fetch%s", diff --git a/arch/powerpc/mm/kasan/kasan_init_32.c b/arch/powerpc/mm/kasan/kasan_init_32.c index cf8770b1a692e..f3e4d069e0ba7 100644 --- a/arch/powerpc/mm/kasan/kasan_init_32.c +++ b/arch/powerpc/mm/kasan/kasan_init_32.c @@ -83,13 +83,12 @@ void __init kasan_update_early_region(unsigned long k_start, unsigned long k_end, pte_t pte) { unsigned long k_cur; - phys_addr_t pa = __pa(kasan_early_shadow_page); for (k_cur = k_start; k_cur != k_end; k_cur += PAGE_SIZE) { pmd_t *pmd = pmd_off_k(k_cur); pte_t *ptep = pte_offset_kernel(pmd, k_cur); - if ((pte_val(*ptep) & PTE_RPN_MASK) != pa) + if (pte_page(*ptep) != virt_to_page(lm_alias(kasan_early_shadow_page))) continue; __set_pte_at(&init_mm, k_cur, ptep, pte, 0); diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 8e301cd8925b2..4d221d033804e 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -255,7 +255,7 @@ void __init mem_init(void) #endif high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); - set_max_mapnr(max_low_pfn); + set_max_mapnr(max_pfn); kasan_late_init(); diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 9d5f710d2c205..b9b7fefbb64b9 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -956,7 +956,9 @@ static int __init parse_numa_properties(void) of_node_put(cpu); } - node_set_online(nid); + /* node_set_online() is an UB if 'nid' is negative */ + if (likely(nid >= 0)) + node_set_online(nid); } get_n_mem_cells(&n_mem_addr_cells, &n_mem_size_cells); diff --git a/arch/powerpc/mm/pageattr.c b/arch/powerpc/mm/pageattr.c index edea388e9d3fb..85753e32a4de9 100644 --- a/arch/powerpc/mm/pageattr.c +++ b/arch/powerpc/mm/pageattr.c @@ -15,12 +15,14 @@ #include +static pte_basic_t pte_update_delta(pte_t *ptep, 
unsigned long addr, + unsigned long old, unsigned long new) +{ + return pte_update(&init_mm, addr, ptep, old & ~new, new & ~old, 0); +} + /* - * Updates the attributes of a page in three steps: - * - * 1. take the page_table_lock - * 2. install the new entry with the updated attributes - * 3. flush the TLB + * Updates the attributes of a page atomically. * * This sequence is safe against concurrent updates, and also allows updating the * attributes of a page currently being executed or accessed. @@ -28,41 +30,39 @@ static int change_page_attr(pte_t *ptep, unsigned long addr, void *data) { long action = (long)data; - pte_t pte; - spin_lock(&init_mm.page_table_lock); - - pte = ptep_get(ptep); - - /* modify the PTE bits as desired, then apply */ + /* modify the PTE bits as desired */ switch (action) { case SET_MEMORY_RO: - pte = pte_wrprotect(pte); + /* Don't clear DIRTY bit */ + pte_update_delta(ptep, addr, _PAGE_KERNEL_RW & ~_PAGE_DIRTY, _PAGE_KERNEL_RO); break; case SET_MEMORY_RW: - pte = pte_mkwrite(pte_mkdirty(pte)); + pte_update_delta(ptep, addr, _PAGE_KERNEL_RO, _PAGE_KERNEL_RW); break; case SET_MEMORY_NX: - pte = pte_exprotect(pte); + pte_update_delta(ptep, addr, _PAGE_KERNEL_ROX, _PAGE_KERNEL_RO); break; case SET_MEMORY_X: - pte = pte_mkexec(pte); + pte_update_delta(ptep, addr, _PAGE_KERNEL_RO, _PAGE_KERNEL_ROX); + break; + case SET_MEMORY_NP: + pte_update(&init_mm, addr, ptep, _PAGE_PRESENT, 0, 0); + break; + case SET_MEMORY_P: + pte_update(&init_mm, addr, ptep, 0, _PAGE_PRESENT, 0); break; default: WARN_ON_ONCE(1); break; } - pte_update(&init_mm, addr, ptep, ~0UL, pte_val(pte), 0); - /* See ptesync comment in radix__set_pte_at() */ if (radix_enabled()) asm volatile("ptesync": : :"memory"); flush_tlb_kernel_range(addr, addr + PAGE_SIZE); - spin_unlock(&init_mm.page_table_lock); - return 0; } @@ -96,36 +96,3 @@ int change_memory_attr(unsigned long addr, int numpages, long action) return apply_to_existing_page_range(&init_mm, start, size, change_page_attr, 
(void *)action); } - -/* - * Set the attributes of a page: - * - * This function is used by PPC32 at the end of init to set final kernel memory - * protection. It includes changing the maping of the page it is executing from - * and data pages it is using. - */ -static int set_page_attr(pte_t *ptep, unsigned long addr, void *data) -{ - pgprot_t prot = __pgprot((unsigned long)data); - - spin_lock(&init_mm.page_table_lock); - - set_pte_at(&init_mm, addr, ptep, pte_modify(*ptep, prot)); - flush_tlb_kernel_range(addr, addr + PAGE_SIZE); - - spin_unlock(&init_mm.page_table_lock); - - return 0; -} - -int set_memory_attr(unsigned long addr, int numpages, pgprot_t prot) -{ - unsigned long start = ALIGN_DOWN(addr, PAGE_SIZE); - unsigned long sz = numpages * PAGE_SIZE; - - if (numpages <= 0) - return 0; - - return apply_to_existing_page_range(&init_mm, start, sz, set_page_attr, - (void *)pgprot_val(prot)); -} diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index 906e4e4328b2e..f71ededdc02a5 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -135,10 +135,12 @@ void mark_initmem_nx(void) unsigned long numpages = PFN_UP((unsigned long)_einittext) - PFN_DOWN((unsigned long)_sinittext); - if (v_block_mapped((unsigned long)_sinittext)) + if (v_block_mapped((unsigned long)_sinittext)) { mmu_mark_initmem_nx(); - else - set_memory_attr((unsigned long)_sinittext, numpages, PAGE_KERNEL); + } else { + set_memory_nx((unsigned long)_sinittext, numpages); + set_memory_rw((unsigned long)_sinittext, numpages); + } } #ifdef CONFIG_STRICT_KERNEL_RWX @@ -152,18 +154,14 @@ void mark_rodata_ro(void) return; } - numpages = PFN_UP((unsigned long)_etext) - - PFN_DOWN((unsigned long)_stext); - - set_memory_attr((unsigned long)_stext, numpages, PAGE_KERNEL_ROX); /* - * mark .rodata as read only. Use __init_begin rather than __end_rodata - * to cover NOTES and EXCEPTION_TABLE. + * mark .text and .rodata as read only. 
Use __init_begin rather than + * __end_rodata to cover NOTES and EXCEPTION_TABLE. */ numpages = PFN_UP((unsigned long)__init_begin) - - PFN_DOWN((unsigned long)__start_rodata); + PFN_DOWN((unsigned long)_stext); - set_memory_attr((unsigned long)__start_rodata, numpages, PAGE_KERNEL_RO); + set_memory_ro((unsigned long)_stext, numpages); // mark_initmem_nx() should have already run by now ptdump_check_wx(); @@ -179,8 +177,8 @@ void __kernel_map_pages(struct page *page, int numpages, int enable) return; if (enable) - set_memory_attr(addr, numpages, PAGE_KERNEL); + set_memory_p(addr, numpages); else - set_memory_attr(addr, numpages, __pgprot(0)); + set_memory_np(addr, numpages); } #endif /* CONFIG_DEBUG_PAGEALLOC */ diff --git a/arch/powerpc/perf/callchain.h b/arch/powerpc/perf/callchain.h index d6fa6e25234f4..19a8d051ddf10 100644 --- a/arch/powerpc/perf/callchain.h +++ b/arch/powerpc/perf/callchain.h @@ -2,7 +2,6 @@ #ifndef _POWERPC_PERF_CALLCHAIN_H #define _POWERPC_PERF_CALLCHAIN_H -int read_user_stack_slow(const void __user *ptr, void *buf, int nb); void perf_callchain_user_64(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs); void perf_callchain_user_32(struct perf_callchain_entry_ctx *entry, @@ -26,17 +25,11 @@ static inline int __read_user_stack(const void __user *ptr, void *ret, size_t size) { unsigned long addr = (unsigned long)ptr; - int rc; if (addr > TASK_SIZE - size || (addr & (size - 1))) return -EFAULT; - rc = copy_from_user_nofault(ret, ptr, size); - - if (IS_ENABLED(CONFIG_PPC64) && !radix_enabled() && rc) - return read_user_stack_slow(ptr, ret, size); - - return rc; + return copy_from_user_nofault(ret, ptr, size); } #endif /* _POWERPC_PERF_CALLCHAIN_H */ diff --git a/arch/powerpc/perf/callchain_64.c b/arch/powerpc/perf/callchain_64.c index 8d0df4226328d..488e8a21a11ea 100644 --- a/arch/powerpc/perf/callchain_64.c +++ b/arch/powerpc/perf/callchain_64.c @@ -18,33 +18,6 @@ #include "callchain.h" -/* - * On 64-bit we don't want to invoke 
hash_page on user addresses from - * interrupt context, so if the access faults, we read the page tables - * to find which page (if any) is mapped and access it directly. Radix - * has no need for this so it doesn't use read_user_stack_slow. - */ -int read_user_stack_slow(const void __user *ptr, void *buf, int nb) -{ - - unsigned long addr = (unsigned long) ptr; - unsigned long offset; - struct page *page; - void *kaddr; - - if (get_user_page_fast_only(addr, FOLL_WRITE, &page)) { - kaddr = page_address(page); - - /* align address to page boundary */ - offset = addr & ~PAGE_MASK; - - memcpy(buf, kaddr + offset, nb); - put_page(page); - return 0; - } - return -EFAULT; -} - static int read_user_stack_64(const unsigned long __user *ptr, unsigned long *ret) { return __read_user_stack(ptr, ret, sizeof(*ret)); diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c index e106909ff9c37..e7583fbcc8fa1 100644 --- a/arch/powerpc/perf/imc-pmu.c +++ b/arch/powerpc/perf/imc-pmu.c @@ -1457,7 +1457,11 @@ static int trace_imc_event_init(struct perf_event *event) event->hw.idx = -1; - event->pmu->task_ctx_nr = perf_hw_context; + /* + * There can only be a single PMU for perf_hw_context events which is assigned to + * core PMU. Hence use "perf_sw_context" for trace_imc. 
+ */ + event->pmu->task_ctx_nr = perf_sw_context; event->destroy = reset_global_refc; return 0; } diff --git a/arch/powerpc/platforms/8xx/pic.c b/arch/powerpc/platforms/8xx/pic.c index f2ba837249d69..04a6abf14c295 100644 --- a/arch/powerpc/platforms/8xx/pic.c +++ b/arch/powerpc/platforms/8xx/pic.c @@ -153,6 +153,7 @@ int __init mpc8xx_pic_init(void) if (mpc8xx_pic_host == NULL) { printk(KERN_ERR "MPC8xx PIC: failed to allocate irq host!\n"); ret = -ENOMEM; + goto out; } ret = 0; diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 87bc1929ee5a8..e2e1fec91c6ed 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -107,6 +107,7 @@ config PPC_BOOK3S_64 config PPC_BOOK3E_64 bool "Embedded processors" + select PPC_FSL_BOOK3E select PPC_FPU # Make it a choice ? select PPC_SMP_MUXED_IPI select PPC_DOORBELL @@ -295,7 +296,7 @@ config FSL_BOOKE config PPC_FSL_BOOK3E bool select ARCH_SUPPORTS_HUGETLBFS if PHYS_64BIT || PPC64 - select FSL_EMB_PERFMON + imply FSL_EMB_PERFMON select PPC_SMP_MUXED_IPI select PPC_DOORBELL select PPC_KUEP diff --git a/arch/powerpc/platforms/powernv/rng.c b/arch/powerpc/platforms/powernv/rng.c index b4386714494a6..e3d44b36ae98f 100644 --- a/arch/powerpc/platforms/powernv/rng.c +++ b/arch/powerpc/platforms/powernv/rng.c @@ -43,7 +43,11 @@ static unsigned long rng_whiten(struct powernv_rng *rng, unsigned long val) unsigned long parity; /* Calculate the parity of the value */ - asm ("popcntd %0,%1" : "=r" (parity) : "r" (val)); + asm (".machine push; \ + .machine power7; \ + popcntd %0,%1; \ + .machine pop;" + : "=r" (parity) : "r" (val)); /* xor our value with the previous mask */ val ^= rng->mask; diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c index 90c9d3531694b..4ba8245681192 100644 --- a/arch/powerpc/platforms/pseries/pci_dlpar.c +++ b/arch/powerpc/platforms/pseries/pci_dlpar.c @@ -78,6 +78,9 @@ int 
remove_phb_dynamic(struct pci_controller *phb) pseries_msi_free_domains(phb); + /* Keep a reference so phb isn't freed yet */ + get_device(&host_bridge->dev); + /* Remove the PCI bus and unregister the bridge device from sysfs */ phb->bus = NULL; pci_remove_bus(b); @@ -101,6 +104,7 @@ int remove_phb_dynamic(struct pci_controller *phb) * the pcibios_free_controller_deferred() callback; * see pseries_root_bridge_prepare(). */ + put_device(&host_bridge->dev); return 0; } diff --git a/arch/powerpc/sysdev/fsl_gtm.c b/arch/powerpc/sysdev/fsl_gtm.c index 8963eaffb1b7b..39186ad6b3c3a 100644 --- a/arch/powerpc/sysdev/fsl_gtm.c +++ b/arch/powerpc/sysdev/fsl_gtm.c @@ -86,7 +86,7 @@ static LIST_HEAD(gtms); */ struct gtm_timer *gtm_get_timer16(void) { - struct gtm *gtm = NULL; + struct gtm *gtm; int i; list_for_each_entry(gtm, &gtms, list_node) { @@ -103,7 +103,7 @@ struct gtm_timer *gtm_get_timer16(void) spin_unlock_irq(&gtm->lock); } - if (gtm) + if (!list_empty(&gtms)) return ERR_PTR(-EBUSY); return ERR_PTR(-ENODEV); } diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index 1ca5564bda9d0..bb5bda6b2357b 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -1708,20 +1708,20 @@ __be32 *xive_queue_page_alloc(unsigned int cpu, u32 queue_shift) static int __init xive_off(char *arg) { xive_cmdline_disabled = true; - return 0; + return 1; } __setup("xive=off", xive_off); static int __init xive_store_eoi_cmdline(char *arg) { if (!arg) - return -EINVAL; + return 1; if (strncmp(arg, "off", 3) == 0) { pr_info("StoreEOI disabled on kernel command line\n"); xive_store_eoi = false; } - return 0; + return 1; } __setup("xive.store-eoi=", xive_store_eoi_cmdline); @@ -1791,7 +1791,7 @@ static int xive_ipi_debug_show(struct seq_file *m, void *private) if (xive_ops->debug_show) xive_ops->debug_show(m, private); - for_each_possible_cpu(cpu) + for_each_online_cpu(cpu) xive_debug_show_ipi(m, cpu); return 0; } diff --git
a/arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts b/arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts index 984872f3d3a9b..b9e30df127fef 100644 --- a/arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts +++ b/arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts @@ -203,6 +203,8 @@ compatible = "jedec,spi-nor"; reg = <0>; spi-max-frequency = <50000000>; + spi-tx-bus-width = <4>; + spi-rx-bus-width = <4>; m25p,fast-read; broken-flash-reset; }; diff --git a/arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts b/arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts index 7ba99b4da3042..8d23401b0bbb6 100644 --- a/arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts +++ b/arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts @@ -205,6 +205,8 @@ compatible = "jedec,spi-nor"; reg = <0>; spi-max-frequency = <50000000>; + spi-tx-bus-width = <4>; + spi-rx-bus-width = <4>; m25p,fast-read; broken-flash-reset; }; diff --git a/arch/riscv/boot/dts/canaan/sipeed_maix_go.dts b/arch/riscv/boot/dts/canaan/sipeed_maix_go.dts index be9b12c9b374a..24fd83b43d9d5 100644 --- a/arch/riscv/boot/dts/canaan/sipeed_maix_go.dts +++ b/arch/riscv/boot/dts/canaan/sipeed_maix_go.dts @@ -213,6 +213,8 @@ compatible = "jedec,spi-nor"; reg = <0>; spi-max-frequency = <50000000>; + spi-tx-bus-width = <4>; + spi-rx-bus-width = <4>; m25p,fast-read; broken-flash-reset; }; diff --git a/arch/riscv/boot/dts/canaan/sipeed_maixduino.dts b/arch/riscv/boot/dts/canaan/sipeed_maixduino.dts index 031c0c28f8195..25341f38292aa 100644 --- a/arch/riscv/boot/dts/canaan/sipeed_maixduino.dts +++ b/arch/riscv/boot/dts/canaan/sipeed_maixduino.dts @@ -178,6 +178,8 @@ compatible = "jedec,spi-nor"; reg = <0>; spi-max-frequency = <50000000>; + spi-tx-bus-width = <4>; + spi-rx-bus-width = <4>; m25p,fast-read; broken-flash-reset; }; diff --git a/arch/riscv/include/asm/module.lds.h b/arch/riscv/include/asm/module.lds.h index 4254ff2ff0494..1075beae1ac64 100644 --- a/arch/riscv/include/asm/module.lds.h +++ b/arch/riscv/include/asm/module.lds.h @@ -2,8 +2,8 @@ /* Copyright 
(C) 2017 Andes Technology Corporation */ #ifdef CONFIG_MODULE_SECTIONS SECTIONS { - .plt (NOLOAD) : { BYTE(0) } - .got (NOLOAD) : { BYTE(0) } - .got.plt (NOLOAD) : { BYTE(0) } + .plt : { BYTE(0) } + .got : { BYTE(0) } + .got.plt : { BYTE(0) } } #endif diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h index 60da0dcacf145..74d888c8d631a 100644 --- a/arch/riscv/include/asm/thread_info.h +++ b/arch/riscv/include/asm/thread_info.h @@ -11,11 +11,17 @@ #include #include +#ifdef CONFIG_KASAN +#define KASAN_STACK_ORDER 1 +#else +#define KASAN_STACK_ORDER 0 +#endif + /* thread information allocation */ #ifdef CONFIG_64BIT -#define THREAD_SIZE_ORDER (2) +#define THREAD_SIZE_ORDER (2 + KASAN_STACK_ORDER) #else -#define THREAD_SIZE_ORDER (1) +#define THREAD_SIZE_ORDER (1 + KASAN_STACK_ORDER) #endif #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) diff --git a/arch/riscv/kernel/cpu_ops_sbi.c b/arch/riscv/kernel/cpu_ops_sbi.c index dae29cbfe550b..7f2ad008274f3 100644 --- a/arch/riscv/kernel/cpu_ops_sbi.c +++ b/arch/riscv/kernel/cpu_ops_sbi.c @@ -21,7 +21,7 @@ const struct cpu_operations cpu_ops_sbi; * be invoked from multiple threads in parallel. Define a per cpu data * to handle that. 
*/ -DEFINE_PER_CPU(struct sbi_hart_boot_data, boot_data); +static DEFINE_PER_CPU(struct sbi_hart_boot_data, boot_data); static int sbi_hsm_hart_start(unsigned long hartid, unsigned long saddr, unsigned long priv) diff --git a/arch/riscv/kernel/perf_callchain.c b/arch/riscv/kernel/perf_callchain.c index 1fc075b8f764a..3348a61de7d99 100644 --- a/arch/riscv/kernel/perf_callchain.c +++ b/arch/riscv/kernel/perf_callchain.c @@ -15,8 +15,8 @@ static unsigned long user_backtrace(struct perf_callchain_entry_ctx *entry, { struct stackframe buftail; unsigned long ra = 0; - unsigned long *user_frame_tail = - (unsigned long *)(fp - sizeof(struct stackframe)); + unsigned long __user *user_frame_tail = + (unsigned long __user *)(fp - sizeof(struct stackframe)); /* Check accessibility of one struct frame_tail beyond */ if (!access_ok(user_frame_tail, sizeof(buftail))) @@ -68,7 +68,7 @@ void perf_callchain_user(struct perf_callchain_entry_ctx *entry, static bool fill_callchain(void *entry, unsigned long pc) { - return perf_callchain_store(entry, pc); + return perf_callchain_store(entry, pc) == 0; } void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c index 624166004e36c..6785aef4cbd46 100644 --- a/arch/riscv/kvm/vcpu.c +++ b/arch/riscv/kvm/vcpu.c @@ -653,8 +653,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) vcpu->arch.isa); kvm_riscv_vcpu_host_fp_restore(&vcpu->arch.host_context); - csr_write(CSR_HGATP, 0); - csr->vsstatus = csr_read(CSR_VSSTATUS); csr->vsie = csr_read(CSR_VSIE); csr->vstvec = csr_read(CSR_VSTVEC); diff --git a/arch/riscv/kvm/vcpu_fp.c b/arch/riscv/kvm/vcpu_fp.c index 4449a976e5a6b..d4308c5120078 100644 --- a/arch/riscv/kvm/vcpu_fp.c +++ b/arch/riscv/kvm/vcpu_fp.c @@ -11,6 +11,7 @@ #include #include #include +#include #ifdef CONFIG_FPU void kvm_riscv_vcpu_fp_reset(struct kvm_vcpu *vcpu) diff --git a/arch/riscv/lib/memmove.S b/arch/riscv/lib/memmove.S index 07d1d2152ba5c..e0609e1f0864d 
100644 --- a/arch/riscv/lib/memmove.S +++ b/arch/riscv/lib/memmove.S @@ -1,64 +1,316 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2022 Michael T. Kloos + */ #include #include -ENTRY(__memmove) -WEAK(memmove) - move t0, a0 - move t1, a1 - - beq a0, a1, exit_memcpy - beqz a2, exit_memcpy - srli t2, a2, 0x2 - - slt t3, a0, a1 - beqz t3, do_reverse - - andi a2, a2, 0x3 - li t4, 1 - beqz t2, byte_copy - -word_copy: - lw t3, 0(a1) - addi t2, t2, -1 - addi a1, a1, 4 - sw t3, 0(a0) - addi a0, a0, 4 - bnez t2, word_copy - beqz a2, exit_memcpy - j byte_copy - -do_reverse: - add a0, a0, a2 - add a1, a1, a2 - andi a2, a2, 0x3 - li t4, -1 - beqz t2, reverse_byte_copy - -reverse_word_copy: - addi a1, a1, -4 - addi t2, t2, -1 - lw t3, 0(a1) - addi a0, a0, -4 - sw t3, 0(a0) - bnez t2, reverse_word_copy - beqz a2, exit_memcpy - -reverse_byte_copy: - addi a0, a0, -1 - addi a1, a1, -1 +SYM_FUNC_START(__memmove) +SYM_FUNC_START_WEAK(memmove) + /* + * Returns + * a0 - dest + * + * Parameters + * a0 - Inclusive first byte of dest + * a1 - Inclusive first byte of src + * a2 - Length of copy n + * + * Because the return matches the parameter register a0, + * we will not clobber or modify that register. + * + * Note: This currently only works on little-endian. + * To port to big-endian, reverse the direction of shifts + * in the 2 misaligned fixup copy loops. 
+ */ + /* Return if nothing to do */ + beq a0, a1, return_from_memmove + beqz a2, return_from_memmove + + /* + * Register Uses + * Forward Copy: a1 - Index counter of src + * Reverse Copy: a4 - Index counter of src + * Forward Copy: t3 - Index counter of dest + * Reverse Copy: t4 - Index counter of dest + * Both Copy Modes: t5 - Inclusive first multibyte/aligned of dest + * Both Copy Modes: t6 - Non-Inclusive last multibyte/aligned of dest + * Both Copy Modes: t0 - Link / Temporary for load-store + * Both Copy Modes: t1 - Temporary for load-store + * Both Copy Modes: t2 - Temporary for load-store + * Both Copy Modes: a5 - dest to src alignment offset + * Both Copy Modes: a6 - Shift amount + * Both Copy Modes: a7 - Inverse Shift amount + * Both Copy Modes: a2 - Alternate breakpoint for unrolled loops + */ + + /* + * Solve for some register values now. + * Byte copy does not need t5 or t6. + */ + mv t3, a0 + add t4, a0, a2 + add a4, a1, a2 + + /* + * Byte copy if copying less than (2 * SZREG) bytes. This can + * cause problems with the bulk copy implementation and is + * small enough not to bother. + */ + andi t0, a2, -(2 * SZREG) + beqz t0, byte_copy + + /* + * Now solve for t5 and t6. + */ + andi t5, t3, -SZREG + andi t6, t4, -SZREG + /* + * If dest(Register t3) rounded down to the nearest naturally + * aligned SZREG address, does not equal dest, then add SZREG + * to find the low-bound of SZREG alignment in the dest memory + * region. Note that this could overshoot the dest memory + * region if n is less than SZREG. This is one reason why + * we always byte copy if n is less than SZREG. + * Otherwise, dest is already naturally aligned to SZREG. + */ + beq t5, t3, 1f + addi t5, t5, SZREG + 1: + + /* + * If the dest and src are co-aligned to SZREG, then there is + * no need for the full rigmarole of a full misaligned fixup copy. + * Instead, do a simpler co-aligned copy.
+ */ + xor t0, a0, a1 + andi t1, t0, (SZREG - 1) + beqz t1, coaligned_copy + /* Fall through to misaligned fixup copy */ + +misaligned_fixup_copy: + bltu a1, a0, misaligned_fixup_copy_reverse + +misaligned_fixup_copy_forward: + jal t0, byte_copy_until_aligned_forward + + andi a5, a1, (SZREG - 1) /* Find the alignment offset of src (a1) */ + slli a6, a5, 3 /* Multiply by 8 to convert that to bits to shift */ + sub a5, a1, t3 /* Find the difference between src and dest */ + andi a1, a1, -SZREG /* Align the src pointer */ + addi a2, t6, SZREG /* The other breakpoint for the unrolled loop*/ + + /* + * Compute The Inverse Shift + * a7 = XLEN - a6 = XLEN + -a6 + * 2s complement negation to find the negative: -a6 = ~a6 + 1 + * Add that to XLEN. XLEN = SZREG * 8. + */ + not a7, a6 + addi a7, a7, (SZREG * 8 + 1) + + /* + * Fix Misalignment Copy Loop - Forward + * load_val0 = load_ptr[0]; + * do { + * load_val1 = load_ptr[1]; + * store_ptr += 2; + * store_ptr[0 - 2] = (load_val0 >> {a6}) | (load_val1 << {a7}); + * + * if (store_ptr == {a2}) + * break; + * + * load_val0 = load_ptr[2]; + * load_ptr += 2; + * store_ptr[1 - 2] = (load_val1 >> {a6}) | (load_val0 << {a7}); + * + * } while (store_ptr != store_ptr_end); + * store_ptr = store_ptr_end; + */ + + REG_L t0, (0 * SZREG)(a1) + 1: + REG_L t1, (1 * SZREG)(a1) + addi t3, t3, (2 * SZREG) + srl t0, t0, a6 + sll t2, t1, a7 + or t2, t0, t2 + REG_S t2, ((0 * SZREG) - (2 * SZREG))(t3) + + beq t3, a2, 2f + + REG_L t0, (2 * SZREG)(a1) + addi a1, a1, (2 * SZREG) + srl t1, t1, a6 + sll t2, t0, a7 + or t2, t1, t2 + REG_S t2, ((1 * SZREG) - (2 * SZREG))(t3) + + bne t3, t6, 1b + 2: + mv t3, t6 /* Fix the dest pointer in case the loop was broken */ + + add a1, t3, a5 /* Restore the src pointer */ + j byte_copy_forward /* Copy any remaining bytes */ + +misaligned_fixup_copy_reverse: + jal t0, byte_copy_until_aligned_reverse + + andi a5, a4, (SZREG - 1) /* Find the alignment offset of src (a4) */ + slli a6, a5, 3 /* Multiply by 8 to convert 
that to bits to shift */ + sub a5, a4, t4 /* Find the difference between src and dest */ + andi a4, a4, -SZREG /* Align the src pointer */ + addi a2, t5, -SZREG /* The other breakpoint for the unrolled loop*/ + + /* + * Compute The Inverse Shift + * a7 = XLEN - a6 = XLEN + -a6 + * 2s complement negation to find the negative: -a6 = ~a6 + 1 + * Add that to XLEN. XLEN = SZREG * 8. + */ + not a7, a6 + addi a7, a7, (SZREG * 8 + 1) + + /* + * Fix Misalignment Copy Loop - Reverse + * load_val1 = load_ptr[0]; + * do { + * load_val0 = load_ptr[-1]; + * store_ptr -= 2; + * store_ptr[1] = (load_val0 >> {a6}) | (load_val1 << {a7}); + * + * if (store_ptr == {a2}) + * break; + * + * load_val1 = load_ptr[-2]; + * load_ptr -= 2; + * store_ptr[0] = (load_val1 >> {a6}) | (load_val0 << {a7}); + * + * } while (store_ptr != store_ptr_end); + * store_ptr = store_ptr_end; + */ + + REG_L t1, ( 0 * SZREG)(a4) + 1: + REG_L t0, (-1 * SZREG)(a4) + addi t4, t4, (-2 * SZREG) + sll t1, t1, a7 + srl t2, t0, a6 + or t2, t1, t2 + REG_S t2, ( 1 * SZREG)(t4) + + beq t4, a2, 2f + + REG_L t1, (-2 * SZREG)(a4) + addi a4, a4, (-2 * SZREG) + sll t0, t0, a7 + srl t2, t1, a6 + or t2, t0, t2 + REG_S t2, ( 0 * SZREG)(t4) + + bne t4, t5, 1b + 2: + mv t4, t5 /* Fix the dest pointer in case the loop was broken */ + + add a4, t4, a5 /* Restore the src pointer */ + j byte_copy_reverse /* Copy any remaining bytes */ + +/* + * Simple copy loops for SZREG co-aligned memory locations. + * These also make calls to do byte copies for any unaligned + * data at their terminations. 
+ */ +coaligned_copy: + bltu a1, a0, coaligned_copy_reverse + +coaligned_copy_forward: + jal t0, byte_copy_until_aligned_forward + + 1: + REG_L t1, ( 0 * SZREG)(a1) + addi a1, a1, SZREG + addi t3, t3, SZREG + REG_S t1, (-1 * SZREG)(t3) + bne t3, t6, 1b + + j byte_copy_forward /* Copy any remaining bytes */ + +coaligned_copy_reverse: + jal t0, byte_copy_until_aligned_reverse + + 1: + REG_L t1, (-1 * SZREG)(a4) + addi a4, a4, -SZREG + addi t4, t4, -SZREG + REG_S t1, ( 0 * SZREG)(t4) + bne t4, t5, 1b + + j byte_copy_reverse /* Copy any remaining bytes */ + +/* + * These are basically sub-functions within the function. They + * are used to byte copy until the dest pointer is in alignment. + * At which point, a bulk copy method can be used by the + * calling code. These work on the same registers as the bulk + * copy loops. Therefore, the register values can be picked + * up from where they were left and we avoid code duplication + * without any overhead except the call in and return jumps. + */ +byte_copy_until_aligned_forward: + beq t3, t5, 2f + 1: + lb t1, 0(a1) + addi a1, a1, 1 + addi t3, t3, 1 + sb t1, -1(t3) + bne t3, t5, 1b + 2: + jalr zero, 0x0(t0) /* Return to multibyte copy loop */ + +byte_copy_until_aligned_reverse: + beq t4, t6, 2f + 1: + lb t1, -1(a4) + addi a4, a4, -1 + addi t4, t4, -1 + sb t1, 0(t4) + bne t4, t6, 1b + 2: + jalr zero, 0x0(t0) /* Return to multibyte copy loop */ + +/* + * Simple byte copy loops. + * These will byte copy until they reach the end of data to copy. + * At that point, they will call to return from memmove. 
+ */ byte_copy: - lb t3, 0(a1) - addi a2, a2, -1 - sb t3, 0(a0) - add a1, a1, t4 - add a0, a0, t4 - bnez a2, byte_copy - -exit_memcpy: - move a0, t0 - move a1, t1 - ret -END(__memmove) + bltu a1, a0, byte_copy_reverse + +byte_copy_forward: + beq t3, t4, 2f + 1: + lb t1, 0(a1) + addi a1, a1, 1 + addi t3, t3, 1 + sb t1, -1(t3) + bne t3, t4, 1b + 2: + ret + +byte_copy_reverse: + beq t4, t3, 2f + 1: + lb t1, -1(a4) + addi a4, a4, -1 + addi t4, t4, -1 + sb t1, 0(t4) + bne t4, t3, 1b + 2: + +return_from_memmove: + ret + +SYM_FUNC_END(memmove) +SYM_FUNC_END(__memmove) diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index 799147658dee2..1cd523748bd2e 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -453,3 +453,4 @@ 448 common process_mrelease sys_process_mrelease sys_process_mrelease 449 common futex_waitv sys_futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node sys_set_mempolicy_home_node +451 common pmadv_ksm sys_pmadv_ksm sys_pmadv_ksm diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 2296b1ff1e023..4e3db4004bfdc 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -3869,14 +3869,12 @@ static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu) return 0; } -void kvm_s390_set_tod_clock(struct kvm *kvm, - const struct kvm_s390_vm_tod_clock *gtod) +static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod) { struct kvm_vcpu *vcpu; union tod_clock clk; unsigned long i; - mutex_lock(&kvm->lock); preempt_disable(); store_tod_clock_ext(&clk); @@ -3897,7 +3895,22 @@ void kvm_s390_set_tod_clock(struct kvm *kvm, kvm_s390_vcpu_unblock_all(kvm); preempt_enable(); +} + +void kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod) +{ + mutex_lock(&kvm->lock); + __kvm_s390_set_tod_clock(kvm, gtod); + mutex_unlock(&kvm->lock); +} + +int 
kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod) +{ + if (!mutex_trylock(&kvm->lock)) + return 0; + __kvm_s390_set_tod_clock(kvm, gtod); mutex_unlock(&kvm->lock); + return 1; } /** diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 098831e815e6c..f2c910763d7fa 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -349,8 +349,8 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu); int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu); /* implemented in kvm-s390.c */ -void kvm_s390_set_tod_clock(struct kvm *kvm, - const struct kvm_s390_vm_tod_clock *gtod); +void kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod); +int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod); long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable); int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr); int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr); diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 417154b314a64..6a765fe22eafc 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -102,7 +102,20 @@ static int handle_set_clock(struct kvm_vcpu *vcpu) return kvm_s390_inject_prog_cond(vcpu, rc); VCPU_EVENT(vcpu, 3, "SCK: setting guest TOD to 0x%llx", gtod.tod); - kvm_s390_set_tod_clock(vcpu->kvm, &gtod); + /* + * To set the TOD clock the kvm lock must be taken, but the vcpu lock + * is already held in handle_set_clock. The usual lock order is the + * opposite. As SCK is deprecated and should not be used in several + * cases, for example when the multiple epoch facility or TOD clock + * steering facility is installed (see Principles of Operation), a + * slow path can be used. If the lock can not be taken via try_lock, + * the instruction will be retried via -EAGAIN at a later point in + * time.
+ */ + if (!kvm_s390_try_set_tod_clock(vcpu->kvm, &gtod)) { + kvm_s390_retry_instr(vcpu); + return -EAGAIN; + } kvm_s390_set_psw_cc(vcpu, 0); return 0; diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl index 2de85c977f54f..cfc75fa43eae4 100644 --- a/arch/sh/kernel/syscalls/syscall.tbl +++ b/arch/sh/kernel/syscalls/syscall.tbl @@ -453,3 +453,4 @@ 448 common process_mrelease sys_process_mrelease 449 common futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node +451 common pmadv_ksm sys_pmadv_ksm diff --git a/arch/sparc/kernel/signal_32.c b/arch/sparc/kernel/signal_32.c index ffab16369beac..74f80443b195f 100644 --- a/arch/sparc/kernel/signal_32.c +++ b/arch/sparc/kernel/signal_32.c @@ -65,7 +65,7 @@ struct rt_signal_frame { */ static inline bool invalid_frame_pointer(void __user *fp, int fplen) { - if ((((unsigned long) fp) & 15) || !__access_ok((unsigned long)fp, fplen)) + if ((((unsigned long) fp) & 15) || !access_ok(fp, fplen)) return true; return false; diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl index 4398cc6fb68dd..d2c0a6426f6b8 100644 --- a/arch/sparc/kernel/syscalls/syscall.tbl +++ b/arch/sparc/kernel/syscalls/syscall.tbl @@ -496,3 +496,4 @@ 448 common process_mrelease sys_process_mrelease 449 common futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node +451 common pmadv_ksm sys_pmadv_ksm diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c index 6ead1e2404576..8ca67a6926830 100644 --- a/arch/um/drivers/mconsole_kern.c +++ b/arch/um/drivers/mconsole_kern.c @@ -224,7 +224,7 @@ void mconsole_go(struct mc_request *req) void mconsole_stop(struct mc_request *req) { - deactivate_fd(req->originating_fd, MCONSOLE_IRQ); + block_signals(); os_set_fd_block(req->originating_fd, 1); mconsole_reply(req, "stopped", 0, 0); for (;;) { @@ -247,6 +247,7 @@ void mconsole_stop(struct mc_request *req)
} os_set_fd_block(req->originating_fd, 0); mconsole_reply(req, "", 0, 0); + unblock_signals(); } static DEFINE_SPINLOCK(mc_devices_lock); diff --git a/arch/um/include/asm/xor.h b/arch/um/include/asm/xor.h index f512704a9ec7b..22b39de73c246 100644 --- a/arch/um/include/asm/xor.h +++ b/arch/um/include/asm/xor.h @@ -4,8 +4,10 @@ #ifdef CONFIG_64BIT #undef CONFIG_X86_32 +#define TT_CPU_INF_XOR_DEFAULT (AVX_SELECT(&xor_block_sse_pf64)) #else #define CONFIG_X86_32 1 +#define TT_CPU_INF_XOR_DEFAULT (AVX_SELECT(&xor_block_8regs)) #endif #include @@ -16,7 +18,7 @@ #undef XOR_SELECT_TEMPLATE /* pick an arbitrary one - measuring isn't possible with inf-cpu */ #define XOR_SELECT_TEMPLATE(x) \ - (time_travel_mode == TT_MODE_INFCPU ? &xor_block_8regs : NULL) + (time_travel_mode == TT_MODE_INFCPU ? TT_CPU_INF_XOR_DEFAULT : x) #endif #endif diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 9f5bd41bf660c..54a8e93d68556 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -85,6 +85,7 @@ config X86 select ARCH_HAS_PMEM_API if X86_64 select ARCH_HAS_PTE_DEVMAP if X86_64 select ARCH_HAS_PTE_SPECIAL + select ARCH_HAS_NONLEAF_PMD_YOUNG select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64 select ARCH_HAS_COPY_MC if X86_64 select ARCH_HAS_SET_MEMORY @@ -2837,6 +2838,11 @@ config IA32_AOUT config X86_X32 bool "x32 ABI for 64-bit mode" depends on X86_64 + # llvm-objcopy does not convert x86_64 .note.gnu.property or + # compressed debug sections to x86_x32 properly: + # https://github.com/ClangBuiltLinux/linux/issues/514 + # https://github.com/ClangBuiltLinux/linux/issues/1141 + depends on $(success,$(OBJCOPY) --version | head -n1 | grep -qv llvm) help Include code to run binaries for the x32 native 32-bit ABI for 64-bit processors.
An x32 process gets access to the diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 542377cd419d7..22b919cdb6d19 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -157,7 +157,7 @@ config MPENTIUM4 config MK6 - bool "K6/K6-II/K6-III" + bool "AMD K6/K6-II/K6-III" depends on X86_32 help Select this for an AMD K6-family processor. Enables use of @@ -165,7 +165,7 @@ config MK6 flags to GCC. config MK7 - bool "Athlon/Duron/K7" + bool "AMD Athlon/Duron/K7" depends on X86_32 help Select this for an AMD Athlon K7-family processor. Enables use of @@ -173,12 +173,98 @@ config MK7 flags to GCC. config MK8 - bool "Opteron/Athlon64/Hammer/K8" + bool "AMD Opteron/Athlon64/Hammer/K8" help Select this for an AMD Opteron or Athlon64 Hammer-family processor. Enables use of some extended instructions, and passes appropriate optimization flags to GCC. +config MK8SSE3 + bool "AMD Opteron/Athlon64/Hammer/K8 with SSE3" + help + Select this for improved AMD Opteron or Athlon64 Hammer-family processors. + Enables use of some extended instructions, and passes appropriate + optimization flags to GCC. + +config MK10 + bool "AMD 61xx/7x50/PhenomX3/X4/II/K10" + help + Select this for an AMD 61xx Eight-Core Magny-Cours, Athlon X2 7x50, + Phenom X3/X4/II, Athlon II X2/X3/X4, or Turion II-family processor. + Enables use of some extended instructions, and passes appropriate + optimization flags to GCC. + +config MBARCELONA + bool "AMD Barcelona" + help + Select this for AMD Family 10h Barcelona processors. + + Enables -march=barcelona + +config MBOBCAT + bool "AMD Bobcat" + help + Select this for AMD Family 14h Bobcat processors. + + Enables -march=btver1 + +config MJAGUAR + bool "AMD Jaguar" + help + Select this for AMD Family 16h Jaguar processors. + + Enables -march=btver2 + +config MBULLDOZER + bool "AMD Bulldozer" + help + Select this for AMD Family 15h Bulldozer processors. 
+ + Enables -march=bdver1 + +config MPILEDRIVER + bool "AMD Piledriver" + help + Select this for AMD Family 15h Piledriver processors. + + Enables -march=bdver2 + +config MSTEAMROLLER + bool "AMD Steamroller" + help + Select this for AMD Family 15h Steamroller processors. + + Enables -march=bdver3 + +config MEXCAVATOR + bool "AMD Excavator" + help + Select this for AMD Family 15h Excavator processors. + + Enables -march=bdver4 + +config MZEN + bool "AMD Zen" + help + Select this for AMD Family 17h Zen processors. + + Enables -march=znver1 + +config MZEN2 + bool "AMD Zen 2" + help + Select this for AMD Family 17h Zen 2 processors. + + Enables -march=znver2 + +config MZEN3 + bool "AMD Zen 3" + depends on (CC_IS_GCC && GCC_VERSION >= 100300) || (CC_IS_CLANG && CLANG_VERSION >= 120000) + help + Select this for AMD Family 19h Zen 3 processors. + + Enables -march=znver3 + config MCRUSOE bool "Crusoe" depends on X86_32 @@ -270,7 +356,7 @@ config MPSC in /proc/cpuinfo. Family 15 is an older Xeon, Family 6 a newer one. config MCORE2 - bool "Core 2/newer Xeon" + bool "Intel Core 2" help Select this for Intel Core 2 and newer Core 2 Xeons (Xeon 51xx and @@ -278,6 +364,8 @@ config MCORE2 family in /proc/cpuinfo. Newer ones have 6 and older ones 15 (not a typo) + Enables -march=core2 + config MATOM bool "Intel Atom" help @@ -287,6 +375,182 @@ config MATOM accordingly optimized code. Use a recent GCC with specific Atom support in order to fully benefit from selecting this option. +config MNEHALEM + bool "Intel Nehalem" + select X86_P6_NOP + help + + Select this for 1st Gen Core processors in the Nehalem family. + + Enables -march=nehalem + +config MWESTMERE + bool "Intel Westmere" + select X86_P6_NOP + help + + Select this for the Intel Westmere formerly Nehalem-C family. + + Enables -march=westmere + +config MSILVERMONT + bool "Intel Silvermont" + select X86_P6_NOP + help + + Select this for the Intel Silvermont platform. 
+ + Enables -march=silvermont + +config MGOLDMONT + bool "Intel Goldmont" + select X86_P6_NOP + help + + Select this for the Intel Goldmont platform including Apollo Lake and Denverton. + + Enables -march=goldmont + +config MGOLDMONTPLUS + bool "Intel Goldmont Plus" + select X86_P6_NOP + help + + Select this for the Intel Goldmont Plus platform including Gemini Lake. + + Enables -march=goldmont-plus + +config MSANDYBRIDGE + bool "Intel Sandy Bridge" + select X86_P6_NOP + help + + Select this for 2nd Gen Core processors in the Sandy Bridge family. + + Enables -march=sandybridge + +config MIVYBRIDGE + bool "Intel Ivy Bridge" + select X86_P6_NOP + help + + Select this for 3rd Gen Core processors in the Ivy Bridge family. + + Enables -march=ivybridge + +config MHASWELL + bool "Intel Haswell" + select X86_P6_NOP + help + + Select this for 4th Gen Core processors in the Haswell family. + + Enables -march=haswell + +config MBROADWELL + bool "Intel Broadwell" + select X86_P6_NOP + help + + Select this for 5th Gen Core processors in the Broadwell family. + + Enables -march=broadwell + +config MSKYLAKE + bool "Intel Skylake" + select X86_P6_NOP + help + + Select this for 6th Gen Core processors in the Skylake family. + + Enables -march=skylake + +config MSKYLAKEX + bool "Intel Skylake X" + select X86_P6_NOP + help + + Select this for 6th Gen Core processors in the Skylake X family. + + Enables -march=skylake-avx512 + +config MCANNONLAKE + bool "Intel Cannon Lake" + select X86_P6_NOP + help + + Select this for 8th Gen Core processors + + Enables -march=cannonlake + +config MICELAKE + bool "Intel Ice Lake" + select X86_P6_NOP + help + + Select this for 10th Gen Core processors in the Ice Lake family. + + Enables -march=icelake-client + +config MCASCADELAKE + bool "Intel Cascade Lake" + select X86_P6_NOP + help + + Select this for Xeon processors in the Cascade Lake family. 
+ + Enables -march=cascadelake + +config MCOOPERLAKE + bool "Intel Cooper Lake" + depends on (CC_IS_GCC && GCC_VERSION > 100100) || (CC_IS_CLANG && CLANG_VERSION >= 100000) + select X86_P6_NOP + help + + Select this for Xeon processors in the Cooper Lake family. + + Enables -march=cooperlake + +config MTIGERLAKE + bool "Intel Tiger Lake" + depends on (CC_IS_GCC && GCC_VERSION > 100100) || (CC_IS_CLANG && CLANG_VERSION >= 100000) + select X86_P6_NOP + help + + Select this for third-generation 10 nm process processors in the Tiger Lake family. + + Enables -march=tigerlake + +config MSAPPHIRERAPIDS + bool "Intel Sapphire Rapids" + depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000) + select X86_P6_NOP + help + + Select this for third-generation 10 nm process processors in the Sapphire Rapids family. + + Enables -march=sapphirerapids + +config MROCKETLAKE + bool "Intel Rocket Lake" + depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000) + select X86_P6_NOP + help + + Select this for eleventh-generation processors in the Rocket Lake family. + + Enables -march=rocketlake + +config MALDERLAKE + bool "Intel Alder Lake" + depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000) + select X86_P6_NOP + help + + Select this for twelfth-generation processors in the Alder Lake family. + + Enables -march=alderlake + config GENERIC_CPU bool "Generic-x86-64" depends on X86_64 @@ -294,6 +558,50 @@ config GENERIC_CPU Generic x86-64 CPU. Run equally well on all x86-64 CPUs. +config GENERIC_CPU2 + bool "Generic-x86-64-v2" + depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000) + depends on X86_64 + help + Generic x86-64 CPU. + Run equally well on all x86-64 CPUs with min support of x86-64-v2. 
+ +config GENERIC_CPU3 + bool "Generic-x86-64-v3" + depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000) + depends on X86_64 + help + Generic x86-64-v3 CPU with v3 instructions. + Run equally well on all x86-64 CPUs with min support of x86-64-v3. + +config GENERIC_CPU4 + bool "Generic-x86-64-v4" + depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000) + depends on X86_64 + help + Generic x86-64 CPU with v4 instructions. + Run equally well on all x86-64 CPUs with min support of x86-64-v4. + +config MNATIVE_INTEL + bool "Intel-Native optimizations autodetected by the compiler" + help + + Clang 3.8, GCC 4.2 and above support -march=native, which automatically detects + the optimum settings to use based on your processor. Do NOT use this + for AMD CPUs. Intel Only! + + Enables -march=native + +config MNATIVE_AMD + bool "AMD-Native optimizations autodetected by the compiler" + help + + Clang 3.8, GCC 4.2 and above support -march=native, which automatically detects + the optimum settings to use based on your processor. Do NOT use this + for Intel CPUs. AMD Only! 
+ + Enables -march=native + endchoice config X86_GENERIC @@ -318,7 +626,7 @@ config X86_INTERNODE_CACHE_SHIFT config X86_L1_CACHE_SHIFT int default "7" if MPENTIUM4 || MPSC - default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU + default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || MJAGUAR || MBULLDOZER || MPILEDRIVER || MSTEAMROLLER || MEXCAVATOR || MZEN || MZEN2 || MZEN3 || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE || MSAPPHIRERAPIDS || MROCKETLAKE || MALDERLAKE || MNATIVE_INTEL || MNATIVE_AMD || X86_GENERIC || GENERIC_CPU || GENERIC_CPU2 || GENERIC_CPU3 || GENERIC_CPU4 default "4" if MELAN || M486SX || M486 || MGEODEGX1 default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX @@ -336,11 +644,11 @@ config X86_ALIGNMENT_16 config X86_INTEL_USERCOPY def_bool y - depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2 + depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2 || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE || MSAPPHIRERAPIDS || MROCKETLAKE || MALDERLAKE || MNATIVE_INTEL config X86_USE_PPRO_CHECKSUM def_bool y - depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MATOM + depends on MWINCHIP3D || MWINCHIPC6 
|| MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MATOM || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || MJAGUAR || MBULLDOZER || MPILEDRIVER || MSTEAMROLLER || MEXCAVATOR || MZEN || MZEN2 || MZEN3 || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE || MSAPPHIRERAPIDS || MROCKETLAKE || MALDERLAKE || MNATIVE_INTEL || MNATIVE_AMD # # P6_NOPs are a relatively minor optimization that require a family >= @@ -356,26 +664,26 @@ config X86_USE_PPRO_CHECKSUM config X86_P6_NOP def_bool y depends on X86_64 - depends on (MCORE2 || MPENTIUM4 || MPSC) + depends on (MCORE2 || MPENTIUM4 || MPSC || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE || MSAPPHIRERAPIDS || MROCKETLAKE || MALDERLAKE || MNATIVE_INTEL) config X86_TSC def_bool y - depends on (MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MATOM) || X86_64 + depends on (MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MATOM || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || MJAGUAR || MBULLDOZER || MPILEDRIVER || MSTEAMROLLER || MEXCAVATOR || MZEN || MZEN2 || MZEN3 || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MCASCADELAKE || 
MCOOPERLAKE || MTIGERLAKE || MSAPPHIRERAPIDS || MROCKETLAKE || MALDERLAKE || MNATIVE_INTEL || MNATIVE_AMD) || X86_64 config X86_CMPXCHG64 def_bool y - depends on X86_PAE || X86_64 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586TSC || M586MMX || MATOM || MGEODE_LX || MGEODEGX1 || MK6 || MK7 || MK8 + depends on X86_PAE || X86_64 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586TSC || M586MMX || MATOM || MGEODE_LX || MGEODEGX1 || MK6 || MK7 || MK8 || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || MJAGUAR || MBULLDOZER || MPILEDRIVER || MSTEAMROLLER || MEXCAVATOR || MZEN || MZEN2 || MZEN3 || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE || MSAPPHIRERAPIDS || MROCKETLAKE || MALDERLAKE || MNATIVE_INTEL || MNATIVE_AMD # this should be set for all -march=.. options where the compiler # generates cmov. 
config X86_CMOV def_bool y - depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM || MGEODE_LX) + depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM || MGEODE_LX || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || MJAGUAR || MBULLDOZER || MPILEDRIVER || MSTEAMROLLER || MEXCAVATOR || MZEN || MZEN2 || MZEN3 || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE || MSAPPHIRERAPIDS || MROCKETLAKE || MALDERLAKE || MNATIVE_INTEL || MNATIVE_AMD) config X86_MINIMUM_CPU_FAMILY int default "64" if X86_64 - default "6" if X86_32 && (MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MEFFICEON || MATOM || MCRUSOE || MCORE2 || MK7 || MK8) + default "6" if X86_32 && (MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MEFFICEON || MATOM || MCRUSOE || MCORE2 || MK7 || MK8 || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || MJAGUAR || MBULLDOZER || MPILEDRIVER || MSTEAMROLLER || MEXCAVATOR || MZEN || MZEN2 || MZEN3 || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE || MSAPPHIRERAPIDS || MROCKETLAKE || MALDERLAKE || MNATIVE_INTEL || MNATIVE_AMD) default "5" if X86_32 && X86_CMPXCHG64 default "4" diff --git a/arch/x86/Makefile b/arch/x86/Makefile index e84cdd409b646..7d3bbf060079c 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -131,8 +131,44 @@ else # FIXME - should be integrated in Makefile.cpu (Makefile_32.cpu) cflags-$(CONFIG_MK8) += -march=k8 cflags-$(CONFIG_MPSC) 
+= -march=nocona - cflags-$(CONFIG_MCORE2) += -march=core2 - cflags-$(CONFIG_MATOM) += -march=atom + cflags-$(CONFIG_MK8SSE3) += -march=k8-sse3 + cflags-$(CONFIG_MK10) += -march=amdfam10 + cflags-$(CONFIG_MBARCELONA) += -march=barcelona + cflags-$(CONFIG_MBOBCAT) += -march=btver1 + cflags-$(CONFIG_MJAGUAR) += -march=btver2 + cflags-$(CONFIG_MBULLDOZER) += -march=bdver1 + cflags-$(CONFIG_MPILEDRIVER) += -march=bdver2 -mno-tbm + cflags-$(CONFIG_MSTEAMROLLER) += -march=bdver3 -mno-tbm + cflags-$(CONFIG_MEXCAVATOR) += -march=bdver4 -mno-tbm + cflags-$(CONFIG_MZEN) += -march=znver1 + cflags-$(CONFIG_MZEN2) += -march=znver2 + cflags-$(CONFIG_MZEN3) += -march=znver3 + cflags-$(CONFIG_MNATIVE_INTEL) += -march=native + cflags-$(CONFIG_MNATIVE_AMD) += -march=native + cflags-$(CONFIG_MATOM) += -march=bonnell + cflags-$(CONFIG_MCORE2) += -march=core2 + cflags-$(CONFIG_MNEHALEM) += -march=nehalem + cflags-$(CONFIG_MWESTMERE) += -march=westmere + cflags-$(CONFIG_MSILVERMONT) += -march=silvermont + cflags-$(CONFIG_MGOLDMONT) += -march=goldmont + cflags-$(CONFIG_MGOLDMONTPLUS) += -march=goldmont-plus + cflags-$(CONFIG_MSANDYBRIDGE) += -march=sandybridge + cflags-$(CONFIG_MIVYBRIDGE) += -march=ivybridge + cflags-$(CONFIG_MHASWELL) += -march=haswell + cflags-$(CONFIG_MBROADWELL) += -march=broadwell + cflags-$(CONFIG_MSKYLAKE) += -march=skylake + cflags-$(CONFIG_MSKYLAKEX) += -march=skylake-avx512 + cflags-$(CONFIG_MCANNONLAKE) += -march=cannonlake + cflags-$(CONFIG_MICELAKE) += -march=icelake-client + cflags-$(CONFIG_MCASCADELAKE) += -march=cascadelake + cflags-$(CONFIG_MCOOPERLAKE) += -march=cooperlake + cflags-$(CONFIG_MTIGERLAKE) += -march=tigerlake + cflags-$(CONFIG_MSAPPHIRERAPIDS) += -march=sapphirerapids + cflags-$(CONFIG_MROCKETLAKE) += -march=rocketlake + cflags-$(CONFIG_MALDERLAKE) += -march=alderlake + cflags-$(CONFIG_GENERIC_CPU2) += -march=x86-64-v2 + cflags-$(CONFIG_GENERIC_CPU3) += -march=x86-64-v3 + cflags-$(CONFIG_GENERIC_CPU4) += -march=x86-64-v4 
cflags-$(CONFIG_GENERIC_CPU) += -mtune=generic KBUILD_CFLAGS += $(cflags-y) diff --git a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl index 71fae5a09e56d..2077ce7a56479 100644 --- a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl +++ b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl @@ -297,7 +297,7 @@ sub poly1305_iteration { $code.=<<___; mov \$1,%eax .Lno_key: - ret + RET ___ &end_function("poly1305_init_x86_64"); @@ -373,7 +373,7 @@ sub poly1305_iteration { .cfi_adjust_cfa_offset -48 .Lno_data: .Lblocks_epilogue: - ret + RET .cfi_endproc ___ &end_function("poly1305_blocks_x86_64"); @@ -399,7 +399,7 @@ sub poly1305_iteration { mov %rax,0($mac) # write result mov %rcx,8($mac) - ret + RET ___ &end_function("poly1305_emit_x86_64"); if ($avx) { @@ -429,7 +429,7 @@ sub poly1305_iteration { &poly1305_iteration(); $code.=<<___; pop $ctx - ret + RET .size __poly1305_block,.-__poly1305_block .type __poly1305_init_avx,\@abi-omnipotent @@ -594,7 +594,7 @@ sub poly1305_iteration { lea -48-64($ctx),$ctx # size [de-]optimization pop %rbp - ret + RET .size __poly1305_init_avx,.-__poly1305_init_avx ___ @@ -747,7 +747,7 @@ sub poly1305_iteration { .cfi_restore %rbp .Lno_data_avx: .Lblocks_avx_epilogue: - ret + RET .cfi_endproc .align 32 @@ -1452,7 +1452,7 @@ sub poly1305_iteration { ___ $code.=<<___; vzeroupper - ret + RET .cfi_endproc ___ &end_function("poly1305_blocks_avx"); @@ -1508,7 +1508,7 @@ sub poly1305_iteration { mov %rax,0($mac) # write result mov %rcx,8($mac) - ret + RET ___ &end_function("poly1305_emit_avx"); @@ -1675,7 +1675,7 @@ sub poly1305_blocks_avxN { .cfi_restore %rbp .Lno_data_avx2$suffix: .Lblocks_avx2_epilogue$suffix: - ret + RET .cfi_endproc .align 32 @@ -2201,7 +2201,7 @@ sub poly1305_blocks_avxN { ___ $code.=<<___; vzeroupper - ret + RET .cfi_endproc ___ if($avx > 2 && $avx512) { @@ -2792,7 +2792,7 @@ sub poly1305_blocks_avxN { .cfi_def_cfa_register %rsp ___ $code.=<<___; - ret + RET .cfi_endproc ___ @@ 
-2893,7 +2893,7 @@ sub poly1305_blocks_avxN { ___ $code.=<<___; mov \$1,%eax - ret + RET .size poly1305_init_base2_44,.-poly1305_init_base2_44 ___ { @@ -3010,7 +3010,7 @@ sub poly1305_blocks_avxN { jnz .Lblocks_vpmadd52_4x .Lno_data_vpmadd52: - ret + RET .size poly1305_blocks_vpmadd52,.-poly1305_blocks_vpmadd52 ___ } @@ -3451,7 +3451,7 @@ sub poly1305_blocks_avxN { vzeroall .Lno_data_vpmadd52_4x: - ret + RET .size poly1305_blocks_vpmadd52_4x,.-poly1305_blocks_vpmadd52_4x ___ } @@ -3824,7 +3824,7 @@ sub poly1305_blocks_avxN { vzeroall .Lno_data_vpmadd52_8x: - ret + RET .size poly1305_blocks_vpmadd52_8x,.-poly1305_blocks_vpmadd52_8x ___ } @@ -3861,7 +3861,7 @@ sub poly1305_blocks_avxN { mov %rax,0($mac) # write result mov %rcx,8($mac) - ret + RET .size poly1305_emit_base2_44,.-poly1305_emit_base2_44 ___ } } } @@ -3916,7 +3916,7 @@ sub poly1305_blocks_avxN { .Ldone_enc: mov $otp,%rax - ret + RET .size xor128_encrypt_n_pad,.-xor128_encrypt_n_pad .globl xor128_decrypt_n_pad @@ -3967,7 +3967,7 @@ sub poly1305_blocks_avxN { .Ldone_dec: mov $otp,%rax - ret + RET .size xor128_decrypt_n_pad,.-xor128_decrypt_n_pad ___ } @@ -4109,7 +4109,7 @@ sub poly1305_blocks_avxN { pop %rbx pop %rdi pop %rsi - ret + RET .size avx_handler,.-avx_handler .section .pdata diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 320480a8db4f8..331aaf1a782ff 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -455,3 +455,4 @@ 448 i386 process_mrelease sys_process_mrelease 449 i386 futex_waitv sys_futex_waitv 450 i386 set_mempolicy_home_node sys_set_mempolicy_home_node +451 i386 pmadv_ksm sys_pmadv_ksm diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index c84d12608cd2d..14902db4c01fc 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -372,6 +372,7 @@ 448 common process_mrelease sys_process_mrelease 449 common 
futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node +451 common pmadv_ksm sys_pmadv_ksm # # Due to a historical design error, certain syscalls are numbered differently diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index a3c7ca876aebd..d87c9b246a8fa 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -281,7 +281,7 @@ static struct extra_reg intel_spr_extra_regs[] __read_mostly = { INTEL_UEVENT_EXTRA_REG(0x012a, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0), INTEL_UEVENT_EXTRA_REG(0x012b, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1), INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd), - INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff17, FE), + INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff1f, FE), INTEL_UEVENT_EXTRA_REG(0x40ad, MSR_PEBS_FRONTEND, 0x7, FE), INTEL_UEVENT_EXTRA_REG(0x04c2, MSR_PEBS_FRONTEND, 0x8, FE), EVENT_EXTRA_END @@ -5515,7 +5515,11 @@ static void intel_pmu_check_event_constraints(struct event_constraint *event_con /* Disabled fixed counters which are not in CPUID */ c->idxmsk64 &= intel_ctrl; - if (c->idxmsk64 != INTEL_PMC_MSK_FIXED_REF_CYCLES) + /* + * Don't extend the pseudo-encoding to the + * generic counters + */ + if (!use_fixed_pseudo_encoding(c->code)) c->idxmsk64 |= (1ULL << num_counters) - 1; } c->idxmsk64 &= diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c index 2d33bba9a1440..215aed65e9782 100644 --- a/arch/x86/events/intel/pt.c +++ b/arch/x86/events/intel/pt.c @@ -472,7 +472,7 @@ static u64 pt_config_filters(struct perf_event *event) pt->filters.filter[range].msr_b = filter->msr_b; } - rtit_ctl |= filter->config << pt_address_ranges[range].reg_off; + rtit_ctl |= (u64)filter->config << pt_address_ranges[range].reg_off; } return rtit_ctl; diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index c878fed3056fd..fbcfec4dc4ccd 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ 
-154,24 +154,24 @@ # define DEFINE_EXTABLE_TYPE_REG \ ".macro extable_type_reg type:req reg:req\n" \ - ".set found, 0\n" \ - ".set regnr, 0\n" \ + ".set .Lfound, 0\n" \ + ".set .Lregnr, 0\n" \ ".irp rs,rax,rcx,rdx,rbx,rsp,rbp,rsi,rdi,r8,r9,r10,r11,r12,r13,r14,r15\n" \ ".ifc \\reg, %%\\rs\n" \ - ".set found, found+1\n" \ - ".long \\type + (regnr << 8)\n" \ + ".set .Lfound, .Lfound+1\n" \ + ".long \\type + (.Lregnr << 8)\n" \ ".endif\n" \ - ".set regnr, regnr+1\n" \ + ".set .Lregnr, .Lregnr+1\n" \ ".endr\n" \ - ".set regnr, 0\n" \ + ".set .Lregnr, 0\n" \ ".irp rs,eax,ecx,edx,ebx,esp,ebp,esi,edi,r8d,r9d,r10d,r11d,r12d,r13d,r14d,r15d\n" \ ".ifc \\reg, %%\\rs\n" \ - ".set found, found+1\n" \ - ".long \\type + (regnr << 8)\n" \ + ".set .Lfound, .Lfound+1\n" \ + ".long \\type + (.Lregnr << 8)\n" \ ".endif\n" \ - ".set regnr, regnr+1\n" \ + ".set .Lregnr, .Lregnr+1\n" \ ".endr\n" \ - ".if (found != 1)\n" \ + ".if (.Lfound != 1)\n" \ ".error \"extable_type_reg: bad register argument\"\n" \ ".endif\n" \ ".endm\n" diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h index bab883c0b6fee..66570e95af398 100644 --- a/arch/x86/include/asm/bug.h +++ b/arch/x86/include/asm/bug.h @@ -77,9 +77,9 @@ do { \ */ #define __WARN_FLAGS(flags) \ do { \ - __auto_type f = BUGFLAG_WARNING|(flags); \ + __auto_type __flags = BUGFLAG_WARNING|(flags); \ instrumentation_begin(); \ - _BUG_FLAGS(ASM_UD2, f, ASM_REACHABLE); \ + _BUG_FLAGS(ASM_UD2, __flags, ASM_REACHABLE); \ instrumentation_end(); \ } while (0) diff --git a/arch/x86/include/asm/irq_stack.h b/arch/x86/include/asm/irq_stack.h index ae9d40f6c7066..05af249d6bec2 100644 --- a/arch/x86/include/asm/irq_stack.h +++ b/arch/x86/include/asm/irq_stack.h @@ -99,7 +99,8 @@ } #define ASM_CALL_ARG0 \ - "call %P[__func] \n" + "call %P[__func] \n" \ + ASM_REACHABLE #define ASM_CALL_ARG1 \ "movq %[arg1], %%rdi \n" \ diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index ec9830d2aabf8..85ee96abba806 100644 --- 
a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -509,6 +509,7 @@ struct kvm_pmu { u64 global_ctrl_mask; u64 global_ovf_ctrl_mask; u64 reserved_bits; + u64 raw_event_mask; u8 version; struct kvm_pmc gp_counters[INTEL_PMC_MAX_GENERIC]; struct kvm_pmc fixed_counters[INTEL_PMC_MAX_FIXED]; @@ -1573,8 +1574,9 @@ static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm) #define kvm_arch_pmi_in_guest(vcpu) \ ((vcpu) && (vcpu)->arch.handling_intr_from_guest) -int kvm_mmu_module_init(void); -void kvm_mmu_module_exit(void); +void kvm_mmu_x86_module_init(void); +int kvm_mmu_vendor_module_init(void); +void kvm_mmu_vendor_module_exit(void); void kvm_mmu_destroy(struct kvm_vcpu *vcpu); int kvm_mmu_create(struct kvm_vcpu *vcpu); diff --git a/arch/x86/include/asm/msi.h b/arch/x86/include/asm/msi.h index b85147d75626e..d71c7e8b738d2 100644 --- a/arch/x86/include/asm/msi.h +++ b/arch/x86/include/asm/msi.h @@ -12,14 +12,17 @@ int pci_msi_prepare(struct irq_domain *domain, struct device *dev, int nvec, /* Structs and defines for the X86 specific MSI message format */ typedef struct x86_msi_data { - u32 vector : 8, - delivery_mode : 3, - dest_mode_logical : 1, - reserved : 2, - active_low : 1, - is_level : 1; - - u32 dmar_subhandle; + union { + struct { + u32 vector : 8, + delivery_mode : 3, + dest_mode_logical : 1, + reserved : 2, + active_low : 1, + is_level : 1; + }; + u32 dmar_subhandle; + }; } __attribute__ ((packed)) arch_msi_msg_data_t; #define arch_msi_msg_data x86_msi_data diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index a4a39c3e0f196..0c2610cde6ea2 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -128,9 +128,9 @@ #define TSX_CTRL_RTM_DISABLE BIT(0) /* Disable RTM feature */ #define TSX_CTRL_CPUID_CLEAR BIT(1) /* Disable TSX enumeration */ -/* SRBDS support */ #define MSR_IA32_MCU_OPT_CTRL 0x00000123 -#define RNGDS_MITG_DIS BIT(0) +#define RNGDS_MITG_DIS BIT(0) /* SRBDS 
support */ +#define RTM_ALLOW BIT(1) /* TSX development mode */ #define MSR_IA32_SYSENTER_CS 0x00000174 #define MSR_IA32_SYSENTER_ESP 0x00000175 diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 8fc1b5003713f..a2b6626c681f5 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -241,6 +241,11 @@ struct x86_pmu_capability { #define INTEL_PMC_IDX_FIXED_SLOTS (INTEL_PMC_IDX_FIXED + 3) #define INTEL_PMC_MSK_FIXED_SLOTS (1ULL << INTEL_PMC_IDX_FIXED_SLOTS) +static inline bool use_fixed_pseudo_encoding(u64 code) +{ + return !(code & 0xff); +} + /* * We model BTS tracing as another fixed-mode PMC. * diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 8a9432fb3802b..f973788f6b217 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -819,7 +819,8 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd) static inline int pmd_bad(pmd_t pmd) { - return (pmd_flags(pmd) & ~_PAGE_USER) != _KERNPG_TABLE; + return (pmd_flags(pmd) & ~(_PAGE_USER | _PAGE_ACCESSED)) != + (_KERNPG_TABLE & ~_PAGE_ACCESSED); } static inline unsigned long pages_to_mb(unsigned long npg) @@ -1423,10 +1424,10 @@ static inline bool arch_has_pfn_modify_check(void) return boot_cpu_has_bug(X86_BUG_L1TF); } -#define arch_faults_on_old_pte arch_faults_on_old_pte -static inline bool arch_faults_on_old_pte(void) +#define arch_has_hw_pte_young arch_has_hw_pte_young +static inline bool arch_has_hw_pte_young(void) { - return false; + return true; } #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/static_call.h b/arch/x86/include/asm/static_call.h index ed4f8bb6c2d9c..2455d721503ec 100644 --- a/arch/x86/include/asm/static_call.h +++ b/arch/x86/include/asm/static_call.h @@ -38,6 +38,8 @@ #define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) \ __ARCH_DEFINE_STATIC_CALL_TRAMP(name, "ret; int3; nop; nop; nop") +#define ARCH_DEFINE_STATIC_CALL_RET0_TRAMP(name) \ + 
ARCH_DEFINE_STATIC_CALL_TRAMP(name, __static_call_return0) #define ARCH_ADD_TRAMP_KEY(name) \ asm(".pushsection .static_call_tramp_key, \"a\" \n" \ diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index bb2fb78523cee..ab572d8def2b7 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -222,17 +222,19 @@ struct __attribute__ ((__packed__)) vmcb_control_area { /* AVIC */ -#define AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK (0xFF) +#define AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK (0xFFULL) #define AVIC_LOGICAL_ID_ENTRY_VALID_BIT 31 #define AVIC_LOGICAL_ID_ENTRY_VALID_MASK (1 << 31) -#define AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK (0xFFULL) +#define AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK GENMASK_ULL(11, 0) #define AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK (0xFFFFFFFFFFULL << 12) #define AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK (1ULL << 62) #define AVIC_PHYSICAL_ID_ENTRY_VALID_MASK (1ULL << 63) -#define AVIC_PHYSICAL_ID_TABLE_SIZE_MASK (0xFF) +#define AVIC_PHYSICAL_ID_TABLE_SIZE_MASK (0xFFULL) -#define AVIC_DOORBELL_PHYSICAL_ID_MASK (0xFF) +#define AVIC_DOORBELL_PHYSICAL_ID_MASK GENMASK_ULL(11, 0) + +#define VMCB_AVIC_APIC_BAR_MASK 0xFFFFFFFFFF000ULL #define AVIC_UNACCEL_ACCESS_WRITE_MASK 1 #define AVIC_UNACCEL_ACCESS_OFFSET_MASK 0xFF0 diff --git a/arch/x86/include/asm/vermagic.h b/arch/x86/include/asm/vermagic.h index 75884d2cdec37..4e6a08d4c7e53 100644 --- a/arch/x86/include/asm/vermagic.h +++ b/arch/x86/include/asm/vermagic.h @@ -17,6 +17,48 @@ #define MODULE_PROC_FAMILY "586MMX " #elif defined CONFIG_MCORE2 #define MODULE_PROC_FAMILY "CORE2 " +#elif defined CONFIG_MNATIVE_INTEL +#define MODULE_PROC_FAMILY "NATIVE_INTEL " +#elif defined CONFIG_MNATIVE_AMD +#define MODULE_PROC_FAMILY "NATIVE_AMD " +#elif defined CONFIG_MNEHALEM +#define MODULE_PROC_FAMILY "NEHALEM " +#elif defined CONFIG_MWESTMERE +#define MODULE_PROC_FAMILY "WESTMERE " +#elif defined CONFIG_MSILVERMONT +#define MODULE_PROC_FAMILY "SILVERMONT " 
+#elif defined CONFIG_MGOLDMONT +#define MODULE_PROC_FAMILY "GOLDMONT " +#elif defined CONFIG_MGOLDMONTPLUS +#define MODULE_PROC_FAMILY "GOLDMONTPLUS " +#elif defined CONFIG_MSANDYBRIDGE +#define MODULE_PROC_FAMILY "SANDYBRIDGE " +#elif defined CONFIG_MIVYBRIDGE +#define MODULE_PROC_FAMILY "IVYBRIDGE " +#elif defined CONFIG_MHASWELL +#define MODULE_PROC_FAMILY "HASWELL " +#elif defined CONFIG_MBROADWELL +#define MODULE_PROC_FAMILY "BROADWELL " +#elif defined CONFIG_MSKYLAKE +#define MODULE_PROC_FAMILY "SKYLAKE " +#elif defined CONFIG_MSKYLAKEX +#define MODULE_PROC_FAMILY "SKYLAKEX " +#elif defined CONFIG_MCANNONLAKE +#define MODULE_PROC_FAMILY "CANNONLAKE " +#elif defined CONFIG_MICELAKE +#define MODULE_PROC_FAMILY "ICELAKE " +#elif defined CONFIG_MCASCADELAKE +#define MODULE_PROC_FAMILY "CASCADELAKE " +#elif defined CONFIG_MCOOPERLAKE +#define MODULE_PROC_FAMILY "COOPERLAKE " +#elif defined CONFIG_MTIGERLAKE +#define MODULE_PROC_FAMILY "TIGERLAKE " +#elif defined CONFIG_MSAPPHIRERAPIDS +#define MODULE_PROC_FAMILY "SAPPHIRERAPIDS " +#elif defined CONFIG_MROCKETLAKE +#define MODULE_PROC_FAMILY "ROCKETLAKE " +#elif defined CONFIG_MALDERLAKE +#define MODULE_PROC_FAMILY "ALDERLAKE " #elif defined CONFIG_MATOM #define MODULE_PROC_FAMILY "ATOM " #elif defined CONFIG_M686 @@ -35,6 +77,30 @@ #define MODULE_PROC_FAMILY "K7 " #elif defined CONFIG_MK8 #define MODULE_PROC_FAMILY "K8 " +#elif defined CONFIG_MK8SSE3 +#define MODULE_PROC_FAMILY "K8SSE3 " +#elif defined CONFIG_MK10 +#define MODULE_PROC_FAMILY "K10 " +#elif defined CONFIG_MBARCELONA +#define MODULE_PROC_FAMILY "BARCELONA " +#elif defined CONFIG_MBOBCAT +#define MODULE_PROC_FAMILY "BOBCAT " +#elif defined CONFIG_MBULLDOZER +#define MODULE_PROC_FAMILY "BULLDOZER " +#elif defined CONFIG_MPILEDRIVER +#define MODULE_PROC_FAMILY "PILEDRIVER " +#elif defined CONFIG_MSTEAMROLLER +#define MODULE_PROC_FAMILY "STEAMROLLER " +#elif defined CONFIG_MJAGUAR +#define MODULE_PROC_FAMILY "JAGUAR " +#elif defined CONFIG_MEXCAVATOR
+#define MODULE_PROC_FAMILY "EXCAVATOR " +#elif defined CONFIG_MZEN +#define MODULE_PROC_FAMILY "ZEN " +#elif defined CONFIG_MZEN2 +#define MODULE_PROC_FAMILY "ZEN2 " +#elif defined CONFIG_MZEN3 +#define MODULE_PROC_FAMILY "ZEN3 " #elif defined CONFIG_MELAN #define MODULE_PROC_FAMILY "ELAN " #elif defined CONFIG_MCRUSOE diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 5b6d1a95776f0..0d01e7f5078c2 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -1328,6 +1328,17 @@ static int __init disable_acpi_pci(const struct dmi_system_id *d) return 0; } +static int __init disable_acpi_xsdt(const struct dmi_system_id *d) +{ + if (!acpi_force) { + pr_notice("%s detected: force use of acpi=rsdt\n", d->ident); + acpi_gbl_do_not_use_xsdt = TRUE; + } else { + pr_notice("Warning: DMI blacklist says broken, but acpi XSDT forced\n"); + } + return 0; +} + static int __init dmi_disable_acpi(const struct dmi_system_id *d) { if (!acpi_force) { @@ -1451,6 +1462,19 @@ static const struct dmi_system_id acpi_dmi_table[] __initconst = { DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate 360"), }, }, + /* + * Boxes that need ACPI XSDT use disabled due to corrupted tables + */ + { + .callback = disable_acpi_xsdt, + .ident = "Advantech DAC-BJ01", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "NEC"), + DMI_MATCH(DMI_PRODUCT_NAME, "Bearlake CRB Board"), + DMI_MATCH(DMI_BIOS_VERSION, "V1.12"), + DMI_MATCH(DMI_BIOS_DATE, "02/01/2011"), + }, + }, {} }; diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 7b8382c117889..bd6c690a9fb98 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1719,6 +1719,8 @@ void identify_secondary_cpu(struct cpuinfo_x86 *c) validate_apic_and_package_id(c); x86_spec_ctrl_setup_ap(); update_srbds_msr(); + + tsx_ap_init(); } static __init int setup_noclflush(char *arg) diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index ee6f23f7587d4..2a8e584fc9913 100644 --- 
a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -55,11 +55,10 @@ enum tsx_ctrl_states { extern __ro_after_init enum tsx_ctrl_states tsx_ctrl_state; extern void __init tsx_init(void); -extern void tsx_enable(void); -extern void tsx_disable(void); -extern void tsx_clear_cpuid(void); +void tsx_ap_init(void); #else static inline void tsx_init(void) { } +static inline void tsx_ap_init(void) { } #endif /* CONFIG_CPU_SUP_INTEL */ extern void get_cpu_cap(struct cpuinfo_x86 *c); diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 8321c43554a1d..f7a5370a9b3b8 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -717,13 +717,6 @@ static void init_intel(struct cpuinfo_x86 *c) init_intel_misc_features(c); - if (tsx_ctrl_state == TSX_CTRL_ENABLE) - tsx_enable(); - else if (tsx_ctrl_state == TSX_CTRL_DISABLE) - tsx_disable(); - else if (tsx_ctrl_state == TSX_CTRL_RTM_ALWAYS_ABORT) - tsx_clear_cpuid(); - split_lock_init(); bus_lock_init(); diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 5818b837fd4d4..2d719e0d2e404 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -834,6 +834,59 @@ static void quirk_sandybridge_ifu(int bank, struct mce *m, struct pt_regs *regs) m->cs = regs->cs; } +/* + * Disable fast string copy and return from the MCE handler upon the first SRAR + * MCE on bank 1 due to a CPU erratum on Intel Skylake/Cascade Lake/Cooper Lake + * CPUs. + * The fast string copy instructions ("REP; MOVS*") could consume an + * uncorrectable memory error in the cache line _right after_ the desired region + * to copy and raise an MCE with RIP pointing to the instruction _after_ the + * "REP; MOVS*". + * This mitigation addresses the issue completely with the caveat of performance + * degradation on the CPU affected. 
This is still better than the OS crashing on + * MCEs raised on an irrelevant process due to "REP; MOVS*" accesses from a + * kernel context (e.g., copy_page). + * + * Returns true when fast string copy on CPU has been disabled. + */ +static noinstr bool quirk_skylake_repmov(void) +{ + u64 mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS); + u64 misc_enable = mce_rdmsrl(MSR_IA32_MISC_ENABLE); + u64 mc1_status; + + /* + * Apply the quirk only to local machine checks, i.e., no broadcast + * sync is needed. + */ + if (!(mcgstatus & MCG_STATUS_LMCES) || + !(misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING)) + return false; + + mc1_status = mce_rdmsrl(MSR_IA32_MCx_STATUS(1)); + + /* Check for a software-recoverable data fetch error. */ + if ((mc1_status & + (MCI_STATUS_VAL | MCI_STATUS_OVER | MCI_STATUS_UC | MCI_STATUS_EN | + MCI_STATUS_ADDRV | MCI_STATUS_MISCV | MCI_STATUS_PCC | + MCI_STATUS_AR | MCI_STATUS_S)) == + (MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN | + MCI_STATUS_ADDRV | MCI_STATUS_MISCV | + MCI_STATUS_AR | MCI_STATUS_S)) { + misc_enable &= ~MSR_IA32_MISC_ENABLE_FAST_STRING; + mce_wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable); + mce_wrmsrl(MSR_IA32_MCx_STATUS(1), 0); + + instrumentation_begin(); + pr_err_once("Erratum detected, disable fast string copy instructions.\n"); + instrumentation_end(); + + return true; + } + + return false; +} + /* * Do a quick check if any of the events requires a panic. * This decides if we keep the events around or clear them. @@ -1403,6 +1456,9 @@ noinstr void do_machine_check(struct pt_regs *regs) else if (unlikely(!mca_cfg.initialized)) return unexpected_machine_check(regs); + if (mce_flags.skx_repmov_quirk && quirk_skylake_repmov()) + goto clear; + /* * Establish sequential order between the CPUs entering the machine * check handler. 
@@ -1545,6 +1601,7 @@ noinstr void do_machine_check(struct pt_regs *regs) out: instrumentation_end(); +clear: mce_wrmsrl(MSR_IA32_MCG_STATUS, 0); } EXPORT_SYMBOL_GPL(do_machine_check); @@ -1858,6 +1915,13 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) if (c->x86 == 6 && c->x86_model == 45) mce_flags.snb_ifu_quirk = 1; + + /* + * Skylake, Cascade Lake and Cooper Lake require a quirk on + * rep movs. + */ + if (c->x86 == 6 && c->x86_model == INTEL_FAM6_SKYLAKE_X) + mce_flags.skx_repmov_quirk = 1; } if (c->x86_vendor == X86_VENDOR_ZHAOXIN) { diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h index 52c633950b38d..24d099e2d2a23 100644 --- a/arch/x86/kernel/cpu/mce/internal.h +++ b/arch/x86/kernel/cpu/mce/internal.h @@ -170,7 +170,10 @@ struct mce_vendor_flags { /* SandyBridge IFU quirk */ snb_ifu_quirk : 1, - __reserved_0 : 57; + /* Skylake, Cascade Lake, Cooper Lake REP;MOVS* quirk */ + skx_repmov_quirk : 1, + + __reserved_0 : 56; }; extern struct mce_vendor_flags mce_flags; diff --git a/arch/x86/kernel/cpu/tsx.c b/arch/x86/kernel/cpu/tsx.c index 9c7a5f0492929..ec7bbac3a9f29 100644 --- a/arch/x86/kernel/cpu/tsx.c +++ b/arch/x86/kernel/cpu/tsx.c @@ -19,7 +19,7 @@ enum tsx_ctrl_states tsx_ctrl_state __ro_after_init = TSX_CTRL_NOT_SUPPORTED; -void tsx_disable(void) +static void tsx_disable(void) { u64 tsx; @@ -39,7 +39,7 @@ void tsx_disable(void) wrmsrl(MSR_IA32_TSX_CTRL, tsx); } -void tsx_enable(void) +static void tsx_enable(void) { u64 tsx; @@ -58,7 +58,7 @@ void tsx_enable(void) wrmsrl(MSR_IA32_TSX_CTRL, tsx); } -static bool __init tsx_ctrl_is_supported(void) +static bool tsx_ctrl_is_supported(void) { u64 ia32_cap = x86_read_arch_cap_msr(); @@ -84,7 +84,45 @@ static enum tsx_ctrl_states x86_get_tsx_auto_mode(void) return TSX_CTRL_ENABLE; } -void tsx_clear_cpuid(void) +/* + * Disabling TSX is not a trivial business.
+ * + * First of all, there's a CPUID bit: X86_FEATURE_RTM_ALWAYS_ABORT + * which says that TSX is practically disabled (all transactions are + * aborted by default). When that bit is set, the kernel unconditionally + * disables TSX. + * + * In order to do that, however, it needs to dance a bit: + * + * 1. The first method to disable it is through MSR_TSX_FORCE_ABORT and + * the MSR is present only when *two* CPUID bits are set: + * + * - X86_FEATURE_RTM_ALWAYS_ABORT + * - X86_FEATURE_TSX_FORCE_ABORT + * + * 2. The second method is for CPUs which do not have the above-mentioned + * MSR: those use a different MSR - MSR_IA32_TSX_CTRL and disable TSX + * through that one. Those CPUs can also have the initially mentioned + * CPUID bit X86_FEATURE_RTM_ALWAYS_ABORT set and for those the same strategy + * applies: TSX gets disabled unconditionally. + * + * When either of the two methods is present, the kernel disables TSX and + * clears the respective RTM and HLE feature flags. + * + * An additional twist in the whole thing is late microcode loading + * which, when done, may cause the X86_FEATURE_RTM_ALWAYS_ABORT CPUID + * bit to be set after the update. + * + * A subsequent hotplug operation on any logical CPU except the BSP will + * cause the supported CPUID feature bits to get re-detected and, if + * RTM and HLE get cleared all of a sudden but userspace did consult + * them before the update, then funny explosions will happen. Long story + * short: the kernel doesn't modify CPUID feature bits after booting. + * + * That's why this function's call in tsx_ap_init() doesn't clear the + * feature flags.
+ */ +static void tsx_clear_cpuid(void) { u64 msr; @@ -97,6 +135,39 @@ void tsx_clear_cpuid(void) rdmsrl(MSR_TSX_FORCE_ABORT, msr); msr |= MSR_TFA_TSX_CPUID_CLEAR; wrmsrl(MSR_TSX_FORCE_ABORT, msr); + } else if (tsx_ctrl_is_supported()) { + rdmsrl(MSR_IA32_TSX_CTRL, msr); + msr |= TSX_CTRL_CPUID_CLEAR; + wrmsrl(MSR_IA32_TSX_CTRL, msr); + } +} + +/* + * Disable TSX development mode + * + * When the microcode released in Feb 2022 is applied, TSX will be disabled by + * default on some processors. MSR 0x122 (TSX_CTRL) and MSR 0x123 + * (IA32_MCU_OPT_CTRL) can be used to re-enable TSX for development, doing so is + * not recommended for production deployments. In particular, applying MD_CLEAR + * flows for mitigation of the Intel TSX Asynchronous Abort (TAA) transient + * execution attack may not be effective on these processors when Intel TSX is + * enabled with updated microcode. + */ +static void tsx_dev_mode_disable(void) +{ + u64 mcu_opt_ctrl; + + /* Check if RTM_ALLOW exists */ + if (!boot_cpu_has_bug(X86_BUG_TAA) || !tsx_ctrl_is_supported() || + !cpu_feature_enabled(X86_FEATURE_SRBDS_CTRL)) + return; + + rdmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_opt_ctrl); + + if (mcu_opt_ctrl & RTM_ALLOW) { + mcu_opt_ctrl &= ~RTM_ALLOW; + wrmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_opt_ctrl); + setup_force_cpu_cap(X86_FEATURE_RTM_ALWAYS_ABORT); } } @@ -105,14 +176,14 @@ void __init tsx_init(void) char arg[5] = {}; int ret; + tsx_dev_mode_disable(); + /* - * Hardware will always abort a TSX transaction if both CPUID bits - * RTM_ALWAYS_ABORT and TSX_FORCE_ABORT are set. In this case, it is - * better not to enumerate CPUID.RTM and CPUID.HLE bits. Clear them - * here. + * Hardware will always abort a TSX transaction when the CPUID bit + * RTM_ALWAYS_ABORT is set. In this case, it is better not to enumerate + * CPUID.RTM and CPUID.HLE bits. Clear them here. 
*/ - if (boot_cpu_has(X86_FEATURE_RTM_ALWAYS_ABORT) && - boot_cpu_has(X86_FEATURE_TSX_FORCE_ABORT)) { + if (boot_cpu_has(X86_FEATURE_RTM_ALWAYS_ABORT)) { tsx_ctrl_state = TSX_CTRL_RTM_ALWAYS_ABORT; tsx_clear_cpuid(); setup_clear_cpu_cap(X86_FEATURE_RTM); @@ -175,3 +246,16 @@ void __init tsx_init(void) setup_force_cpu_cap(X86_FEATURE_HLE); } } + +void tsx_ap_init(void) +{ + tsx_dev_mode_disable(); + + if (tsx_ctrl_state == TSX_CTRL_ENABLE) + tsx_enable(); + else if (tsx_ctrl_state == TSX_CTRL_DISABLE) + tsx_disable(); + else if (tsx_ctrl_state == TSX_CTRL_RTM_ALWAYS_ABORT) + /* See comment over that function for more details. */ + tsx_clear_cpuid(); +} diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 7c7824ae78622..dc6d5e98d2963 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -1639,7 +1639,7 @@ static int __xstate_request_perm(u64 permitted, u64 requested, bool guest) perm = guest ? &fpu->guest_perm : &fpu->perm; /* Pairs with the READ_ONCE() in xstate_get_group_perm() */ - WRITE_ONCE(perm->__state_perm, requested); + WRITE_ONCE(perm->__state_perm, mask); /* Protected by sighand lock */ perm->__state_size = ksize; perm->__user_state_size = usize; diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index d77481ecb0d5f..ed8a13ac4ab23 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -517,7 +517,7 @@ static void __send_ipi_mask(const struct cpumask *mask, int vector) } else if (apic_id < min && max - apic_id < KVM_IPI_CLUSTER_SIZE) { ipi_bitmap <<= min - apic_id; min = apic_id; - } else if (apic_id < min + KVM_IPI_CLUSTER_SIZE) { + } else if (apic_id > min && apic_id < min + KVM_IPI_CLUSTER_SIZE) { max = apic_id < max ? 
max : apic_id; } else { ret = kvm_hypercall4(KVM_HC_SEND_IPI, (unsigned long)ipi_bitmap, diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c index 531fb4cbb63fd..aa72cefdd5be6 100644 --- a/arch/x86/kernel/static_call.c +++ b/arch/x86/kernel/static_call.c @@ -12,10 +12,9 @@ enum insn_type { }; /* - * data16 data16 xorq %rax, %rax - a single 5 byte instruction that clears %rax - * The REX.W cancels the effect of any data16. + * cs cs cs xorl %eax, %eax - a single 5 byte instruction that clears %[er]ax */ -static const u8 xor5rax[] = { 0x66, 0x66, 0x48, 0x31, 0xc0 }; +static const u8 xor5rax[] = { 0x2e, 0x2e, 0x2e, 0x31, 0xc0 }; static const u8 retinsn[] = { RET_INSN_OPCODE, 0xcc, 0xcc, 0xcc, 0xcc }; diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index e86d610dc6b7a..de9d8a27387cf 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -1623,11 +1623,6 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, goto exception; } - if (!seg_desc.p) { - err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR; - goto exception; - } - dpl = seg_desc.dpl; switch (seg) { @@ -1667,6 +1662,10 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, case VCPU_SREG_TR: if (seg_desc.s || (seg_desc.type != 1 && seg_desc.type != 9)) goto exception; + if (!seg_desc.p) { + err_vec = NP_VECTOR; + goto exception; + } old_desc = seg_desc; seg_desc.type |= 2; /* busy */ ret = ctxt->ops->cmpxchg_emulated(ctxt, desc_addr, &old_desc, &seg_desc, @@ -1691,6 +1690,11 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, break; } + if (!seg_desc.p) { + err_vec = (seg == VCPU_SREG_SS) ? 
SS_VECTOR : NP_VECTOR; + goto exception; + } + if (seg_desc.s) { /* mark segment as accessed */ if (!(seg_desc.type & 1)) { @@ -3519,8 +3523,10 @@ static int em_rdpid(struct x86_emulate_ctxt *ctxt) { u64 tsc_aux = 0; - if (ctxt->ops->get_msr(ctxt, MSR_TSC_AUX, &tsc_aux)) + if (!ctxt->ops->guest_has_rdpid(ctxt)) return emulate_ud(ctxt); + + ctxt->ops->get_msr(ctxt, MSR_TSC_AUX, &tsc_aux); ctxt->dst.val = tsc_aux; return X86EMUL_CONTINUE; } diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 6e38a7d22e97a..10bc257d3803b 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -236,7 +236,7 @@ static int synic_set_msr(struct kvm_vcpu_hv_synic *synic, struct kvm_vcpu *vcpu = hv_synic_to_vcpu(synic); int ret; - if (!synic->active && !host) + if (!synic->active && (!host || data)) return 1; trace_kvm_hv_synic_set_msr(vcpu->vcpu_id, msr, data, host); @@ -282,6 +282,9 @@ static int synic_set_msr(struct kvm_vcpu_hv_synic *synic, case HV_X64_MSR_EOM: { int i; + if (!synic->active) + break; + for (i = 0; i < ARRAY_SIZE(synic->sint); i++) kvm_hv_notify_acked_sint(vcpu, i); break; @@ -446,6 +449,9 @@ static int synic_set_irq(struct kvm_vcpu_hv_synic *synic, u32 sint) struct kvm_lapic_irq irq; int ret, vector; + if (KVM_BUG_ON(!lapic_in_kernel(vcpu), vcpu->kvm)) + return -EINVAL; + if (sint >= ARRAY_SIZE(synic->sint)) return -EINVAL; @@ -658,7 +664,7 @@ static int stimer_set_config(struct kvm_vcpu_hv_stimer *stimer, u64 config, struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu); struct kvm_vcpu_hv_synic *synic = to_hv_synic(vcpu); - if (!synic->active && !host) + if (!synic->active && (!host || config)) return 1; if (unlikely(!host && hv_vcpu->enforce_cpuid && new_config.direct_mode && @@ -687,7 +693,7 @@ static int stimer_set_count(struct kvm_vcpu_hv_stimer *stimer, u64 count, struct kvm_vcpu *vcpu = hv_stimer_to_vcpu(stimer); struct kvm_vcpu_hv_synic *synic = to_hv_synic(vcpu); - if (!synic->active && !host) + if (!synic->active && (!host || count)) return 1; 
trace_kvm_hv_stimer_set_count(hv_stimer_to_vcpu(stimer)->vcpu_id, @@ -1750,7 +1756,7 @@ struct kvm_hv_hcall { sse128_t xmm[HV_HYPERCALL_MAX_XMM_REGISTERS]; }; -static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool ex) +static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc) { int i; gpa_t gpa; @@ -1765,7 +1771,8 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool int sparse_banks_len; bool all_cpus; - if (!ex) { + if (hc->code == HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST || + hc->code == HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE) { if (hc->fast) { flush.address_space = hc->ingpa; flush.flags = hc->outgpa; @@ -1819,7 +1826,8 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool if (!all_cpus) { if (hc->fast) { - if (sparse_banks_len > HV_HYPERCALL_MAX_XMM_REGISTERS - 1) + /* XMM0 is already consumed, each XMM holds two sparse banks. */ + if (sparse_banks_len > 2 * (HV_HYPERCALL_MAX_XMM_REGISTERS - 1)) return HV_STATUS_INVALID_HYPERCALL_INPUT; for (i = 0; i < sparse_banks_len; i += 2) { sparse_banks[i] = sse128_lo(hc->xmm[i / 2 + 1]); @@ -1875,7 +1883,7 @@ static void kvm_send_ipi_to_many(struct kvm *kvm, u32 vector, } } -static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool ex) +static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc) { struct kvm *kvm = vcpu->kvm; struct hv_send_ipi_ex send_ipi_ex; @@ -1888,8 +1896,9 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool int sparse_banks_len; u32 vector; bool all_cpus; + int i; - if (!ex) { + if (hc->code == HVCALL_SEND_IPI) { if (!hc->fast) { if (unlikely(kvm_read_guest(kvm, hc->ingpa, &send_ipi, sizeof(send_ipi)))) @@ -1908,9 +1917,15 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool trace_kvm_hv_send_ipi(vector, sparse_banks[0]); } else { - if (unlikely(kvm_read_guest(kvm, hc->ingpa, &send_ipi_ex, - 
sizeof(send_ipi_ex)))) - return HV_STATUS_INVALID_HYPERCALL_INPUT; + if (!hc->fast) { + if (unlikely(kvm_read_guest(kvm, hc->ingpa, &send_ipi_ex, + sizeof(send_ipi_ex)))) + return HV_STATUS_INVALID_HYPERCALL_INPUT; + } else { + send_ipi_ex.vector = (u32)hc->ingpa; + send_ipi_ex.vp_set.format = hc->outgpa; + send_ipi_ex.vp_set.valid_bank_mask = sse128_lo(hc->xmm[0]); + } trace_kvm_hv_send_ipi_ex(send_ipi_ex.vector, send_ipi_ex.vp_set.format, @@ -1918,8 +1933,7 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool vector = send_ipi_ex.vector; valid_bank_mask = send_ipi_ex.vp_set.valid_bank_mask; - sparse_banks_len = bitmap_weight(&valid_bank_mask, 64) * - sizeof(sparse_banks[0]); + sparse_banks_len = bitmap_weight(&valid_bank_mask, 64); all_cpus = send_ipi_ex.vp_set.format == HV_GENERIC_SET_ALL; @@ -1929,12 +1943,27 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool if (!sparse_banks_len) goto ret_success; - if (kvm_read_guest(kvm, - hc->ingpa + offsetof(struct hv_send_ipi_ex, - vp_set.bank_contents), - sparse_banks, - sparse_banks_len)) - return HV_STATUS_INVALID_HYPERCALL_INPUT; + if (!hc->fast) { + if (kvm_read_guest(kvm, + hc->ingpa + offsetof(struct hv_send_ipi_ex, + vp_set.bank_contents), + sparse_banks, + sparse_banks_len * sizeof(sparse_banks[0]))) + return HV_STATUS_INVALID_HYPERCALL_INPUT; + } else { + /* + * The lower half of XMM0 is already consumed, each XMM holds + * two sparse banks. 
+ */ + if (sparse_banks_len > (2 * HV_HYPERCALL_MAX_XMM_REGISTERS - 1)) + return HV_STATUS_INVALID_HYPERCALL_INPUT; + for (i = 0; i < sparse_banks_len; i++) { + if (i % 2) + sparse_banks[i] = sse128_lo(hc->xmm[(i + 1) / 2]); + else + sparse_banks[i] = sse128_hi(hc->xmm[i / 2]); + } + } } check_and_send_ipi: @@ -2096,6 +2125,7 @@ static bool is_xmm_fast_hypercall(struct kvm_hv_hcall *hc) case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE: case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX: case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX: + case HVCALL_SEND_IPI_EX: return true; } @@ -2247,46 +2277,28 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) kvm_hv_hypercall_complete_userspace; return 0; case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST: - if (unlikely(!hc.rep_cnt || hc.rep_idx)) { - ret = HV_STATUS_INVALID_HYPERCALL_INPUT; - break; - } - ret = kvm_hv_flush_tlb(vcpu, &hc, false); - break; - case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE: - if (unlikely(hc.rep)) { - ret = HV_STATUS_INVALID_HYPERCALL_INPUT; - break; - } - ret = kvm_hv_flush_tlb(vcpu, &hc, false); - break; case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX: if (unlikely(!hc.rep_cnt || hc.rep_idx)) { ret = HV_STATUS_INVALID_HYPERCALL_INPUT; break; } - ret = kvm_hv_flush_tlb(vcpu, &hc, true); + ret = kvm_hv_flush_tlb(vcpu, &hc); break; + case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE: case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX: if (unlikely(hc.rep)) { ret = HV_STATUS_INVALID_HYPERCALL_INPUT; break; } - ret = kvm_hv_flush_tlb(vcpu, &hc, true); + ret = kvm_hv_flush_tlb(vcpu, &hc); break; case HVCALL_SEND_IPI: - if (unlikely(hc.rep)) { - ret = HV_STATUS_INVALID_HYPERCALL_INPUT; - break; - } - ret = kvm_hv_send_ipi(vcpu, &hc, false); - break; case HVCALL_SEND_IPI_EX: - if (unlikely(hc.fast || hc.rep)) { + if (unlikely(hc.rep)) { ret = HV_STATUS_INVALID_HYPERCALL_INPUT; break; } - ret = kvm_hv_send_ipi(vcpu, &hc, true); + ret = kvm_hv_send_ipi(vcpu, &hc); break; case HVCALL_POST_DEBUG_DATA: case HVCALL_RETRIEVE_DEBUG_DATA: diff --git a/arch/x86/kvm/kvm_emulate.h 
b/arch/x86/kvm/kvm_emulate.h index 39eded2426ffd..a2a7654d8aced 100644 --- a/arch/x86/kvm/kvm_emulate.h +++ b/arch/x86/kvm/kvm_emulate.h @@ -226,6 +226,7 @@ struct x86_emulate_ops { bool (*guest_has_long_mode)(struct x86_emulate_ctxt *ctxt); bool (*guest_has_movbe)(struct x86_emulate_ctxt *ctxt); bool (*guest_has_fxsr)(struct x86_emulate_ctxt *ctxt); + bool (*guest_has_rdpid)(struct x86_emulate_ctxt *ctxt); void (*set_nmi_mask)(struct x86_emulate_ctxt *ctxt, bool masked); diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 9322e6340a742..2a10d0033c964 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -992,6 +992,10 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, *r = -1; if (irq->shorthand == APIC_DEST_SELF) { + if (KVM_BUG_ON(!src, kvm)) { + *r = 0; + return true; + } *r = kvm_apic_set_irq(src->vcpu, irq, dest_map); return true; } @@ -2242,10 +2246,7 @@ void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data) void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8) { - struct kvm_lapic *apic = vcpu->arch.apic; - - apic_set_tpr(apic, ((cr8 & 0x0f) << 4) - | (kvm_lapic_get_reg(apic, APIC_TASKPRI) & 4)); + apic_set_tpr(vcpu->arch.apic, (cr8 & 0x0f) << 4); } u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index e9fbb2c8bbe2d..c7070973f0de1 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -48,6 +48,7 @@ X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE) #define KVM_MMU_CR0_ROLE_BITS (X86_CR0_PG | X86_CR0_WP) +#define KVM_MMU_EFER_ROLE_BITS (EFER_LME | EFER_NX) static __always_inline u64 rsvd_bits(int s, int e) { diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 5628d0ba637ec..7f009ebb319ab 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -6144,12 +6144,24 @@ static int set_nx_huge_pages(const char *val, const struct kernel_param *kp) return 0; } -int kvm_mmu_module_init(void) +/* + * nx_huge_pages needs to be 
resolved to true/false when kvm.ko is loaded, as + * its default value of -1 is technically undefined behavior for a boolean. + */ +void kvm_mmu_x86_module_init(void) { - int ret = -ENOMEM; - if (nx_huge_pages == -1) __set_nx_huge_pages(get_nx_auto_mode()); +} + +/* + * The bulk of the MMU initialization is deferred until the vendor module is + * loaded as many of the masks/values may be modified by VMX or SVM, i.e. need + * to be reset when a potentially different vendor module is loaded. + */ +int kvm_mmu_vendor_module_init(void) +{ + int ret = -ENOMEM; /* * MMU roles use union aliasing which is, generally speaking, an @@ -6197,7 +6209,7 @@ void kvm_mmu_destroy(struct kvm_vcpu *vcpu) mmu_free_memory_caches(vcpu); } -void kvm_mmu_module_exit(void) +void kvm_mmu_vendor_module_exit(void) { mmu_destroy_caches(); percpu_counter_destroy(&kvm_total_used_mmu_pages); diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h index 5b5bdac97c7b9..3821d5140ea31 100644 --- a/arch/x86/kvm/mmu/paging_tmpl.h +++ b/arch/x86/kvm/mmu/paging_tmpl.h @@ -34,9 +34,8 @@ #define PT_HAVE_ACCESSED_DIRTY(mmu) true #ifdef CONFIG_X86_64 #define PT_MAX_FULL_LEVELS PT64_ROOT_MAX_LEVEL - #define CMPXCHG cmpxchg + #define CMPXCHG "cmpxchgq" #else - #define CMPXCHG cmpxchg64 #define PT_MAX_FULL_LEVELS 2 #endif #elif PTTYPE == 32 @@ -52,7 +51,7 @@ #define PT_GUEST_DIRTY_SHIFT PT_DIRTY_SHIFT #define PT_GUEST_ACCESSED_SHIFT PT_ACCESSED_SHIFT #define PT_HAVE_ACCESSED_DIRTY(mmu) true - #define CMPXCHG cmpxchg + #define CMPXCHG "cmpxchgl" #elif PTTYPE == PTTYPE_EPT #define pt_element_t u64 #define guest_walker guest_walkerEPT @@ -65,7 +64,9 @@ #define PT_GUEST_DIRTY_SHIFT 9 #define PT_GUEST_ACCESSED_SHIFT 8 #define PT_HAVE_ACCESSED_DIRTY(mmu) ((mmu)->ept_ad) - #define CMPXCHG cmpxchg64 + #ifdef CONFIG_X86_64 + #define CMPXCHG "cmpxchgq" + #endif #define PT_MAX_FULL_LEVELS PT64_ROOT_MAX_LEVEL #else #error Invalid PTTYPE value @@ -147,43 +148,36 @@ static int FNAME(cmpxchg_gpte)(struct 
kvm_vcpu *vcpu, struct kvm_mmu *mmu, pt_element_t __user *ptep_user, unsigned index, pt_element_t orig_pte, pt_element_t new_pte) { - int npages; - pt_element_t ret; - pt_element_t *table; - struct page *page; - - npages = get_user_pages_fast((unsigned long)ptep_user, 1, FOLL_WRITE, &page); - if (likely(npages == 1)) { - table = kmap_atomic(page); - ret = CMPXCHG(&table[index], orig_pte, new_pte); - kunmap_atomic(table); - - kvm_release_page_dirty(page); - } else { - struct vm_area_struct *vma; - unsigned long vaddr = (unsigned long)ptep_user & PAGE_MASK; - unsigned long pfn; - unsigned long paddr; - - mmap_read_lock(current->mm); - vma = find_vma_intersection(current->mm, vaddr, vaddr + PAGE_SIZE); - if (!vma || !(vma->vm_flags & VM_PFNMAP)) { - mmap_read_unlock(current->mm); - return -EFAULT; - } - pfn = ((vaddr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; - paddr = pfn << PAGE_SHIFT; - table = memremap(paddr, PAGE_SIZE, MEMREMAP_WB); - if (!table) { - mmap_read_unlock(current->mm); - return -EFAULT; - } - ret = CMPXCHG(&table[index], orig_pte, new_pte); - memunmap(table); - mmap_read_unlock(current->mm); - } + signed char r; - return (ret != orig_pte); + if (!user_access_begin(ptep_user, sizeof(pt_element_t))) + return -EFAULT; + +#ifdef CMPXCHG + asm volatile("1:" LOCK_PREFIX CMPXCHG " %[new], %[ptr]\n" + "setnz %b[r]\n" + "2:" + _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, %k[r]) + : [ptr] "+m" (*ptep_user), + [old] "+a" (orig_pte), + [r] "=q" (r) + : [new] "r" (new_pte) + : "memory"); +#else + asm volatile("1:" LOCK_PREFIX "cmpxchg8b %[ptr]\n" + "setnz %b[r]\n" + "2:" + _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, %k[r]) + : [ptr] "+m" (*ptep_user), + [old] "+A" (orig_pte), + [r] "=q" (r) + : [new_lo] "b" ((u32)new_pte), + [new_hi] "c" ((u32)(new_pte >> 32)) + : "memory"); +#endif + + user_access_end(); + return r; } static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu, diff --git a/arch/x86/kvm/mmu/tdp_mmu.c 
b/arch/x86/kvm/mmu/tdp_mmu.c index bc9e3553fba2d..d2e69b2ddbbee 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -99,15 +99,18 @@ void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root, } /* - * Finds the next valid root after root (or the first valid root if root - * is NULL), takes a reference on it, and returns that next root. If root - * is not NULL, this thread should have already taken a reference on it, and - * that reference will be dropped. If no valid root is found, this - * function will return NULL. + * Returns the next root after @prev_root (or the first root if @prev_root is + * NULL). A reference to the returned root is acquired, and the reference to + * @prev_root is released (the caller obviously must hold a reference to + * @prev_root if it's non-NULL). + * + * If @only_valid is true, invalid roots are skipped. + * + * Returns NULL if the end of tdp_mmu_roots was reached. */ static struct kvm_mmu_page *tdp_mmu_next_root(struct kvm *kvm, struct kvm_mmu_page *prev_root, - bool shared) + bool shared, bool only_valid) { struct kvm_mmu_page *next_root; @@ -121,9 +124,14 @@ static struct kvm_mmu_page *tdp_mmu_next_root(struct kvm *kvm, next_root = list_first_or_null_rcu(&kvm->arch.tdp_mmu_roots, typeof(*next_root), link); - while (next_root && !kvm_tdp_mmu_get_root(kvm, next_root)) + while (next_root) { + if ((!only_valid || !next_root->role.invalid) && + kvm_tdp_mmu_get_root(kvm, next_root)) + break; + next_root = list_next_or_null_rcu(&kvm->arch.tdp_mmu_roots, &next_root->link, typeof(*next_root), link); + } rcu_read_unlock(); @@ -143,13 +151,19 @@ static struct kvm_mmu_page *tdp_mmu_next_root(struct kvm *kvm, * mode. In the unlikely event that this thread must free a root, the lock * will be temporarily dropped and reacquired in write mode. 
*/ -#define for_each_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, _shared) \ - for (_root = tdp_mmu_next_root(_kvm, NULL, _shared); \ - _root; \ - _root = tdp_mmu_next_root(_kvm, _root, _shared)) \ - if (kvm_mmu_page_as_id(_root) != _as_id) { \ +#define __for_each_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, _shared, _only_valid)\ + for (_root = tdp_mmu_next_root(_kvm, NULL, _shared, _only_valid); \ + _root; \ + _root = tdp_mmu_next_root(_kvm, _root, _shared, _only_valid)) \ + if (kvm_mmu_page_as_id(_root) != _as_id) { \ } else +#define for_each_valid_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, _shared) \ + __for_each_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, _shared, true) + +#define for_each_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, _shared) \ + __for_each_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, _shared, false) + #define for_each_tdp_mmu_root(_kvm, _root, _as_id) \ list_for_each_entry_rcu(_root, &_kvm->arch.tdp_mmu_roots, link, \ lockdep_is_held_type(&kvm->mmu_lock, 0) || \ @@ -200,7 +214,10 @@ hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu) role = page_role_for_level(vcpu, vcpu->arch.mmu->shadow_root_level); - /* Check for an existing root before allocating a new one. */ + /* + * Check for an existing root before allocating a new one. Note, the + * role check prevents consuming an invalid root. 
+ */ for_each_tdp_mmu_root(kvm, root, kvm_mmu_role_as_id(role)) { if (root->role.word == role.word && kvm_tdp_mmu_get_root(kvm, root)) @@ -1032,13 +1049,8 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) bool kvm_tdp_mmu_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range, bool flush) { - struct kvm_mmu_page *root; - - for_each_tdp_mmu_root_yield_safe(kvm, root, range->slot->as_id, false) - flush = zap_gfn_range(kvm, root, range->start, range->end, - range->may_block, flush, false); - - return flush; + return __kvm_tdp_mmu_zap_gfn_range(kvm, range->slot->as_id, range->start, + range->end, range->may_block, flush); } typedef bool (*tdp_handler_t)(struct kvm *kvm, struct tdp_iter *iter, @@ -1221,7 +1233,7 @@ bool kvm_tdp_mmu_wrprot_slot(struct kvm *kvm, lockdep_assert_held_read(&kvm->mmu_lock); - for_each_tdp_mmu_root_yield_safe(kvm, root, slot->as_id, true) + for_each_valid_tdp_mmu_root_yield_safe(kvm, root, slot->as_id, true) spte_set |= wrprot_gfn_range(kvm, root, slot->base_gfn, slot->base_gfn + slot->npages, min_level); @@ -1249,6 +1261,9 @@ static bool clear_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root, if (tdp_mmu_iter_cond_resched(kvm, &iter, false, true)) continue; + if (!is_shadow_present_pte(iter.old_spte)) + continue; + if (spte_ad_need_write_protect(iter.old_spte)) { if (is_writable_pte(iter.old_spte)) new_spte = iter.old_spte & ~PT_WRITABLE_MASK; @@ -1291,7 +1306,7 @@ bool kvm_tdp_mmu_clear_dirty_slot(struct kvm *kvm, lockdep_assert_held_read(&kvm->mmu_lock); - for_each_tdp_mmu_root_yield_safe(kvm, root, slot->as_id, true) + for_each_valid_tdp_mmu_root_yield_safe(kvm, root, slot->as_id, true) spte_set |= clear_dirty_gfn_range(kvm, root, slot->base_gfn, slot->base_gfn + slot->npages); @@ -1416,7 +1431,7 @@ void kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm, lockdep_assert_held_read(&kvm->mmu_lock); - for_each_tdp_mmu_root_yield_safe(kvm, root, slot->as_id, true) + 
for_each_valid_tdp_mmu_root_yield_safe(kvm, root, slot->as_id, true) zap_collapsible_spte_range(kvm, root, slot); } diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h index 3899004a5d91e..08c917511fedd 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.h +++ b/arch/x86/kvm/mmu/tdp_mmu.h @@ -10,9 +10,6 @@ hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu); __must_check static inline bool kvm_tdp_mmu_get_root(struct kvm *kvm, struct kvm_mmu_page *root) { - if (root->role.invalid) - return false; - return refcount_inc_not_zero(&root->tdp_mmu_root_count); } diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index b1a02993782b3..eca39f56c2315 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -96,8 +96,7 @@ static void kvm_perf_overflow(struct perf_event *perf_event, static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type, u64 config, bool exclude_user, - bool exclude_kernel, bool intr, - bool in_tx, bool in_tx_cp) + bool exclude_kernel, bool intr) { struct perf_event *event; struct perf_event_attr attr = { @@ -116,16 +115,14 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type, attr.sample_period = get_sample_period(pmc, pmc->counter); - if (in_tx) - attr.config |= HSW_IN_TX; - if (in_tx_cp) { + if ((attr.config & HSW_IN_TX_CHECKPOINTED) && + guest_cpuid_is_intel(pmc->vcpu)) { /* * HSW_IN_TX_CHECKPOINTED is not supported with nonzero * period. Just clear the sample period so at least * allocating the counter doesn't fail. 
*/ attr.sample_period = 0; - attr.config |= HSW_IN_TX_CHECKPOINTED; } event = perf_event_create_kernel_counter(&attr, -1, current, @@ -185,6 +182,7 @@ void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel) u32 type = PERF_TYPE_RAW; struct kvm *kvm = pmc->vcpu->kvm; struct kvm_pmu_event_filter *filter; + struct kvm_pmu *pmu = vcpu_to_pmu(pmc->vcpu); bool allow_event = true; if (eventsel & ARCH_PERFMON_EVENTSEL_PIN_CONTROL) @@ -221,7 +219,7 @@ void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel) } if (type == PERF_TYPE_RAW) - config = eventsel & AMD64_RAW_EVENT_MASK; + config = eventsel & pmu->raw_event_mask; if (pmc->current_config == eventsel && pmc_resume_counter(pmc)) return; @@ -232,9 +230,7 @@ void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel) pmc_reprogram_counter(pmc, type, config, !(eventsel & ARCH_PERFMON_EVENTSEL_USR), !(eventsel & ARCH_PERFMON_EVENTSEL_OS), - eventsel & ARCH_PERFMON_EVENTSEL_INT, - (eventsel & HSW_IN_TX), - (eventsel & HSW_IN_TX_CHECKPOINTED)); + eventsel & ARCH_PERFMON_EVENTSEL_INT); } EXPORT_SYMBOL_GPL(reprogram_gp_counter); @@ -270,7 +266,7 @@ void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int idx) kvm_x86_ops.pmu_ops->pmc_perf_hw_id(pmc), !(en_field & 0x2), /* exclude user */ !(en_field & 0x1), /* exclude kernel */ - pmi, false, false); + pmi); } EXPORT_SYMBOL_GPL(reprogram_fixed_counter); diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index fb3e207913388..7ef229011db8a 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -783,7 +783,7 @@ int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq, { struct kvm_kernel_irq_routing_entry *e; struct kvm_irq_routing_table *irq_rt; - int idx, ret = -EINVAL; + int idx, ret = 0; if (!kvm_arch_has_assigned_device(kvm) || !irq_remapping_cap(IRQ_POSTING_CAP)) @@ -794,7 +794,13 @@ int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq, idx = srcu_read_lock(&kvm->irq_srcu); irq_rt = srcu_dereference(kvm->irq_routing, 
&kvm->irq_srcu); - WARN_ON(guest_irq >= irq_rt->nr_rt_entries); + + if (guest_irq >= irq_rt->nr_rt_entries || + hlist_empty(&irq_rt->map[guest_irq])) { + pr_warn_once("no route for guest_irq %u/%u (broken user space?)\n", + guest_irq, irq_rt->nr_rt_entries); + goto out; + } hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) { struct vcpu_data vcpu_info; @@ -927,17 +933,12 @@ avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r) void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { u64 entry; - /* ID = 0xff (broadcast), ID > 0xff (reserved) */ int h_physical_id = kvm_cpu_get_apicid(cpu); struct vcpu_svm *svm = to_svm(vcpu); lockdep_assert_preemption_disabled(); - /* - * Since the host physical APIC id is 8 bits, - * we can support host APIC ID upto 255. - */ - if (WARN_ON(h_physical_id > AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK)) + if (WARN_ON(h_physical_id & ~AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK)) return; /* diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c index 5aa45f13b16dc..ba40b7fced5ae 100644 --- a/arch/x86/kvm/svm/pmu.c +++ b/arch/x86/kvm/svm/pmu.c @@ -262,12 +262,10 @@ static int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) /* MSR_EVNTSELn */ pmc = get_gp_pmc_amd(pmu, msr, PMU_TYPE_EVNTSEL); if (pmc) { - if (data == pmc->eventsel) - return 0; - if (!(data & pmu->reserved_bits)) { + data &= ~pmu->reserved_bits; + if (data != pmc->eventsel) reprogram_gp_counter(pmc, data); - return 0; - } + return 0; } return 1; @@ -284,6 +282,7 @@ static void amd_pmu_refresh(struct kvm_vcpu *vcpu) pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << 48) - 1; pmu->reserved_bits = 0xfffffff000280000ull; + pmu->raw_event_mask = AMD64_RAW_EVENT_MASK; pmu->version = 1; /* not applicable to AMD; but clean them to prevent any fall out */ pmu->counter_bitmask[KVM_PMC_FIXED] = 0; diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 17b53457d8664..fef9758525826 100644 --- a/arch/x86/kvm/svm/sev.c +++ 
b/arch/x86/kvm/svm/sev.c @@ -2358,7 +2358,7 @@ static void sev_es_sync_from_ghcb(struct vcpu_svm *svm) memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap)); } -static bool sev_es_validate_vmgexit(struct vcpu_svm *svm) +static int sev_es_validate_vmgexit(struct vcpu_svm *svm) { struct kvm_vcpu *vcpu; struct ghcb *ghcb; @@ -2463,7 +2463,7 @@ static bool sev_es_validate_vmgexit(struct vcpu_svm *svm) goto vmgexit_err; } - return true; + return 0; vmgexit_err: vcpu = &svm->vcpu; @@ -2486,7 +2486,8 @@ static bool sev_es_validate_vmgexit(struct vcpu_svm *svm) ghcb_set_sw_exit_info_1(ghcb, 2); ghcb_set_sw_exit_info_2(ghcb, reason); - return false; + /* Resume the guest to "return" the error code. */ + return 1; } void sev_es_unmap_ghcb(struct vcpu_svm *svm) @@ -2545,7 +2546,7 @@ void pre_sev_run(struct vcpu_svm *svm, int cpu) } #define GHCB_SCRATCH_AREA_LIMIT (16ULL * PAGE_SIZE) -static bool setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len) +static int setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len) { struct vmcb_control_area *control = &svm->vmcb->control; struct ghcb *ghcb = svm->sev_es.ghcb; @@ -2598,14 +2599,14 @@ static bool setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len) } scratch_va = kvzalloc(len, GFP_KERNEL_ACCOUNT); if (!scratch_va) - goto e_scratch; + return -ENOMEM; if (kvm_read_guest(svm->vcpu.kvm, scratch_gpa_beg, scratch_va, len)) { /* Unable to copy scratch area from guest */ pr_err("vmgexit: kvm_read_guest for scratch area failed\n"); kvfree(scratch_va); - goto e_scratch; + return -EFAULT; } /* @@ -2621,13 +2622,13 @@ static bool setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len) svm->sev_es.ghcb_sa = scratch_va; svm->sev_es.ghcb_sa_len = len; - return true; + return 0; e_scratch: ghcb_set_sw_exit_info_1(ghcb, 2); ghcb_set_sw_exit_info_2(ghcb, GHCB_ERR_INVALID_SCRATCH_AREA); - return false; + return 1; } static void set_ghcb_msr_bits(struct vcpu_svm *svm, u64 value, u64 mask, @@ 
-2765,17 +2766,18 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu) exit_code = ghcb_get_sw_exit_code(ghcb); - if (!sev_es_validate_vmgexit(svm)) - return 1; + ret = sev_es_validate_vmgexit(svm); + if (ret) + return ret; sev_es_sync_from_ghcb(svm); ghcb_set_sw_exit_info_1(ghcb, 0); ghcb_set_sw_exit_info_2(ghcb, 0); - ret = 1; switch (exit_code) { case SVM_VMGEXIT_MMIO_READ: - if (!setup_vmgexit_scratch(svm, true, control->exit_info_2)) + ret = setup_vmgexit_scratch(svm, true, control->exit_info_2); + if (ret) break; ret = kvm_sev_es_mmio_read(vcpu, @@ -2784,7 +2786,8 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu) svm->sev_es.ghcb_sa); break; case SVM_VMGEXIT_MMIO_WRITE: - if (!setup_vmgexit_scratch(svm, false, control->exit_info_2)) + ret = setup_vmgexit_scratch(svm, false, control->exit_info_2); + if (ret) break; ret = kvm_sev_es_mmio_write(vcpu, @@ -2817,6 +2820,7 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu) ghcb_set_sw_exit_info_2(ghcb, GHCB_ERR_INVALID_INPUT); } + ret = 1; break; } case SVM_VMGEXIT_UNSUPPORTED_EVENT: @@ -2836,6 +2840,7 @@ int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in) { int count; int bytes; + int r; if (svm->vmcb->control.exit_info_2 > INT_MAX) return -EINVAL; @@ -2844,8 +2849,9 @@ int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in) if (unlikely(check_mul_overflow(count, size, &bytes))) return -EINVAL; - if (!setup_vmgexit_scratch(svm, in, bytes)) - return 1; + r = setup_vmgexit_scratch(svm, in, bytes); + if (r) + return r; return kvm_sev_es_string_io(&svm->vcpu, size, port, svm->sev_es.ghcb_sa, count, in); diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index fa98d6844728f..86bcfed6599ea 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -22,6 +22,8 @@ #include #include +#include "kvm_cache_regs.h" + #define __sme_page_pa(x) __sme_set(page_to_pfn(x) << PAGE_SHIFT) #define IOPM_SIZE PAGE_SIZE * 3 diff --git a/arch/x86/kvm/svm/svm_onhyperv.c 
b/arch/x86/kvm/svm/svm_onhyperv.c index 98aa981c04ec5..8cdc62c74a964 100644 --- a/arch/x86/kvm/svm/svm_onhyperv.c +++ b/arch/x86/kvm/svm/svm_onhyperv.c @@ -4,7 +4,6 @@ */ #include -#include "kvm_cache_regs.h" #include diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index 466d18fc0c5da..5fa3870b89881 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -389,6 +389,7 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) struct kvm_pmc *pmc; u32 msr = msr_info->index; u64 data = msr_info->data; + u64 reserved_bits; switch (msr) { case MSR_CORE_PERF_FIXED_CTR_CTRL: @@ -443,7 +444,11 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) } else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) { if (data == pmc->eventsel) return 0; - if (!(data & pmu->reserved_bits)) { + reserved_bits = pmu->reserved_bits; + if ((pmc->idx == 2) && + (pmu->raw_event_mask & HSW_IN_TX_CHECKPOINTED)) + reserved_bits ^= HSW_IN_TX_CHECKPOINTED; + if (!(data & reserved_bits)) { reprogram_gp_counter(pmc, data); return 0; } @@ -485,6 +490,7 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu) pmu->counter_bitmask[KVM_PMC_FIXED] = 0; pmu->version = 0; pmu->reserved_bits = 0xffffffff00200000ull; + pmu->raw_event_mask = X86_RAW_EVENT_MASK; entry = kvm_find_cpuid_entry(vcpu, 0xa, 0); if (!entry || !enable_pmu) @@ -533,8 +539,10 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu) entry = kvm_find_cpuid_entry(vcpu, 7, 0); if (entry && (boot_cpu_has(X86_FEATURE_HLE) || boot_cpu_has(X86_FEATURE_RTM)) && - (entry->ebx & (X86_FEATURE_HLE|X86_FEATURE_RTM))) - pmu->reserved_bits ^= HSW_IN_TX|HSW_IN_TX_CHECKPOINTED; + (entry->ebx & (X86_FEATURE_HLE|X86_FEATURE_RTM))) { + pmu->reserved_bits ^= HSW_IN_TX; + pmu->raw_event_mask |= (HSW_IN_TX|HSW_IN_TX_CHECKPOINTED); + } bitmap_set(pmu->all_valid_pmc_idx, 0, pmu->nr_arch_gp_counters); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 
eb4029660bd9f..05128162ebd58 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1656,8 +1656,7 @@ static int set_efer(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return r; } - /* Update reserved bits */ - if ((efer ^ old_efer) & EFER_NX) + if ((efer ^ old_efer) & KVM_MMU_EFER_ROLE_BITS) kvm_mmu_reset_context(vcpu); return 0; @@ -7676,6 +7675,11 @@ static bool emulator_guest_has_fxsr(struct x86_emulate_ctxt *ctxt) return guest_cpuid_has(emul_to_vcpu(ctxt), X86_FEATURE_FXSR); } +static bool emulator_guest_has_rdpid(struct x86_emulate_ctxt *ctxt) +{ + return guest_cpuid_has(emul_to_vcpu(ctxt), X86_FEATURE_RDPID); +} + static ulong emulator_read_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg) { return kvm_register_read_raw(emul_to_vcpu(ctxt), reg); @@ -7758,6 +7762,7 @@ static const struct x86_emulate_ops emulate_ops = { .guest_has_long_mode = emulator_guest_has_long_mode, .guest_has_movbe = emulator_guest_has_movbe, .guest_has_fxsr = emulator_guest_has_fxsr, + .guest_has_rdpid = emulator_guest_has_rdpid, .set_nmi_mask = emulator_set_nmi_mask, .get_hflags = emulator_get_hflags, .exiting_smm = emulator_exiting_smm, @@ -8841,7 +8846,7 @@ int kvm_arch_init(void *opaque) } kvm_nr_uret_msrs = 0; - r = kvm_mmu_module_init(); + r = kvm_mmu_vendor_module_init(); if (r) goto out_free_percpu; @@ -8889,7 +8894,7 @@ void kvm_arch_exit(void) cancel_work_sync(&pvclock_gtod_work); #endif kvm_x86_ops.hardware_enable = NULL; - kvm_mmu_module_exit(); + kvm_mmu_vendor_module_exit(); free_percpu(user_return_msrs); kmem_cache_destroy(x86_emulator_cache); #ifdef CONFIG_KVM_XEN @@ -12882,3 +12887,19 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_enter); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_exit); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_msr_protocol_enter); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_msr_protocol_exit); + +static int __init kvm_x86_init(void) +{ + kvm_mmu_x86_module_init(); + return 0; +} +module_init(kvm_x86_init); + +static void __exit kvm_x86_exit(void) +{ 
+ /* + * If module_init() is implemented, module_exit() must also be + * implemented to allow module unload. + */ +} +module_exit(kvm_x86_exit); diff --git a/arch/x86/lib/iomem.c b/arch/x86/lib/iomem.c index df50451d94ef7..3e2f33fc33de2 100644 --- a/arch/x86/lib/iomem.c +++ b/arch/x86/lib/iomem.c @@ -22,7 +22,7 @@ static __always_inline void rep_movs(void *to, const void *from, size_t n) : "memory"); } -void memcpy_fromio(void *to, const volatile void __iomem *from, size_t n) +static void string_memcpy_fromio(void *to, const volatile void __iomem *from, size_t n) { if (unlikely(!n)) return; @@ -38,9 +38,8 @@ void memcpy_fromio(void *to, const volatile void __iomem *from, size_t n) } rep_movs(to, (const void *)from, n); } -EXPORT_SYMBOL(memcpy_fromio); -void memcpy_toio(volatile void __iomem *to, const void *from, size_t n) +static void string_memcpy_toio(volatile void __iomem *to, const void *from, size_t n) { if (unlikely(!n)) return; @@ -56,14 +55,64 @@ void memcpy_toio(volatile void __iomem *to, const void *from, size_t n) } rep_movs((void *)to, (const void *) from, n); } + +static void unrolled_memcpy_fromio(void *to, const volatile void __iomem *from, size_t n) +{ + const volatile char __iomem *in = from; + char *out = to; + int i; + + for (i = 0; i < n; ++i) + out[i] = readb(&in[i]); +} + +static void unrolled_memcpy_toio(volatile void __iomem *to, const void *from, size_t n) +{ + volatile char __iomem *out = to; + const char *in = from; + int i; + + for (i = 0; i < n; ++i) + writeb(in[i], &out[i]); +} + +static void unrolled_memset_io(volatile void __iomem *a, int b, size_t c) +{ + volatile char __iomem *mem = a; + int i; + + for (i = 0; i < c; ++i) + writeb(b, &mem[i]); +} + +void memcpy_fromio(void *to, const volatile void __iomem *from, size_t n) +{ + if (cc_platform_has(CC_ATTR_GUEST_UNROLL_STRING_IO)) + unrolled_memcpy_fromio(to, from, n); + else + string_memcpy_fromio(to, from, n); +} +EXPORT_SYMBOL(memcpy_fromio); + +void memcpy_toio(volatile void 
__iomem *to, const void *from, size_t n) +{ + if (cc_platform_has(CC_ATTR_GUEST_UNROLL_STRING_IO)) + unrolled_memcpy_toio(to, from, n); + else + string_memcpy_toio(to, from, n); +} EXPORT_SYMBOL(memcpy_toio); void memset_io(volatile void __iomem *a, int b, size_t c) { - /* - * TODO: memset can mangle the IO patterns quite a bit. - * perhaps it would be better to use a dumb one: - */ - memset((void *)a, b, c); + if (cc_platform_has(CC_ATTR_GUEST_UNROLL_STRING_IO)) { + unrolled_memset_io(a, b, c); + } else { + /* + * TODO: memset can mangle the IO patterns quite a bit. + * perhaps it would be better to use a dumb one: + */ + memset((void *)a, b, c); + } } EXPORT_SYMBOL(memset_io); diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 3481b35cb4ec7..a224193d84bff 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -550,7 +550,7 @@ int ptep_test_and_clear_young(struct vm_area_struct *vma, return ret; } -#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) int pmdp_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmdp) { @@ -562,6 +562,9 @@ int pmdp_test_and_clear_young(struct vm_area_struct *vma, return ret; } +#endif + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE int pudp_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pud_t *pudp) { diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index a6cf56a149393..b3cb49de0a643 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -854,13 +854,11 @@ static void flush_tlb_func(void *info) nr_invalidate); } -static bool tlb_is_not_lazy(int cpu) +static bool tlb_is_not_lazy(int cpu, void *data) { return !per_cpu(cpu_tlbstate_shared.is_lazy, cpu); } -static DEFINE_PER_CPU(cpumask_t, flush_tlb_mask); - DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state_shared, cpu_tlbstate_shared); EXPORT_PER_CPU_SYMBOL(cpu_tlbstate_shared); @@ -889,36 +887,11 @@ STATIC_NOPV void native_flush_tlb_multi(const struct 
cpumask *cpumask, * up on the new contents of what used to be page tables, while * doing a speculative memory access. */ - if (info->freed_tables) { + if (info->freed_tables) on_each_cpu_mask(cpumask, flush_tlb_func, (void *)info, true); - } else { - /* - * Although we could have used on_each_cpu_cond_mask(), - * open-coding it has performance advantages, as it eliminates - * the need for indirect calls or retpolines. In addition, it - * allows to use a designated cpumask for evaluating the - * condition, instead of allocating one. - * - * This code works under the assumption that there are no nested - * TLB flushes, an assumption that is already made in - * flush_tlb_mm_range(). - * - * cond_cpumask is logically a stack-local variable, but it is - * more efficient to have it off the stack and not to allocate - * it on demand. Preemption is disabled and this code is - * non-reentrant. - */ - struct cpumask *cond_cpumask = this_cpu_ptr(&flush_tlb_mask); - int cpu; - - cpumask_clear(cond_cpumask); - - for_each_cpu(cpu, cpumask) { - if (tlb_is_not_lazy(cpu)) - __cpumask_set_cpu(cpu, cond_cpumask); - } - on_each_cpu_mask(cond_cpumask, flush_tlb_func, (void *)info, true); - } + else + on_each_cpu_cond_mask(tlb_is_not_lazy, flush_tlb_func, + (void *)info, 1, cpumask); } void flush_tlb_multi(const struct cpumask *cpumask, diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 0ecb140864b21..b272e963388cb 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -398,6 +398,7 @@ static void emit_indirect_jump(u8 **pprog, int reg, u8 *ip) EMIT_LFENCE(); EMIT2(0xFF, 0xE0 + reg); } else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) { + OPTIMIZER_HIDE_VAR(reg); emit_jump(&prog, &__x86_indirect_thunk_array[reg], ip); } else #endif diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index 9f2b251e83c56..3822666fb73d5 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -40,7 +40,8 @@ static void msr_save_context(struct 
saved_context *ctxt) struct saved_msr *end = msr + ctxt->saved_msrs.num; while (msr < end) { - msr->valid = !rdmsrl_safe(msr->info.msr_no, &msr->info.reg.q); + if (msr->valid) + rdmsrl(msr->info.msr_no, msr->info.reg.q); msr++; } } @@ -424,8 +425,10 @@ static int msr_build_context(const u32 *msr_id, const int num) } for (i = saved_msrs->num, j = 0; i < total_num; i++, j++) { + u64 dummy; + msr_array[i].info.msr_no = msr_id[j]; - msr_array[i].valid = false; + msr_array[i].valid = !rdmsrl_safe(msr_id[j], &dummy); msr_array[i].info.reg.q = 0; } saved_msrs->num = total_num; @@ -500,10 +503,24 @@ static int pm_cpu_check(const struct x86_cpu_id *c) return ret; } +static void pm_save_spec_msr(void) +{ + u32 spec_msr_id[] = { + MSR_IA32_SPEC_CTRL, + MSR_IA32_TSX_CTRL, + MSR_TSX_FORCE_ABORT, + MSR_IA32_MCU_OPT_CTRL, + MSR_AMD64_LS_CFG, + }; + + msr_build_context(spec_msr_id, ARRAY_SIZE(spec_msr_id)); +} + static int pm_check_save_msr(void) { dmi_check_system(msr_save_dmi_table); pm_cpu_check(msr_save_cpu_table); + pm_save_spec_msr(); return 0; } diff --git a/arch/x86/xen/pmu.c b/arch/x86/xen/pmu.c index 89dd6b1708b04..21ecbe754cb2f 100644 --- a/arch/x86/xen/pmu.c +++ b/arch/x86/xen/pmu.c @@ -506,10 +506,7 @@ irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id) return ret; } -bool is_xen_pmu(int cpu) -{ - return (get_xenpmu_data() != NULL); -} +bool is_xen_pmu; void xen_pmu_init(int cpu) { @@ -520,7 +517,7 @@ void xen_pmu_init(int cpu) BUILD_BUG_ON(sizeof(struct xen_pmu_data) > PAGE_SIZE); - if (xen_hvm_domain()) + if (xen_hvm_domain() || (cpu != 0 && !is_xen_pmu)) return; xenpmu_data = (struct xen_pmu_data *)get_zeroed_page(GFP_KERNEL); @@ -541,7 +538,8 @@ void xen_pmu_init(int cpu) per_cpu(xenpmu_shared, cpu).xenpmu_data = xenpmu_data; per_cpu(xenpmu_shared, cpu).flags = 0; - if (cpu == 0) { + if (!is_xen_pmu) { + is_xen_pmu = true; perf_register_guest_info_callbacks(&xen_guest_cbs); xen_pmu_arch_init(); } diff --git a/arch/x86/xen/pmu.h b/arch/x86/xen/pmu.h index 
0e83a160589bc..65c58894fc79f 100644 --- a/arch/x86/xen/pmu.h +++ b/arch/x86/xen/pmu.h @@ -4,6 +4,8 @@ #include +extern bool is_xen_pmu; + irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id); #ifdef CONFIG_XEN_HAVE_VPMU void xen_pmu_init(int cpu); @@ -12,7 +14,6 @@ void xen_pmu_finish(int cpu); static inline void xen_pmu_init(int cpu) {} static inline void xen_pmu_finish(int cpu) {} #endif -bool is_xen_pmu(int cpu); bool pmu_msr_read(unsigned int msr, uint64_t *val, int *err); bool pmu_msr_write(unsigned int msr, uint32_t low, uint32_t high, int *err); int pmu_apic_update(uint32_t reg); diff --git a/arch/x86/xen/smp_hvm.c b/arch/x86/xen/smp_hvm.c index 6ff3c887e0b99..b70afdff419ca 100644 --- a/arch/x86/xen/smp_hvm.c +++ b/arch/x86/xen/smp_hvm.c @@ -19,6 +19,12 @@ static void __init xen_hvm_smp_prepare_boot_cpu(void) */ xen_vcpu_setup(0); + /* + * Called again in case the kernel boots on vcpu >= MAX_VIRT_CPUS. + * Refer to comments in xen_hvm_init_time_ops(). + */ + xen_hvm_init_time_ops(); + /* * The alternative logic (which patches the unlock/lock) runs before * the smp bootup up code is activated. 
Hence we need to set this up diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index 4a6019238ee7d..688aa8b6ae29a 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c @@ -129,7 +129,7 @@ int xen_smp_intr_init_pv(unsigned int cpu) per_cpu(xen_irq_work, cpu).irq = rc; per_cpu(xen_irq_work, cpu).name = callfunc_name; - if (is_xen_pmu(cpu)) { + if (is_xen_pmu) { pmu_name = kasprintf(GFP_KERNEL, "pmu%d", cpu); rc = bind_virq_to_irqhandler(VIRQ_XENPMU, cpu, xen_pmu_irq_handler, diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index d9c945ee11008..9ef0a5cca96ee 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -558,6 +558,11 @@ static void xen_hvm_setup_cpu_clockevents(void) void __init xen_hvm_init_time_ops(void) { + static bool hvm_time_initialized; + + if (hvm_time_initialized) + return; + /* * vector callback is needed otherwise we cannot receive interrupts * on cpu > 0 and at this point we don't know how many cpus are @@ -567,7 +572,22 @@ void __init xen_hvm_init_time_ops(void) return; if (!xen_feature(XENFEAT_hvm_safe_pvclock)) { - pr_info("Xen doesn't support pvclock on HVM, disable pv timer"); + pr_info_once("Xen doesn't support pvclock on HVM, disable pv timer"); + return; + } + + /* + * Only MAX_VIRT_CPUS 'vcpu_info' are embedded inside 'shared_info'. + * The __this_cpu_read(xen_vcpu) is still NULL when Xen HVM guest + * boots on vcpu >= MAX_VIRT_CPUS (e.g., kexec), To access + * __this_cpu_read(xen_vcpu) via xen_clocksource_read() will panic. + * + * The xen_hvm_init_time_ops() should be called again later after + * __this_cpu_read(xen_vcpu) is available. 
+ */ + if (!__this_cpu_read(xen_vcpu)) { + pr_info("Delay xen_init_time_common() as kernel is running on vcpu=%d\n", + xen_vcpu_nr(0)); return; } @@ -577,6 +597,8 @@ void __init xen_hvm_init_time_ops(void) x86_cpuinit.setup_percpu_clockev = xen_hvm_setup_cpu_clockevents; x86_platform.set_wallclock = xen_set_wallclock; + + hvm_time_initialized = true; } #endif diff --git a/arch/xtensa/boot/dts/xtfpga-flash-128m.dtsi b/arch/xtensa/boot/dts/xtfpga-flash-128m.dtsi index 9bf8bad1dd18a..c33932568aa73 100644 --- a/arch/xtensa/boot/dts/xtfpga-flash-128m.dtsi +++ b/arch/xtensa/boot/dts/xtfpga-flash-128m.dtsi @@ -8,19 +8,19 @@ reg = <0x00000000 0x08000000>; bank-width = <2>; device-width = <2>; - partition@0x0 { + partition@0 { label = "data"; reg = <0x00000000 0x06000000>; }; - partition@0x6000000 { + partition@6000000 { label = "boot loader area"; reg = <0x06000000 0x00800000>; }; - partition@0x6800000 { + partition@6800000 { label = "kernel image"; reg = <0x06800000 0x017e0000>; }; - partition@0x7fe0000 { + partition@7fe0000 { label = "boot environment"; reg = <0x07fe0000 0x00020000>; }; diff --git a/arch/xtensa/boot/dts/xtfpga-flash-16m.dtsi b/arch/xtensa/boot/dts/xtfpga-flash-16m.dtsi index 40c2f81f7cb66..7bde2ab2d6fb5 100644 --- a/arch/xtensa/boot/dts/xtfpga-flash-16m.dtsi +++ b/arch/xtensa/boot/dts/xtfpga-flash-16m.dtsi @@ -8,19 +8,19 @@ reg = <0x08000000 0x01000000>; bank-width = <2>; device-width = <2>; - partition@0x0 { + partition@0 { label = "boot loader area"; reg = <0x00000000 0x00400000>; }; - partition@0x400000 { + partition@400000 { label = "kernel image"; reg = <0x00400000 0x00600000>; }; - partition@0xa00000 { + partition@a00000 { label = "data"; reg = <0x00a00000 0x005e0000>; }; - partition@0xfe0000 { + partition@fe0000 { label = "boot environment"; reg = <0x00fe0000 0x00020000>; }; diff --git a/arch/xtensa/boot/dts/xtfpga-flash-4m.dtsi b/arch/xtensa/boot/dts/xtfpga-flash-4m.dtsi index fb8d3a9f33c23..0655b868749a4 100644 --- 
a/arch/xtensa/boot/dts/xtfpga-flash-4m.dtsi +++ b/arch/xtensa/boot/dts/xtfpga-flash-4m.dtsi @@ -8,11 +8,11 @@ reg = <0x08000000 0x00400000>; bank-width = <2>; device-width = <2>; - partition@0x0 { + partition@0 { label = "boot loader area"; reg = <0x00000000 0x003f0000>; }; - partition@0x3f0000 { + partition@3f0000 { label = "boot environment"; reg = <0x003f0000 0x00010000>; }; diff --git a/arch/xtensa/include/asm/pgtable.h b/arch/xtensa/include/asm/pgtable.h index bd5aeb7955675..a63eca1266577 100644 --- a/arch/xtensa/include/asm/pgtable.h +++ b/arch/xtensa/include/asm/pgtable.h @@ -411,6 +411,10 @@ extern void update_mmu_cache(struct vm_area_struct * vma, typedef pte_t *pte_addr_t; +void update_mmu_tlb(struct vm_area_struct *vma, + unsigned long address, pte_t *ptep); +#define __HAVE_ARCH_UPDATE_MMU_TLB + #endif /* !defined (__ASSEMBLY__) */ #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h index 37d3e9887fe7b..d68987d703e75 100644 --- a/arch/xtensa/include/asm/processor.h +++ b/arch/xtensa/include/asm/processor.h @@ -246,8 +246,8 @@ extern unsigned long __get_wchan(struct task_struct *p); #define xtensa_set_sr(x, sr) \ ({ \ - unsigned int v = (unsigned int)(x); \ - __asm__ __volatile__ ("wsr %0, "__stringify(sr) :: "a"(v)); \ + __asm__ __volatile__ ("wsr %0, "__stringify(sr) :: \ + "a"((unsigned int)(x))); \ }) #define xtensa_get_sr(sr) \ diff --git a/arch/xtensa/kernel/jump_label.c b/arch/xtensa/kernel/jump_label.c index 61cf6497a646b..0dde21e0d3de4 100644 --- a/arch/xtensa/kernel/jump_label.c +++ b/arch/xtensa/kernel/jump_label.c @@ -61,7 +61,7 @@ static void patch_text(unsigned long addr, const void *data, size_t sz) .data = data, }; stop_machine_cpuslocked(patch_text_stop_machine, - &patch, NULL); + &patch, cpu_online_mask); } else { unsigned long flags; diff --git a/arch/xtensa/kernel/mxhead.S b/arch/xtensa/kernel/mxhead.S index 9f38437427264..b702c0908b1f6 100644 --- 
a/arch/xtensa/kernel/mxhead.S +++ b/arch/xtensa/kernel/mxhead.S @@ -37,11 +37,13 @@ _SetupOCD: * xt-gdb to single step via DEBUG exceptions received directly * by ocd. */ +#if XCHAL_HAVE_WINDOWED movi a1, 1 movi a0, 0 wsr a1, windowstart wsr a0, windowbase rsync +#endif movi a1, LOCKLEVEL wsr a1, ps diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl index 52c94ab5c2058..1518e261d882e 100644 --- a/arch/xtensa/kernel/syscalls/syscall.tbl +++ b/arch/xtensa/kernel/syscalls/syscall.tbl @@ -421,3 +421,4 @@ 448 common process_mrelease sys_process_mrelease 449 common futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node +451 common pmadv_ksm sys_pmadv_ksm diff --git a/arch/xtensa/mm/tlb.c b/arch/xtensa/mm/tlb.c index f436cf2efd8b7..27a477dae2322 100644 --- a/arch/xtensa/mm/tlb.c +++ b/arch/xtensa/mm/tlb.c @@ -162,6 +162,12 @@ void local_flush_tlb_kernel_range(unsigned long start, unsigned long end) } } +void update_mmu_tlb(struct vm_area_struct *vma, + unsigned long address, pte_t *ptep) +{ + local_flush_tlb_page(vma, address); +} + #ifdef CONFIG_DEBUG_TLB_SANITY static unsigned get_pte_for_vaddr(unsigned vaddr) diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c index 24a5c5329bcd0..809bc612d96b3 100644 --- a/block/bfq-cgroup.c +++ b/block/bfq-cgroup.c @@ -646,6 +646,12 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, { struct bfq_entity *entity = &bfqq->entity; + /* + * oom_bfqq is not allowed to move, oom_bfqq will hold ref to root_group + * until elevator exit. + */ + if (bfqq == &bfqd->oom_bfqq) + return; /* * Get extra reference to prevent bfqq from being freed in * next possible expire or deactivate. 
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 36a66e97e3c28..1dff82d34b44b 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -2782,6 +2782,15 @@ bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq) * are likely to increase the throughput. */ bfqq->new_bfqq = new_bfqq; + /* + * The above assignment schedules the following redirections: + * each time some I/O for bfqq arrives, the process that + * generated that I/O is disassociated from bfqq and + * associated with new_bfqq. Here we increases new_bfqq->ref + * in advance, adding the number of processes that are + * expected to be associated with new_bfqq as they happen to + * issue I/O. + */ new_bfqq->ref += process_refs; return new_bfqq; } @@ -2844,6 +2853,10 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq, { struct bfq_queue *in_service_bfqq, *new_bfqq; + /* if a merge has already been setup, then proceed with that first */ + if (bfqq->new_bfqq) + return bfqq->new_bfqq; + /* * Check delayed stable merge for rotational or non-queueing * devs. 
For this branch to be executed, bfqq must not be @@ -2945,9 +2958,6 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq, if (bfq_too_late_for_merging(bfqq)) return NULL; - if (bfqq->new_bfqq) - return bfqq->new_bfqq; - if (!io_struct || unlikely(bfqq == &bfqd->oom_bfqq)) return NULL; @@ -5181,7 +5191,7 @@ static struct request *bfq_dispatch_request(struct blk_mq_hw_ctx *hctx) struct bfq_data *bfqd = hctx->queue->elevator->elevator_data; struct request *rq; struct bfq_queue *in_serv_queue; - bool waiting_rq, idle_timer_disabled; + bool waiting_rq, idle_timer_disabled = false; spin_lock_irq(&bfqd->lock); @@ -5189,14 +5199,15 @@ static struct request *bfq_dispatch_request(struct blk_mq_hw_ctx *hctx) waiting_rq = in_serv_queue && bfq_bfqq_wait_request(in_serv_queue); rq = __bfq_dispatch_request(hctx); - - idle_timer_disabled = - waiting_rq && !bfq_bfqq_wait_request(in_serv_queue); + if (in_serv_queue == bfqd->in_service_queue) { + idle_timer_disabled = + waiting_rq && !bfq_bfqq_wait_request(in_serv_queue); + } spin_unlock_irq(&bfqd->lock); - - bfq_update_dispatch_stats(hctx->queue, rq, in_serv_queue, - idle_timer_disabled); + bfq_update_dispatch_stats(hctx->queue, rq, + idle_timer_disabled ? 
in_serv_queue : NULL, + idle_timer_disabled); return rq; } diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c index b74cc0da118ec..709b901de3ca9 100644 --- a/block/bfq-wf2q.c +++ b/block/bfq-wf2q.c @@ -519,7 +519,7 @@ unsigned short bfq_ioprio_to_weight(int ioprio) static unsigned short bfq_weight_to_ioprio(int weight) { return max_t(int, 0, - IOPRIO_NR_LEVELS * BFQ_WEIGHT_CONVERSION_COEFF - weight); + IOPRIO_NR_LEVELS - weight / BFQ_WEIGHT_CONVERSION_COEFF); } static void bfq_get_entity(struct bfq_entity *entity) diff --git a/block/bio.c b/block/bio.c index 4312a8085396b..342b1cf5d713c 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1486,8 +1486,7 @@ void bio_endio(struct bio *bio) if (!bio_integrity_endio(bio)) return; - if (bio->bi_bdev && bio_flagged(bio, BIO_TRACKED)) - rq_qos_done_bio(bdev_get_queue(bio->bi_bdev), bio); + rq_qos_done_bio(bio); if (bio->bi_bdev && bio_flagged(bio, BIO_TRACE_COMPLETION)) { trace_block_bio_complete(bdev_get_queue(bio->bi_bdev), bio); @@ -1571,7 +1570,7 @@ EXPORT_SYMBOL(bio_split); void bio_trim(struct bio *bio, sector_t offset, sector_t size) { if (WARN_ON_ONCE(offset > BIO_MAX_SECTORS || size > BIO_MAX_SECTORS || - offset + size > bio->bi_iter.bi_size)) + offset + size > bio_sectors(bio))) return; size <<= 9; diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 650f7e27989f1..87a1c0c3fa401 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -857,11 +857,11 @@ static void blkcg_fill_root_iostats(void) blk_queue_root_blkg(bdev_get_queue(bdev)); struct blkg_iostat tmp; int cpu; + unsigned long flags; memset(&tmp, 0, sizeof(tmp)); for_each_possible_cpu(cpu) { struct disk_stats *cpu_dkstats; - unsigned long flags; cpu_dkstats = per_cpu_ptr(bdev->bd_stats, cpu); tmp.ios[BLKG_IOSTAT_READ] += @@ -877,11 +877,11 @@ static void blkcg_fill_root_iostats(void) cpu_dkstats->sectors[STAT_WRITE] << 9; tmp.bytes[BLKG_IOSTAT_DISCARD] += cpu_dkstats->sectors[STAT_DISCARD] << 9; - - flags = 
u64_stats_update_begin_irqsave(&blkg->iostat.sync); - blkg_iostat_set(&blkg->iostat.cur, &tmp); - u64_stats_update_end_irqrestore(&blkg->iostat.sync, flags); } + + flags = u64_stats_update_begin_irqsave(&blkg->iostat.sync); + blkg_iostat_set(&blkg->iostat.cur, &tmp); + u64_stats_update_end_irqrestore(&blkg->iostat.sync, flags); } } diff --git a/block/blk-ioc.c b/block/blk-ioc.c index 11f49f78db32b..df9cfe4ca5328 100644 --- a/block/blk-ioc.c +++ b/block/blk-ioc.c @@ -280,7 +280,6 @@ int set_task_ioprio(struct task_struct *task, int ioprio) task_lock(task); if (task->flags & PF_EXITING) { - err = -ESRCH; kmem_cache_free(iocontext_cachep, ioc); goto out; } @@ -292,7 +291,7 @@ int set_task_ioprio(struct task_struct *task, int ioprio) task->io_context->ioprio = ioprio; out: task_unlock(task); - return err; + return 0; } EXPORT_SYMBOL_GPL(set_task_ioprio); diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c index 6593c7123b97e..24d70e0555ddb 100644 --- a/block/blk-iolatency.c +++ b/block/blk-iolatency.c @@ -598,7 +598,7 @@ static void blkcg_iolatency_done_bio(struct rq_qos *rqos, struct bio *bio) int inflight = 0; blkg = bio->bi_blkg; - if (!blkg || !bio_flagged(bio, BIO_TRACKED)) + if (!blkg || !bio_flagged(bio, BIO_QOS_THROTTLED)) return; iolat = blkg_to_lat(bio->bi_blkg); diff --git a/block/blk-merge.c b/block/blk-merge.c index 4de34a332c9fd..ea6968313b4a8 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -9,6 +9,7 @@ #include #include #include +#include #include @@ -368,8 +369,6 @@ void __blk_queue_split(struct request_queue *q, struct bio **bio, trace_block_split(split, (*bio)->bi_iter.bi_sector); submit_bio_noacct(*bio); *bio = split; - - blk_throtl_charge_bio_split(*bio); } } @@ -600,6 +599,9 @@ static inline unsigned int blk_rq_get_max_sectors(struct request *rq, static inline int ll_new_hw_segment(struct request *req, struct bio *bio, unsigned int nr_phys_segs) { + if (!blk_cgroup_mergeable(req, bio)) + goto no_merge; + if 
(blk_integrity_merge_bio(req->q, req, bio) == false) goto no_merge; @@ -696,6 +698,9 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req, if (total_phys_segments > blk_rq_get_max_segments(req)) return 0; + if (!blk_cgroup_mergeable(req, next->bio)) + return 0; + if (blk_integrity_merge_rq(q, req, next) == false) return 0; @@ -904,6 +909,10 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio) if (bio_data_dir(bio) != rq_data_dir(rq)) return false; + /* don't merge across cgroup boundaries */ + if (!blk_cgroup_mergeable(rq, bio)) + return false; + /* only merge integrity protected bio into ditto rq */ if (blk_integrity_merge_bio(rq->q, rq, bio) == false) return false; @@ -1089,12 +1098,20 @@ bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio, if (!plug || rq_list_empty(plug->mq_list)) return false; - /* check the previously added entry for a quick merge attempt */ - rq = rq_list_peek(&plug->mq_list); - if (rq->q == q) { - if (blk_attempt_bio_merge(q, rq, bio, nr_segs, false) == - BIO_MERGE_OK) - return true; + rq_list_for_each(&plug->mq_list, rq) { + if (rq->q == q) { + if (blk_attempt_bio_merge(q, rq, bio, nr_segs, false) == + BIO_MERGE_OK) + return true; + break; + } + + /* + * Only keep iterating plug list for merges if we have multiple + * queues + */ + if (!plug->multiple_queues) + break; } return false; } diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index 55488ba978232..80e0eb26b697c 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -180,11 +180,18 @@ static int __blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx) static int blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx) { + unsigned long end = jiffies + HZ; int ret; do { ret = __blk_mq_do_dispatch_sched(hctx); - } while (ret == 1); + if (ret != 1) + break; + if (need_resched() || time_is_before_jiffies(end)) { + blk_mq_delay_run_hw_queue(hctx, 0); + break; + } + } while (1); return ret; } diff --git a/block/blk-mq.c 
b/block/blk-mq.c index 9a9185a0a2d13..cb50097366cd4 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2561,13 +2561,36 @@ static void __blk_mq_flush_plug_list(struct request_queue *q, q->mq_ops->queue_rqs(&plug->mq_list); } +static void blk_mq_dispatch_plug_list(struct blk_plug *plug, bool from_sched) +{ + struct blk_mq_hw_ctx *this_hctx = NULL; + struct blk_mq_ctx *this_ctx = NULL; + struct request *requeue_list = NULL; + unsigned int depth = 0; + LIST_HEAD(list); + + do { + struct request *rq = rq_list_pop(&plug->mq_list); + + if (!this_hctx) { + this_hctx = rq->mq_hctx; + this_ctx = rq->mq_ctx; + } else if (this_hctx != rq->mq_hctx || this_ctx != rq->mq_ctx) { + rq_list_add(&requeue_list, rq); + continue; + } + list_add_tail(&rq->queuelist, &list); + depth++; + } while (!rq_list_empty(plug->mq_list)); + + plug->mq_list = requeue_list; + trace_block_unplug(this_hctx->queue, depth, !from_sched); + blk_mq_sched_insert_requests(this_hctx, this_ctx, &list, from_sched); +} + void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule) { - struct blk_mq_hw_ctx *this_hctx; - struct blk_mq_ctx *this_ctx; struct request *rq; - unsigned int depth; - LIST_HEAD(list); if (rq_list_empty(plug->mq_list)) return; @@ -2603,35 +2626,9 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule) return; } - this_hctx = NULL; - this_ctx = NULL; - depth = 0; do { - rq = rq_list_pop(&plug->mq_list); - - if (!this_hctx) { - this_hctx = rq->mq_hctx; - this_ctx = rq->mq_ctx; - } else if (this_hctx != rq->mq_hctx || this_ctx != rq->mq_ctx) { - trace_block_unplug(this_hctx->queue, depth, - !from_schedule); - blk_mq_sched_insert_requests(this_hctx, this_ctx, - &list, from_schedule); - depth = 0; - this_hctx = rq->mq_hctx; - this_ctx = rq->mq_ctx; - - } - - list_add(&rq->queuelist, &list); - depth++; + blk_mq_dispatch_plug_list(plug, from_schedule); } while (!rq_list_empty(plug->mq_list)); - - if (!list_empty(&list)) { - trace_block_unplug(this_hctx->queue, depth, 
!from_schedule); - blk_mq_sched_insert_requests(this_hctx, this_ctx, &list, - from_schedule); - } } void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx, diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h index 3cfbc8668cba9..68267007da1c6 100644 --- a/block/blk-rq-qos.h +++ b/block/blk-rq-qos.h @@ -177,20 +177,20 @@ static inline void rq_qos_requeue(struct request_queue *q, struct request *rq) __rq_qos_requeue(q->rq_qos, rq); } -static inline void rq_qos_done_bio(struct request_queue *q, struct bio *bio) +static inline void rq_qos_done_bio(struct bio *bio) { - if (q->rq_qos) - __rq_qos_done_bio(q->rq_qos, bio); + if (bio->bi_bdev && (bio_flagged(bio, BIO_QOS_THROTTLED) || + bio_flagged(bio, BIO_QOS_MERGED))) { + struct request_queue *q = bdev_get_queue(bio->bi_bdev); + if (q->rq_qos) + __rq_qos_done_bio(q->rq_qos, bio); + } } static inline void rq_qos_throttle(struct request_queue *q, struct bio *bio) { - /* - * BIO_TRACKED lets controllers know that a bio went through the - * normal rq_qos path. 
- */ if (q->rq_qos) { - bio_set_flag(bio, BIO_TRACKED); + bio_set_flag(bio, BIO_QOS_THROTTLED); __rq_qos_throttle(q->rq_qos, bio); } } @@ -205,8 +205,10 @@ static inline void rq_qos_track(struct request_queue *q, struct request *rq, static inline void rq_qos_merge(struct request_queue *q, struct request *rq, struct bio *bio) { - if (q->rq_qos) + if (q->rq_qos) { + bio_set_flag(bio, BIO_QOS_MERGED); __rq_qos_merge(q->rq_qos, rq, bio); + } } static inline void rq_qos_queue_depth_changed(struct request_queue *q) diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 9f32882ceb2f6..7923f49f1046f 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -954,9 +954,6 @@ void blk_unregister_queue(struct gendisk *disk) */ if (queue_is_mq(q)) blk_mq_unregister_dev(disk_to_dev(disk), q); - - kobject_uevent(&q->kobj, KOBJ_REMOVE); - kobject_del(&q->kobj); blk_trace_remove_sysfs(disk_to_dev(disk)); mutex_lock(&q->sysfs_lock); @@ -964,6 +961,11 @@ void blk_unregister_queue(struct gendisk *disk) elv_unregister_queue(q); disk_unregister_independent_access_ranges(disk); mutex_unlock(&q->sysfs_lock); + + /* Now that we've deleted all child objects, we can delete the queue. 
*/ + kobject_uevent(&q->kobj, KOBJ_REMOVE); + kobject_del(&q->kobj); + mutex_unlock(&q->sysfs_dir_lock); kobject_put(&disk_to_dev(disk)->kobj); diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 7c462c006b269..87769b337fc55 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -808,7 +808,8 @@ static bool tg_with_in_bps_limit(struct throtl_grp *tg, struct bio *bio, unsigned long jiffy_elapsed, jiffy_wait, jiffy_elapsed_rnd; unsigned int bio_size = throtl_bio_data_size(bio); - if (bps_limit == U64_MAX) { + /* no need to throttle if this bio's bytes have been accounted */ + if (bps_limit == U64_MAX || bio_flagged(bio, BIO_THROTTLED)) { if (wait) *wait = 0; return true; @@ -920,9 +921,12 @@ static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio) unsigned int bio_size = throtl_bio_data_size(bio); /* Charge the bio to the group */ - tg->bytes_disp[rw] += bio_size; + if (!bio_flagged(bio, BIO_THROTTLED)) { + tg->bytes_disp[rw] += bio_size; + tg->last_bytes_disp[rw] += bio_size; + } + tg->io_disp[rw]++; - tg->last_bytes_disp[rw] += bio_size; tg->last_io_disp[rw]++; /* diff --git a/block/blk-throttle.h b/block/blk-throttle.h index 175f03abd9e41..cb43f4417d6ea 100644 --- a/block/blk-throttle.h +++ b/block/blk-throttle.h @@ -170,8 +170,6 @@ static inline bool blk_throtl_bio(struct bio *bio) { struct throtl_grp *tg = blkg_to_tg(bio->bi_blkg); - if (bio_flagged(bio, BIO_THROTTLED)) - return false; if (!tg->has_rules[bio_data_dir(bio)]) return false; diff --git a/block/genhd.c b/block/genhd.c index 9eca1f7d35c97..9d9d702d07787 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -330,7 +330,7 @@ int blk_alloc_ext_minor(void) { int idx; - idx = ida_alloc_range(&ext_devt_ida, 0, NR_EXT_DEVT, GFP_KERNEL); + idx = ida_alloc_range(&ext_devt_ida, 0, NR_EXT_DEVT - 1, GFP_KERNEL); if (idx == -ENOSPC) return -EBUSY; return idx; @@ -927,12 +927,17 @@ ssize_t part_stat_show(struct device *dev, struct disk_stats stat; unsigned int inflight; - 
part_stat_read_all(bdev, &stat); if (queue_is_mq(q)) inflight = blk_mq_in_flight(q, bdev); else inflight = part_in_flight(bdev); + if (inflight) { + part_stat_lock(); + update_io_ticks(bdev, jiffies, true); + part_stat_unlock(); + } + part_stat_read_all(bdev, &stat); return sprintf(buf, "%8lu %8lu %8llu %8u " "%8lu %8lu %8llu %8u " @@ -1188,12 +1193,17 @@ static int diskstats_show(struct seq_file *seqf, void *v) xa_for_each(&gp->part_tbl, idx, hd) { if (bdev_is_partition(hd) && !bdev_nr_sectors(hd)) continue; - part_stat_read_all(hd, &stat); if (queue_is_mq(gp->queue)) inflight = blk_mq_in_flight(gp->queue, hd); else inflight = part_in_flight(hd); + if (inflight) { + part_stat_lock(); + update_io_ticks(hd, jiffies, true); + part_stat_unlock(); + } + part_stat_read_all(hd, &stat); seq_printf(seqf, "%4d %7d %pg " "%lu %lu %lu %u " "%lu %lu %lu %u " diff --git a/crypto/Kconfig b/crypto/Kconfig index 442765219c375..2cca54c59fecd 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1847,6 +1847,7 @@ config CRYPTO_JITTERENTROPY config CRYPTO_KDF800108_CTR tristate + select CRYPTO_HMAC select CRYPTO_SHA256 config CRYPTO_USER_API diff --git a/crypto/asymmetric_keys/pkcs7_verify.c b/crypto/asymmetric_keys/pkcs7_verify.c index 0b4d07aa88111..f94a1d1ad3a6c 100644 --- a/crypto/asymmetric_keys/pkcs7_verify.c +++ b/crypto/asymmetric_keys/pkcs7_verify.c @@ -174,12 +174,6 @@ static int pkcs7_find_key(struct pkcs7_message *pkcs7, pr_devel("Sig %u: Found cert serial match X.509[%u]\n", sinfo->index, certix); - if (strcmp(x509->pub->pkey_algo, sinfo->sig->pkey_algo) != 0) { - pr_warn("Sig %u: X.509 algo and PKCS#7 sig algo don't match\n", - sinfo->index); - continue; - } - sinfo->signer = x509; return 0; } diff --git a/crypto/asymmetric_keys/public_key.c b/crypto/asymmetric_keys/public_key.c index 4fefb219bfdc8..7c9e6be35c30c 100644 --- a/crypto/asymmetric_keys/public_key.c +++ b/crypto/asymmetric_keys/public_key.c @@ -60,39 +60,83 @@ static void public_key_destroy(void *payload0, 
void *payload3) } /* - * Determine the crypto algorithm name. + * Given a public_key, and an encoding and hash_algo to be used for signing + * and/or verification with that key, determine the name of the corresponding + * akcipher algorithm. Also check that encoding and hash_algo are allowed. */ -static -int software_key_determine_akcipher(const char *encoding, - const char *hash_algo, - const struct public_key *pkey, - char alg_name[CRYPTO_MAX_ALG_NAME]) +static int +software_key_determine_akcipher(const struct public_key *pkey, + const char *encoding, const char *hash_algo, + char alg_name[CRYPTO_MAX_ALG_NAME]) { int n; - if (strcmp(encoding, "pkcs1") == 0) { - /* The data wangled by the RSA algorithm is typically padded - * and encoded in some manner, such as EMSA-PKCS1-1_5 [RFC3447 - * sec 8.2]. + if (!encoding) + return -EINVAL; + + if (strcmp(pkey->pkey_algo, "rsa") == 0) { + /* + * RSA signatures usually use EMSA-PKCS1-1_5 [RFC3447 sec 8.2]. + */ + if (strcmp(encoding, "pkcs1") == 0) { + if (!hash_algo) + n = snprintf(alg_name, CRYPTO_MAX_ALG_NAME, + "pkcs1pad(%s)", + pkey->pkey_algo); + else + n = snprintf(alg_name, CRYPTO_MAX_ALG_NAME, + "pkcs1pad(%s,%s)", + pkey->pkey_algo, hash_algo); + return n >= CRYPTO_MAX_ALG_NAME ? -EINVAL : 0; + } + if (strcmp(encoding, "raw") != 0) + return -EINVAL; + /* + * Raw RSA cannot differentiate between different hash + * algorithms. + */ + if (hash_algo) + return -EINVAL; + } else if (strncmp(pkey->pkey_algo, "ecdsa", 5) == 0) { + if (strcmp(encoding, "x962") != 0) + return -EINVAL; + /* + * ECDSA signatures are taken over a raw hash, so they don't + * differentiate between different hash algorithms. That means + * that the verifier should hard-code a specific hash algorithm. + * Unfortunately, in practice ECDSA is used with multiple SHAs, + * so we have to allow all of them and not just one. 
*/ if (!hash_algo) - n = snprintf(alg_name, CRYPTO_MAX_ALG_NAME, - "pkcs1pad(%s)", - pkey->pkey_algo); - else - n = snprintf(alg_name, CRYPTO_MAX_ALG_NAME, - "pkcs1pad(%s,%s)", - pkey->pkey_algo, hash_algo); - return n >= CRYPTO_MAX_ALG_NAME ? -EINVAL : 0; - } - - if (strcmp(encoding, "raw") == 0 || - strcmp(encoding, "x962") == 0) { - strcpy(alg_name, pkey->pkey_algo); - return 0; + return -EINVAL; + if (strcmp(hash_algo, "sha1") != 0 && + strcmp(hash_algo, "sha224") != 0 && + strcmp(hash_algo, "sha256") != 0 && + strcmp(hash_algo, "sha384") != 0 && + strcmp(hash_algo, "sha512") != 0) + return -EINVAL; + } else if (strcmp(pkey->pkey_algo, "sm2") == 0) { + if (strcmp(encoding, "raw") != 0) + return -EINVAL; + if (!hash_algo) + return -EINVAL; + if (strcmp(hash_algo, "sm3") != 0) + return -EINVAL; + } else if (strcmp(pkey->pkey_algo, "ecrdsa") == 0) { + if (strcmp(encoding, "raw") != 0) + return -EINVAL; + if (!hash_algo) + return -EINVAL; + if (strcmp(hash_algo, "streebog256") != 0 && + strcmp(hash_algo, "streebog512") != 0) + return -EINVAL; + } else { + /* Unknown public key algorithm */ + return -ENOPKG; } - - return -ENOPKG; + if (strscpy(alg_name, pkey->pkey_algo, CRYPTO_MAX_ALG_NAME) < 0) + return -EINVAL; + return 0; } static u8 *pkey_pack_u32(u8 *dst, u32 val) @@ -113,9 +157,8 @@ static int software_key_query(const struct kernel_pkey_params *params, u8 *key, *ptr; int ret, len; - ret = software_key_determine_akcipher(params->encoding, - params->hash_algo, - pkey, alg_name); + ret = software_key_determine_akcipher(pkey, params->encoding, + params->hash_algo, alg_name); if (ret < 0) return ret; @@ -179,9 +222,8 @@ static int software_key_eds_op(struct kernel_pkey_params *params, pr_devel("==>%s()\n", __func__); - ret = software_key_determine_akcipher(params->encoding, - params->hash_algo, - pkey, alg_name); + ret = software_key_determine_akcipher(pkey, params->encoding, + params->hash_algo, alg_name); if (ret < 0) return ret; @@ -325,9 +367,23 @@ int 
public_key_verify_signature(const struct public_key *pkey, BUG_ON(!sig); BUG_ON(!sig->s); - ret = software_key_determine_akcipher(sig->encoding, - sig->hash_algo, - pkey, alg_name); + /* + * If the signature specifies a public key algorithm, it *must* match + * the key's actual public key algorithm. + * + * Small exception: ECDSA signatures don't specify the curve, but ECDSA + * keys do. So the strings can mismatch slightly in that case: + * "ecdsa-nist-*" for the key, but "ecdsa" for the signature. + */ + if (sig->pkey_algo) { + if (strcmp(pkey->pkey_algo, sig->pkey_algo) != 0 && + (strncmp(pkey->pkey_algo, "ecdsa-", 6) != 0 || + strcmp(sig->pkey_algo, "ecdsa") != 0)) + return -EKEYREJECTED; + } + + ret = software_key_determine_akcipher(pkey, sig->encoding, + sig->hash_algo, alg_name); if (ret < 0) return ret; diff --git a/crypto/asymmetric_keys/x509_public_key.c b/crypto/asymmetric_keys/x509_public_key.c index fe14cae115b51..71cc1738fbfd2 100644 --- a/crypto/asymmetric_keys/x509_public_key.c +++ b/crypto/asymmetric_keys/x509_public_key.c @@ -128,12 +128,6 @@ int x509_check_for_self_signed(struct x509_certificate *cert) goto out; } - ret = -EKEYREJECTED; - if (strcmp(cert->pub->pkey_algo, cert->sig->pkey_algo) != 0 && - (strncmp(cert->pub->pkey_algo, "ecdsa-", 6) != 0 || - strcmp(cert->sig->pkey_algo, "ecdsa") != 0)) - goto out; - ret = public_key_verify_signature(cert->pub, cert->sig); if (ret < 0) { if (ret == -ENOPKG) { diff --git a/crypto/authenc.c b/crypto/authenc.c index 670bf1a01d00e..17f674a7cdff5 100644 --- a/crypto/authenc.c +++ b/crypto/authenc.c @@ -253,7 +253,7 @@ static int crypto_authenc_decrypt_tail(struct aead_request *req, dst = scatterwalk_ffwd(areq_ctx->dst, req->dst, req->assoclen); skcipher_request_set_tfm(skreq, ctx->enc); - skcipher_request_set_callback(skreq, aead_request_flags(req), + skcipher_request_set_callback(skreq, flags, req->base.complete, req->base.data); skcipher_request_set_crypt(skreq, src, dst, req->cryptlen - authsize, 
req->iv); diff --git a/crypto/drbg.c b/crypto/drbg.c index 177983b6ae38b..ea79a31014a40 100644 --- a/crypto/drbg.c +++ b/crypto/drbg.c @@ -115,7 +115,7 @@ * the SHA256 / AES 256 over other ciphers. Thus, the favored * DRBGs are the latest entries in this array. */ -static const struct drbg_core drbg_cores[] = { +const struct drbg_core drbg_cores[] = { #ifdef CONFIG_CRYPTO_DRBG_CTR { .flags = DRBG_CTR | DRBG_STRENGTH128, @@ -192,6 +192,7 @@ static const struct drbg_core drbg_cores[] = { }, #endif /* CONFIG_CRYPTO_DRBG_HMAC */ }; +EXPORT_SYMBOL(drbg_cores); static int drbg_uninstantiate(struct drbg_state *drbg); @@ -207,7 +208,7 @@ static int drbg_uninstantiate(struct drbg_state *drbg); * Return: normalized strength in *bytes* value or 32 as default * to counter programming errors */ -static inline unsigned short drbg_sec_strength(drbg_flag_t flags) +unsigned short drbg_sec_strength(drbg_flag_t flags) { switch (flags & DRBG_STRENGTH_MASK) { case DRBG_STRENGTH128: @@ -220,6 +221,7 @@ static inline unsigned short drbg_sec_strength(drbg_flag_t flags) return 32; } } +EXPORT_SYMBOL(drbg_sec_strength); /* * FIPS 140-2 continuous self test for the noise source @@ -1252,7 +1254,7 @@ static int drbg_seed(struct drbg_state *drbg, struct drbg_string *pers, } /* Free all substructures in a DRBG state without the DRBG state structure */ -static inline void drbg_dealloc_state(struct drbg_state *drbg) +void drbg_dealloc_state(struct drbg_state *drbg) { if (!drbg) return; @@ -1273,12 +1275,13 @@ static inline void drbg_dealloc_state(struct drbg_state *drbg) drbg->fips_primed = false; } } +EXPORT_SYMBOL(drbg_dealloc_state); /* * Allocate all sub-structures for a DRBG state. * The DRBG state structure must already be allocated. 
*/ -static inline int drbg_alloc_state(struct drbg_state *drbg) +int drbg_alloc_state(struct drbg_state *drbg) { int ret = -ENOMEM; unsigned int sb_size = 0; @@ -1359,6 +1362,7 @@ static inline int drbg_alloc_state(struct drbg_state *drbg) drbg_dealloc_state(drbg); return ret; } +EXPORT_SYMBOL(drbg_alloc_state); /************************************************************************* * DRBG interface functions @@ -1895,8 +1899,7 @@ static int drbg_kcapi_sym_ctr(struct drbg_state *drbg, * * return: flags */ -static inline void drbg_convert_tfm_core(const char *cra_driver_name, - int *coreref, bool *pr) +void drbg_convert_tfm_core(const char *cra_driver_name, int *coreref, bool *pr) { int i = 0; size_t start = 0; @@ -1923,6 +1926,7 @@ static inline void drbg_convert_tfm_core(const char *cra_driver_name, } } } +EXPORT_SYMBOL(drbg_convert_tfm_core); static int drbg_kcapi_init(struct crypto_tfm *tfm) { diff --git a/crypto/jitterentropy-kcapi.c b/crypto/jitterentropy-kcapi.c index 2d115bec15aeb..e7dac734d2377 100644 --- a/crypto/jitterentropy-kcapi.c +++ b/crypto/jitterentropy-kcapi.c @@ -42,8 +42,7 @@ #include #include #include - -#include "jitterentropy.h" +#include /*************************************************************************** * Helper function diff --git a/crypto/jitterentropy.c b/crypto/jitterentropy.c index 93bff32138238..81b80a4d3d3a0 100644 --- a/crypto/jitterentropy.c +++ b/crypto/jitterentropy.c @@ -133,7 +133,7 @@ struct rand_data { #define JENT_ENTROPY_SAFETY_FACTOR 64 #include -#include "jitterentropy.h" +#include /*************************************************************************** * Adaptive Proportion Test diff --git a/crypto/rsa-pkcs1pad.c b/crypto/rsa-pkcs1pad.c index 8ac3e73e8ea65..9d804831c8b3f 100644 --- a/crypto/rsa-pkcs1pad.c +++ b/crypto/rsa-pkcs1pad.c @@ -476,6 +476,8 @@ static int pkcs1pad_verify_complete(struct akcipher_request *req, int err) pos++; if (digest_info) { + if (digest_info->size > dst_len - pos) + goto done; 
if (crypto_memneq(out_buf + pos, digest_info->data, digest_info->size)) goto done; @@ -495,7 +497,7 @@ static int pkcs1pad_verify_complete(struct akcipher_request *req, int err) sg_nents_for_len(req->src, req->src_len + req->dst_len), req_ctx->out_buf + ctx->key_size, - req->dst_len, ctx->key_size); + req->dst_len, req->src_len); /* Do the actual verification step. */ if (memcmp(req_ctx->out_buf + ctx->key_size, out_buf + pos, req->dst_len) != 0) @@ -538,7 +540,7 @@ static int pkcs1pad_verify(struct akcipher_request *req) if (WARN_ON(req->dst) || WARN_ON(!req->dst_len) || - !ctx->key_size || req->src_len < ctx->key_size) + !ctx->key_size || req->src_len != ctx->key_size) return -EINVAL; req_ctx->out_buf = kmalloc(ctx->key_size + req->dst_len, GFP_KERNEL); @@ -621,6 +623,11 @@ static int pkcs1pad_create(struct crypto_template *tmpl, struct rtattr **tb) rsa_alg = crypto_spawn_akcipher_alg(&ctx->spawn); + if (strcmp(rsa_alg->base.cra_name, "rsa") != 0) { + err = -EINVAL; + goto err_free_inst; + } + err = -ENAMETOOLONG; hash_name = crypto_attr_alg_name(tb[2]); if (IS_ERR(hash_name)) { diff --git a/crypto/xts.c b/crypto/xts.c index 6c12f30dbdd6d..63c85b9e64e08 100644 --- a/crypto/xts.c +++ b/crypto/xts.c @@ -466,3 +466,4 @@ MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("XTS block cipher mode"); MODULE_ALIAS_CRYPTO("xts"); MODULE_IMPORT_NS(CRYPTO_INTERNAL); +MODULE_SOFTDEP("pre: ecb"); diff --git a/drivers/acpi/acpica/nswalk.c b/drivers/acpi/acpica/nswalk.c index 915c2433463d7..e7c30ce06e189 100644 --- a/drivers/acpi/acpica/nswalk.c +++ b/drivers/acpi/acpica/nswalk.c @@ -169,6 +169,9 @@ acpi_ns_walk_namespace(acpi_object_type type, if (start_node == ACPI_ROOT_OBJECT) { start_node = acpi_gbl_root_node; + if (!start_node) { + return_ACPI_STATUS(AE_NO_NAMESPACE); + } } /* Null child means "get first node" */ diff --git a/drivers/acpi/apei/bert.c b/drivers/acpi/apei/bert.c index 19e50fcbf4d6f..598fd19b65fa4 100644 --- a/drivers/acpi/apei/bert.c +++ b/drivers/acpi/apei/bert.c @@ 
-29,6 +29,7 @@ #undef pr_fmt #define pr_fmt(fmt) "BERT: " fmt +#define ACPI_BERT_PRINT_MAX_LEN 1024 static int bert_disable; @@ -58,8 +59,11 @@ static void __init bert_print_all(struct acpi_bert_region *region, } pr_info_once("Error records from previous boot:\n"); - - cper_estatus_print(KERN_INFO HW_ERR, estatus); + if (region_len < ACPI_BERT_PRINT_MAX_LEN) + cper_estatus_print(KERN_INFO HW_ERR, estatus); + else + pr_info_once("Max print length exceeded, table data is available at:\n" + "/sys/firmware/acpi/tables/data/BERT"); /* * Because the boot error source is "one-time polled" type, @@ -77,7 +81,7 @@ static int __init setup_bert_disable(char *str) { bert_disable = 1; - return 0; + return 1; } __setup("bert_disable", setup_bert_disable); diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c index 242f3c2d55330..698d67cee0527 100644 --- a/drivers/acpi/apei/erst.c +++ b/drivers/acpi/apei/erst.c @@ -891,7 +891,7 @@ EXPORT_SYMBOL_GPL(erst_clear); static int __init setup_erst_disable(char *str) { erst_disable = 1; - return 0; + return 1; } __setup("erst_disable", setup_erst_disable); diff --git a/drivers/acpi/apei/hest.c b/drivers/acpi/apei/hest.c index 0edc1ed476737..6aef1ee5e1bdb 100644 --- a/drivers/acpi/apei/hest.c +++ b/drivers/acpi/apei/hest.c @@ -224,7 +224,7 @@ static int __init hest_ghes_dev_register(unsigned int ghes_count) static int __init setup_hest_disable(char *str) { hest_disable = HEST_DISABLED; - return 0; + return 1; } __setup("hest_disable", setup_hest_disable); diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c index ea31ae01458b4..dc208f5f5a1f7 100644 --- a/drivers/acpi/battery.c +++ b/drivers/acpi/battery.c @@ -59,6 +59,10 @@ MODULE_PARM_DESC(cache_time, "cache time in milliseconds"); static const struct acpi_device_id battery_device_ids[] = { {"PNP0C0A", 0}, + + /* Microsoft Surface Go 3 */ + {"MSHW0146", 0}, + {"", 0}, }; @@ -1148,6 +1152,14 @@ static const struct dmi_system_id bat_dmi_table[] __initconst = { 
DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad"), }, }, + { + /* Microsoft Surface Go 3 */ + .callback = battery_notification_delay_quirk, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"), + DMI_MATCH(DMI_PRODUCT_NAME, "Surface Go 3"), + }, + }, {}, }; diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index 07f604832fd6b..079b952ab59f2 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -332,21 +332,32 @@ static void acpi_bus_osc_negotiate_platform_control(void) if (ACPI_FAILURE(acpi_run_osc(handle, &context))) return; - kfree(context.ret.pointer); + capbuf_ret = context.ret.pointer; + if (context.ret.length <= OSC_SUPPORT_DWORD) { + kfree(context.ret.pointer); + return; + } - /* Now run _OSC again with query flag clear */ + /* + * Now run _OSC again with query flag clear and with the caps + * supported by both the OS and the platform. + */ capbuf[OSC_QUERY_DWORD] = 0; + capbuf[OSC_SUPPORT_DWORD] = capbuf_ret[OSC_SUPPORT_DWORD]; + kfree(context.ret.pointer); if (ACPI_FAILURE(acpi_run_osc(handle, &context))) return; capbuf_ret = context.ret.pointer; - osc_sb_apei_support_acked = - capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_APEI_SUPPORT; - osc_pc_lpi_support_confirmed = - capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_PCLPI_SUPPORT; - osc_sb_native_usb4_support_confirmed = - capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_NATIVE_USB4_SUPPORT; + if (context.ret.length > OSC_SUPPORT_DWORD) { + osc_sb_apei_support_acked = + capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_APEI_SUPPORT; + osc_pc_lpi_support_confirmed = + capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_PCLPI_SUPPORT; + osc_sb_native_usb4_support_confirmed = + capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_NATIVE_USB4_SUPPORT; + } kfree(context.ret.pointer); } diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index 866560cbb082c..123e98a765de7 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -676,6 +676,11 @@ int acpi_cppc_processor_probe(struct acpi_processor *pr) cpc_obj = 
&out_obj->package.elements[0]; if (cpc_obj->type == ACPI_TYPE_INTEGER) { num_ent = cpc_obj->integer.value; + if (num_ent <= 1) { + pr_debug("Unexpected _CPC NumEntries value (%d) for CPU:%d\n", + num_ent, pr->id); + goto out_free; + } } else { pr_debug("Unexpected entry type(%d) for NumEntries\n", cpc_obj->type); diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index f8e9fa82cb9b1..4556c86c34659 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -570,8 +570,7 @@ static int acpi_idle_play_dead(struct cpuidle_device *dev, int index) { struct acpi_processor_cx *cx = per_cpu(acpi_cstate[index], dev->cpu); - if (cx->type == ACPI_STATE_C3) - ACPI_FLUSH_CPU_CACHE(); + ACPI_FLUSH_CPU_CACHE(); while (1) { @@ -1080,6 +1079,11 @@ static int flatten_lpi_states(struct acpi_processor *pr, return 0; } +int __weak acpi_processor_ffh_lpi_probe(unsigned int cpu) +{ + return -EOPNOTSUPP; +} + static int acpi_processor_get_lpi_info(struct acpi_processor *pr) { int ret, i; @@ -1088,6 +1092,11 @@ static int acpi_processor_get_lpi_info(struct acpi_processor *pr) struct acpi_device *d = NULL; struct acpi_lpi_states_array info[2], *tmp, *prev, *curr; + /* make sure our architecture has support */ + ret = acpi_processor_ffh_lpi_probe(pr->id); + if (ret == -EOPNOTSUPP) + return ret; + if (!osc_pc_lpi_support_confirmed) return -EOPNOTSUPP; @@ -1139,11 +1148,6 @@ static int acpi_processor_get_lpi_info(struct acpi_processor *pr) return 0; } -int __weak acpi_processor_ffh_lpi_probe(unsigned int cpu) -{ - return -ENODEV; -} - int __weak acpi_processor_ffh_lpi_enter(struct acpi_lpi_state *lpi) { return -ENODEV; diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c index d0986bda29640..3fceb4681ec9f 100644 --- a/drivers/acpi/property.c +++ b/drivers/acpi/property.c @@ -685,7 +685,7 @@ int __acpi_node_get_property_reference(const struct fwnode_handle *fwnode, */ if (obj->type == ACPI_TYPE_LOCAL_REFERENCE) { if (index) - return 
-EINVAL; + return -ENOENT; device = acpi_fetch_acpi_dev(obj->reference.handle); if (!device) diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c index 4f64713e9917b..becc198e4c224 100644 --- a/drivers/acpi/video_detect.c +++ b/drivers/acpi/video_detect.c @@ -415,6 +415,81 @@ static const struct dmi_system_id video_detect_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "GA503"), }, }, + /* + * Clevo NL5xRU and NL5xNU/TUXEDO Aura 15 Gen1 and Gen2 have both a + * working native and video interface. However the default detection + * mechanism first registers the video interface before unregistering + * it again and switching to the native interface during boot. This + * results in a dangling SBIOS request for backlight change for some + * reason, causing the backlight to switch to ~2% once per boot on the + * first power cord connect or disconnect event. Setting the native + * interface explicitly circumvents this buggy behaviour, by avoiding + * the unregistering process. + */ + { + .callback = video_detect_force_native, + .ident = "Clevo NL5xRU", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"), + DMI_MATCH(DMI_BOARD_NAME, "NL5xRU"), + }, + }, + { + .callback = video_detect_force_native, + .ident = "Clevo NL5xRU", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "SchenkerTechnologiesGmbH"), + DMI_MATCH(DMI_BOARD_NAME, "NL5xRU"), + }, + }, + { + .callback = video_detect_force_native, + .ident = "Clevo NL5xRU", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Notebook"), + DMI_MATCH(DMI_BOARD_NAME, "NL5xRU"), + }, + }, + { + .callback = video_detect_force_native, + .ident = "Clevo NL5xRU", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"), + DMI_MATCH(DMI_BOARD_NAME, "AURA1501"), + }, + }, + { + .callback = video_detect_force_native, + .ident = "Clevo NL5xRU", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"), + DMI_MATCH(DMI_BOARD_NAME, "EDUBOOK1502"), + }, + }, + { + .callback = video_detect_force_native, + .ident = "Clevo NL5xNU", + .matches = { + 
DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"), + DMI_MATCH(DMI_BOARD_NAME, "NL5xNU"), + }, + }, + { + .callback = video_detect_force_native, + .ident = "Clevo NL5xNU", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "SchenkerTechnologiesGmbH"), + DMI_MATCH(DMI_BOARD_NAME, "NL5xNU"), + }, + }, + { + .callback = video_detect_force_native, + .ident = "Clevo NL5xNU", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Notebook"), + DMI_MATCH(DMI_BOARD_NAME, "NL5xNU"), + }, + }, /* * Desktops which falsely report a backlight and which our heuristics diff --git a/drivers/acpi/x86/utils.c b/drivers/acpi/x86/utils.c index ffdeed5334d6f..664070fc83498 100644 --- a/drivers/acpi/x86/utils.c +++ b/drivers/acpi/x86/utils.c @@ -284,6 +284,27 @@ static const struct dmi_system_id acpi_quirk_skip_dmi_ids[] = { .driver_data = (void *)(ACPI_QUIRK_SKIP_I2C_CLIENTS | ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY), }, + { + /* Lenovo Yoga Tablet 1050F/L */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Intel Corp."), + DMI_MATCH(DMI_PRODUCT_NAME, "VALLEYVIEW C0 PLATFORM"), + DMI_MATCH(DMI_BOARD_NAME, "BYT-T FFD8"), + /* Partial match on beginning of BIOS version */ + DMI_MATCH(DMI_BIOS_VERSION, "BLADE_21"), + }, + .driver_data = (void *)(ACPI_QUIRK_SKIP_I2C_CLIENTS | + ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY), + }, + { + /* Nextbook Ares 8 */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Insyde"), + DMI_MATCH(DMI_PRODUCT_NAME, "M890BAP"), + }, + .driver_data = (void *)(ACPI_QUIRK_SKIP_I2C_CLIENTS | + ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY), + }, { /* Whitelabel (sold as various brands) TM800A550L */ .matches = { diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 0c854aebfe0bd..760c0d81d1482 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4014,6 +4014,9 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { ATA_HORKAGE_ZERO_AFTER_TRIM, }, { "Crucial_CT*MX100*", "MU01", ATA_HORKAGE_NO_NCQ_TRIM | ATA_HORKAGE_ZERO_AFTER_TRIM, }, + { "Samsung SSD 840 EVO*", NULL, 
ATA_HORKAGE_NO_NCQ_TRIM | + ATA_HORKAGE_NO_DMA_LOG | + ATA_HORKAGE_ZERO_AFTER_TRIM, }, { "Samsung SSD 840*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | ATA_HORKAGE_ZERO_AFTER_TRIM, }, { "Samsung SSD 850*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | diff --git a/drivers/ata/sata_dwc_460ex.c b/drivers/ata/sata_dwc_460ex.c index bec33d781ae04..e3263e961045a 100644 --- a/drivers/ata/sata_dwc_460ex.c +++ b/drivers/ata/sata_dwc_460ex.c @@ -137,7 +137,11 @@ struct sata_dwc_device { #endif }; -#define SATA_DWC_QCMD_MAX 32 +/* + * Allow one extra special slot for commands and DMA management + * to account for libata internal commands. + */ +#define SATA_DWC_QCMD_MAX (ATA_MAX_QUEUE + 1) struct sata_dwc_device_port { struct sata_dwc_device *hsdev; diff --git a/drivers/base/dd.c b/drivers/base/dd.c index f47cab21430f9..7e079fa3795b1 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -296,6 +296,7 @@ int driver_deferred_probe_check_state(struct device *dev) return -EPROBE_DEFER; } +EXPORT_SYMBOL_GPL(driver_deferred_probe_check_state); static void deferred_probe_timeout_work_func(struct work_struct *work) { @@ -810,7 +811,7 @@ static int __init save_async_options(char *buf) pr_warn("Too long list of driver names for 'driver_async_probe'!\n"); strlcpy(async_probe_drv_names, buf, ASYNC_DRV_NAMES_MAX_LEN); - return 0; + return 1; } __setup("driver_async_probe=", save_async_options); diff --git a/drivers/base/memory.c b/drivers/base/memory.c index 365cd4a7f2397..60c38f9cf1a75 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -663,14 +663,16 @@ static int init_memory_block(unsigned long block_id, unsigned long state, mem->nr_vmemmap_pages = nr_vmemmap_pages; INIT_LIST_HEAD(&mem->group_next); + ret = register_memory(mem); + if (ret) + return ret; + if (group) { mem->group = group; list_add(&mem->group_next, &group->memory_blocks); } - ret = register_memory(mem); - - return ret; + return 0; } static int add_memory_block(unsigned long base_section_nr) diff --git 
a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 5db704f02e712..7e8039d1884cc 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -2058,9 +2058,9 @@ static int genpd_remove(struct generic_pm_domain *genpd) kfree(link); } - genpd_debug_remove(genpd); list_del(&genpd->gpd_list_node); genpd_unlock(genpd); + genpd_debug_remove(genpd); cancel_work_sync(&genpd->power_off_work); if (genpd_is_cpu_domain(genpd)) free_cpumask_var(genpd->cpus); diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 04ea92cbd9cfd..08c8a69d7b810 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -2018,7 +2018,9 @@ static bool pm_ops_is_empty(const struct dev_pm_ops *ops) void device_pm_check_callbacks(struct device *dev) { - spin_lock_irq(&dev->power.lock); + unsigned long flags; + + spin_lock_irqsave(&dev->power.lock, flags); dev->power.no_pm_callbacks = (!dev->bus || (pm_ops_is_empty(dev->bus->pm) && !dev->bus->suspend && !dev->bus->resume)) && @@ -2027,7 +2029,7 @@ void device_pm_check_callbacks(struct device *dev) (!dev->pm_domain || pm_ops_is_empty(&dev->pm_domain->ops)) && (!dev->driver || (pm_ops_is_empty(dev->driver->pm) && !dev->driver->suspend && !dev->driver->resume)); - spin_unlock_irq(&dev->power.lock); + spin_unlock_irqrestore(&dev->power.lock, flags); } bool dev_pm_skip_suspend(struct device *dev) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index f27d5b0f9a0bb..a98bfcf4a5f02 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1642,22 +1642,22 @@ struct sib_info { }; void drbd_bcast_event(struct drbd_device *device, const struct sib_info *sib); -extern void notify_resource_state(struct sk_buff *, +extern int notify_resource_state(struct sk_buff *, unsigned int, struct drbd_resource *, struct resource_info *, enum drbd_notification_type); -extern void notify_device_state(struct sk_buff *, +extern int notify_device_state(struct sk_buff *, 
unsigned int, struct drbd_device *, struct device_info *, enum drbd_notification_type); -extern void notify_connection_state(struct sk_buff *, +extern int notify_connection_state(struct sk_buff *, unsigned int, struct drbd_connection *, struct connection_info *, enum drbd_notification_type); -extern void notify_peer_device_state(struct sk_buff *, +extern int notify_peer_device_state(struct sk_buff *, unsigned int, struct drbd_peer_device *, struct peer_device_info *, diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 6f450816c4fa6..478ba959362ce 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2739,6 +2739,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig sprintf(disk->disk_name, "drbd%d", minor); disk->private_data = device; + blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, disk->queue); blk_queue_write_cache(disk->queue, true, true); /* Setting the max_hw_sectors to an odd value of 8kibyte here This triggers a max_bio_size message upon first attach or connect */ @@ -2793,12 +2794,12 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig if (init_submitter(device)) { err = ERR_NOMEM; - goto out_idr_remove_vol; + goto out_idr_remove_from_resource; } err = add_disk(disk); if (err) - goto out_idr_remove_vol; + goto out_idr_remove_from_resource; /* inherit the connection state */ device->state.conn = first_connection(resource)->cstate; @@ -2812,8 +2813,6 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig drbd_debugfs_device_add(device); return NO_ERROR; -out_idr_remove_vol: - idr_remove(&connection->peer_devices, vnr); out_idr_remove_from_resource: for_each_connection(connection, resource) { peer_device = idr_remove(&connection->peer_devices, vnr); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 44ccf8b4f4b29..69184cf17b6ad 100644 --- a/drivers/block/drbd/drbd_nl.c +++ 
b/drivers/block/drbd/drbd_nl.c @@ -4617,7 +4617,7 @@ static int nla_put_notification_header(struct sk_buff *msg, return drbd_notification_header_to_skb(msg, &nh, true); } -void notify_resource_state(struct sk_buff *skb, +int notify_resource_state(struct sk_buff *skb, unsigned int seq, struct drbd_resource *resource, struct resource_info *resource_info, @@ -4659,16 +4659,17 @@ void notify_resource_state(struct sk_buff *skb, if (err && err != -ESRCH) goto failed; } - return; + return 0; nla_put_failure: nlmsg_free(skb); failed: drbd_err(resource, "Error %d while broadcasting event. Event seq:%u\n", err, seq); + return err; } -void notify_device_state(struct sk_buff *skb, +int notify_device_state(struct sk_buff *skb, unsigned int seq, struct drbd_device *device, struct device_info *device_info, @@ -4708,16 +4709,17 @@ void notify_device_state(struct sk_buff *skb, if (err && err != -ESRCH) goto failed; } - return; + return 0; nla_put_failure: nlmsg_free(skb); failed: drbd_err(device, "Error %d while broadcasting event. Event seq:%u\n", err, seq); + return err; } -void notify_connection_state(struct sk_buff *skb, +int notify_connection_state(struct sk_buff *skb, unsigned int seq, struct drbd_connection *connection, struct connection_info *connection_info, @@ -4757,16 +4759,17 @@ void notify_connection_state(struct sk_buff *skb, if (err && err != -ESRCH) goto failed; } - return; + return 0; nla_put_failure: nlmsg_free(skb); failed: drbd_err(connection, "Error %d while broadcasting event. Event seq:%u\n", err, seq); + return err; } -void notify_peer_device_state(struct sk_buff *skb, +int notify_peer_device_state(struct sk_buff *skb, unsigned int seq, struct drbd_peer_device *peer_device, struct peer_device_info *peer_device_info, @@ -4807,13 +4810,14 @@ void notify_peer_device_state(struct sk_buff *skb, if (err && err != -ESRCH) goto failed; } - return; + return 0; nla_put_failure: nlmsg_free(skb); failed: drbd_err(peer_device, "Error %d while broadcasting event. 
Event seq:%u\n", err, seq); + return err; } void notify_helper(enum drbd_notification_type type, @@ -4864,7 +4868,7 @@ void notify_helper(enum drbd_notification_type type, err, seq); } -static void notify_initial_state_done(struct sk_buff *skb, unsigned int seq) +static int notify_initial_state_done(struct sk_buff *skb, unsigned int seq) { struct drbd_genlmsghdr *dh; int err; @@ -4878,11 +4882,12 @@ static void notify_initial_state_done(struct sk_buff *skb, unsigned int seq) if (nla_put_notification_header(skb, NOTIFY_EXISTS)) goto nla_put_failure; genlmsg_end(skb, dh); - return; + return 0; nla_put_failure: nlmsg_free(skb); pr_err("Error %d sending event. Event seq:%u\n", err, seq); + return err; } static void free_state_changes(struct list_head *list) @@ -4909,6 +4914,7 @@ static int get_initial_state(struct sk_buff *skb, struct netlink_callback *cb) unsigned int seq = cb->args[2]; unsigned int n; enum drbd_notification_type flags = 0; + int err = 0; /* There is no need for taking notification_mutex here: it doesn't matter if the initial state events mix with later state chage @@ -4917,32 +4923,32 @@ static int get_initial_state(struct sk_buff *skb, struct netlink_callback *cb) cb->args[5]--; if (cb->args[5] == 1) { - notify_initial_state_done(skb, seq); + err = notify_initial_state_done(skb, seq); goto out; } n = cb->args[4]++; if (cb->args[4] < cb->args[3]) flags |= NOTIFY_CONTINUES; if (n < 1) { - notify_resource_state_change(skb, seq, state_change->resource, + err = notify_resource_state_change(skb, seq, state_change->resource, NOTIFY_EXISTS | flags); goto next; } n--; if (n < state_change->n_connections) { - notify_connection_state_change(skb, seq, &state_change->connections[n], + err = notify_connection_state_change(skb, seq, &state_change->connections[n], NOTIFY_EXISTS | flags); goto next; } n -= state_change->n_connections; if (n < state_change->n_devices) { - notify_device_state_change(skb, seq, &state_change->devices[n], + err = 
notify_device_state_change(skb, seq, &state_change->devices[n], NOTIFY_EXISTS | flags); goto next; } n -= state_change->n_devices; if (n < state_change->n_devices * state_change->n_connections) { - notify_peer_device_state_change(skb, seq, &state_change->peer_devices[n], + err = notify_peer_device_state_change(skb, seq, &state_change->peer_devices[n], NOTIFY_EXISTS | flags); goto next; } @@ -4957,7 +4963,10 @@ static int get_initial_state(struct sk_buff *skb, struct netlink_callback *cb) cb->args[4] = 0; } out: - return skb->len; + if (err) + return err; + else + return skb->len; } int drbd_adm_get_initial_state(struct sk_buff *skb, struct netlink_callback *cb) diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 3235532ae0778..8b26f631ebc15 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -180,7 +180,8 @@ void start_new_tl_epoch(struct drbd_connection *connection) void complete_master_bio(struct drbd_device *device, struct bio_and_error *m) { - m->bio->bi_status = errno_to_blk_status(m->error); + if (unlikely(m->error)) + m->bio->bi_status = errno_to_blk_status(m->error); bio_endio(m->bio); dec_ap_bio(device); } diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index b8a27818ab3f8..4ee11aef6672b 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -1537,7 +1537,7 @@ int drbd_bitmap_io_from_worker(struct drbd_device *device, return rv; } -void notify_resource_state_change(struct sk_buff *skb, +int notify_resource_state_change(struct sk_buff *skb, unsigned int seq, struct drbd_resource_state_change *resource_state_change, enum drbd_notification_type type) @@ -1550,10 +1550,10 @@ void notify_resource_state_change(struct sk_buff *skb, .res_susp_fen = resource_state_change->susp_fen[NEW], }; - notify_resource_state(skb, seq, resource, &resource_info, type); + return notify_resource_state(skb, seq, resource, &resource_info, type); } -void 
notify_connection_state_change(struct sk_buff *skb, +int notify_connection_state_change(struct sk_buff *skb, unsigned int seq, struct drbd_connection_state_change *connection_state_change, enum drbd_notification_type type) @@ -1564,10 +1564,10 @@ void notify_connection_state_change(struct sk_buff *skb, .conn_role = connection_state_change->peer_role[NEW], }; - notify_connection_state(skb, seq, connection, &connection_info, type); + return notify_connection_state(skb, seq, connection, &connection_info, type); } -void notify_device_state_change(struct sk_buff *skb, +int notify_device_state_change(struct sk_buff *skb, unsigned int seq, struct drbd_device_state_change *device_state_change, enum drbd_notification_type type) @@ -1577,10 +1577,10 @@ void notify_device_state_change(struct sk_buff *skb, .dev_disk_state = device_state_change->disk_state[NEW], }; - notify_device_state(skb, seq, device, &device_info, type); + return notify_device_state(skb, seq, device, &device_info, type); } -void notify_peer_device_state_change(struct sk_buff *skb, +int notify_peer_device_state_change(struct sk_buff *skb, unsigned int seq, struct drbd_peer_device_state_change *p, enum drbd_notification_type type) @@ -1594,7 +1594,7 @@ void notify_peer_device_state_change(struct sk_buff *skb, .peer_resync_susp_dependency = p->resync_susp_dependency[NEW], }; - notify_peer_device_state(skb, seq, peer_device, &peer_device_info, type); + return notify_peer_device_state(skb, seq, peer_device, &peer_device_info, type); } static void broadcast_state_change(struct drbd_state_change *state_change) @@ -1602,7 +1602,7 @@ static void broadcast_state_change(struct drbd_state_change *state_change) struct drbd_resource_state_change *resource_state_change = &state_change->resource[0]; bool resource_state_has_changed; unsigned int n_device, n_connection, n_peer_device, n_peer_devices; - void (*last_func)(struct sk_buff *, unsigned int, void *, + int (*last_func)(struct sk_buff *, unsigned int, void *, enum 
drbd_notification_type) = NULL; void *last_arg = NULL; diff --git a/drivers/block/drbd/drbd_state_change.h b/drivers/block/drbd/drbd_state_change.h index ba80f612d6abb..d5b0479bc9a66 100644 --- a/drivers/block/drbd/drbd_state_change.h +++ b/drivers/block/drbd/drbd_state_change.h @@ -44,19 +44,19 @@ extern struct drbd_state_change *remember_old_state(struct drbd_resource *, gfp_ extern void copy_old_to_new_state_change(struct drbd_state_change *); extern void forget_state_change(struct drbd_state_change *); -extern void notify_resource_state_change(struct sk_buff *, +extern int notify_resource_state_change(struct sk_buff *, unsigned int, struct drbd_resource_state_change *, enum drbd_notification_type type); -extern void notify_connection_state_change(struct sk_buff *, +extern int notify_connection_state_change(struct sk_buff *, unsigned int, struct drbd_connection_state_change *, enum drbd_notification_type type); -extern void notify_device_state_change(struct sk_buff *, +extern int notify_device_state_change(struct sk_buff *, unsigned int, struct drbd_device_state_change *, enum drbd_notification_type type); -extern void notify_peer_device_state_change(struct sk_buff *, +extern int notify_peer_device_state_change(struct sk_buff *, unsigned int, struct drbd_peer_device_state_change *, enum drbd_notification_type type); diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 19fe19eaa50e9..d46a3d5d0c2ec 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -681,33 +681,33 @@ static ssize_t loop_attr_backing_file_show(struct loop_device *lo, char *buf) static ssize_t loop_attr_offset_show(struct loop_device *lo, char *buf) { - return sprintf(buf, "%llu\n", (unsigned long long)lo->lo_offset); + return sysfs_emit(buf, "%llu\n", (unsigned long long)lo->lo_offset); } static ssize_t loop_attr_sizelimit_show(struct loop_device *lo, char *buf) { - return sprintf(buf, "%llu\n", (unsigned long long)lo->lo_sizelimit); + return sysfs_emit(buf, "%llu\n", 
(unsigned long long)lo->lo_sizelimit); } static ssize_t loop_attr_autoclear_show(struct loop_device *lo, char *buf) { int autoclear = (lo->lo_flags & LO_FLAGS_AUTOCLEAR); - return sprintf(buf, "%s\n", autoclear ? "1" : "0"); + return sysfs_emit(buf, "%s\n", autoclear ? "1" : "0"); } static ssize_t loop_attr_partscan_show(struct loop_device *lo, char *buf) { int partscan = (lo->lo_flags & LO_FLAGS_PARTSCAN); - return sprintf(buf, "%s\n", partscan ? "1" : "0"); + return sysfs_emit(buf, "%s\n", partscan ? "1" : "0"); } static ssize_t loop_attr_dio_show(struct loop_device *lo, char *buf) { int dio = (lo->lo_flags & LO_FLAGS_DIRECT_IO); - return sprintf(buf, "%s\n", dio ? "1" : "0"); + return sysfs_emit(buf, "%s\n", dio ? "1" : "0"); } LOOP_ATTR_RO(backing_file); @@ -1592,6 +1592,7 @@ struct compat_loop_info { compat_ulong_t lo_inode; /* ioctl r/o */ compat_dev_t lo_rdevice; /* ioctl r/o */ compat_int_t lo_offset; + compat_int_t lo_encrypt_type; /* obsolete, ignored */ compat_int_t lo_encrypt_key_size; /* ioctl w/o */ compat_int_t lo_flags; /* ioctl r/o */ char lo_name[LO_NAME_SIZE]; diff --git a/drivers/block/n64cart.c b/drivers/block/n64cart.c index 4db9a8c244af5..e094d2b8b5a92 100644 --- a/drivers/block/n64cart.c +++ b/drivers/block/n64cart.c @@ -88,7 +88,7 @@ static void n64cart_submit_bio(struct bio *bio) { struct bio_vec bvec; struct bvec_iter iter; - struct device *dev = bio->bi_disk->private_data; + struct device *dev = bio->bi_bdev->bd_disk->private_data; u32 pos = bio->bi_iter.bi_sector << SECTOR_SHIFT; bio_for_each_segment(bvec, bio, iter) { diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c index 13004beb48cab..233577b141412 100644 --- a/drivers/block/null_blk/main.c +++ b/drivers/block/null_blk/main.c @@ -1606,7 +1606,7 @@ static enum blk_eh_timer_return null_timeout_rq(struct request *rq, bool res) * Only fake timeouts need to execute blk_mq_complete_request() here. 
*/ cmd->error = BLK_STS_TIMEOUT; - if (cmd->fake_timeout) + if (cmd->fake_timeout || hctx->type == HCTX_TYPE_POLL) blk_mq_complete_request(rq); return BLK_EH_DONE; } diff --git a/drivers/bluetooth/btintel.c b/drivers/bluetooth/btintel.c index 1a4f8b227eac0..06514ed660229 100644 --- a/drivers/bluetooth/btintel.c +++ b/drivers/bluetooth/btintel.c @@ -2428,10 +2428,15 @@ static int btintel_setup_combined(struct hci_dev *hdev) /* Apply the device specific HCI quirks * - * WBS for SdP - SdP and Stp have a same hw_varaint but - * different fw_variant + * WBS for SdP - For the Legacy ROM products, only SdP + * supports the WBS. But the version information is not + * enough to use here because the StP2 and SdP have same + * hw_variant and fw_variant. So, this flag is set by + * the transport driver (btusb) based on the HW info + * (idProduct) */ - if (ver.hw_variant == 0x08 && ver.fw_variant == 0x22) + if (!btintel_test_flag(hdev, + INTEL_ROM_LEGACY_NO_WBS_SUPPORT)) set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks); diff --git a/drivers/bluetooth/btintel.h b/drivers/bluetooth/btintel.h index c9b24e9299e2a..e0060e58573c3 100644 --- a/drivers/bluetooth/btintel.h +++ b/drivers/bluetooth/btintel.h @@ -152,6 +152,7 @@ enum { INTEL_BROKEN_INITIAL_NCMD, INTEL_BROKEN_SHUTDOWN_LED, INTEL_ROM_LEGACY, + INTEL_ROM_LEGACY_NO_WBS_SUPPORT, __INTEL_NUM_FLAGS, }; diff --git a/drivers/bluetooth/btmtk.h b/drivers/bluetooth/btmtk.h index 6e7b0c7567c0f..0defa68bc2cef 100644 --- a/drivers/bluetooth/btmtk.h +++ b/drivers/bluetooth/btmtk.h @@ -5,6 +5,7 @@ #define FIRMWARE_MT7668 "mediatek/mt7668pr2h.bin" #define FIRMWARE_MT7961 "mediatek/BT_RAM_CODE_MT7961_1_2_hdr.bin" +#define HCI_EV_WMT 0xe4 #define HCI_WMT_MAX_EVENT_SIZE 64 #define BTMTK_WMT_REG_READ 0x2 diff --git a/drivers/bluetooth/btmtksdio.c b/drivers/bluetooth/btmtksdio.c index b5ea8d3bffaa7..ecf29cfa7d792 100644 --- a/drivers/bluetooth/btmtksdio.c +++ b/drivers/bluetooth/btmtksdio.c @@ -38,21 +38,25 @@ static bool 
enable_autosuspend; struct btmtksdio_data { const char *fwname; u16 chipid; + bool lp_mbox_supported; }; static const struct btmtksdio_data mt7663_data = { .fwname = FIRMWARE_MT7663, .chipid = 0x7663, + .lp_mbox_supported = false, }; static const struct btmtksdio_data mt7668_data = { .fwname = FIRMWARE_MT7668, .chipid = 0x7668, + .lp_mbox_supported = false, }; static const struct btmtksdio_data mt7921_data = { .fwname = FIRMWARE_MT7961, .chipid = 0x7921, + .lp_mbox_supported = true, }; static const struct sdio_device_id btmtksdio_table[] = { @@ -87,8 +91,17 @@ MODULE_DEVICE_TABLE(sdio, btmtksdio_table); #define RX_DONE_INT BIT(1) #define TX_EMPTY BIT(2) #define TX_FIFO_OVERFLOW BIT(8) +#define FW_MAILBOX_INT BIT(15) +#define INT_MASK GENMASK(15, 0) #define RX_PKT_LEN GENMASK(31, 16) +#define MTK_REG_CSICR 0xc0 +#define CSICR_CLR_MBOX_ACK BIT(0) +#define MTK_REG_PH2DSM0R 0xc4 +#define PH2DSM0R_DRIVER_OWN BIT(0) +#define MTK_REG_PD2HRM0R 0xdc +#define PD2HRM0R_DRV_OWN BIT(0) + #define MTK_REG_CTDR 0x18 #define MTK_REG_CRDR 0x1c @@ -100,6 +113,7 @@ MODULE_DEVICE_TABLE(sdio, btmtksdio_table); #define BTMTKSDIO_TX_WAIT_VND_EVT 1 #define BTMTKSDIO_HW_TX_READY 2 #define BTMTKSDIO_FUNC_ENABLED 3 +#define BTMTKSDIO_PATCH_ENABLED 4 struct mtkbtsdio_hdr { __le16 len; @@ -278,19 +292,84 @@ static u32 btmtksdio_drv_own_query(struct btmtksdio_dev *bdev) return sdio_readl(bdev->func, MTK_REG_CHLPCR, NULL); } +static u32 btmtksdio_drv_own_query_79xx(struct btmtksdio_dev *bdev) +{ + return sdio_readl(bdev->func, MTK_REG_PD2HRM0R, NULL); +} + +static int btmtksdio_fw_pmctrl(struct btmtksdio_dev *bdev) +{ + u32 status; + int err; + + sdio_claim_host(bdev->func); + + if (bdev->data->lp_mbox_supported && + test_bit(BTMTKSDIO_PATCH_ENABLED, &bdev->tx_state)) { + sdio_writel(bdev->func, CSICR_CLR_MBOX_ACK, MTK_REG_CSICR, + &err); + err = readx_poll_timeout(btmtksdio_drv_own_query_79xx, bdev, + status, !(status & PD2HRM0R_DRV_OWN), + 2000, 1000000); + if (err < 0) { + 
bt_dev_err(bdev->hdev, "mailbox ACK not cleared"); + goto out; + } + } + + /* Return ownership to the device */ + sdio_writel(bdev->func, C_FW_OWN_REQ_SET, MTK_REG_CHLPCR, &err); + if (err < 0) + goto out; + + err = readx_poll_timeout(btmtksdio_drv_own_query, bdev, status, + !(status & C_COM_DRV_OWN), 2000, 1000000); + +out: + sdio_release_host(bdev->func); + + if (err < 0) + bt_dev_err(bdev->hdev, "Cannot return ownership to device"); + + return err; +} + +static int btmtksdio_drv_pmctrl(struct btmtksdio_dev *bdev) +{ + u32 status; + int err; + + sdio_claim_host(bdev->func); + + /* Get ownership from the device */ + sdio_writel(bdev->func, C_FW_OWN_REQ_CLR, MTK_REG_CHLPCR, &err); + if (err < 0) + goto out; + + err = readx_poll_timeout(btmtksdio_drv_own_query, bdev, status, + status & C_COM_DRV_OWN, 2000, 1000000); + + if (!err && bdev->data->lp_mbox_supported && + test_bit(BTMTKSDIO_PATCH_ENABLED, &bdev->tx_state)) + err = readx_poll_timeout(btmtksdio_drv_own_query_79xx, bdev, + status, status & PD2HRM0R_DRV_OWN, + 2000, 1000000); + +out: + sdio_release_host(bdev->func); + + if (err < 0) + bt_dev_err(bdev->hdev, "Cannot get ownership from device"); + + return err; +} + static int btmtksdio_recv_event(struct hci_dev *hdev, struct sk_buff *skb) { struct btmtksdio_dev *bdev = hci_get_drvdata(hdev); struct hci_event_hdr *hdr = (void *)skb->data; int err; - /* Fix up the vendor event id with 0xff for vendor specific instead - * of 0xe4 so that event send via monitoring socket can be parsed - * properly. - */ - if (hdr->evt == 0xe4) - hdr->evt = HCI_EV_VENDOR; - /* When someone waits for the WMT event, the skb is being cloned * and being processed the events from there then. 
*/ @@ -306,7 +385,7 @@ static int btmtksdio_recv_event(struct hci_dev *hdev, struct sk_buff *skb) if (err < 0) goto err_free_skb; - if (hdr->evt == HCI_EV_VENDOR) { + if (hdr->evt == HCI_EV_WMT) { if (test_and_clear_bit(BTMTKSDIO_TX_WAIT_VND_EVT, &bdev->tx_state)) { /* Barrier to sync with other CPUs */ @@ -480,6 +559,13 @@ static void btmtksdio_txrx_work(struct work_struct *work) * FIFO. */ sdio_writel(bdev->func, int_status, MTK_REG_CHISR, NULL); + int_status &= INT_MASK; + + if ((int_status & FW_MAILBOX_INT) && + bdev->data->chipid == 0x7921) { + sdio_writel(bdev->func, PH2DSM0R_DRIVER_OWN, + MTK_REG_PH2DSM0R, 0); + } if (int_status & FW_OWN_BACK_INT) bt_dev_dbg(bdev->hdev, "Get fw own back"); @@ -531,7 +617,7 @@ static void btmtksdio_interrupt(struct sdio_func *func) static int btmtksdio_open(struct hci_dev *hdev) { struct btmtksdio_dev *bdev = hci_get_drvdata(hdev); - u32 status, val; + u32 val; int err; sdio_claim_host(bdev->func); @@ -542,18 +628,10 @@ static int btmtksdio_open(struct hci_dev *hdev) set_bit(BTMTKSDIO_FUNC_ENABLED, &bdev->tx_state); - /* Get ownership from the device */ - sdio_writel(bdev->func, C_FW_OWN_REQ_CLR, MTK_REG_CHLPCR, &err); + err = btmtksdio_drv_pmctrl(bdev); if (err < 0) goto err_disable_func; - err = readx_poll_timeout(btmtksdio_drv_own_query, bdev, status, - status & C_COM_DRV_OWN, 2000, 1000000); - if (err < 0) { - bt_dev_err(bdev->hdev, "Cannot get ownership from device"); - goto err_disable_func; - } - /* Disable interrupt & mask out all interrupt sources */ sdio_writel(bdev->func, C_INT_EN_CLR, MTK_REG_CHLPCR, &err); if (err < 0) @@ -623,8 +701,6 @@ static int btmtksdio_open(struct hci_dev *hdev) static int btmtksdio_close(struct hci_dev *hdev) { struct btmtksdio_dev *bdev = hci_get_drvdata(hdev); - u32 status; - int err; sdio_claim_host(bdev->func); @@ -635,13 +711,7 @@ static int btmtksdio_close(struct hci_dev *hdev) cancel_work_sync(&bdev->txrx_work); - /* Return ownership to the device */ - sdio_writel(bdev->func, 
C_FW_OWN_REQ_SET, MTK_REG_CHLPCR, NULL); - - err = readx_poll_timeout(btmtksdio_drv_own_query, bdev, status, - !(status & C_COM_DRV_OWN), 2000, 1000000); - if (err < 0) - bt_dev_err(bdev->hdev, "Cannot return ownership to device"); + btmtksdio_fw_pmctrl(bdev); clear_bit(BTMTKSDIO_FUNC_ENABLED, &bdev->tx_state); sdio_disable_func(bdev->func); @@ -686,6 +756,7 @@ static int btmtksdio_func_query(struct hci_dev *hdev) static int mt76xx_setup(struct hci_dev *hdev, const char *fwname) { + struct btmtksdio_dev *bdev = hci_get_drvdata(hdev); struct btmtk_hci_wmt_params wmt_params; struct btmtk_tci_sleep tci_sleep; struct sk_buff *skb; @@ -746,6 +817,8 @@ static int mt76xx_setup(struct hci_dev *hdev, const char *fwname) return err; } + set_bit(BTMTKSDIO_PATCH_ENABLED, &bdev->tx_state); + ignore_func_on: /* Apply the low power environment setup */ tci_sleep.mode = 0x5; @@ -768,6 +841,7 @@ static int mt76xx_setup(struct hci_dev *hdev, const char *fwname) static int mt79xx_setup(struct hci_dev *hdev, const char *fwname) { + struct btmtksdio_dev *bdev = hci_get_drvdata(hdev); struct btmtk_hci_wmt_params wmt_params; u8 param = 0x1; int err; @@ -793,6 +867,7 @@ static int mt79xx_setup(struct hci_dev *hdev, const char *fwname) hci_set_msft_opcode(hdev, 0xFD30); hci_set_aosp_capable(hdev); + set_bit(BTMTKSDIO_PATCH_ENABLED, &bdev->tx_state); return err; } @@ -862,6 +937,15 @@ static int btmtksdio_setup(struct hci_dev *hdev) err = mt79xx_setup(hdev, fwname); if (err < 0) return err; + + err = btmtksdio_fw_pmctrl(bdev); + if (err < 0) + return err; + + err = btmtksdio_drv_pmctrl(bdev); + if (err < 0) + return err; + break; case 0x7663: case 0x7668: @@ -1004,6 +1088,8 @@ static int btmtksdio_probe(struct sdio_func *func, hdev->manufacturer = 70; set_bit(HCI_QUIRK_NON_PERSISTENT_SETUP, &hdev->quirks); + sdio_set_drvdata(func, bdev); + err = hci_register_dev(hdev); if (err < 0) { dev_err(&func->dev, "Can't register HCI device\n"); @@ -1011,8 +1097,6 @@ static int btmtksdio_probe(struct 
sdio_func *func, return err; } - sdio_set_drvdata(func, bdev); - /* pm_runtime_enable would be done after the firmware is being * downloaded because the core layer probably already enables * runtime PM for this func such as the case host->caps & @@ -1058,7 +1142,6 @@ static int btmtksdio_runtime_suspend(struct device *dev) { struct sdio_func *func = dev_to_sdio_func(dev); struct btmtksdio_dev *bdev; - u32 status; int err; bdev = sdio_get_drvdata(func); @@ -1070,19 +1153,10 @@ static int btmtksdio_runtime_suspend(struct device *dev) sdio_set_host_pm_flags(func, MMC_PM_KEEP_POWER); - sdio_claim_host(bdev->func); + err = btmtksdio_fw_pmctrl(bdev); - sdio_writel(bdev->func, C_FW_OWN_REQ_SET, MTK_REG_CHLPCR, &err); - if (err < 0) - goto out; - - err = readx_poll_timeout(btmtksdio_drv_own_query, bdev, status, - !(status & C_COM_DRV_OWN), 2000, 1000000); -out: bt_dev_info(bdev->hdev, "status (%d) return ownership to device", err); - sdio_release_host(bdev->func); - return err; } @@ -1090,7 +1164,6 @@ static int btmtksdio_runtime_resume(struct device *dev) { struct sdio_func *func = dev_to_sdio_func(dev); struct btmtksdio_dev *bdev; - u32 status; int err; bdev = sdio_get_drvdata(func); @@ -1100,19 +1173,10 @@ static int btmtksdio_runtime_resume(struct device *dev) if (!test_bit(BTMTKSDIO_FUNC_ENABLED, &bdev->tx_state)) return 0; - sdio_claim_host(bdev->func); - - sdio_writel(bdev->func, C_FW_OWN_REQ_CLR, MTK_REG_CHLPCR, &err); - if (err < 0) - goto out; + err = btmtksdio_drv_pmctrl(bdev); - err = readx_poll_timeout(btmtksdio_drv_own_query, bdev, status, - status & C_COM_DRV_OWN, 2000, 1000000); -out: bt_dev_info(bdev->hdev, "status (%d) get ownership from device", err); - sdio_release_host(bdev->func); - return err; } diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index c30d131da7847..42234d5f602dd 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -62,6 +62,7 @@ static struct usb_driver btusb_driver; #define BTUSB_QCA_WCN6855 
0x1000000 #define BTUSB_INTEL_BROKEN_SHUTDOWN_LED 0x2000000 #define BTUSB_INTEL_BROKEN_INITIAL_NCMD 0x4000000 +#define BTUSB_INTEL_NO_WBS_SUPPORT 0x8000000 static const struct usb_device_id btusb_table[] = { /* Generic Bluetooth USB device */ @@ -385,9 +386,11 @@ static const struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x8087, 0x0033), .driver_info = BTUSB_INTEL_COMBINED }, { USB_DEVICE(0x8087, 0x07da), .driver_info = BTUSB_CSR }, { USB_DEVICE(0x8087, 0x07dc), .driver_info = BTUSB_INTEL_COMBINED | + BTUSB_INTEL_NO_WBS_SUPPORT | BTUSB_INTEL_BROKEN_INITIAL_NCMD | BTUSB_INTEL_BROKEN_SHUTDOWN_LED }, { USB_DEVICE(0x8087, 0x0a2a), .driver_info = BTUSB_INTEL_COMBINED | + BTUSB_INTEL_NO_WBS_SUPPORT | BTUSB_INTEL_BROKEN_SHUTDOWN_LED }, { USB_DEVICE(0x8087, 0x0a2b), .driver_info = BTUSB_INTEL_COMBINED }, { USB_DEVICE(0x8087, 0x0aa7), .driver_info = BTUSB_INTEL_COMBINED | @@ -405,6 +408,8 @@ static const struct usb_device_id blacklist_table[] = { BTUSB_WIDEBAND_SPEECH }, /* Realtek 8852AE Bluetooth devices */ + { USB_DEVICE(0x0bda, 0x2852), .driver_info = BTUSB_REALTEK | + BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x0bda, 0xc852), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x0bda, 0x385a), .driver_info = BTUSB_REALTEK | @@ -482,6 +487,8 @@ static const struct usb_device_id blacklist_table[] = { /* Additional Realtek 8761BU Bluetooth devices */ { USB_DEVICE(0x0b05, 0x190e), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH }, + { USB_DEVICE(0x2550, 0x8761), .driver_info = BTUSB_REALTEK | + BTUSB_WIDEBAND_SPEECH }, /* Additional Realtek 8821AE Bluetooth devices */ { USB_DEVICE(0x0b05, 0x17dc), .driver_info = BTUSB_REALTEK }, @@ -2041,6 +2048,8 @@ static int btusb_setup_csr(struct hci_dev *hdev) */ set_bit(HCI_QUIRK_BROKEN_STORED_LINK_KEY, &hdev->quirks); set_bit(HCI_QUIRK_BROKEN_ERR_DATA_REPORTING, &hdev->quirks); + set_bit(HCI_QUIRK_BROKEN_FILTER_CLEAR_ALL, &hdev->quirks); + set_bit(HCI_QUIRK_NO_SUSPEND_NOTIFIER, &hdev->quirks); /* Clear 
the reset quirk since this is not an actual * early Bluetooth 1.1 device from CSR. @@ -2051,7 +2060,7 @@ static int btusb_setup_csr(struct hci_dev *hdev) /* * Special workaround for these BT 4.0 chip clones, and potentially more: * - * - 0x0134: a Barrot 8041a02 (HCI rev: 0x1012 sub: 0x0810) + * - 0x0134: a Barrot 8041a02 (HCI rev: 0x0810 sub: 0x1012) * - 0x7558: IC markings FR3191AHAL 749H15143 (HCI rev/sub-version: 0x0709) * * These controllers are really messed-up. @@ -2080,7 +2089,7 @@ static int btusb_setup_csr(struct hci_dev *hdev) if (ret >= 0) msleep(200); else - bt_dev_err(hdev, "CSR: Failed to suspend the device for our Barrot 8041a02 receive-issue workaround"); + bt_dev_warn(hdev, "CSR: Couldn't suspend the device for our Barrot 8041a02 receive-issue workaround"); pm_runtime_forbid(&data->udev->dev); @@ -2245,7 +2254,6 @@ static void btusb_mtk_wmt_recv(struct urb *urb) { struct hci_dev *hdev = urb->context; struct btusb_data *data = hci_get_drvdata(hdev); - struct hci_event_hdr *hdr; struct sk_buff *skb; int err; @@ -2265,13 +2273,6 @@ static void btusb_mtk_wmt_recv(struct urb *urb) hci_skb_pkt_type(skb) = HCI_EVENT_PKT; skb_put_data(skb, urb->transfer_buffer, urb->actual_length); - hdr = (void *)skb->data; - /* Fix up the vendor event id with 0xff for vendor specific - * instead of 0xe4 so that event send via monitoring socket can - * be parsed properly. - */ - hdr->evt = 0xff; - /* When someone waits for the WMT event, the skb is being cloned * and being processed the events from there then. 
*/ @@ -3737,6 +3738,9 @@ static int btusb_probe(struct usb_interface *intf, hdev->send = btusb_send_frame_intel; hdev->cmd_timeout = btusb_intel_cmd_timeout; + if (id->driver_info & BTUSB_INTEL_NO_WBS_SUPPORT) + btintel_set_flag(hdev, INTEL_ROM_LEGACY_NO_WBS_SUPPORT); + if (id->driver_info & BTUSB_INTEL_BROKEN_INITIAL_NCMD) btintel_set_flag(hdev, INTEL_BROKEN_INITIAL_NCMD); diff --git a/drivers/bluetooth/hci_h5.c b/drivers/bluetooth/hci_h5.c index 34286ffe0568f..7ac6908a4dfb4 100644 --- a/drivers/bluetooth/hci_h5.c +++ b/drivers/bluetooth/hci_h5.c @@ -629,9 +629,11 @@ static int h5_enqueue(struct hci_uart *hu, struct sk_buff *skb) break; } - pm_runtime_get_sync(&hu->serdev->dev); - pm_runtime_mark_last_busy(&hu->serdev->dev); - pm_runtime_put_autosuspend(&hu->serdev->dev); + if (hu->serdev) { + pm_runtime_get_sync(&hu->serdev->dev); + pm_runtime_mark_last_busy(&hu->serdev->dev); + pm_runtime_put_autosuspend(&hu->serdev->dev); + } return 0; } diff --git a/drivers/bluetooth/hci_serdev.c b/drivers/bluetooth/hci_serdev.c index 3b00d82d36cf7..4cda890ce6470 100644 --- a/drivers/bluetooth/hci_serdev.c +++ b/drivers/bluetooth/hci_serdev.c @@ -305,6 +305,8 @@ int hci_uart_register_device(struct hci_uart *hu, if (err) return err; + percpu_init_rwsem(&hu->proto_lock); + err = p->open(hu); if (err) goto err_open; @@ -327,7 +329,6 @@ int hci_uart_register_device(struct hci_uart *hu, INIT_WORK(&hu->init_ready, hci_uart_init_work); INIT_WORK(&hu->write_work, hci_uart_write_work); - percpu_init_rwsem(&hu->proto_lock); /* Only when vendor specific setup callback is provided, consider * the manufacturer information valid. 
This avoids filling in the diff --git a/drivers/bus/mhi/core/debugfs.c b/drivers/bus/mhi/core/debugfs.c index 858d7516410bb..d818586c229d2 100644 --- a/drivers/bus/mhi/core/debugfs.c +++ b/drivers/bus/mhi/core/debugfs.c @@ -60,16 +60,16 @@ static int mhi_debugfs_events_show(struct seq_file *m, void *d) } seq_printf(m, "Index: %d intmod count: %lu time: %lu", - i, (er_ctxt->intmod & EV_CTX_INTMODC_MASK) >> + i, (le32_to_cpu(er_ctxt->intmod) & EV_CTX_INTMODC_MASK) >> EV_CTX_INTMODC_SHIFT, - (er_ctxt->intmod & EV_CTX_INTMODT_MASK) >> + (le32_to_cpu(er_ctxt->intmod) & EV_CTX_INTMODT_MASK) >> EV_CTX_INTMODT_SHIFT); - seq_printf(m, " base: 0x%0llx len: 0x%llx", er_ctxt->rbase, - er_ctxt->rlen); + seq_printf(m, " base: 0x%0llx len: 0x%llx", le64_to_cpu(er_ctxt->rbase), + le64_to_cpu(er_ctxt->rlen)); - seq_printf(m, " rp: 0x%llx wp: 0x%llx", er_ctxt->rp, - er_ctxt->wp); + seq_printf(m, " rp: 0x%llx wp: 0x%llx", le64_to_cpu(er_ctxt->rp), + le64_to_cpu(er_ctxt->wp)); seq_printf(m, " local rp: 0x%pK db: 0x%pad\n", ring->rp, &mhi_event->db_cfg.db_val); @@ -106,18 +106,18 @@ static int mhi_debugfs_channels_show(struct seq_file *m, void *d) seq_printf(m, "%s(%u) state: 0x%lx brstmode: 0x%lx pollcfg: 0x%lx", - mhi_chan->name, mhi_chan->chan, (chan_ctxt->chcfg & + mhi_chan->name, mhi_chan->chan, (le32_to_cpu(chan_ctxt->chcfg) & CHAN_CTX_CHSTATE_MASK) >> CHAN_CTX_CHSTATE_SHIFT, - (chan_ctxt->chcfg & CHAN_CTX_BRSTMODE_MASK) >> - CHAN_CTX_BRSTMODE_SHIFT, (chan_ctxt->chcfg & + (le32_to_cpu(chan_ctxt->chcfg) & CHAN_CTX_BRSTMODE_MASK) >> + CHAN_CTX_BRSTMODE_SHIFT, (le32_to_cpu(chan_ctxt->chcfg) & CHAN_CTX_POLLCFG_MASK) >> CHAN_CTX_POLLCFG_SHIFT); - seq_printf(m, " type: 0x%x event ring: %u", chan_ctxt->chtype, - chan_ctxt->erindex); + seq_printf(m, " type: 0x%x event ring: %u", le32_to_cpu(chan_ctxt->chtype), + le32_to_cpu(chan_ctxt->erindex)); seq_printf(m, " base: 0x%llx len: 0x%llx rp: 0x%llx wp: 0x%llx", - chan_ctxt->rbase, chan_ctxt->rlen, chan_ctxt->rp, - chan_ctxt->wp); + 
le64_to_cpu(chan_ctxt->rbase), le64_to_cpu(chan_ctxt->rlen), + le64_to_cpu(chan_ctxt->rp), le64_to_cpu(chan_ctxt->wp)); seq_printf(m, " local rp: 0x%pK local wp: 0x%pK db: 0x%pad\n", ring->rp, ring->wp, diff --git a/drivers/bus/mhi/core/init.c b/drivers/bus/mhi/core/init.c index 046f407dc5d6e..d8787aaa176ba 100644 --- a/drivers/bus/mhi/core/init.c +++ b/drivers/bus/mhi/core/init.c @@ -77,12 +77,14 @@ static const char * const mhi_pm_state_str[] = { [MHI_PM_STATE_LD_ERR_FATAL_DETECT] = "Linkdown or Error Fatal Detect", }; -const char *to_mhi_pm_state_str(enum mhi_pm_state state) +const char *to_mhi_pm_state_str(u32 state) { - unsigned long pm_state = state; - int index = find_last_bit(&pm_state, 32); + int index; - if (index >= ARRAY_SIZE(mhi_pm_state_str)) + if (state) + index = __fls(state); + + if (!state || index >= ARRAY_SIZE(mhi_pm_state_str)) return "Invalid State"; return mhi_pm_state_str[index]; @@ -291,17 +293,17 @@ int mhi_init_dev_ctxt(struct mhi_controller *mhi_cntrl) if (mhi_chan->offload_ch) continue; - tmp = chan_ctxt->chcfg; + tmp = le32_to_cpu(chan_ctxt->chcfg); tmp &= ~CHAN_CTX_CHSTATE_MASK; tmp |= (MHI_CH_STATE_DISABLED << CHAN_CTX_CHSTATE_SHIFT); tmp &= ~CHAN_CTX_BRSTMODE_MASK; tmp |= (mhi_chan->db_cfg.brstmode << CHAN_CTX_BRSTMODE_SHIFT); tmp &= ~CHAN_CTX_POLLCFG_MASK; tmp |= (mhi_chan->db_cfg.pollcfg << CHAN_CTX_POLLCFG_SHIFT); - chan_ctxt->chcfg = tmp; + chan_ctxt->chcfg = cpu_to_le32(tmp); - chan_ctxt->chtype = mhi_chan->type; - chan_ctxt->erindex = mhi_chan->er_index; + chan_ctxt->chtype = cpu_to_le32(mhi_chan->type); + chan_ctxt->erindex = cpu_to_le32(mhi_chan->er_index); mhi_chan->ch_state = MHI_CH_STATE_DISABLED; mhi_chan->tre_ring.db_addr = (void __iomem *)&chan_ctxt->wp; @@ -326,14 +328,14 @@ int mhi_init_dev_ctxt(struct mhi_controller *mhi_cntrl) if (mhi_event->offload_ev) continue; - tmp = er_ctxt->intmod; + tmp = le32_to_cpu(er_ctxt->intmod); tmp &= ~EV_CTX_INTMODC_MASK; tmp &= ~EV_CTX_INTMODT_MASK; tmp |= (mhi_event->intmod << 
EV_CTX_INTMODT_SHIFT); - er_ctxt->intmod = tmp; + er_ctxt->intmod = cpu_to_le32(tmp); - er_ctxt->ertype = MHI_ER_TYPE_VALID; - er_ctxt->msivec = mhi_event->irq; + er_ctxt->ertype = cpu_to_le32(MHI_ER_TYPE_VALID); + er_ctxt->msivec = cpu_to_le32(mhi_event->irq); mhi_event->db_cfg.db_mode = true; ring->el_size = sizeof(struct mhi_tre); @@ -347,9 +349,9 @@ int mhi_init_dev_ctxt(struct mhi_controller *mhi_cntrl) * ring is empty */ ring->rp = ring->wp = ring->base; - er_ctxt->rbase = ring->iommu_base; + er_ctxt->rbase = cpu_to_le64(ring->iommu_base); er_ctxt->rp = er_ctxt->wp = er_ctxt->rbase; - er_ctxt->rlen = ring->len; + er_ctxt->rlen = cpu_to_le64(ring->len); ring->ctxt_wp = &er_ctxt->wp; } @@ -376,9 +378,9 @@ int mhi_init_dev_ctxt(struct mhi_controller *mhi_cntrl) goto error_alloc_cmd; ring->rp = ring->wp = ring->base; - cmd_ctxt->rbase = ring->iommu_base; + cmd_ctxt->rbase = cpu_to_le64(ring->iommu_base); cmd_ctxt->rp = cmd_ctxt->wp = cmd_ctxt->rbase; - cmd_ctxt->rlen = ring->len; + cmd_ctxt->rlen = cpu_to_le64(ring->len); ring->ctxt_wp = &cmd_ctxt->wp; } @@ -579,10 +581,10 @@ void mhi_deinit_chan_ctxt(struct mhi_controller *mhi_cntrl, chan_ctxt->rp = 0; chan_ctxt->wp = 0; - tmp = chan_ctxt->chcfg; + tmp = le32_to_cpu(chan_ctxt->chcfg); tmp &= ~CHAN_CTX_CHSTATE_MASK; tmp |= (MHI_CH_STATE_DISABLED << CHAN_CTX_CHSTATE_SHIFT); - chan_ctxt->chcfg = tmp; + chan_ctxt->chcfg = cpu_to_le32(tmp); /* Update to all cores */ smp_wmb(); @@ -616,14 +618,14 @@ int mhi_init_chan_ctxt(struct mhi_controller *mhi_cntrl, return -ENOMEM; } - tmp = chan_ctxt->chcfg; + tmp = le32_to_cpu(chan_ctxt->chcfg); tmp &= ~CHAN_CTX_CHSTATE_MASK; tmp |= (MHI_CH_STATE_ENABLED << CHAN_CTX_CHSTATE_SHIFT); - chan_ctxt->chcfg = tmp; + chan_ctxt->chcfg = cpu_to_le32(tmp); - chan_ctxt->rbase = tre_ring->iommu_base; + chan_ctxt->rbase = cpu_to_le64(tre_ring->iommu_base); chan_ctxt->rp = chan_ctxt->wp = chan_ctxt->rbase; - chan_ctxt->rlen = tre_ring->len; + chan_ctxt->rlen = cpu_to_le64(tre_ring->len); 
tre_ring->ctxt_wp = &chan_ctxt->wp; tre_ring->rp = tre_ring->wp = tre_ring->base; diff --git a/drivers/bus/mhi/core/internal.h b/drivers/bus/mhi/core/internal.h index e2e10474a9d92..37c39bf1c7a98 100644 --- a/drivers/bus/mhi/core/internal.h +++ b/drivers/bus/mhi/core/internal.h @@ -209,14 +209,14 @@ extern struct bus_type mhi_bus_type; #define EV_CTX_INTMODT_MASK GENMASK(31, 16) #define EV_CTX_INTMODT_SHIFT 16 struct mhi_event_ctxt { - __u32 intmod; - __u32 ertype; - __u32 msivec; - - __u64 rbase __packed __aligned(4); - __u64 rlen __packed __aligned(4); - __u64 rp __packed __aligned(4); - __u64 wp __packed __aligned(4); + __le32 intmod; + __le32 ertype; + __le32 msivec; + + __le64 rbase __packed __aligned(4); + __le64 rlen __packed __aligned(4); + __le64 rp __packed __aligned(4); + __le64 wp __packed __aligned(4); }; #define CHAN_CTX_CHSTATE_MASK GENMASK(7, 0) @@ -227,25 +227,25 @@ struct mhi_event_ctxt { #define CHAN_CTX_POLLCFG_SHIFT 10 #define CHAN_CTX_RESERVED_MASK GENMASK(31, 16) struct mhi_chan_ctxt { - __u32 chcfg; - __u32 chtype; - __u32 erindex; - - __u64 rbase __packed __aligned(4); - __u64 rlen __packed __aligned(4); - __u64 rp __packed __aligned(4); - __u64 wp __packed __aligned(4); + __le32 chcfg; + __le32 chtype; + __le32 erindex; + + __le64 rbase __packed __aligned(4); + __le64 rlen __packed __aligned(4); + __le64 rp __packed __aligned(4); + __le64 wp __packed __aligned(4); }; struct mhi_cmd_ctxt { - __u32 reserved0; - __u32 reserved1; - __u32 reserved2; - - __u64 rbase __packed __aligned(4); - __u64 rlen __packed __aligned(4); - __u64 rp __packed __aligned(4); - __u64 wp __packed __aligned(4); + __le32 reserved0; + __le32 reserved1; + __le32 reserved2; + + __le64 rbase __packed __aligned(4); + __le64 rlen __packed __aligned(4); + __le64 rp __packed __aligned(4); + __le64 wp __packed __aligned(4); }; struct mhi_ctxt { @@ -258,8 +258,8 @@ struct mhi_ctxt { }; struct mhi_tre { - u64 ptr; - u32 dword[2]; + __le64 ptr; + __le32 dword[2]; }; struct 
bhi_vec_entry { @@ -277,57 +277,58 @@ enum mhi_cmd_type { /* No operation command */ #define MHI_TRE_CMD_NOOP_PTR (0) #define MHI_TRE_CMD_NOOP_DWORD0 (0) -#define MHI_TRE_CMD_NOOP_DWORD1 (MHI_CMD_NOP << 16) +#define MHI_TRE_CMD_NOOP_DWORD1 (cpu_to_le32(MHI_CMD_NOP << 16)) /* Channel reset command */ #define MHI_TRE_CMD_RESET_PTR (0) #define MHI_TRE_CMD_RESET_DWORD0 (0) -#define MHI_TRE_CMD_RESET_DWORD1(chid) ((chid << 24) | \ - (MHI_CMD_RESET_CHAN << 16)) +#define MHI_TRE_CMD_RESET_DWORD1(chid) (cpu_to_le32((chid << 24) | \ + (MHI_CMD_RESET_CHAN << 16))) /* Channel stop command */ #define MHI_TRE_CMD_STOP_PTR (0) #define MHI_TRE_CMD_STOP_DWORD0 (0) -#define MHI_TRE_CMD_STOP_DWORD1(chid) ((chid << 24) | \ - (MHI_CMD_STOP_CHAN << 16)) +#define MHI_TRE_CMD_STOP_DWORD1(chid) (cpu_to_le32((chid << 24) | \ + (MHI_CMD_STOP_CHAN << 16))) /* Channel start command */ #define MHI_TRE_CMD_START_PTR (0) #define MHI_TRE_CMD_START_DWORD0 (0) -#define MHI_TRE_CMD_START_DWORD1(chid) ((chid << 24) | \ - (MHI_CMD_START_CHAN << 16)) +#define MHI_TRE_CMD_START_DWORD1(chid) (cpu_to_le32((chid << 24) | \ + (MHI_CMD_START_CHAN << 16))) -#define MHI_TRE_GET_CMD_CHID(tre) (((tre)->dword[1] >> 24) & 0xFF) -#define MHI_TRE_GET_CMD_TYPE(tre) (((tre)->dword[1] >> 16) & 0xFF) +#define MHI_TRE_GET_DWORD(tre, word) (le32_to_cpu((tre)->dword[(word)])) +#define MHI_TRE_GET_CMD_CHID(tre) ((MHI_TRE_GET_DWORD(tre, 1) >> 24) & 0xFF) +#define MHI_TRE_GET_CMD_TYPE(tre) ((MHI_TRE_GET_DWORD(tre, 1) >> 16) & 0xFF) /* Event descriptor macros */ -#define MHI_TRE_EV_PTR(ptr) (ptr) -#define MHI_TRE_EV_DWORD0(code, len) ((code << 24) | len) -#define MHI_TRE_EV_DWORD1(chid, type) ((chid << 24) | (type << 16)) -#define MHI_TRE_GET_EV_PTR(tre) ((tre)->ptr) -#define MHI_TRE_GET_EV_CODE(tre) (((tre)->dword[0] >> 24) & 0xFF) -#define MHI_TRE_GET_EV_LEN(tre) ((tre)->dword[0] & 0xFFFF) -#define MHI_TRE_GET_EV_CHID(tre) (((tre)->dword[1] >> 24) & 0xFF) -#define MHI_TRE_GET_EV_TYPE(tre) (((tre)->dword[1] >> 16) & 0xFF) 
-#define MHI_TRE_GET_EV_STATE(tre) (((tre)->dword[0] >> 24) & 0xFF) -#define MHI_TRE_GET_EV_EXECENV(tre) (((tre)->dword[0] >> 24) & 0xFF) -#define MHI_TRE_GET_EV_SEQ(tre) ((tre)->dword[0]) -#define MHI_TRE_GET_EV_TIME(tre) ((tre)->ptr) -#define MHI_TRE_GET_EV_COOKIE(tre) lower_32_bits((tre)->ptr) -#define MHI_TRE_GET_EV_VEID(tre) (((tre)->dword[0] >> 16) & 0xFF) -#define MHI_TRE_GET_EV_LINKSPEED(tre) (((tre)->dword[1] >> 24) & 0xFF) -#define MHI_TRE_GET_EV_LINKWIDTH(tre) ((tre)->dword[0] & 0xFF) +#define MHI_TRE_EV_PTR(ptr) (cpu_to_le64(ptr)) +#define MHI_TRE_EV_DWORD0(code, len) (cpu_to_le32((code << 24) | len)) +#define MHI_TRE_EV_DWORD1(chid, type) (cpu_to_le32((chid << 24) | (type << 16))) +#define MHI_TRE_GET_EV_PTR(tre) (le64_to_cpu((tre)->ptr)) +#define MHI_TRE_GET_EV_CODE(tre) ((MHI_TRE_GET_DWORD(tre, 0) >> 24) & 0xFF) +#define MHI_TRE_GET_EV_LEN(tre) (MHI_TRE_GET_DWORD(tre, 0) & 0xFFFF) +#define MHI_TRE_GET_EV_CHID(tre) ((MHI_TRE_GET_DWORD(tre, 1) >> 24) & 0xFF) +#define MHI_TRE_GET_EV_TYPE(tre) ((MHI_TRE_GET_DWORD(tre, 1) >> 16) & 0xFF) +#define MHI_TRE_GET_EV_STATE(tre) ((MHI_TRE_GET_DWORD(tre, 0) >> 24) & 0xFF) +#define MHI_TRE_GET_EV_EXECENV(tre) ((MHI_TRE_GET_DWORD(tre, 0) >> 24) & 0xFF) +#define MHI_TRE_GET_EV_SEQ(tre) MHI_TRE_GET_DWORD(tre, 0) +#define MHI_TRE_GET_EV_TIME(tre) (MHI_TRE_GET_EV_PTR(tre)) +#define MHI_TRE_GET_EV_COOKIE(tre) lower_32_bits(MHI_TRE_GET_EV_PTR(tre)) +#define MHI_TRE_GET_EV_VEID(tre) ((MHI_TRE_GET_DWORD(tre, 0) >> 16) & 0xFF) +#define MHI_TRE_GET_EV_LINKSPEED(tre) ((MHI_TRE_GET_DWORD(tre, 1) >> 24) & 0xFF) +#define MHI_TRE_GET_EV_LINKWIDTH(tre) (MHI_TRE_GET_DWORD(tre, 0) & 0xFF) /* Transfer descriptor macros */ -#define MHI_TRE_DATA_PTR(ptr) (ptr) -#define MHI_TRE_DATA_DWORD0(len) (len & MHI_MAX_MTU) -#define MHI_TRE_DATA_DWORD1(bei, ieot, ieob, chain) ((2 << 16) | (bei << 10) \ - | (ieot << 9) | (ieob << 8) | chain) +#define MHI_TRE_DATA_PTR(ptr) (cpu_to_le64(ptr)) +#define MHI_TRE_DATA_DWORD0(len) (cpu_to_le32(len & 
MHI_MAX_MTU)) +#define MHI_TRE_DATA_DWORD1(bei, ieot, ieob, chain) (cpu_to_le32((2 << 16) | (bei << 10) \ + | (ieot << 9) | (ieob << 8) | chain)) /* RSC transfer descriptor macros */ -#define MHI_RSCTRE_DATA_PTR(ptr, len) (((u64)len << 48) | ptr) -#define MHI_RSCTRE_DATA_DWORD0(cookie) (cookie) -#define MHI_RSCTRE_DATA_DWORD1 (MHI_PKT_TYPE_COALESCING << 16) +#define MHI_RSCTRE_DATA_PTR(ptr, len) (cpu_to_le64(((u64)len << 48) | ptr)) +#define MHI_RSCTRE_DATA_DWORD0(cookie) (cpu_to_le32(cookie)) +#define MHI_RSCTRE_DATA_DWORD1 (cpu_to_le32(MHI_PKT_TYPE_COALESCING << 16)) enum mhi_pkt_type { MHI_PKT_TYPE_INVALID = 0x0, @@ -500,7 +501,7 @@ struct state_transition { struct mhi_ring { dma_addr_t dma_handle; dma_addr_t iommu_base; - u64 *ctxt_wp; /* point to ctxt wp */ + __le64 *ctxt_wp; /* point to ctxt wp */ void *pre_aligned; void *base; void *rp; @@ -622,7 +623,7 @@ void mhi_free_bhie_table(struct mhi_controller *mhi_cntrl, enum mhi_pm_state __must_check mhi_tryset_pm_state( struct mhi_controller *mhi_cntrl, enum mhi_pm_state state); -const char *to_mhi_pm_state_str(enum mhi_pm_state state); +const char *to_mhi_pm_state_str(u32 state); int mhi_queue_state_transition(struct mhi_controller *mhi_cntrl, enum dev_st_transition state); void mhi_pm_st_worker(struct work_struct *work); diff --git a/drivers/bus/mhi/core/main.c b/drivers/bus/mhi/core/main.c index ffde617f93a3b..85f4f7c8d7c60 100644 --- a/drivers/bus/mhi/core/main.c +++ b/drivers/bus/mhi/core/main.c @@ -114,7 +114,7 @@ void mhi_ring_er_db(struct mhi_event *mhi_event) struct mhi_ring *ring = &mhi_event->ring; mhi_event->db_cfg.process_db(mhi_event->mhi_cntrl, &mhi_event->db_cfg, - ring->db_addr, *ring->ctxt_wp); + ring->db_addr, le64_to_cpu(*ring->ctxt_wp)); } void mhi_ring_cmd_db(struct mhi_controller *mhi_cntrl, struct mhi_cmd *mhi_cmd) @@ -123,7 +123,7 @@ void mhi_ring_cmd_db(struct mhi_controller *mhi_cntrl, struct mhi_cmd *mhi_cmd) struct mhi_ring *ring = &mhi_cmd->ring; db = ring->iommu_base + (ring->wp - 
ring->base); - *ring->ctxt_wp = db; + *ring->ctxt_wp = cpu_to_le64(db); mhi_write_db(mhi_cntrl, ring->db_addr, db); } @@ -140,7 +140,7 @@ void mhi_ring_chan_db(struct mhi_controller *mhi_cntrl, * before letting h/w know there is new element to fetch. */ dma_wmb(); - *ring->ctxt_wp = db; + *ring->ctxt_wp = cpu_to_le64(db); mhi_chan->db_cfg.process_db(mhi_cntrl, &mhi_chan->db_cfg, ring->db_addr, db); @@ -432,7 +432,7 @@ irqreturn_t mhi_irq_handler(int irq_number, void *dev) struct mhi_event_ctxt *er_ctxt = &mhi_cntrl->mhi_ctxt->er_ctxt[mhi_event->er_index]; struct mhi_ring *ev_ring = &mhi_event->ring; - dma_addr_t ptr = er_ctxt->rp; + dma_addr_t ptr = le64_to_cpu(er_ctxt->rp); void *dev_rp; if (!is_valid_ring_ptr(ev_ring, ptr)) { @@ -537,14 +537,14 @@ static void mhi_recycle_ev_ring_element(struct mhi_controller *mhi_cntrl, /* Update the WP */ ring->wp += ring->el_size; - ctxt_wp = *ring->ctxt_wp + ring->el_size; + ctxt_wp = le64_to_cpu(*ring->ctxt_wp) + ring->el_size; if (ring->wp >= (ring->base + ring->len)) { ring->wp = ring->base; ctxt_wp = ring->iommu_base; } - *ring->ctxt_wp = ctxt_wp; + *ring->ctxt_wp = cpu_to_le64(ctxt_wp); /* Update the RP */ ring->rp += ring->el_size; @@ -801,7 +801,7 @@ int mhi_process_ctrl_ev_ring(struct mhi_controller *mhi_cntrl, struct device *dev = &mhi_cntrl->mhi_dev->dev; u32 chan; int count = 0; - dma_addr_t ptr = er_ctxt->rp; + dma_addr_t ptr = le64_to_cpu(er_ctxt->rp); /* * This is a quick check to avoid unnecessary event processing @@ -940,7 +940,7 @@ int mhi_process_ctrl_ev_ring(struct mhi_controller *mhi_cntrl, mhi_recycle_ev_ring_element(mhi_cntrl, ev_ring); local_rp = ev_ring->rp; - ptr = er_ctxt->rp; + ptr = le64_to_cpu(er_ctxt->rp); if (!is_valid_ring_ptr(ev_ring, ptr)) { dev_err(&mhi_cntrl->mhi_dev->dev, "Event ring rp points outside of the event ring\n"); @@ -970,7 +970,7 @@ int mhi_process_data_event_ring(struct mhi_controller *mhi_cntrl, int count = 0; u32 chan; struct mhi_chan *mhi_chan; - dma_addr_t ptr = er_ctxt->rp; 
+ dma_addr_t ptr = le64_to_cpu(er_ctxt->rp); if (unlikely(MHI_EVENT_ACCESS_INVALID(mhi_cntrl->pm_state))) return -EIO; @@ -1011,7 +1011,7 @@ int mhi_process_data_event_ring(struct mhi_controller *mhi_cntrl, mhi_recycle_ev_ring_element(mhi_cntrl, ev_ring); local_rp = ev_ring->rp; - ptr = er_ctxt->rp; + ptr = le64_to_cpu(er_ctxt->rp); if (!is_valid_ring_ptr(ev_ring, ptr)) { dev_err(&mhi_cntrl->mhi_dev->dev, "Event ring rp points outside of the event ring\n"); @@ -1533,7 +1533,7 @@ static void mhi_mark_stale_events(struct mhi_controller *mhi_cntrl, /* mark all stale events related to channel as STALE event */ spin_lock_irqsave(&mhi_event->lock, flags); - ptr = er_ctxt->rp; + ptr = le64_to_cpu(er_ctxt->rp); if (!is_valid_ring_ptr(ev_ring, ptr)) { dev_err(&mhi_cntrl->mhi_dev->dev, "Event ring rp points outside of the event ring\n"); diff --git a/drivers/bus/mhi/core/pm.c b/drivers/bus/mhi/core/pm.c index 4aae0baea0084..c35c5ddc72207 100644 --- a/drivers/bus/mhi/core/pm.c +++ b/drivers/bus/mhi/core/pm.c @@ -218,7 +218,7 @@ int mhi_ready_state_transition(struct mhi_controller *mhi_cntrl) continue; ring->wp = ring->base + ring->len - ring->el_size; - *ring->ctxt_wp = ring->iommu_base + ring->len - ring->el_size; + *ring->ctxt_wp = cpu_to_le64(ring->iommu_base + ring->len - ring->el_size); /* Update all cores */ smp_wmb(); @@ -420,7 +420,7 @@ static int mhi_pm_mission_mode_transition(struct mhi_controller *mhi_cntrl) continue; ring->wp = ring->base + ring->len - ring->el_size; - *ring->ctxt_wp = ring->iommu_base + ring->len - ring->el_size; + *ring->ctxt_wp = cpu_to_le64(ring->iommu_base + ring->len - ring->el_size); /* Update to all cores */ smp_wmb(); diff --git a/drivers/bus/mhi/pci_generic.c b/drivers/bus/mhi/pci_generic.c index b79895810c52f..9527b7d638401 100644 --- a/drivers/bus/mhi/pci_generic.c +++ b/drivers/bus/mhi/pci_generic.c @@ -327,6 +327,7 @@ static const struct mhi_pci_dev_info mhi_quectel_em1xx_info = { .config = &modem_quectel_em1xx_config, .bar_num = 
MHI_PCI_DEFAULT_BAR_NUM, .dma_data_width = 32, + .mru_default = 32768, .sideband_wake = true, }; diff --git a/drivers/bus/mips_cdmm.c b/drivers/bus/mips_cdmm.c index 626dedd110cbc..fca0d0669aa97 100644 --- a/drivers/bus/mips_cdmm.c +++ b/drivers/bus/mips_cdmm.c @@ -351,6 +351,7 @@ phys_addr_t __weak mips_cdmm_phys_base(void) np = of_find_compatible_node(NULL, NULL, "mti,mips-cdmm"); if (np) { err = of_address_to_resource(np, 0, &res); + of_node_put(np); if (!err) return res.start; } diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index 740811893c570..93d52a419470a 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -449,6 +449,9 @@ config RANDOM_TRUST_BOOTLOADER device randomness. Say Y here to assume the entropy provided by the booloader is trustworthy so it will be added to the kernel's entropy pool. Otherwise, say N here so it will be regarded as device input that - only mixes the entropy pool. + only mixes the entropy pool. This can also be configured at boot with + "random.trust_bootloader=on/off". + +source "drivers/char/lrng/Kconfig" endmenu diff --git a/drivers/char/Makefile b/drivers/char/Makefile index 264eb398fdd4f..7371f7464a49f 100644 --- a/drivers/char/Makefile +++ b/drivers/char/Makefile @@ -3,7 +3,14 @@ # Makefile for the kernel character device drivers. 
# -obj-y += mem.o random.o +obj-y += mem.o + +ifeq ($(CONFIG_LRNG),y) + obj-y += lrng/ +else + obj-y += random.o +endif + obj-$(CONFIG_TTY_PRINTK) += ttyprintk.o obj-y += misc.o obj-$(CONFIG_ATARI_DSP56K) += dsp56k.o diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig index 9704963f9d500..a087156a58186 100644 --- a/drivers/char/hw_random/Kconfig +++ b/drivers/char/hw_random/Kconfig @@ -401,7 +401,7 @@ config HW_RANDOM_MESON config HW_RANDOM_CAVIUM tristate "Cavium ThunderX Random Number Generator support" - depends on HW_RANDOM && PCI && ARM64 + depends on HW_RANDOM && PCI && ARCH_THUNDER default HW_RANDOM help This driver provides kernel-side support for the Random Number diff --git a/drivers/char/hw_random/atmel-rng.c b/drivers/char/hw_random/atmel-rng.c index ecb71c4317a50..8cf0ef501341e 100644 --- a/drivers/char/hw_random/atmel-rng.c +++ b/drivers/char/hw_random/atmel-rng.c @@ -114,6 +114,7 @@ static int atmel_trng_probe(struct platform_device *pdev) err_register: clk_disable_unprepare(trng->clk); + atmel_trng_disable(trng); return ret; } diff --git a/drivers/char/hw_random/cavium-rng-vf.c b/drivers/char/hw_random/cavium-rng-vf.c index 6f66919652bf5..7c55f4cf4a8ba 100644 --- a/drivers/char/hw_random/cavium-rng-vf.c +++ b/drivers/char/hw_random/cavium-rng-vf.c @@ -179,7 +179,7 @@ static int cavium_map_pf_regs(struct cavium_rng *rng) pdev = pci_get_device(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_CAVIUM_RNG_PF, NULL); if (!pdev) { - dev_err(&pdev->dev, "Cannot find RNG PF device\n"); + pr_err("Cannot find RNG PF device\n"); return -EIO; } diff --git a/drivers/char/hw_random/nomadik-rng.c b/drivers/char/hw_random/nomadik-rng.c index 67947a19aa225..e8f9621e79541 100644 --- a/drivers/char/hw_random/nomadik-rng.c +++ b/drivers/char/hw_random/nomadik-rng.c @@ -65,14 +65,14 @@ static int nmk_rng_probe(struct amba_device *dev, const struct amba_id *id) out_release: amba_release_regions(dev); out_clk: - clk_disable(rng_clk); + 
clk_disable_unprepare(rng_clk); return ret; } static void nmk_rng_remove(struct amba_device *dev) { amba_release_regions(dev); - clk_disable(rng_clk); + clk_disable_unprepare(rng_clk); } static const struct amba_id nmk_rng_ids[] = { diff --git a/drivers/char/lrng/Kconfig b/drivers/char/lrng/Kconfig new file mode 100644 index 0000000000000..658f89f6b35fd --- /dev/null +++ b/drivers/char/lrng/Kconfig @@ -0,0 +1,613 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Linux Random Number Generator configuration +# + +choice + prompt "Random Number Generator Implementation" + default RANDOM_DEFAULT_IMPL + help + Select the random number generator implementation that is + accessible via /dev/random, /dev/urandom, getrandom(2), and + the in-kernel function get_random_bytes. + +config RANDOM_DEFAULT_IMPL + bool "Default Implementation" + help + The default random number generator as provided with + drivers/char/random.c is selected with this option. + +config LRNG + bool "LRNG Implementation with SP800-90A/B/C compliance" + select CRYPTO_LIB_SHA256 if CRYPTO + help + The Linux Random Number Generator (LRNG) generates entropy + from different entropy sources. Each entropy source can + be enabled and configured independently. The interrupt + entropy source can be configured to be SP800-90B compliant. + The entire LRNG can be configured to be SP800-90C compliant. + Runtime-switchable cryptographic support is available. + The LRNG delivers significant entropy during boot. + +endchoice + +menu "Linux Random Number Generator Configuration" + depends on LRNG + +if LRNG + +menu "Specific DRNG seeding strategies" + +config LRNG_OVERSAMPLE_ENTROPY_SOURCES + bool "Oversample entropy sources" + default n + help + When enabling this option, the entropy sources are + over-sampled with the following approach: First, the + entropy sources are requested to provide 64 bits more + entropy than the size of the entropy buffer. 
For example, + if the entropy buffer is 256 bits, 320 bits of entropy + is requested to fill that buffer. + + Second, the seed operation of the deterministic RNG + requests 128 bits more data from each entropy source than + the security strength of the DRNG during initialization. + A prerequisite for this operation is that the digest size + of the used hash must be at least equally large to generate + that buffer. If the prerequisite is not met, this + oversampling is not applied. + + This strategy is intended to offset the asymptotic entropy + increase to reach full entropy in a buffer. + + The strategy is consistent with the requirements in + NIST SP800-90C and is only enforced with fips=1. + + If unsure, say N. + +config LRNG_OVERSAMPLE_ES_BITS + int + default 0 if !LRNG_OVERSAMPLE_ENTROPY_SOURCES + default 64 if LRNG_OVERSAMPLE_ENTROPY_SOURCES + +config LRNG_SEED_BUFFER_INIT_ADD_BITS + int + default 0 if !LRNG_OVERSAMPLE_ENTROPY_SOURCES + default 128 if LRNG_OVERSAMPLE_ENTROPY_SOURCES + +endmenu # "Specific DRNG seeding strategies" + +menu "Entropy Source Configuration" + +comment "Interrupt Entropy Source" + +config LRNG_IRQ + bool "Enable Interrupt Entropy Source as LRNG Seed Source" + default y + help + The LRNG models an entropy source based on the timing of the + occurrence of interrupts. Enable this option to enable this + IRQ entropy source. + + The IRQ entropy source is triggered every time an interrupt + arrives and thus causes the interrupt handler to execute + slightly longer. Disabling the IRQ entropy source implies + that the performance penalty on the interrupt handler added + by the LRNG is eliminated. Yet, this entropy source is + considered to be the internal entropy source of the LRNG. + Thus, only disable it if you ensured that other entropy + sources are available that supply the LRNG with entropy. 
+ + If you disable the IRQ entropy source, you MUST ensure + one or more entropy sources collectively have the + capability to deliver sufficient entropy with one invocation + at a rate compliant to the security strength of the DRNG + (usually 256 bits of entropy). In addition, if those + entropy sources do not deliver sufficient entropy during + first request, the reseed must be triggered from user + space or kernel space when sufficient entropy is considered + to be present. + + If unsure, say Y. + +choice + prompt "Continuous entropy compression boot time setting" + default LRNG_CONTINUOUS_COMPRESSION_ENABLED + depends on LRNG_IRQ + help + Select the default behavior of the interrupt entropy source + continuous compression operation. + + The Linux RNG collects entropy data during each interrupt. + For performance reasons, an amount of entropy data defined by + the LRNG entropy collection pool size is concatenated into + an array. When that array is filled up, a hash is calculated + to compress the entropy. That hash is calculated in + interrupt context. + + In case such hash calculation in interrupt context is deemed + too time-consuming, the continuous compression operation + can be disabled. If disabled, the collection of entropy will + not trigger a hash compression operation in interrupt context. + The compression happens only when the DRNG is reseeded which is + in process context. This implies that old entropy data + collected after the last DRNG-reseed is overwritten with newer + entropy data once the collection pool is full instead of + retaining its entropy with the compression operation. 
+ + config LRNG_CONTINUOUS_COMPRESSION_ENABLED + bool "Enable continuous compression (default)" + + config LRNG_CONTINUOUS_COMPRESSION_DISABLED + bool "Disable continuous compression" +endchoice + +config LRNG_ENABLE_CONTINUOUS_COMPRESSION + bool + default y if LRNG_CONTINUOUS_COMPRESSION_ENABLED + default n if LRNG_CONTINUOUS_COMPRESSION_DISABLED + +config LRNG_SWITCHABLE_CONTINUOUS_COMPRESSION + bool "Runtime-switchable continuous entropy compression" + depends on LRNG_IRQ + help + Per default, the interrupt entropy source continuous + compression operation behavior is hard-wired into the kernel. + Enable this option to allow it to be configurable at boot time. + + To modify the default behavior of the continuous + compression operation, use the kernel command line option + of lrng_sw_noise.lrng_pcpu_continuous_compression. + + If unsure, say N. + +choice + prompt "LRNG Entropy Collection Pool Size" + default LRNG_COLLECTION_SIZE_1024 + depends on LRNG_IRQ + help + Select the size of the LRNG entropy collection pool + storing data for the interrupt entropy source without + performing a compression operation. The larger the + collection size is, the faster the average interrupt + handling will be. The collection size represents the + number of bytes of the per-CPU memory used to batch + up entropy event data. + + The default value is good for regular operations. Choose + larger sizes for servers that have no memory limitations. + If runtime memory is precious, choose a smaller size. + + The collection size is unrelated to the entropy rate + or the amount of entropy the LRNG can process. 
+ + config LRNG_COLLECTION_SIZE_32 + depends on LRNG_CONTINUOUS_COMPRESSION_ENABLED + depends on !LRNG_SWITCHABLE_CONTINUOUS_COMPRESSION + depends on !LRNG_OVERSAMPLE_ENTROPY_SOURCES + bool "32 interrupt events" + + config LRNG_COLLECTION_SIZE_256 + depends on !LRNG_OVERSAMPLE_ENTROPY_SOURCES + bool "256 interrupt events" + + config LRNG_COLLECTION_SIZE_512 + bool "512 interrupt events" + + config LRNG_COLLECTION_SIZE_1024 + bool "1024 interrupt events (default)" + + config LRNG_COLLECTION_SIZE_2048 + bool "2048 interrupt events" + + config LRNG_COLLECTION_SIZE_4096 + bool "4096 interrupt events" + + config LRNG_COLLECTION_SIZE_8192 + bool "8192 interrupt events" + +endchoice + +config LRNG_COLLECTION_SIZE + int + default 32 if LRNG_COLLECTION_SIZE_32 + default 256 if LRNG_COLLECTION_SIZE_256 + default 512 if LRNG_COLLECTION_SIZE_512 + default 1024 if LRNG_COLLECTION_SIZE_1024 + default 2048 if LRNG_COLLECTION_SIZE_2048 + default 4096 if LRNG_COLLECTION_SIZE_4096 + default 8192 if LRNG_COLLECTION_SIZE_8192 + +config LRNG_HEALTH_TESTS + bool "Enable interrupt entropy source online health tests" + depends on LRNG_IRQ + help + The online health tests applied to the interrupt entropy + source validate the noise source at runtime for fatal + errors. These tests include SP800-90B compliant tests + which are invoked if the system is booted with fips=1. + In case of fatal errors during active SP800-90B tests, + the issue is logged and the noise data is discarded. + These tests are required for full compliance of the + interrupt entropy source with SP800-90B. + + If unsure, say Y. + +config LRNG_RCT_BROKEN + bool "SP800-90B RCT with dangerous low cutoff value" + depends on LRNG_HEALTH_TESTS + depends on BROKEN + default n + help + This option enables a dangerously low SP800-90B repetitive + count test (RCT) cutoff value which makes it very likely + that the RCT is triggered to raise a self test failure. 
+ + This option is ONLY intended for developers wanting to + test the effectiveness of the SP800-90B RCT health test. + + If unsure, say N. + +config LRNG_APT_BROKEN + bool "SP800-90B APT with dangerous low cutoff value" + depends on LRNG_HEALTH_TESTS + depends on BROKEN + default n + help + This option enables a dangerously low SP800-90B adaptive + proportion test (APT) cutoff value which makes it very + likely that the APT is triggered to raise a self test + failure. + + This option is ONLY intended for developers wanting to + test the effectiveness of the SP800-90B APT health test. + + If unsure, say N. + +# Default taken from SP800-90B sec 4.4.1 - significance level 2^-30 +config LRNG_RCT_CUTOFF + int + default 31 if !LRNG_RCT_BROKEN + default 1 if LRNG_RCT_BROKEN + +# Default taken from SP800-90B sec 4.4.2 - significance level 2^-30 +config LRNG_APT_CUTOFF + int + default 325 if !LRNG_APT_BROKEN + default 32 if LRNG_APT_BROKEN + +config LRNG_IRQ_ENTROPY_RATE + int "Interrupt Entropy Source Entropy Rate" + depends on LRNG_IRQ + range 256 4294967295 + default 256 + help + The LRNG will collect the configured number of interrupts to + obtain 256 bits of entropy. This value can be set to any between + 256 and 4294967295. The LRNG guarantees that this value is not + lower than 256. This lower limit implies that one interrupt event + is credited with one bit of entropy. This value is subject to the + increase by the oversampling factor, if no high-resolution timer + is found. + + In order to effectively disable the interrupt entropy source, + the option has to be set to 4294967295. In this case, the + interrupt entropy source will still deliver data but without + being credited with entropy. + +comment "Jitter RNG Entropy Source" + +config LRNG_JENT + bool "Enable Jitter RNG as LRNG Seed Source" + depends on CRYPTO + select CRYPTO_JITTERENTROPY + help + The Linux RNG may use the Jitter RNG as noise source. Enabling + this option enables the use of the Jitter RNG. 
Its default + entropy level is 16 bits of entropy per 256 data bits delivered + by the Jitter RNG. This entropy level can be changed at boot + time or at runtime with the lrng_base.jitterrng configuration + variable. + +config LRNG_JENT_ENTROPY_RATE + int "Jitter RNG Entropy Source Entropy Rate" + depends on LRNG_JENT + range 0 256 + default 16 + help + The option defines the amount of entropy the LRNG applies to 256 + bits of data obtained from the Jitter RNG entropy source. The + LRNG enforces the limit that this value must be in the range + between 0 and 256. + + When configuring this value to 0, the Jitter RNG entropy source + will provide 256 bits of data without being credited to contain + entropy. + +comment "CPU Entropy Source" + +config LRNG_CPU + bool "Enable CPU Entropy Source as LRNG Seed Source" + default y + help + Current CPUs commonly contain entropy sources which can be + used to seed the LRNG. For example, the Intel RDSEED + instruction, or the POWER DARN instruction will be sourced + to seed the LRNG if this option is enabled. + + Note, if this option is enabled and the underlying CPU + does not offer such entropy source, the LRNG will automatically + detect this and ignore the hardware. + +config LRNG_CPU_FULL_ENT_MULTIPLIER + int + default 1 if !LRNG_TEST_CPU_ES_COMPRESSION + default 123 if LRNG_TEST_CPU_ES_COMPRESSION + +config LRNG_CPU_ENTROPY_RATE + int "CPU Entropy Source Entropy Rate" + depends on LRNG_CPU + range 0 256 + default 8 + help + The option defines the amount of entropy the LRNG applies to 256 + bits of data obtained from the CPU entropy source. The LRNG + enforces the limit that this value must be in the range between + 0 and 256. + + When configuring this value to 0, the CPU entropy source will + provide 256 bits of data without being credited to contain + entropy. + + Note, this option is overwritten when the option + CONFIG_RANDOM_TRUST_CPU is set. 
+ +endmenu # "Entropy Source Configuration" + +menuconfig LRNG_DRNG_SWITCH + bool "Support DRNG runtime switching" + help + The Linux RNG per default uses a ChaCha20 DRNG that is + accessible via the external interfaces. With this configuration + option other DRNGs can be selected and loaded at runtime. + +if LRNG_DRNG_SWITCH + +config LRNG_KCAPI_HASH + bool + select CRYPTO_HASH + +config LRNG_DRBG + tristate "SP800-90A support for the LRNG" + depends on CRYPTO + select CRYPTO_DRBG_MENU + select CRYPTO_SHA512 + select LRNG_KCAPI_HASH + help + Enable the SP800-90A DRBG support for the LRNG. Once the + module is loaded, output from /dev/random, /dev/urandom, + getrandom(2), or get_random_bytes_full is provided by a DRBG. + +config LRNG_KCAPI + tristate "Kernel Crypto API support for the LRNG" + depends on CRYPTO + depends on !LRNG_DRBG + select CRYPTO_RNG + select LRNG_KCAPI_HASH + help + Enable the support for generic pseudo-random number + generators offered by the kernel crypto API with the + LRNG. Once the module is loaded, output from /dev/random, + /dev/urandom, getrandom(2), or get_random_bytes is + provided by the selected kernel crypto API RNG. +endif # LRNG_DRNG_SWITCH + +menuconfig LRNG_TESTING_MENU + bool "LRNG testing interfaces" + depends on DEBUG_FS + help + Enable one or more of the following test interfaces. + + If unsure, say N. + +if LRNG_TESTING_MENU + +config LRNG_RAW_HIRES_ENTROPY + bool "Enable entropy test interface to hires timer noise source" + default y + help + The test interface allows a privileged process to capture + the raw unconditioned high resolution time stamp noise that + is collected by the LRNG for statistical analysis. Extracted + noise data is not used to seed the LRNG. + + The raw noise data can be obtained using the lrng_raw_hires + debugfs file. Using the option lrng_testing.boot_raw_hires_test=1 + the raw noise of the first 1000 entropy events since boot + can be sampled. 
+ +config LRNG_RAW_JIFFIES_ENTROPY + bool "Enable entropy test interface to Jiffies noise source" + help + The test interface allows a privileged process to capture + the raw unconditioned Jiffies that is collected by + the LRNG for statistical analysis. This data is used for + seeding the LRNG if a high-resolution time stamp is not + available. If a high-resolution time stamp is detected, + the Jiffies value is not collected by the LRNG and no + data is provided via the test interface. Extracted noise + data is not used to seed the random number generator. + + The raw noise data can be obtained using the lrng_raw_jiffies + debugfs file. Using the option lrng_testing.boot_raw_jiffies_test=1 + the raw noise of the first 1000 entropy events since boot + can be sampled. + +config LRNG_RAW_IRQ_ENTROPY + bool "Enable entropy test interface to IRQ number noise source" + help + The test interface allows a privileged process to capture + the raw unconditioned interrupt number that is collected by + the LRNG for statistical analysis. This data is used for + seeding the random32 PRNG external to the LRNG if a + high-resolution time stamp is available or it will be used to + seed the LRNG otherwise. Extracted noise data is not used to + seed the random number generator. + + The raw noise data can be obtained using the lrng_raw_irq + debugfs file. Using the option lrng_testing.boot_raw_irq_test=1 + the raw noise of the first 1000 entropy events since boot + can be sampled. + +config LRNG_RAW_IRQFLAGS_ENTROPY + bool "Enable entropy test interface to IRQ flags noise source" + help + The test interface allows a privileged process to capture + the raw unconditioned interrupt flags that is collected by + the LRNG for statistical analysis. This data is used for + seeding the random32 PRNG external to the LRNG if a + high-resolution time stamp is available or it will be used to + seed the LRNG otherwise. Extracted noise data is not used to + seed the random number generator. 
+ + The raw noise data can be obtained using the lrng_raw_irqflags + debugfs file. Using the option lrng_testing.boot_raw_irqflags_test=1 + the raw noise of the first 1000 entropy events since boot + can be sampled. + +config LRNG_RAW_RETIP_ENTROPY + bool "Enable entropy test interface to RETIP value noise source" + help + The test interface allows a privileged process to capture + the raw unconditioned return instruction pointer value + that is collected by the LRNG for statistical analysis. + This data is used for seeding the random32 PRNG external + to the LRNG if a high-resolution time stamp is available or + it will be used to seed the LRNG otherwise. Extracted noise + data is not used to seed the random number generator. + + The raw noise data can be obtained using the lrng_raw_retip + debugfs file. Using the option lrng_testing.boot_raw_retip_test=1 + the raw noise of the first 1000 entropy events since boot + can be sampled. + +config LRNG_RAW_REGS_ENTROPY + bool "Enable entropy test interface to IRQ register value noise source" + help + The test interface allows a privileged process to capture + the raw unconditioned interrupt register value that is + collected by the LRNG for statistical analysis. Extracted noise + data is not used to seed the random number generator. + + The raw noise data can be obtained using the lrng_raw_regs + debugfs file. Using the option lrng_testing.boot_raw_regs_test=1 + the raw noise of the first 1000 entropy events since boot + can be sampled. + +config LRNG_RAW_ARRAY + bool "Enable test interface to LRNG raw entropy storage array" + help + The test interface allows a privileged process to capture + the raw noise data that is collected by the LRNG + in the per-CPU array for statistical analysis. The purpose + of this interface is to verify that the array handling code + truly only concatenates data and provides the same entropy + rate as the raw unconditioned noise source when assessing + the collected data byte-wise. 
+ + The data can be obtained using the lrng_raw_array debugfs + file. Using the option lrng_testing.boot_raw_array=1 + the raw noise of the first 1000 entropy events since boot + can be sampled. + +config LRNG_IRQ_PERF + bool "Enable LRNG interrupt performance monitor" + help + With this option, the performance monitor of the LRNG + interrupt handling code is enabled. The file provides + the execution time of the interrupt handler in + cycles. + + The interrupt performance data can be obtained using + the lrng_irq_perf debugfs file. Using the option + lrng_testing.boot_irq_perf=1 the performance data of + the first 1000 entropy events since boot can be sampled. + +config LRNG_ACVT_HASH + bool "Enable LRNG ACVT Hash interface" + help + With this option, the LRNG built-in hash function used for + auxiliary pool management and prior to switching the + cryptographic backends is made available for ACVT. The + interface allows writing of the data to be hashed + into the interface. The read operation triggers the hash + operation to generate message digest. + + The ACVT interface is available with the lrng_acvt_hash + debugfs file. + +config LRNG_RUNTIME_ES_CONFIG + bool "Enable runtime configuration of entropy sources" + help + When enabling this option, the LRNG provides the mechanism + allowing to alter the entropy rate of each entropy source + during boot time and runtime. + + The following interfaces are available: + lrng_archrandom.archrandom for the CPU entropy source, + lrng_jent.jitterrng for the Jitter RNG entropy source, and + lrng_sw_noise.irq_entropy for the interrupt entropy source. + +config LRNG_RUNTIME_MAX_WO_RESEED_CONFIG + bool "Enable runtime configuration of max reseed threshold" + help + When enabling this option, the LRNG provides an interface + allowing the setting of the maximum number of DRNG generate + operations without a reseed that has full entropy. The + interface is lrng_drng.max_wo_reseed. 
+ +config LRNG_TEST_CPU_ES_COMPRESSION + bool "Force CPU ES compression operation" + help + When enabling this option, the CPU ES compression operation + is forced by setting an arbitrary value > 1 for the data + multiplier even when the CPU ES would deliver full entropy. + This allows testing of the compression operation. It + therefore forces more data to be pulled from the CPU ES + than may be required. + +config LRNG_TESTING + bool + default y if (LRNG_RAW_HIRES_ENTROPY || LRNG_RAW_JIFFIES_ENTROPY ||LRNG_RAW_IRQ_ENTROPY || LRNG_RAW_IRQFLAGS_ENTROPY || LRNG_RAW_RETIP_ENTROPY || LRNG_RAW_REGS_ENTROPY || LRNG_RAW_ARRAY || LRNG_IRQ_PERF || LRNG_ACVT_HASH) + +endif #LRNG_TESTING_MENU + +config LRNG_SELFTEST + bool "Enable power-on and on-demand self-tests" + help + The power-on self-tests are executed during boot time + covering the ChaCha20 DRNG, the hash operation used for + processing the entropy pools and the auxiliary pool, and + the time stamp management of the LRNG. + + The on-demand self-tests are triggered by writing any + value into the SysFS file selftest_status. At the same + time, when reading this file, the test status is + returned. A zero indicates that all tests were executed + successfully. + + If unsure, say Y. + +if LRNG_SELFTEST + +config LRNG_SELFTEST_PANIC + bool "Panic the kernel upon self-test failure" + help + If the option is enabled, the kernel is terminated if an + LRNG power-on self-test failure is detected. + +endif # LRNG_SELFTEST + +endif # LRNG + +endmenu # LRNG diff --git a/drivers/char/lrng/Makefile b/drivers/char/lrng/Makefile new file mode 100644 index 0000000000000..e4f7f9702eb41 --- /dev/null +++ b/drivers/char/lrng/Makefile @@ -0,0 +1,21 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for the Linux Random Number Generator.
+# + +obj-y += lrng_es_mgr.o lrng_aux.o \ + lrng_drng.o lrng_chacha20.o \ + lrng_interfaces.o lrng_es_aux.o + +obj-$(CONFIG_LRNG_IRQ) += lrng_es_irq.o +obj-$(CONFIG_SYSCTL) += lrng_proc.o +obj-$(CONFIG_NUMA) += lrng_numa.o +obj-$(CONFIG_LRNG_CPU) += lrng_es_archrandom.o +obj-$(CONFIG_LRNG_DRNG_SWITCH) += lrng_switch.o +obj-$(CONFIG_LRNG_KCAPI_HASH) += lrng_kcapi_hash.o +obj-$(CONFIG_LRNG_DRBG) += lrng_drbg.o +obj-$(CONFIG_LRNG_KCAPI) += lrng_kcapi.o +obj-$(CONFIG_LRNG_JENT) += lrng_es_jent.o +obj-$(CONFIG_LRNG_HEALTH_TESTS) += lrng_health.o +obj-$(CONFIG_LRNG_TESTING) += lrng_testing.o +obj-$(CONFIG_LRNG_SELFTEST) += lrng_selftest.o diff --git a/drivers/char/lrng/lrng_aux.c b/drivers/char/lrng/lrng_aux.c new file mode 100644 index 0000000000000..e3b994f6e4c14 --- /dev/null +++ b/drivers/char/lrng/lrng_aux.c @@ -0,0 +1,136 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause +/* + * LRNG auxiliary interfaces + * + * Copyright (C) 2019 - 2021 Stephan Mueller + * Copyright (C) 2017 Jason A. Donenfeld . All + * Rights Reserved. + * Copyright (C) 2016 Jason Cooper + */ + +#include +#include + +#include "lrng_internal.h" + +struct batched_entropy { + union { + u64 entropy_u64[LRNG_DRNG_BLOCKSIZE / sizeof(u64)]; + u32 entropy_u32[LRNG_DRNG_BLOCKSIZE / sizeof(u32)]; + }; + unsigned int position; /* index of the next unused word; a multiple of the array size triggers a refill */ + spinlock_t batch_lock; /* taken around every access to the buffer and position */ +}; + +/* + * Get a random word for internal kernel use only. The quality of the random + * number is as good as /dev/urandom, but there is no backtrack protection, + * with the goal of being quite fast and not depleting entropy.
+ */ +static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u64) = { + .batch_lock = __SPIN_LOCK_UNLOCKED(batched_entropy_u64.lock), +}; + +u64 get_random_u64(void) +{ + u64 ret; + unsigned long flags; + struct batched_entropy *batch; + + lrng_debug_report_seedlevel("get_random_u64"); + + batch = raw_cpu_ptr(&batched_entropy_u64); + spin_lock_irqsave(&batch->batch_lock, flags); + if (batch->position % ARRAY_SIZE(batch->entropy_u64) == 0) { /* batch used up or invalidated: refill from the atomic DRNG */ + lrng_drng_get_atomic((u8 *)batch->entropy_u64, + LRNG_DRNG_BLOCKSIZE); /* NOTE(review): return value is not checked */ + batch->position = 0; + } + ret = batch->entropy_u64[batch->position++]; + spin_unlock_irqrestore(&batch->batch_lock, flags); + return ret; +} +EXPORT_SYMBOL(get_random_u64); + +static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u32) = { + .batch_lock = __SPIN_LOCK_UNLOCKED(batched_entropy_u32.lock), +}; + +u32 get_random_u32(void) +{ + u32 ret; + unsigned long flags; + struct batched_entropy *batch; + + lrng_debug_report_seedlevel("get_random_u32"); + + batch = raw_cpu_ptr(&batched_entropy_u32); + spin_lock_irqsave(&batch->batch_lock, flags); + if (batch->position % ARRAY_SIZE(batch->entropy_u32) == 0) { /* batch used up or invalidated: refill from the atomic DRNG */ + lrng_drng_get_atomic((u8 *)batch->entropy_u32, + LRNG_DRNG_BLOCKSIZE); /* NOTE(review): return value is not checked */ + batch->position = 0; + } + ret = batch->entropy_u32[batch->position++]; + spin_unlock_irqrestore(&batch->batch_lock, flags); + return ret; +} +EXPORT_SYMBOL(get_random_u32); + +/* + * It's important to invalidate all potential batched entropy that might + * be stored before the crng is initialized, which we can do lazily by + * simply resetting the counter to zero so that it's re-extracted on the + * next usage.
+ */ +void invalidate_batched_entropy(void) +{ + int cpu; + unsigned long flags; + + for_each_possible_cpu(cpu) { + struct batched_entropy *batched_entropy; + + batched_entropy = per_cpu_ptr(&batched_entropy_u32, cpu); + spin_lock_irqsave(&batched_entropy->batch_lock, flags); + batched_entropy->position = 0; + spin_unlock(&batched_entropy->batch_lock); /* plain unlock: the saved IRQ state is only restored after the u64 batch below */ + + batched_entropy = per_cpu_ptr(&batched_entropy_u64, cpu); + spin_lock(&batched_entropy->batch_lock); + batched_entropy->position = 0; + spin_unlock_irqrestore(&batched_entropy->batch_lock, flags); + } +} + +/* + * randomize_page - Generate a random, page aligned address + * @start: The smallest acceptable address the caller will take. + * @range: The size of the area, starting at @start, within which the + * random address must fall. + * + * If @start + @range would overflow, @range is capped. + * + * NOTE: Historical use of randomize_range, which this replaces, presumed that + * @start was already page aligned. We now align it regardless. + * + * Return: A page aligned address within [start, start + range). On error, + * @start is returned. + */ +unsigned long randomize_page(unsigned long start, unsigned long range) +{ + if (!PAGE_ALIGNED(start)) { + range -= PAGE_ALIGN(start) - start; /* NOTE(review): wraps if range is smaller than the alignment gap */ + start = PAGE_ALIGN(start); + } + + if (start > ULONG_MAX - range) + range = ULONG_MAX - start; + + range >>= PAGE_SHIFT; /* from here on range counts whole pages */ + + if (range == 0) + return start; + + return start + (get_random_long() % range << PAGE_SHIFT); +} diff --git a/drivers/char/lrng/lrng_chacha20.c b/drivers/char/lrng/lrng_chacha20.c new file mode 100644 index 0000000000000..b5387bb33095e --- /dev/null +++ b/drivers/char/lrng/lrng_chacha20.c @@ -0,0 +1,321 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause +/* + * Backend for the LRNG providing the cryptographic primitives using + * ChaCha20 cipher implementations.
+ * + * Copyright (C) 2016 - 2021, Stephan Mueller + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include + +#include "lrng_chacha20.h" +#include "lrng_internal.h" + +/******************************* ChaCha20 DRNG *******************************/ + +#define CHACHA_BLOCK_WORDS (CHACHA_BLOCK_SIZE / sizeof(u32)) + +struct chacha20_state { + struct chacha20_block block; +}; + +/* + * Have a static memory block for the ChaCha20 DRNG instance to avoid calling + * kmalloc too early in the boot cycle. For subsequent allocation requests, + * such as per-NUMA-node DRNG instances, kmalloc will be used. + */ +struct chacha20_state chacha20; + +/* + * Update of the ChaCha20 state by either using an unused buffer part or by + * generating one ChaCha20 block which is half of the state of the ChaCha20. + * The block is XORed into the key part of the state. This shall ensure + * backtracking resistance as well as a proper mix of the ChaCha20 state once + * the key is injected.
+ */ +static void lrng_chacha20_update(struct chacha20_state *chacha20_state, + __le32 *buf, u32 used_words) +{ + struct chacha20_block *chacha20 = &chacha20_state->block; + u32 i; + __le32 tmp[CHACHA_BLOCK_WORDS]; + + BUILD_BUG_ON(sizeof(struct chacha20_block) != CHACHA_BLOCK_SIZE); + BUILD_BUG_ON(CHACHA_BLOCK_SIZE != 2 * CHACHA_KEY_SIZE); + + if (used_words > CHACHA_KEY_SIZE_WORDS) { /* fewer than a key's worth of unused words in buf: generate a fresh block, buf is not read */ + chacha20_block(&chacha20->constants[0], (u8 *)tmp); + for (i = 0; i < CHACHA_KEY_SIZE_WORDS; i++) + chacha20->key.u[i] ^= le32_to_cpu(tmp[i]); + memzero_explicit(tmp, sizeof(tmp)); + } else { + for (i = 0; i < CHACHA_KEY_SIZE_WORDS; i++) + chacha20->key.u[i] ^= le32_to_cpu(buf[i + used_words]); /* words after the used part were never handed to the caller */ + } + + /* Deterministic increment of nonce as required in RFC 7539 chapter 4 */ + chacha20->nonce[0]++; + if (chacha20->nonce[0] == 0) { + chacha20->nonce[1]++; + if (chacha20->nonce[1] == 0) + chacha20->nonce[2]++; + } + + /* Leave counter untouched as its start value is undefined in RFC */ +} + +/* + * Seed the ChaCha20 DRNG by injecting the input data into the key part of + * the ChaCha20 state. If the input data is longer than the ChaCha20 key size, + * perform a ChaCha20 operation after processing of key size input data. + * This operation shall spread out the entropy into the ChaCha20 state before + * new entropy is injected into the key part.
+ */ +static int lrng_cc20_drng_seed_helper(void *drng, const u8 *inbuf, u32 inbuflen) +{ + struct chacha20_state *chacha20_state = (struct chacha20_state *)drng; + struct chacha20_block *chacha20 = &chacha20_state->block; + + while (inbuflen) { + u32 i, todo = min_t(u32, inbuflen, CHACHA_KEY_SIZE); + + for (i = 0; i < todo; i++) + chacha20->key.b[i] ^= inbuf[i]; + + /* Break potential dependencies between the inbuf key blocks */ + lrng_chacha20_update(chacha20_state, NULL, + CHACHA_BLOCK_WORDS); /* CHACHA_BLOCK_WORDS selects the fresh-block path, so the NULL buffer is never read */ + inbuf += todo; + inbuflen -= todo; + } + + return 0; /* this helper has no failure path */ +} + +/* + * Chacha20 DRNG generation of random numbers: the stream output of ChaCha20 + * is the random number. After the completion of the generation of the + * stream, the entire ChaCha20 state is updated. + * + * Note, as the ChaCha20 implements a 32 bit counter, we must ensure + * that this function is only invoked for at most 2^32 - 1 ChaCha20 blocks + * before a reseed or an update happens. This is ensured by the variable + * outbuflen which is a 32 bit integer defining the number of bytes to be + * generated by the ChaCha20 DRNG. At the end of this function, an update + * operation is invoked which implies that the 32 bit counter will never be + * overflown in this implementation.
+ */ +static int lrng_cc20_drng_generate_helper(void *drng, u8 *outbuf, u32 outbuflen) +{ + struct chacha20_state *chacha20_state = (struct chacha20_state *)drng; + struct chacha20_block *chacha20 = &chacha20_state->block; + __le32 aligned_buf[CHACHA_BLOCK_WORDS]; + u32 ret = outbuflen, used = CHACHA_BLOCK_WORDS; /* used: words of aligned_buf given to the caller; the default makes the update generate its own block */ + int zeroize_buf = 0; + + while (outbuflen >= CHACHA_BLOCK_SIZE) { + chacha20_block(&chacha20->constants[0], outbuf); + outbuf += CHACHA_BLOCK_SIZE; + outbuflen -= CHACHA_BLOCK_SIZE; + } + + if (outbuflen) { /* trailing partial block goes through the stack buffer */ + chacha20_block(&chacha20->constants[0], (u8 *)aligned_buf); + memcpy(outbuf, aligned_buf, outbuflen); + used = ((outbuflen + sizeof(aligned_buf[0]) - 1) / + sizeof(aligned_buf[0])); /* bytes rounded up to whole words */ + zeroize_buf = 1; + } + + lrng_chacha20_update(chacha20_state, aligned_buf, used); + + if (zeroize_buf) + memzero_explicit(aligned_buf, sizeof(aligned_buf)); + + return ret; +} + +void lrng_cc20_init_state(struct chacha20_state *state) +{ + lrng_cc20_init_rfc7539(&state->block); +} + +/* + * Allocation of the DRNG state + * + * Returns the new state, ERR_PTR(-EINVAL) if sec_strength (in bytes) exceeds + * CHACHA_KEY_SIZE, or ERR_PTR(-ENOMEM) if the allocation fails. + */ +static void *lrng_cc20_drng_alloc(u32 sec_strength) +{ + struct chacha20_state *state = NULL; + + if (sec_strength > CHACHA_KEY_SIZE) { + pr_err("Security strength of ChaCha20 DRNG (%u bits) lower than requested by LRNG (%u bits)\n", + CHACHA_KEY_SIZE * 8, sec_strength * 8); + return ERR_PTR(-EINVAL); + } + if (sec_strength < CHACHA_KEY_SIZE) + pr_warn("Security strength of ChaCha20 DRNG (%u bits) higher than requested by LRNG (%u bits)\n", + CHACHA_KEY_SIZE * 8, sec_strength * 8); + + state = kmalloc(sizeof(struct chacha20_state), GFP_KERNEL); /* NOTE(review): not zeroed; only the constants are set below - confirm intended */ + if (!state) + return ERR_PTR(-ENOMEM); + pr_debug("memory for ChaCha20 core allocated\n"); + + lrng_cc20_init_state(state); + + return state; +} + +static void lrng_cc20_drng_dealloc(void *drng) +{ + struct chacha20_state *chacha20_state = (struct chacha20_state *)drng; + + if (drng == &chacha20) { /* the static boot-time instance is wiped, never freed */ + memzero_explicit(chacha20_state, sizeof(*chacha20_state)); + pr_debug("static ChaCha20 core zeroized\n"); +
return; + } + + pr_debug("ChaCha20 core zeroized and freed\n"); + kfree_sensitive(chacha20_state); +} + +/******************************* Hash Operation *******************************/ + +#ifdef CONFIG_CRYPTO_LIB_SHA256 + +#include + +static u32 lrng_cc20_hash_digestsize(void *hash) +{ + return SHA256_DIGEST_SIZE; /* the hash handle is not used by the built-in hash */ +} + +static int lrng_cc20_hash_init(struct shash_desc *shash, void *hash) +{ + /* + * We do not need a TFM - we only need sufficient space for + * struct sha256_state on the stack. + */ + sha256_init(shash_desc_ctx(shash)); + return 0; +} + +static int lrng_cc20_hash_update(struct shash_desc *shash, + const u8 *inbuf, u32 inbuflen) +{ + sha256_update(shash_desc_ctx(shash), inbuf, inbuflen); + return 0; +} + +static int lrng_cc20_hash_final(struct shash_desc *shash, u8 *digest) +{ + sha256_final(shash_desc_ctx(shash), digest); + return 0; +} + +static const char *lrng_cc20_hash_name(void) +{ + return "SHA-256"; +} + +static void lrng_cc20_hash_desc_zero(struct shash_desc *shash) +{ + memzero_explicit(shash_desc_ctx(shash), sizeof(struct sha256_state)); +} + +#else /* CONFIG_CRYPTO_LIB_SHA256 */ + +#include +#include + +/* + * If the SHA-256 support is not compiled, we fall back to SHA-1 that is always + * compiled and present in the kernel. + */ +static u32 lrng_cc20_hash_digestsize(void *hash) +{ + return SHA1_DIGEST_SIZE; +} + +static void lrng_sha1_block_fn(struct sha1_state *sctx, const u8 *src, + int blocks) +{ + u32 temp[SHA1_WORKSPACE_WORDS]; + + while (blocks--) { + sha1_transform(sctx->state, src, temp); + src += SHA1_BLOCK_SIZE; + } + memzero_explicit(temp, sizeof(temp)); /* wipe the transform workspace before returning */ +} + +static int lrng_cc20_hash_init(struct shash_desc *shash, void *hash) +{ + /* + * We do not need a TFM - we only need sufficient space for + * struct sha1_state on the stack.
+ */ + sha1_base_init(shash); + return 0; +} + +static int lrng_cc20_hash_update(struct shash_desc *shash, + const u8 *inbuf, u32 inbuflen) +{ + return sha1_base_do_update(shash, inbuf, inbuflen, lrng_sha1_block_fn); +} + +static int lrng_cc20_hash_final(struct shash_desc *shash, u8 *digest) +{ + return sha1_base_do_finalize(shash, lrng_sha1_block_fn) ?: + sha1_base_finish(shash, digest); /* finish only runs when finalize returned 0 */ +} + +static const char *lrng_cc20_hash_name(void) +{ + return "SHA-1"; +} + +static void lrng_cc20_hash_desc_zero(struct shash_desc *shash) +{ + memzero_explicit(shash_desc_ctx(shash), sizeof(struct sha1_state)); +} + +#endif /* CONFIG_CRYPTO_LIB_SHA256 */ + +static void *lrng_cc20_hash_alloc(void) +{ + pr_info("Hash %s allocated\n", lrng_cc20_hash_name()); + return NULL; /* nothing is allocated: the built-in hash works on the caller's shash_desc only */ +} + +static void lrng_cc20_hash_dealloc(void *hash) +{ +} + +static const char *lrng_cc20_drng_name(void) +{ + return "ChaCha20 DRNG"; +} + +const struct lrng_crypto_cb lrng_cc20_crypto_cb = { + .lrng_drng_name = lrng_cc20_drng_name, + .lrng_hash_name = lrng_cc20_hash_name, + .lrng_drng_alloc = lrng_cc20_drng_alloc, + .lrng_drng_dealloc = lrng_cc20_drng_dealloc, + .lrng_drng_seed_helper = lrng_cc20_drng_seed_helper, + .lrng_drng_generate_helper = lrng_cc20_drng_generate_helper, + .lrng_hash_alloc = lrng_cc20_hash_alloc, + .lrng_hash_dealloc = lrng_cc20_hash_dealloc, + .lrng_hash_digestsize = lrng_cc20_hash_digestsize, + .lrng_hash_init = lrng_cc20_hash_init, + .lrng_hash_update = lrng_cc20_hash_update, + .lrng_hash_final = lrng_cc20_hash_final, + .lrng_hash_desc_zero = lrng_cc20_hash_desc_zero, +}; diff --git a/drivers/char/lrng/lrng_chacha20.h b/drivers/char/lrng/lrng_chacha20.h new file mode 100644 index 0000000000000..bd0c0bee38f35 --- /dev/null +++ b/drivers/char/lrng/lrng_chacha20.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ +/* + * LRNG ChaCha20 definitions + * + * Copyright (C) 2016 - 2021, Stephan Mueller + */ + +#include + +/* State according to RFC 7539 section 2.3 */
+struct chacha20_block { + u32 constants[4]; + union { +#define CHACHA_KEY_SIZE_WORDS (CHACHA_KEY_SIZE / sizeof(u32)) + u32 u[CHACHA_KEY_SIZE_WORDS]; /* word view of the key */ + u8 b[CHACHA_KEY_SIZE]; /* byte view of the same key */ + } key; + u32 counter; + u32 nonce[3]; +}; + +static inline void lrng_cc20_init_rfc7539(struct chacha20_block *chacha20) +{ + chacha_init_consts(chacha20->constants); /* only the constants are set; key, counter and nonce are left as they are */ +} diff --git a/drivers/char/lrng/lrng_drbg.c b/drivers/char/lrng/lrng_drbg.c new file mode 100644 index 0000000000000..6ca6b05eccf4c --- /dev/null +++ b/drivers/char/lrng/lrng_drbg.c @@ -0,0 +1,198 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause +/* + * Backend for the LRNG providing the cryptographic primitives using the + * kernel crypto API and its DRBG. + * + * Copyright (C) 2016 - 2021, Stephan Mueller + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include + +#include "lrng_kcapi_hash.h" + +/* + * Define a DRBG plus a hash / MAC used to extract data from the entropy pool. + * For LRNG_HASH_NAME you can use a hash or a MAC (HMAC or CMAC) of your choice + * (Note, you should use the suggested selections below -- using SHA-1 or MD5 + * is not wise). The idea is that the used cipher primitive can be selected to + * be the same as used for the DRBG. I.e. the LRNG only uses one cipher + * primitive using the same cipher implementation with the options offered in + * the following. This means, if the CTR DRBG is selected and AES-NI is present, + * both the CTR DRBG and the selected cmac(aes) use AES-NI. + * + * The security strengths of the DRBGs are all 256 bits according to + * SP800-57 section 5.6.1. + * + * This definition is allowed to be changed.
+ */ +#ifdef CONFIG_CRYPTO_DRBG_CTR +static unsigned int lrng_drbg_type = 0; +#elif defined CONFIG_CRYPTO_DRBG_HMAC +static unsigned int lrng_drbg_type = 1; +#elif defined CONFIG_CRYPTO_DRBG_HASH +static unsigned int lrng_drbg_type = 2; +#else +#error "Unknown DRBG in use" +#endif + +/* The parameter must be r/o in sysfs as otherwise races appear. */ +module_param(lrng_drbg_type, uint, 0444); +MODULE_PARM_DESC(lrng_drbg_type, "DRBG type used for LRNG (0->CTR_DRBG, 1->HMAC_DRBG, 2->Hash_DRBG)"); + +struct lrng_drbg { + const char *hash_name; + const char *drbg_core; +}; + +static const struct lrng_drbg lrng_drbg_types[] = { /* indexed by lrng_drbg_type */ + { /* CTR_DRBG with AES-256 using derivation function */ + .hash_name = "sha512", + .drbg_core = "drbg_nopr_ctr_aes256", + }, { /* HMAC_DRBG with SHA-512 */ + .hash_name = "sha512", + .drbg_core = "drbg_nopr_hmac_sha512", + }, { /* Hash_DRBG with SHA-512 using derivation function */ + .hash_name = "sha512", + .drbg_core = "drbg_nopr_sha512" + } +}; + +static int lrng_drbg_drng_seed_helper(void *drng, const u8 *inbuf, u32 inbuflen) +{ + struct drbg_state *drbg = (struct drbg_state *)drng; + LIST_HEAD(seedlist); + struct drbg_string data; + int ret; + + drbg_string_fill(&data, inbuf, inbuflen); + list_add_tail(&data.list, &seedlist); + ret = drbg->d_ops->update(drbg, &seedlist, drbg->seeded); + + if (ret >= 0) + drbg->seeded = true; /* only a successful update marks the DRBG as seeded */ + + return ret; +} + +static int lrng_drbg_drng_generate_helper(void *drng, u8 *outbuf, u32 outbuflen) +{ + struct drbg_state *drbg = (struct drbg_state *)drng; + + return drbg->d_ops->generate(drbg, outbuf, outbuflen, NULL); +} + +static void *lrng_drbg_drng_alloc(u32 sec_strength) +{ + struct drbg_state *drbg; + int coreref = -1; + bool pr = false; + int ret; + + drbg_convert_tfm_core(lrng_drbg_types[lrng_drbg_type].drbg_core, + &coreref, &pr); + if (coreref < 0) /* still -1: the core name was not resolved */ + return ERR_PTR(-EFAULT); + + drbg = kzalloc(sizeof(struct drbg_state), GFP_KERNEL); + if (!drbg) + return ERR_PTR(-ENOMEM); + + drbg->core =
&drbg_cores[coreref]; + drbg->seeded = false; + ret = drbg_alloc_state(drbg); + if (ret) + goto err; + + if (sec_strength > drbg_sec_strength(drbg->core->flags)) { + pr_err("Security strength of DRBG (%u bits) lower than requested by LRNG (%u bits)\n", + drbg_sec_strength(drbg->core->flags) * 8, + sec_strength * 8); + goto dealloc; + } + + if (sec_strength < drbg_sec_strength(drbg->core->flags)) + pr_warn("Security strength of DRBG (%u bits) higher than requested by LRNG (%u bits)\n", + drbg_sec_strength(drbg->core->flags) * 8, + sec_strength * 8); + + pr_info("DRBG with %s core allocated\n", drbg->core->backend_cra_name); + + return drbg; + +dealloc: + if (drbg->d_ops) + drbg->d_ops->crypto_fini(drbg); + drbg_dealloc_state(drbg); +err: + kfree(drbg); + return ERR_PTR(-EINVAL); /* NOTE(review): also reached when drbg_alloc_state() failed; its error code in ret is not propagated */ +} + +static void lrng_drbg_drng_dealloc(void *drng) +{ + struct drbg_state *drbg = (struct drbg_state *)drng; + + if (drbg && drbg->d_ops) + drbg->d_ops->crypto_fini(drbg); + drbg_dealloc_state(drbg); + kfree_sensitive(drbg); + pr_info("DRBG deallocated\n"); +} + +static void *lrng_drbg_hash_alloc(void) +{ + return lrng_kcapi_hash_alloc(lrng_drbg_types[lrng_drbg_type].hash_name); +} + +static const char *lrng_drbg_name(void) +{ + return lrng_drbg_types[lrng_drbg_type].drbg_core; +} + +static const char *lrng_hash_name(void) +{ + return lrng_drbg_types[lrng_drbg_type].hash_name; +} + +static const struct lrng_crypto_cb lrng_drbg_crypto_cb = { + .lrng_drng_name = lrng_drbg_name, + .lrng_hash_name = lrng_hash_name, + .lrng_drng_alloc = lrng_drbg_drng_alloc, + .lrng_drng_dealloc = lrng_drbg_drng_dealloc, + .lrng_drng_seed_helper = lrng_drbg_drng_seed_helper, + .lrng_drng_generate_helper = lrng_drbg_drng_generate_helper, + .lrng_hash_alloc = lrng_drbg_hash_alloc, + .lrng_hash_dealloc = lrng_kcapi_hash_dealloc, + .lrng_hash_digestsize = lrng_kcapi_hash_digestsize, + .lrng_hash_init = lrng_kcapi_hash_init, + .lrng_hash_update = lrng_kcapi_hash_update, + .lrng_hash_final = lrng_kcapi_hash_final, +
.lrng_hash_desc_zero = lrng_kcapi_hash_zero, +}; + +static int __init lrng_drbg_init(void) +{ + if (lrng_drbg_type >= ARRAY_SIZE(lrng_drbg_types)) { /* the parameter indexes lrng_drbg_types[]: reject out-of-range values */ + pr_err("lrng_drbg_type parameter too large (given %u - max: %lu)\n", + lrng_drbg_type, + (unsigned long)ARRAY_SIZE(lrng_drbg_types) - 1); + return -EAGAIN; + } + return lrng_set_drng_cb(&lrng_drbg_crypto_cb); +} + +static void __exit lrng_drbg_exit(void) +{ + lrng_set_drng_cb(NULL); +} + +late_initcall(lrng_drbg_init); +module_exit(lrng_drbg_exit); +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_AUTHOR("Stephan Mueller "); +MODULE_DESCRIPTION("Linux Random Number Generator - SP800-90A DRBG backend"); diff --git a/drivers/char/lrng/lrng_drng.c b/drivers/char/lrng/lrng_drng.c new file mode 100644 index 0000000000000..b1d89a6f548d7 --- /dev/null +++ b/drivers/char/lrng/lrng_drng.c @@ -0,0 +1,451 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause +/* + * LRNG DRNG processing + * + * Copyright (C) 2016 - 2021, Stephan Mueller + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include + +#include "lrng_internal.h" + +/* + * Maximum number of seconds between DRNG reseed intervals of the DRNG. Note, + * this is enforced with the next request of random numbers from the + * DRNG. Setting this value to zero implies a reseeding attempt before every + * generated random number. + */ +int lrng_drng_reseed_max_time = 600; + +static atomic_t lrng_avail = ATOMIC_INIT(0); + +DEFINE_MUTEX(lrng_crypto_cb_update); + +/* DRNG for /dev/urandom, getrandom(2), get_random_bytes */ +static struct lrng_drng lrng_drng_init = { + .drng = &chacha20, + .crypto_cb = &lrng_cc20_crypto_cb, + .lock = __MUTEX_INITIALIZER(lrng_drng_init.lock), + .spin_lock = __SPIN_LOCK_UNLOCKED(lrng_drng_init.spin_lock), + .hash_lock = __RW_LOCK_UNLOCKED(lrng_drng_init.hash_lock) +}; + +/* + * DRNG for get_random_bytes when called in atomic context. This + * DRNG will always use the ChaCha20 DRNG.
It will never benefit from a + * DRNG switch like the "regular" DRNG. If there was no DRNG switch, the atomic + * DRNG is identical to the "regular" DRNG. + * + * The reason for having this is due to the fact that DRNGs other than + * the ChaCha20 DRNG may sleep. + */ +static struct lrng_drng lrng_drng_atomic = { + .drng = &chacha20, + .crypto_cb = &lrng_cc20_crypto_cb, + .spin_lock = __SPIN_LOCK_UNLOCKED(lrng_drng_atomic.spin_lock), + .hash_lock = __RW_LOCK_UNLOCKED(lrng_drng_atomic.hash_lock) +}; + +static u32 max_wo_reseed = LRNG_DRNG_MAX_WITHOUT_RESEED; +#ifdef CONFIG_LRNG_RUNTIME_MAX_WO_RESEED_CONFIG +module_param(max_wo_reseed, uint, 0444); +MODULE_PARM_DESC(max_wo_reseed, + "Maximum number of DRNG generate operation without full reseed\n"); +#endif + +/********************************** Helper ************************************/ + +bool lrng_get_available(void) +{ + return likely(atomic_read(&lrng_avail)); +} + +void lrng_set_available(void) +{ + atomic_set(&lrng_avail, 1); +} + +struct lrng_drng *lrng_drng_init_instance(void) +{ + return &lrng_drng_init; +} + +struct lrng_drng *lrng_drng_atomic_instance(void) +{ + return &lrng_drng_atomic; +} + +void lrng_drng_reset(struct lrng_drng *drng) +{ + atomic_set(&drng->requests, LRNG_DRNG_RESEED_THRESH); /* generate-call budget; a reseed is attempted when it counts down to zero */ + atomic_set(&drng->requests_since_fully_seeded, 0); + drng->last_seeded = jiffies; + drng->fully_seeded = false; + drng->force_reseed = true; /* reseed on the next generate request */ + pr_debug("reset DRNG\n"); +} + +/* Initialize the default DRNG during boot */ +static void lrng_drng_seed(struct lrng_drng *drng); +void lrng_drngs_init_cc20(bool force_seed) +{ + unsigned long flags = 0; + + if (lrng_get_available()) + return; + + lrng_drng_lock(&lrng_drng_init, &flags); + if (lrng_get_available()) { /* re-check under the lock: another context finished the initialization */ + lrng_drng_unlock(&lrng_drng_init, &flags); + if (force_seed) + goto seed; + return; + } + + lrng_drng_reset(&lrng_drng_init); + lrng_cc20_init_state(&chacha20); + lrng_drng_unlock(&lrng_drng_init, &flags); + + lrng_drng_lock(&lrng_drng_atomic, &flags); +
lrng_drng_reset(&lrng_drng_atomic); + /* + * We do not initialize the state of the atomic DRNG as it is identical + * to the DRNG at this point. + */ + lrng_drng_unlock(&lrng_drng_atomic, &flags); + + lrng_set_available(); + +seed: + /* Seed the DRNG with any entropy available */ + if (!lrng_pool_trylock()) { /* zero return: the pool lock was obtained and must be released below */ + lrng_drng_seed(&lrng_drng_init); + pr_info("ChaCha20 core initialized with first seeding\n"); + lrng_pool_unlock(); + } else { + pr_info("ChaCha20 core initialized without seeding\n"); + } +} + +bool lrng_sp80090c_compliant(void) +{ + if (!IS_ENABLED(CONFIG_LRNG_OVERSAMPLE_ENTROPY_SOURCES)) + return false; + + /* Entropy source hash must be capable of transporting enough entropy */ + if (lrng_get_digestsize() < + (lrng_security_strength() + CONFIG_LRNG_SEED_BUFFER_INIT_ADD_BITS)) + return false; + + /* SP800-90C only requested in FIPS mode */ + return fips_enabled; +} + +/************************* Random Number Generation ***************************/ + +/* Inject a data buffer into the DRNG */ +static void lrng_drng_inject(struct lrng_drng *drng, + const u8 *inbuf, u32 inbuflen, bool fully_seeded) +{ + const char *drng_type = unlikely(drng == &lrng_drng_atomic) ? + "atomic" : "regular"; + unsigned long flags = 0; + + BUILD_BUG_ON(LRNG_DRNG_RESEED_THRESH > INT_MAX); + pr_debug("seeding %s DRNG with %u bytes\n", drng_type, inbuflen); + lrng_drng_lock(drng, &flags); + if (drng->crypto_cb->lrng_drng_seed_helper(drng->drng, + inbuf, inbuflen) < 0) { + pr_warn("seeding of %s DRNG failed\n", drng_type); + drng->force_reseed = true; /* retry the seeding with the next generate request */ + } else { + int gc = LRNG_DRNG_RESEED_THRESH - atomic_read(&drng->requests); /* generate calls consumed since the last seeding */ + + pr_debug("%s DRNG stats since last seeding: %lu secs; generate calls: %d\n", + drng_type, + (time_after(jiffies, drng->last_seeded) ?
+ (jiffies - drng->last_seeded) : 0) / HZ, gc); + + /* Count the numbers of generate ops since last fully seeded */ + if (fully_seeded) + atomic_set(&drng->requests_since_fully_seeded, 0); + else + atomic_add(gc, &drng->requests_since_fully_seeded); + + drng->last_seeded = jiffies; + atomic_set(&drng->requests, LRNG_DRNG_RESEED_THRESH); + drng->force_reseed = false; + + if (!drng->fully_seeded) { /* fully_seeded is only ever raised here, never cleared */ + drng->fully_seeded = fully_seeded; + if (drng->fully_seeded) + pr_debug("DRNG fully seeded\n"); + } + + if (drng->drng == lrng_drng_atomic.drng) { /* the atomic DRNG shares this state: refresh its reseed bookkeeping too */ + lrng_drng_atomic.last_seeded = jiffies; + atomic_set(&lrng_drng_atomic.requests, + LRNG_DRNG_RESEED_THRESH); + lrng_drng_atomic.force_reseed = false; + } + } + lrng_drng_unlock(drng, &flags); +} + +/* + * Perform the seeding of the DRNG with data from noise source + */ +static void _lrng_drng_seed(struct lrng_drng *drng) +{ + struct entropy_buf seedbuf __aligned(LRNG_KCAPI_ALIGN); + + lrng_fill_seed_buffer(&seedbuf, + lrng_get_seed_entropy_osr(drng->fully_seeded)); + lrng_init_ops(&seedbuf); + lrng_drng_inject(drng, (u8 *)&seedbuf, sizeof(seedbuf), + lrng_fully_seeded(drng->fully_seeded, &seedbuf)); + memzero_explicit(&seedbuf, sizeof(seedbuf)); /* do not leave seed material on the stack */ +} + +static int lrng_drng_get(struct lrng_drng *drng, u8 *outbuf, u32 outbuflen); +static void lrng_drng_seed(struct lrng_drng *drng) +{ + _lrng_drng_seed(drng); + + BUILD_BUG_ON(LRNG_MIN_SEED_ENTROPY_BITS > + LRNG_DRNG_SECURITY_STRENGTH_BITS); + + /* + * Reseed atomic DRNG from current DRNG, + * + * We can obtain random numbers from DRNG as the lock type + * chosen by lrng_drng_get is usable with the current caller.
+ */ + if ((drng->drng != lrng_drng_atomic.drng) && + (lrng_drng_atomic.force_reseed || + atomic_read(&lrng_drng_atomic.requests) <= 0 || + time_after(jiffies, lrng_drng_atomic.last_seeded + + lrng_drng_reseed_max_time * HZ))) { + u8 seedbuf[LRNG_DRNG_SECURITY_STRENGTH_BYTES] + __aligned(LRNG_KCAPI_ALIGN); + int ret = lrng_drng_get(drng, seedbuf, sizeof(seedbuf)); + + if (ret < 0) { + pr_warn("Error generating random numbers for atomic DRNG: %d\n", + ret); + } else { + lrng_drng_inject(&lrng_drng_atomic, seedbuf, ret, true); + } + memzero_explicit(&seedbuf, sizeof(seedbuf)); + } +} + +static void _lrng_drng_seed_work(struct lrng_drng *drng, u32 node) +{ + pr_debug("reseed triggered by interrupt noise source for DRNG on NUMA node %d\n", + node); + lrng_drng_seed(drng); + if (drng->fully_seeded) { + /* Prevent reseed storm */ + drng->last_seeded += node * 100 * HZ; + /* Prevent draining of pool on idle systems */ + lrng_drng_reseed_max_time += 100; + } +} + +/* + * DRNG reseed trigger: Kernel thread handler triggered by the schedule_work() + */ +void lrng_drng_seed_work(struct work_struct *dummy) +{ + struct lrng_drng **lrng_drng = lrng_drng_instances(); + u32 node; + + if (lrng_drng) { + for_each_online_node(node) { + struct lrng_drng *drng = lrng_drng[node]; + + if (drng && !drng->fully_seeded) { + _lrng_drng_seed_work(drng, node); + goto out; /* at most one DRNG is seeded per invocation */ + } + } + } else { + if (!lrng_drng_init.fully_seeded) { + _lrng_drng_seed_work(&lrng_drng_init, 0); + goto out; + } + } + + lrng_pool_all_numa_nodes_seeded(true); /* only reached when no DRNG was left to seed */ + +out: + /* Allow the seeding operation to be called again */ + lrng_pool_unlock(); +} + +/* Force all DRNGs to reseed before next generation */ +void lrng_drng_force_reseed(void) +{ + struct lrng_drng **lrng_drng = lrng_drng_instances(); + u32 node; + + /* + * If the initial DRNG is over the reseed threshold, allow a forced + * reseed only for the initial DRNG as this is the fallback for all.
It + * must be kept seeded before all others to keep the LRNG operational. + */ + if (!lrng_drng || + (atomic_read_u32(&lrng_drng_init.requests_since_fully_seeded) > + LRNG_DRNG_RESEED_THRESH)) { + lrng_drng_init.force_reseed = lrng_drng_init.fully_seeded; + pr_debug("force reseed of initial DRNG\n"); + return; + } + for_each_online_node(node) { + struct lrng_drng *drng = lrng_drng[node]; + + if (!drng) + continue; + + drng->force_reseed = drng->fully_seeded; + pr_debug("force reseed of DRNG on node %u\n", node); + } + lrng_drng_atomic.force_reseed = lrng_drng_atomic.fully_seeded; +} + +/* + * lrng_drng_get() - Get random data out of the DRNG which is reseeded + * frequently. + * + * @drng: DRNG instance to generate the random data from + * @outbuf: buffer for storing random data + * @outbuflen: length of outbuf + * + * Return: + * * < 0 in error case (DRNG generation or update failed) + * * >=0 returning the returned number of bytes + */ +static int lrng_drng_get(struct lrng_drng *drng, u8 *outbuf, u32 outbuflen) +{ + unsigned long flags = 0; + u32 processed = 0; + + if (!outbuf || !outbuflen) + return 0; + + outbuflen = min_t(size_t, outbuflen, INT_MAX); /* the byte count is returned as an int */ + + lrng_drngs_init_cc20(false); + + /* If DRNG operated without proper reseed for too long, block LRNG */ + BUILD_BUG_ON(LRNG_DRNG_MAX_WITHOUT_RESEED < LRNG_DRNG_RESEED_THRESH); + if (atomic_read_u32(&drng->requests_since_fully_seeded) > max_wo_reseed) + lrng_unset_fully_seeded(drng); + + while (outbuflen) { + u32 todo = min_t(u32, outbuflen, LRNG_DRNG_MAX_REQSIZE); + int ret; + + /* All but the atomic DRNG are seeded during generation */ + if (atomic_dec_and_test(&drng->requests) || + drng->force_reseed || + time_after(jiffies, drng->last_seeded + + lrng_drng_reseed_max_time * HZ)) { + if (likely(drng != &lrng_drng_atomic)) { + if (lrng_pool_trylock()) { /* non-zero: pool lock not obtained, retry the reseed on the next request */ + drng->force_reseed = true; + } else { + lrng_drng_seed(drng); + lrng_pool_unlock(); + } + } + } + + lrng_drng_lock(drng, &flags); + ret = drng->crypto_cb->lrng_drng_generate_helper( + drng->drng, outbuf + processed,
todo); + lrng_drng_unlock(drng, &flags); + if (ret <= 0) { + pr_warn("getting random data from DRNG failed (%d)\n", + ret); + return -EFAULT; + } + processed += ret; + outbuflen -= ret; + } + + return processed; +} + +int lrng_drng_get_atomic(u8 *outbuf, u32 outbuflen) +{ + return lrng_drng_get(&lrng_drng_atomic, outbuf, outbuflen); +} + +int lrng_drng_get_sleep(u8 *outbuf, u32 outbuflen) +{ + struct lrng_drng **lrng_drng = lrng_drng_instances(); + struct lrng_drng *drng = &lrng_drng_init; + int node = numa_node_id(); + + might_sleep(); + + if (lrng_drng && lrng_drng[node] && lrng_drng[node]->fully_seeded) + drng = lrng_drng[node]; + + return lrng_drng_get(drng, outbuf, outbuflen); +} + +/* Reset LRNG such that all existing entropy is gone */ +static void _lrng_reset(struct work_struct *work) +{ + struct lrng_drng **lrng_drng = lrng_drng_instances(); + unsigned long flags = 0; + + if (!lrng_drng) { + lrng_drng_lock(&lrng_drng_init, &flags); + lrng_drng_reset(&lrng_drng_init); + lrng_drng_unlock(&lrng_drng_init, &flags); + } else { + u32 node; + + for_each_online_node(node) { + struct lrng_drng *drng = lrng_drng[node]; + + if (!drng) + continue; + lrng_drng_lock(drng, &flags); + lrng_drng_reset(drng); + lrng_drng_unlock(drng, &flags); + } + } + lrng_set_entropy_thresh(LRNG_INIT_ENTROPY_BITS); + + lrng_reset_state(); +} + +static DECLARE_WORK(lrng_reset_work, _lrng_reset); + +void lrng_reset(void) +{ + schedule_work(&lrng_reset_work); +} + +/***************************** Initialize LRNG *******************************/ + +static int __init lrng_init(void) +{ + lrng_drngs_init_cc20(false); + + lrng_drngs_numa_alloc(); + return 0; +} + +late_initcall(lrng_init); + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_AUTHOR("Stephan Mueller "); +MODULE_DESCRIPTION("Linux Random Number Generator"); diff --git a/drivers/char/lrng/lrng_es_archrandom.c b/drivers/char/lrng/lrng_es_archrandom.c new file mode 100644 index 0000000000000..337f84fab1201 --- /dev/null +++ 
b/drivers/char/lrng/lrng_es_archrandom.c @@ -0,0 +1,230 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause +/* + * LRNG Fast Entropy Source: CPU-based entropy source + * + * Copyright (C) 2016 - 2021, Stephan Mueller + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include + +#include "lrng_internal.h" + +/* + * Estimated entropy of data is a 32th of LRNG_DRNG_SECURITY_STRENGTH_BITS. + * As we have no ability to review the implementation of those noise sources, + * it is prudent to have a conservative estimate here. + */ +#define LRNG_ARCHRANDOM_DEFAULT_STRENGTH CONFIG_LRNG_CPU_ENTROPY_RATE +#define LRNG_ARCHRANDOM_TRUST_CPU_STRENGTH LRNG_DRNG_SECURITY_STRENGTH_BITS +#ifdef CONFIG_RANDOM_TRUST_CPU +static u32 archrandom = LRNG_ARCHRANDOM_TRUST_CPU_STRENGTH; +#else +static u32 archrandom = LRNG_ARCHRANDOM_DEFAULT_STRENGTH; +#endif +#ifdef CONFIG_LRNG_RUNTIME_ES_CONFIG +module_param(archrandom, uint, 0644); +MODULE_PARM_DESC(archrandom, "Entropy in bits of 256 data bits from CPU noise source (e.g. 
RDSEED)"); +#endif + +static int __init lrng_parse_trust_cpu(char *arg) +{ + int ret; + bool trust_cpu = false; + + ret = kstrtobool(arg, &trust_cpu); + if (ret) + return ret; + + if (trust_cpu) { + archrandom = LRNG_ARCHRANDOM_TRUST_CPU_STRENGTH; + lrng_pool_add_entropy(); + } else { + archrandom = LRNG_ARCHRANDOM_DEFAULT_STRENGTH; + } + + return 0; +} +early_param("random.trust_cpu", lrng_parse_trust_cpu); + +u32 lrng_archrandom_entropylevel(u32 requested_bits) +{ + return lrng_fast_noise_entropylevel(archrandom, requested_bits); +} + +static u32 lrng_get_arch_data(u8 *outbuf, u32 requested_bits) +{ + u32 i; + + /* operate on full blocks */ + BUILD_BUG_ON(LRNG_DRNG_SECURITY_STRENGTH_BYTES % sizeof(unsigned long)); + BUILD_BUG_ON(CONFIG_LRNG_SEED_BUFFER_INIT_ADD_BITS % + sizeof(unsigned long)); + /* ensure we have aligned buffers */ + BUILD_BUG_ON(LRNG_KCAPI_ALIGN % sizeof(unsigned long)); + + for (i = 0; i < (requested_bits >> 3); + i += sizeof(unsigned long)) { + if (!arch_get_random_seed_long((unsigned long *)(outbuf + i)) && + !arch_get_random_long((unsigned long *)(outbuf + i))) { + archrandom = 0; + return 0; + } + } + + return requested_bits; +} + +static u32 lrng_get_arch_data_compress(u8 *outbuf, u32 requested_bits, + u32 data_multiplier) +{ + SHASH_DESC_ON_STACK(shash, NULL); + const struct lrng_crypto_cb *crypto_cb; + struct lrng_drng *drng = lrng_drng_init_instance(); + unsigned long flags; + u32 ent_bits = 0, i, partial_bits = 0, + full_bits = requested_bits * data_multiplier; + void *hash; + + /* Calculate oversampling for SP800-90C */ + if (lrng_sp80090c_compliant()) { + /* Complete amount of bits to be pulled */ + full_bits += CONFIG_LRNG_OVERSAMPLE_ES_BITS * data_multiplier; + /* Full blocks that will be pulled */ + data_multiplier = full_bits / requested_bits; + /* Partial block in bits to be pulled */ + partial_bits = full_bits - (data_multiplier * requested_bits); + } + + lrng_hash_lock(drng, &flags); + crypto_cb = drng->crypto_cb; + hash = 
drng->hash; + + if (crypto_cb->lrng_hash_init(shash, hash)) + goto out; + + /* Hash all data from the CPU entropy source */ + for (i = 0; i < data_multiplier; i++) { + ent_bits = lrng_get_arch_data(outbuf, requested_bits); + if (!ent_bits) + goto out; + + if (crypto_cb->lrng_hash_update(shash, outbuf, ent_bits >> 3)) + goto err; + } + + /* Hash partial block, if applicable */ + ent_bits = lrng_get_arch_data(outbuf, partial_bits); + if (ent_bits && + crypto_cb->lrng_hash_update(shash, outbuf, ent_bits >> 3)) + goto err; + + pr_debug("pulled %u bits from CPU RNG entropy source\n", full_bits); + + /* Generate the compressed data to be returned to the caller */ + ent_bits = crypto_cb->lrng_hash_digestsize(hash) << 3; + if (requested_bits < ent_bits) { + u8 digest[LRNG_MAX_DIGESTSIZE]; + + if (crypto_cb->lrng_hash_final(shash, digest)) + goto err; + + /* Truncate output data to requested size */ + memcpy(outbuf, digest, requested_bits >> 3); + memzero_explicit(digest, crypto_cb->lrng_hash_digestsize(hash)); + ent_bits = requested_bits; + } else { + if (crypto_cb->lrng_hash_final(shash, outbuf)) + goto err; + } + +out: + crypto_cb->lrng_hash_desc_zero(shash); + lrng_hash_unlock(drng, flags); + return ent_bits; + +err: + ent_bits = 0; + goto out; +} + +/* + * If the CPU entropy source does not return full entropy, return the + * multiplier of how much data shall be sampled from it. + */ +static u32 lrng_arch_multiplier(void) +{ + static u32 data_multiplier = 0; + unsigned long v; + + if (data_multiplier > 0) + return data_multiplier; + + if (IS_ENABLED(CONFIG_X86) && !arch_get_random_seed_long(&v)) { + /* + * Intel SPEC: pulling 512 blocks from RDRAND ensures + * one reseed making it logically equivalent to RDSEED. + */ + data_multiplier = 512; + } else if (IS_ENABLED(CONFIG_PPC)) { + /* + * PowerISA defines DARN to deliver at least 0.5 bits of + * entropy per data bit. 
+ */ + data_multiplier = 2; + } else if (IS_ENABLED(CONFIG_RISCV)) { + /* + * riscv-crypto-spec-scalar-1.0.0-rc6.pdf section 4.2 defines + * this requirement. + */ + data_multiplier = 2; + } else { + /* CPU provides full entropy */ + data_multiplier = CONFIG_LRNG_CPU_FULL_ENT_MULTIPLIER; + } + return data_multiplier; +} + +/* + * lrng_get_arch() - Get CPU entropy source entropy + * + * @outbuf: buffer to store entropy of size requested_bits + * + * Return: + * * > 0 on success where value provides the added entropy in bits + * * 0 if no fast source was available + */ +u32 lrng_get_arch(u8 *outbuf, u32 requested_bits) +{ + u32 ent_bits, data_multiplier = lrng_arch_multiplier(); + + if (data_multiplier <= 1) { + ent_bits = lrng_get_arch_data(outbuf, requested_bits); + } else { + ent_bits = lrng_get_arch_data_compress(outbuf, requested_bits, + data_multiplier); + } + + ent_bits = lrng_archrandom_entropylevel(ent_bits); + pr_debug("obtained %u bits of entropy from CPU RNG entropy source\n", + ent_bits); + return ent_bits; +} + +void lrng_arch_es_state(unsigned char *buf, size_t buflen) +{ + const struct lrng_drng *lrng_drng_init = lrng_drng_init_instance(); + u32 data_multiplier = lrng_arch_multiplier(); + + /* Assume the lrng_drng_init lock is taken by caller */ + snprintf(buf, buflen, + "CPU ES properties:\n" + " Hash for compressing data: %s\n" + " Data multiplier: %u\n", + (data_multiplier <= 1) ? 
+ "N/A" : lrng_drng_init->crypto_cb->lrng_hash_name(), + data_multiplier); +} diff --git a/drivers/char/lrng/lrng_es_aux.c b/drivers/char/lrng/lrng_es_aux.c new file mode 100644 index 0000000000000..c8a040d29f78e --- /dev/null +++ b/drivers/char/lrng/lrng_es_aux.c @@ -0,0 +1,295 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause +/* + * LRNG Slow Entropy Source: Auxiliary entropy pool + * + * Copyright (C) 2016 - 2021, Stephan Mueller + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include + +#include "lrng_internal.h" + +/* + * This is the auxiliary pool + * + * The aux pool array is aligned to 8 bytes to comfort the kernel crypto API + * cipher implementations of the hash functions used to read the pool: for some + * accelerated implementations, we need an alignment to avoid a realignment + * which involves memcpy(). The alignment to 8 bytes should satisfy all crypto + * implementations. + */ +struct lrng_pool { + u8 aux_pool[LRNG_POOL_SIZE]; /* Aux pool: digest state */ + atomic_t aux_entropy_bits; + atomic_t digestsize; /* Digest size of used hash */ + bool initialized; /* Aux pool initialized? 
*/ + + /* Serialize read of entropy pool and update of aux pool */ + spinlock_t lock; +}; + +static struct lrng_pool lrng_pool __aligned(LRNG_KCAPI_ALIGN) = { + .aux_entropy_bits = ATOMIC_INIT(0), + .digestsize = ATOMIC_INIT(LRNG_ATOMIC_DIGEST_SIZE), + .initialized = false, + .lock = __SPIN_LOCK_UNLOCKED(lrng_pool.lock) +}; + +/********************************** Helper ***********************************/ + +/* Entropy in bits present in aux pool */ +u32 lrng_avail_aux_entropy(void) +{ + /* Cap available entropy with max entropy */ + u32 avail_bits = min_t(u32, lrng_get_digestsize(), + atomic_read_u32(&lrng_pool.aux_entropy_bits)); + + /* Consider oversampling rate due to aux pool conditioning */ + return lrng_reduce_by_osr(avail_bits); +} + +/* Set the digest size of the used hash in bytes */ +static void lrng_set_digestsize(u32 digestsize) +{ + struct lrng_pool *pool = &lrng_pool; + u32 ent_bits = atomic_xchg_relaxed(&pool->aux_entropy_bits, 0), + old_digestsize = lrng_get_digestsize(); + + atomic_set(&lrng_pool.digestsize, digestsize); + + /* + * Update the /proc/.../write_wakeup_threshold which must not be larger + * than the digest size of the current conditioning hash. + */ + digestsize <<= 3; + lrng_proc_update_max_write_thresh(digestsize); + if (lrng_write_wakeup_bits > digestsize) + lrng_write_wakeup_bits = digestsize; + + /* + * In case the new digest is larger than the old one, cap the available + * entropy to the old message digest used to process the existing data. 
+ */ + ent_bits = min_t(u32, ent_bits, old_digestsize); + atomic_add(ent_bits, &pool->aux_entropy_bits); +} + +/* Obtain the digest size provided by the used hash in bits */ +u32 lrng_get_digestsize(void) +{ + return atomic_read_u32(&lrng_pool.digestsize) << 3; +} + +/* Set entropy content in user-space controllable aux pool */ +void lrng_pool_set_entropy(u32 entropy_bits) +{ + atomic_set(&lrng_pool.aux_entropy_bits, entropy_bits); +} + +/* + * Replace old with new hash for auxiliary pool handling + * + * Assumption: the caller must guarantee that the new_cb is available during the + * entire operation (e.g. it must hold the write lock against pointer updating). + */ +int lrng_aux_switch_hash(const struct lrng_crypto_cb *new_cb, void *new_hash, + const struct lrng_crypto_cb *old_cb) +{ + struct lrng_pool *pool = &lrng_pool; + struct shash_desc *shash = (struct shash_desc *)pool->aux_pool; + u8 digest[LRNG_MAX_DIGESTSIZE]; + int ret; + + if (!IS_ENABLED(CONFIG_LRNG_DRNG_SWITCH)) + return -EOPNOTSUPP; + + if (unlikely(!pool->initialized)) + return 0; + + /* Get the aux pool hash with old digest ... */ + ret = old_cb->lrng_hash_final(shash, digest) ?: + /* ... re-initialize the hash with the new digest ... */ + new_cb->lrng_hash_init(shash, new_hash) ?: + /* + * ... feed the old hash into the new state. We may feed + * uninitialized memory into the new state, but this is + * considered no issue and even good as we have some more + * uncertainty here. + */ + new_cb->lrng_hash_update(shash, digest, sizeof(digest)); + if (!ret) { + lrng_set_digestsize(new_cb->lrng_hash_digestsize(new_hash)); + pr_debug("Re-initialize aux entropy pool with hash %s\n", + new_cb->lrng_hash_name()); + } + + memzero_explicit(digest, sizeof(digest)); + return ret; +} + +/* Insert data into auxiliary pool by using the hash update function. 
*/ +static int +lrng_pool_insert_aux_locked(const u8 *inbuf, u32 inbuflen, u32 entropy_bits) +{ + struct lrng_pool *pool = &lrng_pool; + struct shash_desc *shash = (struct shash_desc *)pool->aux_pool; + struct lrng_drng *drng = lrng_drng_init_instance(); + const struct lrng_crypto_cb *crypto_cb; + unsigned long flags; + void *hash; + int ret; + + entropy_bits = min_t(u32, entropy_bits, inbuflen << 3); + + lrng_hash_lock(drng, &flags); + + crypto_cb = drng->crypto_cb; + hash = drng->hash; + + if (unlikely(!pool->initialized)) { + ret = crypto_cb->lrng_hash_init(shash, hash); + if (ret) + goto out; + pool->initialized = true; + } + + ret = crypto_cb->lrng_hash_update(shash, inbuf, inbuflen); + if (ret) + goto out; + + /* + * Cap the available entropy to the hash output size compliant to + * SP800-90B section 3.1.5.1 table 1. + */ + entropy_bits += atomic_read_u32(&pool->aux_entropy_bits); + atomic_set(&pool->aux_entropy_bits, + min_t(u32, entropy_bits, + crypto_cb->lrng_hash_digestsize(hash) << 3)); + +out: + lrng_hash_unlock(drng, flags); + return ret; +} + +int lrng_pool_insert_aux(const u8 *inbuf, u32 inbuflen, u32 entropy_bits) +{ + struct lrng_pool *pool = &lrng_pool; + unsigned long flags; + int ret; + + spin_lock_irqsave(&pool->lock, flags); + ret = lrng_pool_insert_aux_locked(inbuf, inbuflen, entropy_bits); + spin_unlock_irqrestore(&pool->lock, flags); + + lrng_pool_add_entropy(); + + return ret; +} + +/************************* Get data from entropy pool *************************/ + +/* + * Get auxiliary entropy pool and its entropy content for seed buffer. + * Caller must hold lrng_pool.pool->lock. + * @outbuf: buffer to store data in with size requested_bits + * @requested_bits: Requested amount of entropy + * @return: amount of entropy in outbuf in bits. 
+ */ +static u32 lrng_get_aux_pool(u8 *outbuf, u32 requested_bits) +{ + struct lrng_pool *pool = &lrng_pool; + struct shash_desc *shash = (struct shash_desc *)pool->aux_pool; + struct lrng_drng *drng = lrng_drng_init_instance(); + const struct lrng_crypto_cb *crypto_cb; + unsigned long flags; + void *hash; + u32 collected_ent_bits, returned_ent_bits, unused_bits = 0, + digestsize, requested_bits_osr; + u8 aux_output[LRNG_MAX_DIGESTSIZE]; + + if (unlikely(!pool->initialized)) + return 0; + + lrng_hash_lock(drng, &flags); + + crypto_cb = drng->crypto_cb; + hash = drng->hash; + digestsize = crypto_cb->lrng_hash_digestsize(hash); + + /* Ensure that no more than the size of aux_pool can be requested */ + requested_bits = min_t(u32, requested_bits, (LRNG_MAX_DIGESTSIZE << 3)); + requested_bits_osr = requested_bits + lrng_compress_osr(); + + /* Cap entropy with entropy counter from aux pool and the used digest */ + collected_ent_bits = min_t(u32, digestsize << 3, + atomic_xchg_relaxed(&pool->aux_entropy_bits, 0)); + + /* We collected too much entropy and put the overflow back */ + if (collected_ent_bits > requested_bits_osr) { + /* Amount of bits we collected too much */ + unused_bits = collected_ent_bits - requested_bits_osr; + /* Put entropy back */ + atomic_add(unused_bits, &pool->aux_entropy_bits); + /* Fix collected entropy */ + collected_ent_bits = requested_bits_osr; + } + + /* Apply oversampling: discount requested oversampling rate */ + returned_ent_bits = lrng_reduce_by_osr(collected_ent_bits); + + pr_debug("obtained %u bits by collecting %u bits of entropy from aux pool, %u bits of entropy remaining\n", + returned_ent_bits, collected_ent_bits, unused_bits); + + /* Get the digest for the aux pool to be returned to the caller ... */ + if (crypto_cb->lrng_hash_final(shash, aux_output) || + /* + * ... and re-initialize the aux state. 
Do not add the aux pool + * digest for backward secrecy as it will be added with the + * insertion of the complete seed buffer after it has been filled. + */ + crypto_cb->lrng_hash_init(shash, hash)) { + returned_ent_bits = 0; + } else { + /* + * Do not truncate the output size exactly to collected_ent_bits + * as the aux pool may contain data that is not credited with + * entropy, but we want to use them to stir the DRNG state. + */ + memcpy(outbuf, aux_output, requested_bits >> 3); + } + + lrng_hash_unlock(drng, flags); + memzero_explicit(aux_output, digestsize); + return returned_ent_bits; +} + +void lrng_get_backtrack_aux(struct entropy_buf *entropy_buf, u32 requested_bits) +{ + struct lrng_pool *pool = &lrng_pool; + unsigned long flags; + + /* Ensure aux pool extraction and backtracking op are atomic */ + spin_lock_irqsave(&pool->lock, flags); + + entropy_buf->a_bits = lrng_get_aux_pool(entropy_buf->a, requested_bits); + + /* Mix the extracted data back into pool for backtracking resistance */ + if (lrng_pool_insert_aux_locked((u8 *)entropy_buf, + sizeof(struct entropy_buf), 0)) + pr_warn("Backtracking resistance operation failed\n"); + + spin_unlock_irqrestore(&pool->lock, flags); +} + +void lrng_aux_es_state(unsigned char *buf, size_t buflen) +{ + const struct lrng_drng *lrng_drng_init = lrng_drng_init_instance(); + + /* Assume the lrng_drng_init lock is taken by caller */ + snprintf(buf, buflen, + "Auxiliary ES properties:\n" + " Hash for operating entropy pool: %s\n", + lrng_drng_init->crypto_cb->lrng_hash_name()); +} diff --git a/drivers/char/lrng/lrng_es_irq.c b/drivers/char/lrng/lrng_es_irq.c new file mode 100644 index 0000000000000..6b13ff97d1782 --- /dev/null +++ b/drivers/char/lrng/lrng_es_irq.c @@ -0,0 +1,824 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause +/* + * LRNG Slow Entropy Source: Interrupt data collection + * + * Copyright (C) 2016 - 2021, Stephan Mueller + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include 
+#include +#include +#include +#include + +#include "lrng_internal.h" +#include "lrng_es_irq.h" + +/* + * Number of interrupts to be recorded to assume that DRNG security strength + * bits of entropy are received. + * Note: a value below the DRNG security strength should not be defined as this + * may imply the DRNG can never be fully seeded in case other noise + * sources are unavailable. + */ +#define LRNG_IRQ_ENTROPY_BITS CONFIG_LRNG_IRQ_ENTROPY_RATE + + +/* Number of interrupts required for LRNG_DRNG_SECURITY_STRENGTH_BITS entropy */ +static u32 lrng_irq_entropy_bits = LRNG_IRQ_ENTROPY_BITS; +/* Is high-resolution timer present? */ +static bool lrng_irq_highres_timer = false; + +static u32 irq_entropy __read_mostly = LRNG_IRQ_ENTROPY_BITS; +#ifdef CONFIG_LRNG_RUNTIME_ES_CONFIG +module_param(irq_entropy, uint, 0444); +MODULE_PARM_DESC(irq_entropy, + "How many interrupts must be collected for obtaining 256 bits of entropy\n"); +#endif + +/* Per-CPU array holding concatenated entropy events */ +static DEFINE_PER_CPU(u32 [LRNG_DATA_ARRAY_SIZE], lrng_pcpu_array) + __aligned(LRNG_KCAPI_ALIGN); +static DEFINE_PER_CPU(u32, lrng_pcpu_array_ptr) = 0; +static DEFINE_PER_CPU(atomic_t, lrng_pcpu_array_irqs) = ATOMIC_INIT(0); + +/* + * The entropy collection is performed by executing the following steps: + * 1. fill up the per-CPU array holding the time stamps + * 2. once the per-CPU array is full, a compression of the data into + * the entropy pool is performed - this happens in interrupt context + * + * If step 2 is not desired in interrupt context, the following boolean + * needs to be set to false. This implies that old entropy data in the + * per-CPU array collected since the last DRNG reseed is overwritten with + * new entropy data instead of retaining the entropy with the compression + * operation. 
+ * + * Impact on entropy: + * + * If continuous compression is enabled, the maximum entropy that is collected + * per CPU between DRNG reseeds is equal to the digest size of the used hash. + * + * If continuous compression is disabled, the maximum number of entropy events + * that can be collected per CPU is equal to LRNG_DATA_ARRAY_SIZE. This amount + * of events is converted into an entropy statement which then represents the + * maximum amount of entropy collectible per CPU between DRNG reseeds. + */ +static bool lrng_pcpu_continuous_compression __read_mostly = + IS_ENABLED(CONFIG_LRNG_ENABLE_CONTINUOUS_COMPRESSION); + +#ifdef CONFIG_LRNG_SWITCHABLE_CONTINUOUS_COMPRESSION +module_param(lrng_pcpu_continuous_compression, bool, 0444); +MODULE_PARM_DESC(lrng_pcpu_continuous_compression, + "Perform entropy compression if per-CPU entropy data array is full\n"); +#endif + +/* + * Per-CPU entropy pool with compressed entropy event + * + * The per-CPU entropy pool is defined as the hash state. New data is simply + * inserted into the entropy pool by performing a hash update operation. + * To read the entropy pool, a hash final must be invoked. However, before + * the entropy pool is released again after a hash final, the hash init must + * be performed. + */ +static DEFINE_PER_CPU(u8 [LRNG_POOL_SIZE], lrng_pcpu_pool) + __aligned(LRNG_KCAPI_ALIGN); +/* + * Lock to allow other CPUs to read the pool - as this is only done during + * reseed which is infrequent, this lock is hardly contended. 
+ */ +static DEFINE_PER_CPU(spinlock_t, lrng_pcpu_lock); +static DEFINE_PER_CPU(bool, lrng_pcpu_lock_init) = false; + +/* Number of time stamps analyzed to calculate a GCD */ +#define LRNG_GCD_WINDOW_SIZE 100 +static u32 lrng_gcd_history[LRNG_GCD_WINDOW_SIZE]; +static atomic_t lrng_gcd_history_ptr = ATOMIC_INIT(-1); + +/* The common divisor for all timestamps */ +static u32 lrng_gcd_timer = 0; + +static bool lrng_gcd_tested(void) +{ + return (lrng_gcd_timer != 0); +} + +/* Set the GCD for use in IRQ ES - if 0, the GCD calculation is restarted. */ +static void _lrng_gcd_set(u32 running_gcd) +{ + lrng_gcd_timer = running_gcd; + /* Ensure that update to global variable lrng_gcd_timer is visible */ + mb(); +} + +static void lrng_gcd_set(u32 running_gcd) +{ + if (!lrng_gcd_tested()) { + _lrng_gcd_set(running_gcd); + pr_debug("Setting GCD to %u\n", running_gcd); + } +} + +u32 lrng_gcd_analyze(u32 *history, size_t nelem) +{ + u32 running_gcd = 0; + size_t i; + + /* Now perform the analysis on the accumulated time data. */ + for (i = 0; i < nelem; i++) { + /* + * NOTE: this would be the place to add more analysis on the + * appropriateness of the timer like checking the presence + * of sufficient variations in the timer. + */ + + /* + * This calculates the gcd of all the time values, that is + * gcd(time_1, time_2, ..., time_nelem) + * + * Some timers increment by a fixed (non-1) amount each step. + * This code checks for such increments, and allows the library + * to output the number of such changes that have occurred. 
+ */ + running_gcd = (u32)gcd(history[i], running_gcd); + + /* Zeroize data */ + history[i] = 0; + } + + return running_gcd; +} + +static void lrng_gcd_add_value(u32 time) +{ + u32 ptr = (u32)atomic_inc_return_relaxed(&lrng_gcd_history_ptr); + + if (ptr < LRNG_GCD_WINDOW_SIZE) { + lrng_gcd_history[ptr] = time; + } else if (ptr == LRNG_GCD_WINDOW_SIZE) { + u32 gcd = lrng_gcd_analyze(lrng_gcd_history, + LRNG_GCD_WINDOW_SIZE); + + if (!gcd) + gcd = 1; + + /* + * Ensure that we have variations in the time stamp below the + * given value. This is just a safety measure to prevent the GCD + * becoming too large. + */ + if (gcd >= 1000) { + pr_warn("calculated GCD is larger than expected: %u\n", + gcd); + gcd = 1000; + } + + /* Adjust all deltas by the observed (small) common factor. */ + lrng_gcd_set(gcd); + atomic_set(&lrng_gcd_history_ptr, 0); + } +} + +/* Return boolean whether LRNG identified presence of high-resolution timer */ +static bool lrng_pool_highres_timer(void) +{ + return lrng_irq_highres_timer; +} + +/* Convert entropy in bits into number of IRQs with the same entropy content. */ +static u32 lrng_entropy_to_data(u32 entropy_bits) +{ + return ((entropy_bits * lrng_irq_entropy_bits) / + LRNG_DRNG_SECURITY_STRENGTH_BITS); +} + +/* Convert number of IRQs into entropy value. 
*/ +static u32 lrng_data_to_entropy(u32 irqnum) +{ + return ((irqnum * LRNG_DRNG_SECURITY_STRENGTH_BITS) / + lrng_irq_entropy_bits); +} + +static bool lrng_pcpu_pool_online(int cpu) +{ + return per_cpu(lrng_pcpu_lock_init, cpu); +} + +static void lrng_pcpu_check_compression_state(void) +{ + /* One pool must hold sufficient entropy for disabled compression */ + if (!lrng_pcpu_continuous_compression) { + u32 max_ent = min_t(u32, lrng_get_digestsize(), + lrng_data_to_entropy(LRNG_DATA_NUM_VALUES)); + if (max_ent < lrng_security_strength()) { + pr_warn("Force continuous compression operation to ensure LRNG can hold enough entropy\n"); + lrng_pcpu_continuous_compression = true; + } + } +} + +static int __init lrng_init_time_source(void) +{ + /* Set a minimum number of interrupts that must be collected */ + irq_entropy = max_t(u32, LRNG_IRQ_ENTROPY_BITS, irq_entropy); + + if ((random_get_entropy() & LRNG_DATA_SLOTSIZE_MASK) || + (random_get_entropy() & LRNG_DATA_SLOTSIZE_MASK)) { + /* + * As the highres timer is identified here, previous interrupts + * obtained during boot time are treated like a lowres-timer + * would have been present. + */ + lrng_irq_highres_timer = true; + lrng_irq_entropy_bits = irq_entropy; + } else { + u32 new_entropy = irq_entropy * LRNG_IRQ_OVERSAMPLING_FACTOR; + + lrng_health_disable(); + lrng_irq_highres_timer = false; + lrng_irq_entropy_bits = (irq_entropy < new_entropy) ? + new_entropy : irq_entropy; + pr_warn("operating without high-resolution timer and applying IRQ oversampling factor %u\n", + LRNG_IRQ_OVERSAMPLING_FACTOR); + lrng_pcpu_check_compression_state(); + } + /* Ensure that changes to global variables are visible */ + mb(); + + return 0; +} +core_initcall(lrng_init_time_source); + +/* + * Reset all per-CPU pools - reset entropy estimator but leave the pool data + * that may or may not have entropy unchanged. + */ +void lrng_pcpu_reset(void) +{ + int cpu; + + /* Trigger GCD calculation anew. 
*/ + _lrng_gcd_set(0); + + for_each_online_cpu(cpu) + atomic_set(per_cpu_ptr(&lrng_pcpu_array_irqs, cpu), 0); +} + +u32 lrng_pcpu_avail_pool_size(void) +{ + u32 max_size = 0, max_pool = lrng_get_digestsize(); + int cpu; + + if (!lrng_pcpu_continuous_compression) + max_pool = min_t(u32, max_pool, LRNG_DATA_NUM_VALUES); + + for_each_online_cpu(cpu) { + if (lrng_pcpu_pool_online(cpu)) + max_size += max_pool; + } + + return max_size; +} + +/* Return entropy of unused IRQs present in all per-CPU pools. */ +u32 lrng_pcpu_avail_entropy(void) +{ + u32 digestsize_irqs, irq = 0; + int cpu; + + /* Obtain the cap of maximum numbers of IRQs we count */ + digestsize_irqs = lrng_entropy_to_data(lrng_get_digestsize()); + if (!lrng_pcpu_continuous_compression) { + /* Cap to max. number of IRQs the array can hold */ + digestsize_irqs = min_t(u32, digestsize_irqs, + LRNG_DATA_NUM_VALUES); + } + + for_each_online_cpu(cpu) { + if (!lrng_pcpu_pool_online(cpu)) + continue; + irq += min_t(u32, digestsize_irqs, + atomic_read_u32(per_cpu_ptr(&lrng_pcpu_array_irqs, + cpu))); + } + + /* Consider oversampling rate */ + return lrng_reduce_by_osr(lrng_data_to_entropy(irq)); +} + +/* + * Trigger a switch of the hash implementation for the per-CPU pool. + * + * For each per-CPU pool, obtain the message digest with the old hash + * implementation, initialize the per-CPU pool again with the new hash + * implementation and inject the message digest into the new state. + * + * Assumption: the caller must guarantee that the new_cb is available during the + * entire operation (e.g. it must hold the lock against pointer updating). 
+ */ +int lrng_pcpu_switch_hash(int node, + const struct lrng_crypto_cb *new_cb, void *new_hash, + const struct lrng_crypto_cb *old_cb) +{ + u8 digest[LRNG_MAX_DIGESTSIZE]; + u32 digestsize_irqs, found_irqs; + int ret = 0, cpu; + + if (!IS_ENABLED(CONFIG_LRNG_DRNG_SWITCH)) + return -EOPNOTSUPP; + + for_each_online_cpu(cpu) { + struct shash_desc *pcpu_shash; + + /* + * Only switch the per-CPU pools for the current node because + * the crypto_cb only applies NUMA-node-wide. + */ + if (cpu_to_node(cpu) != node || !lrng_pcpu_pool_online(cpu)) + continue; + + pcpu_shash = (struct shash_desc *)per_cpu_ptr(lrng_pcpu_pool, + cpu); + + digestsize_irqs = old_cb->lrng_hash_digestsize(pcpu_shash); + digestsize_irqs = lrng_entropy_to_data(digestsize_irqs << 3); + + if (pcpu_shash->tfm == new_hash) + continue; + + /* Get the per-CPU pool hash with old digest ... */ + ret = old_cb->lrng_hash_final(pcpu_shash, digest) ?: + /* ... re-initialize the hash with the new digest ... */ + new_cb->lrng_hash_init(pcpu_shash, new_hash) ?: + /* + * ... feed the old hash into the new state. We may feed + * uninitialized memory into the new state, but this is + * considered no issue and even good as we have some more + * uncertainty here. + */ + new_cb->lrng_hash_update(pcpu_shash, digest, + sizeof(digest)); + if (ret) + goto out; + + /* + * In case the new digest is larger than the old one, cap + * the available entropy to the old message digest used to + * process the existing data. 
+ */ + found_irqs = atomic_xchg_relaxed( + per_cpu_ptr(&lrng_pcpu_array_irqs, cpu), 0); + found_irqs = min_t(u32, found_irqs, digestsize_irqs); + atomic_add_return_relaxed(found_irqs, + per_cpu_ptr(&lrng_pcpu_array_irqs, cpu)); + + pr_debug("Re-initialize per-CPU entropy pool for CPU %d on NUMA node %d with hash %s\n", + cpu, node, new_cb->lrng_hash_name()); + } + +out: + memzero_explicit(digest, sizeof(digest)); + return ret; +} + +/* + * When reading the per-CPU message digest, make sure we use the crypto + * callbacks defined for the NUMA node the per-CPU pool is defined for because + * the LRNG crypto switch support is only atomic per NUMA node. + */ +static u32 +lrng_pcpu_pool_hash_one(const struct lrng_crypto_cb *pcpu_crypto_cb, + void *pcpu_hash, int cpu, u8 *digest, u32 *digestsize) +{ + struct shash_desc *pcpu_shash = + (struct shash_desc *)per_cpu_ptr(lrng_pcpu_pool, cpu); + spinlock_t *lock = per_cpu_ptr(&lrng_pcpu_lock, cpu); + unsigned long flags; + u32 digestsize_irqs, found_irqs; + + /* Lock guarding against reading / writing to per-CPU pool */ + spin_lock_irqsave(lock, flags); + + *digestsize = pcpu_crypto_cb->lrng_hash_digestsize(pcpu_hash); + digestsize_irqs = lrng_entropy_to_data(*digestsize << 3); + + /* Obtain entropy statement like for the entropy pool */ + found_irqs = atomic_xchg_relaxed( + per_cpu_ptr(&lrng_pcpu_array_irqs, cpu), 0); + /* Cap to maximum amount of data we can hold in hash */ + found_irqs = min_t(u32, found_irqs, digestsize_irqs); + + /* Cap to maximum amount of data we can hold in array */ + if (!lrng_pcpu_continuous_compression) + found_irqs = min_t(u32, found_irqs, LRNG_DATA_NUM_VALUES); + + /* Store all not-yet compressed data in data array into hash, ... */ + if (pcpu_crypto_cb->lrng_hash_update(pcpu_shash, + (u8 *)per_cpu_ptr(lrng_pcpu_array, cpu), + LRNG_DATA_ARRAY_SIZE * sizeof(u32)) ?: + /* ... get the per-CPU pool digest, ... */ + pcpu_crypto_cb->lrng_hash_final(pcpu_shash, digest) ?: + /* ... 
re-initialize the hash, ... */ + pcpu_crypto_cb->lrng_hash_init(pcpu_shash, pcpu_hash) ?: + /* ... feed the old hash into the new state. */ + pcpu_crypto_cb->lrng_hash_update(pcpu_shash, digest, *digestsize)) + found_irqs = 0; + + spin_unlock_irqrestore(lock, flags); + return found_irqs; +} + +/* + * Hash all per-CPU pools and return the digest to be used as seed data for + * seeding a DRNG. The caller must guarantee backtracking resistance. + * The function will only copy as much data as entropy is available into the + * caller-provided output buffer. + * + * This function handles the translation from the number of received interrupts + * into an entropy statement. The conversion depends on LRNG_IRQ_ENTROPY_BITS + * which defines how many interrupts must be received to obtain 256 bits of + * entropy. With this value, the function lrng_data_to_entropy converts a given + * data size (received interrupts, requested amount of data, etc.) into an + * entropy statement. lrng_entropy_to_data does the reverse. + * + * @outbuf: buffer to store data in with size requested_bits + * @requested_bits: Requested amount of entropy + * @fully_seeded: indicator whether LRNG is fully seeded + * @return: amount of entropy in outbuf in bits. + */ +u32 lrng_pcpu_pool_hash(u8 *outbuf, u32 requested_bits, bool fully_seeded) +{ + SHASH_DESC_ON_STACK(shash, NULL); + const struct lrng_crypto_cb *crypto_cb; + struct lrng_drng **lrng_drng = lrng_drng_instances(); + struct lrng_drng *drng = lrng_drng_init_instance(); + u8 digest[LRNG_MAX_DIGESTSIZE]; + unsigned long flags, flags2; + u32 found_irqs, collected_irqs = 0, collected_ent_bits, requested_irqs, + returned_ent_bits; + int ret, cpu; + void *hash; + + /* Lock guarding replacement of per-NUMA hash */ + lrng_hash_lock(drng, &flags); + + crypto_cb = drng->crypto_cb; + hash = drng->hash; + + /* The hash state is filled with all per-CPU pool hashes. 
*/ + ret = crypto_cb->lrng_hash_init(shash, hash); + if (ret) + goto err; + + requested_irqs = lrng_entropy_to_data(requested_bits + + lrng_compress_osr()); + + /* + * Harvest entropy from each per-CPU hash state - even though we may + * have collected sufficient entropy, we will hash all per-CPU pools. + */ + for_each_online_cpu(cpu) { + struct lrng_drng *pcpu_drng = drng; + u32 digestsize, pcpu_unused_irqs = 0; + int node = cpu_to_node(cpu); + + /* If pool is not online, then no entropy is present. */ + if (!lrng_pcpu_pool_online(cpu)) + continue; + + if (lrng_drng && lrng_drng[node]) + pcpu_drng = lrng_drng[node]; + + if (pcpu_drng == drng) { + found_irqs = lrng_pcpu_pool_hash_one(crypto_cb, hash, + cpu, digest, + &digestsize); + } else { + lrng_hash_lock(pcpu_drng, &flags2); + found_irqs = + lrng_pcpu_pool_hash_one(pcpu_drng->crypto_cb, + pcpu_drng->hash, cpu, + digest, &digestsize); + lrng_hash_unlock(pcpu_drng, flags2); + } + + /* Inject the digest into the state of all per-CPU pools */ + ret = crypto_cb->lrng_hash_update(shash, digest, digestsize); + if (ret) + goto err; + + collected_irqs += found_irqs; + if (collected_irqs > requested_irqs) { + pcpu_unused_irqs = collected_irqs - requested_irqs; + atomic_add_return_relaxed(pcpu_unused_irqs, + per_cpu_ptr(&lrng_pcpu_array_irqs, cpu)); + collected_irqs = requested_irqs; + } + pr_debug("%u interrupts used from entropy pool of CPU %d, %u interrupts remain unused\n", + found_irqs - pcpu_unused_irqs, cpu, pcpu_unused_irqs); + } + + ret = crypto_cb->lrng_hash_final(shash, digest); + if (ret) + goto err; + + collected_ent_bits = lrng_data_to_entropy(collected_irqs); + /* Cap to maximum entropy that can ever be generated with given hash */ + collected_ent_bits = min_t(u32, collected_ent_bits, + crypto_cb->lrng_hash_digestsize(hash) << 3); + /* Apply oversampling: discount requested oversampling rate */ + returned_ent_bits = lrng_reduce_by_osr(collected_ent_bits); + + pr_debug("obtained %u bits by collecting %u bits 
of entropy from entropy pool noise source\n", + returned_ent_bits, collected_ent_bits); + + /* + * Truncate to available entropy as implicitly allowed by SP800-90B + * section 3.1.5.1.1 table 1 which awards truncated hashes full + * entropy. + * + * During boot time, we read requested_bits data with + * returned_ent_bits entropy. In case our conservative entropy + * estimate underestimates the available entropy we can transport as + * much available entropy as possible. + */ + memcpy(outbuf, digest, fully_seeded ? returned_ent_bits >> 3 : + requested_bits >> 3); + +out: + crypto_cb->lrng_hash_desc_zero(shash); + lrng_hash_unlock(drng, flags); + memzero_explicit(digest, sizeof(digest)); + return returned_ent_bits; + +err: + returned_ent_bits = 0; + goto out; +} + +/* Compress the lrng_pcpu_array array into lrng_pcpu_pool */ +static void lrng_pcpu_array_compress(void) +{ + struct shash_desc *shash = + (struct shash_desc *)this_cpu_ptr(lrng_pcpu_pool); + struct lrng_drng **lrng_drng = lrng_drng_instances(); + struct lrng_drng *drng = lrng_drng_init_instance(); + const struct lrng_crypto_cb *crypto_cb; + spinlock_t *lock = this_cpu_ptr(&lrng_pcpu_lock); + unsigned long flags, flags2; + int node = numa_node_id(); + void *hash; + bool init = false; + + /* Get NUMA-node local hash instance */ + if (lrng_drng && lrng_drng[node]) + drng = lrng_drng[node]; + + lrng_hash_lock(drng, &flags); + crypto_cb = drng->crypto_cb; + hash = drng->hash; + + if (unlikely(!this_cpu_read(lrng_pcpu_lock_init))) { + init = true; + spin_lock_init(lock); + this_cpu_write(lrng_pcpu_lock_init, true); + pr_debug("Initializing per-CPU entropy pool for CPU %d on NUMA node %d with hash %s\n", + raw_smp_processor_id(), node, + crypto_cb->lrng_hash_name()); + } + + spin_lock_irqsave(lock, flags2); + + if (unlikely(init) && crypto_cb->lrng_hash_init(shash, hash)) { + this_cpu_write(lrng_pcpu_lock_init, false); + pr_warn("Initialization of hash failed\n"); + } else if (lrng_pcpu_continuous_compression) { 
+ /* Add entire per-CPU data array content into entropy pool. */ + if (crypto_cb->lrng_hash_update(shash, + (u8 *)this_cpu_ptr(lrng_pcpu_array), + LRNG_DATA_ARRAY_SIZE * sizeof(u32))) + pr_warn_ratelimited("Hashing of entropy data failed\n"); + } + + spin_unlock_irqrestore(lock, flags2); + lrng_hash_unlock(drng, flags); +} + +/* Compress data array into hash */ +static void lrng_pcpu_array_to_hash(u32 ptr) +{ + u32 *array = this_cpu_ptr(lrng_pcpu_array); + + /* + * During boot time the hash operation is triggered more often than + * during regular operation. + */ + if (unlikely(!lrng_state_fully_seeded())) { + if ((ptr & 31) && (ptr < LRNG_DATA_WORD_MASK)) + return; + } else if (ptr < LRNG_DATA_WORD_MASK) { + return; + } + + if (lrng_raw_array_entropy_store(*array)) { + u32 i; + + /* + * If we fed even a part of the array to external analysis, we + * mark that the entire array and the per-CPU pool to have no + * entropy. This is due to the non-IID property of the data as + * we do not fully know whether the existing dependencies + * diminish the entropy beyond to what we expect it has. + */ + atomic_set(this_cpu_ptr(&lrng_pcpu_array_irqs), 0); + + for (i = 1; i < LRNG_DATA_ARRAY_SIZE; i++) + lrng_raw_array_entropy_store(*(array + i)); + } else { + lrng_pcpu_array_compress(); + /* Ping pool handler about received entropy */ + lrng_pool_add_entropy(); + } +} + +/* + * Concatenate full 32 bit word at the end of time array even when current + * ptr is not aligned to sizeof(data). + */ +static void _lrng_pcpu_array_add_u32(u32 data) +{ + /* Increment pointer by number of slots taken for input value */ + u32 pre_ptr, mask, ptr = this_cpu_add_return(lrng_pcpu_array_ptr, + LRNG_DATA_SLOTS_PER_UINT); + unsigned int pre_array; + + /* + * This function injects a unit into the array - guarantee that + * array unit size is equal to data type of input data. 
+ */ + BUILD_BUG_ON(LRNG_DATA_ARRAY_MEMBER_BITS != (sizeof(data) << 3)); + + /* + * The following logic requires at least two units holding + * the data as otherwise the pointer would immediately wrap when + * injection an u32 word. + */ + BUILD_BUG_ON(LRNG_DATA_NUM_VALUES <= LRNG_DATA_SLOTS_PER_UINT); + + lrng_pcpu_split_u32(&ptr, &pre_ptr, &mask); + + /* MSB of data go into previous unit */ + pre_array = lrng_data_idx2array(pre_ptr); + /* zeroization of slot to ensure the following OR adds the data */ + this_cpu_and(lrng_pcpu_array[pre_array], ~(0xffffffff & ~mask)); + this_cpu_or(lrng_pcpu_array[pre_array], data & ~mask); + + /* Invoke compression as we just filled data array completely */ + if (unlikely(pre_ptr > ptr)) + lrng_pcpu_array_to_hash(LRNG_DATA_WORD_MASK); + + /* LSB of data go into current unit */ + this_cpu_write(lrng_pcpu_array[lrng_data_idx2array(ptr)], + data & mask); + + if (likely(pre_ptr <= ptr)) + lrng_pcpu_array_to_hash(ptr); +} + +/* Concatenate a 32-bit word at the end of the per-CPU array */ +void lrng_pcpu_array_add_u32(u32 data) +{ + /* + * Disregard entropy-less data without continuous compression to + * avoid it overwriting data with entropy when array ptr wraps. 
+ */ + if (lrng_pcpu_continuous_compression) + _lrng_pcpu_array_add_u32(data); +} + +/* Concatenate data of max LRNG_DATA_SLOTSIZE_MASK at the end of time array */ +static void lrng_pcpu_array_add_slot(u32 data) +{ + /* Get slot */ + u32 ptr = this_cpu_inc_return(lrng_pcpu_array_ptr) & + LRNG_DATA_WORD_MASK; + unsigned int array = lrng_data_idx2array(ptr); + unsigned int slot = lrng_data_idx2slot(ptr); + + BUILD_BUG_ON(LRNG_DATA_ARRAY_MEMBER_BITS % LRNG_DATA_SLOTSIZE_BITS); + /* Ensure consistency of values */ + BUILD_BUG_ON(LRNG_DATA_ARRAY_MEMBER_BITS != + sizeof(lrng_pcpu_array[0]) << 3); + + /* zeroization of slot to ensure the following OR adds the data */ + this_cpu_and(lrng_pcpu_array[array], + ~(lrng_data_slot_val(0xffffffff & LRNG_DATA_SLOTSIZE_MASK, + slot))); + /* Store data into slot */ + this_cpu_or(lrng_pcpu_array[array], lrng_data_slot_val(data, slot)); + + lrng_pcpu_array_to_hash(ptr); +} + +static void +lrng_time_process_common(u32 time, void(*add_time)(u32 data)) +{ + enum lrng_health_res health_test; + + if (lrng_raw_hires_entropy_store(time)) + return; + + health_test = lrng_health_test(time); + if (health_test > lrng_health_fail_use) + return; + + if (health_test == lrng_health_pass) + atomic_inc_return(this_cpu_ptr(&lrng_pcpu_array_irqs)); + + add_time(time); +} + +/* + * Batching up of entropy in per-CPU array before injecting into entropy pool. 
+ */
+static void lrng_time_process(void)
+{
+	u32 now_time = random_get_entropy();
+
+	if (unlikely(!lrng_gcd_tested())) {
+		/* When GCD is unknown, we process the full time stamp */
+		lrng_time_process_common(now_time, _lrng_pcpu_array_add_u32);
+		lrng_gcd_add_value(now_time);
+	} else {
+		/* GCD is known and applied */
+		lrng_time_process_common((now_time / lrng_gcd_timer) &
+					 LRNG_DATA_SLOTSIZE_MASK,
+					 lrng_pcpu_array_add_slot);
+	}
+
+	lrng_perf_time(now_time);
+}
+
+/* Hot code path - Callback for interrupt handler */
+void add_interrupt_randomness(int irq)
+{
+	if (lrng_pool_highres_timer()) {
+		lrng_time_process();
+	} else {
+		struct pt_regs *regs = get_irq_regs();
+		static atomic_t reg_idx = ATOMIC_INIT(0);
+		u64 ip;
+		u32 tmp;
+
+		if (regs) {
+			u32 *ptr = (u32 *)regs;
+			int reg_ptr = atomic_add_return_relaxed(1, &reg_idx);
+			size_t n = (sizeof(struct pt_regs) / sizeof(u32));
+
+			ip = instruction_pointer(regs);
+			tmp = *(ptr + (reg_ptr % n));
+			tmp = lrng_raw_regs_entropy_store(tmp) ? 0 : tmp;
+			_lrng_pcpu_array_add_u32(tmp);
+		} else {
+			ip = _RET_IP_;
+		}
+
+		lrng_time_process();
+
+		/*
+		 * The XOR operation combining the different values is not
+		 * considered to destroy entropy since the entirety of all
+		 * processed values delivers the entropy (and not each
+		 * value separately of the other values).
+		 */
+		tmp = lrng_raw_jiffies_entropy_store(jiffies) ? 0 : jiffies;
+		tmp ^= lrng_raw_irq_entropy_store(irq) ? 0 : irq;
+		tmp ^= lrng_raw_retip_entropy_store(ip) ? 0 : ip;
+		tmp ^= ip >> 32;
+		_lrng_pcpu_array_add_u32(tmp);
+	}
+}
+EXPORT_SYMBOL(add_interrupt_randomness);
+
+void lrng_irq_es_state(unsigned char *buf, size_t buflen)
+{
+	const struct lrng_drng *lrng_drng_init = lrng_drng_init_instance();
+
+	/* Assume the lrng_drng_init lock is taken by caller */
+	snprintf(buf, buflen,
+		 "IRQ ES properties:\n"
+		 " Hash for operating entropy pool: %s\n"
+		 " per-CPU interrupt collection size: %u\n"
+		 " Standards compliance: %s\n"
+		 " High-resolution timer: %s\n"
+		 " Continuous compression: %s\n",
+		 lrng_drng_init->crypto_cb->lrng_hash_name(),
+		 LRNG_DATA_NUM_VALUES,
+		 lrng_sp80090b_compliant() ? "SP800-90B " : "",
+		 lrng_pool_highres_timer() ? "true" : "false",
+		 lrng_pcpu_continuous_compression ? "true" : "false");
+}
diff --git a/drivers/char/lrng/lrng_es_irq.h b/drivers/char/lrng/lrng_es_irq.h
new file mode 100644
index 0000000000000..00b16b1aa45fe
--- /dev/null
+++ b/drivers/char/lrng/lrng_es_irq.h
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
+/*
+ * LRNG Slow Noise Source: Time stamp array handling
+ *
+ * Copyright (C) 2016 - 2021, Stephan Mueller
+ */
+
+/*
+ * To limit the impact on the interrupt handling, the LRNG concatenates
+ * entropic LSB parts of the time stamps in a per-CPU array and only
+ * injects them into the entropy pool when the array is full.
+ */
+
+/* Store multiple integers in one u32 */
+#define LRNG_DATA_SLOTSIZE_BITS		(8)
+#define LRNG_DATA_SLOTSIZE_MASK		((1 << LRNG_DATA_SLOTSIZE_BITS) - 1)
+#define LRNG_DATA_ARRAY_MEMBER_BITS	(4 << 3) /* ((sizeof(u32)) << 3) */
+#define LRNG_DATA_SLOTS_PER_UINT	(LRNG_DATA_ARRAY_MEMBER_BITS / \
+					 LRNG_DATA_SLOTSIZE_BITS)
+
+/*
+ * Number of time values to store in the array - in small environments
+ * only one atomic_t variable per CPU is used.
+ */
+#define LRNG_DATA_NUM_VALUES		(CONFIG_LRNG_COLLECTION_SIZE)
+/* Mask of LSB of time stamp to store */
+#define LRNG_DATA_WORD_MASK		(LRNG_DATA_NUM_VALUES - 1)
+
+#define LRNG_DATA_SLOTS_MASK		(LRNG_DATA_SLOTS_PER_UINT - 1)
+#define LRNG_DATA_ARRAY_SIZE		(LRNG_DATA_NUM_VALUES /	\
+					 LRNG_DATA_SLOTS_PER_UINT)
+
+/* Starting bit index of slot within its array member */
+static inline unsigned int lrng_data_slot2bitindex(unsigned int slot)
+{
+	return (LRNG_DATA_SLOTSIZE_BITS * slot);
+}
+
+/* Convert slot index into the index of the array member holding that slot */
+static inline unsigned int lrng_data_idx2array(unsigned int idx)
+{
+	return idx / LRNG_DATA_SLOTS_PER_UINT;
+}
+
+/* Convert slot index into the slot position within its array member */
+static inline unsigned int lrng_data_idx2slot(unsigned int idx)
+{
+	return idx & LRNG_DATA_SLOTS_MASK;
+}
+
+/* Convert value into slot value by shifting it to the slot's bit position */
+static inline unsigned int lrng_data_slot_val(unsigned int val,
+					      unsigned int slot)
+{
+	return val << lrng_data_slot2bitindex(slot);
+}
+
+/*
+ * Return the pointers for the previous and current units to inject a u32 into.
+ * Also return the mask with which the u32 word is to be processed.
+ */
+static inline void lrng_pcpu_split_u32(u32 *ptr, u32 *pre_ptr, u32 *mask)
+{
+	/* ptr to previous unit */
+	*pre_ptr = (*ptr - LRNG_DATA_SLOTS_PER_UINT) & LRNG_DATA_WORD_MASK;
+	*ptr &= LRNG_DATA_WORD_MASK;
+
+	/* mask to split data into the two parts for the two units */
+	*mask = ((1 << (*pre_ptr & (LRNG_DATA_SLOTS_PER_UINT - 1)) *
+			LRNG_DATA_SLOTSIZE_BITS)) - 1;
+}
diff --git a/drivers/char/lrng/lrng_es_jent.c b/drivers/char/lrng/lrng_es_jent.c
new file mode 100644
index 0000000000000..79b3dc8e8f562
--- /dev/null
+++ b/drivers/char/lrng/lrng_es_jent.c
@@ -0,0 +1,97 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
+/*
+ * LRNG Fast Entropy Source: Jitter RNG
+ *
+ * Copyright (C) 2016 - 2021, Stephan Mueller
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include
+#include
+
+#include "lrng_internal.h"
+
+/*
+ * Estimated entropy of data is a 16th of LRNG_DRNG_SECURITY_STRENGTH_BITS.
+ * Albeit a full entropy assessment is provided for the noise source indicating
+ * that it provides high entropy rates and considering that it deactivates
+ * when it detects insufficient hardware, the chosen under estimation of
+ * entropy is considered to be acceptable to all reviewers.
+ */
+static u32 jitterrng = CONFIG_LRNG_JENT_ENTROPY_RATE;
+#ifdef CONFIG_LRNG_RUNTIME_ES_CONFIG
+module_param(jitterrng, uint, 0644);
+MODULE_PARM_DESC(jitterrng, "Entropy in bits of 256 data bits from Jitter RNG noise source");
+#endif
+
+static bool lrng_jent_initialized = false;
+static struct rand_data *lrng_jent_state;
+
+static int __init lrng_jent_initialize(void)
+{
+	/* Initialize the Jitter RNG after the clocksources are initialized. */
+	if (jent_entropy_init() ||
+	    (lrng_jent_state = jent_entropy_collector_alloc(1, 0)) == NULL) {
+		jitterrng = 0;
+		pr_info("Jitter RNG unusable on current system\n");
+		return 0;
+	}
+	lrng_jent_initialized = true;
+	lrng_pool_add_entropy();
+	pr_debug("Jitter RNG working on current system\n");
+
+	return 0;
+}
+device_initcall(lrng_jent_initialize);
+
+/*
+ * lrng_get_jent() - Get Jitter RNG entropy
+ *
+ * @outbuf: buffer to store entropy, at least requested_bits / 8 bytes large
+ * @requested_bits: amount of data to read from the Jitter RNG, in bits
+ *
+ * Return:
+ * * > 0 on success where value provides the added entropy in bits
+ * * 0 if no fast source was available
+ */
+u32 lrng_get_jent(u8 *outbuf, u32 requested_bits)
+{
+	int ret;
+	u32 ent_bits = lrng_jent_entropylevel(requested_bits);
+	unsigned long flags;
+	static DEFINE_SPINLOCK(lrng_jent_lock);
+
+	spin_lock_irqsave(&lrng_jent_lock, flags);
+
+	if (!lrng_jent_initialized) {
+		spin_unlock_irqrestore(&lrng_jent_lock, flags);
+		return 0;
+	}
+
+	ret = jent_read_entropy(lrng_jent_state, outbuf, requested_bits >> 3);
+	spin_unlock_irqrestore(&lrng_jent_lock, flags);
+
+	if (ret) {
+		pr_debug("Jitter RNG failed with %d\n", ret);
+		return 0;
+	}
+
+	pr_debug("obtained %u bits of entropy from Jitter RNG noise source\n",
+		 ent_bits);
+
+	return ent_bits;
+}
+
+u32 lrng_jent_entropylevel(u32 requested_bits)
+{
+	return lrng_fast_noise_entropylevel((lrng_jent_initialized) ?
+					    jitterrng : 0, requested_bits);
+}
+
+void lrng_jent_es_state(unsigned char *buf, size_t buflen)
+{
+	snprintf(buf, buflen,
+		 "JitterRNG ES properties:\n"
+		 " Enabled: %s\n", lrng_jent_initialized ? "true" : "false");
+}
diff --git a/drivers/char/lrng/lrng_es_mgr.c b/drivers/char/lrng/lrng_es_mgr.c
new file mode 100644
index 0000000000000..efeb62ce0ce9e
--- /dev/null
+++ b/drivers/char/lrng/lrng_es_mgr.c
@@ -0,0 +1,373 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
+/*
+ * LRNG Entropy sources management
+ *
+ * Copyright (C) 2016 - 2021, Stephan Mueller
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include
+#include
+#include
+#include
+#include
+
+#include "lrng_internal.h"
+
+struct lrng_state {
+	bool can_invalidate;		/* Can invalidate batched entropy? */
+	bool perform_seedwork;		/* Can seed work be performed? */
+	bool lrng_operational;		/* Is DRNG operational? */
+	bool lrng_fully_seeded;		/* Is DRNG fully seeded? */
+	bool lrng_min_seeded;		/* Is DRNG minimally seeded? */
+	bool all_online_numa_node_seeded;/* All NUMA DRNGs seeded? */
+
+	/*
+	 * To ensure that external entropy providers cannot dominate the
+	 * internal noise sources but yet cannot be dominated by internal
+	 * noise sources, the following booleans are intended to allow
+	 * external to provide seed once when a DRNG reseed occurs. This
+	 * triggering of external noise source is performed even when the
+	 * entropy pool has sufficient entropy.
+	 */
+	bool lrng_seed_hw;		/* Allow HW to provide seed */
+	bool lrng_seed_user;		/* Allow user space to provide seed */
+
+	atomic_t boot_entropy_thresh;	/* Reseed threshold */
+	atomic_t reseed_in_progress;	/* Flag for an executing reseed */
+	struct work_struct lrng_seed_work;	/* (re)seed work queue */
+};
+
+static struct lrng_state lrng_state = {
+	false, false, false, false, false, false, true, true,
+	.boot_entropy_thresh	= ATOMIC_INIT(LRNG_INIT_ENTROPY_BITS),
+	.reseed_in_progress	= ATOMIC_INIT(0),
+};
+
+/********************************** Helper ***********************************/
+
+/* External entropy provider is allowed to provide seed data */
+bool lrng_state_exseed_allow(enum lrng_external_noise_source source)
+{
+	if (source == lrng_noise_source_hw)
+		return lrng_state.lrng_seed_hw;
+	return lrng_state.lrng_seed_user;
+}
+
+/* Enable / disable external entropy provider to furnish seed */
+void lrng_state_exseed_set(enum lrng_external_noise_source source, bool type)
+{
+	if (source == lrng_noise_source_hw)
+		lrng_state.lrng_seed_hw = type;
+	else
+		lrng_state.lrng_seed_user = type;
+}
+
+static void lrng_state_exseed_allow_all(void)
+{
+	lrng_state_exseed_set(lrng_noise_source_hw, true);
+	lrng_state_exseed_set(lrng_noise_source_user, true);
+}
+
+/*
+ * Reading of the LRNG pool is only allowed by one caller. The reading is
+ * only performed to (re)seed DRNGs. Thus, if this "lock" is already taken,
+ * the reseeding operation is in progress. The caller is not intended to wait
+ * but continue with its other operation.
+ *
+ * Returns 0 if the "lock" was obtained and non-zero if it was already taken.
+ */
+int lrng_pool_trylock(void)
+{
+	return atomic_cmpxchg(&lrng_state.reseed_in_progress, 0, 1);
+}
+
+void lrng_pool_unlock(void)
+{
+	atomic_set(&lrng_state.reseed_in_progress, 0);
+}
+
+/* Set new entropy threshold for reseeding during boot */
+void lrng_set_entropy_thresh(u32 new_entropy_bits)
+{
+	atomic_set(&lrng_state.boot_entropy_thresh, new_entropy_bits);
+}
+
+/*
+ * Reset LRNG state - the entropy counters are reset, but the data that may
+ * or may not have entropy remains in the pools as this data will not hurt.
+ */
+void lrng_reset_state(void)
+{
+	lrng_pool_set_entropy(0);
+	lrng_pcpu_reset();
+	lrng_state.lrng_operational = false;
+	lrng_state.lrng_fully_seeded = false;
+	lrng_state.lrng_min_seeded = false;
+	lrng_state.all_online_numa_node_seeded = false;
+	pr_debug("reset LRNG\n");
+}
+
+/* Set flag that all DRNGs are fully seeded */
+void lrng_pool_all_numa_nodes_seeded(bool set)
+{
+	lrng_state.all_online_numa_node_seeded = set;
+}
+
+/* Return boolean whether LRNG reached minimally seeded level */
+bool lrng_state_min_seeded(void)
+{
+	return lrng_state.lrng_min_seeded;
+}
+
+/* Return boolean whether LRNG reached fully seeded level */
+bool lrng_state_fully_seeded(void)
+{
+	return lrng_state.lrng_fully_seeded;
+}
+
+/* Return boolean whether LRNG is considered fully operational */
+bool lrng_state_operational(void)
+{
+	return lrng_state.lrng_operational;
+}
+
+/* Policy to check whether entropy buffer contains full seeded entropy */
+bool lrng_fully_seeded(bool fully_seeded, struct entropy_buf *eb)
+{
+	return ((eb->a_bits + eb->b_bits + eb->c_bits + eb->d_bits) >=
+		lrng_get_seed_entropy_osr(fully_seeded));
+}
+
+/* Mark one DRNG as not fully seeded */
+void lrng_unset_fully_seeded(struct lrng_drng *drng)
+{
+	drng->fully_seeded = false;
+	lrng_pool_all_numa_nodes_seeded(false);
+
+	/*
+	 * The init DRNG instance must always be fully seeded as this instance
+	 * is the fall-back if any of the per-NUMA node DRNG instances is
+	 * insufficiently seeded. Thus, we mark the entire LRNG as
+	 * non-operational if the initial DRNG becomes not fully seeded.
+	 */
+	if (drng == lrng_drng_init_instance() && lrng_state_operational()) {
+		pr_debug("LRNG set to non-operational\n");
+		lrng_state.lrng_operational = false;
+		lrng_state.lrng_fully_seeded = false;
+
+		/* If sufficient entropy is available, reseed now. */
+		lrng_pool_add_entropy();
+	}
+}
+
+/* Policy to enable LRNG operational mode */
+static void lrng_set_operational(u32 external_es)
+{
+	/* LRNG is operational if the initial DRNG is fully seeded ... */
+	if (lrng_state.lrng_fully_seeded &&
+	    /* ... and either internal ES SP800-90B startup is complete ... */
+	    (lrng_sp80090b_startup_complete() ||
+	    /* ... or the external ES provided sufficient entropy. */
+	     (lrng_get_seed_entropy_osr(lrng_state_fully_seeded()) <=
+	      external_es))) {
+		lrng_state.lrng_operational = true;
+		lrng_process_ready_list();
+		lrng_init_wakeup();
+		pr_info("LRNG fully operational\n");
+	}
+}
+
+/* Available entropy in the entire LRNG considering all entropy sources */
+u32 lrng_avail_entropy(void)
+{
+	u32 ent_thresh = lrng_security_strength();
+
+	/*
+	 * Apply oversampling during initialization according to SP800-90C as
+	 * we request a larger buffer from the ES.
+	 */
+	if (lrng_sp80090c_compliant() &&
+	    !lrng_state.all_online_numa_node_seeded)
+		ent_thresh += CONFIG_LRNG_SEED_BUFFER_INIT_ADD_BITS;
+
+	return lrng_pcpu_avail_entropy() + lrng_avail_aux_entropy() +
+	       lrng_archrandom_entropylevel(ent_thresh) +
+	       lrng_jent_entropylevel(ent_thresh);
+}
+
+/*
+ * lrng_init_ops() - Set seed stages of LRNG
+ *
+ * Set the slow noise source reseed trigger threshold. The initial threshold
+ * is set to the minimum data size that can be read from the pool: a word. Upon
+ * reaching this value, the next seed threshold of 128 bits is set followed
+ * by 256 bits.
+ *
+ * @eb: buffer containing the size of entropy currently injected into DRNG
+ */
+void lrng_init_ops(struct entropy_buf *eb)
+{
+	struct lrng_state *state = &lrng_state;
+	u32 requested_bits, seed_bits, external_es;
+
+	if (state->lrng_operational)
+		return;
+
+	requested_bits = lrng_get_seed_entropy_osr(
+					state->all_online_numa_node_seeded);
+
+	/*
+	 * Entropy provided by external entropy sources - if they provide
+	 * the requested amount of entropy, unblock the interface.
+	 */
+	external_es = eb->a_bits + eb->c_bits + eb->d_bits;
+	seed_bits = external_es + eb->b_bits;
+
+	/* DRNG is seeded with full security strength */
+	if (state->lrng_fully_seeded) {
+		lrng_set_operational(external_es);
+		lrng_set_entropy_thresh(requested_bits);
+	} else if (lrng_fully_seeded(state->all_online_numa_node_seeded, eb)) {
+		if (state->can_invalidate)
+			invalidate_batched_entropy();
+
+		state->lrng_fully_seeded = true;
+		lrng_set_operational(external_es);
+		state->lrng_min_seeded = true;
+		pr_info("LRNG fully seeded with %u bits of entropy\n",
+			seed_bits);
+		lrng_set_entropy_thresh(requested_bits);
+	} else if (!state->lrng_min_seeded) {
+
+		/* DRNG is seeded with at least 128 bits of entropy */
+		if (seed_bits >= LRNG_MIN_SEED_ENTROPY_BITS) {
+			if (state->can_invalidate)
+				invalidate_batched_entropy();
+
+			state->lrng_min_seeded = true;
+			pr_info("LRNG minimally seeded with %u bits of entropy\n",
+				seed_bits);
+			lrng_set_entropy_thresh(requested_bits);
+			lrng_init_wakeup();
+
+		/* DRNG is seeded with at least LRNG_INIT_ENTROPY_BITS bits */
+		} else if (seed_bits >= LRNG_INIT_ENTROPY_BITS) {
+			pr_info("LRNG initial entropy level %u bits of entropy\n",
+				seed_bits);
+			lrng_set_entropy_thresh(LRNG_MIN_SEED_ENTROPY_BITS);
+		}
+	}
+}
+
+int __init rand_initialize(void)
+{
+	struct seed {
+		ktime_t time;
+		unsigned long data[(LRNG_MAX_DIGESTSIZE /
+				    sizeof(unsigned long))];
+		struct new_utsname utsname;
+	} seed __aligned(LRNG_KCAPI_ALIGN);
+	unsigned int i;
+
+	BUILD_BUG_ON(LRNG_MAX_DIGESTSIZE % sizeof(unsigned long));
+
+	seed.time = ktime_get_real();
+
+	for (i = 0; i < ARRAY_SIZE(seed.data); i++) {
+		if (!arch_get_random_seed_long_early(&(seed.data[i])) &&
+		    !arch_get_random_long_early(&seed.data[i]))
+			seed.data[i] = random_get_entropy();
+	}
+	memcpy(&seed.utsname, utsname(), sizeof(*(utsname())));
+
+	lrng_pool_insert_aux((u8 *)&seed, sizeof(seed), 0);
+	memzero_explicit(&seed, sizeof(seed));
+
+	/* Initialize the seed work queue */
+	INIT_WORK(&lrng_state.lrng_seed_work, lrng_drng_seed_work);
+	lrng_state.perform_seedwork = true;
+
+	lrng_drngs_init_cc20(true);
+	invalidate_batched_entropy();
+
+	lrng_state.can_invalidate = true;
+
+	return 0;
+}
+
+/* Interface requesting a reseed of the DRNG */
+void lrng_pool_add_entropy(void)
+{
+	/*
+	 * Once all DRNGs are fully seeded, the interrupt noise
+	 * sources will not trigger any reseeding any more.
+	 */
+	if (likely(lrng_state.all_online_numa_node_seeded))
+		return;
+
+	/* Only try to reseed if the DRNG is alive. */
+	if (!lrng_get_available())
+		return;
+
+	/* Only trigger the DRNG reseed if we have collected entropy. */
+	if (lrng_avail_entropy() <
+	    atomic_read_u32(&lrng_state.boot_entropy_thresh))
+		return;
+
+	/* Ensure that the seeding only occurs once at any given time. */
+	if (lrng_pool_trylock())
+		return;
+
+	/* Seed the DRNG with any available noise. */
+	if (lrng_state.perform_seedwork)
+		schedule_work(&lrng_state.lrng_seed_work);
+	else
+		lrng_drng_seed_work(NULL);
+}
+
+/* Fill the seed buffer with data from the noise sources */
+void lrng_fill_seed_buffer(struct entropy_buf *entropy_buf, u32 requested_bits)
+{
+	struct lrng_state *state = &lrng_state;
+	u32 req_ent = lrng_sp80090c_compliant() ?
+			lrng_security_strength() : LRNG_MIN_SEED_ENTROPY_BITS;
+
+	/* Guarantee that requested bits is a multiple of bytes */
+	BUILD_BUG_ON(LRNG_DRNG_SECURITY_STRENGTH_BITS % 8);
+
+	/* always reseed the DRNG with the current time stamp */
+	entropy_buf->now = random_get_entropy();
+
+	/*
+	 * Require at least 128 bits of entropy for any reseed. If the LRNG is
+	 * operated SP800-90C compliant we want to comply with SP800-90A section
+	 * 9.2 mandating that DRNG is reseeded with the security strength.
+	 */
+	if (state->lrng_fully_seeded && (lrng_avail_entropy() < req_ent)) {
+		entropy_buf->a_bits = entropy_buf->b_bits = 0;
+		entropy_buf->c_bits = entropy_buf->d_bits = 0;
+		goto wakeup;
+	}
+
+	/* Concatenate the output of the entropy sources. */
+	entropy_buf->b_bits = lrng_pcpu_pool_hash(entropy_buf->b,
+						  requested_bits,
+						  state->lrng_fully_seeded);
+	entropy_buf->c_bits = lrng_get_arch(entropy_buf->c, requested_bits);
+	entropy_buf->d_bits = lrng_get_jent(entropy_buf->d, requested_bits);
+	lrng_get_backtrack_aux(entropy_buf, requested_bits);
+
+	/* allow external entropy provider to provide seed */
+	lrng_state_exseed_allow_all();
+
+wakeup:
+	/*
+	 * Shall we wake up user space writers? This location
+	 * ensures that the user space provider does not dominate the internal
+	 * noise sources since in case the first call of this function finds
+	 * sufficient entropy in the entropy pool, it will not trigger the
+	 * wakeup. This implies that when the next /dev/urandom read happens,
+	 * the entropy pool is drained.
+	 */
+	lrng_writer_wakeup();
+}
diff --git a/drivers/char/lrng/lrng_health.c b/drivers/char/lrng/lrng_health.c
new file mode 100644
index 0000000000000..61f266537f16a
--- /dev/null
+++ b/drivers/char/lrng/lrng_health.c
@@ -0,0 +1,410 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
+/*
+ * Linux Random Number Generator (LRNG) Health Testing
+ *
+ * Copyright (C) 2019 - 2021, Stephan Mueller
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include
+#include
+
+#include "lrng_internal.h"
+
+/* Stuck Test */
+struct lrng_stuck_test {
+	u32 last_time;		/* Stuck test: time of previous IRQ */
+	u32 last_delta;		/* Stuck test: delta of previous IRQ */
+	u32 last_delta2;	/* Stuck test: 2. time derivation of prev IRQ */
+};
+
+/* Repetition Count Test */
+struct lrng_rct {
+	atomic_t rct_count;	/* Number of stuck values */
+};
+
+/* Adaptive Proportion Test */
+struct lrng_apt {
+	/* Data window size */
+#define LRNG_APT_WINDOW_SIZE	512
+	/* LSB of time stamp to process */
+#define LRNG_APT_LSB		16
+#define LRNG_APT_WORD_MASK	(LRNG_APT_LSB - 1)
+	atomic_t apt_count;		/* APT counter */
+	atomic_t apt_base;		/* APT base reference */
+
+	atomic_t apt_trigger;
+	bool apt_base_set;	/* Is APT base set? */
+};
+
+/* The health test code must operate lock-less */
+struct lrng_health {
+	struct lrng_rct rct;
+	struct lrng_apt apt;
+
+	bool health_test_enabled;
+
+	/* SP800-90B startup health tests */
+#define LRNG_SP80090B_STARTUP_SAMPLES  1024
+#define LRNG_SP80090B_STARTUP_BLOCKS   ((LRNG_SP80090B_STARTUP_SAMPLES + \
+					 LRNG_APT_WINDOW_SIZE - 1) /    \
+					LRNG_APT_WINDOW_SIZE)
+	bool sp80090b_startup_done;
+	atomic_t sp80090b_startup_blocks;
+};
+
+static struct lrng_health lrng_health = {
+	.rct.rct_count = ATOMIC_INIT(0),
+
+	.apt.apt_count = ATOMIC_INIT(0),
+	.apt.apt_base = ATOMIC_INIT(-1),
+	.apt.apt_trigger = ATOMIC_INIT(LRNG_APT_WINDOW_SIZE),
+	.apt.apt_base_set = false,
+
+	.health_test_enabled = true,
+
+	.sp80090b_startup_blocks = ATOMIC_INIT(LRNG_SP80090B_STARTUP_BLOCKS),
+	.sp80090b_startup_done = false,
+};
+
+static DEFINE_PER_CPU(struct lrng_stuck_test, lrng_stuck_test);
+
+static bool lrng_sp80090b_health_requested(void)
+{
+	/* Health tests are only requested in FIPS mode */
+	return fips_enabled;
+}
+
+static bool lrng_sp80090b_health_enabled(void)
+{
+	struct lrng_health *health = &lrng_health;
+
+	return lrng_sp80090b_health_requested() && health->health_test_enabled;
+}
+
+/***************************************************************************
+ * SP800-90B Compliance
+ *
+ * If the Linux-RNG is booted into FIPS mode, the following interfaces
+ * provide an SP800-90B compliant noise source:
+ *
+ * * /dev/random
+ * * getrandom(2)
+ * * get_random_bytes when using it in conjunction with
+ *   add_random_ready_callback
+ *
+ * All other interfaces, including /dev/urandom or get_random_bytes without
+ * the add_random_ready_callback cannot claim to use an SP800-90B compliant
+ * noise source.
+ ***************************************************************************/
+
+/*
+ * Perform SP800-90B startup testing: count down one startup block per call
+ */
+static void lrng_sp80090b_startup(struct lrng_health *health)
+{
+	if (!health->sp80090b_startup_done &&
+	    atomic_dec_and_test(&health->sp80090b_startup_blocks)) {
+		struct entropy_buf eb;
+
+		health->sp80090b_startup_done = true;
+		pr_info("SP800-90B startup health tests completed\n");
+		memset(&eb, 0, sizeof(eb));
+		lrng_init_ops(&eb);
+
+		/*
+		 * Force a reseed of DRNGs to ensure they are seeded with
+		 * entropy that passed the SP800-90B health tests.
+		 * As the DRNG always will reseed before generating
+		 * random numbers, it does not need a reseed trigger.
+		 */
+		lrng_drng_force_reseed();
+	}
+}
+
+/*
+ * Handle failure of SP800-90B startup testing
+ */
+static void lrng_sp80090b_startup_failure(struct lrng_health *health)
+{
+	/* Reset of LRNG and its entropy - NOTE: we are in atomic context */
+	lrng_reset();
+
+	/*
+	 * Reset the SP800-90B startup test.
+	 *
+	 * NOTE SP800-90B section 4.3 bullet 4 does not specify what
+	 * exactly is to be done in case of failure! Thus, we do what
+	 * makes sense, i.e. restarting the health test and thus gating
+	 * the output function of /dev/random and getrandom(2).
+	 */
+	atomic_set(&health->sp80090b_startup_blocks,
+		   LRNG_SP80090B_STARTUP_BLOCKS);
+}
+
+/*
+ * Handle failure of SP800-90B runtime testing
+ */
+static void lrng_sp80090b_runtime_failure(struct lrng_health *health)
+{
+	lrng_sp80090b_startup_failure(health);
+	health->sp80090b_startup_done = false;
+}
+
+static void lrng_sp80090b_failure(struct lrng_health *health)
+{
+	if (health->sp80090b_startup_done) {
+		pr_err("SP800-90B runtime health test failure - invalidating all existing entropy and initiate SP800-90B startup\n");
+		lrng_sp80090b_runtime_failure(health);
+	} else {
+		pr_err("SP800-90B startup test failure - resetting\n");
+		lrng_sp80090b_startup_failure(health);
+	}
+}
+
+/*
+ * Is the SP800-90B startup testing complete?
+ *
+ * This function is called by the LRNG to determine whether to unblock
+ * a certain user interface. Therefore, only the potentially blocking
+ * user interfaces are considered SP800-90B compliant.
+ *
+ * Return: true when the SP800-90B health tests are not enabled, otherwise
+ *	   the current state of the startup-done flag.
+ */
+bool lrng_sp80090b_startup_complete(void)
+{
+	struct lrng_health *health = &lrng_health;
+
+	return (lrng_sp80090b_health_enabled()) ?
+		health->sp80090b_startup_done : true;
+}
+
+/*
+ * Is the noise source currently operated in an SP800-90B compliant way?
+ *
+ * Return: true only when the SP800-90B health tests are enabled and the
+ *	   startup tests have completed.
+ */
+bool lrng_sp80090b_compliant(void)
+{
+	struct lrng_health *health = &lrng_health;
+
+	return lrng_sp80090b_health_enabled() && health->sp80090b_startup_done;
+}
+
+/***************************************************************************
+ * Adaptive Proportion Test
+ *
+ * This test complies with SP800-90B section 4.4.2.
+ ***************************************************************************/
+
+/*
+ * Reset the APT counter
+ *
+ * The counter is set back to zero and @time_masked is stored as the new
+ * base value the following time stamps are compared against.
+ *
+ * @health [in] Reference to health state
+ * @time_masked [in] Value to store as the new APT base
+ */
+static void lrng_apt_reset(struct lrng_health *health,
+			   unsigned int time_masked)
+{
+	struct lrng_apt *apt = &health->apt;
+
+	/* Log the outgoing window before its values are overwritten */
+	pr_debug("APT value %d for base %d\n",
+		 atomic_read(&apt->apt_count), atomic_read(&apt->apt_base));
+
+	/* Reset APT */
+	atomic_set(&apt->apt_count, 0);
+	atomic_set(&apt->apt_base, time_masked);
+}
+
+/*
+ * Restart the APT window
+ *
+ * Sets the trigger counter back to LRNG_APT_WINDOW_SIZE.
+ *
+ * @health [in] Reference to health state
+ */
+static void lrng_apt_restart(struct lrng_health *health)
+{
+	struct lrng_apt *apt = &health->apt;
+
+	atomic_set(&apt->apt_trigger, LRNG_APT_WINDOW_SIZE);
+}
+
+/*
+ * Insert a new entropy event into APT
+ *
+ * This function is void as it does not decide about the fate of a time
+ * stamp. An APT failure can only happen at the same time of a stuck test
+ * failure. Thus, the stuck failure will already decide how the time stamp
+ * is handled.
+ * + * @health [in] Reference to health state + * @now_time [in] Time stamp to process + */ +static void lrng_apt_insert(struct lrng_health *health, + unsigned int now_time) +{ + struct lrng_apt *apt = &health->apt; + + if (!lrng_sp80090b_health_requested()) + return; + + now_time &= LRNG_APT_WORD_MASK; + + /* Initialization of APT */ + if (!apt->apt_base_set) { + atomic_set(&apt->apt_base, now_time); + apt->apt_base_set = true; + return; + } + + if (now_time == (unsigned int)atomic_read(&apt->apt_base)) { + u32 apt_val = (u32)atomic_inc_return_relaxed(&apt->apt_count); + + if (apt_val >= CONFIG_LRNG_APT_CUTOFF) + lrng_sp80090b_failure(health); + } + + if (atomic_dec_and_test(&apt->apt_trigger)) { + lrng_apt_restart(health); + lrng_apt_reset(health, now_time); + lrng_sp80090b_startup(health); + } +} + +/*************************************************************************** + * Repetition Count Test + * + * The LRNG uses an enhanced version of the Repetition Count Test + * (RCT) specified in SP800-90B section 4.4.1. Instead of counting identical + * back-to-back values, the input to the RCT is the counting of the stuck + * values while filling the entropy pool. + * + * The RCT is applied with an alpha of 2^-30 compliant to FIPS 140-2 IG 9.8. + * + * During the counting operation, the LRNG always calculates the RCT + * cut-off value of C. If that value exceeds the allowed cut-off value, + * the LRNG will invalidate all entropy for the entropy pool which implies + * that no data can be extracted from the entropy pool unless new entropy + * is received. 
+ ***************************************************************************/ + +/* + * Hot code path - Insert data for Repetition Count Test + * + * @health: Reference to health information + * @stuck: Decision of stuck test + */ +static void lrng_rct(struct lrng_health *health, int stuck) +{ + struct lrng_rct *rct = &health->rct; + + if (!lrng_sp80090b_health_requested()) + return; + + if (stuck) { + u32 rct_count = atomic_add_return_relaxed(1, &rct->rct_count); + + pr_debug("RCT count: %u\n", rct_count); + + /* + * The cutoff value is based on the following consideration: + * alpha = 2^-30 as recommended in FIPS 140-2 IG 9.8. + * In addition, we imply an entropy value H of 1 bit as this + * is the minimum entropy required to provide full entropy. + * + * Note, rct_count (which equals to value B in the + * pseudo code of SP800-90B section 4.4.1) starts with zero. + * Hence we need to subtract one from the cutoff value as + * calculated following SP800-90B. + */ + if (rct_count >= CONFIG_LRNG_RCT_CUTOFF) { + atomic_set(&rct->rct_count, 0); + + /* + * APT must start anew as we consider all previously + * recorded data to contain no entropy. + */ + lrng_apt_restart(health); + + lrng_sp80090b_failure(health); + } + } else { + atomic_set(&rct->rct_count, 0); + } +} + +/*************************************************************************** + * Stuck Test + * + * Checking the: + * 1st derivative of the event occurrence (time delta) + * 2nd derivative of the event occurrence (delta of time deltas) + * 3rd derivative of the event occurrence (delta of delta of time deltas) + * + * All values must always be non-zero. The stuck test is only valid disabled if + * high-resolution time stamps are identified after initialization. + ***************************************************************************/ + +static u32 lrng_delta(u32 prev, u32 next) +{ + /* + * Note that this (unsigned) subtraction does yield the correct value + * in the wraparound-case, i.e. 
when next < prev.
+	 */
+	return (next - prev);
+}
+
+/*
+ * Hot code path - Stuck test for one time stamp
+ *
+ * Computes the first, second and third discrete derivative of the time
+ * stamp against the values remembered in @stuck and stores the new time
+ * stamp and the first two deltas back into @stuck.
+ *
+ * @stuck: Stuck test state holding the previous time stamp and deltas
+ * @now_time: Event time
+ * @return: 0 event occurrence not stuck (good time stamp)
+ *	    != 0 event occurrence stuck (reject time stamp)
+ */
+static int lrng_irq_stuck(struct lrng_stuck_test *stuck, u32 now_time)
+{
+	u32 delta = lrng_delta(stuck->last_time, now_time);
+	u32 delta2 = lrng_delta(stuck->last_delta, delta);
+	u32 delta3 = lrng_delta(stuck->last_delta2, delta2);
+
+	/* The state is updated before the verdict, i.e. also for stuck values */
+	stuck->last_time = now_time;
+	stuck->last_delta = delta;
+	stuck->last_delta2 = delta2;
+
+	/* A single zero derivative marks the time stamp as stuck */
+	if (!delta || !delta2 || !delta3)
+		return 1;
+
+	return 0;
+}
+
+/***************************************************************************
+ * Health test interfaces
+ ***************************************************************************/
+
+/*
+ * Disable all health tests
+ *
+ * Clears the enabled flag and warns if SP800-90B compliance was requested,
+ * as that compliance can then no longer be claimed.
+ */
+void lrng_health_disable(void)
+{
+	struct lrng_health *health = &lrng_health;
+
+	health->health_test_enabled = false;
+
+	if (lrng_sp80090b_health_requested())
+		pr_warn("SP800-90B compliance requested but the Linux RNG is NOT SP800-90B compliant\n");
+}
+
+/*
+ * Hot code path - Perform health test on time stamp received from an event
+ *
+ * The time stamp is fed to the APT, then to the stuck test of the current
+ * CPU, and the stuck verdict is handed to the RCT.
+ *
+ * @now_time Time stamp
+ *
+ * Return: lrng_health_pass if the health tests are disabled or the time
+ *	   stamp is not stuck; for a stuck time stamp lrng_health_fail_drop
+ *	   when SP800-90B health testing is requested, otherwise
+ *	   lrng_health_fail_use.
+ */
+enum lrng_health_res lrng_health_test(u32 now_time)
+{
+	struct lrng_health *health = &lrng_health;
+	struct lrng_stuck_test *stuck_test = this_cpu_ptr(&lrng_stuck_test);
+	int stuck;
+
+	if (!health->health_test_enabled)
+		return lrng_health_pass;
+
+	lrng_apt_insert(health, now_time);
+
+	stuck = lrng_irq_stuck(stuck_test, now_time);
+	lrng_rct(health, stuck);
+	if (stuck) {
+		/* SP800-90B disallows using a failing health test time stamp */
+		return lrng_sp80090b_health_requested() ?
+ lrng_health_fail_drop : lrng_health_fail_use; + } + + return lrng_health_pass; +} diff --git a/drivers/char/lrng/lrng_interfaces.c b/drivers/char/lrng/lrng_interfaces.c new file mode 100644 index 0000000000000..134e556ac7392 --- /dev/null +++ b/drivers/char/lrng/lrng_interfaces.c @@ -0,0 +1,654 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause +/* + * LRNG User and kernel space interfaces + * + * Copyright (C) 2016 - 2021, Stephan Mueller + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define CREATE_TRACE_POINTS +#include + +#include "lrng_internal.h" + +/* + * If the entropy count falls under this number of bits, then we + * should wake up processes which are selecting or polling on write + * access to /dev/random. + */ +u32 lrng_write_wakeup_bits = (LRNG_WRITE_WAKEUP_ENTROPY << 3); + +static LIST_HEAD(lrng_ready_list); +static DEFINE_SPINLOCK(lrng_ready_list_lock); + +static DECLARE_WAIT_QUEUE_HEAD(lrng_write_wait); +static DECLARE_WAIT_QUEUE_HEAD(lrng_init_wait); +static struct fasync_struct *fasync; + +/********************************** Helper ***********************************/ + +/* Is the DRNG seed level too low? */ +static bool lrng_need_entropy(void) +{ + return (lrng_avail_aux_entropy() < lrng_write_wakeup_bits); +} + +void lrng_writer_wakeup(void) +{ + if (lrng_need_entropy() && wq_has_sleeper(&lrng_write_wait)) { + wake_up_interruptible(&lrng_write_wait); + kill_fasync(&fasync, SIGIO, POLL_OUT); + } +} + +void lrng_init_wakeup(void) +{ + wake_up_all(&lrng_init_wait); + kill_fasync(&fasync, SIGIO, POLL_IN); +} + +/** + * lrng_process_ready_list() - Ping all kernel internal callers waiting until + * the DRNG is completely initialized to inform that the DRNG reached that + * seed level. + * + * When the SP800-90B testing is enabled, the ping only happens if the SP800-90B + * startup health tests are completed. 
This implies that kernel internal
+ * callers always have an SP800-90B compliant noise source when being
+ * pinged.
+ */
+void lrng_process_ready_list(void)
+{
+	unsigned long flags;
+	struct random_ready_callback *rdy, *tmp;
+
+	/* Nobody is pinged before the LRNG reports itself operational */
+	if (!lrng_state_operational())
+		return;
+
+	spin_lock_irqsave(&lrng_ready_list_lock, flags);
+	list_for_each_entry_safe(rdy, tmp, &lrng_ready_list, list) {
+		/* Fetch the owner first: rdy is not touched after func() */
+		struct module *owner = rdy->owner;
+
+		list_del_init(&rdy->list);
+		rdy->func(rdy);
+		module_put(owner);
+	}
+	spin_unlock_irqrestore(&lrng_ready_list_lock, flags);
+}
+
+/*
+ * lrng_debug_report_seedlevel() - Report use of the LRNG before it reached
+ * the minimally seeded level.
+ *
+ * Compiled to an empty function unless CONFIG_WARN_ALL_UNSEEDED_RANDOM is
+ * set. The return address of the most recent call is remembered and a call
+ * from that same address is skipped, so one call site does not repeat its
+ * notice back to back.
+ *
+ * @name: name of the interface that was invoked, printed in the notice
+ */
+void lrng_debug_report_seedlevel(const char *name)
+{
+#ifdef CONFIG_WARN_ALL_UNSEEDED_RANDOM
+	static void *previous = NULL;
+	void *caller = (void *) _RET_IP_;
+
+	if (READ_ONCE(previous) == caller)
+		return;
+
+	if (!lrng_state_min_seeded())
+		pr_notice("%pS %s called without reaching minimally seeded level (available entropy %u)\n",
+			  caller, name, lrng_avail_entropy());
+
+	/* Remembered even when nothing was printed */
+	WRITE_ONCE(previous, caller);
+#endif
+}
+
+/************************ LRNG kernel input interfaces ************************/
+
+/*
+ * add_hwgenerator_randomness() - Interface for in-kernel drivers of true
+ * hardware RNGs.
+ *
+ * Those devices may produce endless random bits and will be throttled
+ * when our pool is full.
+ *
+ * @buffer: buffer holding the entropic data from HW noise sources to be used to
+ *	    insert into entropy pool.
+ * @count: length of buffer
+ * @entropy_bits: amount of entropy in buffer (value is in bits)
+ */
+void add_hwgenerator_randomness(const char *buffer, size_t count,
+				size_t entropy_bits)
+{
+	/*
+	 * Suspend writing if we are fully loaded with entropy.
+	 * We'll be woken up again once below lrng_write_wakeup_bits,
+	 * or when the calling thread is about to terminate.
+	 */
+	wait_event_interruptible(lrng_write_wait,
+				 lrng_need_entropy() ||
+				 lrng_state_exseed_allow(lrng_noise_source_hw) ||
+				 kthread_should_stop());
+	lrng_state_exseed_set(lrng_noise_source_hw, false);
+	lrng_pool_insert_aux(buffer, count, entropy_bits);
+}
+EXPORT_SYMBOL_GPL(add_hwgenerator_randomness);
+
+/*
+ * add_bootloader_randomness() - Handle random seed passed by bootloader.
+ *
+ * If the seed is trustworthy, it would be regarded as hardware RNGs. Otherwise
+ * it would be regarded as device data.
+ * The decision is controlled by CONFIG_RANDOM_TRUST_BOOTLOADER: when it is
+ * enabled the seed is credited with 8 bits of entropy per byte, otherwise
+ * with none.
+ *
+ * @buf: buffer holding the seed data handed over by the bootloader that is
+ *	 inserted into the entropy pool.
+ * @size: length of buffer
+ */
+void add_bootloader_randomness(const void *buf, unsigned int size)
+{
+	lrng_pool_insert_aux(buf, size,
+			     IS_ENABLED(CONFIG_RANDOM_TRUST_BOOTLOADER) ?
+			     size * 8 : 0);
+}
+EXPORT_SYMBOL_GPL(add_bootloader_randomness);
+
+/*
+ * Callback for HID layer -- use the HID event values to stir the entropy pool
+ *
+ * An event carrying the same value as the previous one is dropped.
+ *
+ * @type: event type
+ * @code: event code
+ * @value: event value
+ */
+void add_input_randomness(unsigned int type, unsigned int code,
+			  unsigned int value)
+{
+	/*
+	 * Same width as @value. An unsigned char here truncated the stored
+	 * value, so a repeated value above 255 was never recognized as a
+	 * repeat and a new value equal to the truncated old one was dropped.
+	 */
+	static unsigned int last_value;
+
+	/* ignore autorepeat and the like */
+	if (value == last_value)
+		return;
+
+	last_value = value;
+
+	lrng_pcpu_array_add_u32((type << 4) ^ code ^ (code >> 4) ^ value);
+}
+EXPORT_SYMBOL_GPL(add_input_randomness);
+
+/*
+ * add_device_randomness() - Add device- or boot-specific data to the entropy
+ * pool to help initialize it.
+ *
+ * None of this adds any entropy; it is meant to avoid the problem of
+ * the entropy pool having similar initial state across largely
+ * identical devices.
+ *
+ * @buf: buffer holding the device- or boot-specific data to be used to
+ *	 insert into entropy pool.
+ * @size: length of buffer + */ +void add_device_randomness(const void *buf, unsigned int size) +{ + lrng_pool_insert_aux((u8 *)buf, size, 0); +} +EXPORT_SYMBOL(add_device_randomness); + +#ifdef CONFIG_BLOCK +void rand_initialize_disk(struct gendisk *disk) { } +void add_disk_randomness(struct gendisk *disk) { } +EXPORT_SYMBOL(add_disk_randomness); +#endif + +#ifndef CONFIG_LRNG_IRQ +void add_interrupt_randomness(int irq) { } +EXPORT_SYMBOL(add_interrupt_randomness); +#endif + +/* + * del_random_ready_callback() - Delete a previously registered readiness + * callback function. + * + * @rdy: callback definition that was registered initially + */ +void del_random_ready_callback(struct random_ready_callback *rdy) +{ + unsigned long flags; + struct module *owner = NULL; + + spin_lock_irqsave(&lrng_ready_list_lock, flags); + if (!list_empty(&rdy->list)) { + list_del_init(&rdy->list); + owner = rdy->owner; + } + spin_unlock_irqrestore(&lrng_ready_list_lock, flags); + + module_put(owner); +} +EXPORT_SYMBOL(del_random_ready_callback); + +/* + * add_random_ready_callback() - Add a callback function that will be invoked + * when the DRNG is fully initialized and seeded. 
+ * + * @rdy: callback definition to be invoked when the LRNG is seeded + * + * Return: + * * 0 if callback is successfully added + * * -EALREADY if pool is already initialised (callback not called) + * * -ENOENT if module for callback is not alive + */ +int add_random_ready_callback(struct random_ready_callback *rdy) +{ + struct module *owner; + unsigned long flags; + int err = -EALREADY; + + if (likely(lrng_state_operational())) + return err; + + owner = rdy->owner; + if (!try_module_get(owner)) + return -ENOENT; + + spin_lock_irqsave(&lrng_ready_list_lock, flags); + if (lrng_state_operational()) + goto out; + + owner = NULL; + + list_add(&rdy->list, &lrng_ready_list); + err = 0; + +out: + spin_unlock_irqrestore(&lrng_ready_list_lock, flags); + + module_put(owner); + + return err; +} +EXPORT_SYMBOL(add_random_ready_callback); + +/*********************** LRNG kernel output interfaces ************************/ + +/* + * get_random_bytes() - Provider of cryptographic strong random numbers for + * kernel-internal usage. + * + * This function is appropriate for all in-kernel use cases. However, + * it will always use the ChaCha20 DRNG. + * + * @buf: buffer to store the random bytes + * @nbytes: size of the buffer + */ +void get_random_bytes(void *buf, int nbytes) +{ + lrng_drng_get_atomic((u8 *)buf, (u32)nbytes); + lrng_debug_report_seedlevel("get_random_bytes"); +} +EXPORT_SYMBOL(get_random_bytes); + +/* + * get_random_bytes_full() - Provider of cryptographic strong random numbers + * for kernel-internal usage. + * + * This function is appropriate only for non-atomic use cases as this + * function may sleep. Though, it provides access to the full functionality + * of LRNG including the switchable DRNG support, that may support other + * DRNGs such as the SP800-90A DRBG. 
+ * + * @buf: buffer to store the random bytes + * @nbytes: size of the buffer + */ +void get_random_bytes_full(void *buf, int nbytes) +{ + lrng_drng_get_sleep((u8 *)buf, (u32)nbytes); + lrng_debug_report_seedlevel("get_random_bytes_full"); +} +EXPORT_SYMBOL(get_random_bytes_full); + +/* + * wait_for_random_bytes() - Wait for the LRNG to be seeded and thus + * guaranteed to supply cryptographically secure random numbers. + * + * This applies to: the /dev/urandom device, the get_random_bytes function, + * and the get_random_{u32,u64,int,long} family of functions. Using any of + * these functions without first calling this function forfeits the guarantee + * of security. + * + * Return: + * * 0 if the LRNG has been seeded. + * * -ERESTARTSYS if the function was interrupted by a signal. + */ +int wait_for_random_bytes(void) +{ + if (likely(lrng_state_min_seeded())) + return 0; + return wait_event_interruptible(lrng_init_wait, + lrng_state_min_seeded()); +} +EXPORT_SYMBOL(wait_for_random_bytes); + +/* + * get_random_bytes_arch() - This function will use the architecture-specific + * hardware random number generator if it is available. + * + * The arch-specific hw RNG will almost certainly be faster than what we can + * do in software, but it is impossible to verify that it is implemented + * securely (as opposed, to, say, the AES encryption of a sequence number using + * a key known by the NSA). So it's useful if we need the speed, but only if + * we're willing to trust the hardware manufacturer not to have put in a back + * door. + * + * @buf: buffer allocated by caller to store the random data in + * @nbytes: length of outbuf + * + * Return: number of bytes filled in. 
+ */
+int __must_check get_random_bytes_arch(void *buf, int nbytes)
+{
+	u8 *p = buf;
+	int left = nbytes;	/* bytes of @buf not yet filled */
+
+	/* Take as much as possible straight from the architecture RNG */
+	while (left > 0) {
+		unsigned long v;
+		int chunk = min_t(int, left, sizeof(unsigned long));
+
+		if (!arch_get_random_long(&v))
+			break;
+
+		memcpy(p, &v, chunk);
+		p += chunk;
+		left -= chunk;
+	}
+
+	/* Arch RNG missing or exhausted: the DRNG supplies the remainder */
+	if (left > 0) {
+		/*
+		 * NOTE(review): assumes lrng_drng_get_atomic() returns the
+		 * number of bytes generated or a negative error code, the way
+		 * the return value of lrng_drng_get_sleep() is consumed in
+		 * lrng_read_common() - confirm against its definition.
+		 */
+		int rc = lrng_drng_get_atomic(p, (u32)left);
+
+		if (rc > 0)
+			left -= min_t(int, rc, left);
+	}
+
+	/*
+	 * Report how many bytes of @buf were filled. Returning the count
+	 * that was still outstanding after the arch loop would yield 0
+	 * exactly when the arch RNG delivered the complete request.
+	 */
+	return nbytes - left;
+}
+EXPORT_SYMBOL(get_random_bytes_arch);
+
+/*
+ * Returns whether or not the LRNG has been seeded.
+ *
+ * Returns: true if the urandom pool has been seeded.
+ *	    false if the urandom pool has not been seeded.
+ */
+bool rng_is_initialized(void)
+{
+	return lrng_state_operational();
+}
+EXPORT_SYMBOL(rng_is_initialized);
+
+/************************ LRNG user output interfaces *************************/
+
+/*
+ * Generate random data with the DRNG and copy it to user space.
+ *
+ * @buf: user space buffer receiving the data
+ * @nbytes: number of bytes requested
+ *
+ * Return: number of bytes copied to @buf, or a negative error code
+ */
+static ssize_t lrng_read_common(char __user *buf, size_t nbytes)
+{
+	ssize_t ret = 0;
+	u8 tmpbuf[LRNG_DRNG_BLOCKSIZE] __aligned(LRNG_KCAPI_ALIGN);
+	u8 *tmp_large = NULL, *tmp = tmpbuf;
+	u32 tmplen = sizeof(tmpbuf);
+
+	if (nbytes == 0)
+		return 0;
+
+	/*
+	 * Satisfy large read requests -- as the common case are smaller
+	 * request sizes, such as 16 or 32 bytes, avoid a kmalloc overhead for
+	 * those by using the stack variable of tmpbuf.
+	 */
+	if (!CONFIG_BASE_SMALL && (nbytes > sizeof(tmpbuf))) {
+		tmplen = min_t(u32, nbytes, LRNG_DRNG_MAX_REQSIZE);
+		tmp_large = kmalloc(tmplen + LRNG_KCAPI_ALIGN, GFP_KERNEL);
+		/* On allocation failure fall back to the stack buffer */
+		if (!tmp_large)
+			tmplen = sizeof(tmpbuf);
+		else
+			tmp = PTR_ALIGN(tmp_large, LRNG_KCAPI_ALIGN);
+	}
+
+	while (nbytes) {
+		u32 todo = min_t(u32, nbytes, tmplen);
+		int rc = 0;
+
+		/* Reschedule if we received a large request.
*/ + if ((tmp_large) && need_resched()) { + if (signal_pending(current)) { + if (ret == 0) + ret = -ERESTARTSYS; + break; + } + schedule(); + } + + rc = lrng_drng_get_sleep(tmp, todo); + if (rc <= 0) { + if (rc < 0) + ret = rc; + break; + } + if (copy_to_user(buf, tmp, rc)) { + ret = -EFAULT; + break; + } + + nbytes -= rc; + buf += rc; + ret += rc; + } + + /* Wipe data just returned from memory */ + if (tmp_large) + kfree_sensitive(tmp_large); + else + memzero_explicit(tmpbuf, sizeof(tmpbuf)); + + return ret; +} + +static ssize_t +lrng_read_common_block(int nonblock, char __user *buf, size_t nbytes) +{ + if (nbytes == 0) + return 0; + + if (unlikely(!lrng_state_operational())) { + int ret; + + if (nonblock) + return -EAGAIN; + + ret = wait_event_interruptible(lrng_init_wait, + lrng_state_operational()); + if (unlikely(ret)) + return ret; + } + + return lrng_read_common(buf, nbytes); +} + +static ssize_t lrng_drng_read_block(struct file *file, char __user *buf, + size_t nbytes, loff_t *ppos) +{ + return lrng_read_common_block(file->f_flags & O_NONBLOCK, buf, nbytes); +} + +static __poll_t lrng_random_poll(struct file *file, poll_table *wait) +{ + __poll_t mask; + + poll_wait(file, &lrng_init_wait, wait); + poll_wait(file, &lrng_write_wait, wait); + mask = 0; + if (lrng_state_operational()) + mask |= EPOLLIN | EPOLLRDNORM; + if (lrng_need_entropy() || + lrng_state_exseed_allow(lrng_noise_source_user)) { + lrng_state_exseed_set(lrng_noise_source_user, false); + mask |= EPOLLOUT | EPOLLWRNORM; + } + return mask; +} + +static ssize_t lrng_drng_write_common(const char __user *buffer, size_t count, + u32 entropy_bits) +{ + ssize_t ret = 0; + u8 buf[64] __aligned(LRNG_KCAPI_ALIGN); + const char __user *p = buffer; + u32 orig_entropy_bits = entropy_bits; + + if (!lrng_get_available()) + return -EAGAIN; + + count = min_t(size_t, count, INT_MAX); + while (count > 0) { + size_t bytes = min_t(size_t, count, sizeof(buf)); + u32 ent = min_t(u32, bytes<<3, entropy_bits); + + if 
(copy_from_user(&buf, p, bytes))
+			return -EFAULT;
+		/* Inject data into entropy pool */
+		lrng_pool_insert_aux(buf, bytes, ent);
+
+		count -= bytes;
+		p += bytes;
+		ret += bytes;
+		entropy_bits -= ent;
+
+		cond_resched();
+	}
+
+	/* Force reseed of DRNG during next data request. */
+	if (!orig_entropy_bits)
+		lrng_drng_force_reseed();
+
+	return ret;
+}
+
+/*
+ * Non-blocking read handler: logs when the DRNG is used before it reached
+ * the minimally seeded or the operational level, then serves the request.
+ */
+static ssize_t lrng_drng_read(struct file *file, char __user *buf,
+			      size_t nbytes, loff_t *ppos)
+{
+	if (!lrng_state_min_seeded())
+		pr_notice_ratelimited("%s - use of insufficiently seeded DRNG (%zu bytes read)\n",
+				      current->comm, nbytes);
+	else if (!lrng_state_operational())
+		pr_debug_ratelimited("%s - use of not fully seeded DRNG (%zu bytes read)\n",
+				     current->comm, nbytes);
+
+	return lrng_read_common(buf, nbytes);
+}
+
+/* Write handler: data is inserted into the pool and credited with 0 bits */
+static ssize_t lrng_drng_write(struct file *file, const char __user *buffer,
+			       size_t count, loff_t *ppos)
+{
+	return lrng_drng_write_common(buffer, count, 0);
+}
+
+/*
+ * ioctl handler shared by the random and urandom device files.
+ *
+ * RNDGETENTCNT reports the available entropy. All other supported commands
+ * require CAP_SYS_ADMIN: RNDADDTOENTCNT adjusts the entropy counter,
+ * RNDADDENTROPY inserts data together with an entropy claim,
+ * RNDZAPENTCNT/RNDCLEARPOOL set the entropy counter to zero and
+ * RNDRESEEDCRNG forces a reseed of the DRNGs.
+ *
+ * Return: 0 on success, -EPERM without the required capability, -EFAULT on a
+ *	   failed user space access, -EINVAL for negative input values or an
+ *	   unknown command, or the error of lrng_drng_write_common()
+ */
+static long lrng_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
+{
+	u32 digestsize_bits;
+	int size, ent_count_bits;
+	ssize_t written;
+	int __user *p = (int __user *)arg;
+
+	switch (cmd) {
+	case RNDGETENTCNT:
+		ent_count_bits = lrng_avail_entropy();
+		if (put_user(ent_count_bits, p))
+			return -EFAULT;
+		return 0;
+	case RNDADDTOENTCNT:
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+		if (get_user(ent_count_bits, p))
+			return -EFAULT;
+		ent_count_bits = (int)lrng_avail_aux_entropy() + ent_count_bits;
+		/* Keep the new counter within [0, digest size in bits] */
+		if (ent_count_bits < 0)
+			ent_count_bits = 0;
+		digestsize_bits = lrng_get_digestsize();
+		if (ent_count_bits > digestsize_bits)
+			ent_count_bits = digestsize_bits;
+		lrng_pool_set_entropy(ent_count_bits);
+		return 0;
+	case RNDADDENTROPY:
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+		if (get_user(ent_count_bits, p++))
+			return -EFAULT;
+		if (ent_count_bits < 0)
+			return -EINVAL;
+		if (get_user(size, p++))
+			return -EFAULT;
+		if (size < 0)
+			return -EINVAL;
+		/*
+		 * There cannot be more entropy than data. size << 3 does not
+		 * fit into an int once size exceeds INT_MAX >> 3; in that
+		 * range the non-negative int ent_count_bits is necessarily
+		 * smaller than size * 8 and needs no capping.
+		 */
+		if (size <= (INT_MAX >> 3))
+			ent_count_bits = min(ent_count_bits, size << 3);
+		written = lrng_drng_write_common((const char __user *)p, size,
+						 ent_count_bits);
+		/* Success is reported as 0 and not as the byte count */
+		return (written < 0) ? written : 0;
+	case RNDZAPENTCNT:
+	case RNDCLEARPOOL:
+		/* Clear the entropy pool counter. */
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+		lrng_pool_set_entropy(0);
+		return 0;
+	case RNDRESEEDCRNG:
+		/*
+		 * We leave the capability check here since it is present
+		 * in the upstream's RNG implementation. Yet, user space
+		 * can trigger a reseed as easy as writing into /dev/random
+		 * or /dev/urandom where no privilege is needed.
+		 */
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+		/* Force a reseed of all DRNGs */
+		lrng_drng_force_reseed();
+		return 0;
+	default:
+		return -EINVAL;
+	}
+}
+
+static int lrng_fasync(int fd, struct file *filp, int on)
+{
+	return fasync_helper(fd, filp, on, &fasync);
+}
+
+const struct file_operations random_fops = {
+	.read = lrng_drng_read_block,
+	.write = lrng_drng_write,
+	.poll = lrng_random_poll,
+	.unlocked_ioctl = lrng_ioctl,
+	.compat_ioctl = compat_ptr_ioctl,
+	.fasync = lrng_fasync,
+	.llseek = noop_llseek,
+};
+
+const struct file_operations urandom_fops = {
+	.read = lrng_drng_read,
+	.write = lrng_drng_write,
+	.unlocked_ioctl = lrng_ioctl,
+	.compat_ioctl = compat_ptr_ioctl,
+	.fasync = lrng_fasync,
+	.llseek = noop_llseek,
+};
+
+SYSCALL_DEFINE3(getrandom, char __user *, buf, size_t, count,
+		unsigned int, flags)
+{
+	if (flags & ~(GRND_NONBLOCK|GRND_RANDOM|GRND_INSECURE))
+		return -EINVAL;
+
+	/*
+	 * Requesting insecure and blocking randomness at the same time makes
+	 * no sense.
+ */ + if ((flags & + (GRND_INSECURE|GRND_RANDOM)) == (GRND_INSECURE|GRND_RANDOM)) + return -EINVAL; + + if (count > INT_MAX) + count = INT_MAX; + + if (flags & GRND_INSECURE) + return lrng_drng_read(NULL, buf, count, NULL); + + return lrng_read_common_block(flags & GRND_NONBLOCK, buf, count); +} diff --git a/drivers/char/lrng/lrng_internal.h b/drivers/char/lrng/lrng_internal.h new file mode 100644 index 0000000000000..c01c4e5fa44f5 --- /dev/null +++ b/drivers/char/lrng/lrng_internal.h @@ -0,0 +1,487 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ +/* + * Copyright (C) 2018 - 2021, Stephan Mueller + */ + +#ifndef _LRNG_INTERNAL_H +#define _LRNG_INTERNAL_H + +#include +#include +#include +#include +#include +#include +#include +#include + +/*************************** General LRNG parameter ***************************/ + +/* Security strength of LRNG -- this must match DRNG security strength */ +#define LRNG_DRNG_SECURITY_STRENGTH_BYTES 32 +#define LRNG_DRNG_SECURITY_STRENGTH_BITS (LRNG_DRNG_SECURITY_STRENGTH_BYTES * 8) +#define LRNG_DRNG_BLOCKSIZE 64 /* Maximum of DRNG block sizes */ +#define LRNG_DRNG_INIT_SEED_SIZE_BITS (LRNG_DRNG_SECURITY_STRENGTH_BITS + \ + CONFIG_LRNG_SEED_BUFFER_INIT_ADD_BITS) +#define LRNG_DRNG_INIT_SEED_SIZE_BYTES (LRNG_DRNG_INIT_SEED_SIZE_BITS >> 3) + +/* + * SP800-90A defines a maximum request size of 1<<16 bytes. The given value is + * considered a safer margin. + * + * This value is allowed to be changed. + */ +#define LRNG_DRNG_MAX_REQSIZE (1<<12) + +/* + * SP800-90A defines a maximum number of requests between reseeds of 2^48. + * The given value is considered a much safer margin, balancing requests for + * frequent reseeds with the need to conserve entropy. This value MUST NOT be + * larger than INT_MAX because it is used in an atomic_t. + * + * This value is allowed to be changed. 
+ */ +#define LRNG_DRNG_RESEED_THRESH (1<<20) + +/* + * Maximum DRNG generation operations without reseed having full entropy + * This value defines the absolute maximum value of DRNG generation operations + * without a reseed holding full entropy. LRNG_DRNG_RESEED_THRESH is the + * threshold when a new reseed is attempted. But it is possible that this fails + * to deliver full entropy. In this case the DRNG will continue to provide data + * even though it was not reseeded with full entropy. To avoid in the extreme + * case that no reseed is performed for too long, this threshold is enforced. + * If that absolute low value is reached, the LRNG is marked as not operational. + * + * This value is allowed to be changed. + */ +#define LRNG_DRNG_MAX_WITHOUT_RESEED (1<<30) + +/* + * Min required seed entropy is 128 bits covering the minimum entropy + * requirement of SP800-131A and the German BSI's TR02102. + * + * This value is allowed to be changed. + */ +#define LRNG_FULL_SEED_ENTROPY_BITS LRNG_DRNG_SECURITY_STRENGTH_BITS +#define LRNG_MIN_SEED_ENTROPY_BITS 128 +#define LRNG_INIT_ENTROPY_BITS 32 + +/* + * Wakeup value + * + * This value is allowed to be changed but must not be larger than the + * digest size of the hash operation used update the aux_pool. + */ +#ifdef CONFIG_CRYPTO_LIB_SHA256 +# define LRNG_ATOMIC_DIGEST_SIZE SHA256_DIGEST_SIZE +#else +# define LRNG_ATOMIC_DIGEST_SIZE SHA1_DIGEST_SIZE +#endif +#define LRNG_WRITE_WAKEUP_ENTROPY LRNG_ATOMIC_DIGEST_SIZE + +/* + * If the switching support is configured, we must provide support up to + * the largest digest size. Without switching support, we know it is only + * the built-in digest size. + */ +#ifdef CONFIG_LRNG_DRNG_SWITCH +# define LRNG_MAX_DIGESTSIZE 64 +#else +# define LRNG_MAX_DIGESTSIZE LRNG_ATOMIC_DIGEST_SIZE +#endif + +/* + * Oversampling factor of IRQ events to obtain + * LRNG_DRNG_SECURITY_STRENGTH_BYTES. This factor is used when a + * high-resolution time stamp is not available. 
In this case, jiffies and + * register contents are used to fill the entropy pool. These noise sources + * are much less entropic than the high-resolution timer. The entropy content + * is the entropy content assumed with LRNG_IRQ_ENTROPY_BITS divided by + * LRNG_IRQ_OVERSAMPLING_FACTOR. + * + * This value is allowed to be changed. + */ +#define LRNG_IRQ_OVERSAMPLING_FACTOR 10 + +/* Alignmask that is intended to be identical to CRYPTO_MINALIGN */ +#define LRNG_KCAPI_ALIGN ARCH_KMALLOC_MINALIGN + +/* + * This definition must provide a buffer that is equal to SHASH_DESC_ON_STACK + * as it will be casted into a struct shash_desc. + */ +#define LRNG_POOL_SIZE (sizeof(struct shash_desc) + HASH_MAX_DESCSIZE) + +/************************ Default DRNG implementation *************************/ + +extern struct chacha20_state chacha20; +extern const struct lrng_crypto_cb lrng_cc20_crypto_cb; +void lrng_cc20_init_state(struct chacha20_state *state); + +/********************************** /proc *************************************/ + +#ifdef CONFIG_SYSCTL +void lrng_pool_inc_numa_node(void); +void lrng_proc_update_max_write_thresh(u32 new_digestsize); +#else +static inline void lrng_pool_inc_numa_node(void) { } +static inline void lrng_proc_update_max_write_thresh(u32 new_digestsize) { } +#endif + +/****************************** LRNG interfaces *******************************/ + +extern u32 lrng_write_wakeup_bits; +extern int lrng_drng_reseed_max_time; + +void lrng_writer_wakeup(void); +void lrng_init_wakeup(void); +void lrng_debug_report_seedlevel(const char *name); +void lrng_process_ready_list(void); + +/* External interface to use of the switchable DRBG inside the kernel */ +void get_random_bytes_full(void *buf, int nbytes); + +/************************* Jitter RNG Entropy Source **************************/ + +#ifdef CONFIG_LRNG_JENT +u32 lrng_get_jent(u8 *outbuf, u32 requested_bits); +u32 lrng_jent_entropylevel(u32 requested_bits); +void lrng_jent_es_state(unsigned char 
*buf, size_t buflen); +#else /* CONFIG_LRNG_JENT */ +static inline u32 lrng_get_jent(u8 *outbuf, u32 requested_bits) { return 0; } +static inline u32 lrng_jent_entropylevel(u32 requested_bits) { return 0; } +static inline void lrng_jent_es_state(unsigned char *buf, size_t buflen) { } +#endif /* CONFIG_LRNG_JENT */ + +/************************** CPU-based Entropy Source **************************/ + +static inline u32 lrng_fast_noise_entropylevel(u32 ent_bits, u32 requested_bits) +{ + /* Obtain entropy statement */ + ent_bits = ent_bits * requested_bits / LRNG_DRNG_SECURITY_STRENGTH_BITS; + /* Cap entropy to buffer size in bits */ + ent_bits = min_t(u32, ent_bits, requested_bits); + return ent_bits; +} + +#ifdef CONFIG_LRNG_CPU +u32 lrng_get_arch(u8 *outbuf, u32 requested_bits); +u32 lrng_archrandom_entropylevel(u32 requested_bits); +void lrng_arch_es_state(unsigned char *buf, size_t buflen); +#else /* CONFIG_LRNG_CPU */ +static inline u32 lrng_get_arch(u8 *outbuf, u32 requested_bits) { return 0; } +static inline u32 lrng_archrandom_entropylevel(u32 requested_bits) { return 0; } +static inline void lrng_arch_es_state(unsigned char *buf, size_t buflen) { } +#endif /* CONFIG_LRNG_CPU */ + +/************************** Interrupt Entropy Source **************************/ + +#ifdef CONFIG_LRNG_IRQ +void lrng_pcpu_reset(void); +u32 lrng_pcpu_avail_pool_size(void); +u32 lrng_pcpu_avail_entropy(void); +int lrng_pcpu_switch_hash(int node, + const struct lrng_crypto_cb *new_cb, void *new_hash, + const struct lrng_crypto_cb *old_cb); +u32 lrng_pcpu_pool_hash(u8 *outbuf, u32 requested_bits, bool fully_seeded); +void lrng_pcpu_array_add_u32(u32 data); +u32 lrng_gcd_analyze(u32 *history, size_t nelem); +void lrng_irq_es_state(unsigned char *buf, size_t buflen); +#else /* CONFIG_LRNG_IRQ */ +static inline void lrng_pcpu_reset(void) { } +static inline u32 lrng_pcpu_avail_pool_size(void) { return 0; } +static inline u32 lrng_pcpu_avail_entropy(void) { return 0; } +static inline int 
lrng_pcpu_switch_hash(int node, + const struct lrng_crypto_cb *new_cb, void *new_hash, + const struct lrng_crypto_cb *old_cb) +{ + return 0; +} +static inline u32 lrng_pcpu_pool_hash(u8 *outbuf, u32 requested_bits, + bool fully_seeded) +{ + return 0; +} +static inline void lrng_pcpu_array_add_u32(u32 data) { } +static inline void lrng_irq_es_state(unsigned char *buf, size_t buflen) { } +#endif /* CONFIG_LRNG_IRQ */ + +/****************************** DRNG processing *******************************/ + +/* DRNG state handle */ +struct lrng_drng { + void *drng; /* DRNG handle */ + void *hash; /* Hash handle */ + const struct lrng_crypto_cb *crypto_cb; /* Crypto callbacks */ + atomic_t requests; /* Number of DRNG requests */ + atomic_t requests_since_fully_seeded; /* Number DRNG requests since + * last fully seeded + */ + unsigned long last_seeded; /* Last time it was seeded */ + bool fully_seeded; /* Is DRNG fully seeded? */ + bool force_reseed; /* Force a reseed */ + + /* Lock write operations on DRNG state, DRNG replacement of crypto_cb */ + struct mutex lock; + spinlock_t spin_lock; + /* Lock *hash replacement - always take before DRNG lock */ + rwlock_t hash_lock; +}; + +extern struct mutex lrng_crypto_cb_update; + +struct lrng_drng *lrng_drng_init_instance(void); +struct lrng_drng *lrng_drng_atomic_instance(void); + +static __always_inline bool lrng_drng_is_atomic(struct lrng_drng *drng) +{ + return (drng->drng == lrng_drng_atomic_instance()->drng); +} + +/* Lock the DRNG */ +static __always_inline void lrng_drng_lock(struct lrng_drng *drng, + unsigned long *flags) + __acquires(&drng->spin_lock) +{ + /* Use spin lock in case the atomic DRNG context is used */ + if (lrng_drng_is_atomic(drng)) { + spin_lock_irqsave(&drng->spin_lock, *flags); + + /* + * In case a lock transition happened while we were spinning, + * catch this case and use the new lock type. 
+ */ + if (!lrng_drng_is_atomic(drng)) { + spin_unlock_irqrestore(&drng->spin_lock, *flags); + __acquire(&drng->spin_lock); + mutex_lock(&drng->lock); + } + } else { + __acquire(&drng->spin_lock); + mutex_lock(&drng->lock); + } +} + +/* Unlock the DRNG */ +static __always_inline void lrng_drng_unlock(struct lrng_drng *drng, + unsigned long *flags) + __releases(&drng->spin_lock) +{ + if (lrng_drng_is_atomic(drng)) { + spin_unlock_irqrestore(&drng->spin_lock, *flags); + } else { + mutex_unlock(&drng->lock); + __release(&drng->spin_lock); + } +} + +void lrng_reset(void); +void lrng_drngs_init_cc20(bool force_seed); +bool lrng_sp80090c_compliant(void); + +static inline u32 lrng_compress_osr(void) +{ + return lrng_sp80090c_compliant() ? CONFIG_LRNG_OVERSAMPLE_ES_BITS : 0; +} + +static inline u32 lrng_reduce_by_osr(u32 entropy_bits) +{ + u32 osr_bits = lrng_compress_osr(); + + return (entropy_bits >= osr_bits) ? (entropy_bits - osr_bits) : 0; +} + +bool lrng_get_available(void); +void lrng_set_available(void); +void lrng_drng_reset(struct lrng_drng *drng); +int lrng_drng_get_atomic(u8 *outbuf, u32 outbuflen); +int lrng_drng_get_sleep(u8 *outbuf, u32 outbuflen); +void lrng_drng_force_reseed(void); +void lrng_drng_seed_work(struct work_struct *dummy); + +#ifdef CONFIG_NUMA +struct lrng_drng **lrng_drng_instances(void); +void lrng_drngs_numa_alloc(void); +#else /* CONFIG_NUMA */ +static inline struct lrng_drng **lrng_drng_instances(void) { return NULL; } +static inline void lrng_drngs_numa_alloc(void) { return; } +#endif /* CONFIG_NUMA */ + +/************************* Entropy sources management *************************/ + +enum lrng_external_noise_source { + lrng_noise_source_hw, + lrng_noise_source_user +}; + +void lrng_set_entropy_thresh(u32 new); +u32 lrng_avail_entropy(void); +void lrng_reset_state(void); + +bool lrng_state_exseed_allow(enum lrng_external_noise_source source); +void lrng_state_exseed_set(enum lrng_external_noise_source source, bool type); +bool 
lrng_state_min_seeded(void); +bool lrng_state_fully_seeded(void); +bool lrng_state_operational(void); + +int lrng_pool_trylock(void); +void lrng_pool_unlock(void); +void lrng_pool_all_numa_nodes_seeded(bool set); +void lrng_pool_add_entropy(void); + +struct entropy_buf { + u8 a[LRNG_DRNG_INIT_SEED_SIZE_BYTES]; + u8 b[LRNG_DRNG_INIT_SEED_SIZE_BYTES]; + u8 c[LRNG_DRNG_INIT_SEED_SIZE_BYTES]; + u8 d[LRNG_DRNG_INIT_SEED_SIZE_BYTES]; + u32 now, a_bits, b_bits, c_bits, d_bits; +}; + +bool lrng_fully_seeded(bool fully_seeded, struct entropy_buf *eb); +void lrng_unset_fully_seeded(struct lrng_drng *drng); +void lrng_fill_seed_buffer(struct entropy_buf *entropy_buf, u32 requested_bits); +void lrng_init_ops(struct entropy_buf *eb); + +/*********************** Auxiliary Pool Entropy Source ************************/ + +u32 lrng_avail_aux_entropy(void); +void lrng_aux_es_state(unsigned char *buf, size_t buflen); +u32 lrng_get_digestsize(void); +void lrng_pool_set_entropy(u32 entropy_bits); +int lrng_aux_switch_hash(const struct lrng_crypto_cb *new_cb, void *new_hash, + const struct lrng_crypto_cb *old_cb); +int lrng_pool_insert_aux(const u8 *inbuf, u32 inbuflen, u32 entropy_bits); +void lrng_get_backtrack_aux(struct entropy_buf *entropy_buf, + u32 requested_bits); + +/* Obtain the security strength of the LRNG in bits */ +static inline u32 lrng_security_strength(void) +{ + /* + * We use a hash to read the entropy in the entropy pool. According to + * SP800-90B table 1, the entropy can be at most the digest size. + * Considering this together with the last sentence in section 3.1.5.1.2 + * the security strength of a (approved) hash is equal to its output + * size. On the other hand the entropy cannot be larger than the + * security strength of the used DRBG. 
+ */ + return min_t(u32, LRNG_FULL_SEED_ENTROPY_BITS, lrng_get_digestsize()); +} + +static inline u32 lrng_get_seed_entropy_osr(bool fully_seeded) +{ + u32 requested_bits = lrng_security_strength(); + + /* Apply oversampling during initialization according to SP800-90C */ + if (lrng_sp80090c_compliant() && !fully_seeded) + requested_bits += CONFIG_LRNG_SEED_BUFFER_INIT_ADD_BITS; + return requested_bits; +} + +/************************** Health Test linking code **************************/ + +enum lrng_health_res { + lrng_health_pass, /* Health test passes on time stamp */ + lrng_health_fail_use, /* Time stamp unhealthy, but mix in */ + lrng_health_fail_drop /* Time stamp unhealthy, drop it */ +}; + +#ifdef CONFIG_LRNG_HEALTH_TESTS +bool lrng_sp80090b_startup_complete(void); +bool lrng_sp80090b_compliant(void); + +enum lrng_health_res lrng_health_test(u32 now_time); +void lrng_health_disable(void); + +#else /* CONFIG_LRNG_HEALTH_TESTS */ +static inline bool lrng_sp80090b_startup_complete(void) { return true; } +static inline bool lrng_sp80090b_compliant(void) { return false; } + +static inline enum lrng_health_res +lrng_health_test(u32 now_time) { return lrng_health_pass; } +static inline void lrng_health_disable(void) { } +#endif /* CONFIG_LRNG_HEALTH_TESTS */ + +/****************************** Helper code ***********************************/ + +static inline u32 atomic_read_u32(atomic_t *v) +{ + return (u32)atomic_read(v); +} + +/******************** Crypto Primitive Switching Support **********************/ + +#ifdef CONFIG_LRNG_DRNG_SWITCH +static inline void lrng_hash_lock(struct lrng_drng *drng, unsigned long *flags) +{ + read_lock_irqsave(&drng->hash_lock, *flags); +} + +static inline void lrng_hash_unlock(struct lrng_drng *drng, unsigned long flags) +{ + read_unlock_irqrestore(&drng->hash_lock, flags); +} +#else /* CONFIG_LRNG_DRNG_SWITCH */ +static inline void lrng_hash_lock(struct lrng_drng *drng, unsigned long *flags) +{ } + +static inline void 
lrng_hash_unlock(struct lrng_drng *drng, unsigned long flags) +{ } +#endif /* CONFIG_LRNG_DRNG_SWITCH */ + +/*************************** Auxiliary functions ******************************/ + +void invalidate_batched_entropy(void); + +/***************************** Testing code ***********************************/ + +#ifdef CONFIG_LRNG_RAW_HIRES_ENTROPY +bool lrng_raw_hires_entropy_store(u32 value); +#else /* CONFIG_LRNG_RAW_HIRES_ENTROPY */ +static inline bool lrng_raw_hires_entropy_store(u32 value) { return false; } +#endif /* CONFIG_LRNG_RAW_HIRES_ENTROPY */ + +#ifdef CONFIG_LRNG_RAW_JIFFIES_ENTROPY +bool lrng_raw_jiffies_entropy_store(u32 value); +#else /* CONFIG_LRNG_RAW_JIFFIES_ENTROPY */ +static inline bool lrng_raw_jiffies_entropy_store(u32 value) { return false; } +#endif /* CONFIG_LRNG_RAW_JIFFIES_ENTROPY */ + +#ifdef CONFIG_LRNG_RAW_IRQ_ENTROPY +bool lrng_raw_irq_entropy_store(u32 value); +#else /* CONFIG_LRNG_RAW_IRQ_ENTROPY */ +static inline bool lrng_raw_irq_entropy_store(u32 value) { return false; } +#endif /* CONFIG_LRNG_RAW_IRQ_ENTROPY */ + +#ifdef CONFIG_LRNG_RAW_IRQFLAGS_ENTROPY +bool lrng_raw_irqflags_entropy_store(u32 value); +#else /* CONFIG_LRNG_RAW_IRQFLAGS_ENTROPY */ +static inline bool lrng_raw_irqflags_entropy_store(u32 value) { return false; } +#endif /* CONFIG_LRNG_RAW_IRQFLAGS_ENTROPY */ + +#ifdef CONFIG_LRNG_RAW_RETIP_ENTROPY +bool lrng_raw_retip_entropy_store(u32 value); +#else /* CONFIG_LRNG_RAW_RETIP_ENTROPY */ +static inline bool lrng_raw_retip_entropy_store(u32 value) { return false; } +#endif /* CONFIG_LRNG_RAW_RETIP_ENTROPY */ + +#ifdef CONFIG_LRNG_RAW_REGS_ENTROPY +bool lrng_raw_regs_entropy_store(u32 value); +#else /* CONFIG_LRNG_RAW_REGS_ENTROPY */ +static inline bool lrng_raw_regs_entropy_store(u32 value) { return false; } +#endif /* CONFIG_LRNG_RAW_REGS_ENTROPY */ + +#ifdef CONFIG_LRNG_RAW_ARRAY +bool lrng_raw_array_entropy_store(u32 value); +#else /* CONFIG_LRNG_RAW_ARRAY */ +static inline bool 
lrng_raw_array_entropy_store(u32 value) { return false; }
+#endif /* CONFIG_LRNG_RAW_ARRAY */
+
+#ifdef CONFIG_LRNG_IRQ_PERF
+bool lrng_perf_time(u32 start);
+#else /* CONFIG_LRNG_IRQ_PERF */
+static inline bool lrng_perf_time(u32 start) { return false; }
+#endif /* CONFIG_LRNG_IRQ_PERF */
+
+#endif /* _LRNG_INTERNAL_H */
diff --git a/drivers/char/lrng/lrng_kcapi.c b/drivers/char/lrng/lrng_kcapi.c
new file mode 100644
index 0000000000000..fa76b2d57b7f1
--- /dev/null
+++ b/drivers/char/lrng/lrng_kcapi.c
@@ -0,0 +1,248 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
+/*
+ * Backend for the LRNG providing the cryptographic primitives using the
+ * kernel crypto API.
+ *
+ * Copyright (C) 2018 - 2021, Stephan Mueller
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include
+#include
+#include
+#include
+#include
+
+#include "lrng_kcapi_hash.h"
+
+static char *drng_name = NULL;
+module_param(drng_name, charp, 0444);
+MODULE_PARM_DESC(drng_name, "Kernel crypto API name of DRNG");
+
+static char *pool_hash = "sha512";
+module_param(pool_hash, charp, 0444);
+MODULE_PARM_DESC(pool_hash,
+		 "Kernel crypto API name of hash or keyed message digest to read the entropy pool");
+
+static char *seed_hash = NULL;
+module_param(seed_hash, charp, 0444);
+MODULE_PARM_DESC(seed_hash,
+		 "Kernel crypto API name of hash with output size equal to seedsize of DRNG to bring seed string to the size required by the DRNG");
+
+/* DRNG handle: crypto API RNG plus the seed hash, NULL if none is needed */
+struct lrng_drng_info {
+	struct crypto_rng *kcapi_rng;
+	void *lrng_hash;
+};
+
+/* Allocate the hash named by the pool_hash module parameter */
+static void *lrng_kcapi_drng_hash_alloc(void)
+{
+	return lrng_kcapi_hash_alloc(pool_hash);
+}
+
+/*
+ * Seed the kernel crypto API DRNG with @inbuf. When a seed hash is configured,
+ * the input is hashed first and the digest is used as seed; without a seed
+ * hash the input is handed to the DRNG unmodified.
+ *
+ * Returns 0 on success or the error of the failing hash or DRNG operation.
+ */
+static int lrng_kcapi_drng_seed_helper(void *drng, const u8 *inbuf,
+				       u32 inbuflen)
+{
+	SHASH_DESC_ON_STACK(shash, NULL);
+	struct lrng_drng_info *lrng_drng_info = (struct lrng_drng_info *)drng;
+	struct crypto_rng *kcapi_rng = lrng_drng_info->kcapi_rng;
+	void *hash = lrng_drng_info->lrng_hash;
+	u8 digest[HASH_MAX_DIGESTSIZE] __aligned(8);
+	int ret;
+
+	if (!hash)
+		return crypto_rng_reset(kcapi_rng, inbuf, inbuflen);
+
+	ret = lrng_kcapi_hash_init(shash, hash) ?:
+	      lrng_kcapi_hash_update(shash, inbuf, inbuflen) ?:
+	      lrng_kcapi_hash_final(shash, digest);
+	lrng_kcapi_hash_zero(shash);
+	/* Query the digest size only here: hash is known to be non-NULL */
+	if (!ret)
+		ret = crypto_rng_reset(kcapi_rng, digest,
+				       lrng_kcapi_hash_digestsize(hash));
+
+	/* The digest is seed material: wipe it on failure as well */
+	memzero_explicit(digest, sizeof(digest));
+	return ret;
+}
+
+/*
+ * Fill @outbuf with @outbuflen random bytes from the DRNG. Returns the number
+ * of generated bytes or a negative error code.
+ */
+static int lrng_kcapi_drng_generate_helper(void *drng, u8 *outbuf,
+					   u32 outbuflen)
+{
+	struct lrng_drng_info *lrng_drng_info = (struct lrng_drng_info *)drng;
+	struct crypto_rng *kcapi_rng = lrng_drng_info->kcapi_rng;
+	int ret = crypto_rng_get_bytes(kcapi_rng, outbuf, outbuflen);
+
+	if (ret < 0)
+		return ret;
+
+	return outbuflen;
+}
+
+/*
+ * Allocate the DRNG named by the drng_name module parameter and, if the DRNG
+ * reports a non-zero seed size, the hash that compresses seeds to that size.
+ * @sec_strength is only compared against the seed size to log a notice.
+ * Returns the DRNG handle or an ERR_PTR() on failure.
+ */
+static void *lrng_kcapi_drng_alloc(u32 sec_strength)
+{
+	struct lrng_drng_info *lrng_drng_info;
+	struct crypto_rng *kcapi_rng;
+	int seedsize;
+	void *ret = ERR_PTR(-ENOMEM);
+
+	if (!drng_name) {
+		pr_err("DRNG name missing\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	/* strncmp(), not memcmp(): drng_name may be shorter than a prefix */
+	if (!strncmp(drng_name, "drbg", 4) ||
+	    !strncmp(drng_name, "stdrng", 6) ||
+	    !strncmp(drng_name, "jitterentropy_rng", 17)) {
+		pr_err("Refusing to load the requested random number generator\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	lrng_drng_info = kmalloc(sizeof(*lrng_drng_info), GFP_KERNEL);
+	if (!lrng_drng_info)
+		return ERR_PTR(-ENOMEM);
+
+	kcapi_rng = crypto_alloc_rng(drng_name, 0, 0);
+	if (IS_ERR(kcapi_rng)) {
+		pr_err("DRNG %s cannot be allocated\n", drng_name);
+		ret = ERR_CAST(kcapi_rng);
+		goto free;
+	}
+	lrng_drng_info->kcapi_rng = kcapi_rng;
+
+	seedsize = crypto_rng_seedsize(kcapi_rng);
+
+	if (sec_strength > seedsize)
+		pr_info("Seedsize DRNG (%u bits) lower than security strength of LRNG noise source (%u bits)\n",
+			crypto_rng_seedsize(kcapi_rng) * 8, sec_strength * 8);
+
+	if (seedsize) {
+		void *lrng_hash;
+
+		if (!seed_hash) {
+			switch (seedsize) {
+			case 32:
+				seed_hash = "sha256";
+				break;
+			case 48:
+				seed_hash = "sha384";
+				break;
+			case 64:
+				seed_hash = "sha512";
+				break;
+			default:
+				pr_err("Seed size %d cannot be processed\n",
+				       seedsize);
+				ret = ERR_PTR(-EINVAL);
+				goto dealloc;
+			}
+		}
+
+		lrng_hash = lrng_kcapi_hash_alloc(seed_hash);
+		if (IS_ERR(lrng_hash)) {
+			ret = ERR_CAST(lrng_hash);
+			goto dealloc;
+		}
+
+		if (seedsize != lrng_kcapi_hash_digestsize(lrng_hash)) {
+			pr_err("Seed hash output size not equal to DRNG seed size\n");
+			lrng_kcapi_hash_dealloc(lrng_hash);
+			ret = ERR_PTR(-EINVAL);
+			goto dealloc;
+		}
+
+		lrng_drng_info->lrng_hash = lrng_hash;
+
+		pr_info("Seed hash %s allocated\n", seed_hash);
+	} else {
+		lrng_drng_info->lrng_hash = NULL;
+	}
+
+	pr_info("Kernel crypto API DRNG %s allocated\n", drng_name);
+
+	return lrng_drng_info;
+
+dealloc:
+	crypto_free_rng(kcapi_rng);
+free:
+	kfree(lrng_drng_info);
+	return ret;
+}
+
+/* Release the DRNG, its seed hash (if any) and the handle itself */
+static void lrng_kcapi_drng_dealloc(void *drng)
+{
+	struct lrng_drng_info *lrng_drng_info = (struct lrng_drng_info *)drng;
+	struct crypto_rng *kcapi_rng = lrng_drng_info->kcapi_rng;
+
+	crypto_free_rng(kcapi_rng);
+	if (lrng_drng_info->lrng_hash)
+		lrng_kcapi_hash_dealloc(lrng_drng_info->lrng_hash);
+	kfree(lrng_drng_info);
+	pr_info("DRNG %s deallocated\n", drng_name);
+}
+
+/* Name of the DRNG as given by the drng_name module parameter */
+static const char *lrng_kcapi_drng_name(void)
+{
+	return drng_name;
+}
+
+/* Name of the hash as given by the pool_hash module parameter */
+static const char *lrng_kcapi_pool_hash(void)
+{
+	return pool_hash;
+}
+
+/* Callbacks handed to lrng_set_drng_cb() by lrng_kcapi_init() */
+static const struct lrng_crypto_cb lrng_kcapi_crypto_cb = {
+	.lrng_drng_name = lrng_kcapi_drng_name,
+	.lrng_hash_name = lrng_kcapi_pool_hash,
+	.lrng_drng_alloc = lrng_kcapi_drng_alloc,
+	.lrng_drng_dealloc = lrng_kcapi_drng_dealloc,
+	.lrng_drng_seed_helper = lrng_kcapi_drng_seed_helper,
+	.lrng_drng_generate_helper = lrng_kcapi_drng_generate_helper,
+	.lrng_hash_alloc = lrng_kcapi_drng_hash_alloc,
+	.lrng_hash_dealloc = lrng_kcapi_hash_dealloc,
+	.lrng_hash_digestsize = lrng_kcapi_hash_digestsize,
+	.lrng_hash_init = lrng_kcapi_hash_init,
+	.lrng_hash_update = lrng_kcapi_hash_update,
+	.lrng_hash_final = lrng_kcapi_hash_final,
+
.lrng_hash_desc_zero = lrng_kcapi_hash_zero,
+};
+
+static int __init lrng_kcapi_init(void)
+{
+	return lrng_set_drng_cb(&lrng_kcapi_crypto_cb);
+}
+static void __exit lrng_kcapi_exit(void)
+{
+	lrng_set_drng_cb(NULL);
+}
+
+late_initcall(lrng_kcapi_init);
+module_exit(lrng_kcapi_exit);
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Stephan Mueller ");
+MODULE_DESCRIPTION("Linux Random Number Generator - kernel crypto API DRNG backend");
diff --git a/drivers/char/lrng/lrng_kcapi_hash.c b/drivers/char/lrng/lrng_kcapi_hash.c
new file mode 100644
index 0000000000000..9927e1022de59
--- /dev/null
+++ b/drivers/char/lrng/lrng_kcapi_hash.c
@@ -0,0 +1,114 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
+/*
+ * Backend for providing the hash primitive using the kernel crypto API.
+ *
+ * Copyright (C) 2021, Stephan Mueller
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include
+
+#include "lrng_kcapi_hash.h"
+
+/* Hash handle; callers only ever see it as an opaque void pointer */
+struct lrng_hash_info {
+	struct crypto_shash *tfm;
+};
+
+/* Release the crypto API transform and the handle wrapping it */
+static void _lrng_kcapi_hash_free(struct lrng_hash_info *lrng_hash)
+{
+	struct crypto_shash *tfm = lrng_hash->tfm;
+
+	crypto_free_shash(tfm);
+	kfree(lrng_hash);
+}
+
+/*
+ * Allocate the kernel crypto API hash @name.
+ * Returns an opaque handle, or an ERR_PTR() if @name is missing, the
+ * transform cannot be allocated or memory is exhausted.
+ */
+void *lrng_kcapi_hash_alloc(const char *name)
+{
+	struct lrng_hash_info *lrng_hash;
+	struct crypto_shash *tfm;
+
+	if (!name) {
+		pr_err("Hash name missing\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	tfm = crypto_alloc_shash(name, 0, 0);
+	if (IS_ERR(tfm)) {
+		pr_err("could not allocate hash %s\n", name);
+		return ERR_CAST(tfm);
+	}
+
+	lrng_hash = kmalloc(sizeof(*lrng_hash), GFP_KERNEL);
+	if (!lrng_hash) {
+		crypto_free_shash(tfm);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	lrng_hash->tfm = tfm;
+
+	pr_info("Hash %s allocated\n", name);
+
+	return lrng_hash;
+}
+EXPORT_SYMBOL(lrng_kcapi_hash_alloc);
+
+/* Return the digest size of @hash, which must be a valid (non-NULL) handle */
+u32 lrng_kcapi_hash_digestsize(void *hash)
+{
+	struct lrng_hash_info *lrng_hash = (struct lrng_hash_info *)hash;
+	struct crypto_shash *tfm = lrng_hash->tfm;
+
+	return crypto_shash_digestsize(tfm);
+}
+EXPORT_SYMBOL(lrng_kcapi_hash_digestsize);
+
+/* Free a handle obtained from lrng_kcapi_hash_alloc() */
+void lrng_kcapi_hash_dealloc(void *hash)
+{
+	struct lrng_hash_info *lrng_hash = (struct lrng_hash_info *)hash;
+
+	_lrng_kcapi_hash_free(lrng_hash);
+	pr_info("Hash deallocated\n");
+}
+EXPORT_SYMBOL(lrng_kcapi_hash_dealloc);
+
+/* Bind @shash to the transform of @hash and start a new hash operation */
+int lrng_kcapi_hash_init(struct shash_desc *shash, void *hash)
+{
+	struct lrng_hash_info *lrng_hash = (struct lrng_hash_info *)hash;
+	struct crypto_shash *tfm = lrng_hash->tfm;
+
+	shash->tfm = tfm;
+	return crypto_shash_init(shash);
+}
+EXPORT_SYMBOL(lrng_kcapi_hash_init);
+
+/* Feed @inbuflen bytes of @inbuf into the running hash operation */
+int lrng_kcapi_hash_update(struct shash_desc *shash, const u8 *inbuf,
+			   u32 inbuflen)
+{
+	return crypto_shash_update(shash, inbuf, inbuflen);
+}
+EXPORT_SYMBOL(lrng_kcapi_hash_update);
+
+/* Finish the hash operation and write the result to @digest */
+int lrng_kcapi_hash_final(struct shash_desc *shash, u8 *digest)
+{
+	return crypto_shash_final(shash, digest);
+}
+EXPORT_SYMBOL(lrng_kcapi_hash_final);
+
+/* Wipe the hash operation state held in @shash */
+void lrng_kcapi_hash_zero(struct shash_desc *shash)
+{
+	shash_desc_zero(shash);
+}
+EXPORT_SYMBOL(lrng_kcapi_hash_zero);
diff --git a/drivers/char/lrng/lrng_kcapi_hash.h b/drivers/char/lrng/lrng_kcapi_hash.h
new file mode 100644
index 0000000000000..2f94558d2dd64
--- /dev/null
+++ b/drivers/char/lrng/lrng_kcapi_hash.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
+/*
+ * Copyright (C) 2020 - 2021, Stephan Mueller
+ */
+
+#ifndef _LRNG_KCAPI_HASH_H
+#define _LRNG_KCAPI_HASH_H
+
+#include
+
+void *lrng_kcapi_hash_alloc(const char *name);
+u32 lrng_kcapi_hash_digestsize(void *hash);
+void lrng_kcapi_hash_dealloc(void *hash);
+int lrng_kcapi_hash_init(struct shash_desc *shash, void *hash);
+int lrng_kcapi_hash_update(struct shash_desc *shash, const u8 *inbuf,
+			   u32 inbuflen);
+int lrng_kcapi_hash_final(struct shash_desc *shash, u8 *digest);
+void lrng_kcapi_hash_zero(struct shash_desc *shash);
+
+#endif /* _LRNG_KCAPI_HASH_H */
diff --git a/drivers/char/lrng/lrng_numa.c b/drivers/char/lrng/lrng_numa.c
new file mode 100644
index 0000000000000..094929107c462
--- /dev/null
+++ b/drivers/char/lrng/lrng_numa.c
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
+/*
+ * LRNG NUMA support
+ *
+ * Copyright (C) 2016 - 2021, Stephan Mueller
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include
+#include
+
+#include "lrng_internal.h"
+
+static struct lrng_drng **lrng_drng __read_mostly = NULL;
+
+/* Return the per-NUMA-node DRNG array, or NULL as long as none is published */
+struct lrng_drng **lrng_drng_instances(void)
+{
+	/* counterpart to cmpxchg_release in _lrng_drngs_numa_alloc */
+	return READ_ONCE(lrng_drng);
+}
+
+/*
+ * Allocate the data structures for the per-NUMA node DRNGs.
+ *
+ * The first online node reuses the initial DRNG instance; every other online
+ * node gets its own DRNG and hash. The array is published only after all
+ * nodes are set up - on any failure everything allocated here is released.
+ */
+static void _lrng_drngs_numa_alloc(struct work_struct *work)
+{
+	struct lrng_drng **drngs;
+	struct lrng_drng *lrng_drng_init = lrng_drng_init_instance();
+	u32 node;
+	bool init_drng_used = false;
+
+	mutex_lock(&lrng_crypto_cb_update);
+
+	/* per-NUMA-node DRNGs are already present */
+	if (lrng_drng)
+		goto unlock;
+
+	drngs = kcalloc(nr_node_ids, sizeof(*drngs), GFP_KERNEL|__GFP_NOFAIL);
+	for_each_online_node(node) {
+		struct lrng_drng *drng;
+
+		if (!init_drng_used) {
+			drngs[node] = lrng_drng_init;
+			init_drng_used = true;
+			continue;
+		}
+
+		/* sizeof(*drng): a bare lrng_drng names the file-scope pointer */
+		drng = kmalloc_node(sizeof(*drng),
+				    GFP_KERNEL|__GFP_NOFAIL, node);
+		memset(drng, 0, sizeof(*drng));
+
+		drng->crypto_cb = lrng_drng_init->crypto_cb;
+		drng->drng = drng->crypto_cb->lrng_drng_alloc(
+					LRNG_DRNG_SECURITY_STRENGTH_BYTES);
+		if (IS_ERR(drng->drng)) {
+			kfree(drng);
+			goto err;
+		}
+
+		drng->hash = drng->crypto_cb->lrng_hash_alloc();
+		if (IS_ERR(drng->hash)) {
+			drng->crypto_cb->lrng_drng_dealloc(drng->drng);
+			kfree(drng);
+			goto err;
+		}
+
+		mutex_init(&drng->lock);
+		spin_lock_init(&drng->spin_lock);
+		rwlock_init(&drng->hash_lock);
+
+		/*
+		 * Switch the hash used by the per-CPU pool.
+		 * We do not need to lock the new hash as it is not usable yet
+		 * due to **drngs not yet being initialized.
+		 */
+		if (lrng_pcpu_switch_hash(node, drng->crypto_cb, drng->hash,
+					  &lrng_cc20_crypto_cb)) {
+			/* Record the instance so the error path frees it too */
+			drngs[node] = drng;
+			goto err;
+		}
+
+		/*
+		 * No reseeding of NUMA DRNGs from previous DRNGs as this
+		 * would complicate the code. Let it simply reseed.
+		 */
+		lrng_drng_reset(drng);
+		drngs[node] = drng;
+
+		lrng_pool_inc_numa_node();
+		pr_info("DRNG and entropy pool read hash for NUMA node %d allocated\n",
+			node);
+	}
+
+	/* counterpart to READ_ONCE in lrng_drng_instances */
+	if (!cmpxchg_release(&lrng_drng, NULL, drngs)) {
+		lrng_pool_all_numa_nodes_seeded(false);
+		goto unlock;
+	}
+
+err:
+	for_each_online_node(node) {
+		struct lrng_drng *drng = drngs[node];
+
+		if (drng == lrng_drng_init)
+			continue;
+
+		if (drng) {
+			lrng_pcpu_switch_hash(node, &lrng_cc20_crypto_cb, NULL,
+					      drng->crypto_cb);
+			drng->crypto_cb->lrng_hash_dealloc(drng->hash);
+			drng->crypto_cb->lrng_drng_dealloc(drng->drng);
+			kfree(drng);
+		}
+	}
+	kfree(drngs);
+
+unlock:
+	mutex_unlock(&lrng_crypto_cb_update);
+}
+
+static DECLARE_WORK(lrng_drngs_numa_alloc_work, _lrng_drngs_numa_alloc);
+
+/* Queue the per-NUMA-node DRNG allocation via schedule_work() */
+void lrng_drngs_numa_alloc(void)
+{
+	schedule_work(&lrng_drngs_numa_alloc_work);
+}
diff --git a/drivers/char/lrng/lrng_proc.c b/drivers/char/lrng/lrng_proc.c
new file mode 100644
index 0000000000000..5c7b4908fb0c2
--- /dev/null
+++ b/drivers/char/lrng/lrng_proc.c
@@ -0,0 +1,210 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
+/*
+ * LRNG proc and sysctl interfaces
+ *
+ * Copyright (C) 2016 - 2021, Stephan Mueller
+ */
+
+#include
+#include
+#include
+#include
+#include
+
+#include "lrng_internal.h"
+
+/*
+ * This function is used to return both the bootid UUID, and random
+ * UUID. The difference is in whether table->data is NULL; if it is,
+ * then a new UUID is generated and returned to the user.
+ *
+ * If the user accesses this via the proc interface, the UUID will be
+ * returned as an ASCII string in the standard UUID format; if via the
+ * sysctl system call, as 16 bytes of binary data.
+ */
+static int lrng_proc_do_uuid(struct ctl_table *table, int write,
+			     void *buffer, size_t *lenp, loff_t *ppos)
+{
+	struct ctl_table fake_table;
+	unsigned char buf[64], tmp_uuid[16], *uuid;
+
+	uuid = table->data;
+	if (!uuid) {
+		uuid = tmp_uuid;
+		generate_random_uuid(uuid);
+	} else {
+		static DEFINE_SPINLOCK(bootid_spinlock);
+
+		spin_lock(&bootid_spinlock);
+		if (!uuid[8])	/* boot id not generated yet */
+			generate_random_uuid(uuid);
+		spin_unlock(&bootid_spinlock);
+	}
+
+	snprintf(buf, sizeof(buf), "%pU", uuid);
+
+	fake_table.data = buf;
+	fake_table.maxlen = sizeof(buf);
+
+	return proc_dostring(&fake_table, write, buffer, lenp, ppos);
+}
+
+static int lrng_proc_do_entropy(struct ctl_table *table, int write,
+				void *buffer, size_t *lenp, loff_t *ppos)
+{
+	struct ctl_table fake_table;
+	int entropy_count;
+
+	entropy_count = lrng_avail_entropy();
+
+	fake_table.data = &entropy_count;
+	fake_table.maxlen = sizeof(entropy_count);
+
+	return proc_dointvec(&fake_table, write, buffer, lenp, ppos);
+}
+
+static int lrng_proc_do_poolsize(struct ctl_table *table, int write,
+				 void *buffer, size_t *lenp, loff_t *ppos)
+{
+	struct ctl_table fake_table;
+	int entropy_count;
+
+	/* LRNG can at most retain entropy in per-CPU pools and aux pool */
+	entropy_count = lrng_get_digestsize() + lrng_pcpu_avail_pool_size();
+
+	fake_table.data = &entropy_count;
+	fake_table.maxlen = sizeof(entropy_count);
+
+	return proc_dointvec(&fake_table, write, buffer, lenp, ppos);
+}
+
+static int lrng_min_write_thresh;
+static int lrng_max_write_thresh = (LRNG_WRITE_WAKEUP_ENTROPY << 3);
+static char lrng_sysctl_bootid[16];
+static int lrng_drng_reseed_max_min;	/* lower bound of urandom_min_reseed_secs */
+
+void lrng_proc_update_max_write_thresh(u32 new_digestsize)
+{
+	lrng_max_write_thresh = (int)new_digestsize;
+	/* Ensure that changes to the global variable are visible */
+	mb();
+}
+
+static struct ctl_table random_table[] = {
+	{
+		.procname = "poolsize",
+		.maxlen = sizeof(int),
+		.mode = 0444,
+		.proc_handler = lrng_proc_do_poolsize,
+	},
+	{
+		.procname = "entropy_avail",
+		.maxlen = sizeof(int),
+		.mode = 0444,
+		.proc_handler = lrng_proc_do_entropy,
+	},
+	{
+		.procname = "write_wakeup_threshold",
+		.data = &lrng_write_wakeup_bits,
+		.maxlen = sizeof(int),
+		.mode = 0644,
+		.proc_handler = proc_dointvec_minmax,
+		.extra1 = &lrng_min_write_thresh,
+		.extra2 = &lrng_max_write_thresh,
+	},
+	{
+		.procname = "boot_id",
+		.data = &lrng_sysctl_bootid,
+		.maxlen = 16,
+		.mode = 0444,
+		.proc_handler = lrng_proc_do_uuid,
+	},
+	{
+		.procname = "uuid",
+		.maxlen = 16,
+		.mode = 0444,
+		.proc_handler = lrng_proc_do_uuid,
+	},
+	{
+		.procname = "urandom_min_reseed_secs",
+		.data = &lrng_drng_reseed_max_time,
+		.maxlen = sizeof(int),
+		.mode = 0644,
+		.proc_handler = proc_dointvec_minmax, /* plain proc_dointvec ignores .extra1 */
+		.extra1 = &lrng_drng_reseed_max_min,
+	},
+	{ }
+};
+
+/* Number of online DRNGs */
+static u32 numa_drngs = 1;
+
+void lrng_pool_inc_numa_node(void)
+{
+	numa_drngs++;
+}
+
+static int lrng_proc_type_show(struct seq_file *m, void *v)
+{
+	struct lrng_drng *lrng_drng_init = lrng_drng_init_instance();
+	unsigned long flags = 0;
+	unsigned char buf[250], irq[200], aux[100], cpu[90], jent[45];
+
+	lrng_drng_lock(lrng_drng_init, &flags);
+	snprintf(buf, sizeof(buf),
+		 "DRNG name: %s\n"
+		 "LRNG security strength in bits: %d\n"
+		 "number of DRNG instances: %u\n"
+		 "Standards compliance: %s\n"
+		 "Entropy Sources: %s%s%sAuxiliary\n"
+		 "LRNG minimally seeded: %s\n"
+		 "LRNG fully seeded: %s\n",
+		 lrng_drng_init->crypto_cb->lrng_drng_name(),
+		 lrng_security_strength(),
+		 numa_drngs,
+		 lrng_sp80090c_compliant() ? "SP800-90C " : "",
+		 IS_ENABLED(CONFIG_LRNG_IRQ) ? "IRQ " : "",
+		 IS_ENABLED(CONFIG_LRNG_JENT) ? "JitterRNG " : "",
+		 IS_ENABLED(CONFIG_LRNG_CPU) ? "CPU " : "",
+		 lrng_state_min_seeded() ? "true" : "false",
+		 lrng_state_fully_seeded() ? "true" : "false");
+
+	lrng_aux_es_state(aux, sizeof(aux));
+
+	irq[0] = '\0';
+	lrng_irq_es_state(irq, sizeof(irq));
+
+	jent[0] = '\0';
+	lrng_jent_es_state(jent, sizeof(jent));
+
+	cpu[0] = '\0';
+	lrng_arch_es_state(cpu, sizeof(cpu));
+
+	lrng_drng_unlock(lrng_drng_init, &flags);
+
+	seq_write(m, buf, strlen(buf));
+	seq_write(m, aux, strlen(aux));
+	seq_write(m, irq, strlen(irq));
+	seq_write(m, jent, strlen(jent));
+	seq_write(m, cpu, strlen(cpu));
+
+	return 0;
+}
+
+/*
+ * rand_initialize() is called before sysctl_init(),
+ * so we cannot call register_sysctl_init() in rand_initialize()
+ */
+static int __init random_sysctls_init(void)
+{
+	register_sysctl_init("kernel/random", random_table);
+	return 0;
+}
+device_initcall(random_sysctls_init);
+
+static int __init lrng_proc_type_init(void)
+{
+	proc_create_single("lrng_type", 0444, NULL, &lrng_proc_type_show);
+	return 0;
+}
+module_init(lrng_proc_type_init);
diff --git a/drivers/char/lrng/lrng_selftest.c b/drivers/char/lrng/lrng_selftest.c
new file mode 100644
index 0000000000000..2e81047fe6a78
--- /dev/null
+++ b/drivers/char/lrng/lrng_selftest.c
@@ -0,0 +1,387 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
+/*
+ * LRNG power-on and on-demand self-test
+ *
+ * Copyright (C) 2016 - 2021, Stephan Mueller
+ */
+
+/*
+ * In addition to the self-tests below, the following LRNG components
+ * are covered with self-tests during regular operation:
+ *
+ * * power-on self-test: SP800-90A DRBG provided by the Linux kernel crypto API
+ * * power-on self-test: PRNG provided by the Linux kernel crypto API
+ * * runtime test: Raw noise source data testing including SP800-90B compliant
+ *   tests when enabling CONFIG_LRNG_HEALTH_TESTS
+ *
+ * Additional developer tests present with LRNG code:
+ * * SP800-90B APT and RCT test enforcement validation when enabling
+ *   CONFIG_LRNG_APT_BROKEN or CONFIG_LRNG_RCT_BROKEN.
+ * * Collection of raw entropy from the interrupt noise source when enabling + * CONFIG_LRNG_TESTING and pulling the data from the kernel with the provided + * interface. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include + +#include "lrng_chacha20.h" +#include "lrng_internal.h" + +#define LRNG_SELFTEST_PASSED 0 +#define LRNG_SEFLTEST_ERROR_TIME (1 << 0) +#define LRNG_SEFLTEST_ERROR_CHACHA20 (1 << 1) +#define LRNG_SEFLTEST_ERROR_HASH (1 << 2) +#define LRNG_SEFLTEST_ERROR_GCD (1 << 3) +#define LRNG_SELFTEST_NOT_EXECUTED 0xffffffff + +#ifdef CONFIG_LRNG_IRQ + +#include "lrng_es_irq.h" + +static u32 lrng_data_selftest_ptr = 0; +static u32 lrng_data_selftest[LRNG_DATA_ARRAY_SIZE]; + +static void lrng_data_process_selftest_insert(u32 time) +{ + u32 ptr = lrng_data_selftest_ptr++ & LRNG_DATA_WORD_MASK; + unsigned int array = lrng_data_idx2array(ptr); + unsigned int slot = lrng_data_idx2slot(ptr); + + /* zeroization of slot to ensure the following OR adds the data */ + lrng_data_selftest[array] &= + ~(lrng_data_slot_val(0xffffffff & LRNG_DATA_SLOTSIZE_MASK, + slot)); + lrng_data_selftest[array] |= + lrng_data_slot_val(time & LRNG_DATA_SLOTSIZE_MASK, slot); +} + +static void lrng_data_process_selftest_u32(u32 data) +{ + u32 pre_ptr, ptr, mask; + unsigned int pre_array; + + /* Increment pointer by number of slots taken for input value */ + lrng_data_selftest_ptr += LRNG_DATA_SLOTS_PER_UINT; + + /* ptr to current unit */ + ptr = lrng_data_selftest_ptr; + + lrng_pcpu_split_u32(&ptr, &pre_ptr, &mask); + + /* MSB of data go into previous unit */ + pre_array = lrng_data_idx2array(pre_ptr); + /* zeroization of slot to ensure the following OR adds the data */ + lrng_data_selftest[pre_array] &= ~(0xffffffff & ~mask); + lrng_data_selftest[pre_array] |= data & ~mask; + + /* LSB of data go into current unit */ + lrng_data_selftest[lrng_data_idx2array(ptr)] = data & mask; +} + +static unsigned int lrng_data_process_selftest(void) +{ + u32 time; + u32 
idx_zero_compare = (0 << 0) | (1 << 8) | (2 << 16) | (3 << 24); + u32 idx_one_compare = (4 << 0) | (5 << 8) | (6 << 16) | (7 << 24); + u32 idx_last_compare = + (((LRNG_DATA_NUM_VALUES - 4) & LRNG_DATA_SLOTSIZE_MASK) << 0) | + (((LRNG_DATA_NUM_VALUES - 3) & LRNG_DATA_SLOTSIZE_MASK) << 8) | + (((LRNG_DATA_NUM_VALUES - 2) & LRNG_DATA_SLOTSIZE_MASK) << 16) | + (((LRNG_DATA_NUM_VALUES - 1) & LRNG_DATA_SLOTSIZE_MASK) << 24); + + (void)idx_one_compare; + + /* "poison" the array to verify the operation of the zeroization */ + lrng_data_selftest[0] = 0xffffffff; + lrng_data_selftest[1] = 0xffffffff; + + lrng_data_process_selftest_insert(0); + /* + * Note, when using lrng_data_process_u32() on unaligned ptr, + * the first slots will go into next word, and the last slots go + * into the previous word. + */ + lrng_data_process_selftest_u32((4 << 0) | (1 << 8) | (2 << 16) | + (3 << 24)); + lrng_data_process_selftest_insert(5); + lrng_data_process_selftest_insert(6); + lrng_data_process_selftest_insert(7); + + if ((lrng_data_selftest[0] != idx_zero_compare) || + (lrng_data_selftest[1] != idx_one_compare)) + goto err; + + /* Reset for next test */ + lrng_data_selftest[0] = 0; + lrng_data_selftest[1] = 0; + lrng_data_selftest_ptr = 0; + + for (time = 0; time < LRNG_DATA_NUM_VALUES; time++) + lrng_data_process_selftest_insert(time); + + if ((lrng_data_selftest[0] != idx_zero_compare) || + (lrng_data_selftest[1] != idx_one_compare) || + (lrng_data_selftest[LRNG_DATA_ARRAY_SIZE - 1] != idx_last_compare)) + goto err; + + return LRNG_SELFTEST_PASSED; + +err: + pr_err("LRNG data array self-test FAILED\n"); + return LRNG_SEFLTEST_ERROR_TIME; +} + +static unsigned int lrng_gcd_selftest(void) +{ + u32 history[10]; + unsigned int i; + +#define LRNG_GCD_SELFTEST 3 + for (i = 0; i < ARRAY_SIZE(history); i++) + history[i] = i * LRNG_GCD_SELFTEST; + + if (lrng_gcd_analyze(history, ARRAY_SIZE(history)) == LRNG_GCD_SELFTEST) + return LRNG_SELFTEST_PASSED; + + pr_err("LRNG GCD self-test 
FAILED\n"); + return LRNG_SEFLTEST_ERROR_GCD; +} + +#else /* CONFIG_LRNG_IRQ */ + +static unsigned int lrng_data_process_selftest(void) +{ + return LRNG_SELFTEST_PASSED; +} + +static unsigned int lrng_gcd_selftest(void) +{ + return LRNG_SELFTEST_PASSED; +} + +#endif /* CONFIG_LRNG_IRQ */ + +static void lrng_selftest_bswap32(u32 *ptr, u32 words) +{ + u32 i; + + /* Byte-swap data which is an LE representation */ + for (i = 0; i < words; i++) { + __le32 *p = (__le32 *)ptr; + + *p = cpu_to_le32(*ptr); + ptr++; + } +} + +/* The test vectors are taken from crypto/testmgr.h */ +static unsigned int lrng_hash_selftest(void) +{ + SHASH_DESC_ON_STACK(shash, NULL); + const struct lrng_crypto_cb *crypto_cb = &lrng_cc20_crypto_cb; + static const u8 lrng_hash_selftest_result[] = +#ifdef CONFIG_CRYPTO_LIB_SHA256 + { 0xba, 0x78, 0x16, 0xbf, 0x8f, 0x01, 0xcf, 0xea, + 0x41, 0x41, 0x40, 0xde, 0x5d, 0xae, 0x22, 0x23, + 0xb0, 0x03, 0x61, 0xa3, 0x96, 0x17, 0x7a, 0x9c, + 0xb4, 0x10, 0xff, 0x61, 0xf2, 0x00, 0x15, 0xad }; +#else /* CONFIG_CRYPTO_LIB_SHA256 */ + { 0xa9, 0x99, 0x3e, 0x36, 0x47, 0x06, 0x81, 0x6a, 0xba, 0x3e, + 0x25, 0x71, 0x78, 0x50, 0xc2, 0x6c, 0x9c, 0xd0, 0xd8, 0x9d }; +#endif /* CONFIG_CRYPTO_LIB_SHA256 */ + static const u8 hash_input[] = { 0x61, 0x62, 0x63 }; /* "abc" */ + u8 digest[sizeof(lrng_hash_selftest_result)] __aligned(sizeof(u32)); + + if (sizeof(digest) != crypto_cb->lrng_hash_digestsize(NULL)) + return LRNG_SEFLTEST_ERROR_HASH; + + if (!crypto_cb->lrng_hash_init(shash, NULL) && + !crypto_cb->lrng_hash_update(shash, hash_input, + sizeof(hash_input)) && + !crypto_cb->lrng_hash_final(shash, digest) && + !memcmp(digest, lrng_hash_selftest_result, sizeof(digest))) + return 0; + + pr_err("LRNG %s Hash self-test FAILED\n", crypto_cb->lrng_hash_name()); + return LRNG_SEFLTEST_ERROR_HASH; +} + +/* + * The test vectors were generated using the ChaCha20 DRNG from + * https://www.chronox.de/chacha20.html + */ +static unsigned int lrng_chacha20_drng_selftest(void) +{ + const 
struct lrng_crypto_cb *crypto_cb = &lrng_cc20_crypto_cb; + u8 seed[CHACHA_KEY_SIZE * 2] = { + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, + 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, + 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, + 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, + }; + struct chacha20_block chacha20; + int ret; + u8 outbuf[CHACHA_KEY_SIZE * 2] __aligned(sizeof(u32)); + + /* + * Expected result when ChaCha20 DRNG state is zero: + * * constants are set to "expand 32-byte k" + * * remaining state is 0 + * and pulling one half ChaCha20 DRNG block. + */ + static const u8 expected_halfblock[CHACHA_KEY_SIZE] = { + 0x76, 0xb8, 0xe0, 0xad, 0xa0, 0xf1, 0x3d, 0x90, + 0x40, 0x5d, 0x6a, 0xe5, 0x53, 0x86, 0xbd, 0x28, + 0xbd, 0xd2, 0x19, 0xb8, 0xa0, 0x8d, 0xed, 0x1a, + 0xa8, 0x36, 0xef, 0xcc, 0x8b, 0x77, 0x0d, 0xc7 }; + + /* + * Expected result when ChaCha20 DRNG state is zero: + * * constants are set to "expand 32-byte k" + * * remaining state is 0 + * followed by a reseed with two keyblocks + * 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + * 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + * 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + * 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, + * 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, + * 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, + * 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, + * 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f + * and pulling one ChaCha20 DRNG block. 
+ */ + static const u8 expected_oneblock[CHACHA_KEY_SIZE * 2] = { + 0xe3, 0xb0, 0x8a, 0xcc, 0x34, 0xc3, 0x17, 0x0e, + 0xc3, 0xd8, 0xc3, 0x40, 0xe7, 0x73, 0xe9, 0x0d, + 0xd1, 0x62, 0xa3, 0x5d, 0x7d, 0xf2, 0xf1, 0x4a, + 0x24, 0x42, 0xb7, 0x1e, 0xb0, 0x05, 0x17, 0x07, + 0xb9, 0x35, 0x10, 0x69, 0x8b, 0x46, 0xfb, 0x51, + 0xe9, 0x91, 0x3f, 0x46, 0xf2, 0x4d, 0xea, 0xd0, + 0x81, 0xc1, 0x1b, 0xa9, 0x5d, 0x52, 0x91, 0x5f, + 0xcd, 0xdc, 0xc6, 0xd6, 0xc3, 0x7c, 0x50, 0x23 }; + + /* + * Expected result when ChaCha20 DRNG state is zero: + * * constants are set to "expand 32-byte k" + * * remaining state is 0 + * followed by a reseed with one key block plus one byte + * 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + * 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + * 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + * 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, + * 0x20 + * and pulling less than one ChaCha20 DRNG block. + */ + static const u8 expected_block_nonalinged[CHACHA_KEY_SIZE + 4] = { + 0x9c, 0xfc, 0x5e, 0x31, 0x21, 0x62, 0x11, 0x85, + 0xd3, 0x77, 0xd3, 0x69, 0x0f, 0xa8, 0x16, 0x55, + 0xb4, 0x4c, 0xf6, 0x52, 0xf3, 0xa8, 0x37, 0x99, + 0x38, 0x76, 0xa0, 0x66, 0xec, 0xbb, 0xce, 0xa9, + 0x9c, 0x95, 0xa1, 0xfd }; + + BUILD_BUG_ON(sizeof(seed) % sizeof(u32)); + + memset(&chacha20, 0, sizeof(chacha20)); + lrng_cc20_init_rfc7539(&chacha20); + lrng_selftest_bswap32((u32 *)seed, sizeof(seed) / sizeof(u32)); + + /* Generate with zero state */ + ret = crypto_cb->lrng_drng_generate_helper(&chacha20, outbuf, + sizeof(expected_halfblock)); + if (ret != sizeof(expected_halfblock)) + goto err; + if (memcmp(outbuf, expected_halfblock, sizeof(expected_halfblock))) + goto err; + + /* Clear state of DRNG */ + memset(&chacha20.key.u[0], 0, 48); + + /* Reseed with 2 key blocks */ + ret = crypto_cb->lrng_drng_seed_helper(&chacha20, seed, + sizeof(expected_oneblock)); + if (ret < 0) + goto err; + ret = crypto_cb->lrng_drng_generate_helper(&chacha20, outbuf, + sizeof(expected_oneblock)); 
+ if (ret != sizeof(expected_oneblock)) + goto err; + if (memcmp(outbuf, expected_oneblock, sizeof(expected_oneblock))) + goto err; + + /* Clear state of DRNG */ + memset(&chacha20.key.u[0], 0, 48); + + /* Reseed with 1 key block and one byte */ + ret = crypto_cb->lrng_drng_seed_helper(&chacha20, seed, + sizeof(expected_block_nonalinged)); + if (ret < 0) + goto err; + ret = crypto_cb->lrng_drng_generate_helper(&chacha20, outbuf, + sizeof(expected_block_nonalinged)); + if (ret != sizeof(expected_block_nonalinged)) + goto err; + if (memcmp(outbuf, expected_block_nonalinged, + sizeof(expected_block_nonalinged))) + goto err; + + return LRNG_SELFTEST_PASSED; + +err: + pr_err("LRNG ChaCha20 DRNG self-test FAILED\n"); + return LRNG_SEFLTEST_ERROR_CHACHA20; +} + +static unsigned int lrng_selftest_status = LRNG_SELFTEST_NOT_EXECUTED; + +static int lrng_selftest(void) +{ + unsigned int ret = lrng_data_process_selftest(); + + ret |= lrng_chacha20_drng_selftest(); + ret |= lrng_hash_selftest(); + ret |= lrng_gcd_selftest(); + + if (ret) { + if (IS_ENABLED(CONFIG_LRNG_SELFTEST_PANIC)) + panic("LRNG self-tests failed: %u\n", ret); + } else { + pr_info("LRNG self-tests passed\n"); + } + + lrng_selftest_status = ret; + + if (lrng_selftest_status) + return -EFAULT; + return 0; +} + +#ifdef CONFIG_SYSFS +/* Re-perform self-test when any value is written to the sysfs file. 
*/
+static int lrng_selftest_sysfs_set(const char *val,
+				   const struct kernel_param *kp)
+{
+	/* The written value is ignored; any write re-runs all self-tests. */
+	return lrng_selftest();
+}
+
+/* Writes trigger a new self-test run, reads return lrng_selftest_status. */
+static const struct kernel_param_ops lrng_selftest_sysfs = {
+	.set = lrng_selftest_sysfs_set,
+	.get = param_get_uint,
+};
+module_param_cb(selftest_status, &lrng_selftest_sysfs, &lrng_selftest_status,
+		0644);
+#endif /* CONFIG_SYSFS */
+
+/* Run the self-tests once when the code is initialized. */
+static int __init lrng_selftest_init(void)
+{
+	return lrng_selftest();
+}
+
+module_init(lrng_selftest_init);
diff --git a/drivers/char/lrng/lrng_switch.c b/drivers/char/lrng/lrng_switch.c
new file mode 100644
index 0000000000000..5fce401499112
--- /dev/null
+++ b/drivers/char/lrng/lrng_switch.c
@@ -0,0 +1,226 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
+/*
+ * LRNG DRNG switching support
+ *
+ * Copyright (C) 2016 - 2021, Stephan Mueller
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include
+
+#include "lrng_internal.h"
+
+/*
+ * lrng_drng_switch - Replace the DRNG and hash handles of one DRNG instance
+ *
+ * @drng_store: DRNG instance whose DRNG, hash and callbacks are replaced
+ * @cb: Callbacks used to allocate and operate the new DRNG and hash
+ * @node: NUMA node number, used in log messages and handed to
+ *	  lrng_pcpu_switch_hash()
+ *
+ * A new DRNG and a new hash are allocated with @cb, the new DRNG is seeded
+ * with output of the current DRNG, and -- with all locks of @drng_store held
+ * -- the entropy pool hashes and the DRNG instance are moved over to @cb.
+ *
+ * Return:
+ * * 0 on success
+ * * non-zero error code of the failing allocation, digest size check or
+ *   hash switch otherwise
+ */
+static int lrng_drng_switch(struct lrng_drng *drng_store,
+			    const struct lrng_crypto_cb *cb, int node)
+{
+	const struct lrng_crypto_cb *old_cb;
+	unsigned long flags = 0, flags2 = 0;
+	int ret;
+	u8 seed[LRNG_DRNG_SECURITY_STRENGTH_BYTES];
+	void *new_drng = cb->lrng_drng_alloc(LRNG_DRNG_SECURITY_STRENGTH_BYTES);
+	void *old_drng, *new_hash, *old_hash;
+	u32 current_security_strength;
+	bool sl = false, reset_drng = !lrng_get_available();
+
+	if (IS_ERR(new_drng)) {
+		pr_warn("could not allocate new DRNG for NUMA node %d (%ld)\n",
+			node, PTR_ERR(new_drng));
+		return PTR_ERR(new_drng);
+	}
+
+	new_hash = cb->lrng_hash_alloc();
+	if (IS_ERR(new_hash)) {
+		pr_warn("could not allocate new LRNG pool hash (%ld)\n",
+			PTR_ERR(new_hash));
+		cb->lrng_drng_dealloc(new_drng);
+		return PTR_ERR(new_hash);
+	}
+
+	if (cb->lrng_hash_digestsize(new_hash) > LRNG_MAX_DIGESTSIZE) {
+		pr_warn("digest size of newly requested hash too large\n");
+		cb->lrng_hash_dealloc(new_hash);
+		cb->lrng_drng_dealloc(new_drng);
+		return -EINVAL;
+	}
+
+	current_security_strength = lrng_security_strength();
+	lrng_drng_lock(drng_store, &flags);
+
+	/*
+	 * Pull from existing DRNG to seed new DRNG regardless of seed status
+	 * of old DRNG -- the entropy state for the DRNG is left unchanged which
+	 * implies that also the new DRNG is reseeded when deemed necessary.
+	 * This seeding of the new DRNG shall only ensure that the new DRNG has
+	 * the same entropy as the old DRNG.
+	 */
+	ret = drng_store->crypto_cb->lrng_drng_generate_helper(
+				drng_store->drng, seed, sizeof(seed));
+	lrng_drng_unlock(drng_store, &flags);
+
+	if (ret < 0) {
+		reset_drng = true;
+		pr_warn("getting random data from DRNG failed for NUMA node %d (%d)\n",
+			node, ret);
+	} else {
+		/* seed new DRNG with data */
+		ret = cb->lrng_drng_seed_helper(new_drng, seed, ret);
+		if (ret < 0) {
+			reset_drng = true;
+			pr_warn("seeding of new DRNG failed for NUMA node %d (%d)\n",
+				node, ret);
+		} else {
+			pr_debug("seeded new DRNG of NUMA node %d instance from old DRNG instance\n",
+				 node);
+		}
+	}
+
+	/*
+	 * Wipe the seed on every path: a failed generate call may still have
+	 * left DRNG output in the buffer.
+	 */
+	memzero_explicit(seed, sizeof(seed));
+
+	mutex_lock(&drng_store->lock);
+	write_lock_irqsave(&drng_store->hash_lock, flags2);
+	/*
+	 * If we switch the DRNG from the initial ChaCha20 DRNG to something
+	 * else, there is a lock transition from spin lock to mutex (see
+	 * lrng_drng_is_atomic and how the lock is taken in lrng_drng_lock).
+	 * Thus, we need to take both locks during the transition phase.
+	 */
+	if (lrng_drng_is_atomic(drng_store)) {
+		spin_lock_irqsave(&drng_store->spin_lock, flags);
+		sl = true;
+	} else {
+		__acquire(&drng_store->spin_lock);
+	}
+
+	/* Trigger the switch of the aux entropy pool for current node. */
+	if (drng_store == lrng_drng_init_instance()) {
+		ret = lrng_aux_switch_hash(cb, new_hash, drng_store->crypto_cb);
+		if (ret)
+			goto err;
+	}
+
+	/* Trigger the switch of the per-CPU entropy pools for current node. */
+	ret = lrng_pcpu_switch_hash(node, cb, new_hash, drng_store->crypto_cb);
+	if (ret) {
+		/* Switch the crypto operation back to be consistent */
+		WARN_ON(lrng_aux_switch_hash(drng_store->crypto_cb,
+					     drng_store->hash, cb));
+	} else {
+		if (reset_drng)
+			lrng_drng_reset(drng_store);
+
+		old_drng = drng_store->drng;
+		old_cb = drng_store->crypto_cb;
+		drng_store->drng = new_drng;
+		drng_store->crypto_cb = cb;
+
+		old_hash = drng_store->hash;
+		drng_store->hash = new_hash;
+		pr_info("Entropy pool read-hash allocated for DRNG for NUMA node %d\n",
+			node);
+
+		/* Reseed if previous LRNG security strength was insufficient */
+		if (current_security_strength < lrng_security_strength())
+			drng_store->force_reseed = true;
+
+		/* Force oversampling seeding as we initialize DRNG */
+		if (IS_ENABLED(CONFIG_LRNG_OVERSAMPLE_ENTROPY_SOURCES))
+			lrng_unset_fully_seeded(drng_store);
+
+		if (lrng_state_min_seeded())
+			lrng_set_entropy_thresh(lrng_get_seed_entropy_osr(
+						drng_store->fully_seeded));
+
+		/* ChaCha20 serves as atomic instance left untouched. */
+		if (old_drng != &chacha20) {
+			old_cb->lrng_drng_dealloc(old_drng);
+			old_cb->lrng_hash_dealloc(old_hash);
+		}
+
+		pr_info("DRNG of NUMA node %d switched\n", node);
+	}
+
+err:
+	if (sl)
+		spin_unlock_irqrestore(&drng_store->spin_lock, flags);
+	else
+		__release(&drng_store->spin_lock);
+	write_unlock_irqrestore(&drng_store->hash_lock, flags2);
+	mutex_unlock(&drng_store->lock);
+
+	/*
+	 * On failure the new DRNG was never installed in drng_store, so it
+	 * must be released here or it is lost. new_hash is intentionally not
+	 * released: it was handed to the hash switch helpers and it is not
+	 * visible here whether a failed switch still references it --
+	 * TODO confirm and free it as well if that is safe.
+	 */
+	if (ret)
+		cb->lrng_drng_dealloc(new_drng);
+
+	return ret;
+}
+
+/*
+ * Switch the existing DRNG instances with new using the new crypto callbacks.
+ * The caller must hold the lrng_crypto_cb_update lock.
+ */
+static int lrng_drngs_switch(const struct lrng_crypto_cb *cb)
+{
+	struct lrng_drng **lrng_drng = lrng_drng_instances();
+	struct lrng_drng *lrng_drng_init = lrng_drng_init_instance();
+	int ret = 0;
+
+	/* Update DRNG */
+	if (lrng_drng) {
+		u32 node;
+
+		for_each_online_node(node) {
+			if (lrng_drng[node])
+				ret = lrng_drng_switch(lrng_drng[node], cb,
+						       node);
+		}
+	} else {
+		ret = lrng_drng_switch(lrng_drng_init, cb, 0);
+	}
+
+	if (!ret)
+		lrng_set_available();
+
+	/*
+	 * NOTE(review): a failure of lrng_drng_switch() is not reported to the
+	 * caller -- 0 is returned unconditionally and, with several NUMA
+	 * nodes, only the result of the last node decides whether
+	 * lrng_set_available() is called. Confirm this is intended before
+	 * relying on the "< 0 on error" contract of lrng_set_drng_cb().
+	 */
+	return 0;
+}
+
+/*
+ * lrng_set_drng_cb - Register new cryptographic callback functions for DRNG
+ * The registering implies that all old DRNG states are replaced with new
+ * DRNG states.
+ *
+ * @cb: Callback functions to be registered -- if NULL, use the default
+ *	callbacks pointing to the ChaCha20 DRNG.
+ *
+ * Return:
+ * * 0 on success
+ * * < 0 on error
+ */
+int lrng_set_drng_cb(const struct lrng_crypto_cb *cb)
+{
+	struct lrng_drng *lrng_drng_init = lrng_drng_init_instance();
+	int ret;
+
+	if (!cb)
+		cb = &lrng_cc20_crypto_cb;
+
+	mutex_lock(&lrng_crypto_cb_update);
+
+	/*
+	 * If a callback other than the default is set, allow it only to be
+	 * set back to the default callback. This ensures that multiple
+	 * different callbacks cannot be registered at the same time. If a
+	 * callback different from the current callback and the default
+	 * callback shall be set, the current callback must be deregistered
+	 * (e.g. the kernel module providing it must be unloaded) and the new
+	 * implementation can be registered.
+	 */
+	if ((cb != &lrng_cc20_crypto_cb) &&
+	    (lrng_drng_init->crypto_cb != &lrng_cc20_crypto_cb)) {
+		pr_warn("disallow setting new cipher callbacks, unload the old callbacks first!\n");
+		ret = -EINVAL;
+		goto out;
+	}
+
+	ret = lrng_drngs_switch(cb);
+
+out:
+	mutex_unlock(&lrng_crypto_cb_update);
+	return ret;
+}
+EXPORT_SYMBOL(lrng_set_drng_cb);
diff --git a/drivers/char/lrng/lrng_testing.c b/drivers/char/lrng/lrng_testing.c
new file mode 100644
index 0000000000000..3517319acb436
--- /dev/null
+++ b/drivers/char/lrng/lrng_testing.c
@@ -0,0 +1,689 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
+/*
+ * Linux Random Number Generator (LRNG) testing interfaces
+ *
+ * Copyright (C) 2019 - 2021, Stephan Mueller
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "lrng_internal.h"
+
+#define LRNG_TESTING_RINGBUFFER_SIZE	1024
+#define LRNG_TESTING_RINGBUFFER_MASK	(LRNG_TESTING_RINGBUFFER_SIZE - 1)
+
+/*
+ * Ring buffer of 32-bit samples for one test interface. rb_reader and
+ * rb_writer are free-running counters; they must be masked with
+ * LRNG_TESTING_RINGBUFFER_MASK before being used as array index.
+ */
+struct lrng_testing {
+	u32 lrng_testing_rb[LRNG_TESTING_RINGBUFFER_SIZE];
+	u32 rb_reader;
+	u32 rb_writer;
+	atomic_t lrng_testing_enabled;
+	spinlock_t lock;
+	wait_queue_head_t read_wait;
+};
+
+/*************************** Generic Data Handling ****************************/
+
+/*
+ * boot variable:
+ * 0 ==> No boot test, gathering of runtime data allowed
+ * 1 ==> Boot test enabled and ready for collecting data, gathering runtime
+ *	 data is disabled
+ * 2 ==> Boot test completed and disabled, gathering of runtime data is
+ *	 disabled
+ */
+
+/* Drop all buffered samples by rewinding reader and writer. */
+static void lrng_testing_reset(struct lrng_testing *data)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&data->lock, flags);
+	data->rb_reader = 0;
+	data->rb_writer = 0;
+	spin_unlock_irqrestore(&data->lock, flags);
+}
+
+/* Start runtime data collection unless a boot time test is pending. */
+static void lrng_testing_init(struct lrng_testing *data, u32 boot)
+{
+	/*
+	 * The boot time testing implies we have a running test. If the
+	 * caller wants to clear it, he has to unset the boot_test flag
+	 * at runtime via sysfs to enable regular runtime testing
+	 */
+	if (boot)
+		return;
+
+	lrng_testing_reset(data);
+	atomic_set(&data->lrng_testing_enabled, 1);
+	pr_warn("Enabling data collection\n");
+}
+
+/* Stop runtime data collection unless boot time data is still buffered. */
+static void lrng_testing_fini(struct lrng_testing *data, u32 boot)
+{
+	/* If we have boot data, we do not reset yet to allow data to be read */
+	if (boot)
+		return;
+
+	atomic_set(&data->lrng_testing_enabled, 0);
+	lrng_testing_reset(data);
+	pr_warn("Disabling data collection\n");
+}
+
+/*
+ * lrng_testing_store - Append one sample to the ring buffer
+ *
+ * @data: ring buffer to store into
+ * @value: sample to be stored
+ * @boot: boot test state of this interface, set to 2 once the boot time
+ *	  collection has filled the ring buffer
+ *
+ * Return: true if the sample was stored, false if collection is disabled or
+ *	   the boot time collection is complete
+ */
+static bool lrng_testing_store(struct lrng_testing *data, u32 value,
+			       u32 *boot)
+{
+	unsigned long flags;
+
+	if (!atomic_read(&data->lrng_testing_enabled) && (*boot != 1))
+		return false;
+
+	spin_lock_irqsave(&data->lock, flags);
+
+	/*
+	 * Disable entropy testing for boot time testing after ring buffer
+	 * is filled.
+	 */
+	if (*boot) {
+		/*
+		 * Stop once all LRNG_TESTING_RINGBUFFER_SIZE slots are used:
+		 * one more store would wrap around and overwrite the very
+		 * first boot time sample.
+		 */
+		if (data->rb_writer >= LRNG_TESTING_RINGBUFFER_SIZE) {
+			*boot = 2;
+			pr_warn_once("One time data collection test disabled\n");
+			spin_unlock_irqrestore(&data->lock, flags);
+			return false;
+		}
+
+		if (data->rb_writer == 1)
+			pr_warn("One time data collection test enabled\n");
+	}
+
+	data->lrng_testing_rb[data->rb_writer & LRNG_TESTING_RINGBUFFER_MASK] =
+									value;
+	data->rb_writer++;
+
+	spin_unlock_irqrestore(&data->lock, flags);
+
+	if (wq_has_sleeper(&data->read_wait))
+		wake_up_interruptible(&data->read_wait);
+
+	return true;
+}
+
+/* Wait condition of the reader: writer and reader are at different slots. */
+static bool lrng_testing_have_data(struct lrng_testing *data)
+{
+	return ((data->rb_writer & LRNG_TESTING_RINGBUFFER_MASK) !=
+		 (data->rb_reader & LRNG_TESTING_RINGBUFFER_MASK));
+}
+
+/*
+ * lrng_testing_reader - Copy buffered samples into a kernel buffer
+ *
+ * @data: ring buffer to read from
+ * @boot: boot test state of this interface, reset to 0 once all boot time
+ *	  samples were handed out
+ * @outbuf: destination buffer
+ * @outbuflen: size of @outbuf in bytes; only whole 32-bit words are copied
+ *
+ * Without a boot time test, collection is enabled on entry and disabled
+ * again on exit, and the call sleeps until enough samples arrived.
+ *
+ * Return: number of bytes copied, or -ERESTARTSYS if a signal is pending
+ *	   after waiting for data
+ */
+static int lrng_testing_reader(struct lrng_testing *data, u32 *boot,
+			       u8 *outbuf, u32 outbuflen)
+{
+	unsigned long flags;
+	int collected_data = 0;
+
+	lrng_testing_init(data, *boot);
+
+	while (outbuflen) {
+		spin_lock_irqsave(&data->lock, flags);
+
+		/* We have no data or reached the writer. */
+		if (!data->rb_writer ||
+		    (data->rb_writer == data->rb_reader)) {
+
+			spin_unlock_irqrestore(&data->lock, flags);
+
+			/*
+			 * Now we gathered all boot data, enable regular data
+			 * collection.
+			 */
+			if (*boot) {
+				*boot = 0;
+				goto out;
+			}
+
+			wait_event_interruptible(data->read_wait,
+						 lrng_testing_have_data(data));
+			if (signal_pending(current)) {
+				collected_data = -ERESTARTSYS;
+				goto out;
+			}
+
+			continue;
+		}
+
+		/* We copy out word-wise */
+		if (outbuflen < sizeof(u32)) {
+			spin_unlock_irqrestore(&data->lock, flags);
+			goto out;
+		}
+
+		/*
+		 * rb_reader is a free-running counter just like rb_writer,
+		 * so mask it to stay inside the ring buffer.
+		 */
+		memcpy(outbuf,
+		       &data->lrng_testing_rb[data->rb_reader &
+					      LRNG_TESTING_RINGBUFFER_MASK],
+		       sizeof(u32));
+		data->rb_reader++;
+
+		spin_unlock_irqrestore(&data->lock, flags);
+
+		outbuf += sizeof(u32);
+		outbuflen -= sizeof(u32);
+		collected_data += sizeof(u32);
+	}
+
+out:
+	lrng_testing_fini(data, *boot);
+	return collected_data;
+}
+
+/*
+ * lrng_testing_extract_user - Hand collected samples to user space
+ *
+ * @file: unused
+ * @buf: user space destination buffer
+ * @nbytes: number of bytes requested
+ * @ppos: file position, advanced by the number of bytes delivered
+ * @reader: interface specific reader filling a kernel buffer
+ *
+ * Return: number of bytes copied to @buf, or a negative error code
+ *	   (-ENOMEM, -EFAULT, -ERESTARTSYS or the error of @reader)
+ */
+static int lrng_testing_extract_user(struct file *file, char __user *buf,
+				     size_t nbytes, loff_t *ppos,
+				     int (*reader)(u8 *outbuf, u32 outbuflen))
+{
+	u8 *tmp, *tmp_aligned;
+	int ret = 0, large_request = (nbytes > 256);
+
+	if (!nbytes)
+		return 0;
+
+	/*
+	 * The intention of this interface is for collecting at least
+	 * 1000 samples due to the SP800-90B requirements. So, we make no
+	 * effort in avoiding allocating more memory than actually needed
+	 * by the user. Hence, we allocate sufficient memory to always hold
+	 * that amount of data.
+	 */
+	tmp = kmalloc(LRNG_TESTING_RINGBUFFER_SIZE + sizeof(u32), GFP_KERNEL);
+	if (!tmp)
+		return -ENOMEM;
+
+	tmp_aligned = PTR_ALIGN(tmp, sizeof(u32));
+
+	while (nbytes) {
+		int i;
+
+		if (large_request && need_resched()) {
+			if (signal_pending(current)) {
+				if (ret == 0)
+					ret = -ERESTARTSYS;
+				break;
+			}
+			schedule();
+		}
+
+		/*
+		 * Clamp in size_t so that a huge nbytes cannot turn into a
+		 * negative chunk size; the result always fits into an int.
+		 */
+		i = min_t(size_t, nbytes, LRNG_TESTING_RINGBUFFER_SIZE);
+		i = reader(tmp_aligned, i);
+		if (i <= 0) {
+			if (i < 0)
+				ret = i;
+			break;
+		}
+		if (copy_to_user(buf, tmp_aligned, i)) {
+			ret = -EFAULT;
+			break;
+		}
+
+		nbytes -= i;
+		buf += i;
+		ret += i;
+	}
+
+	kfree_sensitive(tmp);
+
+	if (ret > 0)
+		*ppos += ret;
+
+	return ret;
+}
+
+/************** Raw High-Resolution Timer Entropy Data Handling ***************/
+
+#ifdef CONFIG_LRNG_RAW_HIRES_ENTROPY
+
+static u32 boot_raw_hires_test = 0;
+module_param(boot_raw_hires_test, uint, 0644);
+MODULE_PARM_DESC(boot_raw_hires_test, "Enable gathering boot time high resolution timer entropy of the first entropy events");
+
+static struct lrng_testing lrng_raw_hires = {
+	.rb_reader = 0,
+	.rb_writer = 0,
+	.lock      = __SPIN_LOCK_UNLOCKED(lrng_raw_hires.lock),
+	.read_wait = __WAIT_QUEUE_HEAD_INITIALIZER(lrng_raw_hires.read_wait)
+};
+
+/* Record one raw high-resolution time stamp; see lrng_testing_store(). */
+bool lrng_raw_hires_entropy_store(u32 value)
+{
+	return lrng_testing_store(&lrng_raw_hires, value, &boot_raw_hires_test);
+}
+
+static int lrng_raw_hires_entropy_reader(u8 *outbuf, u32 outbuflen)
+{
+	return lrng_testing_reader(&lrng_raw_hires, &boot_raw_hires_test,
+				   outbuf, outbuflen);
+}
+
+static ssize_t lrng_raw_hires_read(struct file *file, char __user *to,
+				   size_t count, loff_t *ppos)
+{
+	return lrng_testing_extract_user(file, to, count, ppos,
+					 lrng_raw_hires_entropy_reader);
+}
+
+static const struct file_operations lrng_raw_hires_fops = {
+	.owner = THIS_MODULE,
+	.read = lrng_raw_hires_read,
+};
+
+#endif /* CONFIG_LRNG_RAW_HIRES_ENTROPY */
+
+/********************* Raw Jiffies Entropy Data Handling **********************/
+
+#ifdef
CONFIG_LRNG_RAW_JIFFIES_ENTROPY
+
+static u32 boot_raw_jiffies_test = 0;
+module_param(boot_raw_jiffies_test, uint, 0644);
+/* The description names jiffies: this parameter guards lrng_raw_jiffies. */
+MODULE_PARM_DESC(boot_raw_jiffies_test, "Enable gathering boot time jiffies entropy of the first entropy events");
+
+static struct lrng_testing lrng_raw_jiffies = {
+	.rb_reader = 0,
+	.rb_writer = 0,
+	.lock      = __SPIN_LOCK_UNLOCKED(lrng_raw_jiffies.lock),
+	.read_wait = __WAIT_QUEUE_HEAD_INITIALIZER(lrng_raw_jiffies.read_wait)
+};
+
+/* Record one raw jiffies value; see lrng_testing_store(). */
+bool lrng_raw_jiffies_entropy_store(u32 value)
+{
+	return lrng_testing_store(&lrng_raw_jiffies, value,
+				  &boot_raw_jiffies_test);
+}
+
+static int lrng_raw_jiffies_entropy_reader(u8 *outbuf, u32 outbuflen)
+{
+	return lrng_testing_reader(&lrng_raw_jiffies, &boot_raw_jiffies_test,
+				   outbuf, outbuflen);
+}
+
+static ssize_t lrng_raw_jiffies_read(struct file *file, char __user *to,
+				     size_t count, loff_t *ppos)
+{
+	return lrng_testing_extract_user(file, to, count, ppos,
+					 lrng_raw_jiffies_entropy_reader);
+}
+
+static const struct file_operations lrng_raw_jiffies_fops = {
+	.owner = THIS_MODULE,
+	.read = lrng_raw_jiffies_read,
+};
+
+#endif /* CONFIG_LRNG_RAW_JIFFIES_ENTROPY */
+
+/************************** Raw IRQ Data Handling ****************************/
+
+#ifdef CONFIG_LRNG_RAW_IRQ_ENTROPY
+
+static u32 boot_raw_irq_test = 0;
+module_param(boot_raw_irq_test, uint, 0644);
+MODULE_PARM_DESC(boot_raw_irq_test, "Enable gathering boot time entropy of the first IRQ entropy events");
+
+static struct lrng_testing lrng_raw_irq = {
+	.rb_reader = 0,
+	.rb_writer = 0,
+	.lock      = __SPIN_LOCK_UNLOCKED(lrng_raw_irq.lock),
+	.read_wait = __WAIT_QUEUE_HEAD_INITIALIZER(lrng_raw_irq.read_wait)
+};
+
+/* Record one raw IRQ value; see lrng_testing_store(). */
+bool lrng_raw_irq_entropy_store(u32 value)
+{
+	return lrng_testing_store(&lrng_raw_irq, value, &boot_raw_irq_test);
+}
+
+static int lrng_raw_irq_entropy_reader(u8 *outbuf, u32 outbuflen)
+{
+	return lrng_testing_reader(&lrng_raw_irq, &boot_raw_irq_test, outbuf,
+				   outbuflen);
+}
+
+static ssize_t lrng_raw_irq_read(struct file *file, char __user *to,
+				 size_t count, loff_t *ppos)
+{
+	return lrng_testing_extract_user(file, to, count, ppos,
+					 lrng_raw_irq_entropy_reader);
+}
+
+static const struct file_operations lrng_raw_irq_fops = {
+	.owner = THIS_MODULE,
+	.read = lrng_raw_irq_read,
+};
+
+#endif /* CONFIG_LRNG_RAW_IRQ_ENTROPY */
+
+/************************ Raw IRQFLAGS Data Handling **************************/
+
+#ifdef CONFIG_LRNG_RAW_IRQFLAGS_ENTROPY
+
+static u32 boot_raw_irqflags_test = 0;
+module_param(boot_raw_irqflags_test, uint, 0644);
+MODULE_PARM_DESC(boot_raw_irqflags_test, "Enable gathering boot time entropy of the first IRQ flags entropy events");
+
+static struct lrng_testing lrng_raw_irqflags = {
+	.rb_reader = 0,
+	.rb_writer = 0,
+	.lock      = __SPIN_LOCK_UNLOCKED(lrng_raw_irqflags.lock),
+	.read_wait = __WAIT_QUEUE_HEAD_INITIALIZER(lrng_raw_irqflags.read_wait)
+};
+
+/* Record one raw IRQ flags value; see lrng_testing_store(). */
+bool lrng_raw_irqflags_entropy_store(u32 value)
+{
+	return lrng_testing_store(&lrng_raw_irqflags, value,
+				  &boot_raw_irqflags_test);
+}
+
+static int lrng_raw_irqflags_entropy_reader(u8 *outbuf, u32 outbuflen)
+{
+	return lrng_testing_reader(&lrng_raw_irqflags, &boot_raw_irqflags_test,
+				   outbuf, outbuflen);
+}
+
+static ssize_t lrng_raw_irqflags_read(struct file *file, char __user *to,
+				      size_t count, loff_t *ppos)
+{
+	return lrng_testing_extract_user(file, to, count, ppos,
+					 lrng_raw_irqflags_entropy_reader);
+}
+
+static const struct file_operations lrng_raw_irqflags_fops = {
+	.owner = THIS_MODULE,
+	.read = lrng_raw_irqflags_read,
+};
+
+#endif /* CONFIG_LRNG_RAW_IRQFLAGS_ENTROPY */
+
+/************************ Raw _RET_IP_ Data Handling **************************/
+
+#ifdef CONFIG_LRNG_RAW_RETIP_ENTROPY
+
+static u32 boot_raw_retip_test = 0;
+module_param(boot_raw_retip_test, uint, 0644);
+MODULE_PARM_DESC(boot_raw_retip_test, "Enable gathering boot time entropy of the first return instruction pointer entropy events");
+
+static struct lrng_testing lrng_raw_retip = {
+	.rb_reader = 0,
+	.rb_writer = 0,
+	.lock      = __SPIN_LOCK_UNLOCKED(lrng_raw_retip.lock),
+	.read_wait = __WAIT_QUEUE_HEAD_INITIALIZER(lrng_raw_retip.read_wait)
+};
+
+/* Record one raw return instruction pointer; see lrng_testing_store(). */
+bool lrng_raw_retip_entropy_store(u32 value)
+{
+	return lrng_testing_store(&lrng_raw_retip, value, &boot_raw_retip_test);
+}
+
+static int lrng_raw_retip_entropy_reader(u8 *outbuf, u32 outbuflen)
+{
+	return lrng_testing_reader(&lrng_raw_retip, &boot_raw_retip_test,
+				   outbuf, outbuflen);
+}
+
+static ssize_t lrng_raw_retip_read(struct file *file, char __user *to,
+				   size_t count, loff_t *ppos)
+{
+	return lrng_testing_extract_user(file, to, count, ppos,
+					 lrng_raw_retip_entropy_reader);
+}
+
+static const struct file_operations lrng_raw_retip_fops = {
+	.owner = THIS_MODULE,
+	.read = lrng_raw_retip_read,
+};
+
+#endif /* CONFIG_LRNG_RAW_RETIP_ENTROPY */
+
+/********************** Raw IRQ register Data Handling ************************/
+
+#ifdef CONFIG_LRNG_RAW_REGS_ENTROPY
+
+static u32 boot_raw_regs_test = 0;
+module_param(boot_raw_regs_test, uint, 0644);
+MODULE_PARM_DESC(boot_raw_regs_test, "Enable gathering boot time entropy of the first interrupt register entropy events");
+
+static struct lrng_testing lrng_raw_regs = {
+	.rb_reader = 0,
+	.rb_writer = 0,
+	.lock      = __SPIN_LOCK_UNLOCKED(lrng_raw_regs.lock),
+	.read_wait = __WAIT_QUEUE_HEAD_INITIALIZER(lrng_raw_regs.read_wait)
+};
+
+/* Record one raw interrupt register value; see lrng_testing_store(). */
+bool lrng_raw_regs_entropy_store(u32 value)
+{
+	return lrng_testing_store(&lrng_raw_regs, value, &boot_raw_regs_test);
+}
+
+static int lrng_raw_regs_entropy_reader(u8 *outbuf, u32 outbuflen)
+{
+	return lrng_testing_reader(&lrng_raw_regs, &boot_raw_regs_test,
+				   outbuf, outbuflen);
+}
+
+static ssize_t lrng_raw_regs_read(struct file *file, char __user *to,
+				  size_t count, loff_t *ppos)
+{
+	return lrng_testing_extract_user(file, to, count, ppos,
+					 lrng_raw_regs_entropy_reader);
+}
+
+static const struct file_operations lrng_raw_regs_fops = {
+	.owner = THIS_MODULE,
+	.read = lrng_raw_regs_read,
+};
+
+#endif /*
CONFIG_LRNG_RAW_REGS_ENTROPY */
+
+/********************** Raw Entropy Array Data Handling ***********************/
+
+#ifdef CONFIG_LRNG_RAW_ARRAY
+
+static u32 boot_raw_array = 0;
+module_param(boot_raw_array, uint, 0644);
+MODULE_PARM_DESC(boot_raw_array, "Enable gathering boot time raw noise array data of the first entropy events");
+
+static struct lrng_testing lrng_raw_array = {
+	.rb_reader = 0,
+	.rb_writer = 0,
+	.lock      = __SPIN_LOCK_UNLOCKED(lrng_raw_array.lock),
+	.read_wait = __WAIT_QUEUE_HEAD_INITIALIZER(lrng_raw_array.read_wait)
+};
+
+/* Record one raw array value; returns the result of lrng_testing_store(). */
+bool lrng_raw_array_entropy_store(u32 value)
+{
+	return lrng_testing_store(&lrng_raw_array, value, &boot_raw_array);
+}
+
+static int lrng_raw_array_entropy_reader(u8 *outbuf, u32 outbuflen)
+{
+	return lrng_testing_reader(&lrng_raw_array, &boot_raw_array, outbuf,
+				   outbuflen);
+}
+
+static ssize_t lrng_raw_array_read(struct file *file, char __user *to,
+				   size_t count, loff_t *ppos)
+{
+	return lrng_testing_extract_user(file, to, count, ppos,
+					 lrng_raw_array_entropy_reader);
+}
+
+static const struct file_operations lrng_raw_array_fops = {
+	.owner = THIS_MODULE,
+	.read = lrng_raw_array_read,
+};
+
+#endif /* CONFIG_LRNG_RAW_ARRAY */
+
+/******************** Interrupt Performance Data Handling *********************/
+
+#ifdef CONFIG_LRNG_IRQ_PERF
+
+static u32 boot_irq_perf = 0;
+module_param(boot_irq_perf, uint, 0644);
+MODULE_PARM_DESC(boot_irq_perf, "Enable gathering boot time interrupt performance data of the first entropy events");
+
+static struct lrng_testing lrng_irq_perf = {
+	.rb_reader = 0,
+	.rb_writer = 0,
+	.lock      = __SPIN_LOCK_UNLOCKED(lrng_irq_perf.lock),
+	.read_wait = __WAIT_QUEUE_HEAD_INITIALIZER(lrng_irq_perf.read_wait)
+};
+
+/*
+ * Store the difference between the current random_get_entropy() value and
+ * @start as one sample of the interrupt performance ring buffer.
+ */
+bool lrng_perf_time(u32 start)
+{
+	return lrng_testing_store(&lrng_irq_perf, random_get_entropy() - start,
+				  &boot_irq_perf);
+}
+
+static int lrng_irq_perf_reader(u8 *outbuf, u32 outbuflen)
+{
+	return lrng_testing_reader(&lrng_irq_perf, &boot_irq_perf, outbuf,
+				   outbuflen);
+}
+
+static ssize_t lrng_irq_perf_read(struct file *file, char __user *to,
+				  size_t count, loff_t *ppos)
+{
+	return lrng_testing_extract_user(file, to, count, ppos,
+					 lrng_irq_perf_reader);
+}
+
+static const struct file_operations lrng_irq_perf_fops = {
+	.owner = THIS_MODULE,
+	.read = lrng_irq_perf_read,
+};
+
+#endif /* CONFIG_LRNG_IRQ_PERF */
+
+/*********************************** ACVT ************************************/
+
+#ifdef CONFIG_LRNG_ACVT_HASH
+
+/* maximum amount of data to be hashed as defined by ACVP */
+#define LRNG_ACVT_MAX_SHA_MSG	(65536 >> 3)
+
+/*
+ * As we use static variables to store the data, it is clear that the
+ * test interface is only able to handle single threaded testing. This is
+ * considered to be sufficient for testing. If multi-threaded use of the
+ * ACVT test interface would be performed, the caller would get garbage
+ * but the kernel operation is unaffected by this.
+ */
+static u8 lrng_acvt_hash_data[LRNG_ACVT_MAX_SHA_MSG]
+						__aligned(LRNG_KCAPI_ALIGN);
+static atomic_t lrng_acvt_hash_data_size = ATOMIC_INIT(0);
+static u8 lrng_acvt_hash_digest[LRNG_ATOMIC_DIGEST_SIZE];
+
+/*
+ * Take the message to be hashed from user space. Requests larger than
+ * LRNG_ACVT_MAX_SHA_MSG are rejected with -EINVAL.
+ *
+ * NOTE(review): the message size is recorded as nbytes before the copy and
+ * independently of *ppos, so it only matches the buffered data when the
+ * whole message is written in one call at offset 0 -- confirm callers do so.
+ */
+static ssize_t lrng_acvt_hash_write(struct file *file, const char __user *buf,
+				    size_t nbytes, loff_t *ppos)
+{
+	if (nbytes > LRNG_ACVT_MAX_SHA_MSG)
+		return -EINVAL;
+
+	atomic_set(&lrng_acvt_hash_data_size, (int)nbytes);
+
+	return simple_write_to_buffer(lrng_acvt_hash_data,
+				      LRNG_ACVT_MAX_SHA_MSG, ppos, buf, nbytes);
+}
+
+/*
+ * Hash the previously written message with the hash callbacks of
+ * lrng_cc20_crypto_cb and return the digest buffer to user space. Requests
+ * for more than LRNG_ATOMIC_DIGEST_SIZE bytes are rejected with -EINVAL; a
+ * non-zero result of a hash callback is returned as is.
+ */
+static ssize_t lrng_acvt_hash_read(struct file *file, char __user *to,
+				   size_t count, loff_t *ppos)
+{
+	SHASH_DESC_ON_STACK(shash, NULL);
+	const struct lrng_crypto_cb *crypto_cb = &lrng_cc20_crypto_cb;
+	ssize_t ret;
+
+	if (count > LRNG_ATOMIC_DIGEST_SIZE)
+		return -EINVAL;
+
+	/* init, update and final run in sequence; the first error wins */
+	ret = crypto_cb->lrng_hash_init(shash, NULL) ?:
+	      crypto_cb->lrng_hash_update(shash, lrng_acvt_hash_data,
+				atomic_read_u32(&lrng_acvt_hash_data_size)) ?:
+	      crypto_cb->lrng_hash_final(shash, lrng_acvt_hash_digest);
+	if (ret)
+		return ret;
+
+	return simple_read_from_buffer(to, count, ppos, lrng_acvt_hash_digest,
+				       sizeof(lrng_acvt_hash_digest));
+}
+
+static const struct file_operations lrng_acvt_hash_fops = {
+	.owner = THIS_MODULE,
+	.open = simple_open,
+	.llseek = default_llseek,
+	.read = lrng_acvt_hash_read,
+	.write = lrng_acvt_hash_write,
+};
+
+#endif /* CONFIG_LRNG_ACVT_HASH */
+
+/**************************************************************************
+ * Debugfs interface
+ **************************************************************************/
+
+/*
+ * Create the debugfs directory named KBUILD_MODNAME and one file per test
+ * interface that is enabled in the kernel configuration. The raw data files
+ * are created with mode 0400, the ACVT hash file with mode 0600. The return
+ * values of the debugfs calls are not checked; 0 is always returned.
+ */
+static int __init lrng_raw_init(void)
+{
+	struct dentry *lrng_raw_debugfs_root;
+
+	lrng_raw_debugfs_root = debugfs_create_dir(KBUILD_MODNAME, NULL);
+
+#ifdef CONFIG_LRNG_RAW_HIRES_ENTROPY
+	debugfs_create_file_unsafe("lrng_raw_hires", 0400,
+				   lrng_raw_debugfs_root, NULL,
+				   &lrng_raw_hires_fops);
+#endif
+#ifdef CONFIG_LRNG_RAW_JIFFIES_ENTROPY
+	debugfs_create_file_unsafe("lrng_raw_jiffies", 0400,
+				   lrng_raw_debugfs_root, NULL,
+				   &lrng_raw_jiffies_fops);
+#endif
+#ifdef CONFIG_LRNG_RAW_IRQ_ENTROPY
+	debugfs_create_file_unsafe("lrng_raw_irq", 0400, lrng_raw_debugfs_root,
+				   NULL, &lrng_raw_irq_fops);
+#endif
+#ifdef CONFIG_LRNG_RAW_IRQFLAGS_ENTROPY
+	debugfs_create_file_unsafe("lrng_raw_irqflags", 0400,
+				   lrng_raw_debugfs_root, NULL,
+				   &lrng_raw_irqflags_fops);
+#endif
+#ifdef CONFIG_LRNG_RAW_RETIP_ENTROPY
+	debugfs_create_file_unsafe("lrng_raw_retip", 0400,
+				   lrng_raw_debugfs_root, NULL,
+				   &lrng_raw_retip_fops);
+#endif
+#ifdef CONFIG_LRNG_RAW_REGS_ENTROPY
+	debugfs_create_file_unsafe("lrng_raw_regs", 0400,
+				   lrng_raw_debugfs_root, NULL,
+				   &lrng_raw_regs_fops);
+#endif
+#ifdef CONFIG_LRNG_RAW_ARRAY
+	debugfs_create_file_unsafe("lrng_raw_array", 0400,
+				   lrng_raw_debugfs_root, NULL,
+				   &lrng_raw_array_fops);
+#endif
+#ifdef CONFIG_LRNG_IRQ_PERF
+	debugfs_create_file_unsafe("lrng_irq_perf", 0400, lrng_raw_debugfs_root,
+				   NULL, &lrng_irq_perf_fops);
+#endif
+#ifdef CONFIG_LRNG_ACVT_HASH
+	debugfs_create_file_unsafe("lrng_acvt_hash", 0600,
+				   lrng_raw_debugfs_root, NULL,
+				   &lrng_acvt_hash_fops);
+#endif
+
+	return 0;
+}
+
+module_init(lrng_raw_init);
diff --git a/drivers/char/random.c b/drivers/char/random.c index 3404a91edf292..19bf14e253f70 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -738,11 +738,17 @@ static void invalidate_batched_entropy(void); static void numa_crng_init(void); static bool trust_cpu __ro_after_init = IS_ENABLED(CONFIG_RANDOM_TRUST_CPU); +static bool trust_bootloader __ro_after_init = IS_ENABLED(CONFIG_RANDOM_TRUST_BOOTLOADER); static int __init parse_trust_cpu(char *arg) { return kstrtobool(arg, &trust_cpu); } +static int __init parse_trust_bootloader(char *arg) +{ + return kstrtobool(arg, &trust_bootloader); +} early_param("random.trust_cpu", parse_trust_cpu); +early_param("random.trust_bootloader", parse_trust_bootloader); static bool crng_init_try_arch(struct crng_state *crng) { @@ -2229,7 +2235,7 @@ EXPORT_SYMBOL_GPL(add_hwgenerator_randomness); */ void add_bootloader_randomness(const void *buf, unsigned int size) { - if (IS_ENABLED(CONFIG_RANDOM_TRUST_BOOTLOADER)) + if (trust_bootloader) add_hwgenerator_randomness(buf, size, size * 8); else add_device_randomness(buf, size); diff --git a/drivers/char/tpm/tpm-chip.c b/drivers/char/tpm/tpm-chip.c index b009e7479b702..783d65fc71f07 100644 --- a/drivers/char/tpm/tpm-chip.c +++ b/drivers/char/tpm/tpm-chip.c @@ -274,14 +274,6 @@ static void tpm_dev_release(struct device *dev) kfree(chip); } -static void tpm_devs_release(struct device *dev) -{ - struct tpm_chip *chip = container_of(dev, struct tpm_chip, devs); - - /* release the master device reference */ - put_device(&chip->dev); -} - /** * tpm_class_shutdown() - prepare the TPM device for loss of power. * @dev: device to which the chip is associated.
@@ -344,7 +336,6 @@ struct tpm_chip *tpm_chip_alloc(struct device *pdev, chip->dev_num = rc; device_initialize(&chip->dev); - device_initialize(&chip->devs); chip->dev.class = tpm_class; chip->dev.class->shutdown_pre = tpm_class_shutdown; @@ -352,29 +343,12 @@ struct tpm_chip *tpm_chip_alloc(struct device *pdev, chip->dev.parent = pdev; chip->dev.groups = chip->groups; - chip->devs.parent = pdev; - chip->devs.class = tpmrm_class; - chip->devs.release = tpm_devs_release; - /* get extra reference on main device to hold on - * behalf of devs. This holds the chip structure - * while cdevs is in use. The corresponding put - * is in the tpm_devs_release (TPM2 only) - */ - if (chip->flags & TPM_CHIP_FLAG_TPM2) - get_device(&chip->dev); - if (chip->dev_num == 0) chip->dev.devt = MKDEV(MISC_MAJOR, TPM_MINOR); else chip->dev.devt = MKDEV(MAJOR(tpm_devt), chip->dev_num); - chip->devs.devt = - MKDEV(MAJOR(tpm_devt), chip->dev_num + TPM_NUM_DEVICES); - rc = dev_set_name(&chip->dev, "tpm%d", chip->dev_num); - if (rc) - goto out; - rc = dev_set_name(&chip->devs, "tpmrm%d", chip->dev_num); if (rc) goto out; @@ -382,9 +356,7 @@ struct tpm_chip *tpm_chip_alloc(struct device *pdev, chip->flags |= TPM_CHIP_FLAG_VIRTUAL; cdev_init(&chip->cdev, &tpm_fops); - cdev_init(&chip->cdevs, &tpmrm_fops); chip->cdev.owner = THIS_MODULE; - chip->cdevs.owner = THIS_MODULE; rc = tpm2_init_space(&chip->work_space, TPM2_SPACE_BUFFER_SIZE); if (rc) { @@ -396,7 +368,6 @@ struct tpm_chip *tpm_chip_alloc(struct device *pdev, return chip; out: - put_device(&chip->devs); put_device(&chip->dev); return ERR_PTR(rc); } @@ -445,14 +416,9 @@ static int tpm_add_char_device(struct tpm_chip *chip) } if (chip->flags & TPM_CHIP_FLAG_TPM2 && !tpm_is_firmware_upgrade(chip)) { - rc = cdev_device_add(&chip->cdevs, &chip->devs); - if (rc) { - dev_err(&chip->devs, - "unable to cdev_device_add() %s, major %d, minor %d, err=%d\n", - dev_name(&chip->devs), MAJOR(chip->devs.devt), - MINOR(chip->devs.devt), rc); - return rc; - 
} + rc = tpm_devs_add(chip); + if (rc) + goto err_del_cdev; } /* Make the chip available. */ @@ -460,6 +426,10 @@ static int tpm_add_char_device(struct tpm_chip *chip) idr_replace(&dev_nums_idr, chip, chip->dev_num); mutex_unlock(&idr_lock); + return 0; + +err_del_cdev: + cdev_device_del(&chip->cdev, &chip->dev); return rc; } @@ -654,7 +624,7 @@ void tpm_chip_unregister(struct tpm_chip *chip) hwrng_unregister(&chip->hwrng); tpm_bios_log_teardown(chip); if (chip->flags & TPM_CHIP_FLAG_TPM2 && !tpm_is_firmware_upgrade(chip)) - cdev_device_del(&chip->cdevs, &chip->devs); + tpm_devs_remove(chip); tpm_del_char_device(chip); } EXPORT_SYMBOL_GPL(tpm_chip_unregister); diff --git a/drivers/char/tpm/tpm-dev-common.c b/drivers/char/tpm/tpm-dev-common.c index c08cbb306636b..dc4c0a0a51290 100644 --- a/drivers/char/tpm/tpm-dev-common.c +++ b/drivers/char/tpm/tpm-dev-common.c @@ -69,7 +69,13 @@ static void tpm_dev_async_work(struct work_struct *work) ret = tpm_dev_transmit(priv->chip, priv->space, priv->data_buffer, sizeof(priv->data_buffer)); tpm_put_ops(priv->chip); - if (ret > 0) { + + /* + * If ret is > 0 then tpm_dev_transmit returned the size of the + * response. If ret is < 0 then tpm_dev_transmit failed and + * returned an error code. 
+ */ + if (ret != 0) { priv->response_length = ret; mod_timer(&priv->user_read_timer, jiffies + (120 * HZ)); } diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h index 283f78211c3a7..2163c6ee0d364 100644 --- a/drivers/char/tpm/tpm.h +++ b/drivers/char/tpm/tpm.h @@ -234,6 +234,8 @@ int tpm2_prepare_space(struct tpm_chip *chip, struct tpm_space *space, u8 *cmd, size_t cmdsiz); int tpm2_commit_space(struct tpm_chip *chip, struct tpm_space *space, void *buf, size_t *bufsiz); +int tpm_devs_add(struct tpm_chip *chip); +void tpm_devs_remove(struct tpm_chip *chip); void tpm_bios_log_setup(struct tpm_chip *chip); void tpm_bios_log_teardown(struct tpm_chip *chip); diff --git a/drivers/char/tpm/tpm2-space.c b/drivers/char/tpm/tpm2-space.c index 97e916856cf3e..ffb35f0154c16 100644 --- a/drivers/char/tpm/tpm2-space.c +++ b/drivers/char/tpm/tpm2-space.c @@ -58,12 +58,12 @@ int tpm2_init_space(struct tpm_space *space, unsigned int buf_size) void tpm2_del_space(struct tpm_chip *chip, struct tpm_space *space) { - mutex_lock(&chip->tpm_mutex); - if (!tpm_chip_start(chip)) { + + if (tpm_try_get_ops(chip) == 0) { tpm2_flush_sessions(chip, space); - tpm_chip_stop(chip); + tpm_put_ops(chip); } - mutex_unlock(&chip->tpm_mutex); + kfree(space->context_buf); kfree(space->session_buf); } @@ -574,3 +574,68 @@ int tpm2_commit_space(struct tpm_chip *chip, struct tpm_space *space, dev_err(&chip->dev, "%s: error %d\n", __func__, rc); return rc; } + +/* + * Put the reference to the main device. + */ +static void tpm_devs_release(struct device *dev) +{ + struct tpm_chip *chip = container_of(dev, struct tpm_chip, devs); + + /* release the master device reference */ + put_device(&chip->dev); +} + +/* + * Remove the device file for exposed TPM spaces and release the device + * reference. This may also release the reference to the master device. 
+ */ +void tpm_devs_remove(struct tpm_chip *chip) +{ + cdev_device_del(&chip->cdevs, &chip->devs); + put_device(&chip->devs); +} + +/* + * Add a device file to expose TPM spaces. Also take a reference to the + * main device. + */ +int tpm_devs_add(struct tpm_chip *chip) +{ + int rc; + + device_initialize(&chip->devs); + chip->devs.parent = chip->dev.parent; + chip->devs.class = tpmrm_class; + + /* + * Get extra reference on main device to hold on behalf of devs. + * This holds the chip structure while cdevs is in use. The + * corresponding put is in the tpm_devs_release. + */ + get_device(&chip->dev); + chip->devs.release = tpm_devs_release; + chip->devs.devt = MKDEV(MAJOR(tpm_devt), chip->dev_num + TPM_NUM_DEVICES); + cdev_init(&chip->cdevs, &tpmrm_fops); + chip->cdevs.owner = THIS_MODULE; + + rc = dev_set_name(&chip->devs, "tpmrm%d", chip->dev_num); + if (rc) + goto err_put_devs; + + rc = cdev_device_add(&chip->cdevs, &chip->devs); + if (rc) { + dev_err(&chip->devs, + "unable to cdev_device_add() %s, major %d, minor %d, err=%d\n", + dev_name(&chip->devs), MAJOR(chip->devs.devt), + MINOR(chip->devs.devt), rc); + goto err_put_devs; + } + + return 0; + +err_put_devs: + put_device(&chip->devs); + + return rc; +} diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index e3c430539a176..9fa3c76a267f5 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -2245,7 +2245,7 @@ static struct virtio_driver virtio_rproc_serial = { .remove = virtcons_remove, }; -static int __init init(void) +static int __init virtio_console_init(void) { int err; @@ -2280,7 +2280,7 @@ static int __init init(void) return err; } -static void __exit fini(void) +static void __exit virtio_console_fini(void) { reclaim_dma_bufs(); @@ -2290,8 +2290,8 @@ static void __exit fini(void) class_destroy(pdrvdata.class); debugfs_remove_recursive(pdrvdata.debugfs_dir); } -module_init(init); -module_exit(fini); +module_init(virtio_console_init); 
+module_exit(virtio_console_fini); MODULE_DESCRIPTION("Virtio console driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/clk/actions/owl-s700.c b/drivers/clk/actions/owl-s700.c index a2f34d13fb543..6ea7da1d6d755 100644 --- a/drivers/clk/actions/owl-s700.c +++ b/drivers/clk/actions/owl-s700.c @@ -162,6 +162,7 @@ static struct clk_div_table hdmia_div_table[] = { static struct clk_div_table rmii_div_table[] = { {0, 4}, {1, 10}, + {0, 0} }; /* divider clocks */ diff --git a/drivers/clk/actions/owl-s900.c b/drivers/clk/actions/owl-s900.c index 790890978424a..5144ada2c7e1a 100644 --- a/drivers/clk/actions/owl-s900.c +++ b/drivers/clk/actions/owl-s900.c @@ -140,7 +140,7 @@ static struct clk_div_table rmii_ref_div_table[] = { static struct clk_div_table usb3_mac_div_table[] = { { 1, 2 }, { 2, 3 }, { 3, 4 }, - { 0, 8 }, + { 0, 0 } }; static struct clk_div_table i2s_div_table[] = { diff --git a/drivers/clk/at91/sama7g5.c b/drivers/clk/at91/sama7g5.c index 369dfafabbca2..060e908086a13 100644 --- a/drivers/clk/at91/sama7g5.c +++ b/drivers/clk/at91/sama7g5.c @@ -696,16 +696,16 @@ static const struct { { .n = "pdmc0_gclk", .id = 68, .r = { .max = 50000000 }, - .pp = { "syspll_divpmcck", "baudpll_divpmcck", }, - .pp_mux_table = { 5, 8, }, + .pp = { "syspll_divpmcck", "audiopll_divpmcck", }, + .pp_mux_table = { 5, 9, }, .pp_count = 2, .pp_chg_id = INT_MIN, }, { .n = "pdmc1_gclk", .id = 69, .r = { .max = 50000000, }, - .pp = { "syspll_divpmcck", "baudpll_divpmcck", }, - .pp_mux_table = { 5, 8, }, + .pp = { "syspll_divpmcck", "audiopll_divpmcck", }, + .pp_mux_table = { 5, 9, }, .pp_count = 2, .pp_chg_id = INT_MIN, }, diff --git a/drivers/clk/clk-clps711x.c b/drivers/clk/clk-clps711x.c index a2c6486ef1708..f8417ee2961aa 100644 --- a/drivers/clk/clk-clps711x.c +++ b/drivers/clk/clk-clps711x.c @@ -28,11 +28,13 @@ static const struct clk_div_table spi_div_table[] = { { .val = 1, .div = 8, }, { .val = 2, .div = 2, }, { .val = 3, .div = 1, }, + { /* sentinel */ } }; static const struct 
clk_div_table timer_div_table[] = { { .val = 0, .div = 256, }, { .val = 1, .div = 1, }, + { /* sentinel */ } }; struct clps711x_clk { diff --git a/drivers/clk/clk-si5341.c b/drivers/clk/clk-si5341.c index f7b41366666e5..4de098b6b0d4e 100644 --- a/drivers/clk/clk-si5341.c +++ b/drivers/clk/clk-si5341.c @@ -798,6 +798,15 @@ static unsigned long si5341_output_clk_recalc_rate(struct clk_hw *hw, u32 r_divider; u8 r[3]; + err = regmap_read(output->data->regmap, + SI5341_OUT_CONFIG(output), &val); + if (err < 0) + return err; + + /* If SI5341_OUT_CFG_RDIV_FORCE2 is set, r_divider is 2 */ + if (val & SI5341_OUT_CFG_RDIV_FORCE2) + return parent_rate / 2; + err = regmap_bulk_read(output->data->regmap, SI5341_OUT_R_REG(output), r, 3); if (err < 0) @@ -814,13 +823,6 @@ static unsigned long si5341_output_clk_recalc_rate(struct clk_hw *hw, r_divider += 1; r_divider <<= 1; - err = regmap_read(output->data->regmap, - SI5341_OUT_CONFIG(output), &val); - if (err < 0) - return err; - - if (val & SI5341_OUT_CFG_RDIV_FORCE2) - r_divider = 2; return parent_rate / r_divider; } diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index 8de6a22498e70..2fdfce116087a 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -632,6 +632,24 @@ static void clk_core_get_boundaries(struct clk_core *core, *max_rate = min(*max_rate, clk_user->max_rate); } +static bool clk_core_check_boundaries(struct clk_core *core, + unsigned long min_rate, + unsigned long max_rate) +{ + struct clk *user; + + lockdep_assert_held(&prepare_lock); + + if (min_rate > core->max_rate || max_rate < core->min_rate) + return false; + + hlist_for_each_entry(user, &core->clks, clks_node) + if (min_rate > user->max_rate || max_rate < user->min_rate) + return false; + + return true; +} + void clk_hw_set_rate_range(struct clk_hw *hw, unsigned long min_rate, unsigned long max_rate) { @@ -2348,6 +2366,11 @@ int clk_set_rate_range(struct clk *clk, unsigned long min, unsigned long max) clk->min_rate = min; clk->max_rate = max; + if 
(!clk_core_check_boundaries(clk->core, min, max)) { + ret = -EINVAL; + goto out; + } + rate = clk_core_get_rate_nolock(clk->core); if (rate < min || rate > max) { /* @@ -2376,6 +2399,7 @@ int clk_set_rate_range(struct clk *clk, unsigned long min, unsigned long max) } } +out: if (clk->exclusive_count) clk_core_rate_protect(clk->core); @@ -3456,6 +3480,19 @@ static void clk_core_reparent_orphans_nolock(void) __clk_set_parent_after(orphan, parent, NULL); __clk_recalc_accuracies(orphan); __clk_recalc_rates(orphan, 0); + + /* + * __clk_init_parent() will set the initial req_rate to + * 0 if the clock doesn't have clk_ops::recalc_rate and + * is an orphan when it's registered. + * + * 'req_rate' is used by clk_set_rate_range() and + * clk_put() to trigger a clk_set_rate() call whenever + * the boundaries are modified. Let's make sure + * 'req_rate' is set to something non-zero so that + * clk_set_rate_range() doesn't drop the frequency. + */ + orphan->req_rate = orphan->rate; } } } @@ -3773,8 +3810,9 @@ struct clk *clk_hw_create_clk(struct device *dev, struct clk_hw *hw, struct clk *clk_hw_get_clk(struct clk_hw *hw, const char *con_id) { struct device *dev = hw->core->dev; + const char *name = dev ? 
dev_name(dev) : NULL; - return clk_hw_create_clk(dev, hw, dev_name(dev), con_id); + return clk_hw_create_clk(dev, hw, name, con_id); } EXPORT_SYMBOL(clk_hw_get_clk); diff --git a/drivers/clk/hisilicon/clk-hi3559a.c b/drivers/clk/hisilicon/clk-hi3559a.c index 56012a3d02192..9ea1a80acbe8b 100644 --- a/drivers/clk/hisilicon/clk-hi3559a.c +++ b/drivers/clk/hisilicon/clk-hi3559a.c @@ -611,8 +611,8 @@ static struct hisi_mux_clock hi3559av100_shub_mux_clks[] = { /* shub div clk */ -static struct clk_div_table shub_spi_clk_table[] = {{0, 8}, {1, 4}, {2, 2}}; -static struct clk_div_table shub_uart_div_clk_table[] = {{1, 8}, {2, 4}}; +static struct clk_div_table shub_spi_clk_table[] = {{0, 8}, {1, 4}, {2, 2}, {/*sentinel*/}}; +static struct clk_div_table shub_uart_div_clk_table[] = {{1, 8}, {2, 4}, {/*sentinel*/}}; static struct hisi_divider_clock hi3559av100_shub_div_clks[] = { { HI3559AV100_SHUB_SPI_SOURCE_CLK, "clk_spi_clk", "shub_clk", 0, 0x20, 24, 2, diff --git a/drivers/clk/imx/clk-imx7d.c b/drivers/clk/imx/clk-imx7d.c index c4e0f1c07192f..3f6fd7ef2a68f 100644 --- a/drivers/clk/imx/clk-imx7d.c +++ b/drivers/clk/imx/clk-imx7d.c @@ -849,7 +849,6 @@ static void __init imx7d_clocks_init(struct device_node *ccm_node) hws[IMX7D_WDOG4_ROOT_CLK] = imx_clk_hw_gate4("wdog4_root_clk", "wdog_post_div", base + 0x49f0, 0); hws[IMX7D_KPP_ROOT_CLK] = imx_clk_hw_gate4("kpp_root_clk", "ipg_root_clk", base + 0x4aa0, 0); hws[IMX7D_CSI_MCLK_ROOT_CLK] = imx_clk_hw_gate4("csi_mclk_root_clk", "csi_mclk_post_div", base + 0x4490, 0); - hws[IMX7D_AUDIO_MCLK_ROOT_CLK] = imx_clk_hw_gate4("audio_mclk_root_clk", "audio_mclk_post_div", base + 0x4790, 0); hws[IMX7D_WRCLK_ROOT_CLK] = imx_clk_hw_gate4("wrclk_root_clk", "wrclk_post_div", base + 0x47a0, 0); hws[IMX7D_USB_CTRL_CLK] = imx_clk_hw_gate4("usb_ctrl_clk", "ahb_root_clk", base + 0x4680, 0); hws[IMX7D_USB_PHY1_CLK] = imx_clk_hw_gate4("usb_phy1_clk", "pll_usb1_main_clk", base + 0x46a0, 0); diff --git a/drivers/clk/imx/clk-imx8qxp-lpcg.c 
b/drivers/clk/imx/clk-imx8qxp-lpcg.c index b23758083ce52..5e31a6a24b3a3 100644 --- a/drivers/clk/imx/clk-imx8qxp-lpcg.c +++ b/drivers/clk/imx/clk-imx8qxp-lpcg.c @@ -248,7 +248,7 @@ static int imx_lpcg_parse_clks_from_dt(struct platform_device *pdev, for (i = 0; i < count; i++) { idx = bit_offset[i] / 4; - if (idx > IMX_LPCG_MAX_CLKS) { + if (idx >= IMX_LPCG_MAX_CLKS) { dev_warn(&pdev->dev, "invalid bit offset of clock %d\n", i); ret = -EINVAL; diff --git a/drivers/clk/loongson1/clk-loongson1c.c b/drivers/clk/loongson1/clk-loongson1c.c index 703f87622cf5f..1ebf740380efb 100644 --- a/drivers/clk/loongson1/clk-loongson1c.c +++ b/drivers/clk/loongson1/clk-loongson1c.c @@ -37,6 +37,7 @@ static const struct clk_div_table ahb_div_table[] = { [1] = { .val = 1, .div = 4 }, [2] = { .val = 2, .div = 3 }, [3] = { .val = 3, .div = 3 }, + [4] = { /* sentinel */ } }; void __init ls1x_clk_init(void) diff --git a/drivers/clk/mediatek/clk-mt8192.c b/drivers/clk/mediatek/clk-mt8192.c index cbc7c6dbe0f44..79ddb3cc0b98a 100644 --- a/drivers/clk/mediatek/clk-mt8192.c +++ b/drivers/clk/mediatek/clk-mt8192.c @@ -1236,9 +1236,17 @@ static int clk_mt8192_infra_probe(struct platform_device *pdev) r = mtk_clk_register_gates(node, infra_clks, ARRAY_SIZE(infra_clks), clk_data); if (r) - return r; + goto free_clk_data; + + r = of_clk_add_provider(node, of_clk_src_onecell_get, clk_data); + if (r) + goto free_clk_data; + + return r; - return of_clk_add_provider(node, of_clk_src_onecell_get, clk_data); +free_clk_data: + mtk_free_clk_data(clk_data); + return r; } static int clk_mt8192_peri_probe(struct platform_device *pdev) @@ -1253,9 +1261,17 @@ static int clk_mt8192_peri_probe(struct platform_device *pdev) r = mtk_clk_register_gates(node, peri_clks, ARRAY_SIZE(peri_clks), clk_data); if (r) - return r; + goto free_clk_data; + + r = of_clk_add_provider(node, of_clk_src_onecell_get, clk_data); + if (r) + goto free_clk_data; - return of_clk_add_provider(node, of_clk_src_onecell_get, clk_data); + 
return r; + +free_clk_data: + mtk_free_clk_data(clk_data); + return r; } static int clk_mt8192_apmixed_probe(struct platform_device *pdev) @@ -1271,9 +1287,17 @@ static int clk_mt8192_apmixed_probe(struct platform_device *pdev) mtk_clk_register_plls(node, plls, ARRAY_SIZE(plls), clk_data); r = mtk_clk_register_gates(node, apmixed_clks, ARRAY_SIZE(apmixed_clks), clk_data); if (r) - return r; + goto free_clk_data; - return of_clk_add_provider(node, of_clk_src_onecell_get, clk_data); + r = of_clk_add_provider(node, of_clk_src_onecell_get, clk_data); + if (r) + goto free_clk_data; + + return r; + +free_clk_data: + mtk_free_clk_data(clk_data); + return r; } static const struct of_device_id of_match_clk_mt8192[] = { diff --git a/drivers/clk/qcom/clk-rcg2.c b/drivers/clk/qcom/clk-rcg2.c index e1b1b426fae4b..f675fd969c4de 100644 --- a/drivers/clk/qcom/clk-rcg2.c +++ b/drivers/clk/qcom/clk-rcg2.c @@ -264,7 +264,7 @@ static int clk_rcg2_determine_floor_rate(struct clk_hw *hw, static int __clk_rcg2_configure(struct clk_rcg2 *rcg, const struct freq_tbl *f) { - u32 cfg, mask; + u32 cfg, mask, d_val, not2d_val, n_minus_m; struct clk_hw *hw = &rcg->clkr.hw; int ret, index = qcom_find_src_index(hw, rcg->parent_map, f->src); @@ -283,8 +283,17 @@ static int __clk_rcg2_configure(struct clk_rcg2 *rcg, const struct freq_tbl *f) if (ret) return ret; + /* Calculate 2d value */ + d_val = f->n; + + n_minus_m = f->n - f->m; + n_minus_m *= 2; + + d_val = clamp_t(u32, d_val, f->m, n_minus_m); + not2d_val = ~d_val & mask; + ret = regmap_update_bits(rcg->clkr.regmap, - RCG_D_OFFSET(rcg), mask, ~f->n); + RCG_D_OFFSET(rcg), mask, not2d_val); if (ret) return ret; } @@ -720,6 +729,7 @@ static const struct frac_entry frac_table_pixel[] = { { 2, 9 }, { 4, 9 }, { 1, 1 }, + { 2, 3 }, { } }; diff --git a/drivers/clk/qcom/gcc-ipq8074.c b/drivers/clk/qcom/gcc-ipq8074.c index 108fe27bee10f..541016db3c4bb 100644 --- a/drivers/clk/qcom/gcc-ipq8074.c +++ b/drivers/clk/qcom/gcc-ipq8074.c @@ -60,11 +60,6 @@ 
static const struct parent_map gcc_xo_gpll0_gpll0_out_main_div2_map[] = { { P_GPLL0_DIV2, 4 }, }; -static const char * const gcc_xo_gpll0[] = { - "xo", - "gpll0", -}; - static const struct parent_map gcc_xo_gpll0_map[] = { { P_XO, 0 }, { P_GPLL0, 1 }, @@ -956,6 +951,11 @@ static struct clk_rcg2 blsp1_uart6_apps_clk_src = { }, }; +static const struct clk_parent_data gcc_xo_gpll0[] = { + { .fw_name = "xo" }, + { .hw = &gpll0.clkr.hw }, +}; + static const struct freq_tbl ftbl_pcie_axi_clk_src[] = { F(19200000, P_XO, 1, 0, 0), F(200000000, P_GPLL0, 4, 0, 0), @@ -969,7 +969,7 @@ static struct clk_rcg2 pcie0_axi_clk_src = { .parent_map = gcc_xo_gpll0_map, .clkr.hw.init = &(struct clk_init_data){ .name = "pcie0_axi_clk_src", - .parent_names = gcc_xo_gpll0, + .parent_data = gcc_xo_gpll0, .num_parents = 2, .ops = &clk_rcg2_ops, }, @@ -1016,7 +1016,7 @@ static struct clk_rcg2 pcie1_axi_clk_src = { .parent_map = gcc_xo_gpll0_map, .clkr.hw.init = &(struct clk_init_data){ .name = "pcie1_axi_clk_src", - .parent_names = gcc_xo_gpll0, + .parent_data = gcc_xo_gpll0, .num_parents = 2, .ops = &clk_rcg2_ops, }, @@ -1074,7 +1074,7 @@ static struct clk_rcg2 sdcc1_apps_clk_src = { .name = "sdcc1_apps_clk_src", .parent_names = gcc_xo_gpll0_gpll2_gpll0_out_main_div2, .num_parents = 4, - .ops = &clk_rcg2_ops, + .ops = &clk_rcg2_floor_ops, }, }; @@ -1330,7 +1330,7 @@ static struct clk_rcg2 nss_ce_clk_src = { .parent_map = gcc_xo_gpll0_map, .clkr.hw.init = &(struct clk_init_data){ .name = "nss_ce_clk_src", - .parent_names = gcc_xo_gpll0, + .parent_data = gcc_xo_gpll0, .num_parents = 2, .ops = &clk_rcg2_ops, }, @@ -4329,8 +4329,7 @@ static struct clk_rcg2 pcie0_rchng_clk_src = { .parent_map = gcc_xo_gpll0_map, .clkr.hw.init = &(struct clk_init_data){ .name = "pcie0_rchng_clk_src", - .parent_hws = (const struct clk_hw *[]) { - &gpll0.clkr.hw }, + .parent_data = gcc_xo_gpll0, .num_parents = 2, .ops = &clk_rcg2_ops, }, diff --git a/drivers/clk/qcom/gcc-msm8994.c b/drivers/clk/qcom/gcc-msm8994.c 
index f09499999eb3a..6b702cdacbf2e 100644 --- a/drivers/clk/qcom/gcc-msm8994.c +++ b/drivers/clk/qcom/gcc-msm8994.c @@ -77,6 +77,7 @@ static struct clk_alpha_pll gpll4_early = { static struct clk_alpha_pll_postdiv gpll4 = { .offset = 0x1dc0, + .width = 4, .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT], .clkr.hw.init = &(struct clk_init_data){ .name = "gpll4", diff --git a/drivers/clk/renesas/r8a779f0-cpg-mssr.c b/drivers/clk/renesas/r8a779f0-cpg-mssr.c index e6ec02c2c2a8b..344957d533d81 100644 --- a/drivers/clk/renesas/r8a779f0-cpg-mssr.c +++ b/drivers/clk/renesas/r8a779f0-cpg-mssr.c @@ -103,7 +103,7 @@ static const struct cpg_core_clk r8a779f0_core_clks[] __initconst = { DEF_FIXED("s0d12_hsc", R8A779F0_CLK_S0D12_HSC, CLK_S0, 12, 1), DEF_FIXED("cl16m_hsc", R8A779F0_CLK_CL16M_HSC, CLK_S0, 48, 1), DEF_FIXED("s0d2_cc", R8A779F0_CLK_S0D2_CC, CLK_S0, 2, 1), - DEF_FIXED("rsw2", R8A779F0_CLK_RSW2, CLK_PLL5, 2, 1), + DEF_FIXED("rsw2", R8A779F0_CLK_RSW2, CLK_PLL5_DIV2, 5, 1), DEF_FIXED("cbfusa", R8A779F0_CLK_CBFUSA, CLK_EXTAL, 2, 1), DEF_FIXED("cpex", R8A779F0_CLK_CPEX, CLK_EXTAL, 2, 1), diff --git a/drivers/clk/renesas/r9a07g044-cpg.c b/drivers/clk/renesas/r9a07g044-cpg.c index 79042bf46fe85..46359afef0d43 100644 --- a/drivers/clk/renesas/r9a07g044-cpg.c +++ b/drivers/clk/renesas/r9a07g044-cpg.c @@ -88,8 +88,8 @@ static const struct cpg_core_clk r9a07g044_core_clks[] __initconst = { DEF_FIXED(".osc", R9A07G044_OSCCLK, CLK_EXTAL, 1, 1), DEF_FIXED(".osc_div1000", CLK_OSC_DIV1000, CLK_EXTAL, 1, 1000), DEF_SAMPLL(".pll1", CLK_PLL1, CLK_EXTAL, PLL146_CONF(0)), - DEF_FIXED(".pll2", CLK_PLL2, CLK_EXTAL, 133, 2), - DEF_FIXED(".pll3", CLK_PLL3, CLK_EXTAL, 133, 2), + DEF_FIXED(".pll2", CLK_PLL2, CLK_EXTAL, 200, 3), + DEF_FIXED(".pll3", CLK_PLL3, CLK_EXTAL, 200, 3), DEF_FIXED(".pll3_400", CLK_PLL3_400, CLK_PLL3, 1, 4), DEF_FIXED(".pll3_533", CLK_PLL3_533, CLK_PLL3, 1, 3), diff --git a/drivers/clk/rockchip/clk-rk3568.c b/drivers/clk/rockchip/clk-rk3568.c index 
69a9e8069a486..604a367bc498a 100644 --- a/drivers/clk/rockchip/clk-rk3568.c +++ b/drivers/clk/rockchip/clk-rk3568.c @@ -1038,13 +1038,13 @@ static struct rockchip_clk_branch rk3568_clk_branches[] __initdata = { RK3568_CLKGATE_CON(20), 8, GFLAGS), GATE(HCLK_VOP, "hclk_vop", "hclk_vo", 0, RK3568_CLKGATE_CON(20), 9, GFLAGS), - COMPOSITE(DCLK_VOP0, "dclk_vop0", hpll_vpll_gpll_cpll_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, + COMPOSITE(DCLK_VOP0, "dclk_vop0", hpll_vpll_gpll_cpll_p, CLK_SET_RATE_NO_REPARENT, RK3568_CLKSEL_CON(39), 10, 2, MFLAGS, 0, 8, DFLAGS, RK3568_CLKGATE_CON(20), 10, GFLAGS), - COMPOSITE(DCLK_VOP1, "dclk_vop1", hpll_vpll_gpll_cpll_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT, + COMPOSITE(DCLK_VOP1, "dclk_vop1", hpll_vpll_gpll_cpll_p, CLK_SET_RATE_NO_REPARENT, RK3568_CLKSEL_CON(40), 10, 2, MFLAGS, 0, 8, DFLAGS, RK3568_CLKGATE_CON(20), 11, GFLAGS), - COMPOSITE(DCLK_VOP2, "dclk_vop2", hpll_vpll_gpll_cpll_p, 0, + COMPOSITE(DCLK_VOP2, "dclk_vop2", hpll_vpll_gpll_cpll_p, CLK_SET_RATE_NO_REPARENT, RK3568_CLKSEL_CON(41), 10, 2, MFLAGS, 0, 8, DFLAGS, RK3568_CLKGATE_CON(20), 12, GFLAGS), GATE(CLK_VOP_PWM, "clk_vop_pwm", "xin24m", 0, diff --git a/drivers/clk/rockchip/clk.c b/drivers/clk/rockchip/clk.c index b7be7e11b0dfe..bb8a844309bf5 100644 --- a/drivers/clk/rockchip/clk.c +++ b/drivers/clk/rockchip/clk.c @@ -180,6 +180,7 @@ static void rockchip_fractional_approximation(struct clk_hw *hw, unsigned long rate, unsigned long *parent_rate, unsigned long *m, unsigned long *n) { + struct clk_fractional_divider *fd = to_clk_fd(hw); unsigned long p_rate, p_parent_rate; struct clk_hw *p_parent; @@ -190,6 +191,8 @@ static void rockchip_fractional_approximation(struct clk_hw *hw, *parent_rate = p_parent_rate; } + fd->flags |= CLK_FRAC_DIVIDER_POWER_OF_TWO_PS; + clk_fractional_divider_general_approximation(hw, rate, parent_rate, m, n); } diff --git a/drivers/clk/starfive/clk-starfive-jh7100.c b/drivers/clk/starfive/clk-starfive-jh7100.c index 
25d31afa0f871..4b59338b5d7d4 100644 --- a/drivers/clk/starfive/clk-starfive-jh7100.c +++ b/drivers/clk/starfive/clk-starfive-jh7100.c @@ -32,6 +32,13 @@ #define JH7100_CLK_MUX_MASK GENMASK(27, 24) #define JH7100_CLK_MUX_SHIFT 24 #define JH7100_CLK_DIV_MASK GENMASK(23, 0) +#define JH7100_CLK_FRAC_MASK GENMASK(15, 8) +#define JH7100_CLK_FRAC_SHIFT 8 +#define JH7100_CLK_INT_MASK GENMASK(7, 0) + +/* fractional divider min/max */ +#define JH7100_CLK_FRAC_MIN 100UL +#define JH7100_CLK_FRAC_MAX 25599UL /* clock data */ #define JH7100_GATE(_idx, _name, _flags, _parent) [_idx] = { \ @@ -55,6 +62,13 @@ .parents = { [0] = _parent }, \ } +#define JH7100_FDIV(_idx, _name, _parent) [_idx] = { \ + .name = _name, \ + .flags = 0, \ + .max = JH7100_CLK_FRAC_MAX, \ + .parents = { [0] = _parent }, \ +} + #define JH7100__MUX(_idx, _name, _nparents, ...) [_idx] = { \ .name = _name, \ .flags = 0, \ @@ -225,7 +239,7 @@ static const struct { JH7100__MUX(JH7100_CLK_USBPHY_25M, "usbphy_25m", 2, JH7100_CLK_OSC_SYS, JH7100_CLK_USBPHY_PLLDIV25M), - JH7100__DIV(JH7100_CLK_AUDIO_DIV, "audio_div", 131072, JH7100_CLK_AUDIO_ROOT), + JH7100_FDIV(JH7100_CLK_AUDIO_DIV, "audio_div", JH7100_CLK_AUDIO_ROOT), JH7100_GATE(JH7100_CLK_AUDIO_SRC, "audio_src", 0, JH7100_CLK_AUDIO_DIV), JH7100_GATE(JH7100_CLK_AUDIO_12288, "audio_12288", 0, JH7100_CLK_OSC_AUD), JH7100_GDIV(JH7100_CLK_VIN_SRC, "vin_src", 0, 4, JH7100_CLK_VIN_ROOT), @@ -399,22 +413,13 @@ static unsigned long jh7100_clk_recalc_rate(struct clk_hw *hw, return div ? 
parent_rate / div : 0; } -static unsigned long jh7100_clk_bestdiv(struct jh7100_clk *clk, - unsigned long rate, unsigned long parent) -{ - unsigned long max = clk->max_div; - unsigned long div = DIV_ROUND_UP(parent, rate); - - return min(div, max); -} - static int jh7100_clk_determine_rate(struct clk_hw *hw, struct clk_rate_request *req) { struct jh7100_clk *clk = jh7100_clk_from(hw); unsigned long parent = req->best_parent_rate; unsigned long rate = clamp(req->rate, req->min_rate, req->max_rate); - unsigned long div = jh7100_clk_bestdiv(clk, rate, parent); + unsigned long div = min_t(unsigned long, DIV_ROUND_UP(parent, rate), clk->max_div); unsigned long result = parent / div; /* @@ -442,12 +447,56 @@ static int jh7100_clk_set_rate(struct clk_hw *hw, unsigned long parent_rate) { struct jh7100_clk *clk = jh7100_clk_from(hw); - unsigned long div = jh7100_clk_bestdiv(clk, rate, parent_rate); + unsigned long div = clamp(DIV_ROUND_CLOSEST(parent_rate, rate), + 1UL, (unsigned long)clk->max_div); jh7100_clk_reg_rmw(clk, JH7100_CLK_DIV_MASK, div); return 0; } +static unsigned long jh7100_clk_frac_recalc_rate(struct clk_hw *hw, + unsigned long parent_rate) +{ + struct jh7100_clk *clk = jh7100_clk_from(hw); + u32 reg = jh7100_clk_reg_get(clk); + unsigned long div100 = 100 * (reg & JH7100_CLK_INT_MASK) + + ((reg & JH7100_CLK_FRAC_MASK) >> JH7100_CLK_FRAC_SHIFT); + + return (div100 >= JH7100_CLK_FRAC_MIN) ? 
100 * parent_rate / div100 : 0; +} + +static int jh7100_clk_frac_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) +{ + unsigned long parent100 = 100 * req->best_parent_rate; + unsigned long rate = clamp(req->rate, req->min_rate, req->max_rate); + unsigned long div100 = clamp(DIV_ROUND_CLOSEST(parent100, rate), + JH7100_CLK_FRAC_MIN, JH7100_CLK_FRAC_MAX); + unsigned long result = parent100 / div100; + + /* clamp the result as in jh7100_clk_determine_rate() above */ + if (result > req->max_rate && div100 < JH7100_CLK_FRAC_MAX) + result = parent100 / (div100 + 1); + if (result < req->min_rate && div100 > JH7100_CLK_FRAC_MIN) + result = parent100 / (div100 - 1); + + req->rate = result; + return 0; +} + +static int jh7100_clk_frac_set_rate(struct clk_hw *hw, + unsigned long rate, + unsigned long parent_rate) +{ + struct jh7100_clk *clk = jh7100_clk_from(hw); + unsigned long div100 = clamp(DIV_ROUND_CLOSEST(100 * parent_rate, rate), + JH7100_CLK_FRAC_MIN, JH7100_CLK_FRAC_MAX); + u32 value = ((div100 % 100) << JH7100_CLK_FRAC_SHIFT) | (div100 / 100); + + jh7100_clk_reg_rmw(clk, JH7100_CLK_DIV_MASK, value); + return 0; +} + static u8 jh7100_clk_get_parent(struct clk_hw *hw) { struct jh7100_clk *clk = jh7100_clk_from(hw); @@ -534,6 +583,13 @@ static const struct clk_ops jh7100_clk_div_ops = { .debug_init = jh7100_clk_debug_init, }; +static const struct clk_ops jh7100_clk_fdiv_ops = { + .recalc_rate = jh7100_clk_frac_recalc_rate, + .determine_rate = jh7100_clk_frac_determine_rate, + .set_rate = jh7100_clk_frac_set_rate, + .debug_init = jh7100_clk_debug_init, +}; + static const struct clk_ops jh7100_clk_gdiv_ops = { .enable = jh7100_clk_enable, .disable = jh7100_clk_disable, @@ -572,6 +628,8 @@ static const struct clk_ops *__init jh7100_clk_ops(u32 max) if (max & JH7100_CLK_DIV_MASK) { if (max & JH7100_CLK_ENABLE) return &jh7100_clk_gdiv_ops; + if (max == JH7100_CLK_FRAC_MAX) + return &jh7100_clk_fdiv_ops; return &jh7100_clk_div_ops; } diff --git 
a/drivers/clk/tegra/clk-tegra124-emc.c b/drivers/clk/tegra/clk-tegra124-emc.c index 74c1d894cca86..219c80653dbdb 100644 --- a/drivers/clk/tegra/clk-tegra124-emc.c +++ b/drivers/clk/tegra/clk-tegra124-emc.c @@ -198,6 +198,7 @@ static struct tegra_emc *emc_ensure_emc_driver(struct tegra_clk_emc *tegra) tegra->emc = platform_get_drvdata(pdev); if (!tegra->emc) { + put_device(&pdev->dev); pr_err("%s: cannot find EMC driver\n", __func__); return NULL; } diff --git a/drivers/clk/ti/clk.c b/drivers/clk/ti/clk.c index 3da33c786d77c..29eafab4353ef 100644 --- a/drivers/clk/ti/clk.c +++ b/drivers/clk/ti/clk.c @@ -131,7 +131,7 @@ int ti_clk_setup_ll_ops(struct ti_clk_ll_ops *ops) void __init ti_dt_clocks_register(struct ti_dt_clk oclks[]) { struct ti_dt_clk *c; - struct device_node *node, *parent; + struct device_node *node, *parent, *child; struct clk *clk; struct of_phandle_args clkspec; char buf[64]; @@ -171,10 +171,13 @@ void __init ti_dt_clocks_register(struct ti_dt_clk oclks[]) node = of_find_node_by_name(NULL, buf); if (num_args && compat_mode) { parent = node; - node = of_get_child_by_name(parent, "clock"); - if (!node) - node = of_get_child_by_name(parent, "clk"); - of_node_put(parent); + child = of_get_child_by_name(parent, "clock"); + if (!child) + child = of_get_child_by_name(parent, "clk"); + if (child) { + of_node_put(parent); + node = child; + } } clkspec.np = node; diff --git a/drivers/clk/uniphier/clk-uniphier-fixed-rate.c b/drivers/clk/uniphier/clk-uniphier-fixed-rate.c index 5319cd3804801..3bc55ab75314b 100644 --- a/drivers/clk/uniphier/clk-uniphier-fixed-rate.c +++ b/drivers/clk/uniphier/clk-uniphier-fixed-rate.c @@ -24,6 +24,7 @@ struct clk_hw *uniphier_clk_register_fixed_rate(struct device *dev, init.name = name; init.ops = &clk_fixed_rate_ops; + init.flags = 0; init.parent_names = NULL; init.num_parents = 0; diff --git a/drivers/clk/visconti/clkc-tmpv770x.c b/drivers/clk/visconti/clkc-tmpv770x.c index c2b2f41a85a45..6c753b2cb558f 100644 --- 
a/drivers/clk/visconti/clkc-tmpv770x.c +++ b/drivers/clk/visconti/clkc-tmpv770x.c @@ -176,7 +176,7 @@ static const struct visconti_clk_gate_table clk_gate_tables[] = { { TMPV770X_CLK_WRCK, "wrck", clks_parent_data, ARRAY_SIZE(clks_parent_data), 0, 0x68, 0x168, 9, 32, - -1, }, /* No reset */ + NO_RESET, }, { TMPV770X_CLK_PICKMON, "pickmon", clks_parent_data, ARRAY_SIZE(clks_parent_data), 0, 0x10, 0x110, 8, 4, diff --git a/drivers/clk/visconti/clkc.c b/drivers/clk/visconti/clkc.c index 56a8a4ffebca8..d0b193b5d0b35 100644 --- a/drivers/clk/visconti/clkc.c +++ b/drivers/clk/visconti/clkc.c @@ -147,7 +147,7 @@ int visconti_clk_register_gates(struct visconti_clk_provider *ctx, if (!dev_name) return -ENOMEM; - if (clks[i].rs_id >= 0) { + if (clks[i].rs_id != NO_RESET) { rson_offset = reset[clks[i].rs_id].rson_offset; rsoff_offset = reset[clks[i].rs_id].rsoff_offset; rs_idx = reset[clks[i].rs_id].rs_idx; diff --git a/drivers/clk/visconti/clkc.h b/drivers/clk/visconti/clkc.h index 09ed82ff64e45..8756a1ec42efc 100644 --- a/drivers/clk/visconti/clkc.h +++ b/drivers/clk/visconti/clkc.h @@ -73,4 +73,7 @@ int visconti_clk_register_gates(struct visconti_clk_provider *data, int num_gate, const struct visconti_reset_data *reset, spinlock_t *lock); + +#define NO_RESET 0xFF + #endif /* _VISCONTI_CLKC_H_ */ diff --git a/drivers/clocksource/acpi_pm.c b/drivers/clocksource/acpi_pm.c index eb596ff9e7bb3..279ddff81ab49 100644 --- a/drivers/clocksource/acpi_pm.c +++ b/drivers/clocksource/acpi_pm.c @@ -229,8 +229,10 @@ static int __init parse_pmtmr(char *arg) int ret; ret = kstrtouint(arg, 16, &base); - if (ret) - return ret; + if (ret) { + pr_warn("PMTMR: invalid 'pmtmr=' value: '%s'\n", arg); + return 1; + } pr_info("PMTMR IOPort override: 0x%04x -> 0x%04x\n", pmtmr_ioport, base); diff --git a/drivers/clocksource/exynos_mct.c b/drivers/clocksource/exynos_mct.c index 6db3d5511b0ff..03782b399ea1a 100644 --- a/drivers/clocksource/exynos_mct.c +++ b/drivers/clocksource/exynos_mct.c @@ -541,6 
+541,11 @@ static int __init exynos4_timer_interrupts(struct device_node *np, * irqs are specified. */ nr_irqs = of_irq_count(np); + if (nr_irqs > ARRAY_SIZE(mct_irqs)) { + pr_err("exynos-mct: too many (%d) interrupts configured in DT\n", + nr_irqs); + nr_irqs = ARRAY_SIZE(mct_irqs); + } for (i = MCT_L0_IRQ; i < nr_irqs; i++) mct_irqs[i] = irq_of_parse_and_map(np, i); @@ -553,11 +558,14 @@ static int __init exynos4_timer_interrupts(struct device_node *np, mct_irqs[MCT_L0_IRQ], err); } else { for_each_possible_cpu(cpu) { - int mct_irq = mct_irqs[MCT_L0_IRQ + cpu]; + int mct_irq; struct mct_clock_event_device *pcpu_mevt = per_cpu_ptr(&percpu_mct_tick, cpu); pcpu_mevt->evt.irq = -1; + if (MCT_L0_IRQ + cpu >= ARRAY_SIZE(mct_irqs)) + break; + mct_irq = mct_irqs[MCT_L0_IRQ + cpu]; irq_set_status_flags(mct_irq, IRQ_NOAUTOEN); if (request_irq(mct_irq, diff --git a/drivers/clocksource/timer-microchip-pit64b.c b/drivers/clocksource/timer-microchip-pit64b.c index cfa4ec7ef3968..790d2c9b42a70 100644 --- a/drivers/clocksource/timer-microchip-pit64b.c +++ b/drivers/clocksource/timer-microchip-pit64b.c @@ -165,7 +165,7 @@ static u64 mchp_pit64b_clksrc_read(struct clocksource *cs) return mchp_pit64b_cnt_read(mchp_pit64b_cs_base); } -static u64 mchp_pit64b_sched_read_clk(void) +static u64 notrace mchp_pit64b_sched_read_clk(void) { return mchp_pit64b_cnt_read(mchp_pit64b_cs_base); } diff --git a/drivers/clocksource/timer-of.c b/drivers/clocksource/timer-of.c index 529cc6a51cdb3..c3f54d9912be7 100644 --- a/drivers/clocksource/timer-of.c +++ b/drivers/clocksource/timer-of.c @@ -157,9 +157,9 @@ static __init int timer_of_base_init(struct device_node *np, of_base->base = of_base->name ? 
of_io_request_and_map(np, of_base->index, of_base->name) : of_iomap(np, of_base->index); - if (IS_ERR(of_base->base)) { - pr_err("Failed to iomap (%s)\n", of_base->name); - return PTR_ERR(of_base->base); + if (IS_ERR_OR_NULL(of_base->base)) { + pr_err("Failed to iomap (%s:%s)\n", np->name, of_base->name); + return of_base->base ? PTR_ERR(of_base->base) : -ENOMEM; } return 0; diff --git a/drivers/clocksource/timer-ti-dm-systimer.c b/drivers/clocksource/timer-ti-dm-systimer.c index 1fccb457fcc54..2737407ff0698 100644 --- a/drivers/clocksource/timer-ti-dm-systimer.c +++ b/drivers/clocksource/timer-ti-dm-systimer.c @@ -694,9 +694,9 @@ static int __init dmtimer_percpu_quirk_init(struct device_node *np, u32 pa) return 0; } - if (pa == 0x48034000) /* dra7 dmtimer3 */ + if (pa == 0x4882c000) /* dra7 dmtimer15 */ return dmtimer_percpu_timer_init(np, 0); - else if (pa == 0x48036000) /* dra7 dmtimer4 */ + else if (pa == 0x4882e000) /* dra7 dmtimer16 */ return dmtimer_percpu_timer_init(np, 1); return 0; diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index db17196266e4b..82d370ae6a4a5 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -303,52 +303,48 @@ static u64 cppc_get_dmi_max_khz(void) /* * If CPPC lowest_freq and nominal_freq registers are exposed then we can - * use them to convert perf to freq and vice versa - * - * If the perf/freq point lies between Nominal and Lowest, we can treat - * (Low perf, Low freq) and (Nom Perf, Nom freq) as 2D co-ordinates of a line - * and extrapolate the rest - * For perf/freq > Nominal, we use the ratio perf:freq at Nominal for conversion + * use them to convert perf to freq and vice versa. 
The conversion is + * extrapolated as an affine function passing by the 2 points: + * - (Low perf, Low freq) + * - (Nominal perf, Nominal freq) */ static unsigned int cppc_cpufreq_perf_to_khz(struct cppc_cpudata *cpu_data, unsigned int perf) { struct cppc_perf_caps *caps = &cpu_data->perf_caps; + s64 retval, offset = 0; static u64 max_khz; u64 mul, div; if (caps->lowest_freq && caps->nominal_freq) { - if (perf >= caps->nominal_perf) { - mul = caps->nominal_freq; - div = caps->nominal_perf; - } else { - mul = caps->nominal_freq - caps->lowest_freq; - div = caps->nominal_perf - caps->lowest_perf; - } + mul = caps->nominal_freq - caps->lowest_freq; + div = caps->nominal_perf - caps->lowest_perf; + offset = caps->nominal_freq - div64_u64(caps->nominal_perf * mul, div); } else { if (!max_khz) max_khz = cppc_get_dmi_max_khz(); mul = max_khz; div = caps->highest_perf; } - return (u64)perf * mul / div; + + retval = offset + div64_u64(perf * mul, div); + if (retval >= 0) + return retval; + return 0; } static unsigned int cppc_cpufreq_khz_to_perf(struct cppc_cpudata *cpu_data, unsigned int freq) { struct cppc_perf_caps *caps = &cpu_data->perf_caps; + s64 retval, offset = 0; static u64 max_khz; u64 mul, div; if (caps->lowest_freq && caps->nominal_freq) { - if (freq >= caps->nominal_freq) { - mul = caps->nominal_perf; - div = caps->nominal_freq; - } else { - mul = caps->lowest_perf; - div = caps->lowest_freq; - } + mul = caps->nominal_perf - caps->lowest_perf; + div = caps->nominal_freq - caps->lowest_freq; + offset = caps->nominal_perf - div64_u64(caps->nominal_freq * mul, div); } else { if (!max_khz) max_khz = cppc_get_dmi_max_khz(); @@ -356,7 +352,10 @@ static unsigned int cppc_cpufreq_khz_to_perf(struct cppc_cpudata *cpu_data, div = max_khz; } - return (u64)freq * mul / div; + retval = offset + div64_u64(freq * mul, div); + if (retval >= 0) + return retval; + return 0; } static int cppc_cpufreq_set_target(struct cpufreq_policy *policy, diff --git 
a/drivers/cpufreq/qcom-cpufreq-nvmem.c b/drivers/cpufreq/qcom-cpufreq-nvmem.c index d1744b5d96190..6dfa86971a757 100644 --- a/drivers/cpufreq/qcom-cpufreq-nvmem.c +++ b/drivers/cpufreq/qcom-cpufreq-nvmem.c @@ -130,7 +130,7 @@ static void get_krait_bin_format_b(struct device *cpu_dev, } /* Check PVS_BLOW_STATUS */ - pte_efuse = *(((u32 *)buf) + 4); + pte_efuse = *(((u32 *)buf) + 1); pte_efuse &= BIT(21); if (pte_efuse) { dev_dbg(cpu_dev, "PVS bin: %d\n", *pvs); diff --git a/drivers/cpuidle/cpuidle-qcom-spm.c b/drivers/cpuidle/cpuidle-qcom-spm.c index 01e77913a4144..5f27dcc6c110f 100644 --- a/drivers/cpuidle/cpuidle-qcom-spm.c +++ b/drivers/cpuidle/cpuidle-qcom-spm.c @@ -155,6 +155,22 @@ static struct platform_driver spm_cpuidle_driver = { }, }; +static bool __init qcom_spm_find_any_cpu(void) +{ + struct device_node *cpu_node, *saw_node; + + for_each_of_cpu_node(cpu_node) { + saw_node = of_parse_phandle(cpu_node, "qcom,saw", 0); + if (of_device_is_available(saw_node)) { + of_node_put(saw_node); + of_node_put(cpu_node); + return true; + } + of_node_put(saw_node); + } + return false; +} + static int __init qcom_spm_cpuidle_init(void) { struct platform_device *pdev; @@ -164,6 +180,10 @@ static int __init qcom_spm_cpuidle_init(void) if (ret) return ret; + /* Make sure there is actually any CPU managed by the SPM */ + if (!qcom_spm_find_any_cpu()) + return 0; + pdev = platform_device_register_simple("qcom-spm-cpuidle", -1, NULL, 0); if (IS_ERR(pdev)) { diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c index 54ae8d16e4931..35e3cadccac2b 100644 --- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c +++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c @@ -11,6 +11,7 @@ * You could find a link for the datasheet in Documentation/arm/sunxi.rst */ +#include #include #include #include @@ -283,7 +284,9 @@ static int sun8i_ce_cipher_run(struct crypto_engine *engine, void *areq) flow = rctx->flow; err = 
sun8i_ce_run_task(ce, flow, crypto_tfm_alg_name(breq->base.tfm)); + local_bh_disable(); crypto_finalize_skcipher_request(engine, breq, err); + local_bh_enable(); return 0; } diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c index 88194718a806c..859b7522faaac 100644 --- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c +++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c @@ -9,6 +9,7 @@ * * You could find the datasheet in Documentation/arm/sunxi.rst */ +#include #include #include #include @@ -414,6 +415,8 @@ int sun8i_ce_hash_run(struct crypto_engine *engine, void *breq) theend: kfree(buf); kfree(result); + local_bh_disable(); crypto_finalize_hash_request(engine, breq, err); + local_bh_enable(); return 0; } diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c index 9ef1c85c4aaa5..554e400d41cad 100644 --- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c +++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c @@ -11,6 +11,7 @@ * You could find a link for the datasheet in Documentation/arm/sunxi.rst */ +#include #include #include #include @@ -274,7 +275,9 @@ static int sun8i_ss_handle_cipher_request(struct crypto_engine *engine, void *ar struct skcipher_request *breq = container_of(areq, struct skcipher_request, base); err = sun8i_ss_cipher(breq); + local_bh_disable(); crypto_finalize_skcipher_request(engine, breq, err); + local_bh_enable(); return 0; } diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c index 80e89066dbd1a..319fe3279a716 100644 --- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c +++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c @@ -30,6 +30,8 @@ static const struct ss_variant ss_a80_variant = { .alg_cipher = { SS_ALG_AES, SS_ALG_DES, SS_ALG_3DES, }, + .alg_hash = { SS_ID_NOTSUPP, SS_ID_NOTSUPP, SS_ID_NOTSUPP, SS_ID_NOTSUPP, + }, .op_mode = 
{ SS_OP_ECB, SS_OP_CBC, }, .ss_clks = { diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c index 3c073eb3db038..1a71ed49d2333 100644 --- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c +++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c @@ -9,6 +9,7 @@ * * You could find the datasheet in Documentation/arm/sunxi.rst */ +#include #include #include #include @@ -442,6 +443,8 @@ int sun8i_ss_hash_run(struct crypto_engine *engine, void *breq) theend: kfree(pad); kfree(result); + local_bh_disable(); crypto_finalize_hash_request(engine, breq, err); + local_bh_enable(); return 0; } diff --git a/drivers/crypto/amlogic/amlogic-gxl-cipher.c b/drivers/crypto/amlogic/amlogic-gxl-cipher.c index c6865cbd334b2..e79514fce731f 100644 --- a/drivers/crypto/amlogic/amlogic-gxl-cipher.c +++ b/drivers/crypto/amlogic/amlogic-gxl-cipher.c @@ -265,7 +265,9 @@ static int meson_handle_cipher_request(struct crypto_engine *engine, struct skcipher_request *breq = container_of(areq, struct skcipher_request, base); err = meson_cipher(breq); + local_bh_disable(); crypto_finalize_skcipher_request(engine, breq, err); + local_bh_enable(); return 0; } diff --git a/drivers/crypto/ccp/ccp-dmaengine.c b/drivers/crypto/ccp/ccp-dmaengine.c index d718db224be42..7d4b4ad1db1f3 100644 --- a/drivers/crypto/ccp/ccp-dmaengine.c +++ b/drivers/crypto/ccp/ccp-dmaengine.c @@ -632,6 +632,20 @@ static int ccp_terminate_all(struct dma_chan *dma_chan) return 0; } +static void ccp_dma_release(struct ccp_device *ccp) +{ + struct ccp_dma_chan *chan; + struct dma_chan *dma_chan; + unsigned int i; + + for (i = 0; i < ccp->cmd_q_count; i++) { + chan = ccp->ccp_dma_chan + i; + dma_chan = &chan->dma_chan; + tasklet_kill(&chan->cleanup_tasklet); + list_del_rcu(&dma_chan->device_node); + } +} + int ccp_dmaengine_register(struct ccp_device *ccp) { struct ccp_dma_chan *chan; @@ -736,6 +750,7 @@ int ccp_dmaengine_register(struct ccp_device *ccp) return 0; err_reg: 
+ ccp_dma_release(ccp); kmem_cache_destroy(ccp->dma_desc_cache); err_cache: @@ -752,6 +767,7 @@ void ccp_dmaengine_unregister(struct ccp_device *ccp) return; dma_async_device_unregister(dma_dev); + ccp_dma_release(ccp); kmem_cache_destroy(ccp->dma_desc_cache); kmem_cache_destroy(ccp->dma_cmd_cache); diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c index 8fd774a10edc3..6ab93dfd478a9 100644 --- a/drivers/crypto/ccp/sev-dev.c +++ b/drivers/crypto/ccp/sev-dev.c @@ -413,7 +413,7 @@ static int __sev_platform_init_locked(int *error) { struct psp_device *psp = psp_master; struct sev_device *sev; - int rc, psp_ret; + int rc, psp_ret = -1; int (*init_function)(int *error); if (!psp || !psp->sev_data) diff --git a/drivers/crypto/ccree/cc_buffer_mgr.c b/drivers/crypto/ccree/cc_buffer_mgr.c index a5e041d9d2cf1..11e0278c8631d 100644 --- a/drivers/crypto/ccree/cc_buffer_mgr.c +++ b/drivers/crypto/ccree/cc_buffer_mgr.c @@ -258,6 +258,13 @@ static int cc_map_sg(struct device *dev, struct scatterlist *sg, { int ret = 0; + if (!nbytes) { + *mapped_nents = 0; + *lbytes = 0; + *nents = 0; + return 0; + } + *nents = cc_get_sgl_nents(dev, sg, nbytes, lbytes); if (*nents > max_sg_nents) { *nents = 0; diff --git a/drivers/crypto/ccree/cc_cipher.c b/drivers/crypto/ccree/cc_cipher.c index 78833491f534d..309da6334a0a0 100644 --- a/drivers/crypto/ccree/cc_cipher.c +++ b/drivers/crypto/ccree/cc_cipher.c @@ -257,8 +257,8 @@ static void cc_cipher_exit(struct crypto_tfm *tfm) &ctx_p->user.key_dma_addr); /* Free key buffer in context */ - kfree_sensitive(ctx_p->user.key); dev_dbg(dev, "Free key buffer in context. 
key=@%p\n", ctx_p->user.key); + kfree_sensitive(ctx_p->user.key); } struct tdes_keys { diff --git a/drivers/crypto/gemini/sl3516-ce-cipher.c b/drivers/crypto/gemini/sl3516-ce-cipher.c index c1c2b1d866639..f2be0a7d7f7ac 100644 --- a/drivers/crypto/gemini/sl3516-ce-cipher.c +++ b/drivers/crypto/gemini/sl3516-ce-cipher.c @@ -264,7 +264,9 @@ static int sl3516_ce_handle_cipher_request(struct crypto_engine *engine, void *a struct skcipher_request *breq = container_of(areq, struct skcipher_request, base); err = sl3516_ce_cipher(breq); + local_bh_disable(); crypto_finalize_skcipher_request(engine, breq, err); + local_bh_enable(); return 0; } diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index c5b84a5ea3501..3b29c8993b8c7 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -4295,7 +4295,7 @@ static void qm_vf_get_qos(struct hisi_qm *qm, u32 fun_num) static int qm_vf_read_qos(struct hisi_qm *qm) { int cnt = 0; - int ret; + int ret = -EINVAL; /* reset mailbox qos val */ qm->mb_qos = 0; diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c index 6a45bd23b3635..090920ed50c8f 100644 --- a/drivers/crypto/hisilicon/sec2/sec_crypto.c +++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c @@ -2284,9 +2284,10 @@ static int sec_aead_soft_crypto(struct sec_ctx *ctx, struct aead_request *aead_req, bool encrypt) { - struct aead_request *subreq = aead_request_ctx(aead_req); struct sec_auth_ctx *a_ctx = &ctx->a_ctx; struct device *dev = ctx->dev; + struct aead_request *subreq; + int ret; /* Kunpeng920 aead mode not support input 0 size */ if (!a_ctx->fallback_aead_tfm) { @@ -2294,6 +2295,10 @@ static int sec_aead_soft_crypto(struct sec_ctx *ctx, return -EINVAL; } + subreq = aead_request_alloc(a_ctx->fallback_aead_tfm, GFP_KERNEL); + if (!subreq) + return -ENOMEM; + aead_request_set_tfm(subreq, a_ctx->fallback_aead_tfm); aead_request_set_callback(subreq, aead_req->base.flags, 
aead_req->base.complete, aead_req->base.data); @@ -2301,8 +2306,13 @@ static int sec_aead_soft_crypto(struct sec_ctx *ctx, aead_req->cryptlen, aead_req->iv); aead_request_set_ad(subreq, aead_req->assoclen); - return encrypt ? crypto_aead_encrypt(subreq) : - crypto_aead_decrypt(subreq); + if (encrypt) + ret = crypto_aead_encrypt(subreq); + else + ret = crypto_aead_decrypt(subreq); + aead_request_free(subreq); + + return ret; } static int sec_aead_crypto(struct aead_request *a_req, bool encrypt) diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c index 26d3ab1d308ba..89d4cc767d361 100644 --- a/drivers/crypto/hisilicon/sec2/sec_main.c +++ b/drivers/crypto/hisilicon/sec2/sec_main.c @@ -443,9 +443,11 @@ static int sec_engine_init(struct hisi_qm *qm) writel(SEC_SAA_ENABLE, qm->io_base + SEC_SAA_EN_REG); - /* Enable sm4 extra mode, as ctr/ecb */ - writel_relaxed(SEC_BD_ERR_CHK_EN0, - qm->io_base + SEC_BD_ERR_CHK_EN_REG0); + /* HW V2 enable sm4 extra mode, as ctr/ecb */ + if (qm->ver < QM_HW_V3) + writel_relaxed(SEC_BD_ERR_CHK_EN0, + qm->io_base + SEC_BD_ERR_CHK_EN_REG0); + /* Enable sm4 xts mode multiple iv */ writel_relaxed(SEC_BD_ERR_CHK_EN1, qm->io_base + SEC_BD_ERR_CHK_EN_REG1); diff --git a/drivers/crypto/marvell/Kconfig b/drivers/crypto/marvell/Kconfig index 9125199f1702b..a48591af12d02 100644 --- a/drivers/crypto/marvell/Kconfig +++ b/drivers/crypto/marvell/Kconfig @@ -47,6 +47,7 @@ config CRYPTO_DEV_OCTEONTX2_CPT select CRYPTO_SKCIPHER select CRYPTO_HASH select CRYPTO_AEAD + select NET_DEVLINK help This driver allows you to utilize the Marvell Cryptographic Accelerator Unit(CPT) found in OcteonTX2 series of processors. 
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c b/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c index 1b4d425bbf0e4..7fd4503d9cfc8 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c +++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c @@ -1076,6 +1076,39 @@ static void delete_engine_grps(struct pci_dev *pdev, delete_engine_group(&pdev->dev, &eng_grps->grp[i]); } +#define PCI_DEVID_CN10K_RNM 0xA098 +#define RNM_ENTROPY_STATUS 0x8 + +static void rnm_to_cpt_errata_fixup(struct device *dev) +{ + struct pci_dev *pdev; + void __iomem *base; + int timeout = 5000; + + pdev = pci_get_device(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_CN10K_RNM, NULL); + if (!pdev) + return; + + base = pci_ioremap_bar(pdev, 0); + if (!base) + goto put_pdev; + + while ((readq(base + RNM_ENTROPY_STATUS) & 0x7F) != 0x40) { + cpu_relax(); + udelay(1); + timeout--; + if (!timeout) { + dev_warn(dev, "RNM is not producing entropy\n"); + break; + } + } + + iounmap(base); + +put_pdev: + pci_dev_put(pdev); +} + int otx2_cpt_get_eng_grp(struct otx2_cpt_eng_grps *eng_grps, int eng_type) { @@ -1189,9 +1222,17 @@ int otx2_cpt_create_eng_grps(struct otx2_cptpf_dev *cptpf, if (is_dev_otx2(pdev)) goto unlock; + + /* + * Ensure RNM_ENTROPY_STATUS[NORMAL_CNT] = 0x40 before writing + * CPT_AF_CTL[RNM_REQ_EN] = 1 as a workaround for HW errata. + */ + rnm_to_cpt_errata_fixup(&pdev->dev); + /* * Configure engine group mask to allow context prefetching - * for the groups. + * for the groups and enable random number request, to enable + * CPT to request random numbers from RNM. 
*/ otx2_cpt_write_af_reg(&cptpf->afpf_mbox, pdev, CPT_AF_CTL, OTX2_CPT_ALL_ENG_GRPS_MASK << 3 | BIT_ULL(16), diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptvf_algs.c b/drivers/crypto/marvell/octeontx2/otx2_cptvf_algs.c index 2748a3327e391..f8f8542ce3e47 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cptvf_algs.c +++ b/drivers/crypto/marvell/octeontx2/otx2_cptvf_algs.c @@ -1634,16 +1634,13 @@ static inline int cpt_register_algs(void) { int i, err = 0; - if (!IS_ENABLED(CONFIG_DM_CRYPT)) { - for (i = 0; i < ARRAY_SIZE(otx2_cpt_skciphers); i++) - otx2_cpt_skciphers[i].base.cra_flags &= - ~CRYPTO_ALG_DEAD; - - err = crypto_register_skciphers(otx2_cpt_skciphers, - ARRAY_SIZE(otx2_cpt_skciphers)); - if (err) - return err; - } + for (i = 0; i < ARRAY_SIZE(otx2_cpt_skciphers); i++) + otx2_cpt_skciphers[i].base.cra_flags &= ~CRYPTO_ALG_DEAD; + + err = crypto_register_skciphers(otx2_cpt_skciphers, + ARRAY_SIZE(otx2_cpt_skciphers)); + if (err) + return err; for (i = 0; i < ARRAY_SIZE(otx2_cpt_aeads); i++) otx2_cpt_aeads[i].base.cra_flags &= ~CRYPTO_ALG_DEAD; diff --git a/drivers/crypto/mxs-dcp.c b/drivers/crypto/mxs-dcp.c index d19e5ffb5104b..d6f9e2fe863d7 100644 --- a/drivers/crypto/mxs-dcp.c +++ b/drivers/crypto/mxs-dcp.c @@ -331,7 +331,7 @@ static int mxs_dcp_aes_block_crypt(struct crypto_async_request *arq) memset(key + AES_KEYSIZE_128, 0, AES_KEYSIZE_128); } - for_each_sg(req->src, src, sg_nents(src), i) { + for_each_sg(req->src, src, sg_nents(req->src), i) { src_buf = sg_virt(src); len = sg_dma_len(src); tlen += len; diff --git a/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c b/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c index 6d10edc40aca0..68d39c833332e 100644 --- a/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c +++ b/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c @@ -52,7 +52,7 @@ static const char *const dev_cfg_services[] = { static int get_service_enabled(struct adf_accel_dev *accel_dev) { char services[ADF_CFG_MAX_VAL_LEN_IN_BYTES] = {0}; - u32 ret; 
+ int ret; ret = adf_cfg_get_param_value(accel_dev, ADF_GENERAL_SEC, ADF_SERVICES_ENABLED, services); diff --git a/drivers/crypto/qat/qat_4xxx/adf_drv.c b/drivers/crypto/qat/qat_4xxx/adf_drv.c index a6c78b9c730bc..fa4c350c1bf92 100644 --- a/drivers/crypto/qat/qat_4xxx/adf_drv.c +++ b/drivers/crypto/qat/qat_4xxx/adf_drv.c @@ -75,6 +75,13 @@ static int adf_crypto_dev_config(struct adf_accel_dev *accel_dev) if (ret) goto err; + /* Temporarily set the number of crypto instances to zero to avoid + * registering the crypto algorithms. + * This will be removed when the algorithms will support the + * CRYPTO_TFM_REQ_MAY_BACKLOG flag + */ + instances = 0; + for (i = 0; i < instances; i++) { val = i; bank = i * 2; diff --git a/drivers/crypto/qat/qat_common/adf_gen4_pfvf.c b/drivers/crypto/qat/qat_common/adf_gen4_pfvf.c index 8efbedf63bc80..3b3ea849c5e53 100644 --- a/drivers/crypto/qat/qat_common/adf_gen4_pfvf.c +++ b/drivers/crypto/qat/qat_common/adf_gen4_pfvf.c @@ -9,15 +9,12 @@ #include "adf_pfvf_pf_proto.h" #include "adf_pfvf_utils.h" -#define ADF_4XXX_MAX_NUM_VFS 16 - #define ADF_4XXX_PF2VM_OFFSET(i) (0x40B010 + ((i) * 0x20)) #define ADF_4XXX_VM2PF_OFFSET(i) (0x40B014 + ((i) * 0x20)) /* VF2PF interrupt source registers */ -#define ADF_4XXX_VM2PF_SOU(i) (0x41A180 + ((i) * 4)) -#define ADF_4XXX_VM2PF_MSK(i) (0x41A1C0 + ((i) * 4)) -#define ADF_4XXX_VM2PF_INT_EN_MSK BIT(0) +#define ADF_4XXX_VM2PF_SOU 0x41A180 +#define ADF_4XXX_VM2PF_MSK 0x41A1C0 #define ADF_PFVF_GEN4_MSGTYPE_SHIFT 2 #define ADF_PFVF_GEN4_MSGTYPE_MASK 0x3F @@ -41,51 +38,30 @@ static u32 adf_gen4_pf_get_vf2pf_offset(u32 i) static u32 adf_gen4_get_vf2pf_sources(void __iomem *pmisc_addr) { - int i; u32 sou, mask; - int num_csrs = ADF_4XXX_MAX_NUM_VFS; - u32 vf_mask = 0; - for (i = 0; i < num_csrs; i++) { - sou = ADF_CSR_RD(pmisc_addr, ADF_4XXX_VM2PF_SOU(i)); - mask = ADF_CSR_RD(pmisc_addr, ADF_4XXX_VM2PF_MSK(i)); - sou &= ~mask; - vf_mask |= sou << i; - } + sou = ADF_CSR_RD(pmisc_addr, ADF_4XXX_VM2PF_SOU); + 
mask = ADF_CSR_RD(pmisc_addr, ADF_4XXX_VM2PF_MSK); - return vf_mask; + return sou &= ~mask; } static void adf_gen4_enable_vf2pf_interrupts(void __iomem *pmisc_addr, u32 vf_mask) { - int num_csrs = ADF_4XXX_MAX_NUM_VFS; - unsigned long mask = vf_mask; unsigned int val; - int i; - - for_each_set_bit(i, &mask, num_csrs) { - unsigned int offset = ADF_4XXX_VM2PF_MSK(i); - val = ADF_CSR_RD(pmisc_addr, offset) & ~ADF_4XXX_VM2PF_INT_EN_MSK; - ADF_CSR_WR(pmisc_addr, offset, val); - } + val = ADF_CSR_RD(pmisc_addr, ADF_4XXX_VM2PF_MSK) & ~vf_mask; + ADF_CSR_WR(pmisc_addr, ADF_4XXX_VM2PF_MSK, val); } static void adf_gen4_disable_vf2pf_interrupts(void __iomem *pmisc_addr, u32 vf_mask) { - int num_csrs = ADF_4XXX_MAX_NUM_VFS; - unsigned long mask = vf_mask; unsigned int val; - int i; - - for_each_set_bit(i, &mask, num_csrs) { - unsigned int offset = ADF_4XXX_VM2PF_MSK(i); - val = ADF_CSR_RD(pmisc_addr, offset) | ADF_4XXX_VM2PF_INT_EN_MSK; - ADF_CSR_WR(pmisc_addr, offset, val); - } + val = ADF_CSR_RD(pmisc_addr, ADF_4XXX_VM2PF_MSK) | vf_mask; + ADF_CSR_WR(pmisc_addr, ADF_4XXX_VM2PF_MSK, val); } static int adf_gen4_pfvf_send(struct adf_accel_dev *accel_dev, diff --git a/drivers/crypto/qat/qat_common/adf_pfvf_vf_msg.c b/drivers/crypto/qat/qat_common/adf_pfvf_vf_msg.c index 14b222691c9c2..1141258db4b65 100644 --- a/drivers/crypto/qat/qat_common/adf_pfvf_vf_msg.c +++ b/drivers/crypto/qat/qat_common/adf_pfvf_vf_msg.c @@ -96,7 +96,7 @@ int adf_vf2pf_request_version(struct adf_accel_dev *accel_dev) int adf_vf2pf_get_capabilities(struct adf_accel_dev *accel_dev) { struct adf_hw_device_data *hw_data = accel_dev->hw_device; - struct capabilities_v3 cap_msg = { { 0 }, }; + struct capabilities_v3 cap_msg = { 0 }; unsigned int len = sizeof(cap_msg); if (accel_dev->vf.pf_compat_ver < ADF_PFVF_COMPAT_CAPABILITIES) @@ -141,7 +141,7 @@ int adf_vf2pf_get_capabilities(struct adf_accel_dev *accel_dev) int adf_vf2pf_get_ring_to_svc(struct adf_accel_dev *accel_dev) { - struct ring_to_svc_map_v1 
rts_map_msg = { { 0 }, }; + struct ring_to_svc_map_v1 rts_map_msg = { 0 }; unsigned int len = sizeof(rts_map_msg); if (accel_dev->vf.pf_compat_ver < ADF_PFVF_COMPAT_RING_TO_SVC_MAP) diff --git a/drivers/crypto/qat/qat_common/qat_crypto.c b/drivers/crypto/qat/qat_common/qat_crypto.c index 7234c4940fae4..67c9588e89df9 100644 --- a/drivers/crypto/qat/qat_common/qat_crypto.c +++ b/drivers/crypto/qat/qat_common/qat_crypto.c @@ -161,6 +161,13 @@ int qat_crypto_dev_config(struct adf_accel_dev *accel_dev) if (ret) goto err; + /* Temporarily set the number of crypto instances to zero to avoid + * registering the crypto algorithms. + * This will be removed when the algorithms will support the + * CRYPTO_TFM_REQ_MAY_BACKLOG flag + */ + instances = 0; + for (i = 0; i < instances; i++) { val = i; snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_ASYM_BANK_NUM, i); diff --git a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c index 1cece1a7d3f00..5bbf0d2722e11 100644 --- a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c +++ b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c @@ -506,7 +506,6 @@ struct rk_crypto_tmp rk_ecb_des3_ede_alg = { .exit = rk_ablk_exit_tfm, .min_keysize = DES3_EDE_KEY_SIZE, .max_keysize = DES3_EDE_KEY_SIZE, - .ivsize = DES_BLOCK_SIZE, .setkey = rk_tdes_setkey, .encrypt = rk_des3_ede_ecb_encrypt, .decrypt = rk_des3_ede_ecb_decrypt, diff --git a/drivers/crypto/vmx/Kconfig b/drivers/crypto/vmx/Kconfig index c85fab7ef0bdd..b2c28b87f14b3 100644 --- a/drivers/crypto/vmx/Kconfig +++ b/drivers/crypto/vmx/Kconfig @@ -2,7 +2,11 @@ config CRYPTO_DEV_VMX_ENCRYPT tristate "Encryption acceleration support on P8 CPU" depends on CRYPTO_DEV_VMX + select CRYPTO_AES + select CRYPTO_CBC + select CRYPTO_CTR select CRYPTO_GHASH + select CRYPTO_XTS default m help Support for VMX cryptographic acceleration instructions on Power8 CPU. 
diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile index 40ab50318dafa..a90202ac88d2f 100644 --- a/drivers/cxl/core/Makefile +++ b/drivers/cxl/core/Makefile @@ -2,7 +2,7 @@ obj-$(CONFIG_CXL_BUS) += cxl_core.o ccflags-y += -I$(srctree)/drivers/cxl -cxl_core-y := bus.o +cxl_core-y := port.o cxl_core-y += pmem.o cxl_core-y += regs.o cxl_core-y += memdev.o diff --git a/drivers/cxl/core/bus.c b/drivers/cxl/core/port.c similarity index 99% rename from drivers/cxl/core/bus.c rename to drivers/cxl/core/port.c index 3f9b98ecd18b7..aa5239ac67c67 100644 --- a/drivers/cxl/core/bus.c +++ b/drivers/cxl/core/port.c @@ -182,6 +182,7 @@ static void cxl_decoder_release(struct device *dev) ida_free(&port->decoder_ida, cxld->id); kfree(cxld); + put_device(&port->dev); } static const struct device_type cxl_decoder_switch_type = { @@ -500,7 +501,10 @@ struct cxl_decoder *cxl_decoder_alloc(struct cxl_port *port, int nr_targets) if (rc < 0) goto err; + /* need parent to stick around to release the id */ + get_device(&port->dev); cxld->id = rc; + cxld->nr_targets = nr_targets; dev = &cxld->dev; device_initialize(dev); diff --git a/drivers/cxl/core/regs.c b/drivers/cxl/core/regs.c index e37e23bf43553..6a18ff8739e00 100644 --- a/drivers/cxl/core/regs.c +++ b/drivers/cxl/core/regs.c @@ -35,7 +35,7 @@ void cxl_probe_component_regs(struct device *dev, void __iomem *base, struct cxl_component_reg_map *map) { int cap, cap_count; - u64 cap_array; + u32 cap_array; *map = (struct cxl_component_reg_map) { 0 }; @@ -45,11 +45,11 @@ void cxl_probe_component_regs(struct device *dev, void __iomem *base, */ base += CXL_CM_OFFSET; - cap_array = readq(base + CXL_CM_CAP_HDR_OFFSET); + cap_array = readl(base + CXL_CM_CAP_HDR_OFFSET); if (FIELD_GET(CXL_CM_CAP_HDR_ID_MASK, cap_array) != CM_CAP_HDR_CAP_ID) { dev_err(dev, - "Couldn't locate the CXL.cache and CXL.mem capability array header./n"); + "Couldn't locate the CXL.cache and CXL.mem capability array header.\n"); return; } diff --git 
a/drivers/dax/super.c b/drivers/dax/super.c index e3029389d8097..6bd565fe2e63b 100644 --- a/drivers/dax/super.c +++ b/drivers/dax/super.c @@ -476,6 +476,7 @@ static int dax_fs_init(void) static void dax_fs_exit(void) { kern_unmount(dax_mnt); + rcu_barrier(); kmem_cache_destroy(dax_cache); } diff --git a/drivers/dma-buf/udmabuf.c b/drivers/dma-buf/udmabuf.c index c57a609db75be..e7330684d3b82 100644 --- a/drivers/dma-buf/udmabuf.c +++ b/drivers/dma-buf/udmabuf.c @@ -190,6 +190,10 @@ static long udmabuf_create(struct miscdevice *device, if (ubuf->pagecount > pglimit) goto err; } + + if (!ubuf->pagecount) + goto err; + ubuf->pages = kmalloc_array(ubuf->pagecount, sizeof(*ubuf->pages), GFP_KERNEL); if (!ubuf->pages) { diff --git a/drivers/dma/hisi_dma.c b/drivers/dma/hisi_dma.c index 97c87a7cba879..43817ced3a3e1 100644 --- a/drivers/dma/hisi_dma.c +++ b/drivers/dma/hisi_dma.c @@ -30,7 +30,7 @@ #define HISI_DMA_MODE 0x217c #define HISI_DMA_OFFSET 0x100 -#define HISI_DMA_MSI_NUM 30 +#define HISI_DMA_MSI_NUM 32 #define HISI_DMA_CHAN_NUM 30 #define HISI_DMA_Q_DEPTH_VAL 1024 diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 573ad8b86804e..3061fe857d69f 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -681,8 +681,13 @@ static void idxd_groups_clear_state(struct idxd_device *idxd) group->use_rdbuf_limit = false; group->rdbufs_allowed = 0; group->rdbufs_reserved = 0; - group->tc_a = -1; - group->tc_b = -1; + if (idxd->hw.version < DEVICE_VERSION_2 && !tc_override) { + group->tc_a = 1; + group->tc_b = 1; + } else { + group->tc_a = -1; + group->tc_b = -1; + } } } diff --git a/drivers/dma/sh/shdma-base.c b/drivers/dma/sh/shdma-base.c index b26ed690f03c8..158e5e7defaeb 100644 --- a/drivers/dma/sh/shdma-base.c +++ b/drivers/dma/sh/shdma-base.c @@ -115,10 +115,8 @@ static dma_cookie_t shdma_tx_submit(struct dma_async_tx_descriptor *tx) ret = pm_runtime_get(schan->dev); spin_unlock_irq(&schan->chan_lock); - if (ret < 0) { + if (ret < 0) 
dev_err(schan->dev, "%s(): GET = %d\n", __func__, ret); - pm_runtime_put(schan->dev); - } pm_runtime_barrier(schan->dev); diff --git a/drivers/firmware/arm_scmi/clock.c b/drivers/firmware/arm_scmi/clock.c index 35b56c8ba0c0e..492f3a9197ec2 100644 --- a/drivers/firmware/arm_scmi/clock.c +++ b/drivers/firmware/arm_scmi/clock.c @@ -204,7 +204,8 @@ scmi_clock_describe_rates_get(const struct scmi_protocol_handle *ph, u32 clk_id, if (rate_discrete && rate) { clk->list.num_rates = tot_rate_cnt; - sort(rate, tot_rate_cnt, sizeof(*rate), rate_cmp_func, NULL); + sort(clk->list.rates, tot_rate_cnt, sizeof(*rate), + rate_cmp_func, NULL); } clk->rate_discrete = rate_discrete; diff --git a/drivers/firmware/arm_scmi/driver.c b/drivers/firmware/arm_scmi/driver.c index d76bab3aaac45..e815b8f987393 100644 --- a/drivers/firmware/arm_scmi/driver.c +++ b/drivers/firmware/arm_scmi/driver.c @@ -652,7 +652,8 @@ static void scmi_handle_response(struct scmi_chan_info *cinfo, xfer = scmi_xfer_command_acquire(cinfo, msg_hdr); if (IS_ERR(xfer)) { - scmi_clear_channel(info, cinfo); + if (MSG_XTRACT_TYPE(msg_hdr) == MSG_TYPE_DELAYED_RESP) + scmi_clear_channel(info, cinfo); return; } diff --git a/drivers/firmware/efi/efi-pstore.c b/drivers/firmware/efi/efi-pstore.c index 0ef086e43090b..7e771c56c13c6 100644 --- a/drivers/firmware/efi/efi-pstore.c +++ b/drivers/firmware/efi/efi-pstore.c @@ -266,7 +266,7 @@ static int efi_pstore_write(struct pstore_record *record) efi_name[i] = name[i]; ret = efivar_entry_set_safe(efi_name, vendor, PSTORE_EFI_ATTRIBUTES, - preemptible(), record->size, record->psi->buf); + false, record->size, record->psi->buf); if (record->reason == KMSG_DUMP_OOPS && try_module_get(THIS_MODULE)) if (!schedule_work(&efivar_work)) diff --git a/drivers/firmware/google/Kconfig b/drivers/firmware/google/Kconfig index 931544c9f63d4..983e07dc022ed 100644 --- a/drivers/firmware/google/Kconfig +++ b/drivers/firmware/google/Kconfig @@ -21,7 +21,7 @@ config GOOGLE_SMI config 
GOOGLE_COREBOOT_TABLE tristate "Coreboot Table Access" - depends on ACPI || OF + depends on HAS_IOMEM && (ACPI || OF) help This option enables the coreboot_table module, which provides other firmware modules access to the coreboot table. The coreboot table diff --git a/drivers/firmware/qcom_scm.c b/drivers/firmware/qcom_scm.c index 7db8066b19fd5..3f67bf774821d 100644 --- a/drivers/firmware/qcom_scm.c +++ b/drivers/firmware/qcom_scm.c @@ -749,12 +749,6 @@ int qcom_scm_iommu_secure_ptbl_init(u64 addr, u32 size, u32 spare) }; int ret; - desc.args[0] = addr; - desc.args[1] = size; - desc.args[2] = spare; - desc.arginfo = QCOM_SCM_ARGS(3, QCOM_SCM_RW, QCOM_SCM_VAL, - QCOM_SCM_VAL); - ret = qcom_scm_call(__scm->dev, &desc, NULL); /* the pg table has been initialized already, ignore the error */ diff --git a/drivers/firmware/stratix10-svc.c b/drivers/firmware/stratix10-svc.c index 29c0a616b3177..c4bf934e3553e 100644 --- a/drivers/firmware/stratix10-svc.c +++ b/drivers/firmware/stratix10-svc.c @@ -477,7 +477,7 @@ static int svc_normal_to_secure_thread(void *data) case INTEL_SIP_SMC_RSU_ERROR: pr_err("%s: STATUS_ERROR\n", __func__); cbdata->status = BIT(SVC_STATUS_ERROR); - cbdata->kaddr1 = NULL; + cbdata->kaddr1 = &res.a1; cbdata->kaddr2 = NULL; cbdata->kaddr3 = NULL; pdata->chan->scl->receive_cb(pdata->chan->scl, cbdata); diff --git a/drivers/firmware/sysfb_simplefb.c b/drivers/firmware/sysfb_simplefb.c index 303a491e520d1..757cc8b9f3de9 100644 --- a/drivers/firmware/sysfb_simplefb.c +++ b/drivers/firmware/sysfb_simplefb.c @@ -113,16 +113,21 @@ __init int sysfb_create_simplefb(const struct screen_info *si, sysfb_apply_efi_quirks(pd); ret = platform_device_add_resources(pd, &res, 1); - if (ret) { - platform_device_put(pd); - return ret; - } + if (ret) + goto err_put_device; ret = platform_device_add_data(pd, mode, sizeof(*mode)); - if (ret) { - platform_device_put(pd); - return ret; - } + if (ret) + goto err_put_device; + + ret = platform_device_add(pd); + if (ret) + goto 
err_put_device; + + return 0; + +err_put_device: + platform_device_put(pd); - return platform_device_add(pd); + return ret; } diff --git a/drivers/fsi/fsi-master-aspeed.c b/drivers/fsi/fsi-master-aspeed.c index 8606e55c1721c..0bed2fab80558 100644 --- a/drivers/fsi/fsi-master-aspeed.c +++ b/drivers/fsi/fsi-master-aspeed.c @@ -542,25 +542,28 @@ static int fsi_master_aspeed_probe(struct platform_device *pdev) return rc; } - aspeed = devm_kzalloc(&pdev->dev, sizeof(*aspeed), GFP_KERNEL); + aspeed = kzalloc(sizeof(*aspeed), GFP_KERNEL); if (!aspeed) return -ENOMEM; aspeed->dev = &pdev->dev; aspeed->base = devm_platform_ioremap_resource(pdev, 0); - if (IS_ERR(aspeed->base)) - return PTR_ERR(aspeed->base); + if (IS_ERR(aspeed->base)) { + rc = PTR_ERR(aspeed->base); + goto err_free_aspeed; + } aspeed->clk = devm_clk_get(aspeed->dev, NULL); if (IS_ERR(aspeed->clk)) { dev_err(aspeed->dev, "couldn't get clock\n"); - return PTR_ERR(aspeed->clk); + rc = PTR_ERR(aspeed->clk); + goto err_free_aspeed; } rc = clk_prepare_enable(aspeed->clk); if (rc) { dev_err(aspeed->dev, "couldn't enable clock\n"); - return rc; + goto err_free_aspeed; } rc = setup_cfam_reset(aspeed); @@ -595,7 +598,7 @@ static int fsi_master_aspeed_probe(struct platform_device *pdev) rc = opb_readl(aspeed, ctrl_base + FSI_MVER, &raw); if (rc) { dev_err(&pdev->dev, "failed to read hub version\n"); - return rc; + goto err_release; } reg = be32_to_cpu(raw); @@ -634,6 +637,8 @@ static int fsi_master_aspeed_probe(struct platform_device *pdev) err_release: clk_disable_unprepare(aspeed->clk); +err_free_aspeed: + kfree(aspeed); return rc; } diff --git a/drivers/fsi/fsi-scom.c b/drivers/fsi/fsi-scom.c index da1486bb6a144..bcb756dc98663 100644 --- a/drivers/fsi/fsi-scom.c +++ b/drivers/fsi/fsi-scom.c @@ -145,7 +145,7 @@ static int put_indirect_scom_form0(struct scom_device *scom, uint64_t value, uint64_t addr, uint32_t *status) { uint64_t ind_data, ind_addr; - int rc, retries, err = 0; + int rc, err; if (value & 
~XSCOM_DATA_IND_DATA) return -EINVAL; @@ -156,19 +156,14 @@ static int put_indirect_scom_form0(struct scom_device *scom, uint64_t value, if (rc || (*status & SCOM_STATUS_ANY_ERR)) return rc; - for (retries = 0; retries < SCOM_MAX_IND_RETRIES; retries++) { - rc = __get_scom(scom, &ind_data, addr, status); - if (rc || (*status & SCOM_STATUS_ANY_ERR)) - return rc; + rc = __get_scom(scom, &ind_data, addr, status); + if (rc || (*status & SCOM_STATUS_ANY_ERR)) + return rc; - err = (ind_data & XSCOM_DATA_IND_ERR_MASK) >> XSCOM_DATA_IND_ERR_SHIFT; - *status = err << SCOM_STATUS_PIB_RESP_SHIFT; - if ((ind_data & XSCOM_DATA_IND_COMPLETE) || (err != SCOM_PIB_BLOCKED)) - return 0; + err = (ind_data & XSCOM_DATA_IND_ERR_MASK) >> XSCOM_DATA_IND_ERR_SHIFT; + *status = err << SCOM_STATUS_PIB_RESP_SHIFT; - msleep(1); - } - return rc; + return 0; } static int put_indirect_scom_form1(struct scom_device *scom, uint64_t value, @@ -188,7 +183,7 @@ static int get_indirect_scom_form0(struct scom_device *scom, uint64_t *value, uint64_t addr, uint32_t *status) { uint64_t ind_data, ind_addr; - int rc, retries, err = 0; + int rc, err; ind_addr = addr & XSCOM_ADDR_DIRECT_PART; ind_data = (addr & XSCOM_ADDR_INDIRECT_PART) | XSCOM_DATA_IND_READ; @@ -196,21 +191,15 @@ static int get_indirect_scom_form0(struct scom_device *scom, uint64_t *value, if (rc || (*status & SCOM_STATUS_ANY_ERR)) return rc; - for (retries = 0; retries < SCOM_MAX_IND_RETRIES; retries++) { - rc = __get_scom(scom, &ind_data, addr, status); - if (rc || (*status & SCOM_STATUS_ANY_ERR)) - return rc; - - err = (ind_data & XSCOM_DATA_IND_ERR_MASK) >> XSCOM_DATA_IND_ERR_SHIFT; - *status = err << SCOM_STATUS_PIB_RESP_SHIFT; - *value = ind_data & XSCOM_DATA_IND_DATA; + rc = __get_scom(scom, &ind_data, addr, status); + if (rc || (*status & SCOM_STATUS_ANY_ERR)) + return rc; - if ((ind_data & XSCOM_DATA_IND_COMPLETE) || (err != SCOM_PIB_BLOCKED)) - return 0; + err = (ind_data & XSCOM_DATA_IND_ERR_MASK) >> XSCOM_DATA_IND_ERR_SHIFT; + 
*status = err << SCOM_STATUS_PIB_RESP_SHIFT; + *value = ind_data & XSCOM_DATA_IND_DATA; - msleep(1); - } - return rc; + return 0; } static int raw_put_scom(struct scom_device *scom, uint64_t value, @@ -289,7 +278,7 @@ static int put_scom(struct scom_device *scom, uint64_t value, int rc; rc = raw_put_scom(scom, value, addr, &status); - if (rc == -ENODEV) + if (rc) return rc; rc = handle_fsi2pib_status(scom, status); @@ -308,7 +297,7 @@ static int get_scom(struct scom_device *scom, uint64_t *value, int rc; rc = raw_get_scom(scom, value, addr, &status); - if (rc == -ENODEV) + if (rc) return rc; rc = handle_fsi2pib_status(scom, status); diff --git a/drivers/gpio/gpio-sim.c b/drivers/gpio/gpio-sim.c index 8e5d87984a489..41c31b10ae848 100644 --- a/drivers/gpio/gpio-sim.c +++ b/drivers/gpio/gpio-sim.c @@ -134,7 +134,7 @@ static int gpio_sim_get_multiple(struct gpio_chip *gc, struct gpio_sim_chip *chip = gpiochip_get_data(gc); mutex_lock(&chip->lock); - bitmap_copy(bits, chip->value_map, gc->ngpio); + bitmap_replace(bits, bits, chip->value_map, mask, gc->ngpio); mutex_unlock(&chip->lock); return 0; @@ -146,7 +146,7 @@ static void gpio_sim_set_multiple(struct gpio_chip *gc, struct gpio_sim_chip *chip = gpiochip_get_data(gc); mutex_lock(&chip->lock); - bitmap_copy(chip->value_map, bits, gc->ngpio); + bitmap_replace(chip->value_map, chip->value_map, bits, mask, gc->ngpio); mutex_unlock(&chip->lock); } diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c index a5495ad31c9ce..b7c2f2af1dee5 100644 --- a/drivers/gpio/gpiolib-acpi.c +++ b/drivers/gpio/gpiolib-acpi.c @@ -387,8 +387,8 @@ static acpi_status acpi_gpiochip_alloc_event(struct acpi_resource *ares, pin = agpio->pin_table[0]; if (pin <= 255) { - char ev_name[5]; - sprintf(ev_name, "_%c%02hhX", + char ev_name[8]; + sprintf(ev_name, "_%c%02X", agpio->triggering == ACPI_EDGE_SENSITIVE ? 
'E' : 'L', pin); if (ACPI_SUCCESS(acpi_get_handle(handle, ev_name, &evt_handle))) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 6630d92e30ada..2c5748396e6ea 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -1404,6 +1404,16 @@ static int gpiochip_to_irq(struct gpio_chip *gc, unsigned int offset) { struct irq_domain *domain = gc->irq.domain; +#ifdef CONFIG_GPIOLIB_IRQCHIP + /* + * Avoid race condition with other code, which tries to lookup + * an IRQ before the irqchip has been properly registered, + * i.e. while gpiochip is still being brought up. + */ + if (!gc->irq.initialized) + return -EPROBE_DEFER; +#endif + if (!gpiochip_irqchip_irq_valid(gc, offset)) return -ENXIO; @@ -1591,8 +1601,26 @@ static int gpiochip_add_irqchip(struct gpio_chip *gc, gpiochip_set_irq_hooks(gc); + /* + * Using barrier() here to prevent compiler from reordering + * gc->irq.initialized before initialization of above + * GPIO chip irq members. + */ + barrier(); + + gc->irq.initialized = true; + acpi_gpiochip_request_interrupts(gc); + /* + * Using barrier() here to prevent compiler from reordering + * gc->irq.initialized before initialization of above + * GPIO chip irq members. 
+ */ + barrier(); + + gc->irq.initialized = true; + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/ObjectID.h b/drivers/gpu/drm/amd/amdgpu/ObjectID.h index 5b393622f5920..a0f0a17e224fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/ObjectID.h +++ b/drivers/gpu/drm/amd/amdgpu/ObjectID.h @@ -119,6 +119,7 @@ #define CONNECTOR_OBJECT_ID_eDP 0x14 #define CONNECTOR_OBJECT_ID_MXM 0x15 #define CONNECTOR_OBJECT_ID_LVDS_eDP 0x16 +#define CONNECTOR_OBJECT_ID_USBC 0x17 /* deleted */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index f9bab963a948a..5df387c4d7fbb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1813,12 +1813,6 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( true); ret = unreserve_bo_and_vms(&ctx, false, false); - /* Only apply no TLB flush on Aldebaran to - * workaround regressions on other Asics. - */ - if (table_freed && (adev->asic_type != CHIP_ALDEBARAN)) - *table_freed = true; - goto out; out_unreserve: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c index c16a2704ced65..f3160b951df3a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c @@ -175,7 +175,7 @@ int amdgpu_connector_get_monitor_bpc(struct drm_connector *connector) /* Check if bpc is within clock limit. 
Try to degrade gracefully otherwise */ if ((bpc == 12) && (mode_clock * 3/2 > max_tmds_clock)) { - if ((connector->display_info.edid_hdmi_dc_modes & DRM_EDID_HDMI_DC_30) && + if ((connector->display_info.edid_hdmi_rgb444_dc_modes & DRM_EDID_HDMI_DC_30) && (mode_clock * 5/4 <= max_tmds_clock)) bpc = 10; else diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 06d07502a1f68..a34be65c9eaac 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1509,6 +1509,7 @@ int amdgpu_cs_fence_to_handle_ioctl(struct drm_device *dev, void *data, return 0; default: + dma_fence_put(fence); return -EINVAL; } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index ed077de426d9b..052816f0efed4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -2073,6 +2074,8 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev) */ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) { + struct drm_device *dev = adev_to_drm(adev); + struct pci_dev *parent; int i, r; amdgpu_device_enable_virtual_display(adev); @@ -2137,6 +2140,18 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) break; } + if (amdgpu_has_atpx() && + (amdgpu_is_atpx_hybrid() || + amdgpu_has_atpx_dgpu_power_cntl()) && + ((adev->flags & AMD_IS_APU) == 0) && + !pci_is_thunderbolt_attached(to_pci_dev(dev->dev))) + adev->flags |= AMD_IS_PX; + + if (!(adev->flags & AMD_IS_APU)) { + parent = pci_upstream_bridge(adev->pdev); + adev->has_pr3 = parent ? 
pci_pr3_present(parent) : false; + } + amdgpu_amdkfd_device_probe(adev); adev->pm.pp_feature = amdgpu_pp_feature_mask; @@ -2708,11 +2723,11 @@ static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev) } } - amdgpu_amdkfd_suspend(adev, false); - amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE); amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE); + amdgpu_amdkfd_suspend(adev, false); + /* Workaroud for ASICs need to disable SMC first */ amdgpu_device_smu_fini_early(adev); @@ -5663,7 +5678,7 @@ void amdgpu_device_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring) { #ifdef CONFIG_X86_64 - if (adev->flags & AMD_IS_APU) + if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev)) return; #endif if (adev->gmc.xgmi.connected_to_cpu) @@ -5679,7 +5694,7 @@ void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring) { #ifdef CONFIG_X86_64 - if (adev->flags & AMD_IS_APU) + if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev)) return; #endif if (adev->gmc.xgmi.connected_to_cpu) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 0ead08ba58c2a..c853266957ce1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -686,7 +686,7 @@ MODULE_PARM_DESC(sched_policy, * Maximum number of processes that HWS can schedule concurrently. The maximum is the * number of VMIDs assigned to the HWS, which is also the default. 
*/ -int hws_max_conc_proc = 8; +int hws_max_conc_proc = -1; module_param(hws_max_conc_proc, int, 0444); MODULE_PARM_DESC(hws_max_conc_proc, "Max # processes HWS can execute concurrently when sched_policy=0 (0 = no concurrency, #VMIDs for KFD = Maximum(default))"); @@ -2276,18 +2276,23 @@ static int amdgpu_pmops_suspend(struct device *dev) { struct drm_device *drm_dev = dev_get_drvdata(dev); struct amdgpu_device *adev = drm_to_adev(drm_dev); - int r; if (amdgpu_acpi_is_s0ix_active(adev)) adev->in_s0ix = true; else adev->in_s3 = true; - r = amdgpu_device_suspend(drm_dev, true); - if (r) - return r; + return amdgpu_device_suspend(drm_dev, true); +} + +static int amdgpu_pmops_suspend_noirq(struct device *dev) +{ + struct drm_device *drm_dev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(drm_dev); + if (!adev->in_s0ix) - r = amdgpu_asic_reset(adev); - return r; + return amdgpu_asic_reset(adev); + + return 0; } static int amdgpu_pmops_resume(struct device *dev) @@ -2528,6 +2533,7 @@ static const struct dev_pm_ops amdgpu_pm_ops = { .prepare = amdgpu_pmops_prepare, .complete = amdgpu_pmops_complete, .suspend = amdgpu_pmops_suspend, + .suspend_noirq = amdgpu_pmops_suspend_noirq, .resume = amdgpu_pmops_resume, .freeze = amdgpu_pmops_freeze, .thaw = amdgpu_pmops_thaw, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c index 2a786e7886277..978c46395ced0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c @@ -91,17 +91,13 @@ static int amdgpu_fru_read_eeprom(struct amdgpu_device *adev, uint32_t addrptr, int amdgpu_fru_get_product_info(struct amdgpu_device *adev) { - unsigned char buff[AMDGPU_PRODUCT_NAME_LEN+2]; + unsigned char buff[AMDGPU_PRODUCT_NAME_LEN]; u32 addrptr; int size, len; - int offset = 2; if (!is_fru_eeprom_supported(adev)) return 0; - if (adev->asic_type == CHIP_ALDEBARAN) - offset = 0; - /* If algo exists, it means that the 
i2c_adapter's initialized */ if (!adev->pm.smu_i2c.algo) { DRM_WARN("Cannot access FRU, EEPROM accessor not initialized"); @@ -143,8 +139,7 @@ int amdgpu_fru_get_product_info(struct amdgpu_device *adev) AMDGPU_PRODUCT_NAME_LEN); len = AMDGPU_PRODUCT_NAME_LEN - 1; } - /* Start at 2 due to buff using fields 0 and 1 for the address */ - memcpy(adev->product_name, &buff[offset], len); + memcpy(adev->product_name, buff, len); adev->product_name[len] = '\0'; addrptr += size + 1; @@ -162,7 +157,7 @@ int amdgpu_fru_get_product_info(struct amdgpu_device *adev) DRM_WARN("FRU Product Number is larger than 16 characters. This is likely a mistake"); len = sizeof(adev->product_number) - 1; } - memcpy(adev->product_number, &buff[offset], len); + memcpy(adev->product_number, buff, len); adev->product_number[len] = '\0'; addrptr += size + 1; @@ -189,7 +184,7 @@ int amdgpu_fru_get_product_info(struct amdgpu_device *adev) DRM_WARN("FRU Serial Number is larger than 16 characters. This is likely a mistake"); len = sizeof(adev->serial) - 1; } - memcpy(adev->serial, &buff[offset], len); + memcpy(adev->serial, buff, len); adev->serial[len] = '\0'; return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 1916ec84dd71f..e7845df6cad22 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -266,7 +266,7 @@ static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev, * adev->gfx.mec.num_pipe_per_mec * adev->gfx.mec.num_queue_per_pipe; - while (queue_bit-- >= 0) { + while (--queue_bit >= 0) { if (test_bit(queue_bit, adev->gfx.mec.queue_bitmap)) continue; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 1ebb91db22743..11a385264bbd2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -152,21 +152,10 @@ static void amdgpu_get_audio_func(struct amdgpu_device *adev) int amdgpu_driver_load_kms(struct 
amdgpu_device *adev, unsigned long flags) { struct drm_device *dev; - struct pci_dev *parent; int r, acpi_status; dev = adev_to_drm(adev); - if (amdgpu_has_atpx() && - (amdgpu_is_atpx_hybrid() || - amdgpu_has_atpx_dgpu_power_cntl()) && - ((flags & AMD_IS_APU) == 0) && - !pci_is_thunderbolt_attached(to_pci_dev(dev->dev))) - flags |= AMD_IS_PX; - - parent = pci_upstream_bridge(adev->pdev); - adev->has_pr3 = parent ? pci_pr3_present(parent) : false; - /* amdgpu_device_init should report only fatal error * like memory allocation failure or iomapping failure, * or memory manager initialization failure, it must diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 5661b82d84d46..dda53fe30975d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -1303,7 +1303,8 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo) !(abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) return; - dma_resv_lock(bo->base.resv, NULL); + if (WARN_ON_ONCE(!dma_resv_trylock(bo->base.resv))) + return; r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, &fence); if (!WARN_ON(r)) { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 9189fb85a4dd4..5831aa40b1e81 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -1334,6 +1334,8 @@ static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = { { 0x1002, 0x15dd, 0x103c, 0x83e7, 0xd3 }, /* GFXOFF is unstable on C6 parts with a VBIOS 113-RAVEN-114 */ { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 }, + /* Apple MacBook Pro (15-inch, 2019) Radeon Pro Vega 20 4 GB */ + { 0x1002, 0x69af, 0x106b, 0x019a, 0xc0 }, { 0, 0, 0, 0, 0 }, }; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index a2f8ed0e6a644..f1b794d5d87d6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ 
b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -788,7 +788,7 @@ static int gmc_v10_0_mc_init(struct amdgpu_device *adev) adev->gmc.aper_size = pci_resource_len(adev->pdev, 0); #ifdef CONFIG_X86_64 - if (adev->flags & AMD_IS_APU) { + if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev)) { adev->gmc.aper_base = adev->gfxhub.funcs->get_mc_fb_offset(adev); adev->gmc.aper_size = adev->gmc.real_vram_size; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index ab8adbff9e2d0..5206e2da334a4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -381,8 +381,9 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev) adev->gmc.aper_size = pci_resource_len(adev->pdev, 0); #ifdef CONFIG_X86_64 - if (adev->flags & AMD_IS_APU && - adev->gmc.real_vram_size > adev->gmc.aper_size) { + if ((adev->flags & AMD_IS_APU) && + adev->gmc.real_vram_size > adev->gmc.aper_size && + !amdgpu_passthrough(adev)) { adev->gmc.aper_base = ((u64)RREG32(mmMC_VM_FB_OFFSET)) << 22; adev->gmc.aper_size = adev->gmc.real_vram_size; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 054733838292c..d07d36786836e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -581,7 +581,7 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev) adev->gmc.aper_size = pci_resource_len(adev->pdev, 0); #ifdef CONFIG_X86_64 - if (adev->flags & AMD_IS_APU) { + if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev)) { adev->gmc.aper_base = ((u64)RREG32(mmMC_VM_FB_OFFSET)) << 22; adev->gmc.aper_size = adev->gmc.real_vram_size; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 88c1eb9ad0684..2fb24178eaef9 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1420,7 +1420,7 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev) */ /* check 
whether both host-gpu and gpu-gpu xgmi links exist */ - if ((adev->flags & AMD_IS_APU) || + if (((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev)) || (adev->gmc.xgmi.supported && adev->gmc.xgmi.connected_to_cpu)) { adev->gmc.aper_base = @@ -1684,7 +1684,7 @@ static int gmc_v9_0_sw_fini(void *handle) amdgpu_gem_force_release(adev); amdgpu_vm_manager_fini(adev); amdgpu_gart_table_vram_free(adev); - amdgpu_bo_unref(&adev->gmc.pdb0_bo); + amdgpu_bo_free_kernel(&adev->gmc.pdb0_bo, NULL, &adev->gmc.ptr_pdb0); amdgpu_bo_fini(adev); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index da11ceba06981..ad9bfc772bdff 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -569,8 +569,8 @@ static void vcn_v3_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)), 0, indirect); /* VCN global tiling registers */ - WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET( - UVD, 0, mmUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect); + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + UVD, inst_idx, mmUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect); } static void vcn_v3_0_disable_static_power_gating(struct amdgpu_device *adev, int inst) @@ -1474,8 +1474,11 @@ static int vcn_v3_0_start_sriov(struct amdgpu_device *adev) static int vcn_v3_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx) { + struct dpg_pause_state state = {.fw_based = VCN_DPG_STATE__UNPAUSE}; uint32_t tmp; + vcn_v3_0_pause_dpg_mode(adev, 0, &state); + /* Wait for power status to be 1 */ SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 1, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 4bfc0c8ab764b..70122978bdd0f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1416,6 +1416,12 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep, return ret; } +static bool kfd_flush_tlb_after_unmap(struct kfd_dev *dev) { + return KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2) || + (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 1) && + dev->adev->sdma.instance[0].fw_version >= 18); +} + static int kfd_ioctl_map_memory_to_gpu(struct file *filep, struct kfd_process *p, void *data) { @@ -1503,7 +1509,7 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep, } /* Flush TLBs after waiting for the page table updates to complete */ - if (table_freed) { + if (table_freed || !kfd_flush_tlb_after_unmap(dev)) { for (i = 0; i < args->n_devices; i++) { peer = kfd_device_by_id(devices_arr[i]); if (WARN_ON_ONCE(!peer)) @@ -1603,7 +1609,7 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep, } mutex_unlock(&p->mutex); - if (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2)) { + if (kfd_flush_tlb_after_unmap(dev)) { err = amdgpu_amdkfd_gpuvm_sync_memory(dev->adev, (struct kgd_mem *) mem, true); if (err) { @@ -1840,13 +1846,9 @@ static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data) if (!args->start_addr || !args->size) return -EINVAL; - mutex_lock(&p->mutex); - r = svm_ioctl(p, args->op, args->start_addr, args->size, args->nattr, args->attrs); - mutex_unlock(&p->mutex); - return r; } #else diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index 9624bbe8b5013..281def1c6c08e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -1567,7 +1567,7 @@ int kfd_create_crat_image_acpi(void **crat_image, size_t *size) /* Fetch the CRAT table from ACPI */ status = acpi_get_table(CRAT_SIGNATURE, 0, &crat_table); if (status == AE_NOT_FOUND) { - pr_warn("CRAT table not found\n"); + pr_info("CRAT table not found\n"); return -ENODATA; } else if (ACPI_FAILURE(status)) { const char *err = 
acpi_format_exception(status); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 2b65d0acae2ce..2fdbe2f475e4f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -480,15 +480,10 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, } /* Verify module parameters regarding mapped process number*/ - if ((hws_max_conc_proc < 0) - || (hws_max_conc_proc > kfd->vm_info.vmid_num_kfd)) { - dev_err(kfd_device, - "hws_max_conc_proc %d must be between 0 and %d, use %d instead\n", - hws_max_conc_proc, kfd->vm_info.vmid_num_kfd, - kfd->vm_info.vmid_num_kfd); + if (hws_max_conc_proc >= 0) + kfd->max_proc_per_quantum = min((u32)hws_max_conc_proc, kfd->vm_info.vmid_num_kfd); + else kfd->max_proc_per_quantum = kfd->vm_info.vmid_num_kfd; - } else - kfd->max_proc_per_quantum = hws_max_conc_proc; /* calculate max size of mqds needed for queues */ size = max_num_of_queues_per_device * diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index afe72dd11325d..6ca7e12bdab84 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -531,6 +531,8 @@ static struct kfd_event_waiter *alloc_event_waiters(uint32_t num_events) event_waiters = kmalloc_array(num_events, sizeof(struct kfd_event_waiter), GFP_KERNEL); + if (!event_waiters) + return NULL; for (i = 0; (event_waiters) && (i < num_events) ; i++) { init_wait(&event_waiters[i].wait); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index d1145da5348f4..74f162887d3b1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -1150,7 +1150,6 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn, cancel_delayed_work_sync(&p->eviction_work); cancel_delayed_work_sync(&p->restore_work); - cancel_delayed_work_sync(&p->svms.restore_work); mutex_lock(&p->mutex); diff 
--git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c index deae12dc777d2..40d0d8cb3fe83 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c @@ -268,15 +268,6 @@ int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd) return ret; } - ret = anon_inode_getfd(kfd_smi_name, &kfd_smi_ev_fops, (void *)client, - O_RDWR); - if (ret < 0) { - kfifo_free(&client->fifo); - kfree(client); - return ret; - } - *fd = ret; - init_waitqueue_head(&client->wait_queue); spin_lock_init(&client->lock); client->events = 0; @@ -286,5 +277,20 @@ int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd) list_add_rcu(&client->list, &dev->smi_clients); spin_unlock(&dev->smi_lock); + ret = anon_inode_getfd(kfd_smi_name, &kfd_smi_ev_fops, (void *)client, + O_RDWR); + if (ret < 0) { + spin_lock(&dev->smi_lock); + list_del_rcu(&client->list); + spin_unlock(&dev->smi_lock); + + synchronize_rcu(); + + kfifo_free(&client->fifo); + kfree(client); + return ret; + } + *fd = ret; + return 0; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index f2805ba74c80b..ffec25e642e25 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1643,13 +1643,14 @@ static void svm_range_restore_work(struct work_struct *work) pr_debug("restore svm ranges\n"); - /* kfd_process_notifier_release destroys this worker thread. So during - * the lifetime of this thread, kfd_process and mm will be valid. 
- */ p = container_of(svms, struct kfd_process, svms); - mm = p->mm; - if (!mm) + + /* Keep mm reference when svm_range_validate_and_map ranges */ + mm = get_task_mm(p->lead_thread); + if (!mm) { + pr_debug("svms 0x%p process mm gone\n", svms); return; + } svm_range_list_lock_and_flush_work(svms, mm); mutex_lock(&svms->lock); @@ -1703,6 +1704,7 @@ static void svm_range_restore_work(struct work_struct *work) out_reschedule: mutex_unlock(&svms->lock); mmap_write_unlock(mm); + mmput(mm); /* If validation failed, reschedule another attempt */ if (evicted_ranges) { @@ -1985,10 +1987,9 @@ svm_range_update_notifier_and_interval_tree(struct mm_struct *mm, } static void -svm_range_handle_list_op(struct svm_range_list *svms, struct svm_range *prange) +svm_range_handle_list_op(struct svm_range_list *svms, struct svm_range *prange, + struct mm_struct *mm) { - struct mm_struct *mm = prange->work_item.mm; - switch (prange->work_item.op) { case SVM_OP_NULL: pr_debug("NULL OP 0x%p prange 0x%p [0x%lx 0x%lx]\n", @@ -2065,40 +2066,44 @@ static void svm_range_deferred_list_work(struct work_struct *work) struct svm_range_list *svms; struct svm_range *prange; struct mm_struct *mm; - struct kfd_process *p; svms = container_of(work, struct svm_range_list, deferred_list_work); pr_debug("enter svms 0x%p\n", svms); - p = container_of(svms, struct kfd_process, svms); - /* Avoid mm is gone when inserting mmu notifier */ - mm = get_task_mm(p->lead_thread); - if (!mm) { - pr_debug("svms 0x%p process mm gone\n", svms); - return; - } -retry: - mmap_write_lock(mm); - - /* Checking for the need to drain retry faults must be inside - * mmap write lock to serialize with munmap notifiers. 
- */ - if (unlikely(atomic_read(&svms->drain_pagefaults))) { - mmap_write_unlock(mm); - svm_range_drain_retry_fault(svms); - goto retry; - } - spin_lock(&svms->deferred_list_lock); while (!list_empty(&svms->deferred_range_list)) { prange = list_first_entry(&svms->deferred_range_list, struct svm_range, deferred_list); - list_del_init(&prange->deferred_list); spin_unlock(&svms->deferred_list_lock); pr_debug("prange 0x%p [0x%lx 0x%lx] op %d\n", prange, prange->start, prange->last, prange->work_item.op); + mm = prange->work_item.mm; +retry: + mmap_write_lock(mm); + + /* Checking for the need to drain retry faults must be inside + * mmap write lock to serialize with munmap notifiers. + */ + if (unlikely(atomic_read(&svms->drain_pagefaults))) { + mmap_write_unlock(mm); + svm_range_drain_retry_fault(svms); + goto retry; + } + + /* Remove from deferred_list must be inside mmap write lock, for + * two race cases: + * 1. unmap_from_cpu may change work_item.op and add the range + * to deferred_list again, cause use after free bug. + * 2. svm_range_list_lock_and_flush_work may hold mmap write + * lock and continue because deferred_list is empty, but + * deferred_list work is actually waiting for mmap lock. 
+ */ + spin_lock(&svms->deferred_list_lock); + list_del_init(&prange->deferred_list); + spin_unlock(&svms->deferred_list_lock); + mutex_lock(&svms->lock); mutex_lock(&prange->migrate_mutex); while (!list_empty(&prange->child_list)) { @@ -2109,19 +2114,20 @@ static void svm_range_deferred_list_work(struct work_struct *work) pr_debug("child prange 0x%p op %d\n", pchild, pchild->work_item.op); list_del_init(&pchild->child_list); - svm_range_handle_list_op(svms, pchild); + svm_range_handle_list_op(svms, pchild, mm); } mutex_unlock(&prange->migrate_mutex); - svm_range_handle_list_op(svms, prange); + svm_range_handle_list_op(svms, prange, mm); mutex_unlock(&svms->lock); + mmap_write_unlock(mm); + + /* Pairs with mmget in svm_range_add_list_work */ + mmput(mm); spin_lock(&svms->deferred_list_lock); } spin_unlock(&svms->deferred_list_lock); - - mmap_write_unlock(mm); - mmput(mm); pr_debug("exit svms 0x%p\n", svms); } @@ -2139,6 +2145,9 @@ svm_range_add_list_work(struct svm_range_list *svms, struct svm_range *prange, prange->work_item.op = op; } else { prange->work_item.op = op; + + /* Pairs with mmput in deferred_list_work */ + mmget(mm); prange->work_item.mm = mm; list_add_tail(&prange->deferred_list, &prange->svms->deferred_range_list); @@ -2830,6 +2839,8 @@ void svm_range_list_fini(struct kfd_process *p) pr_debug("pasid 0x%x svms 0x%p\n", p->pasid, &p->svms); + cancel_delayed_work_sync(&p->svms.restore_work); + /* Ensure list work is finished before process is destroyed */ flush_work(&p->svms.deferred_list_work); @@ -2840,7 +2851,6 @@ void svm_range_list_fini(struct kfd_process *p) atomic_inc(&p->svms.drain_pagefaults); svm_range_drain_retry_fault(&p->svms); - list_for_each_entry_safe(prange, next, &p->svms.list, list) { svm_range_unlink(prange); svm_range_remove_notifier(prange); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 075429bea4275..24db2297857b4 100644 --- 
a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2613,10 +2613,13 @@ static int dm_resume(void *handle) * before the 0 streams commit. * * DC expects that link encoder assignments are *not* valid - * when committing a state, so as a workaround it needs to be - * cleared here. + * when committing a state, so as a workaround we can copy + * off of the current state. + * + * We lose the previous assignments, but we had already + * commit 0 streams anyway. */ - link_enc_cfg_init(dm->dc, dc_state); + link_enc_cfg_copy(adev->dm.dc->current_state, dc_state); if (dc_enable_dmub_notifications(adev->dm.dc)) amdgpu_dm_outbox_init(adev); @@ -2690,7 +2693,8 @@ static int dm_resume(void *handle) * this is the case when traversing through already created * MST connectors, should be skipped */ - if (aconnector->mst_port) + if (aconnector->dc_link && + aconnector->dc_link->type == dc_connection_mst_branch) continue; mutex_lock(&aconnector->hpd_lock); @@ -3948,7 +3952,7 @@ static u32 convert_brightness_to_user(const struct amdgpu_dm_backlight_caps *cap max - min); } -static int amdgpu_dm_backlight_set_level(struct amdgpu_display_manager *dm, +static void amdgpu_dm_backlight_set_level(struct amdgpu_display_manager *dm, int bl_idx, u32 user_brightness) { @@ -3979,7 +3983,8 @@ static int amdgpu_dm_backlight_set_level(struct amdgpu_display_manager *dm, DRM_DEBUG("DM: Failed to update backlight on eDP[%d]\n", bl_idx); } - return rc ? 
0 : 1; + if (rc) + dm->actual_brightness[bl_idx] = user_brightness; } static int amdgpu_dm_backlight_update_status(struct backlight_device *bd) @@ -8144,6 +8149,9 @@ static void amdgpu_dm_connector_add_common_modes(struct drm_encoder *encoder, mode = amdgpu_dm_create_common_mode(encoder, common_modes[i].name, common_modes[i].w, common_modes[i].h); + if (!mode) + continue; + drm_mode_probed_add(connector, mode); amdgpu_dm_connector->num_modes++; } @@ -9908,7 +9916,7 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) /* restore the backlight level */ for (i = 0; i < dm->num_of_edps; i++) { if (dm->backlight_dev[i] && - (amdgpu_dm_backlight_get_level(dm, i) != dm->brightness[i])) + (dm->actual_brightness[i] != dm->brightness[i])) amdgpu_dm_backlight_set_level(dm, i, dm->brightness[i]); } #endif @@ -10858,10 +10866,13 @@ static int dm_check_crtc_cursor(struct drm_atomic_state *state, static int add_affected_mst_dsc_crtcs(struct drm_atomic_state *state, struct drm_crtc *crtc) { struct drm_connector *connector; - struct drm_connector_state *conn_state; + struct drm_connector_state *conn_state, *old_conn_state; struct amdgpu_dm_connector *aconnector = NULL; int i; - for_each_new_connector_in_state(state, connector, conn_state, i) { + for_each_oldnew_connector_in_state(state, connector, old_conn_state, conn_state, i) { + if (!conn_state->crtc) + conn_state = old_conn_state; + if (conn_state->crtc != crtc) continue; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index b9a69b0cef23b..7a23cd603714b 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -540,6 +540,12 @@ struct amdgpu_display_manager { * cached backlight values. */ u32 brightness[AMDGPU_DM_MAX_NUM_EDP]; + /** + * @actual_brightness: + * + * last successfully applied backlight values. 
+ */ + u32 actual_brightness[AMDGPU_DM_MAX_NUM_EDP]; }; enum dsc_clock_force_state { diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c index 26719efa5396d..12d437d9a0e4c 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c @@ -227,8 +227,10 @@ static ssize_t dp_link_settings_read(struct file *f, char __user *buf, break; r = put_user(*(rd_buf + result), buf); - if (r) + if (r) { + kfree(rd_buf); return r; /* r = -EFAULT */ + } buf += 1; size -= 1; @@ -389,8 +391,10 @@ static ssize_t dp_phy_settings_read(struct file *f, char __user *buf, break; r = put_user((*(rd_buf + result)), buf); - if (r) + if (r) { + kfree(rd_buf); return r; /* r = -EFAULT */ + } buf += 1; size -= 1; @@ -1359,8 +1363,10 @@ static ssize_t dp_dsc_clock_en_read(struct file *f, char __user *buf, break; } - if (!pipe_ctx) + if (!pipe_ctx) { + kfree(rd_buf); return -ENXIO; + } dsc = pipe_ctx->stream_res.dsc; if (dsc) @@ -1376,8 +1382,10 @@ static ssize_t dp_dsc_clock_en_read(struct file *f, char __user *buf, break; r = put_user(*(rd_buf + result), buf); - if (r) + if (r) { + kfree(rd_buf); return r; /* r = -EFAULT */ + } buf += 1; size -= 1; @@ -1546,8 +1554,10 @@ static ssize_t dp_dsc_slice_width_read(struct file *f, char __user *buf, break; } - if (!pipe_ctx) + if (!pipe_ctx) { + kfree(rd_buf); return -ENXIO; + } dsc = pipe_ctx->stream_res.dsc; if (dsc) @@ -1563,8 +1573,10 @@ static ssize_t dp_dsc_slice_width_read(struct file *f, char __user *buf, break; r = put_user(*(rd_buf + result), buf); - if (r) + if (r) { + kfree(rd_buf); return r; /* r = -EFAULT */ + } buf += 1; size -= 1; @@ -1731,8 +1743,10 @@ static ssize_t dp_dsc_slice_height_read(struct file *f, char __user *buf, break; } - if (!pipe_ctx) + if (!pipe_ctx) { + kfree(rd_buf); return -ENXIO; + } dsc = pipe_ctx->stream_res.dsc; if (dsc) @@ -1748,8 +1762,10 @@ static ssize_t 
dp_dsc_slice_height_read(struct file *f, char __user *buf, break; r = put_user(*(rd_buf + result), buf); - if (r) + if (r) { + kfree(rd_buf); return r; /* r = -EFAULT */ + } buf += 1; size -= 1; @@ -1912,8 +1928,10 @@ static ssize_t dp_dsc_bits_per_pixel_read(struct file *f, char __user *buf, break; } - if (!pipe_ctx) + if (!pipe_ctx) { + kfree(rd_buf); return -ENXIO; + } dsc = pipe_ctx->stream_res.dsc; if (dsc) @@ -1929,8 +1947,10 @@ static ssize_t dp_dsc_bits_per_pixel_read(struct file *f, char __user *buf, break; r = put_user(*(rd_buf + result), buf); - if (r) + if (r) { + kfree(rd_buf); return r; /* r = -EFAULT */ + } buf += 1; size -= 1; @@ -2088,8 +2108,10 @@ static ssize_t dp_dsc_pic_width_read(struct file *f, char __user *buf, break; } - if (!pipe_ctx) + if (!pipe_ctx) { + kfree(rd_buf); return -ENXIO; + } dsc = pipe_ctx->stream_res.dsc; if (dsc) @@ -2105,8 +2127,10 @@ static ssize_t dp_dsc_pic_width_read(struct file *f, char __user *buf, break; r = put_user(*(rd_buf + result), buf); - if (r) + if (r) { + kfree(rd_buf); return r; /* r = -EFAULT */ + } buf += 1; size -= 1; @@ -2145,8 +2169,10 @@ static ssize_t dp_dsc_pic_height_read(struct file *f, char __user *buf, break; } - if (!pipe_ctx) + if (!pipe_ctx) { + kfree(rd_buf); return -ENXIO; + } dsc = pipe_ctx->stream_res.dsc; if (dsc) @@ -2162,8 +2188,10 @@ static ssize_t dp_dsc_pic_height_read(struct file *f, char __user *buf, break; r = put_user(*(rd_buf + result), buf); - if (r) + if (r) { + kfree(rd_buf); return r; /* r = -EFAULT */ + } buf += 1; size -= 1; @@ -2217,8 +2245,10 @@ static ssize_t dp_dsc_chunk_size_read(struct file *f, char __user *buf, break; } - if (!pipe_ctx) + if (!pipe_ctx) { + kfree(rd_buf); return -ENXIO; + } dsc = pipe_ctx->stream_res.dsc; if (dsc) @@ -2234,8 +2264,10 @@ static ssize_t dp_dsc_chunk_size_read(struct file *f, char __user *buf, break; r = put_user(*(rd_buf + result), buf); - if (r) + if (r) { + kfree(rd_buf); return r; /* r = -EFAULT */ + } buf += 1; size -= 1; @@ 
-2289,8 +2321,10 @@ static ssize_t dp_dsc_slice_bpg_offset_read(struct file *f, char __user *buf, break; } - if (!pipe_ctx) + if (!pipe_ctx) { + kfree(rd_buf); return -ENXIO; + } dsc = pipe_ctx->stream_res.dsc; if (dsc) @@ -2306,8 +2340,10 @@ static ssize_t dp_dsc_slice_bpg_offset_read(struct file *f, char __user *buf, break; r = put_user(*(rd_buf + result), buf); - if (r) + if (r) { + kfree(rd_buf); return r; /* r = -EFAULT */ + } buf += 1; size -= 1; @@ -3459,8 +3495,10 @@ static ssize_t dcc_en_bits_read( dc->hwss.get_dcc_en_bits(dc, dcc_en_bits); rd_buf = kcalloc(rd_buf_size, sizeof(char), GFP_KERNEL); - if (!rd_buf) + if (!rd_buf) { + kfree(dcc_en_bits); return -ENOMEM; + } for (i = 0; i < num_pipes; i++) offset += snprintf(rd_buf + offset, rd_buf_size - offset, @@ -3473,8 +3511,10 @@ static ssize_t dcc_en_bits_read( if (*pos >= rd_buf_size) break; r = put_user(*(rd_buf + result), buf); - if (r) + if (r) { + kfree(rd_buf); return r; /* r = -EFAULT */ + } buf += 1; size -= 1; *pos += 1; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c index c510638b4f997..a009fc654ac95 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c @@ -149,10 +149,8 @@ bool amdgpu_dm_link_setup_psr(struct dc_stream_state *stream) link = stream->link; - psr_config.psr_version = link->dpcd_caps.psr_caps.psr_version; - - if (psr_config.psr_version > 0) { - psr_config.psr_exit_link_training_required = 0x1; + if (link->psr_settings.psr_version != DC_PSR_VERSION_UNSUPPORTED) { + psr_config.psr_version = link->psr_settings.psr_version; psr_config.psr_frame_capture_indication_req = 0; psr_config.psr_rfb_setup_time = 0x37; psr_config.psr_sdp_transmit_line_num_deadline = 0x20; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index ba1aa994db4b7..62bc6ce887535 100644 --- 
a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -76,6 +76,8 @@ #include "dc_trace.h" +#include "dce/dmub_outbox.h" + #define CTX \ dc->ctx @@ -3707,13 +3709,23 @@ void dc_hardware_release(struct dc *dc) } #endif -/** - * dc_enable_dmub_notifications - Returns whether dmub notification can be enabled - * @dc: dc structure +/* + ***************************************************************************** + * Function: dc_is_dmub_outbox_supported - + * + * @brief + * Checks whether DMUB FW supports outbox notifications, if supported + * DM should register outbox interrupt prior to actually enabling interrupts + * via dc_enable_dmub_outbox * - * Returns: True to enable dmub notifications, False otherwise + * @param + * [in] dc: dc structure + * + * @return + * True if DMUB FW supports outbox notifications, False otherwise + ***************************************************************************** */ -bool dc_enable_dmub_notifications(struct dc *dc) +bool dc_is_dmub_outbox_supported(struct dc *dc) { #if defined(CONFIG_DRM_AMD_DC_DCN) /* YELLOW_CARP B0 USB4 DPIA needs dmub notifications for interrupts */ @@ -3728,6 +3740,48 @@ bool dc_enable_dmub_notifications(struct dc *dc) /** * dc_process_dmub_aux_transfer_async - Submits aux command to dmub via inbox message + * Function: dc_enable_dmub_notifications + * + * @brief + * Calls dc_is_dmub_outbox_supported to check if dmub fw supports outbox + * notifications. All DMs shall switch to dc_is_dmub_outbox_supported. + * This API shall be removed after switching. 
+ * + * @param + * [in] dc: dc structure + * + * @return + * True if DMUB FW supports outbox notifications, False otherwise + ***************************************************************************** + */ +bool dc_enable_dmub_notifications(struct dc *dc) +{ + return dc_is_dmub_outbox_supported(dc); +} + +/** + ***************************************************************************** + * Function: dc_enable_dmub_outbox + * + * @brief + * Enables DMUB unsolicited notifications to x86 via outbox + * + * @param + * [in] dc: dc structure + * + * @return + * None + ***************************************************************************** + */ +void dc_enable_dmub_outbox(struct dc *dc) +{ + struct dc_context *dc_ctx = dc->ctx; + + dmub_enable_outbox_notification(dc_ctx->dmub_srv); +} + +/** + ***************************************************************************** * Sets port index appropriately for legacy DDC * @dc: dc structure * @link_index: link index @@ -3829,7 +3883,7 @@ uint8_t get_link_index_from_dpia_port_index(const struct dc *dc, * [in] payload: aux payload * [out] notify: set_config immediate reply * - * @return + * @return * True if successful, False if failure ***************************************************************************** */ diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 61b8f29a0c303..49d5271dcfdc8 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -2378,22 +2378,27 @@ static enum link_training_result dp_perform_8b_10b_link_training( repeater_id--) { status = perform_clock_recovery_sequence(link, link_res, lt_settings, repeater_id); - if (status != LINK_TRAINING_SUCCESS) + if (status != LINK_TRAINING_SUCCESS) { + repeater_training_done(link, repeater_id); break; + } status = perform_channel_equalization_sequence(link, link_res, lt_settings, repeater_id); + repeater_training_done(link, 
repeater_id); + if (status != LINK_TRAINING_SUCCESS) break; - repeater_training_done(link, repeater_id); + for (lane = 0; lane < LANE_COUNT_DP_MAX; lane++) { + lt_settings->dpcd_lane_settings[lane].raw = 0; + lt_settings->hw_lane_settings[lane].VOLTAGE_SWING = 0; + lt_settings->hw_lane_settings[lane].PRE_EMPHASIS = 0; + } } - - for (lane = 0; lane < (uint8_t)lt_settings->link_settings.lane_count; lane++) - lt_settings->dpcd_lane_settings[lane].raw = 0; } if (status == LINK_TRAINING_SUCCESS) { diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c index a55944da8d53f..72a3fded7142a 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c @@ -122,6 +122,7 @@ static void remove_link_enc_assignment( stream->link_enc = NULL; state->res_ctx.link_enc_cfg_ctx.link_enc_assignments[i].eng_id = ENGINE_ID_UNKNOWN; state->res_ctx.link_enc_cfg_ctx.link_enc_assignments[i].stream = NULL; + dc_stream_release(stream); break; } } @@ -271,6 +272,13 @@ void link_enc_cfg_init( state->res_ctx.link_enc_cfg_ctx.mode = LINK_ENC_CFG_STEADY; } +void link_enc_cfg_copy(const struct dc_state *src_ctx, struct dc_state *dst_ctx) +{ + memcpy(&dst_ctx->res_ctx.link_enc_cfg_ctx, + &src_ctx->res_ctx.link_enc_cfg_ctx, + sizeof(dst_ctx->res_ctx.link_enc_cfg_ctx)); +} + void link_enc_cfg_link_encs_assign( struct dc *dc, struct dc_state *state, diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 18757c1585232..f0a97b82c33a3 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -1640,6 +1640,9 @@ static bool are_stream_backends_same( if (is_timing_changed(stream_a, stream_b)) return false; + if (stream_a->signal != stream_b->signal) + return false; + if (stream_a->dpms_off != stream_b->dpms_off) return false; @@ -1664,8 +1667,8 @@ bool 
dc_is_stream_unchanged( if (old_stream->ignore_msa_timing_param != stream->ignore_msa_timing_param) return false; - // Only Have Audio left to check whether it is same or not. This is a corner case for Tiled sinks - if (old_stream->audio_info.mode_count != stream->audio_info.mode_count) + /*compare audio info*/ + if (memcmp(&old_stream->audio_info, &stream->audio_info, sizeof(stream->audio_info)) != 0) return false; return true; diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index b518648906212..6a8c100a3688d 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -1448,8 +1448,11 @@ void dc_z10_restore(const struct dc *dc); void dc_z10_save_init(struct dc *dc); #endif +bool dc_is_dmub_outbox_supported(struct dc *dc); bool dc_enable_dmub_notifications(struct dc *dc); +void dc_enable_dmub_outbox(struct dc *dc); + bool dc_process_dmub_aux_transfer_async(struct dc *dc, uint32_t link_index, struct aux_payload *payload); diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_outbox.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_outbox.c index faad8555ddbb6..fff1d07d865d7 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_outbox.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_outbox.c @@ -22,20 +22,23 @@ * Authors: AMD */ -#include "dmub_outbox.h" +#include "dc.h" #include "dc_dmub_srv.h" +#include "dmub_outbox.h" #include "dmub/inc/dmub_cmd.h" -/** - * dmub_enable_outbox_notification - Sends inbox cmd to dmub to enable outbox1 - * messages with interrupt. Dmub sends outbox1 - * message and triggers outbox1 interrupt. - * @dc: dc structure +/* + * Function: dmub_enable_outbox_notification + * + * @brief + * Sends inbox cmd to dmub for enabling outbox notifications to x86. 
+ * + * @param + * [in] dmub_srv: dmub_srv structure */ -void dmub_enable_outbox_notification(struct dc *dc) +void dmub_enable_outbox_notification(struct dc_dmub_srv *dmub_srv) { union dmub_rb_cmd cmd; - struct dc_context *dc_ctx = dc->ctx; memset(&cmd, 0x0, sizeof(cmd)); cmd.outbox1_enable.header.type = DMUB_CMD__OUTBOX1_ENABLE; @@ -45,7 +48,7 @@ void dmub_enable_outbox_notification(struct dc *dc) sizeof(cmd.outbox1_enable.header); cmd.outbox1_enable.enable = true; - dc_dmub_srv_cmd_queue(dc_ctx->dmub_srv, &cmd); - dc_dmub_srv_cmd_execute(dc_ctx->dmub_srv); - dc_dmub_srv_wait_idle(dc_ctx->dmub_srv); + dc_dmub_srv_cmd_queue(dmub_srv, &cmd); + dc_dmub_srv_cmd_execute(dmub_srv); + dc_dmub_srv_wait_idle(dmub_srv); } diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_outbox.h b/drivers/gpu/drm/amd/display/dc/dce/dmub_outbox.h index 4e0aa0d1a2d5c..58ceabb9d497d 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_outbox.h +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_outbox.h @@ -26,8 +26,8 @@ #ifndef _DMUB_OUTBOX_H_ #define _DMUB_OUTBOX_H_ -#include "dc.h" +struct dc_dmub_srv; -void dmub_enable_outbox_notification(struct dc *dc); +void dmub_enable_outbox_notification(struct dc_dmub_srv *dmub_srv); #endif /* _DMUB_OUTBOX_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c index f4f423d0b8c3f..80595d7f060c3 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c @@ -940,6 +940,7 @@ static const struct hubbub_funcs hubbub1_funcs = { .program_watermarks = hubbub1_program_watermarks, .is_allow_self_refresh_enabled = hubbub1_is_allow_self_refresh_enabled, .allow_self_refresh_control = hubbub1_allow_self_refresh_control, + .verify_allow_pstate_change_high = hubbub1_verify_allow_pstate_change_high, }; void hubbub1_construct(struct hubbub *hubbub, diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c 
b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index 530a72e3eefe2..8ca4c06ac5607 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -1112,9 +1112,13 @@ static bool dcn10_hw_wa_force_recovery(struct dc *dc) void dcn10_verify_allow_pstate_change_high(struct dc *dc) { + struct hubbub *hubbub = dc->res_pool->hubbub; static bool should_log_hw_state; /* prevent hw state log by default */ - if (!hubbub1_verify_allow_pstate_change_high(dc->res_pool->hubbub)) { + if (!hubbub->funcs->verify_allow_pstate_change_high) + return; + + if (!hubbub->funcs->verify_allow_pstate_change_high(hubbub)) { int i = 0; if (should_log_hw_state) @@ -1123,8 +1127,8 @@ void dcn10_verify_allow_pstate_change_high(struct dc *dc) TRACE_DC_PIPE_STATE(pipe_ctx, i, MAX_PIPES); BREAK_TO_DEBUGGER(); if (dcn10_hw_wa_force_recovery(dc)) { - /*check again*/ - if (!hubbub1_verify_allow_pstate_change_high(dc->res_pool->hubbub)) + /*check again*/ + if (!hubbub->funcs->verify_allow_pstate_change_high(hubbub)) BREAK_TO_DEBUGGER(); } } @@ -1493,21 +1497,13 @@ void dcn10_init_hw(struct dc *dc) link->link_status.link_active = true; } - /* Power gate DSCs */ - if (!is_optimized_init_done) { - for (i = 0; i < res_pool->res_cap->num_dsc; i++) - if (hws->funcs.dsc_pg_control != NULL) - hws->funcs.dsc_pg_control(hws, res_pool->dscs[i]->inst, false); - } - - /* Enable outbox notification feature of dmub */ - if (dc->debug.enable_dmub_aux_for_legacy_ddc) - dmub_enable_outbox_notification(dc); - /* we want to turn off all dp displays before doing detection */ if (dc->config.power_down_display_on_boot) dc_link_blank_all_dp_displays(dc); + if (hws->funcs.enable_power_gating_plane) + hws->funcs.enable_power_gating_plane(dc->hwseq, true); + /* If taking control over from VBIOS, we may want to optimize our first * mode set, so we need to skip powering down pipes until we know which * pipes we want to use. 
@@ -1560,8 +1556,6 @@ void dcn10_init_hw(struct dc *dc) REG_UPDATE(DCFCLK_CNTL, DCFCLK_GATE_DIS, 0); } - if (hws->funcs.enable_power_gating_plane) - hws->funcs.enable_power_gating_plane(dc->hwseq, true); if (dc->clk_mgr->funcs->notify_wm_ranges) dc->clk_mgr->funcs->notify_wm_ranges(dc->clk_mgr); @@ -2526,14 +2520,18 @@ void dcn10_update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx) struct mpc *mpc = dc->res_pool->mpc; struct mpc_tree *mpc_tree_params = &(pipe_ctx->stream_res.opp->mpc_tree_params); - if (per_pixel_alpha) - blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA; - else - blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_GLOBAL_ALPHA; - blnd_cfg.overlap_only = false; blnd_cfg.global_gain = 0xff; + if (per_pixel_alpha && pipe_ctx->plane_state->global_alpha) { + blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA_COMBINED_GLOBAL_GAIN; + blnd_cfg.global_gain = pipe_ctx->plane_state->global_alpha_value; + } else if (per_pixel_alpha) { + blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA; + } else { + blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_GLOBAL_ALPHA; + } + if (pipe_ctx->plane_state->global_alpha) blnd_cfg.global_alpha = pipe_ctx->plane_state->global_alpha_value; else diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c index 4991e93e5308c..8a72b7007b9d1 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c @@ -2313,14 +2313,18 @@ void dcn20_update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx) struct mpc *mpc = dc->res_pool->mpc; struct mpc_tree *mpc_tree_params = &(pipe_ctx->stream_res.opp->mpc_tree_params); - if (per_pixel_alpha) - blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA; - else - blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_GLOBAL_ALPHA; - blnd_cfg.overlap_only = false; blnd_cfg.global_gain = 0xff; + if (per_pixel_alpha && pipe_ctx->plane_state->global_alpha) { + 
blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA_COMBINED_GLOBAL_GAIN; + blnd_cfg.global_gain = pipe_ctx->plane_state->global_alpha_value; + } else if (per_pixel_alpha) { + blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA; + } else { + blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_GLOBAL_ALPHA; + } + if (pipe_ctx->plane_state->global_alpha) blnd_cfg.global_alpha = pipe_ctx->plane_state->global_alpha_value; else diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c index e5cc6bf45743a..ca1bbc942fd40 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c @@ -873,7 +873,7 @@ static const struct dc_debug_options debug_defaults_drv = { .clock_trace = true, .disable_pplib_clock_request = true, .min_disp_clk_khz = 100000, - .pipe_split_policy = MPC_SPLIT_DYNAMIC, + .pipe_split_policy = MPC_SPLIT_AVOID_MULT_DISP, .force_single_disp_pipe_split = false, .disable_dcc = DCC_ENABLE, .vsr_support = true, diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubbub.c index f4414de96acc5..152c9c5733f1c 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubbub.c @@ -448,6 +448,7 @@ static const struct hubbub_funcs hubbub30_funcs = { .program_watermarks = hubbub3_program_watermarks, .allow_self_refresh_control = hubbub1_allow_self_refresh_control, .is_allow_self_refresh_enabled = hubbub1_is_allow_self_refresh_enabled, + .verify_allow_pstate_change_high = hubbub1_verify_allow_pstate_change_high, .force_wm_propagate_to_pipes = hubbub3_force_wm_propagate_to_pipes, .force_pstate_change_control = hubbub3_force_pstate_change_control, .init_watermarks = hubbub3_init_watermarks, diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c index 
1db1ca19411d8..05dc0a3ae2a3b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c @@ -548,6 +548,9 @@ void dcn30_init_hw(struct dc *dc) if (dc->config.power_down_display_on_boot) dc_link_blank_all_dp_displays(dc); + if (hws->funcs.enable_power_gating_plane) + hws->funcs.enable_power_gating_plane(dc->hwseq, true); + /* If taking control over from VBIOS, we may want to optimize our first * mode set, so we need to skip powering down pipes until we know which * pipes we want to use. @@ -625,8 +628,6 @@ void dcn30_init_hw(struct dc *dc) REG_UPDATE(DCFCLK_CNTL, DCFCLK_GATE_DIS, 0); } - if (hws->funcs.enable_power_gating_plane) - hws->funcs.enable_power_gating_plane(dc->hwseq, true); if (!dcb->funcs->is_accelerated_mode(dcb) && dc->res_pool->hubbub->funcs->init_watermarks) dc->res_pool->hubbub->funcs->init_watermarks(dc->res_pool->hubbub); diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_hubbub.c index 1e3bd2e9cdcc4..a046664e20316 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_hubbub.c @@ -60,6 +60,7 @@ static const struct hubbub_funcs hubbub301_funcs = { .program_watermarks = hubbub3_program_watermarks, .allow_self_refresh_control = hubbub1_allow_self_refresh_control, .is_allow_self_refresh_enabled = hubbub1_is_allow_self_refresh_enabled, + .verify_allow_pstate_change_high = hubbub1_verify_allow_pstate_change_high, .force_wm_propagate_to_pipes = hubbub3_force_wm_propagate_to_pipes, .force_pstate_change_control = hubbub3_force_pstate_change_control, .hubbub_read_state = hubbub2_read_state, diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.c index 5e3bcaf12cac4..51c5f3685470a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.c @@ -949,6 
+949,65 @@ static void hubbub31_get_dchub_ref_freq(struct hubbub *hubbub, } } +static bool hubbub31_verify_allow_pstate_change_high(struct hubbub *hubbub) +{ + struct dcn20_hubbub *hubbub2 = TO_DCN20_HUBBUB(hubbub); + + /* + * Pstate latency is ~20us so if we wait over 40us and pstate allow + * still not asserted, we are probably stuck and going to hang + */ + const unsigned int pstate_wait_timeout_us = 100; + const unsigned int pstate_wait_expected_timeout_us = 40; + + static unsigned int max_sampled_pstate_wait_us; /* data collection */ + static bool forced_pstate_allow; /* help with revert wa */ + + unsigned int debug_data = 0; + unsigned int i; + + if (forced_pstate_allow) { + /* we hacked to force pstate allow to prevent hang last time + * we verify_allow_pstate_change_high. so disable force + * here so we can check status + */ + REG_UPDATE_2(DCHUBBUB_ARB_DRAM_STATE_CNTL, + DCHUBBUB_ARB_ALLOW_PSTATE_CHANGE_FORCE_VALUE, 0, + DCHUBBUB_ARB_ALLOW_PSTATE_CHANGE_FORCE_ENABLE, 0); + forced_pstate_allow = false; + } + + REG_WRITE(DCHUBBUB_TEST_DEBUG_INDEX, hubbub2->debug_test_index_pstate); + + for (i = 0; i < pstate_wait_timeout_us; i++) { + debug_data = REG_READ(DCHUBBUB_TEST_DEBUG_DATA); + + /* Debug bit is specific to ASIC. 
*/ + if (debug_data & (1 << 26)) { + if (i > pstate_wait_expected_timeout_us) + DC_LOG_WARNING("pstate took longer than expected ~%dus\n", i); + return true; + } + if (max_sampled_pstate_wait_us < i) + max_sampled_pstate_wait_us = i; + + udelay(1); + } + + /* force pstate allow to prevent system hang + * and break to debugger to investigate + */ + REG_UPDATE_2(DCHUBBUB_ARB_DRAM_STATE_CNTL, + DCHUBBUB_ARB_ALLOW_PSTATE_CHANGE_FORCE_VALUE, 1, + DCHUBBUB_ARB_ALLOW_PSTATE_CHANGE_FORCE_ENABLE, 1); + forced_pstate_allow = true; + + DC_LOG_WARNING("pstate TEST_DEBUG_DATA: 0x%X\n", + debug_data); + + return false; +} + static const struct hubbub_funcs hubbub31_funcs = { .update_dchub = hubbub2_update_dchub, .init_dchub_sys_ctx = hubbub31_init_dchub_sys_ctx, @@ -961,6 +1020,7 @@ static const struct hubbub_funcs hubbub31_funcs = { .program_watermarks = hubbub31_program_watermarks, .allow_self_refresh_control = hubbub1_allow_self_refresh_control, .is_allow_self_refresh_enabled = hubbub1_is_allow_self_refresh_enabled, + .verify_allow_pstate_change_high = hubbub31_verify_allow_pstate_change_high, .program_det_size = dcn31_program_det_size, .program_compbuf_size = dcn31_program_compbuf_size, .init_crb = dcn31_init_crb, @@ -982,5 +1042,7 @@ void hubbub31_construct(struct dcn20_hubbub *hubbub31, hubbub31->detile_buf_size = det_size_kb * 1024; hubbub31->pixel_chunk_size = pixel_chunk_size_kb * 1024; hubbub31->crb_size_segs = config_return_buffer_size_kb / DCN31_CRB_SEGMENT_SIZE_KB; + + hubbub31->debug_test_index_pstate = 0x6; } diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c index 4206ce5bf9a92..bdc4467b40d79 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c @@ -194,12 +194,15 @@ void dcn31_init_hw(struct dc *dc) /* Enables outbox notifications for usb4 dpia */ if (dc->res_pool->usb4_dpia_count) - dmub_enable_outbox_notification(dc); + 
dmub_enable_outbox_notification(dc->ctx->dmub_srv); /* we want to turn off all dp displays before doing detection */ if (dc->config.power_down_display_on_boot) dc_link_blank_all_dp_displays(dc); + if (hws->funcs.enable_power_gating_plane) + hws->funcs.enable_power_gating_plane(dc->hwseq, true); + /* If taking control over from VBIOS, we may want to optimize our first * mode set, so we need to skip powering down pipes until we know which * pipes we want to use. @@ -249,8 +252,6 @@ void dcn31_init_hw(struct dc *dc) REG_UPDATE(DCFCLK_CNTL, DCFCLK_GATE_DIS, 0); } - if (hws->funcs.enable_power_gating_plane) - hws->funcs.enable_power_gating_plane(dc->hwseq, true); if (!dcb->funcs->is_accelerated_mode(dcb) && dc->res_pool->hubbub->funcs->init_watermarks) dc->res_pool->hubbub->funcs->init_watermarks(dc->res_pool->hubbub); diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c index 8d64187478e42..f3933c9f57468 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c @@ -1011,7 +1011,7 @@ static const struct dc_debug_options debug_defaults_drv = { .max_downscale_src_width = 4096,/*upto true 4K*/ .disable_pplib_wm_range = false, .scl_reset_length10 = true, - .sanity_checks = false, + .sanity_checks = true, .underflow_assert_delay_us = 0xFFFFFFFF, .dwb_fi_phase = -1, // -1 = disable, .dmub_command_table = true, @@ -2025,7 +2025,9 @@ bool dcn31_validate_bandwidth(struct dc *dc, BW_VAL_TRACE_COUNT(); + DC_FP_START(); out = dcn30_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, fast_validate); + DC_FP_END(); // Disable fast_validate to set min dcfclk in alculate_wm_and_dlg if (pipe_cnt == 0) diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c index 9c74564cbd8de..8973d3a38f9c5 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c +++ b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c 
@@ -864,11 +864,11 @@ static bool setup_dsc_config( min_slices_h = inc_num_slices(dsc_common_caps.slice_caps, min_slices_h); } + is_dsc_possible = (min_slices_h <= max_slices_h); + if (pic_width % min_slices_h != 0) min_slices_h = 0; // DSC TODO: Maybe try increasing the number of slices first? - is_dsc_possible = (min_slices_h <= max_slices_h); - if (min_slices_h == 0 && max_slices_h == 0) is_dsc_possible = false; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h index 713f5558f5e17..9195dec294c2d 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h @@ -154,6 +154,8 @@ struct hubbub_funcs { bool (*is_allow_self_refresh_enabled)(struct hubbub *hubbub); void (*allow_self_refresh_control)(struct hubbub *hubbub, bool allow); + bool (*verify_allow_pstate_change_high)(struct hubbub *hubbub); + void (*apply_DEDCN21_147_wa)(struct hubbub *hubbub); void (*force_wm_propagate_to_pipes)(struct hubbub *hubbub); diff --git a/drivers/gpu/drm/amd/display/dc/inc/link_enc_cfg.h b/drivers/gpu/drm/amd/display/dc/inc/link_enc_cfg.h index a4e43b4826e0e..59ceb9ed385db 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/link_enc_cfg.h +++ b/drivers/gpu/drm/amd/display/dc/inc/link_enc_cfg.h @@ -39,6 +39,11 @@ void link_enc_cfg_init( const struct dc *dc, struct dc_state *state); +/* + * Copies a link encoder assignment from another state. + */ +void link_enc_cfg_copy(const struct dc_state *src_ctx, struct dc_state *dst_ctx); + /* * Algorithm for assigning available DIG link encoders to streams. 
* diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c b/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c index 0f15bcada4e99..717977aec6d06 100644 --- a/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c +++ b/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c @@ -265,14 +265,6 @@ static const struct irq_source_info_funcs vline0_irq_info_funcs = { .funcs = &pflip_irq_info_funcs\ } -#define vupdate_int_entry(reg_num)\ - [DC_IRQ_SOURCE_VUPDATE1 + reg_num] = {\ - IRQ_REG_ENTRY(OTG, reg_num,\ - OTG_GLOBAL_SYNC_STATUS, VUPDATE_INT_EN,\ - OTG_GLOBAL_SYNC_STATUS, VUPDATE_EVENT_CLEAR),\ - .funcs = &vblank_irq_info_funcs\ - } - /* vupdate_no_lock_int_entry maps to DC_IRQ_SOURCE_VUPDATEx, to match semantic * of DCE's DC_IRQ_SOURCE_VUPDATEx. */ @@ -401,12 +393,6 @@ irq_source_info_dcn21[DAL_IRQ_SOURCES_NUMBER] = { dc_underflow_int_entry(6), [DC_IRQ_SOURCE_DMCU_SCP] = dummy_irq_entry(), [DC_IRQ_SOURCE_VBIOS_SW] = dummy_irq_entry(), - vupdate_int_entry(0), - vupdate_int_entry(1), - vupdate_int_entry(2), - vupdate_int_entry(3), - vupdate_int_entry(4), - vupdate_int_entry(5), vupdate_no_lock_int_entry(0), vupdate_no_lock_int_entry(1), vupdate_no_lock_int_entry(2), diff --git a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c index 57f198de5e2cb..4e075b01d48bb 100644 --- a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c +++ b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c @@ -100,7 +100,8 @@ enum vsc_packet_revision { //PB7 = MD0 #define MASK_VTEM_MD0__VRR_EN 0x01 #define MASK_VTEM_MD0__M_CONST 0x02 -#define MASK_VTEM_MD0__RESERVED2 0x0C +#define MASK_VTEM_MD0__QMS_EN 0x04 +#define MASK_VTEM_MD0__RESERVED2 0x08 #define MASK_VTEM_MD0__FVA_FACTOR_M1 0xF0 //MD1 @@ -109,7 +110,7 @@ enum vsc_packet_revision { //MD2 #define MASK_VTEM_MD2__BASE_REFRESH_RATE_98 0x03 #define MASK_VTEM_MD2__RB 0x04 -#define MASK_VTEM_MD2__RESERVED3 
0xF8 +#define MASK_VTEM_MD2__NEXT_TFR 0xF8 //MD3 #define MASK_VTEM_MD3__BASE_REFRESH_RATE_07 0xFF diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c index 08362d506534b..a68496b3f9296 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c @@ -1045,6 +1045,17 @@ bool amdgpu_dpm_is_baco_supported(struct amdgpu_device *adev) if (!pp_funcs || !pp_funcs->get_asic_baco_capability) return false; + /* Don't use baco for reset in S3. + * This is a workaround for some platforms + * where entering BACO during suspend + * seems to cause reboots or hangs. + * This might be related to the fact that BACO controls + * power to the whole GPU including devices like audio and USB. + * Powering down/up everything may adversely affect these other + * devices. Needs more investigation. + */ + if (adev->in_s3) + return false; if (pp_funcs->get_asic_baco_capability(pp_handle, &baco_cap)) return false; diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index 48cc009d9bdf3..dc910003f3cab 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -2134,8 +2134,8 @@ static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_ } } - /* setting should not be allowed from VF */ - if (amdgpu_sriov_vf(adev)) { + /* setting should not be allowed from VF if not in one VF mode */ + if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) { dev_attr->attr.mode &= ~S_IWUGO; dev_attr->store = NULL; } diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c index 9ddd8491ff008..ede71de2343dc 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c @@ -773,13 +773,13 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr, smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetHardMinFclkByFreq, 
hwmgr->display_config->num_display > 3 ? - data->clock_vol_info.vdd_dep_on_fclk->entries[0].clk : + (data->clock_vol_info.vdd_dep_on_fclk->entries[0].clk / 100) : min_mclk, NULL); smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetHardMinSocclkByFreq, - data->clock_vol_info.vdd_dep_on_socclk->entries[0].clk, + data->clock_vol_info.vdd_dep_on_socclk->entries[0].clk / 100, NULL); smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetHardMinVcn, @@ -792,11 +792,11 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr, NULL); smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetSoftMaxFclkByFreq, - data->clock_vol_info.vdd_dep_on_fclk->entries[index_fclk].clk, + data->clock_vol_info.vdd_dep_on_fclk->entries[index_fclk].clk / 100, NULL); smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetSoftMaxSocclkByFreq, - data->clock_vol_info.vdd_dep_on_socclk->entries[index_socclk].clk, + data->clock_vol_info.vdd_dep_on_socclk->entries[index_socclk].clk / 100, NULL); smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetSoftMaxVcn, diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index d93d28c1af95b..b51368fa30253 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -138,7 +138,7 @@ int smu_get_dpm_freq_range(struct smu_context *smu, uint32_t *min, uint32_t *max) { - int ret = 0; + int ret = -ENOTSUPP; if (!min && !max) return -EINVAL; diff --git a/drivers/gpu/drm/bridge/adv7511/adv7511.h b/drivers/gpu/drm/bridge/adv7511/adv7511.h index 592ecfcf00caf..6a882891d91c5 100644 --- a/drivers/gpu/drm/bridge/adv7511/adv7511.h +++ b/drivers/gpu/drm/bridge/adv7511/adv7511.h @@ -169,6 +169,7 @@ #define ADV7511_PACKET_ENABLE_SPARE2 BIT(1) #define ADV7511_PACKET_ENABLE_SPARE1 BIT(0) +#define ADV7535_REG_POWER2_HPD_OVERRIDE BIT(6) #define ADV7511_REG_POWER2_HPD_SRC_MASK 0xc0 #define ADV7511_REG_POWER2_HPD_SRC_BOTH 0x00 #define ADV7511_REG_POWER2_HPD_SRC_HPD 0x40 diff 
--git a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c index f8e5da1485999..77118c3395bf0 100644 --- a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c +++ b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c @@ -351,11 +351,17 @@ static void __adv7511_power_on(struct adv7511 *adv7511) * from standby or are enabled. When the HPD goes low the adv7511 is * reset and the outputs are disabled which might cause the monitor to * go to standby again. To avoid this we ignore the HPD pin for the - * first few seconds after enabling the output. + * first few seconds after enabling the output. On the other hand + * adv7535 require to enable HPD Override bit for proper HPD. */ - regmap_update_bits(adv7511->regmap, ADV7511_REG_POWER2, - ADV7511_REG_POWER2_HPD_SRC_MASK, - ADV7511_REG_POWER2_HPD_SRC_NONE); + if (adv7511->type == ADV7535) + regmap_update_bits(adv7511->regmap, ADV7511_REG_POWER2, + ADV7535_REG_POWER2_HPD_OVERRIDE, + ADV7535_REG_POWER2_HPD_OVERRIDE); + else + regmap_update_bits(adv7511->regmap, ADV7511_REG_POWER2, + ADV7511_REG_POWER2_HPD_SRC_MASK, + ADV7511_REG_POWER2_HPD_SRC_NONE); } static void adv7511_power_on(struct adv7511 *adv7511) @@ -375,6 +381,10 @@ static void adv7511_power_on(struct adv7511 *adv7511) static void __adv7511_power_off(struct adv7511 *adv7511) { /* TODO: setup additional power down modes */ + if (adv7511->type == ADV7535) + regmap_update_bits(adv7511->regmap, ADV7511_REG_POWER2, + ADV7535_REG_POWER2_HPD_OVERRIDE, 0); + regmap_update_bits(adv7511->regmap, ADV7511_REG_POWER, ADV7511_POWER_POWER_DOWN, ADV7511_POWER_POWER_DOWN); @@ -672,9 +682,14 @@ adv7511_detect(struct adv7511 *adv7511, struct drm_connector *connector) status = connector_status_disconnected; } else { /* Renable HPD sensing */ - regmap_update_bits(adv7511->regmap, ADV7511_REG_POWER2, - ADV7511_REG_POWER2_HPD_SRC_MASK, - ADV7511_REG_POWER2_HPD_SRC_BOTH); + if (adv7511->type == ADV7535) + regmap_update_bits(adv7511->regmap, ADV7511_REG_POWER2, + 
ADV7535_REG_POWER2_HPD_OVERRIDE, + ADV7535_REG_POWER2_HPD_OVERRIDE); + else + regmap_update_bits(adv7511->regmap, ADV7511_REG_POWER2, + ADV7511_REG_POWER2_HPD_SRC_MASK, + ADV7511_REG_POWER2_HPD_SRC_BOTH); } adv7511->status = status; diff --git a/drivers/gpu/drm/bridge/analogix/anx7625.c b/drivers/gpu/drm/bridge/analogix/anx7625.c index 2346dbcc505f2..e596cacce9e3e 100644 --- a/drivers/gpu/drm/bridge/analogix/anx7625.c +++ b/drivers/gpu/drm/bridge/analogix/anx7625.c @@ -846,7 +846,8 @@ static int segments_edid_read(struct anx7625_data *ctx, static int sp_tx_edid_read(struct anx7625_data *ctx, u8 *pedid_blocks_buf) { - u8 offset, edid_pos; + u8 offset; + int edid_pos; int count, blocks_num; u8 pblock_buf[MAX_DPCD_BUFFER_SIZE]; u8 i, j; diff --git a/drivers/gpu/drm/bridge/cdns-dsi.c b/drivers/gpu/drm/bridge/cdns-dsi.c index d8a15c459b42c..829e1a1446567 100644 --- a/drivers/gpu/drm/bridge/cdns-dsi.c +++ b/drivers/gpu/drm/bridge/cdns-dsi.c @@ -1284,6 +1284,7 @@ static const struct of_device_id cdns_dsi_of_match[] = { { .compatible = "cdns,dsi" }, { }, }; +MODULE_DEVICE_TABLE(of, cdns_dsi_of_match); static struct platform_driver cdns_dsi_platform_driver = { .probe = cdns_dsi_drm_probe, diff --git a/drivers/gpu/drm/bridge/lontium-lt9611.c b/drivers/gpu/drm/bridge/lontium-lt9611.c index dafb1b47c15fb..00597eb54661f 100644 --- a/drivers/gpu/drm/bridge/lontium-lt9611.c +++ b/drivers/gpu/drm/bridge/lontium-lt9611.c @@ -1164,7 +1164,11 @@ static int lt9611_probe(struct i2c_client *client, lt9611_enable_hpd_interrupts(lt9611); - return lt9611_audio_init(dev, lt9611); + ret = lt9611_audio_init(dev, lt9611); + if (ret) + goto err_remove_bridge; + + return 0; err_remove_bridge: drm_bridge_remove(<9611->bridge); diff --git a/drivers/gpu/drm/bridge/nwl-dsi.c b/drivers/gpu/drm/bridge/nwl-dsi.c index af07eeb47ca02..691039aba87f4 100644 --- a/drivers/gpu/drm/bridge/nwl-dsi.c +++ b/drivers/gpu/drm/bridge/nwl-dsi.c @@ -861,18 +861,19 @@ nwl_dsi_bridge_mode_set(struct drm_bridge *bridge, 
memcpy(&dsi->mode, adjusted_mode, sizeof(dsi->mode)); drm_mode_debug_printmodeline(adjusted_mode); - pm_runtime_get_sync(dev); + if (pm_runtime_resume_and_get(dev) < 0) + return; if (clk_prepare_enable(dsi->lcdif_clk) < 0) - return; + goto runtime_put; if (clk_prepare_enable(dsi->core_clk) < 0) - return; + goto runtime_put; /* Step 1 from DSI reset-out instructions */ ret = reset_control_deassert(dsi->rst_pclk); if (ret < 0) { DRM_DEV_ERROR(dev, "Failed to deassert PCLK: %d\n", ret); - return; + goto runtime_put; } /* Step 2 from DSI reset-out instructions */ @@ -882,13 +883,18 @@ nwl_dsi_bridge_mode_set(struct drm_bridge *bridge, ret = reset_control_deassert(dsi->rst_esc); if (ret < 0) { DRM_DEV_ERROR(dev, "Failed to deassert ESC: %d\n", ret); - return; + goto runtime_put; } ret = reset_control_deassert(dsi->rst_byte); if (ret < 0) { DRM_DEV_ERROR(dev, "Failed to deassert BYTE: %d\n", ret); - return; + goto runtime_put; } + + return; + +runtime_put: + pm_runtime_put_sync(dev); } static void @@ -1204,6 +1210,7 @@ static int nwl_dsi_probe(struct platform_device *pdev) ret = nwl_dsi_select_input(dsi); if (ret < 0) { + pm_runtime_disable(dev); mipi_dsi_host_unregister(&dsi->dsi_host); return ret; } diff --git a/drivers/gpu/drm/bridge/sil-sii8620.c b/drivers/gpu/drm/bridge/sil-sii8620.c index 843265d7f1b12..ec7745c31da07 100644 --- a/drivers/gpu/drm/bridge/sil-sii8620.c +++ b/drivers/gpu/drm/bridge/sil-sii8620.c @@ -2120,7 +2120,7 @@ static void sii8620_init_rcp_input_dev(struct sii8620 *ctx) if (ret) { dev_err(ctx->dev, "Failed to register RC device\n"); ctx->error = ret; - rc_free_device(ctx->rc_dev); + rc_free_device(rc_dev); return; } ctx->rc_dev = rc_dev; diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c index 54d8fdad395f5..97cdc61b57f61 100644 --- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c +++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c @@ -2551,8 +2551,9 @@ static u32 
*dw_hdmi_bridge_atomic_get_output_bus_fmts(struct drm_bridge *bridge, if (!output_fmts) return NULL; - /* If dw-hdmi is the only bridge, avoid negociating with ourselves */ - if (list_is_singular(&bridge->encoder->bridge_chain)) { + /* If dw-hdmi is the first or only bridge, avoid negociating with ourselves */ + if (list_is_singular(&bridge->encoder->bridge_chain) || + list_is_first(&bridge->chain_node, &bridge->encoder->bridge_chain)) { *num_output_fmts = 1; output_fmts[0] = MEDIA_BUS_FMT_FIXED; diff --git a/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.c b/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.c index e44e18a0112af..56c3fd08c6a0b 100644 --- a/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.c +++ b/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.c @@ -1199,6 +1199,7 @@ __dw_mipi_dsi_probe(struct platform_device *pdev, ret = mipi_dsi_host_register(&dsi->dsi_host); if (ret) { dev_err(dev, "Failed to register MIPI host: %d\n", ret); + pm_runtime_disable(dev); dw_mipi_dsi_debugfs_remove(dsi); return ERR_PTR(ret); } diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi83.c b/drivers/gpu/drm/bridge/ti-sn65dsi83.c index 945f08de45f1d..314a84ffcea3d 100644 --- a/drivers/gpu/drm/bridge/ti-sn65dsi83.c +++ b/drivers/gpu/drm/bridge/ti-sn65dsi83.c @@ -560,10 +560,14 @@ static int sn65dsi83_parse_dt(struct sn65dsi83 *ctx, enum sn65dsi83_model model) ctx->host_node = of_graph_get_remote_port_parent(endpoint); of_node_put(endpoint); - if (ctx->dsi_lanes < 0 || ctx->dsi_lanes > 4) - return -EINVAL; - if (!ctx->host_node) - return -ENODEV; + if (ctx->dsi_lanes < 0 || ctx->dsi_lanes > 4) { + ret = -EINVAL; + goto err_put_node; + } + if (!ctx->host_node) { + ret = -ENODEV; + goto err_put_node; + } ctx->lvds_dual_link = false; ctx->lvds_dual_link_even_odd_swap = false; @@ -590,16 +594,22 @@ static int sn65dsi83_parse_dt(struct sn65dsi83 *ctx, enum sn65dsi83_model model) ret = drm_of_find_panel_or_bridge(dev->of_node, 2, 0, &panel, &panel_bridge); if (ret < 0) - return ret; + goto err_put_node; 
if (panel) { panel_bridge = devm_drm_panel_bridge_add(dev, panel); - if (IS_ERR(panel_bridge)) - return PTR_ERR(panel_bridge); + if (IS_ERR(panel_bridge)) { + ret = PTR_ERR(panel_bridge); + goto err_put_node; + } } ctx->panel_bridge = panel_bridge; return 0; + +err_put_node: + of_node_put(ctx->host_node); + return ret; } static int sn65dsi83_host_attach(struct sn65dsi83 *ctx) @@ -673,8 +683,10 @@ static int sn65dsi83_probe(struct i2c_client *client, return ret; ctx->regmap = devm_regmap_init_i2c(client, &sn65dsi83_regmap_config); - if (IS_ERR(ctx->regmap)) - return PTR_ERR(ctx->regmap); + if (IS_ERR(ctx->regmap)) { + ret = PTR_ERR(ctx->regmap); + goto err_put_node; + } dev_set_drvdata(dev, ctx); i2c_set_clientdata(client, ctx); @@ -691,6 +703,8 @@ static int sn65dsi83_probe(struct i2c_client *client, err_remove_bridge: drm_bridge_remove(&ctx->bridge); +err_put_node: + of_node_put(ctx->host_node); return ret; } diff --git a/drivers/gpu/drm/drm_dp_helper.c b/drivers/gpu/drm/drm_dp_helper.c index 23f9073bc473a..c9528aa62c9c9 100644 --- a/drivers/gpu/drm/drm_dp_helper.c +++ b/drivers/gpu/drm/drm_dp_helper.c @@ -144,16 +144,6 @@ u8 drm_dp_get_adjust_tx_ffe_preset(const u8 link_status[DP_LINK_STATUS_SIZE], } EXPORT_SYMBOL(drm_dp_get_adjust_tx_ffe_preset); -u8 drm_dp_get_adjust_request_post_cursor(const u8 link_status[DP_LINK_STATUS_SIZE], - unsigned int lane) -{ - unsigned int offset = DP_ADJUST_REQUEST_POST_CURSOR2; - u8 value = dp_link_status(link_status, offset); - - return (value >> (lane << 1)) & 0x3; -} -EXPORT_SYMBOL(drm_dp_get_adjust_request_post_cursor); - static int __8b10b_clock_recovery_delay_us(const struct drm_dp_aux *aux, u8 rd_interval) { if (rd_interval > 4) diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index f5f5de362ff2c..83e5c115e7547 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -212,9 +212,7 @@ static const struct edid_quirk { /* Windows Mixed Reality Headsets */ EDID_QUIRK('A', 'C', 'R', 0x7fce, 
EDID_QUIRK_NON_DESKTOP), - EDID_QUIRK('H', 'P', 'N', 0x3515, EDID_QUIRK_NON_DESKTOP), EDID_QUIRK('L', 'E', 'N', 0x0408, EDID_QUIRK_NON_DESKTOP), - EDID_QUIRK('L', 'E', 'N', 0xb800, EDID_QUIRK_NON_DESKTOP), EDID_QUIRK('F', 'U', 'J', 0x1970, EDID_QUIRK_NON_DESKTOP), EDID_QUIRK('D', 'E', 'L', 0x7fce, EDID_QUIRK_NON_DESKTOP), EDID_QUIRK('S', 'E', 'C', 0x144a, EDID_QUIRK_NON_DESKTOP), @@ -4848,7 +4846,8 @@ bool drm_detect_monitor_audio(struct edid *edid) if (!edid_ext) goto end; - has_audio = ((edid_ext[3] & EDID_BASIC_AUDIO) != 0); + has_audio = (edid_ext[0] == CEA_EXT && + (edid_ext[3] & EDID_BASIC_AUDIO) != 0); if (has_audio) { DRM_DEBUG_KMS("Monitor has basic audio support\n"); @@ -5075,21 +5074,21 @@ static void drm_parse_hdmi_deep_color_info(struct drm_connector *connector, if (hdmi[6] & DRM_EDID_HDMI_DC_30) { dc_bpc = 10; - info->edid_hdmi_dc_modes |= DRM_EDID_HDMI_DC_30; + info->edid_hdmi_rgb444_dc_modes |= DRM_EDID_HDMI_DC_30; DRM_DEBUG("%s: HDMI sink does deep color 30.\n", connector->name); } if (hdmi[6] & DRM_EDID_HDMI_DC_36) { dc_bpc = 12; - info->edid_hdmi_dc_modes |= DRM_EDID_HDMI_DC_36; + info->edid_hdmi_rgb444_dc_modes |= DRM_EDID_HDMI_DC_36; DRM_DEBUG("%s: HDMI sink does deep color 36.\n", connector->name); } if (hdmi[6] & DRM_EDID_HDMI_DC_48) { dc_bpc = 16; - info->edid_hdmi_dc_modes |= DRM_EDID_HDMI_DC_48; + info->edid_hdmi_rgb444_dc_modes |= DRM_EDID_HDMI_DC_48; DRM_DEBUG("%s: HDMI sink does deep color 48.\n", connector->name); } @@ -5104,16 +5103,9 @@ static void drm_parse_hdmi_deep_color_info(struct drm_connector *connector, connector->name, dc_bpc); info->bpc = dc_bpc; - /* - * Deep color support mandates RGB444 support for all video - * modes and forbids YCRCB422 support for all video modes per - * HDMI 1.3 spec. - */ - info->color_formats = DRM_COLOR_FORMAT_RGB444; - /* YCRCB444 is optional according to spec. 
*/ if (hdmi[6] & DRM_EDID_HDMI_DC_Y444) { - info->color_formats |= DRM_COLOR_FORMAT_YCRCB444; + info->edid_hdmi_ycbcr444_dc_modes = info->edid_hdmi_rgb444_dc_modes; DRM_DEBUG("%s: HDMI sink does YCRCB444 in deep color.\n", connector->name); } @@ -5333,17 +5325,13 @@ u32 drm_add_display_info(struct drm_connector *connector, const struct edid *edi info->width_mm = edid->width_cm * 10; info->height_mm = edid->height_cm * 10; - info->non_desktop = !!(quirks & EDID_QUIRK_NON_DESKTOP); - drm_get_monitor_range(connector, edid); - DRM_DEBUG_KMS("non_desktop set to %d\n", info->non_desktop); - if (edid->revision < 3) - return quirks; + goto out; if (!(edid->input & DRM_EDID_INPUT_DIGITAL)) - return quirks; + goto out; info->color_formats |= DRM_COLOR_FORMAT_RGB444; drm_parse_cea_ext(connector, edid); @@ -5364,7 +5352,7 @@ u32 drm_add_display_info(struct drm_connector *connector, const struct edid *edi /* Only defined for 1.4 with digital displays */ if (edid->revision < 4) - return quirks; + goto out; switch (edid->input & DRM_EDID_DIGITAL_DEPTH_MASK) { case DRM_EDID_DIGITAL_DEPTH_6: @@ -5401,6 +5389,13 @@ u32 drm_add_display_info(struct drm_connector *connector, const struct edid *edi drm_update_mso(connector, edid); +out: + if (quirks & EDID_QUIRK_NON_DESKTOP) { + drm_dbg_kms(connector->dev, "Non-desktop display%s\n", + info->non_desktop ? 
" (redundant quirk)" : ""); + info->non_desktop = true; + } + return quirks; } diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index ed43b987d306a..f15127a32f7a7 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -2346,6 +2346,7 @@ static int drm_fb_helper_generic_probe(struct drm_fb_helper *fb_helper, fbi->fbops = &drm_fbdev_fb_ops; fbi->screen_size = sizes->surface_height * fb->pitches[0]; fbi->fix.smem_len = fbi->screen_size; + fbi->flags = FBINFO_DEFAULT; drm_fb_helper_fill_info(fbi, fb_helper, sizes); @@ -2353,19 +2354,21 @@ static int drm_fb_helper_generic_probe(struct drm_fb_helper *fb_helper, fbi->screen_buffer = vzalloc(fbi->screen_size); if (!fbi->screen_buffer) return -ENOMEM; + fbi->flags |= FBINFO_VIRTFB | FBINFO_READS_FAST; fbi->fbdefio = &drm_fbdev_defio; - fb_deferred_io_init(fbi); } else { /* buffer is mapped for HW framebuffer */ ret = drm_client_buffer_vmap(fb_helper->buffer, &map); if (ret) return ret; - if (map.is_iomem) + if (map.is_iomem) { fbi->screen_base = map.vaddr_iomem; - else + } else { fbi->screen_buffer = map.vaddr; + fbi->flags |= FBINFO_VIRTFB; + } /* * Shamelessly leak the physical address to user-space. 
As diff --git a/drivers/gpu/drm/drm_panel_orientation_quirks.c b/drivers/gpu/drm/drm_panel_orientation_quirks.c index b910978d3e480..4e853acfd1e8a 100644 --- a/drivers/gpu/drm/drm_panel_orientation_quirks.c +++ b/drivers/gpu/drm/drm_panel_orientation_quirks.c @@ -180,6 +180,12 @@ static const struct dmi_system_id orientation_data[] = { DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "MicroPC"), }, .driver_data = (void *)&lcd720x1280_rightside_up, + }, { /* GPD Win Max */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "GPD"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "G1619-01"), + }, + .driver_data = (void *)&lcd800x1280_rightside_up, }, { /* * GPD Pocket, note that the the DMI data is less generic then * it seems, devices with a board-vendor of "AMI Corporation" diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index c313a5b4549c4..7e48dcd1bee4d 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -853,12 +853,57 @@ drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data, &args->handle); } + +/* + * Try to flatten a dma_fence_chain into a dma_fence_array so that it can be + * added as timeline fence to a chain again. 
+ */ +static int drm_syncobj_flatten_chain(struct dma_fence **f) +{ + struct dma_fence_chain *chain = to_dma_fence_chain(*f); + struct dma_fence *tmp, **fences; + struct dma_fence_array *array; + unsigned int count; + + if (!chain) + return 0; + + count = 0; + dma_fence_chain_for_each(tmp, &chain->base) + ++count; + + fences = kmalloc_array(count, sizeof(*fences), GFP_KERNEL); + if (!fences) + return -ENOMEM; + + count = 0; + dma_fence_chain_for_each(tmp, &chain->base) + fences[count++] = dma_fence_get(tmp); + + array = dma_fence_array_create(count, fences, + dma_fence_context_alloc(1), + 1, false); + if (!array) + goto free_fences; + + dma_fence_put(*f); + *f = &array->base; + return 0; + +free_fences: + while (count--) + dma_fence_put(fences[count]); + + kfree(fences); + return -ENOMEM; +} + static int drm_syncobj_transfer_to_timeline(struct drm_file *file_private, struct drm_syncobj_transfer *args) { struct drm_syncobj *timeline_syncobj = NULL; - struct dma_fence *fence; struct dma_fence_chain *chain; + struct dma_fence *fence; int ret; timeline_syncobj = drm_syncobj_find(file_private, args->dst_handle); @@ -869,16 +914,22 @@ static int drm_syncobj_transfer_to_timeline(struct drm_file *file_private, args->src_point, args->flags, &fence); if (ret) - goto err; + goto err_put_timeline; + + ret = drm_syncobj_flatten_chain(&fence); + if (ret) + goto err_free_fence; + chain = dma_fence_chain_alloc(); if (!chain) { ret = -ENOMEM; - goto err1; + goto err_free_fence; } + drm_syncobj_add_point(timeline_syncobj, chain, fence, args->dst_point); -err1: +err_free_fence: dma_fence_put(fence); -err: +err_put_timeline: drm_syncobj_put(timeline_syncobj); return ret; diff --git a/drivers/gpu/drm/i915/display/intel_bw.c b/drivers/gpu/drm/i915/display/intel_bw.c index 8ac196e814d5d..d351b834a6551 100644 --- a/drivers/gpu/drm/i915/display/intel_bw.c +++ b/drivers/gpu/drm/i915/display/intel_bw.c @@ -966,7 +966,8 @@ int intel_bw_atomic_check(struct intel_atomic_state *state) * cause. 
*/ if (!intel_can_enable_sagv(dev_priv, new_bw_state)) { - allowed_points = BIT(max_bw_point); + allowed_points &= ADLS_PSF_PT_MASK; + allowed_points |= BIT(max_bw_point); drm_dbg_kms(&dev_priv->drm, "No SAGV, using single QGV point %d\n", max_bw_point); } diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index b5e2508db1cfe..62e763faf0aa5 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -4831,7 +4831,7 @@ intel_dp_hpd_pulse(struct intel_digital_port *dig_port, bool long_hpd) struct intel_dp *intel_dp = &dig_port->dp; if (dig_port->base.type == INTEL_OUTPUT_EDP && - (long_hpd || !intel_pps_have_power(intel_dp))) { + (long_hpd || !intel_pps_have_panel_power_or_vdd(intel_dp))) { /* * vdd off can generate a long/short pulse on eDP which * would require vdd on to handle it, and thus we diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.c b/drivers/gpu/drm/i915/display/intel_hdmi.c index 3b5b9e7b05b7b..866ac090e3e32 100644 --- a/drivers/gpu/drm/i915/display/intel_hdmi.c +++ b/drivers/gpu/drm/i915/display/intel_hdmi.c @@ -1836,6 +1836,7 @@ hdmi_port_clock_valid(struct intel_hdmi *hdmi, bool has_hdmi_sink) { struct drm_i915_private *dev_priv = intel_hdmi_to_i915(hdmi); + enum phy phy = intel_port_to_phy(dev_priv, hdmi_to_dig_port(hdmi)->base.port); if (clock < 25000) return MODE_CLOCK_LOW; @@ -1856,6 +1857,14 @@ hdmi_port_clock_valid(struct intel_hdmi *hdmi, if (IS_CHERRYVIEW(dev_priv) && clock > 216000 && clock < 240000) return MODE_CLOCK_RANGE; + /* ICL+ combo PHY PLL can't generate 500-533.2 MHz */ + if (intel_phy_is_combo(dev_priv, phy) && clock > 500000 && clock < 533200) + return MODE_CLOCK_RANGE; + + /* ICL+ TC PHY PLL can't generate 500-532.8 MHz */ + if (intel_phy_is_tc(dev_priv, phy) && clock > 500000 && clock < 532800) + return MODE_CLOCK_RANGE; + /* * SNPS PHYs' MPLLB table-based programming can only handle a fixed * set of link rates. 
@@ -1912,7 +1921,7 @@ static bool intel_hdmi_sink_bpc_possible(struct drm_connector *connector, if (ycbcr420_output) return hdmi->y420_dc_modes & DRM_EDID_YCBCR420_DC_36; else - return info->edid_hdmi_dc_modes & DRM_EDID_HDMI_DC_36; + return info->edid_hdmi_rgb444_dc_modes & DRM_EDID_HDMI_DC_36; case 10: if (!has_hdmi_sink) return false; @@ -1920,7 +1929,7 @@ static bool intel_hdmi_sink_bpc_possible(struct drm_connector *connector, if (ycbcr420_output) return hdmi->y420_dc_modes & DRM_EDID_YCBCR420_DC_30; else - return info->edid_hdmi_dc_modes & DRM_EDID_HDMI_DC_30; + return info->edid_hdmi_rgb444_dc_modes & DRM_EDID_HDMI_DC_30; case 8: return true; default: diff --git a/drivers/gpu/drm/i915/display/intel_opregion.c b/drivers/gpu/drm/i915/display/intel_opregion.c index 4a2662838cd8d..df10b6898987a 100644 --- a/drivers/gpu/drm/i915/display/intel_opregion.c +++ b/drivers/gpu/drm/i915/display/intel_opregion.c @@ -375,6 +375,21 @@ int intel_opregion_notify_encoder(struct intel_encoder *intel_encoder, return -EINVAL; } + /* + * The port numbering and mapping here is bizarre. The now-obsolete + * swsci spec supports ports numbered [0..4]. Port E is handled as a + * special case, but port F and beyond are not. The functionality is + * supposed to be obsolete for new platforms. Just bail out if the port + * number is out of bounds after mapping. 
+ */ + if (port > 4) { + drm_dbg_kms(&dev_priv->drm, + "[ENCODER:%d:%s] port %c (index %u) out of bounds for display power state notification\n", + intel_encoder->base.base.id, intel_encoder->base.name, + port_name(intel_encoder->port), port); + return -EINVAL; + } + if (!enable) parm |= 4 << 8; diff --git a/drivers/gpu/drm/i915/display/intel_pps.c b/drivers/gpu/drm/i915/display/intel_pps.c index e9c679bb1b2eb..5edd188d97479 100644 --- a/drivers/gpu/drm/i915/display/intel_pps.c +++ b/drivers/gpu/drm/i915/display/intel_pps.c @@ -1075,14 +1075,14 @@ static void intel_pps_vdd_sanitize(struct intel_dp *intel_dp) edp_panel_vdd_schedule_off(intel_dp); } -bool intel_pps_have_power(struct intel_dp *intel_dp) +bool intel_pps_have_panel_power_or_vdd(struct intel_dp *intel_dp) { intel_wakeref_t wakeref; bool have_power = false; with_intel_pps_lock(intel_dp, wakeref) { - have_power = edp_have_panel_power(intel_dp) && - edp_have_panel_vdd(intel_dp); + have_power = edp_have_panel_power(intel_dp) || + edp_have_panel_vdd(intel_dp); } return have_power; diff --git a/drivers/gpu/drm/i915/display/intel_pps.h b/drivers/gpu/drm/i915/display/intel_pps.h index fbb47f6f453e4..e64144659d31f 100644 --- a/drivers/gpu/drm/i915/display/intel_pps.h +++ b/drivers/gpu/drm/i915/display/intel_pps.h @@ -37,7 +37,7 @@ void intel_pps_vdd_on(struct intel_dp *intel_dp); void intel_pps_on(struct intel_dp *intel_dp); void intel_pps_off(struct intel_dp *intel_dp); void intel_pps_vdd_off_sync(struct intel_dp *intel_dp); -bool intel_pps_have_power(struct intel_dp *intel_dp); +bool intel_pps_have_panel_power_or_vdd(struct intel_dp *intel_dp); void intel_pps_wait_power_cycle(struct intel_dp *intel_dp); void intel_pps_init(struct intel_dp *intel_dp); diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index 00279e8c27756..b00de57cc957e 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -1816,6 +1816,9 @@ static 
void _intel_psr_post_plane_update(const struct intel_atomic_state *state, mutex_lock(&psr->lock); + if (psr->sink_not_reliable) + goto exit; + drm_WARN_ON(&dev_priv->drm, psr->enabled && !crtc_state->active_planes); /* Only enable if there is active planes */ @@ -1826,6 +1829,7 @@ static void _intel_psr_post_plane_update(const struct intel_atomic_state *state, if (crtc_state->crc_enabled && psr->enabled) psr_force_hw_tracking_exit(intel_dp); +exit: mutex_unlock(&psr->lock); } } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index 1478c02a82cbe..d270d9a918e80 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -67,7 +67,7 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data, * mmap ioctl is disallowed for all discrete platforms, * and for all platforms with GRAPHICS_VER > 12. */ - if (IS_DGFX(i915) || GRAPHICS_VER(i915) > 12) + if (IS_DGFX(i915) || GRAPHICS_VER_FULL(i915) > IP_VER(12, 0)) return -EOPNOTSUPP; if (args->flags & ~(I915_MMAP_WC)) @@ -439,7 +439,7 @@ vm_access(struct vm_area_struct *area, unsigned long addr, return -EACCES; addr -= area->vm_start; - if (addr >= obj->base.size) + if (range_overflows_t(u64, addr, len, obj->base.size)) return -EINVAL; i915_gem_ww_ctx_init(&ww, true); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 0c70ab08fc0c9..73efed2f30ca7 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1146,7 +1146,7 @@ static inline struct intel_gt *to_gt(struct drm_i915_private *i915) (GRAPHICS_VER(i915) >= (from) && GRAPHICS_VER(i915) <= (until)) #define MEDIA_VER(i915) (INTEL_INFO(i915)->media.ver) -#define MEDIA_VER_FULL(i915) IP_VER(INTEL_INFO(i915)->media.arch, \ +#define MEDIA_VER_FULL(i915) IP_VER(INTEL_INFO(i915)->media.ver, \ INTEL_INFO(i915)->media.rel) #define IS_MEDIA_VER(i915, from, until) \ (MEDIA_VER(i915) >= (from) && MEDIA_VER(i915) <= (until)) diff --git 
a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index fae4f7818d28b..12120474c80c7 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3722,8 +3722,7 @@ skl_setup_sagv_block_time(struct drm_i915_private *dev_priv) MISSING_CASE(DISPLAY_VER(dev_priv)); } - /* Default to an unusable block time */ - dev_priv->sagv_block_time_us = -1; + dev_priv->sagv_block_time_us = 0; } /* @@ -5652,7 +5651,7 @@ static void skl_compute_plane_wm(const struct intel_crtc_state *crtc_state, result->min_ddb_alloc = max(min_ddb_alloc, blocks) + 1; result->enable = true; - if (DISPLAY_VER(dev_priv) < 12) + if (DISPLAY_VER(dev_priv) < 12 && dev_priv->sagv_block_time_us) result->can_sagv = latency >= dev_priv->sagv_block_time_us; } @@ -5683,7 +5682,10 @@ static void tgl_compute_sagv_wm(const struct intel_crtc_state *crtc_state, struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev); struct skl_wm_level *sagv_wm = &plane_wm->sagv.wm0; struct skl_wm_level *levels = plane_wm->wm; - unsigned int latency = dev_priv->wm.skl_latency[0] + dev_priv->sagv_block_time_us; + unsigned int latency = 0; + + if (dev_priv->sagv_block_time_us) + latency = dev_priv->sagv_block_time_us + dev_priv->wm.skl_latency[0]; skl_compute_plane_wm(crtc_state, 0, latency, wm_params, &levels[0], diff --git a/drivers/gpu/drm/imx/dw_hdmi-imx.c b/drivers/gpu/drm/imx/dw_hdmi-imx.c index 87428fb23d9ff..a2277a0d6d06f 100644 --- a/drivers/gpu/drm/imx/dw_hdmi-imx.c +++ b/drivers/gpu/drm/imx/dw_hdmi-imx.c @@ -222,6 +222,7 @@ static int dw_hdmi_imx_probe(struct platform_device *pdev) struct device_node *np = pdev->dev.of_node; const struct of_device_id *match = of_match_node(dw_hdmi_imx_dt_ids, np); struct imx_hdmi *hdmi; + int ret; hdmi = devm_kzalloc(&pdev->dev, sizeof(*hdmi), GFP_KERNEL); if (!hdmi) @@ -243,10 +244,15 @@ static int dw_hdmi_imx_probe(struct platform_device *pdev) hdmi->bridge = of_drm_find_bridge(np); if (!hdmi->bridge) { dev_err(hdmi->dev, "Unable 
to find bridge\n"); + dw_hdmi_remove(hdmi->hdmi); return -ENODEV; } - return component_add(&pdev->dev, &dw_hdmi_imx_ops); + ret = component_add(&pdev->dev, &dw_hdmi_imx_ops); + if (ret) + dw_hdmi_remove(hdmi->hdmi); + + return ret; } static int dw_hdmi_imx_remove(struct platform_device *pdev) diff --git a/drivers/gpu/drm/imx/imx-ldb.c b/drivers/gpu/drm/imx/imx-ldb.c index e5078d03020d9..fb0e951248f68 100644 --- a/drivers/gpu/drm/imx/imx-ldb.c +++ b/drivers/gpu/drm/imx/imx-ldb.c @@ -572,6 +572,8 @@ static int imx_ldb_panel_ddc(struct device *dev, edidp = of_get_property(child, "edid", &edid_len); if (edidp) { channel->edid = kmemdup(edidp, edid_len, GFP_KERNEL); + if (!channel->edid) + return -ENOMEM; } else if (!channel->panel) { /* fallback to display-timings node */ ret = of_get_drm_display_mode(child, diff --git a/drivers/gpu/drm/imx/parallel-display.c b/drivers/gpu/drm/imx/parallel-display.c index 06cb1a59b9bcd..63ba2ad846791 100644 --- a/drivers/gpu/drm/imx/parallel-display.c +++ b/drivers/gpu/drm/imx/parallel-display.c @@ -75,8 +75,10 @@ static int imx_pd_connector_get_modes(struct drm_connector *connector) ret = of_get_drm_display_mode(np, &imxpd->mode, &imxpd->bus_flags, OF_USE_NATIVE_MODE); - if (ret) + if (ret) { + drm_mode_destroy(connector->dev, mode); return ret; + } drm_mode_copy(mode, &imxpd->mode); mode->type |= DRM_MODE_TYPE_DRIVER | DRM_MODE_TYPE_PREFERRED; diff --git a/drivers/gpu/drm/meson/meson_drv.c b/drivers/gpu/drm/meson/meson_drv.c index 80f1d439841a6..26aeaf0ab86ef 100644 --- a/drivers/gpu/drm/meson/meson_drv.c +++ b/drivers/gpu/drm/meson/meson_drv.c @@ -302,42 +302,42 @@ static int meson_drv_bind_master(struct device *dev, bool has_components) if (priv->afbcd.ops) { ret = priv->afbcd.ops->init(priv); if (ret) - return ret; + goto free_drm; } /* Encoder Initialization */ ret = meson_encoder_cvbs_init(priv); if (ret) - goto free_drm; + goto exit_afbcd; if (has_components) { ret = component_bind_all(drm->dev, drm); if (ret) { 
dev_err(drm->dev, "Couldn't bind all components\n"); - goto free_drm; + goto exit_afbcd; } } ret = meson_encoder_hdmi_init(priv); if (ret) - goto free_drm; + goto exit_afbcd; ret = meson_plane_create(priv); if (ret) - goto free_drm; + goto exit_afbcd; ret = meson_overlay_create(priv); if (ret) - goto free_drm; + goto exit_afbcd; ret = meson_crtc_create(priv); if (ret) - goto free_drm; + goto exit_afbcd; ret = request_irq(priv->vsync_irq, meson_irq, 0, drm->driver->name, drm); if (ret) - goto free_drm; + goto exit_afbcd; drm_mode_config_reset(drm); @@ -355,6 +355,9 @@ static int meson_drv_bind_master(struct device *dev, bool has_components) uninstall_irq: free_irq(priv->vsync_irq, drm); +exit_afbcd: + if (priv->afbcd.ops) + priv->afbcd.ops->exit(priv); free_drm: drm_dev_put(drm); @@ -385,10 +388,8 @@ static void meson_drv_unbind(struct device *dev) free_irq(priv->vsync_irq, drm); drm_dev_put(drm); - if (priv->afbcd.ops) { - priv->afbcd.ops->reset(priv); - meson_rdma_free(priv); - } + if (priv->afbcd.ops) + priv->afbcd.ops->exit(priv); } static const struct component_master_ops meson_drv_master_ops = { diff --git a/drivers/gpu/drm/meson/meson_osd_afbcd.c b/drivers/gpu/drm/meson/meson_osd_afbcd.c index ffc6b584dbf85..0cdbe899402f8 100644 --- a/drivers/gpu/drm/meson/meson_osd_afbcd.c +++ b/drivers/gpu/drm/meson/meson_osd_afbcd.c @@ -79,11 +79,6 @@ static bool meson_gxm_afbcd_supported_fmt(u64 modifier, uint32_t format) return meson_gxm_afbcd_pixel_fmt(modifier, format) >= 0; } -static int meson_gxm_afbcd_init(struct meson_drm *priv) -{ - return 0; -} - static int meson_gxm_afbcd_reset(struct meson_drm *priv) { writel_relaxed(VIU_SW_RESET_OSD1_AFBCD, @@ -93,6 +88,16 @@ static int meson_gxm_afbcd_reset(struct meson_drm *priv) return 0; } +static int meson_gxm_afbcd_init(struct meson_drm *priv) +{ + return 0; +} + +static void meson_gxm_afbcd_exit(struct meson_drm *priv) +{ + meson_gxm_afbcd_reset(priv); +} + static int meson_gxm_afbcd_enable(struct meson_drm *priv) { 
writel_relaxed(FIELD_PREP(OSD1_AFBCD_ID_FIFO_THRD, 0x40) | @@ -172,6 +177,7 @@ static int meson_gxm_afbcd_setup(struct meson_drm *priv) struct meson_afbcd_ops meson_afbcd_gxm_ops = { .init = meson_gxm_afbcd_init, + .exit = meson_gxm_afbcd_exit, .reset = meson_gxm_afbcd_reset, .enable = meson_gxm_afbcd_enable, .disable = meson_gxm_afbcd_disable, @@ -269,6 +275,18 @@ static bool meson_g12a_afbcd_supported_fmt(u64 modifier, uint32_t format) return meson_g12a_afbcd_pixel_fmt(modifier, format) >= 0; } +static int meson_g12a_afbcd_reset(struct meson_drm *priv) +{ + meson_rdma_reset(priv); + + meson_rdma_writel_sync(priv, VIU_SW_RESET_G12A_AFBC_ARB | + VIU_SW_RESET_G12A_OSD1_AFBCD, + VIU_SW_RESET); + meson_rdma_writel_sync(priv, 0, VIU_SW_RESET); + + return 0; +} + static int meson_g12a_afbcd_init(struct meson_drm *priv) { int ret; @@ -286,16 +304,10 @@ static int meson_g12a_afbcd_init(struct meson_drm *priv) return 0; } -static int meson_g12a_afbcd_reset(struct meson_drm *priv) +static void meson_g12a_afbcd_exit(struct meson_drm *priv) { - meson_rdma_reset(priv); - - meson_rdma_writel_sync(priv, VIU_SW_RESET_G12A_AFBC_ARB | - VIU_SW_RESET_G12A_OSD1_AFBCD, - VIU_SW_RESET); - meson_rdma_writel_sync(priv, 0, VIU_SW_RESET); - - return 0; + meson_g12a_afbcd_reset(priv); + meson_rdma_free(priv); } static int meson_g12a_afbcd_enable(struct meson_drm *priv) @@ -380,6 +392,7 @@ static int meson_g12a_afbcd_setup(struct meson_drm *priv) struct meson_afbcd_ops meson_afbcd_g12a_ops = { .init = meson_g12a_afbcd_init, + .exit = meson_g12a_afbcd_exit, .reset = meson_g12a_afbcd_reset, .enable = meson_g12a_afbcd_enable, .disable = meson_g12a_afbcd_disable, diff --git a/drivers/gpu/drm/meson/meson_osd_afbcd.h b/drivers/gpu/drm/meson/meson_osd_afbcd.h index 5e5523304f42f..e77ddeb6416f3 100644 --- a/drivers/gpu/drm/meson/meson_osd_afbcd.h +++ b/drivers/gpu/drm/meson/meson_osd_afbcd.h @@ -14,6 +14,7 @@ struct meson_afbcd_ops { int (*init)(struct meson_drm *priv); + void (*exit)(struct 
meson_drm *priv); int (*reset)(struct meson_drm *priv); int (*enable)(struct meson_drm *priv); int (*disable)(struct meson_drm *priv); diff --git a/drivers/gpu/drm/mgag200/mgag200_mode.c b/drivers/gpu/drm/mgag200/mgag200_mode.c index b983541a4c530..cd9ba13ad5fc8 100644 --- a/drivers/gpu/drm/mgag200/mgag200_mode.c +++ b/drivers/gpu/drm/mgag200/mgag200_mode.c @@ -529,7 +529,10 @@ static void mgag200_set_format_regs(struct mga_device *mdev, WREG_GFX(3, 0x00); WREG_GFX(4, 0x00); WREG_GFX(5, 0x40); - WREG_GFX(6, 0x05); + /* GCTL6 should be 0x05, but we configure memmapsl to 0xb8000 (text mode), + * so that it doesn't hang when running kexec/kdump on G200_SE rev42. + */ + WREG_GFX(6, 0x0d); WREG_GFX(7, 0x0f); WREG_GFX(8, 0x0f); diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index 17cfad6424db6..19622fb1fa35b 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -655,19 +655,23 @@ static void a6xx_set_cp_protect(struct msm_gpu *gpu) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); const u32 *regs = a6xx_protect; - unsigned i, count = ARRAY_SIZE(a6xx_protect), count_max = 32; - - BUILD_BUG_ON(ARRAY_SIZE(a6xx_protect) > 32); - BUILD_BUG_ON(ARRAY_SIZE(a650_protect) > 48); + unsigned i, count, count_max; if (adreno_is_a650(adreno_gpu)) { regs = a650_protect; count = ARRAY_SIZE(a650_protect); count_max = 48; + BUILD_BUG_ON(ARRAY_SIZE(a650_protect) > 48); } else if (adreno_is_a660_family(adreno_gpu)) { regs = a660_protect; count = ARRAY_SIZE(a660_protect); count_max = 48; + BUILD_BUG_ON(ARRAY_SIZE(a660_protect) > 48); + } else { + regs = a6xx_protect; + count = ARRAY_SIZE(a6xx_protect); + count_max = 32; + BUILD_BUG_ON(ARRAY_SIZE(a6xx_protect) > 32); } /* @@ -1710,7 +1714,7 @@ a6xx_create_private_address_space(struct msm_gpu *gpu) return ERR_CAST(mmu); return msm_gem_address_space_create(mmu, - "gpu", 0x100000000ULL, 0x1ffffffffULL); + "gpu", 0x100000000ULL, SZ_4G); } static uint32_t 
a6xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c index 1e648db439f9b..16ae0cccbbb1e 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c @@ -168,7 +168,6 @@ enum dpu_enc_rc_states { * @vsync_event_work: worker to handle vsync event for autorefresh * @topology: topology of the display * @idle_timeout: idle timeout duration in milliseconds - * @dp: msm_dp pointer, for DP encoders */ struct dpu_encoder_virt { struct drm_encoder base; @@ -207,8 +206,6 @@ struct dpu_encoder_virt { struct msm_display_topology topology; u32 idle_timeout; - - struct msm_dp *dp; }; #define to_dpu_encoder_virt(x) container_of(x, struct dpu_encoder_virt, base) @@ -1099,7 +1096,7 @@ static void _dpu_encoder_virt_enable_helper(struct drm_encoder *drm_enc) } - if (dpu_enc->disp_info.intf_type == DRM_MODE_CONNECTOR_DisplayPort && + if (dpu_enc->disp_info.intf_type == DRM_MODE_ENCODER_TMDS && dpu_enc->cur_master->hw_mdptop && dpu_enc->cur_master->hw_mdptop->ops.intf_audio_select) dpu_enc->cur_master->hw_mdptop->ops.intf_audio_select( @@ -2128,8 +2125,6 @@ int dpu_encoder_setup(struct drm_device *dev, struct drm_encoder *enc, timer_setup(&dpu_enc->vsync_event_timer, dpu_encoder_vsync_event_handler, 0); - else if (disp_info->intf_type == DRM_MODE_ENCODER_TMDS) - dpu_enc->dp = priv->dp[disp_info->h_tile_instance[0]]; INIT_DELAYED_WORK(&dpu_enc->delayed_off_work, dpu_encoder_off_work); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c index f9c83d6e427ad..24fbaf562d418 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c @@ -35,6 +35,14 @@ int dpu_rm_destroy(struct dpu_rm *rm) { int i; + for (i = 0; i < ARRAY_SIZE(rm->dspp_blks); i++) { + struct dpu_hw_dspp *hw; + + if (rm->dspp_blks[i]) { + hw = to_dpu_hw_dspp(rm->dspp_blks[i]); + 
dpu_hw_dspp_destroy(hw); + } + } for (i = 0; i < ARRAY_SIZE(rm->pingpong_blks); i++) { struct dpu_hw_pingpong *hw; diff --git a/drivers/gpu/drm/msm/dp/dp_ctrl.c b/drivers/gpu/drm/msm/dp/dp_ctrl.c index c724cb0bde9dc..8d1ea694d06cd 100644 --- a/drivers/gpu/drm/msm/dp/dp_ctrl.c +++ b/drivers/gpu/drm/msm/dp/dp_ctrl.c @@ -1365,60 +1365,44 @@ static int dp_ctrl_enable_stream_clocks(struct dp_ctrl_private *ctrl) return ret; } -int dp_ctrl_host_init(struct dp_ctrl *dp_ctrl, bool flip, bool reset) +void dp_ctrl_reset_irq_ctrl(struct dp_ctrl *dp_ctrl, bool enable) +{ + struct dp_ctrl_private *ctrl; + + ctrl = container_of(dp_ctrl, struct dp_ctrl_private, dp_ctrl); + + dp_catalog_ctrl_reset(ctrl->catalog); + + if (enable) + dp_catalog_ctrl_enable_irq(ctrl->catalog, enable); +} + +void dp_ctrl_phy_init(struct dp_ctrl *dp_ctrl) { struct dp_ctrl_private *ctrl; struct dp_io *dp_io; struct phy *phy; - if (!dp_ctrl) { - DRM_ERROR("Invalid input data\n"); - return -EINVAL; - } - ctrl = container_of(dp_ctrl, struct dp_ctrl_private, dp_ctrl); dp_io = &ctrl->parser->io; phy = dp_io->phy; - ctrl->dp_ctrl.orientation = flip; - - if (reset) - dp_catalog_ctrl_reset(ctrl->catalog); - - DRM_DEBUG_DP("flip=%d\n", flip); dp_catalog_ctrl_phy_reset(ctrl->catalog); phy_init(phy); - dp_catalog_ctrl_enable_irq(ctrl->catalog, true); - - return 0; } -/** - * dp_ctrl_host_deinit() - Uninitialize DP controller - * @dp_ctrl: Display Port Driver data - * - * Perform required steps to uninitialize DP controller - * and its resources. 
- */ -void dp_ctrl_host_deinit(struct dp_ctrl *dp_ctrl) +void dp_ctrl_phy_exit(struct dp_ctrl *dp_ctrl) { struct dp_ctrl_private *ctrl; struct dp_io *dp_io; struct phy *phy; - if (!dp_ctrl) { - DRM_ERROR("Invalid input data\n"); - return; - } - ctrl = container_of(dp_ctrl, struct dp_ctrl_private, dp_ctrl); dp_io = &ctrl->parser->io; phy = dp_io->phy; - dp_catalog_ctrl_enable_irq(ctrl->catalog, false); + dp_catalog_ctrl_phy_reset(ctrl->catalog); phy_exit(phy); - - DRM_DEBUG_DP("Host deinitialized successfully\n"); } static bool dp_ctrl_use_fixed_nvid(struct dp_ctrl_private *ctrl) @@ -1488,7 +1472,10 @@ static int dp_ctrl_deinitialize_mainlink(struct dp_ctrl_private *ctrl) } phy_power_off(phy); + + /* aux channel down, reinit phy */ phy_exit(phy); + phy_init(phy); return 0; } @@ -1761,6 +1748,9 @@ int dp_ctrl_on_link(struct dp_ctrl *dp_ctrl) /* end with failure */ break; /* lane == 1 already */ } + + /* stop link training before start re training */ + dp_ctrl_clear_training_pattern(ctrl); } } @@ -1893,8 +1883,14 @@ int dp_ctrl_off_link_stream(struct dp_ctrl *dp_ctrl) return ret; } + DRM_DEBUG_DP("Before, phy=%x init_count=%d power_on=%d\n", + (u32)(uintptr_t)phy, phy->init_count, phy->power_count); + phy_power_off(phy); + DRM_DEBUG_DP("After, phy=%x init_count=%d power_on=%d\n", + (u32)(uintptr_t)phy, phy->init_count, phy->power_count); + /* aux channel down, reinit phy */ phy_exit(phy); phy_init(phy); @@ -1903,23 +1899,6 @@ int dp_ctrl_off_link_stream(struct dp_ctrl *dp_ctrl) return ret; } -void dp_ctrl_off_phy(struct dp_ctrl *dp_ctrl) -{ - struct dp_ctrl_private *ctrl; - struct dp_io *dp_io; - struct phy *phy; - - ctrl = container_of(dp_ctrl, struct dp_ctrl_private, dp_ctrl); - dp_io = &ctrl->parser->io; - phy = dp_io->phy; - - dp_catalog_ctrl_reset(ctrl->catalog); - - phy_exit(phy); - - DRM_DEBUG_DP("DP off phy done\n"); -} - int dp_ctrl_off(struct dp_ctrl *dp_ctrl) { struct dp_ctrl_private *ctrl; @@ -1947,10 +1926,14 @@ int dp_ctrl_off(struct dp_ctrl *dp_ctrl) 
DRM_ERROR("Failed to disable link clocks. ret=%d\n", ret); } + DRM_DEBUG_DP("Before, phy=%x init_count=%d power_on=%d\n", + (u32)(uintptr_t)phy, phy->init_count, phy->power_count); + phy_power_off(phy); - phy_exit(phy); - DRM_DEBUG_DP("DP off done\n"); + DRM_DEBUG_DP("After, phy=%x init_count=%d power_on=%d\n", + (u32)(uintptr_t)phy, phy->init_count, phy->power_count); + return ret; } diff --git a/drivers/gpu/drm/msm/dp/dp_ctrl.h b/drivers/gpu/drm/msm/dp/dp_ctrl.h index 2363a2df9597b..2433edbc70a6d 100644 --- a/drivers/gpu/drm/msm/dp/dp_ctrl.h +++ b/drivers/gpu/drm/msm/dp/dp_ctrl.h @@ -19,12 +19,9 @@ struct dp_ctrl { u32 pixel_rate; }; -int dp_ctrl_host_init(struct dp_ctrl *dp_ctrl, bool flip, bool reset); -void dp_ctrl_host_deinit(struct dp_ctrl *dp_ctrl); int dp_ctrl_on_link(struct dp_ctrl *dp_ctrl); int dp_ctrl_on_stream(struct dp_ctrl *dp_ctrl); int dp_ctrl_off_link_stream(struct dp_ctrl *dp_ctrl); -void dp_ctrl_off_phy(struct dp_ctrl *dp_ctrl); int dp_ctrl_off(struct dp_ctrl *dp_ctrl); void dp_ctrl_push_idle(struct dp_ctrl *dp_ctrl); void dp_ctrl_isr(struct dp_ctrl *dp_ctrl); @@ -34,4 +31,9 @@ struct dp_ctrl *dp_ctrl_get(struct device *dev, struct dp_link *link, struct dp_power *power, struct dp_catalog *catalog, struct dp_parser *parser); +void dp_ctrl_reset_irq_ctrl(struct dp_ctrl *dp_ctrl, bool enable); +void dp_ctrl_phy_init(struct dp_ctrl *dp_ctrl); +void dp_ctrl_phy_exit(struct dp_ctrl *dp_ctrl); +void dp_ctrl_irq_phy_exit(struct dp_ctrl *dp_ctrl); + #endif /* _DP_CTRL_H_ */ diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c index 7cc4d21f20911..af9c09c308601 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.c +++ b/drivers/gpu/drm/msm/dp/dp_display.c @@ -83,6 +83,7 @@ struct dp_display_private { /* state variables */ bool core_initialized; + bool phy_initialized; bool hpd_irq_on; bool audio_supported; @@ -372,36 +373,45 @@ static int dp_display_process_hpd_high(struct dp_display_private *dp) return rc; } -static void 
dp_display_host_init(struct dp_display_private *dp, int reset) +static void dp_display_host_phy_init(struct dp_display_private *dp) { - bool flip = false; + DRM_DEBUG_DP("core_init=%d phy_init=%d\n", + dp->core_initialized, dp->phy_initialized); - DRM_DEBUG_DP("core_initialized=%d\n", dp->core_initialized); - if (dp->core_initialized) { - DRM_DEBUG_DP("DP core already initialized\n"); - return; + if (!dp->phy_initialized) { + dp_ctrl_phy_init(dp->ctrl); + dp->phy_initialized = true; } +} - if (dp->usbpd->orientation == ORIENTATION_CC2) - flip = true; +static void dp_display_host_phy_exit(struct dp_display_private *dp) +{ + DRM_DEBUG_DP("core_init=%d phy_init=%d\n", + dp->core_initialized, dp->phy_initialized); - dp_power_init(dp->power, flip); - dp_ctrl_host_init(dp->ctrl, flip, reset); + if (dp->phy_initialized) { + dp_ctrl_phy_exit(dp->ctrl); + dp->phy_initialized = false; + } +} + +static void dp_display_host_init(struct dp_display_private *dp) +{ + DRM_DEBUG_DP("core_initialized=%d\n", dp->core_initialized); + + dp_power_init(dp->power, false); + dp_ctrl_reset_irq_ctrl(dp->ctrl, true); dp_aux_init(dp->aux); dp->core_initialized = true; } static void dp_display_host_deinit(struct dp_display_private *dp) { - if (!dp->core_initialized) { - DRM_DEBUG_DP("DP core not initialized\n"); - return; - } + DRM_DEBUG_DP("core_initialized=%d\n", dp->core_initialized); - dp_ctrl_host_deinit(dp->ctrl); + dp_ctrl_reset_irq_ctrl(dp->ctrl, false); dp_aux_deinit(dp->aux); dp_power_deinit(dp->power); - dp->core_initialized = false; } @@ -409,7 +419,7 @@ static int dp_display_usbpd_configure_cb(struct device *dev) { struct dp_display_private *dp = dev_get_dp_display_private(dev); - dp_display_host_init(dp, false); + dp_display_host_phy_init(dp); return dp_display_process_hpd_high(dp); } @@ -530,11 +540,6 @@ static int dp_hpd_plug_handle(struct dp_display_private *dp, u32 data) ret = dp_display_usbpd_configure_cb(&dp->pdev->dev); if (ret) { /* link train failed */ dp->hpd_state = 
ST_DISCONNECTED; - - if (ret == -ECONNRESET) { /* cable unplugged */ - dp->core_initialized = false; - } - } else { /* start sentinel checking in case of missing uevent */ dp_add_event(dp, EV_CONNECT_PENDING_TIMEOUT, 0, tout); @@ -546,6 +551,12 @@ static int dp_hpd_plug_handle(struct dp_display_private *dp, u32 data) mutex_unlock(&dp->event_mutex); + /* + * add fail safe mode outside event_mutex scope + * to avoid potential circular lock with drm thread + */ + dp_panel_add_fail_safe_mode(dp->dp_display.connector); + /* uevent will complete connection part */ return 0; }; @@ -604,8 +615,7 @@ static int dp_hpd_unplug_handle(struct dp_display_private *dp, u32 data) if (state == ST_DISCONNECTED) { /* triggered by irq_hdp with sink_count = 0 */ if (dp->link->sink_count == 0) { - dp_ctrl_off_phy(dp->ctrl); - dp->core_initialized = false; + dp_display_host_phy_exit(dp); } mutex_unlock(&dp->event_mutex); return 0; @@ -667,7 +677,6 @@ static int dp_disconnect_pending_timeout(struct dp_display_private *dp, u32 data static int dp_irq_hpd_handle(struct dp_display_private *dp, u32 data) { u32 state; - int ret; mutex_lock(&dp->event_mutex); @@ -692,16 +701,8 @@ static int dp_irq_hpd_handle(struct dp_display_private *dp, u32 data) return 0; } - /* - * dp core (ahb/aux clks) must be initialized before - * irq_hpd be handled - */ - if (dp->core_initialized) { - ret = dp_display_usbpd_attention_cb(&dp->pdev->dev); - if (ret == -ECONNRESET) { /* cable unplugged */ - dp->core_initialized = false; - } - } + dp_display_usbpd_attention_cb(&dp->pdev->dev); + DRM_DEBUG_DP("hpd_state=%d\n", state); mutex_unlock(&dp->event_mutex); @@ -892,12 +893,19 @@ static int dp_display_disable(struct dp_display_private *dp, u32 data) dp_display->audio_enabled = false; - /* triggered by irq_hpd with sink_count = 0 */ if (dp->link->sink_count == 0) { + /* + * irq_hpd with sink_count = 0 + * hdmi unplugged out of dongle + */ dp_ctrl_off_link_stream(dp->ctrl); } else { + /* + * unplugged interrupt + *
dongle unplugged out of DUT + */ dp_ctrl_off(dp->ctrl); - dp->core_initialized = false; + dp_display_host_phy_exit(dp); } dp_display->power_on = false; @@ -1027,7 +1035,7 @@ void msm_dp_snapshot(struct msm_disp_state *disp_state, struct msm_dp *dp) static void dp_display_config_hpd(struct dp_display_private *dp) { - dp_display_host_init(dp, true); + dp_display_host_init(dp); dp_catalog_ctrl_hpd_config(dp->catalog); /* Enable interrupt first time @@ -1306,20 +1314,23 @@ static int dp_pm_resume(struct device *dev) dp->hpd_state = ST_DISCONNECTED; /* turn on dp ctrl/phy */ - dp_display_host_init(dp, true); + dp_display_host_init(dp); dp_catalog_ctrl_hpd_config(dp->catalog); - /* - * set sink to normal operation mode -- D0 - * before dpcd read - */ - dp_link_psm_config(dp->link, &dp->panel->link_info, false); if (dp_catalog_link_is_connected(dp->catalog)) { + /* + * set sink to normal operation mode -- D0 + * before dpcd read + */ + dp_display_host_phy_init(dp); + dp_link_psm_config(dp->link, &dp->panel->link_info, false); sink_count = drm_dp_read_sink_count(dp->aux); if (sink_count < 0) sink_count = 0; + + dp_display_host_phy_exit(dp); } dp->link->sink_count = sink_count; @@ -1358,18 +1369,16 @@ static int dp_pm_suspend(struct device *dev) DRM_DEBUG_DP("Before, core_inited=%d power_on=%d\n", dp->core_initialized, dp_display->power_on); - if (dp->core_initialized == true) { - /* mainlink enabled */ - if (dp_power_clk_status(dp->power, DP_CTRL_PM)) - dp_ctrl_off_link_stream(dp->ctrl); + /* mainlink enabled */ + if (dp_power_clk_status(dp->power, DP_CTRL_PM)) + dp_ctrl_off_link_stream(dp->ctrl); - dp_display_host_deinit(dp); - } - - dp->hpd_state = ST_SUSPENDED; + dp_display_host_phy_exit(dp); /* host_init will be called at pm_resume */ - dp->core_initialized = false; + dp_display_host_deinit(dp); + + dp->hpd_state = ST_SUSPENDED; DRM_DEBUG_DP("After, core_inited=%d power_on=%d\n", dp->core_initialized, dp_display->power_on); @@ -1460,6 +1469,7 @@ int 
msm_dp_modeset_init(struct msm_dp *dp_display, struct drm_device *dev, struct drm_encoder *encoder) { struct msm_drm_private *priv; + struct dp_display_private *dp_priv; int ret; if (WARN_ON(!encoder) || WARN_ON(!dp_display) || WARN_ON(!dev)) @@ -1468,6 +1478,8 @@ int msm_dp_modeset_init(struct msm_dp *dp_display, struct drm_device *dev, priv = dev->dev_private; dp_display->drm_dev = dev; + dp_priv = container_of(dp_display, struct dp_display_private, dp_display); + ret = dp_display_request_irq(dp_display); if (ret) { DRM_ERROR("request_irq failed, ret=%d\n", ret); @@ -1485,6 +1497,8 @@ int msm_dp_modeset_init(struct msm_dp *dp_display, struct drm_device *dev, return ret; } + dp_priv->panel->connector = dp_display->connector; + priv->connectors[priv->num_connectors++] = dp_display->connector; dp_display->bridge = msm_dp_bridge_init(dp_display, dev, encoder); @@ -1535,7 +1549,7 @@ int msm_dp_display_enable(struct msm_dp *dp, struct drm_encoder *encoder) state = dp_display->hpd_state; if (state == ST_DISPLAY_OFF) - dp_display_host_init(dp_display, true); + dp_display_host_phy_init(dp_display); dp_display_enable(dp_display, 0); diff --git a/drivers/gpu/drm/msm/dp/dp_drm.c b/drivers/gpu/drm/msm/dp/dp_drm.c index d4d360d19ebad..26ef41a4c1b68 100644 --- a/drivers/gpu/drm/msm/dp/dp_drm.c +++ b/drivers/gpu/drm/msm/dp/dp_drm.c @@ -169,16 +169,6 @@ struct drm_connector *dp_drm_connector_init(struct msm_dp *dp_display) drm_connector_attach_encoder(connector, dp_display->encoder); - if (dp_display->panel_bridge) { - ret = drm_bridge_attach(dp_display->encoder, - dp_display->panel_bridge, NULL, - DRM_BRIDGE_ATTACH_NO_CONNECTOR); - if (ret < 0) { - DRM_ERROR("failed to attach panel bridge: %d\n", ret); - return ERR_PTR(ret); - } - } - return connector; } @@ -246,5 +236,16 @@ struct drm_bridge *msm_dp_bridge_init(struct msm_dp *dp_display, struct drm_devi return ERR_PTR(rc); } + if (dp_display->panel_bridge) { + rc = drm_bridge_attach(dp_display->encoder, + 
dp_display->panel_bridge, bridge, + DRM_BRIDGE_ATTACH_NO_CONNECTOR); + if (rc < 0) { + DRM_ERROR("failed to attach panel bridge: %d\n", rc); + drm_bridge_remove(bridge); + return ERR_PTR(rc); + } + } + return bridge; } diff --git a/drivers/gpu/drm/msm/dp/dp_panel.c b/drivers/gpu/drm/msm/dp/dp_panel.c index 71db10c0f262d..26c3653c99ec9 100644 --- a/drivers/gpu/drm/msm/dp/dp_panel.c +++ b/drivers/gpu/drm/msm/dp/dp_panel.c @@ -151,6 +151,15 @@ static int dp_panel_update_modes(struct drm_connector *connector, return rc; } +void dp_panel_add_fail_safe_mode(struct drm_connector *connector) +{ + /* fail safe edid */ + mutex_lock(&connector->dev->mode_config.mutex); + if (drm_add_modes_noedid(connector, 640, 480)) + drm_set_preferred_mode(connector, 640, 480); + mutex_unlock(&connector->dev->mode_config.mutex); +} + int dp_panel_read_sink_caps(struct dp_panel *dp_panel, struct drm_connector *connector) { @@ -207,11 +216,7 @@ int dp_panel_read_sink_caps(struct dp_panel *dp_panel, goto end; } - /* fail safe edid */ - mutex_lock(&connector->dev->mode_config.mutex); - if (drm_add_modes_noedid(connector, 640, 480)) - drm_set_preferred_mode(connector, 640, 480); - mutex_unlock(&connector->dev->mode_config.mutex); + dp_panel_add_fail_safe_mode(connector); } if (panel->aux_cfg_update_done) { diff --git a/drivers/gpu/drm/msm/dp/dp_panel.h b/drivers/gpu/drm/msm/dp/dp_panel.h index 9023e5bb4b8b2..99739ea679a77 100644 --- a/drivers/gpu/drm/msm/dp/dp_panel.h +++ b/drivers/gpu/drm/msm/dp/dp_panel.h @@ -59,6 +59,7 @@ int dp_panel_init_panel_info(struct dp_panel *dp_panel); int dp_panel_deinit(struct dp_panel *dp_panel); int dp_panel_timing_cfg(struct dp_panel *dp_panel); void dp_panel_dump_regs(struct dp_panel *dp_panel); +void dp_panel_add_fail_safe_mode(struct drm_connector *connector); int dp_panel_read_sink_caps(struct dp_panel *dp_panel, struct drm_connector *connector); u32 dp_panel_get_mode_bpp(struct dp_panel *dp_panel, u32 mode_max_bpp, diff --git 
a/drivers/gpu/drm/msm/dsi/dsi_host.c b/drivers/gpu/drm/msm/dsi/dsi_host.c index 6b3ced4aaaf5d..3a3f53f0c8ae1 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_host.c +++ b/drivers/gpu/drm/msm/dsi/dsi_host.c @@ -1877,7 +1877,7 @@ int msm_dsi_host_init(struct msm_dsi *msm_dsi) /* do not autoenable, will be enabled later */ ret = devm_request_irq(&pdev->dev, msm_host->irq, dsi_host_irq, - IRQF_TRIGGER_HIGH | IRQF_ONESHOT | IRQF_NO_AUTOEN, + IRQF_TRIGGER_HIGH | IRQF_NO_AUTOEN, "dsi_isr", msm_host); if (ret < 0) { dev_err(&pdev->dev, "failed to request IRQ%u: %d\n", diff --git a/drivers/gpu/drm/msm/dsi/dsi_manager.c b/drivers/gpu/drm/msm/dsi/dsi_manager.c index f19bae475c966..cd7b41b7d5180 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_manager.c +++ b/drivers/gpu/drm/msm/dsi/dsi_manager.c @@ -641,7 +641,7 @@ struct drm_connector *msm_dsi_manager_connector_init(u8 id) return connector; fail: - connector->funcs->destroy(msm_dsi->connector); + connector->funcs->destroy(connector); return ERR_PTR(ret); } diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c index d8128f50b0dd5..0b782cc18b3f4 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c @@ -562,7 +562,9 @@ static int pll_10nm_register(struct dsi_pll_10nm *pll_10nm, struct clk_hw **prov char clk_name[32], parent[32], vco_name[32]; char parent2[32], parent3[32], parent4[32]; struct clk_init_data vco_init = { - .parent_names = (const char *[]){ "xo" }, + .parent_data = &(const struct clk_parent_data) { + .fw_name = "ref", + }, .num_parents = 1, .name = vco_name, .flags = CLK_IGNORE_UNUSED, diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c index 7414966f198e3..75557ac99adf1 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c @@ -802,7 +802,9 @@ static int pll_14nm_register(struct dsi_pll_14nm *pll_14nm, struct clk_hw **prov { char 
clk_name[32], parent[32], vco_name[32]; struct clk_init_data vco_init = { - .parent_names = (const char *[]){ "xo" }, + .parent_data = &(const struct clk_parent_data) { + .fw_name = "ref", + }, .num_parents = 1, .name = vco_name, .flags = CLK_IGNORE_UNUSED, diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm.c index 2da673a2add69..48eab80b548e1 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm.c @@ -521,7 +521,9 @@ static int pll_28nm_register(struct dsi_pll_28nm *pll_28nm, struct clk_hw **prov { char clk_name[32], parent1[32], parent2[32], vco_name[32]; struct clk_init_data vco_init = { - .parent_names = (const char *[]){ "xo" }, + .parent_data = &(const struct clk_parent_data) { + .fw_name = "ref", .name = "xo", + }, .num_parents = 1, .name = vco_name, .flags = CLK_IGNORE_UNUSED, diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c index 71ed4aa0dc67e..fc56cdcc9ad64 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c @@ -385,7 +385,9 @@ static int pll_28nm_register(struct dsi_pll_28nm *pll_28nm, struct clk_hw **prov { char *clk_name, *parent_name, *vco_name; struct clk_init_data vco_init = { - .parent_names = (const char *[]){ "pxo" }, + .parent_data = &(const struct clk_parent_data) { + .fw_name = "ref", + }, .num_parents = 1, .flags = CLK_IGNORE_UNUSED, .ops = &clk_ops_dsi_pll_28nm_vco, diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c index 079613d2aaa98..6e506feb111fd 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c @@ -588,7 +588,9 @@ static int pll_7nm_register(struct dsi_pll_7nm *pll_7nm, struct clk_hw **provide char clk_name[32], parent[32], vco_name[32]; char parent2[32], parent3[32], parent4[32]; struct clk_init_data vco_init = { - 
.parent_names = (const char *[]){ "bi_tcxo" }, + .parent_data = &(const struct clk_parent_data) { + .fw_name = "ref", + }, .num_parents = 1, .name = vco_name, .flags = CLK_IGNORE_UNUSED, @@ -862,20 +864,26 @@ static int dsi_7nm_phy_enable(struct msm_dsi_phy *phy, /* Alter PHY configurations if data rate less than 1.5GHZ*/ less_than_1500_mhz = (clk_req->bitclk_rate <= 1500000000); - /* For C-PHY, no low power settings for lower clk rate */ - if (phy->cphy_mode) - less_than_1500_mhz = false; - if (phy->cfg->quirks & DSI_PHY_7NM_QUIRK_V4_1) { vreg_ctrl_0 = less_than_1500_mhz ? 0x53 : 0x52; - glbl_rescode_top_ctrl = less_than_1500_mhz ? 0x3d : 0x00; - glbl_rescode_bot_ctrl = less_than_1500_mhz ? 0x39 : 0x3c; + if (phy->cphy_mode) { + glbl_rescode_top_ctrl = 0x00; + glbl_rescode_bot_ctrl = 0x3c; + } else { + glbl_rescode_top_ctrl = less_than_1500_mhz ? 0x3d : 0x00; + glbl_rescode_bot_ctrl = less_than_1500_mhz ? 0x39 : 0x3c; + } glbl_str_swi_cal_sel_ctrl = 0x00; glbl_hstx_str_ctrl_0 = 0x88; } else { vreg_ctrl_0 = less_than_1500_mhz ? 0x5B : 0x59; - glbl_str_swi_cal_sel_ctrl = less_than_1500_mhz ? 0x03 : 0x00; - glbl_hstx_str_ctrl_0 = less_than_1500_mhz ? 0x66 : 0x88; + if (phy->cphy_mode) { + glbl_str_swi_cal_sel_ctrl = 0x03; + glbl_hstx_str_ctrl_0 = 0x66; + } else { + glbl_str_swi_cal_sel_ctrl = less_than_1500_mhz ? 0x03 : 0x00; + glbl_hstx_str_ctrl_0 = less_than_1500_mhz ? 
0x66 : 0x88; + } glbl_rescode_top_ctrl = 0x03; glbl_rescode_bot_ctrl = 0x3c; } diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index 02b9ae65a96a8..a4f61972667b5 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -926,6 +926,7 @@ void msm_gem_describe(struct drm_gem_object *obj, struct seq_file *m, get_pid_task(aspace->pid, PIDTYPE_PID); if (task) { comm = kstrdup(task->comm, GFP_KERNEL); + put_task_struct(task); } else { comm = NULL; } diff --git a/drivers/gpu/drm/msm/msm_gpu_devfreq.c b/drivers/gpu/drm/msm/msm_gpu_devfreq.c index 9bf319be11f60..12641616acd30 100644 --- a/drivers/gpu/drm/msm/msm_gpu_devfreq.c +++ b/drivers/gpu/drm/msm/msm_gpu_devfreq.c @@ -83,6 +83,12 @@ static struct devfreq_dev_profile msm_devfreq_profile = { static void msm_devfreq_boost_work(struct kthread_work *work); static void msm_devfreq_idle_work(struct kthread_work *work); +static bool has_devfreq(struct msm_gpu *gpu) +{ + struct msm_gpu_devfreq *df = &gpu->devfreq; + return !!df->devfreq; +} + void msm_devfreq_init(struct msm_gpu *gpu) { struct msm_gpu_devfreq *df = &gpu->devfreq; @@ -149,6 +155,9 @@ void msm_devfreq_cleanup(struct msm_gpu *gpu) { struct msm_gpu_devfreq *df = &gpu->devfreq; + if (!has_devfreq(gpu)) + return; + devfreq_cooling_unregister(gpu->cooling); dev_pm_qos_remove_request(&df->boost_freq); dev_pm_qos_remove_request(&df->idle_freq); @@ -156,16 +165,24 @@ void msm_devfreq_cleanup(struct msm_gpu *gpu) void msm_devfreq_resume(struct msm_gpu *gpu) { - gpu->devfreq.busy_cycles = 0; - gpu->devfreq.time = ktime_get(); + struct msm_gpu_devfreq *df = &gpu->devfreq; - devfreq_resume_device(gpu->devfreq.devfreq); + if (!has_devfreq(gpu)) + return; + + df->busy_cycles = 0; + df->time = ktime_get(); + + devfreq_resume_device(df->devfreq); } void msm_devfreq_suspend(struct msm_gpu *gpu) { struct msm_gpu_devfreq *df = &gpu->devfreq; + if (!has_devfreq(gpu)) + return; + devfreq_suspend_device(df->devfreq); 
cancel_idle_work(df); @@ -185,6 +202,9 @@ void msm_devfreq_boost(struct msm_gpu *gpu, unsigned factor) struct msm_gpu_devfreq *df = &gpu->devfreq; uint64_t freq; + if (!has_devfreq(gpu)) + return; + freq = get_freq(gpu); freq *= factor; @@ -207,7 +227,7 @@ void msm_devfreq_active(struct msm_gpu *gpu) struct devfreq_dev_status status; unsigned int idle_time; - if (!df->devfreq) + if (!has_devfreq(gpu)) return; /* @@ -253,7 +273,7 @@ void msm_devfreq_idle(struct msm_gpu *gpu) { struct msm_gpu_devfreq *df = &gpu->devfreq; - if (!df->devfreq) + if (!has_devfreq(gpu)) return; msm_hrtimer_queue_work(&df->idle_work, ms_to_ktime(1), diff --git a/drivers/gpu/drm/nouveau/nouveau_backlight.c b/drivers/gpu/drm/nouveau/nouveau_backlight.c index ae2f2abc8f5a5..daf9f87477ba1 100644 --- a/drivers/gpu/drm/nouveau/nouveau_backlight.c +++ b/drivers/gpu/drm/nouveau/nouveau_backlight.c @@ -101,7 +101,6 @@ nv40_backlight_init(struct nouveau_encoder *encoder, if (!(nvif_rd32(device, NV40_PMC_BACKLIGHT) & NV40_PMC_BACKLIGHT_MASK)) return -ENODEV; - props->type = BACKLIGHT_RAW; props->max_brightness = 31; *ops = &nv40_bl_ops; return 0; @@ -294,7 +293,8 @@ nv50_backlight_init(struct nouveau_backlight *bl, struct nouveau_drm *drm = nouveau_drm(nv_encoder->base.base.dev); struct nvif_object *device = &drm->client.device.object; - if (!nvif_rd32(device, NV50_PDISP_SOR_PWM_CTL(ffs(nv_encoder->dcb->or) - 1))) + if (!nvif_rd32(device, NV50_PDISP_SOR_PWM_CTL(ffs(nv_encoder->dcb->or) - 1)) || + nv_conn->base.status != connector_status_connected) return -ENODEV; if (nv_conn->type == DCB_CONNECTOR_eDP) { @@ -342,7 +342,6 @@ nv50_backlight_init(struct nouveau_backlight *bl, else *ops = &nva3_bl_ops; - props->type = BACKLIGHT_RAW; props->max_brightness = 100; return 0; @@ -410,6 +409,7 @@ nouveau_backlight_init(struct drm_connector *connector) goto fail_alloc; } + props.type = BACKLIGHT_RAW; bl->dev = backlight_device_register(backlight_name, connector->kdev, nv_encoder, ops, &props); if 
(IS_ERR(bl->dev)) { diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/acr/hsfw.c b/drivers/gpu/drm/nouveau/nvkm/subdev/acr/hsfw.c index 667fa016496ee..a6ea89a5d51ab 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/acr/hsfw.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/acr/hsfw.c @@ -142,11 +142,12 @@ nvkm_acr_hsfw_load_bl(struct nvkm_acr *acr, const char *name, int ver, hsfw->imem_size = desc->code_size; hsfw->imem_tag = desc->start_tag; - hsfw->imem = kmalloc(desc->code_size, GFP_KERNEL); - memcpy(hsfw->imem, data + desc->code_off, desc->code_size); - + hsfw->imem = kmemdup(data + desc->code_off, desc->code_size, GFP_KERNEL); nvkm_firmware_put(fw); - return 0; + if (!hsfw->imem) + return -ENOMEM; + else + return 0; } int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gm20b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gm20b.c index e1772211b0a4b..612310d5d4812 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gm20b.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gm20b.c @@ -216,6 +216,7 @@ gm20b_pmu = { .intr = gt215_pmu_intr, .recv = gm20b_pmu_recv, .initmsg = gm20b_pmu_initmsg, + .reset = gf100_pmu_reset, }; #if IS_ENABLED(CONFIG_ARCH_TEGRA_210_SOC) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp102.c index 6bf7fc1bd1e3b..1a6f9c3af5ecd 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp102.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp102.c @@ -23,7 +23,7 @@ */ #include "priv.h" -static void +void gp102_pmu_reset(struct nvkm_pmu *pmu) { struct nvkm_device *device = pmu->subdev.device; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp10b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp10b.c index ba1583bb618b2..94cfb1791af6e 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp10b.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp10b.c @@ -83,6 +83,7 @@ gp10b_pmu = { .intr = gt215_pmu_intr, .recv = gm20b_pmu_recv, .initmsg = gm20b_pmu_initmsg, + .reset = gp102_pmu_reset, }; #if 
IS_ENABLED(CONFIG_ARCH_TEGRA_210_SOC) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/priv.h index bcaade758ff72..21abf31f44420 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/priv.h @@ -41,6 +41,7 @@ int gt215_pmu_send(struct nvkm_pmu *, u32[2], u32, u32, u32, u32); bool gf100_pmu_enabled(struct nvkm_pmu *); void gf100_pmu_reset(struct nvkm_pmu *); +void gp102_pmu_reset(struct nvkm_pmu *pmu); void gk110_pmu_pgob(struct nvkm_pmu *, bool); diff --git a/drivers/gpu/drm/panel/panel-ilitek-ili9341.c b/drivers/gpu/drm/panel/panel-ilitek-ili9341.c index 2c3378a259b1e..e1542451ef9d0 100644 --- a/drivers/gpu/drm/panel/panel-ilitek-ili9341.c +++ b/drivers/gpu/drm/panel/panel-ilitek-ili9341.c @@ -612,8 +612,10 @@ static int ili9341_dbi_probe(struct spi_device *spi, struct gpio_desc *dc, int ret; vcc = devm_regulator_get_optional(dev, "vcc"); - if (IS_ERR(vcc)) + if (IS_ERR(vcc)) { dev_err(dev, "get optional vcc failed\n"); + vcc = NULL; + } dbidev = devm_drm_dev_alloc(dev, &ili9341_dbi_driver, struct mipi_dbi_dev, drm); diff --git a/drivers/gpu/drm/panfrost/panfrost_gpu.c b/drivers/gpu/drm/panfrost/panfrost_gpu.c index bbe628b306ee3..f8355de6e335d 100644 --- a/drivers/gpu/drm/panfrost/panfrost_gpu.c +++ b/drivers/gpu/drm/panfrost/panfrost_gpu.c @@ -360,8 +360,11 @@ int panfrost_gpu_init(struct panfrost_device *pfdev) panfrost_gpu_init_features(pfdev); - dma_set_mask_and_coherent(pfdev->dev, + err = dma_set_mask_and_coherent(pfdev->dev, DMA_BIT_MASK(FIELD_GET(0xff00, pfdev->features.mmu_features))); + if (err) + return err; + dma_set_max_seg_size(pfdev->dev, UINT_MAX); irq = platform_get_irq_byname(to_platform_device(pfdev->dev), "gpu"); diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c index 607ad5620bd99..1546abcadacf4 100644 --- a/drivers/gpu/drm/radeon/radeon_connectors.c +++ 
b/drivers/gpu/drm/radeon/radeon_connectors.c @@ -204,7 +204,7 @@ int radeon_get_monitor_bpc(struct drm_connector *connector) /* Check if bpc is within clock limit. Try to degrade gracefully otherwise */ if ((bpc == 12) && (mode_clock * 3/2 > max_tmds_clock)) { - if ((connector->display_info.edid_hdmi_dc_modes & DRM_EDID_HDMI_DC_30) && + if ((connector->display_info.edid_hdmi_rgb444_dc_modes & DRM_EDID_HDMI_DC_30) && (mode_clock * 5/4 <= max_tmds_clock)) bpc = 10; else diff --git a/drivers/gpu/drm/selftests/test-drm_dp_mst_helper.c b/drivers/gpu/drm/selftests/test-drm_dp_mst_helper.c index 6b4759ed6bfd4..c491429f1a029 100644 --- a/drivers/gpu/drm/selftests/test-drm_dp_mst_helper.c +++ b/drivers/gpu/drm/selftests/test-drm_dp_mst_helper.c @@ -131,8 +131,10 @@ sideband_msg_req_encode_decode(struct drm_dp_sideband_msg_req_body *in) return false; txmsg = kzalloc(sizeof(*txmsg), GFP_KERNEL); - if (!txmsg) + if (!txmsg) { + kfree(out); return false; + } drm_dp_encode_sideband_req(in, txmsg); ret = drm_dp_decode_sideband_req(txmsg, out); diff --git a/drivers/gpu/drm/sprd/sprd_dpu.c b/drivers/gpu/drm/sprd/sprd_dpu.c index 06a3414ee43a3..1637203ea1036 100644 --- a/drivers/gpu/drm/sprd/sprd_dpu.c +++ b/drivers/gpu/drm/sprd/sprd_dpu.c @@ -790,6 +790,11 @@ static int sprd_dpu_context_init(struct sprd_dpu *dpu, int ret; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) { + dev_err(dev, "failed to get I/O resource\n"); + return -EINVAL; + } + ctx->base = devm_ioremap(dev, res->start, resource_size(res)); if (!ctx->base) { dev_err(dev, "failed to map dpu registers\n"); diff --git a/drivers/gpu/drm/sprd/sprd_drm.c b/drivers/gpu/drm/sprd/sprd_drm.c index a077e2d4d7217..af2be97d5ed08 100644 --- a/drivers/gpu/drm/sprd/sprd_drm.c +++ b/drivers/gpu/drm/sprd/sprd_drm.c @@ -155,7 +155,7 @@ static void sprd_drm_shutdown(struct platform_device *pdev) struct drm_device *drm = platform_get_drvdata(pdev); if (!drm) { - drm_warn(drm, "drm device is not available, no shutdown\n"); 
+ dev_warn(&pdev->dev, "drm device is not available, no shutdown\n"); return; } diff --git a/drivers/gpu/drm/sprd/sprd_dsi.c b/drivers/gpu/drm/sprd/sprd_dsi.c index 911b3cddc2640..12b67a5d59231 100644 --- a/drivers/gpu/drm/sprd/sprd_dsi.c +++ b/drivers/gpu/drm/sprd/sprd_dsi.c @@ -907,6 +907,11 @@ static int sprd_dsi_context_init(struct sprd_dsi *dsi, struct resource *res; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) { + dev_err(dev, "failed to get I/O resource\n"); + return -EINVAL; + } + ctx->base = devm_ioremap(dev, res->start, resource_size(res)); if (!ctx->base) { drm_err(dsi->drm, "failed to map dsi host registers\n"); diff --git a/drivers/gpu/drm/tegra/dp.c b/drivers/gpu/drm/tegra/dp.c index 70dfb7d1dec55..f5535eb04c6b1 100644 --- a/drivers/gpu/drm/tegra/dp.c +++ b/drivers/gpu/drm/tegra/dp.c @@ -549,6 +549,15 @@ static void drm_dp_link_get_adjustments(struct drm_dp_link *link, { struct drm_dp_link_train_set *adjust = &link->train.adjust; unsigned int i; + u8 post_cursor; + int err; + + err = drm_dp_dpcd_read(link->aux, DP_ADJUST_REQUEST_POST_CURSOR2, + &post_cursor, sizeof(post_cursor)); + if (err < 0) { + DRM_ERROR("failed to read post_cursor2: %d\n", err); + post_cursor = 0; + } for (i = 0; i < link->lanes; i++) { adjust->voltage_swing[i] = @@ -560,7 +569,7 @@ static void drm_dp_link_get_adjustments(struct drm_dp_link *link, DP_TRAIN_PRE_EMPHASIS_SHIFT; adjust->post_cursor[i] = - drm_dp_get_adjust_request_post_cursor(status, i); + (post_cursor >> (i << 1)) & 0x3; } } diff --git a/drivers/gpu/drm/tegra/dsi.c b/drivers/gpu/drm/tegra/dsi.c index f46d377f0c304..de1333dc0d867 100644 --- a/drivers/gpu/drm/tegra/dsi.c +++ b/drivers/gpu/drm/tegra/dsi.c @@ -1538,8 +1538,10 @@ static int tegra_dsi_ganged_probe(struct tegra_dsi *dsi) dsi->slave = platform_get_drvdata(gangster); of_node_put(np); - if (!dsi->slave) + if (!dsi->slave) { + put_device(&gangster->dev); return -EPROBE_DEFER; + } dsi->slave->master = dsi; } diff --git 
a/drivers/gpu/drm/tiny/simpledrm.c b/drivers/gpu/drm/tiny/simpledrm.c index 04146da2d1d8e..11576e0297e41 100644 --- a/drivers/gpu/drm/tiny/simpledrm.c +++ b/drivers/gpu/drm/tiny/simpledrm.c @@ -798,6 +798,9 @@ static int simpledrm_device_init_modeset(struct simpledrm_device *sdev) if (ret) return ret; drm_connector_helper_add(connector, &simpledrm_connector_helper_funcs); + drm_connector_set_panel_orientation_with_quirk(connector, + DRM_MODE_PANEL_ORIENTATION_UNKNOWN, + mode->hdisplay, mode->vdisplay); formats = simpledrm_device_formats(sdev, &nformats); diff --git a/drivers/gpu/drm/v3d/v3d_drv.c b/drivers/gpu/drm/v3d/v3d_drv.c index bd46396a1ae07..1afcd54fbbd53 100644 --- a/drivers/gpu/drm/v3d/v3d_drv.c +++ b/drivers/gpu/drm/v3d/v3d_drv.c @@ -219,6 +219,7 @@ static int v3d_platform_drm_probe(struct platform_device *pdev) int ret; u32 mmu_debug; u32 ident1; + u64 mask; v3d = devm_drm_dev_alloc(dev, &v3d_drm_driver, struct v3d_dev, drm); if (IS_ERR(v3d)) @@ -237,8 +238,11 @@ static int v3d_platform_drm_probe(struct platform_device *pdev) return ret; mmu_debug = V3D_READ(V3D_MMU_DEBUG_INFO); - dma_set_mask_and_coherent(dev, - DMA_BIT_MASK(30 + V3D_GET_FIELD(mmu_debug, V3D_MMU_PA_WIDTH))); + mask = DMA_BIT_MASK(30 + V3D_GET_FIELD(mmu_debug, V3D_MMU_PA_WIDTH)); + ret = dma_set_mask_and_coherent(dev, mask); + if (ret) + return ret; + v3d->va_width = 30 + V3D_GET_FIELD(mmu_debug, V3D_MMU_VA_WIDTH); ident1 = V3D_READ(V3D_HUB_IDENT1); diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c index c7ed2e1cbab6e..92bc0faee84f3 100644 --- a/drivers/gpu/drm/v3d/v3d_gem.c +++ b/drivers/gpu/drm/v3d/v3d_gem.c @@ -798,7 +798,7 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data, if (!render->base.perfmon) { ret = -ENOENT; - goto fail; + goto fail_perfmon; } } @@ -847,6 +847,7 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data, fail_unreserve: mutex_unlock(&v3d->sched_lock); +fail_perfmon: drm_gem_unlock_reservations(last_job->bo, last_job->bo_count, 
&acquire_ctx); fail: @@ -1027,7 +1028,7 @@ v3d_submit_csd_ioctl(struct drm_device *dev, void *data, args->perfmon_id); if (!job->base.perfmon) { ret = -ENOENT; - goto fail; + goto fail_perfmon; } } @@ -1056,6 +1057,7 @@ v3d_submit_csd_ioctl(struct drm_device *dev, void *data, fail_unreserve: mutex_unlock(&v3d->sched_lock); +fail_perfmon: drm_gem_unlock_reservations(clean_job->bo, clean_job->bo_count, &acquire_ctx); fail: diff --git a/drivers/gpu/drm/virtio/virtgpu_gem.c b/drivers/gpu/drm/virtio/virtgpu_gem.c index 2de61b63ef91d..48d3c9955f0dd 100644 --- a/drivers/gpu/drm/virtio/virtgpu_gem.c +++ b/drivers/gpu/drm/virtio/virtgpu_gem.c @@ -248,6 +248,9 @@ void virtio_gpu_array_put_free(struct virtio_gpu_object_array *objs) { u32 i; + if (!objs) + return; + for (i = 0; i < objs->nents; i++) drm_gem_object_put(objs->objs[i]); virtio_gpu_array_free(objs); diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c index 6994f8c0e02ea..80c685ab3e30d 100644 --- a/drivers/gpu/host1x/dev.c +++ b/drivers/gpu/host1x/dev.c @@ -447,7 +447,6 @@ static int host1x_probe(struct platform_device *pdev) if (syncpt_irq < 0) return syncpt_irq; - host1x_bo_cache_init(&host->cache); mutex_init(&host->devices_lock); INIT_LIST_HEAD(&host->devices); INIT_LIST_HEAD(&host->list); @@ -489,10 +488,12 @@ static int host1x_probe(struct platform_device *pdev) if (err) return err; + host1x_bo_cache_init(&host->cache); + err = host1x_iommu_init(host); if (err < 0) { dev_err(&pdev->dev, "failed to setup IOMMU: %d\n", err); - return err; + goto destroy_cache; } err = host1x_channel_list_init(&host->channel_list, @@ -553,6 +554,8 @@ static int host1x_probe(struct platform_device *pdev) host1x_channel_list_free(&host->channel_list); iommu_exit: host1x_iommu_exit(host); +destroy_cache: + host1x_bo_cache_destroy(&host->cache); return err; } @@ -568,6 +571,7 @@ static int host1x_remove(struct platform_device *pdev) host1x_intr_deinit(host); host1x_syncpt_deinit(host); + 
host1x_channel_list_free(&host->channel_list); host1x_iommu_exit(host); host1x_bo_cache_destroy(&host->cache); diff --git a/drivers/gpu/ipu-v3/ipu-di.c b/drivers/gpu/ipu-v3/ipu-di.c index 666223c6bec4d..0a34e0ab4fe60 100644 --- a/drivers/gpu/ipu-v3/ipu-di.c +++ b/drivers/gpu/ipu-v3/ipu-di.c @@ -447,8 +447,9 @@ static void ipu_di_config_clock(struct ipu_di *di, error = rate / (sig->mode.pixelclock / 1000); - dev_dbg(di->ipu->dev, " IPU clock can give %lu with divider %u, error %d.%u%%\n", - rate, div, (signed)(error - 1000) / 10, error % 10); + dev_dbg(di->ipu->dev, " IPU clock can give %lu with divider %u, error %c%d.%d%%\n", + rate, div, error < 1000 ? '-' : '+', + abs(error - 1000) / 10, abs(error - 1000) % 10); /* Allow a 1% error */ if (error < 1010 && error >= 990) { diff --git a/drivers/greybus/svc.c b/drivers/greybus/svc.c index ce7740ef449ba..51d0875a34800 100644 --- a/drivers/greybus/svc.c +++ b/drivers/greybus/svc.c @@ -866,8 +866,14 @@ static int gb_svc_hello(struct gb_operation *op) gb_svc_debugfs_init(svc); - return gb_svc_queue_deferred_request(op); + ret = gb_svc_queue_deferred_request(op); + if (ret) + goto err_remove_debugfs; + + return 0; +err_remove_debugfs: + gb_svc_debugfs_exit(svc); err_unregister_device: gb_svc_watchdog_destroy(svc); device_del(&svc->dev); diff --git a/drivers/hid/hid-apple.c b/drivers/hid/hid-apple.c index 7dc89dc6b0f0e..590376d776a18 100644 --- a/drivers/hid/hid-apple.c +++ b/drivers/hid/hid-apple.c @@ -748,15 +748,15 @@ static const struct hid_device_id apple_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER1_TP_ONLY), .driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_2021), - .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK }, + .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK | APPLE_RDESC_BATTERY }, { HID_BLUETOOTH_DEVICE(BT_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_2021), .driver_data = 
APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_FINGERPRINT_2021), - .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK }, + .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK | APPLE_RDESC_BATTERY }, { HID_BLUETOOTH_DEVICE(BT_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_FINGERPRINT_2021), .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_NUMPAD_2021), - .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK }, + .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK | APPLE_RDESC_BATTERY }, { HID_BLUETOOTH_DEVICE(BT_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_NUMPAD_2021), .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK }, diff --git a/drivers/hid/i2c-hid/i2c-hid-core.c b/drivers/hid/i2c-hid/i2c-hid-core.c index 6726567d72976..8d6fc50dab65f 100644 --- a/drivers/hid/i2c-hid/i2c-hid-core.c +++ b/drivers/hid/i2c-hid/i2c-hid-core.c @@ -618,6 +618,17 @@ static int i2c_hid_get_raw_report(struct hid_device *hid, if (report_type == HID_OUTPUT_REPORT) return -EINVAL; + /* + * In case of unnumbered reports the response from the device will + * not have the report ID that the upper layers expect, so we need + * to stash it the buffer ourselves and adjust the data size. 
+ */ + if (!report_number) { + buf[0] = 0; + buf++; + count--; + } + /* +2 bytes to include the size of the reply in the query buffer */ ask_count = min(count + 2, (size_t)ihid->bufsize); @@ -639,6 +650,9 @@ static int i2c_hid_get_raw_report(struct hid_device *hid, count = min(count, ret_count - 2); memcpy(buf, ihid->rawbuf + 2, count); + if (!report_number) + count++; + return count; } @@ -655,17 +669,19 @@ static int i2c_hid_output_raw_report(struct hid_device *hid, __u8 *buf, mutex_lock(&ihid->reset_lock); - if (report_id) { - buf++; - count--; - } - + /* + * Note that both numbered and unnumbered reports passed here + * are supposed to have report ID stored in the 1st byte of the + * buffer, so we strip it off unconditionally before passing payload + * to i2c_hid_set_or_send_report which takes care of encoding + * everything properly. + */ ret = i2c_hid_set_or_send_report(client, report_type == HID_FEATURE_REPORT ? 0x03 : 0x02, - report_id, buf, count, use_data); + report_id, buf + 1, count - 1, use_data); - if (report_id && ret >= 0) - ret++; /* add report_id to the number of transfered bytes */ + if (ret >= 0) + ret++; /* add report_id to the number of transferred bytes */ mutex_unlock(&ihid->reset_lock); diff --git a/drivers/hid/intel-ish-hid/ishtp-fw-loader.c b/drivers/hid/intel-ish-hid/ishtp-fw-loader.c index e24988586710d..16aa030af8453 100644 --- a/drivers/hid/intel-ish-hid/ishtp-fw-loader.c +++ b/drivers/hid/intel-ish-hid/ishtp-fw-loader.c @@ -661,21 +661,12 @@ static int ish_fw_xfer_direct_dma(struct ishtp_cl_data *client_data, */ payload_max_size &= ~(L1_CACHE_BYTES - 1); - dma_buf = kmalloc(payload_max_size, GFP_KERNEL | GFP_DMA32); + dma_buf = dma_alloc_coherent(devc, payload_max_size, &dma_buf_phy, GFP_KERNEL); if (!dma_buf) { client_data->flag_retry = true; return -ENOMEM; } - dma_buf_phy = dma_map_single(devc, dma_buf, payload_max_size, - DMA_TO_DEVICE); - if (dma_mapping_error(devc, dma_buf_phy)) { - dev_err(cl_data_to_dev(client_data), "DMA map 
failed\n"); - client_data->flag_retry = true; - rv = -ENOMEM; - goto end_err_dma_buf_release; - } - ldr_xfer_dma_frag.fragment.hdr.command = LOADER_CMD_XFER_FRAGMENT; ldr_xfer_dma_frag.fragment.xfer_mode = LOADER_XFER_MODE_DIRECT_DMA; ldr_xfer_dma_frag.ddr_phys_addr = (u64)dma_buf_phy; @@ -695,14 +686,7 @@ static int ish_fw_xfer_direct_dma(struct ishtp_cl_data *client_data, ldr_xfer_dma_frag.fragment.size = fragment_size; memcpy(dma_buf, &fw->data[fragment_offset], fragment_size); - dma_sync_single_for_device(devc, dma_buf_phy, - payload_max_size, - DMA_TO_DEVICE); - - /* - * Flush cache here because the dma_sync_single_for_device() - * does not do for x86. - */ + /* Flush cache to be sure the data is in main memory. */ clflush_cache_range(dma_buf, payload_max_size); dev_dbg(cl_data_to_dev(client_data), @@ -725,15 +709,8 @@ static int ish_fw_xfer_direct_dma(struct ishtp_cl_data *client_data, fragment_offset += fragment_size; } - dma_unmap_single(devc, dma_buf_phy, payload_max_size, DMA_TO_DEVICE); - kfree(dma_buf); - return 0; - end_err_resp_buf_release: - /* Free ISH buffer if not done already, in error case */ - dma_unmap_single(devc, dma_buf_phy, payload_max_size, DMA_TO_DEVICE); -end_err_dma_buf_release: - kfree(dma_buf); + dma_free_coherent(devc, payload_max_size, dma_buf, dma_buf_phy); return rv; } diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index 60375879612f3..67be81208a2d9 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -380,7 +380,7 @@ void vmbus_channel_map_relid(struct vmbus_channel *channel) * execute: * * (a) In the "normal (i.e., not resuming from hibernation)" path, - * the full barrier in smp_store_mb() guarantees that the store + * the full barrier in virt_store_mb() guarantees that the store * is propagated to all CPUs before the add_channel_work work * is queued. 
In turn, add_channel_work is queued before the * channel's ring buffer is allocated/initialized and the @@ -392,14 +392,14 @@ void vmbus_channel_map_relid(struct vmbus_channel *channel) * recv_int_page before retrieving the channel pointer from the * array of channels. * - * (b) In the "resuming from hibernation" path, the smp_store_mb() + * (b) In the "resuming from hibernation" path, the virt_store_mb() * guarantees that the store is propagated to all CPUs before * the VMBus connection is marked as ready for the resume event * (cf. check_ready_for_resume_event()). The interrupt handler * of the VMBus driver and vmbus_chan_sched() can not run before * vmbus_bus_resume() has completed execution (cf. resume_noirq). */ - smp_store_mb( + virt_store_mb( vmbus_connection.channels[channel->offermsg.child_relid], channel); } diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c index f2d05bff42453..3cf334c46c312 100644 --- a/drivers/hv/hv_balloon.c +++ b/drivers/hv/hv_balloon.c @@ -1563,7 +1563,7 @@ static void balloon_onchannelcallback(void *context) break; default: - pr_warn("Unhandled message: type: %d\n", dm_hdr->type); + pr_warn_ratelimited("Unhandled message: type: %d\n", dm_hdr->type); } } @@ -1653,6 +1653,38 @@ static void disable_page_reporting(void) } } +static int ballooning_enabled(void) +{ + /* + * Disable ballooning if the page size is not 4k (HV_HYP_PAGE_SIZE), + * since currently it's unclear to us whether an unballoon request can + * make sure all page ranges are guest page size aligned. 
+ */ + if (PAGE_SIZE != HV_HYP_PAGE_SIZE) { + pr_info("Ballooning disabled because page size is not 4096 bytes\n"); + return 0; + } + + return 1; +} + +static int hot_add_enabled(void) +{ + /* + * Disable hot add on ARM64, because we currently rely on + * memory_add_physaddr_to_nid() to get a node id of a hot add range, + * however ARM64's memory_add_physaddr_to_nid() always return 0 and + * DM_MEM_HOT_ADD_REQUEST doesn't have the NUMA node information for + * add_memory(). + */ + if (IS_ENABLED(CONFIG_ARM64)) { + pr_info("Memory hot add disabled on ARM64\n"); + return 0; + } + + return 1; +} + static int balloon_connect_vsp(struct hv_device *dev) { struct dm_version_request version_req; @@ -1724,8 +1756,8 @@ static int balloon_connect_vsp(struct hv_device *dev) * currently still requires the bits to be set, so we have to add code * to fail the host's hot-add and balloon up/down requests, if any. */ - cap_msg.caps.cap_bits.balloon = 1; - cap_msg.caps.cap_bits.hot_add = 1; + cap_msg.caps.cap_bits.balloon = ballooning_enabled(); + cap_msg.caps.cap_bits.hot_add = hot_add_enabled(); /* * Specify our alignment requirements as it relates diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c index 181d16bbf49d7..820e814062519 100644 --- a/drivers/hv/hv_common.c +++ b/drivers/hv/hv_common.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -216,6 +217,16 @@ bool hv_query_ext_cap(u64 cap_query) } EXPORT_SYMBOL_GPL(hv_query_ext_cap); +void hv_setup_dma_ops(struct device *dev, bool coherent) +{ + /* + * Hyper-V does not offer a vIOMMU in the guest + * VM, so pass 0/NULL for the IOMMU settings + */ + arch_setup_dma_ops(dev, 0, 0, NULL, coherent); +} +EXPORT_SYMBOL_GPL(hv_setup_dma_ops); + bool hv_is_hibernation_supported(void) { return !hv_root_partition && acpi_sleep_state_supported(ACPI_STATE_S4); diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c index 71efacb909659..3d215d9dec433 100644 --- a/drivers/hv/ring_buffer.c +++ 
b/drivers/hv/ring_buffer.c @@ -439,7 +439,16 @@ int hv_ringbuffer_read(struct vmbus_channel *channel, static u32 hv_pkt_iter_avail(const struct hv_ring_buffer_info *rbi) { u32 priv_read_loc = rbi->priv_read_index; - u32 write_loc = READ_ONCE(rbi->ring_buffer->write_index); + u32 write_loc; + + /* + * The Hyper-V host writes the packet data, then uses + * store_release() to update the write_index. Use load_acquire() + * here to prevent loads of the packet data from being re-ordered + * before the read of the write_index and potentially getting + * stale data. + */ + write_loc = virt_load_acquire(&rbi->ring_buffer->write_index); if (write_loc >= priv_read_loc) return write_loc - priv_read_loc; diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 12a2b37e87f30..3cd0d3a44fa2e 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -77,8 +77,8 @@ static int hyperv_panic_event(struct notifier_block *nb, unsigned long val, /* * Hyper-V should be notified only once about a panic. If we will be - * doing hyperv_report_panic_msg() later with kmsg data, don't do - * the notification here. + * doing hv_kmsg_dump() with kmsg data later, don't do the notification + * here. */ if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE && hyperv_report_reg()) { @@ -100,8 +100,8 @@ static int hyperv_die_event(struct notifier_block *nb, unsigned long val, /* * Hyper-V should be notified only once about a panic. If we will be - * doing hyperv_report_panic_msg() later with kmsg data, don't do - * the notification here. + * doing hv_kmsg_dump() with kmsg data later, don't do the notification + * here. 
*/ if (hyperv_report_reg()) hyperv_report_panic(regs, val, true); @@ -920,6 +920,21 @@ static int vmbus_probe(struct device *child_device) return ret; } +/* + * vmbus_dma_configure -- Configure DMA coherence for VMbus device + */ +static int vmbus_dma_configure(struct device *child_device) +{ + /* + * On ARM64, propagate the DMA coherence setting from the top level + * VMbus ACPI device to the child VMbus device being added here. + * On x86/x64 coherence is assumed and these calls have no effect. + */ + hv_setup_dma_ops(child_device, + device_get_dma_attr(&hv_acpi_dev->dev) == DEV_DMA_COHERENT); + return 0; +} + /* * vmbus_remove - Remove a vmbus device */ @@ -1040,6 +1055,7 @@ static struct bus_type hv_bus = { .remove = vmbus_remove, .probe = vmbus_probe, .uevent = vmbus_uevent, + .dma_configure = vmbus_dma_configure, .dev_groups = vmbus_dev_groups, .drv_groups = vmbus_drv_groups, .bus_groups = vmbus_bus_groups, @@ -1546,14 +1562,20 @@ static int vmbus_bus_init(void) if (ret) goto err_connect; + if (hv_is_isolation_supported()) + sysctl_record_panic_msg = 0; + /* * Only register if the crash MSRs are available */ if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) { u64 hyperv_crash_ctl; /* - * Sysctl registration is not fatal, since by default - * reporting is enabled. + * Panic message recording (sysctl_record_panic_msg) + * is enabled by default in non-isolated guests and + * disabled by default in isolated guests; the panic + * message recording won't be available in isolated + * guests should the following registration fail. 
*/ hv_ctl_table_hdr = register_sysctl_table(hv_root_table); if (!hv_ctl_table_hdr) @@ -2097,6 +2119,10 @@ int vmbus_device_register(struct hv_device *child_device_obj) child_device_obj->device.parent = &hv_acpi_dev->dev; child_device_obj->device.release = vmbus_device_release; + child_device_obj->device.dma_parms = &child_device_obj->dma_parms; + child_device_obj->device.dma_mask = &child_device_obj->dma_mask; + dma_set_mask(&child_device_obj->device, DMA_BIT_MASK(64)); + /* * Register with the LDM. This will kick off the driver/device * binding...which will eventually call vmbus_match() and vmbus_probe() @@ -2122,9 +2148,6 @@ int vmbus_device_register(struct hv_device *child_device_obj) } hv_debug_add_dev_dir(child_device_obj); - child_device_obj->device.dma_parms = &child_device_obj->dma_parms; - child_device_obj->device.dma_mask = &child_device_obj->dma_mask; - dma_set_mask(&child_device_obj->device, DMA_BIT_MASK(64)); return 0; err_kset_unregister: @@ -2428,6 +2451,21 @@ static int vmbus_acpi_add(struct acpi_device *device) hv_acpi_dev = device; + /* + * Older versions of Hyper-V for ARM64 fail to include the _CCA + * method on the top level VMbus device in the DSDT. But devices + * are hardware coherent in all current Hyper-V use cases, so fix + * up the ACPI device to behave as if _CCA is present and indicates + * hardware coherence. 
+ */ + ACPI_COMPANION_SET(&device->dev, device); + if (IS_ENABLED(CONFIG_ACPI_CCA_REQUIRED) && + device_get_dma_attr(&device->dev) == DEV_DMA_NOT_SUPPORTED) { + pr_info("No ACPI _CCA found; assuming coherent device I/O\n"); + device->flags.cca_seen = true; + device->flags.coherent_dma = true; + } + result = acpi_walk_resources(device->handle, METHOD_NAME__CRS, vmbus_walk_resources, NULL); @@ -2780,10 +2818,15 @@ static void __exit vmbus_exit(void) if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) { kmsg_dump_unregister(&hv_kmsg_dumper); unregister_die_notifier(&hyperv_die_block); - atomic_notifier_chain_unregister(&panic_notifier_list, - &hyperv_panic_block); } + /* + * The panic notifier is always registered, hence we should + * also unconditionally unregister it here as well. + */ + atomic_notifier_chain_unregister(&panic_notifier_list, + &hyperv_panic_block); + free_page((unsigned long)hv_panic_page); unregister_sysctl_table(hv_ctl_table_hdr); hv_ctl_table_hdr = NULL; diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig index 8df25f1079bac..faeaf9757f65d 100644 --- a/drivers/hwmon/Kconfig +++ b/drivers/hwmon/Kconfig @@ -2252,16 +2252,31 @@ config SENSORS_ASUS_WMI config SENSORS_ASUS_WMI_EC tristate "ASUS WMI B550/X570" - depends on ACPI_WMI + depends on ACPI_WMI && SENSORS_ASUS_EC=n help If you say yes here you get support for the ACPI embedded controller hardware monitoring interface found in B550/X570 ASUS motherboards. This driver will provide readings of fans, voltages and temperatures through the system firmware. + This driver is deprecated in favor of the ASUS EC Sensors driver + which provides fully compatible output. + This driver can also be built as a module. If so, the module will be called asus_wmi_sensors_ec. +config SENSORS_ASUS_EC + tristate "ASUS EC Sensors" + depends on X86 + help + If you say yes here you get support for the ACPI embedded controller + hardware monitoring interface found in ASUS motherboards. 
The driver + currently supports B550/X570 boards, although other ASUS boards might + provide this monitoring interface as well. + + This driver can also be built as a module. If so, the module + will be called asus_ec_sensors. + endif # ACPI endif # HWMON diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile index 185f946d698b0..7bba3415ca3df 100644 --- a/drivers/hwmon/Makefile +++ b/drivers/hwmon/Makefile @@ -9,6 +9,7 @@ obj-$(CONFIG_HWMON_VID) += hwmon-vid.o # APCI drivers obj-$(CONFIG_SENSORS_ACPI_POWER) += acpi_power_meter.o obj-$(CONFIG_SENSORS_ATK0110) += asus_atk0110.o +obj-$(CONFIG_SENSORS_ASUS_EC) += asus-ec-sensors.o obj-$(CONFIG_SENSORS_ASUS_WMI) += asus_wmi_sensors.o obj-$(CONFIG_SENSORS_ASUS_WMI_EC) += asus_wmi_ec_sensors.o diff --git a/drivers/hwmon/asus-ec-sensors.c b/drivers/hwmon/asus-ec-sensors.c new file mode 100644 index 0000000000000..3ad8eadea68f3 --- /dev/null +++ b/drivers/hwmon/asus-ec-sensors.c @@ -0,0 +1,717 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * HWMON driver for ASUS motherboards that publish some sensor values + * via the embedded controller registers. + * + * Copyright (C) 2021 Eugene Shalygin + + * EC provides: + * - Chipset temperature + * - CPU temperature + * - Motherboard temperature + * - T_Sensor temperature + * - VRM temperature + * - Water In temperature + * - Water Out temperature + * - CPU Optional fan RPM + * - Chipset fan RPM + * - VRM Heat Sink fan RPM + * - Water Flow fan RPM + * - CPU current + * - CPU core voltage + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +static char *mutex_path_override; + +/* Writing to this EC register switches EC bank */ +#define ASUS_EC_BANK_REGISTER 0xff +#define SENSOR_LABEL_LEN 16 + +/* + * Arbitrary set max. allowed bank number. 
Required for sorting banks and + * currently is overkill with just 2 banks used at max, but for the sake + * of alignment let's set it to a higher value. + */ +#define ASUS_EC_MAX_BANK 3 + +#define ACPI_LOCK_DELAY_MS 500 + +/* ACPI mutex for locking access to the EC for the firmware */ +#define ASUS_HW_ACCESS_MUTEX_ASMX "\\AMW0.ASMX" + +/* There are two variants of the vendor spelling */ +#define VENDOR_ASUS_UPPER_CASE "ASUSTeK COMPUTER INC." + +typedef union { + u32 value; + struct { + u8 index; + u8 bank; + u8 size; + u8 dummy; + } components; +} sensor_address; + +#define MAKE_SENSOR_ADDRESS(size, bank, index) { \ + .value = (size << 16) + (bank << 8) + index \ + } + +static u32 hwmon_attributes[hwmon_max] = { + [hwmon_chip] = HWMON_C_REGISTER_TZ, + [hwmon_temp] = HWMON_T_INPUT | HWMON_T_LABEL, + [hwmon_in] = HWMON_I_INPUT | HWMON_I_LABEL, + [hwmon_curr] = HWMON_C_INPUT | HWMON_C_LABEL, + [hwmon_fan] = HWMON_F_INPUT | HWMON_F_LABEL, +}; + +struct ec_sensor_info { + char label[SENSOR_LABEL_LEN]; + enum hwmon_sensor_types type; + sensor_address addr; +}; + +#define EC_SENSOR(sensor_label, sensor_type, size, bank, index) { \ + .label = sensor_label, .type = sensor_type, \ + .addr = MAKE_SENSOR_ADDRESS(size, bank, index), \ + } + +enum ec_sensors { + /* chipset temperature [℃] */ + ec_sensor_temp_chipset, + /* CPU temperature [℃] */ + ec_sensor_temp_cpu, + /* motherboard temperature [℃] */ + ec_sensor_temp_mb, + /* "T_Sensor" temperature sensor reading [℃] */ + ec_sensor_temp_t_sensor, + /* VRM temperature [℃] */ + ec_sensor_temp_vrm, + /* CPU Core voltage [mV] */ + ec_sensor_in_cpu_core, + /* CPU_Opt fan [RPM] */ + ec_sensor_fan_cpu_opt, + /* VRM heat sink fan [RPM] */ + ec_sensor_fan_vrm_hs, + /* Chipset fan [RPM] */ + ec_sensor_fan_chipset, + /* Water flow sensor reading [RPM] */ + ec_sensor_fan_water_flow, + /* CPU current [A] */ + ec_sensor_curr_cpu, + /* "Water_In" temperature sensor reading [℃] */ + ec_sensor_temp_water_in, + /* "Water_Out" temperature sensor 
reading [℃] */ + ec_sensor_temp_water_out, +}; + +#define SENSOR_TEMP_CHIPSET BIT(ec_sensor_temp_chipset) +#define SENSOR_TEMP_CPU BIT(ec_sensor_temp_cpu) +#define SENSOR_TEMP_MB BIT(ec_sensor_temp_mb) +#define SENSOR_TEMP_T_SENSOR BIT(ec_sensor_temp_t_sensor) +#define SENSOR_TEMP_VRM BIT(ec_sensor_temp_vrm) +#define SENSOR_IN_CPU_CORE BIT(ec_sensor_in_cpu_core) +#define SENSOR_FAN_CPU_OPT BIT(ec_sensor_fan_cpu_opt) +#define SENSOR_FAN_VRM_HS BIT(ec_sensor_fan_vrm_hs) +#define SENSOR_FAN_CHIPSET BIT(ec_sensor_fan_chipset) +#define SENSOR_FAN_WATER_FLOW BIT(ec_sensor_fan_water_flow) +#define SENSOR_CURR_CPU BIT(ec_sensor_curr_cpu) +#define SENSOR_TEMP_WATER_IN BIT(ec_sensor_temp_water_in) +#define SENSOR_TEMP_WATER_OUT BIT(ec_sensor_temp_water_out) + +/* All the known sensors for ASUS EC controllers */ +static const struct ec_sensor_info known_ec_sensors[] = { + [ec_sensor_temp_chipset] = + EC_SENSOR("Chipset", hwmon_temp, 1, 0x00, 0x3a), + [ec_sensor_temp_cpu] = EC_SENSOR("CPU", hwmon_temp, 1, 0x00, 0x3b), + [ec_sensor_temp_mb] = + EC_SENSOR("Motherboard", hwmon_temp, 1, 0x00, 0x3c), + [ec_sensor_temp_t_sensor] = + EC_SENSOR("T_Sensor", hwmon_temp, 1, 0x00, 0x3d), + [ec_sensor_temp_vrm] = EC_SENSOR("VRM", hwmon_temp, 1, 0x00, 0x3e), + [ec_sensor_in_cpu_core] = + EC_SENSOR("CPU Core", hwmon_in, 2, 0x00, 0xa2), + [ec_sensor_fan_cpu_opt] = + EC_SENSOR("CPU_Opt", hwmon_fan, 2, 0x00, 0xb0), + [ec_sensor_fan_vrm_hs] = EC_SENSOR("VRM HS", hwmon_fan, 2, 0x00, 0xb2), + [ec_sensor_fan_chipset] = + EC_SENSOR("Chipset", hwmon_fan, 2, 0x00, 0xb4), + [ec_sensor_fan_water_flow] = + EC_SENSOR("Water_Flow", hwmon_fan, 2, 0x00, 0xbc), + [ec_sensor_curr_cpu] = EC_SENSOR("CPU", hwmon_curr, 1, 0x00, 0xf4), + [ec_sensor_temp_water_in] = + EC_SENSOR("Water_In", hwmon_temp, 1, 0x01, 0x00), + [ec_sensor_temp_water_out] = + EC_SENSOR("Water_Out", hwmon_temp, 1, 0x01, 0x01), +}; + +/* Shortcuts for common combinations */ +#define SENSOR_SET_TEMP_CHIPSET_CPU_MB \ + (SENSOR_TEMP_CHIPSET | 
SENSOR_TEMP_CPU | SENSOR_TEMP_MB) +#define SENSOR_SET_TEMP_WATER (SENSOR_TEMP_WATER_IN | SENSOR_TEMP_WATER_OUT) + +#define DMI_EXACT_MATCH_BOARD(vendor, name, sensors) { \ + .matches = { \ + DMI_EXACT_MATCH(DMI_BOARD_VENDOR, vendor), \ + DMI_EXACT_MATCH(DMI_BOARD_NAME, name), \ + }, \ + .driver_data = (void *)(sensors), \ +} + +static const struct dmi_system_id asus_ec_dmi_table[] __initconst = { + DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE, "PRIME X570-PRO", + SENSOR_SET_TEMP_CHIPSET_CPU_MB | SENSOR_TEMP_VRM | + SENSOR_TEMP_T_SENSOR | SENSOR_FAN_CHIPSET), + DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE, "Pro WS X570-ACE", + SENSOR_SET_TEMP_CHIPSET_CPU_MB | SENSOR_TEMP_VRM | + SENSOR_TEMP_T_SENSOR | SENSOR_FAN_CHIPSET | + SENSOR_CURR_CPU | SENSOR_IN_CPU_CORE), + DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE, + "ROG CROSSHAIR VIII DARK HERO", + SENSOR_SET_TEMP_CHIPSET_CPU_MB | SENSOR_TEMP_T_SENSOR | + SENSOR_TEMP_VRM | SENSOR_SET_TEMP_WATER | + SENSOR_FAN_CPU_OPT | SENSOR_FAN_WATER_FLOW | + SENSOR_CURR_CPU | SENSOR_IN_CPU_CORE), + DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE, + "ROG CROSSHAIR VIII FORMULA", + SENSOR_SET_TEMP_CHIPSET_CPU_MB | SENSOR_TEMP_T_SENSOR | + SENSOR_TEMP_VRM | SENSOR_FAN_CPU_OPT | SENSOR_FAN_CHIPSET | + SENSOR_CURR_CPU | SENSOR_IN_CPU_CORE), + DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE, "ROG CROSSHAIR VIII HERO", + SENSOR_SET_TEMP_CHIPSET_CPU_MB | SENSOR_TEMP_T_SENSOR | + SENSOR_TEMP_VRM | SENSOR_SET_TEMP_WATER | + SENSOR_FAN_CPU_OPT | SENSOR_FAN_CHIPSET | + SENSOR_FAN_WATER_FLOW | SENSOR_CURR_CPU | SENSOR_IN_CPU_CORE), + DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE, + "ROG CROSSHAIR VIII HERO (WI-FI)", + SENSOR_SET_TEMP_CHIPSET_CPU_MB | SENSOR_TEMP_T_SENSOR | + SENSOR_TEMP_VRM | SENSOR_SET_TEMP_WATER | + SENSOR_FAN_CPU_OPT | SENSOR_FAN_CHIPSET | + SENSOR_FAN_WATER_FLOW | SENSOR_CURR_CPU | SENSOR_IN_CPU_CORE), + DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE, + "ROG CROSSHAIR VIII IMPACT", + SENSOR_SET_TEMP_CHIPSET_CPU_MB | 
SENSOR_TEMP_T_SENSOR | + SENSOR_TEMP_VRM | SENSOR_FAN_CHIPSET | + SENSOR_CURR_CPU | SENSOR_IN_CPU_CORE), + DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE, "ROG STRIX B550-E GAMING", + SENSOR_SET_TEMP_CHIPSET_CPU_MB | + SENSOR_TEMP_T_SENSOR | + SENSOR_TEMP_VRM | SENSOR_FAN_CPU_OPT), + DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE, "ROG STRIX B550-I GAMING", + SENSOR_SET_TEMP_CHIPSET_CPU_MB | + SENSOR_TEMP_T_SENSOR | + SENSOR_TEMP_VRM | SENSOR_FAN_VRM_HS | + SENSOR_CURR_CPU | SENSOR_IN_CPU_CORE), + DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE, "ROG STRIX X570-E GAMING", + SENSOR_SET_TEMP_CHIPSET_CPU_MB | + SENSOR_TEMP_T_SENSOR | + SENSOR_TEMP_VRM | SENSOR_FAN_CHIPSET | + SENSOR_CURR_CPU | SENSOR_IN_CPU_CORE), + DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE, "ROG STRIX X570-F GAMING", + SENSOR_SET_TEMP_CHIPSET_CPU_MB | + SENSOR_TEMP_T_SENSOR | SENSOR_FAN_CHIPSET), + DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE, "ROG STRIX X570-I GAMING", + SENSOR_TEMP_T_SENSOR | SENSOR_FAN_VRM_HS | + SENSOR_FAN_CHIPSET | SENSOR_CURR_CPU | SENSOR_IN_CPU_CORE), + {} +}; + +struct ec_sensor { + unsigned int info_index; + s32 cached_value; +}; + +struct ec_sensors_data { + unsigned long board_sensors; + struct ec_sensor *sensors; + /* EC registers to read from */ + u16 *registers; + u8 *read_buffer; + /* sorted list of unique register banks */ + u8 banks[ASUS_EC_MAX_BANK + 1]; + /* in jiffies */ + unsigned long last_updated; + acpi_handle aml_mutex; + /* number of board EC sensors */ + u8 nr_sensors; + /* + * number of EC registers to read + * (sensor might span more than 1 register) + */ + u8 nr_registers; + /* number of unique register banks */ + u8 nr_banks; +}; + +static u8 register_bank(u16 reg) +{ + return reg >> 8; +} + +static u8 register_index(u16 reg) +{ + return reg & 0x00ff; +} + +static bool is_sensor_data_signed(const struct ec_sensor_info *si) +{ + /* + * guessed from WMI functions in DSDT code for boards + * of the X470 generation + */ + return si->type == hwmon_temp; +} + 
+static const struct ec_sensor_info * +get_sensor_info(const struct ec_sensors_data *state, int index) +{ + return &known_ec_sensors[state->sensors[index].info_index]; +} + +static int find_ec_sensor_index(const struct ec_sensors_data *ec, + enum hwmon_sensor_types type, int channel) +{ + unsigned int i; + + for (i = 0; i < ec->nr_sensors; i++) { + if (get_sensor_info(ec, i)->type == type) { + if (channel == 0) + return i; + channel--; + } + } + return -ENOENT; +} + +static int __init bank_compare(const void *a, const void *b) +{ + return *((const s8 *)a) - *((const s8 *)b); +} + +static int __init board_sensors_count(unsigned long sensors) +{ + return hweight_long(sensors); +} + +static void __init setup_sensor_data(struct ec_sensors_data *ec) +{ + struct ec_sensor *s = ec->sensors; + bool bank_found; + int i, j; + u8 bank; + + ec->nr_banks = 0; + ec->nr_registers = 0; + + for_each_set_bit(i, &ec->board_sensors, + BITS_PER_TYPE(ec->board_sensors)) { + s->info_index = i; + s->cached_value = 0; + ec->nr_registers += + known_ec_sensors[s->info_index].addr.components.size; + bank_found = false; + bank = known_ec_sensors[s->info_index].addr.components.bank; + for (j = 0; j < ec->nr_banks; j++) { + if (ec->banks[j] == bank) { + bank_found = true; + break; + } + } + if (!bank_found) { + ec->banks[ec->nr_banks++] = bank; + } + s++; + } + sort(ec->banks, ec->nr_banks, 1, bank_compare, NULL); +} + +static void __init fill_ec_registers(struct ec_sensors_data *ec) +{ + const struct ec_sensor_info *si; + unsigned int i, j, register_idx = 0; + + for (i = 0; i < ec->nr_sensors; ++i) { + si = get_sensor_info(ec, i); + for (j = 0; j < si->addr.components.size; ++j, ++register_idx) { + ec->registers[register_idx] = + (si->addr.components.bank << 8) + + si->addr.components.index + j; + } + } +} + +static acpi_handle __init asus_hw_access_mutex(struct device *dev) +{ + const char *mutex_path; + acpi_handle res; + int status; + + mutex_path = mutex_path_override ? 
+ mutex_path_override : ASUS_HW_ACCESS_MUTEX_ASMX; + + status = acpi_get_handle(NULL, (acpi_string)mutex_path, &res); + if (ACPI_FAILURE(status)) { + dev_err(dev, + "Could not get hardware access guard mutex '%s': error %d", + mutex_path, status); + return NULL; + } + return res; +} + +static int asus_ec_bank_switch(u8 bank, u8 *old) +{ + int status = 0; + + if (old) { + status = ec_read(ASUS_EC_BANK_REGISTER, old); + } + if (status || (old && (*old == bank))) + return status; + return ec_write(ASUS_EC_BANK_REGISTER, bank); +} + +static int asus_ec_block_read(const struct device *dev, + struct ec_sensors_data *ec) +{ + int ireg, ibank, status; + u8 bank, reg_bank, prev_bank; + + bank = 0; + status = asus_ec_bank_switch(bank, &prev_bank); + if (status) { + dev_warn(dev, "EC bank switch failed"); + return status; + } + + if (prev_bank) { + /* oops... somebody else is working with the EC too */ + dev_warn(dev, + "Concurrent access to the ACPI EC detected.\nRace condition possible."); + } + + /* read registers minimizing bank switches. 
*/ + for (ibank = 0; ibank < ec->nr_banks; ibank++) { + if (bank != ec->banks[ibank]) { + bank = ec->banks[ibank]; + if (asus_ec_bank_switch(bank, NULL)) { + dev_warn(dev, "EC bank switch to %d failed", + bank); + break; + } + } + for (ireg = 0; ireg < ec->nr_registers; ireg++) { + reg_bank = register_bank(ec->registers[ireg]); + if (reg_bank < bank) { + continue; + } + ec_read(register_index(ec->registers[ireg]), + ec->read_buffer + ireg); + } + } + + status = asus_ec_bank_switch(prev_bank, NULL); + return status; +} + +static inline s32 get_sensor_value(const struct ec_sensor_info *si, u8 *data) +{ + if (is_sensor_data_signed(si)) { + switch (si->addr.components.size) { + case 1: + return (s8)*data; + case 2: + return (s16)get_unaligned_be16(data); + case 4: + return (s32)get_unaligned_be32(data); + default: + return 0; + } + } else { + switch (si->addr.components.size) { + case 1: + return *data; + case 2: + return get_unaligned_be16(data); + case 4: + return get_unaligned_be32(data); + default: + return 0; + } + } +} + +static void update_sensor_values(struct ec_sensors_data *ec, u8 *data) +{ + const struct ec_sensor_info *si; + struct ec_sensor *s; + + for (s = ec->sensors; s != ec->sensors + ec->nr_sensors; s++) { + si = &known_ec_sensors[s->info_index]; + s->cached_value = get_sensor_value(si, data); + data += si->addr.components.size; + } +} + +static int update_ec_sensors(const struct device *dev, + struct ec_sensors_data *ec) +{ + int status; + + /* + * ASUS DSDT does not specify that access to the EC has to be guarded, + * but firmware does access it via ACPI + */ + if (ACPI_FAILURE(acpi_acquire_mutex(ec->aml_mutex, NULL, + ACPI_LOCK_DELAY_MS))) { + dev_err(dev, "Failed to acquire AML mutex"); + status = -EBUSY; + goto cleanup; + } + + status = asus_ec_block_read(dev, ec); + + if (!status) { + update_sensor_values(ec, ec->read_buffer); + } + if (ACPI_FAILURE(acpi_release_mutex(ec->aml_mutex, NULL))) { + dev_err(dev, "Failed to release AML mutex"); + } 
+cleanup: + return status; +} + +static long scale_sensor_value(s32 value, int data_type) +{ + switch (data_type) { + case hwmon_curr: + case hwmon_temp: + return value * MILLI; + default: + return value; + } +} + +static int get_cached_value_or_update(const struct device *dev, + int sensor_index, + struct ec_sensors_data *state, s32 *value) +{ + if (time_after(jiffies, state->last_updated + HZ)) { + if (update_ec_sensors(dev, state)) { + dev_err(dev, "update_ec_sensors() failure\n"); + return -EIO; + } + + state->last_updated = jiffies; + } + + *value = state->sensors[sensor_index].cached_value; + return 0; +} + +/* + * Now follow the functions that implement the hwmon interface + */ + +static int asus_ec_hwmon_read(struct device *dev, enum hwmon_sensor_types type, + u32 attr, int channel, long *val) +{ + int ret; + s32 value = 0; + + struct ec_sensors_data *state = dev_get_drvdata(dev); + int sidx = find_ec_sensor_index(state, type, channel); + + if (sidx < 0) { + return sidx; + } + + ret = get_cached_value_or_update(dev, sidx, state, &value); + if (!ret) { + *val = scale_sensor_value(value, + get_sensor_info(state, sidx)->type); + } + + return ret; +} + +static int asus_ec_hwmon_read_string(struct device *dev, + enum hwmon_sensor_types type, u32 attr, + int channel, const char **str) +{ + struct ec_sensors_data *state = dev_get_drvdata(dev); + int sensor_index = find_ec_sensor_index(state, type, channel); + *str = get_sensor_info(state, sensor_index)->label; + + return 0; +} + +static umode_t asus_ec_hwmon_is_visible(const void *drvdata, + enum hwmon_sensor_types type, u32 attr, + int channel) +{ + const struct ec_sensors_data *state = drvdata; + + return find_ec_sensor_index(state, type, channel) >= 0 ? 
S_IRUGO : 0; +} + +static int __init +asus_ec_hwmon_add_chan_info(struct hwmon_channel_info *asus_ec_hwmon_chan, + struct device *dev, int num, + enum hwmon_sensor_types type, u32 config) +{ + int i; + u32 *cfg = devm_kcalloc(dev, num + 1, sizeof(*cfg), GFP_KERNEL); + + if (!cfg) + return -ENOMEM; + + asus_ec_hwmon_chan->type = type; + asus_ec_hwmon_chan->config = cfg; + for (i = 0; i < num; i++, cfg++) + *cfg = config; + + return 0; +} + +static const struct hwmon_ops asus_ec_hwmon_ops = { + .is_visible = asus_ec_hwmon_is_visible, + .read = asus_ec_hwmon_read, + .read_string = asus_ec_hwmon_read_string, +}; + +static struct hwmon_chip_info asus_ec_chip_info = { + .ops = &asus_ec_hwmon_ops, +}; + +static unsigned long __init get_board_sensors(void) +{ + const struct dmi_system_id *dmi_entry = + dmi_first_match(asus_ec_dmi_table); + + return dmi_entry ? (unsigned long)dmi_entry->driver_data : 0; +} + +static int __init asus_ec_probe(struct platform_device *pdev) +{ + const struct hwmon_channel_info **ptr_asus_ec_ci; + int nr_count[hwmon_max] = { 0 }, nr_types = 0; + struct hwmon_channel_info *asus_ec_hwmon_chan; + const struct hwmon_chip_info *chip_info; + struct device *dev = &pdev->dev; + struct ec_sensors_data *ec_data; + const struct ec_sensor_info *si; + enum hwmon_sensor_types type; + unsigned long board_sensors; + struct device *hwdev; + unsigned int i; + + board_sensors = get_board_sensors(); + if (!board_sensors) + return -ENODEV; + + ec_data = devm_kzalloc(dev, sizeof(struct ec_sensors_data), + GFP_KERNEL); + if (!ec_data) + return -ENOMEM; + + dev_set_drvdata(dev, ec_data); + ec_data->board_sensors = board_sensors; + ec_data->nr_sensors = board_sensors_count(ec_data->board_sensors); + ec_data->sensors = devm_kcalloc(dev, ec_data->nr_sensors, + sizeof(struct ec_sensor), GFP_KERNEL); + + setup_sensor_data(ec_data); + ec_data->registers = devm_kcalloc(dev, ec_data->nr_registers, + sizeof(u16), GFP_KERNEL); + ec_data->read_buffer = devm_kcalloc(dev, 
ec_data->nr_registers, + sizeof(u8), GFP_KERNEL); + + if (!ec_data->registers || !ec_data->read_buffer) + return -ENOMEM; + + fill_ec_registers(ec_data); + + ec_data->aml_mutex = asus_hw_access_mutex(dev); + + for (i = 0; i < ec_data->nr_sensors; ++i) { + si = get_sensor_info(ec_data, i); + if (!nr_count[si->type]) + ++nr_types; + ++nr_count[si->type]; + } + + if (nr_count[hwmon_temp]) + nr_count[hwmon_chip]++, nr_types++; + + asus_ec_hwmon_chan = devm_kcalloc( + dev, nr_types, sizeof(*asus_ec_hwmon_chan), GFP_KERNEL); + if (!asus_ec_hwmon_chan) + return -ENOMEM; + + ptr_asus_ec_ci = devm_kcalloc(dev, nr_types + 1, + sizeof(*ptr_asus_ec_ci), GFP_KERNEL); + if (!ptr_asus_ec_ci) + return -ENOMEM; + + asus_ec_chip_info.info = ptr_asus_ec_ci; + chip_info = &asus_ec_chip_info; + + for (type = 0; type < hwmon_max; ++type) { + if (!nr_count[type]) + continue; + + asus_ec_hwmon_add_chan_info(asus_ec_hwmon_chan, dev, + nr_count[type], type, + hwmon_attributes[type]); + *ptr_asus_ec_ci++ = asus_ec_hwmon_chan++; + } + + dev_info(dev, "board has %d EC sensors that span %d registers", + ec_data->nr_sensors, ec_data->nr_registers); + + hwdev = devm_hwmon_device_register_with_info(dev, "asusec", + ec_data, chip_info, NULL); + + return PTR_ERR_OR_ZERO(hwdev); +} + + +static const struct acpi_device_id acpi_ec_ids[] = { + /* Embedded Controller Device */ + { "PNP0C09", 0 }, + {} +}; + +static struct platform_driver asus_ec_sensors_platform_driver = { + .driver = { + .name = "asus-ec-sensors", + .acpi_match_table = acpi_ec_ids, + }, +}; + +MODULE_DEVICE_TABLE(dmi, asus_ec_dmi_table); +module_platform_driver_probe(asus_ec_sensors_platform_driver, asus_ec_probe); + +module_param_named(mutex_path, mutex_path_override, charp, 0); +MODULE_PARM_DESC(mutex_path, + "Override ACPI mutex path used to guard access to hardware"); + +MODULE_AUTHOR("Eugene Shalygin "); +MODULE_DESCRIPTION( + "HWMON driver for sensors accessible via ACPI EC in ASUS motherboards"); +MODULE_LICENSE("GPL"); diff 
--git a/drivers/hwmon/asus_wmi_sensors.c b/drivers/hwmon/asus_wmi_sensors.c index c80eee874b6c0..9e935e34c9983 100644 --- a/drivers/hwmon/asus_wmi_sensors.c +++ b/drivers/hwmon/asus_wmi_sensors.c @@ -71,12 +71,13 @@ static const struct dmi_system_id asus_wmi_dmi_table[] = { DMI_EXACT_MATCH_ASUS_BOARD_NAME("PRIME X399-A"), DMI_EXACT_MATCH_ASUS_BOARD_NAME("PRIME X470-PRO"), DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG CROSSHAIR VI EXTREME"), - DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG CROSSHAIR VI HERO"), + DMI_EXACT_MATCH_ASUS_BOARD_NAME("CROSSHAIR VI HERO"), DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG CROSSHAIR VI HERO (WI-FI AC)"), DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG CROSSHAIR VII HERO"), DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG CROSSHAIR VII HERO (WI-FI)"), DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG STRIX B450-E GAMING"), DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG STRIX B450-F GAMING"), + DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG STRIX B450-F GAMING II"), DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG STRIX B450-I GAMING"), DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG STRIX X399-E GAMING"), DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG STRIX X470-F GAMING"), diff --git a/drivers/hwmon/nct6775.c b/drivers/hwmon/nct6775.c index 098d12b9ecdad..2b91f7e05126e 100644 --- a/drivers/hwmon/nct6775.c +++ b/drivers/hwmon/nct6775.c @@ -308,6 +308,7 @@ static void superio_exit(struct nct6775_sio_data *sio_data) #define NUM_TEMP 10 /* Max number of temp attribute sets w/ limits*/ #define NUM_TEMP_FIXED 6 /* Max number of fixed temp attribute sets */ +#define NUM_TSI_TEMP 8 /* Max number of TSI temp register pairs */ #define NUM_REG_ALARM 7 /* Max number of alarm registers */ #define NUM_REG_BEEP 5 /* Max number of beep registers */ @@ -498,6 +499,8 @@ static const u16 NCT6775_REG_TEMP_CRIT[32] = { [11] = 0xa07 }; +static const u16 NCT6775_REG_TSI_TEMP[] = { 0x669 }; + /* NCT6776 specific data */ /* STEP_UP_TIME and STEP_DOWN_TIME regs are swapped for all chips but NCT6775 */ @@ -581,6 +584,9 @@ static const u16 NCT6776_REG_TEMP_CRIT[32] = { 
[12] = 0x70a, }; +static const u16 NCT6776_REG_TSI_TEMP[] = { + 0x409, 0x40b, 0x40d, 0x40f, 0x411, 0x413, 0x415, 0x417 }; + /* NCT6779 specific data */ static const u16 NCT6779_REG_IN[] = { @@ -864,6 +870,8 @@ static const char *const nct6796_temp_label[] = { #define NCT6796_TEMP_MASK 0xbfff0ffe #define NCT6796_VIRT_TEMP_MASK 0x80000c00 +static const u16 NCT6796_REG_TSI_TEMP[] = { 0x409, 0x40b }; + static const char *const nct6798_temp_label[] = { "", "SYSTIN", @@ -1005,6 +1013,8 @@ static const u16 NCT6106_REG_TEMP_CRIT[32] = { [12] = 0x205, }; +static const u16 NCT6106_REG_TSI_TEMP[] = { 0x59, 0x5b, 0x5d, 0x5f, 0x61, 0x63, 0x65, 0x67 }; + /* NCT6112D/NCT6114D/NCT6116D specific data */ static const u16 NCT6116_REG_FAN[] = { 0x20, 0x22, 0x24, 0x26, 0x28 }; @@ -1069,6 +1079,8 @@ static const s8 NCT6116_BEEP_BITS[] = { 34, -1 /* intrusion0, intrusion1 */ }; +static const u16 NCT6116_REG_TSI_TEMP[] = { 0x59, 0x5b }; + static enum pwm_enable reg_to_pwm_enable(int pwm, int mode) { if (mode == 0 && pwm == 255) @@ -1169,6 +1181,12 @@ static inline u8 in_to_reg(u32 val, u8 nr) return clamp_val(DIV_ROUND_CLOSEST(val * 100, scale_in[nr]), 0, 255); } +/* TSI temperatures are in 8.3 format */ +static inline unsigned int tsi_temp_from_reg(unsigned int reg) +{ + return (reg >> 5) * 125; +} + /* * Data structures and manipulation thereof */ @@ -1179,7 +1197,7 @@ struct nct6775_data { enum kinds kind; const char *name; - const struct attribute_group *groups[6]; + const struct attribute_group *groups[7]; u16 reg_temp[5][NUM_TEMP]; /* 0=temp, 1=temp_over, 2=temp_hyst, * 3=temp_crit, 4=temp_lcrit @@ -1240,6 +1258,8 @@ struct nct6775_data { const u16 *REG_ALARM; const u16 *REG_BEEP; + const u16 *REG_TSI_TEMP; + unsigned int (*fan_from_reg)(u16 reg, unsigned int divreg); unsigned int (*fan_from_reg_min)(u16 reg, unsigned int divreg); @@ -1267,6 +1287,7 @@ struct nct6775_data { s8 temp_offset[NUM_TEMP_FIXED]; s16 temp[5][NUM_TEMP]; /* 0=temp, 1=temp_over, 2=temp_hyst, * 3=temp_crit, 
4=temp_lcrit */ + s16 tsi_temp[NUM_TSI_TEMP]; u64 alarms; u64 beeps; @@ -1315,6 +1336,7 @@ struct nct6775_data { u16 have_temp; u16 have_temp_fixed; + u16 have_tsi_temp; u16 have_in; /* Remember extra register values over suspend/resume */ @@ -1464,13 +1486,15 @@ static bool is_word_sized(struct nct6775_data *data, u16 reg) switch (data->kind) { case nct6106: return reg == 0x20 || reg == 0x22 || reg == 0x24 || + (reg >= 0x59 && reg < 0x69 && (reg & 1)) || reg == 0xe0 || reg == 0xe2 || reg == 0xe4 || reg == 0x111 || reg == 0x121 || reg == 0x131; case nct6116: return reg == 0x20 || reg == 0x22 || reg == 0x24 || - reg == 0x26 || reg == 0x28 || reg == 0xe0 || reg == 0xe2 || - reg == 0xe4 || reg == 0xe6 || reg == 0xe8 || reg == 0x111 || - reg == 0x121 || reg == 0x131 || reg == 0x191 || reg == 0x1a1; + reg == 0x26 || reg == 0x28 || reg == 0x59 || reg == 0x5b || + reg == 0xe0 || reg == 0xe2 || reg == 0xe4 || reg == 0xe6 || + reg == 0xe8 || reg == 0x111 || reg == 0x121 || reg == 0x131 || + reg == 0x191 || reg == 0x1a1; case nct6775: return (((reg & 0xff00) == 0x100 || (reg & 0xff00) == 0x200) && @@ -1479,7 +1503,7 @@ static bool is_word_sized(struct nct6775_data *data, u16 reg) (reg & 0x00ff) == 0x55)) || (reg & 0xfff0) == 0x630 || reg == 0x640 || reg == 0x642 || - reg == 0x662 || + reg == 0x662 || reg == 0x669 || ((reg & 0xfff0) == 0x650 && (reg & 0x000f) >= 0x06) || reg == 0x73 || reg == 0x75 || reg == 0x77; case nct6776: @@ -1490,6 +1514,7 @@ static bool is_word_sized(struct nct6775_data *data, u16 reg) (reg & 0x00ff) == 0x55)) || (reg & 0xfff0) == 0x630 || reg == 0x402 || + (reg >= 0x409 && reg < 0x419 && (reg & 1)) || reg == 0x640 || reg == 0x642 || ((reg & 0xfff0) == 0x650 && (reg & 0x000f) >= 0x06) || reg == 0x73 || reg == 0x75 || reg == 0x77; @@ -1504,6 +1529,7 @@ static bool is_word_sized(struct nct6775_data *data, u16 reg) return reg == 0x150 || reg == 0x153 || reg == 0x155 || (reg & 0xfff0) == 0x4c0 || reg == 0x402 || + (reg >= 0x409 && reg < 0x419 && (reg & 1)) 
|| reg == 0x63a || reg == 0x63c || reg == 0x63e || reg == 0x640 || reg == 0x642 || reg == 0x64a || reg == 0x64c || @@ -1987,6 +2013,12 @@ static struct nct6775_data *nct6775_update_device(struct device *dev) data->REG_TEMP_OFFSET[i]); } + for (i = 0; i < NUM_TSI_TEMP; i++) { + if (!(data->have_tsi_temp & BIT(i))) + continue; + data->tsi_temp[i] = data->read_value(data, data->REG_TSI_TEMP[i]); + } + data->alarms = 0; for (i = 0; i < NUM_REG_ALARM; i++) { u8 alarm; @@ -2670,6 +2702,44 @@ static const struct sensor_template_group nct6775_temp_template_group = { .base = 1, }; +static ssize_t show_tsi_temp(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct nct6775_data *data = nct6775_update_device(dev); + struct sensor_device_attribute *sattr = to_sensor_dev_attr(attr); + + return sysfs_emit(buf, "%u\n", tsi_temp_from_reg(data->tsi_temp[sattr->index])); +} + +static ssize_t show_tsi_temp_label(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct sensor_device_attribute *sattr = to_sensor_dev_attr(attr); + + return sysfs_emit(buf, "TSI%d_TEMP\n", sattr->index); +} + +SENSOR_TEMPLATE(tsi_temp_input, "temp%d_input", 0444, show_tsi_temp, NULL, 0); +SENSOR_TEMPLATE(tsi_temp_label, "temp%d_label", 0444, show_tsi_temp_label, NULL, 0); + +static umode_t nct6775_tsi_temp_is_visible(struct kobject *kobj, struct attribute *attr, + int index) +{ + struct device *dev = kobj_to_dev(kobj); + struct nct6775_data *data = dev_get_drvdata(dev); + int temp = index / 2; + + return (data->have_tsi_temp & BIT(temp)) ? attr->mode : 0; +} + +/* + * The index calculation in nct6775_tsi_temp_is_visible() must be kept in + * sync with the size of this array. 
+ */ +static struct sensor_device_template *nct6775_tsi_temp_template[] = { + &sensor_dev_template_tsi_temp_input, + &sensor_dev_template_tsi_temp_label, + NULL +}; + static ssize_t show_pwm_mode(struct device *dev, struct device_attribute *attr, char *buf) { @@ -3948,10 +4018,11 @@ static int nct6775_probe(struct platform_device *pdev) const u16 *reg_temp, *reg_temp_over, *reg_temp_hyst, *reg_temp_config; const u16 *reg_temp_mon, *reg_temp_alternate, *reg_temp_crit; const u16 *reg_temp_crit_l = NULL, *reg_temp_crit_h = NULL; - int num_reg_temp, num_reg_temp_mon; + int num_reg_temp, num_reg_temp_mon, num_reg_tsi_temp; u8 cr2a; struct attribute_group *group; struct device *hwmon_dev; + struct sensor_template_group tsi_temp_tg; int num_attr_groups = 0; if (sio_data->access == access_direct) { @@ -4043,11 +4114,13 @@ static int nct6775_probe(struct platform_device *pdev) data->ALARM_BITS = NCT6106_ALARM_BITS; data->REG_BEEP = NCT6106_REG_BEEP; data->BEEP_BITS = NCT6106_BEEP_BITS; + data->REG_TSI_TEMP = NCT6106_REG_TSI_TEMP; reg_temp = NCT6106_REG_TEMP; reg_temp_mon = NCT6106_REG_TEMP_MON; num_reg_temp = ARRAY_SIZE(NCT6106_REG_TEMP); num_reg_temp_mon = ARRAY_SIZE(NCT6106_REG_TEMP_MON); + num_reg_tsi_temp = ARRAY_SIZE(NCT6106_REG_TSI_TEMP); reg_temp_over = NCT6106_REG_TEMP_OVER; reg_temp_hyst = NCT6106_REG_TEMP_HYST; reg_temp_config = NCT6106_REG_TEMP_CONFIG; @@ -4116,11 +4189,13 @@ static int nct6775_probe(struct platform_device *pdev) data->ALARM_BITS = NCT6116_ALARM_BITS; data->REG_BEEP = NCT6106_REG_BEEP; data->BEEP_BITS = NCT6116_BEEP_BITS; + data->REG_TSI_TEMP = NCT6116_REG_TSI_TEMP; reg_temp = NCT6106_REG_TEMP; reg_temp_mon = NCT6106_REG_TEMP_MON; num_reg_temp = ARRAY_SIZE(NCT6106_REG_TEMP); num_reg_temp_mon = ARRAY_SIZE(NCT6106_REG_TEMP_MON); + num_reg_tsi_temp = ARRAY_SIZE(NCT6116_REG_TSI_TEMP); reg_temp_over = NCT6106_REG_TEMP_OVER; reg_temp_hyst = NCT6106_REG_TEMP_HYST; reg_temp_config = NCT6106_REG_TEMP_CONFIG; @@ -4191,11 +4266,13 @@ static int 
nct6775_probe(struct platform_device *pdev) data->REG_WEIGHT_TEMP[2] = NCT6775_REG_WEIGHT_TEMP_BASE; data->REG_ALARM = NCT6775_REG_ALARM; data->REG_BEEP = NCT6775_REG_BEEP; + data->REG_TSI_TEMP = NCT6775_REG_TSI_TEMP; reg_temp = NCT6775_REG_TEMP; reg_temp_mon = NCT6775_REG_TEMP_MON; num_reg_temp = ARRAY_SIZE(NCT6775_REG_TEMP); num_reg_temp_mon = ARRAY_SIZE(NCT6775_REG_TEMP_MON); + num_reg_tsi_temp = ARRAY_SIZE(NCT6775_REG_TSI_TEMP); reg_temp_over = NCT6775_REG_TEMP_OVER; reg_temp_hyst = NCT6775_REG_TEMP_HYST; reg_temp_config = NCT6775_REG_TEMP_CONFIG; @@ -4264,11 +4341,13 @@ static int nct6775_probe(struct platform_device *pdev) data->REG_WEIGHT_TEMP[2] = NCT6775_REG_WEIGHT_TEMP_BASE; data->REG_ALARM = NCT6775_REG_ALARM; data->REG_BEEP = NCT6776_REG_BEEP; + data->REG_TSI_TEMP = NCT6776_REG_TSI_TEMP; reg_temp = NCT6775_REG_TEMP; reg_temp_mon = NCT6775_REG_TEMP_MON; num_reg_temp = ARRAY_SIZE(NCT6775_REG_TEMP); num_reg_temp_mon = ARRAY_SIZE(NCT6775_REG_TEMP_MON); + num_reg_tsi_temp = ARRAY_SIZE(NCT6776_REG_TSI_TEMP); reg_temp_over = NCT6775_REG_TEMP_OVER; reg_temp_hyst = NCT6775_REG_TEMP_HYST; reg_temp_config = NCT6776_REG_TEMP_CONFIG; @@ -4341,11 +4420,13 @@ static int nct6775_probe(struct platform_device *pdev) data->REG_WEIGHT_TEMP[2] = NCT6775_REG_WEIGHT_TEMP_BASE; data->REG_ALARM = NCT6779_REG_ALARM; data->REG_BEEP = NCT6776_REG_BEEP; + data->REG_TSI_TEMP = NCT6776_REG_TSI_TEMP; reg_temp = NCT6779_REG_TEMP; reg_temp_mon = NCT6779_REG_TEMP_MON; num_reg_temp = ARRAY_SIZE(NCT6779_REG_TEMP); num_reg_temp_mon = ARRAY_SIZE(NCT6779_REG_TEMP_MON); + num_reg_tsi_temp = ARRAY_SIZE(NCT6776_REG_TSI_TEMP); reg_temp_over = NCT6779_REG_TEMP_OVER; reg_temp_hyst = NCT6779_REG_TEMP_HYST; reg_temp_config = NCT6779_REG_TEMP_CONFIG; @@ -4460,6 +4541,24 @@ static int nct6775_probe(struct platform_device *pdev) data->REG_BEEP = NCT6776_REG_BEEP; else data->REG_BEEP = NCT6792_REG_BEEP; + switch (data->kind) { + case nct6791: + case nct6792: + case nct6793: + data->REG_TSI_TEMP = 
NCT6776_REG_TSI_TEMP; + num_reg_tsi_temp = ARRAY_SIZE(NCT6776_REG_TSI_TEMP); + break; + case nct6795: + case nct6796: + case nct6797: + case nct6798: + data->REG_TSI_TEMP = NCT6796_REG_TSI_TEMP; + num_reg_tsi_temp = ARRAY_SIZE(NCT6796_REG_TSI_TEMP); + break; + default: + num_reg_tsi_temp = 0; + break; + } reg_temp = NCT6779_REG_TEMP; num_reg_temp = ARRAY_SIZE(NCT6779_REG_TEMP); @@ -4659,6 +4758,12 @@ static int nct6775_probe(struct platform_device *pdev) } #endif /* USE_ALTERNATE */ + /* Check which TSIx_TEMP registers are active */ + for (i = 0; i < num_reg_tsi_temp; i++) { + if (data->read_value(data, data->REG_TSI_TEMP[i])) + data->have_tsi_temp |= BIT(i); + } + /* Initialize the chip */ nct6775_init_device(data); @@ -4766,6 +4871,18 @@ static int nct6775_probe(struct platform_device *pdev) return PTR_ERR(group); data->groups[num_attr_groups++] = group; + + if (data->have_tsi_temp) { + tsi_temp_tg.templates = nct6775_tsi_temp_template; + tsi_temp_tg.is_visible = nct6775_tsi_temp_is_visible; + tsi_temp_tg.base = fls(data->have_temp) + 1; + group = nct6775_create_attr_group(dev, &tsi_temp_tg, fls(data->have_tsi_temp)); + if (IS_ERR(group)) + return PTR_ERR(group); + + data->groups[num_attr_groups++] = group; + } + data->groups[num_attr_groups++] = &nct6775_group_other; hwmon_dev = devm_hwmon_device_register_with_groups(dev, data->name, @@ -4985,9 +5102,14 @@ static struct platform_device *pdev[2]; static const char * const asus_wmi_boards[] = { "ProArt X570-CREATOR WIFI", + "Pro B550M-C", "Pro WS X570-ACE", "PRIME B360-PLUS", "PRIME B460-PLUS", + "PRIME B550-PLUS", + "PRIME B550M-A", + "PRIME B550M-A (WI-FI)", + "PRIME X570-P", "PRIME X570-PRO", "ROG CROSSHAIR VIII DARK HERO", "ROG CROSSHAIR VIII FORMULA", @@ -4997,10 +5119,22 @@ static const char * const asus_wmi_boards[] = { "ROG STRIX B550-E GAMING", "ROG STRIX B550-F GAMING", "ROG STRIX B550-F GAMING (WI-FI)", + "ROG STRIX B550-F GAMING WIFI II", "ROG STRIX B550-I GAMING", + "ROG STRIX B550-XE GAMING (WI-FI)", 
+ "ROG STRIX X570-E GAMING", "ROG STRIX X570-F GAMING", "ROG STRIX X570-I GAMING", "ROG STRIX Z390-E GAMING", + "ROG STRIX Z390-F GAMING", + "ROG STRIX Z390-H GAMING", + "ROG STRIX Z390-I GAMING", + "ROG STRIX Z490-A GAMING", + "ROG STRIX Z490-E GAMING", + "ROG STRIX Z490-F GAMING", + "ROG STRIX Z490-G GAMING", + "ROG STRIX Z490-G GAMING (WI-FI)", + "ROG STRIX Z490-H GAMING", "ROG STRIX Z490-I GAMING", "TUF GAMING B550M-PLUS", "TUF GAMING B550M-PLUS (WI-FI)", diff --git a/drivers/hwmon/pmbus/pmbus.h b/drivers/hwmon/pmbus/pmbus.h index e0aa8aa46d8c4..ef3a8ecde4dfc 100644 --- a/drivers/hwmon/pmbus/pmbus.h +++ b/drivers/hwmon/pmbus/pmbus.h @@ -319,6 +319,7 @@ enum pmbus_fan_mode { percent = 0, rpm }; /* * STATUS_VOUT, STATUS_INPUT */ +#define PB_VOLTAGE_VIN_OFF BIT(3) #define PB_VOLTAGE_UV_FAULT BIT(4) #define PB_VOLTAGE_UV_WARNING BIT(5) #define PB_VOLTAGE_OV_WARNING BIT(6) diff --git a/drivers/hwmon/pmbus/pmbus_core.c b/drivers/hwmon/pmbus/pmbus_core.c index ac2fbee1ba9c0..ca0bfaf2f6911 100644 --- a/drivers/hwmon/pmbus/pmbus_core.c +++ b/drivers/hwmon/pmbus/pmbus_core.c @@ -1373,7 +1373,7 @@ static const struct pmbus_limit_attr vin_limit_attrs[] = { .reg = PMBUS_VIN_UV_FAULT_LIMIT, .attr = "lcrit", .alarm = "lcrit_alarm", - .sbit = PB_VOLTAGE_UV_FAULT, + .sbit = PB_VOLTAGE_UV_FAULT | PB_VOLTAGE_VIN_OFF, }, { .reg = PMBUS_VIN_OV_WARN_LIMIT, .attr = "max", @@ -2391,10 +2391,14 @@ static int pmbus_regulator_is_enabled(struct regulator_dev *rdev) { struct device *dev = rdev_get_dev(rdev); struct i2c_client *client = to_i2c_client(dev->parent); + struct pmbus_data *data = i2c_get_clientdata(client); u8 page = rdev_get_id(rdev); int ret; + mutex_lock(&data->update_lock); ret = pmbus_read_byte_data(client, page, PMBUS_OPERATION); + mutex_unlock(&data->update_lock); + if (ret < 0) return ret; @@ -2405,11 +2409,17 @@ static int _pmbus_regulator_on_off(struct regulator_dev *rdev, bool enable) { struct device *dev = rdev_get_dev(rdev); struct i2c_client *client = 
to_i2c_client(dev->parent); + struct pmbus_data *data = i2c_get_clientdata(client); u8 page = rdev_get_id(rdev); + int ret; - return pmbus_update_byte_data(client, page, PMBUS_OPERATION, - PB_OPERATION_CONTROL_ON, - enable ? PB_OPERATION_CONTROL_ON : 0); + mutex_lock(&data->update_lock); + ret = pmbus_update_byte_data(client, page, PMBUS_OPERATION, + PB_OPERATION_CONTROL_ON, + enable ? PB_OPERATION_CONTROL_ON : 0); + mutex_unlock(&data->update_lock); + + return ret; } static int pmbus_regulator_enable(struct regulator_dev *rdev) diff --git a/drivers/hwmon/sch56xx-common.c b/drivers/hwmon/sch56xx-common.c index 40cdadad35e52..f85eede6d7663 100644 --- a/drivers/hwmon/sch56xx-common.c +++ b/drivers/hwmon/sch56xx-common.c @@ -422,7 +422,7 @@ void sch56xx_watchdog_register(struct device *parent, u16 addr, u32 revision, data->wddev.max_timeout = 255 * 60; watchdog_set_nowayout(&data->wddev, nowayout); if (output_enable & SCH56XX_WDOG_OUTPUT_ENABLE) - set_bit(WDOG_ACTIVE, &data->wddev.status); + set_bit(WDOG_HW_RUNNING, &data->wddev.status); /* Since the watchdog uses a downcounter there is no register to read the BIOS set timeout from (if any was set at all) -> diff --git a/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c b/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c index a0640fa5c55bd..57e94424a8d65 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c +++ b/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c @@ -367,8 +367,12 @@ static ssize_t mode_store(struct device *dev, mode = ETM_MODE_QELEM(config->mode); /* start by clearing QE bits */ config->cfg &= ~(BIT(13) | BIT(14)); - /* if supported, Q elements with instruction counts are enabled */ - if ((mode & BIT(0)) && (drvdata->q_support & BIT(0))) + /* + * if supported, Q elements with instruction counts are enabled. + * Always set the low bit for any requested mode. Valid combos are + * 0b00, 0b01 and 0b11. 
+ */ + if (mode && drvdata->q_support) config->cfg |= BIT(13); /* * if supported, Q elements with and without instruction diff --git a/drivers/hwtracing/coresight/coresight-syscfg.c b/drivers/hwtracing/coresight/coresight-syscfg.c index 098fc34c48293..11850fd8c3b5b 100644 --- a/drivers/hwtracing/coresight/coresight-syscfg.c +++ b/drivers/hwtracing/coresight/coresight-syscfg.c @@ -1049,7 +1049,7 @@ static int cscfg_create_device(void) err = device_register(dev); if (err) - cscfg_dev_release(dev); + put_device(dev); create_dev_exit_unlock: mutex_unlock(&cscfg_mutex); diff --git a/drivers/i2c/busses/i2c-bcm2835.c b/drivers/i2c/busses/i2c-bcm2835.c index 5149454eef4a5..f72c6576d8a36 100644 --- a/drivers/i2c/busses/i2c-bcm2835.c +++ b/drivers/i2c/busses/i2c-bcm2835.c @@ -454,18 +454,20 @@ static int bcm2835_i2c_probe(struct platform_device *pdev) ret = clk_prepare_enable(i2c_dev->bus_clk); if (ret) { dev_err(&pdev->dev, "Couldn't prepare clock"); - return ret; + goto err_put_exclusive_rate; } i2c_dev->irq = platform_get_irq(pdev, 0); - if (i2c_dev->irq < 0) - return i2c_dev->irq; + if (i2c_dev->irq < 0) { + ret = i2c_dev->irq; + goto err_disable_unprepare_clk; + } ret = request_irq(i2c_dev->irq, bcm2835_i2c_isr, IRQF_SHARED, dev_name(&pdev->dev), i2c_dev); if (ret) { dev_err(&pdev->dev, "Could not request IRQ\n"); - return -ENODEV; + goto err_disable_unprepare_clk; } adap = &i2c_dev->adapter; @@ -489,7 +491,16 @@ static int bcm2835_i2c_probe(struct platform_device *pdev) ret = i2c_add_adapter(adap); if (ret) - free_irq(i2c_dev->irq, i2c_dev); + goto err_free_irq; + + return 0; + +err_free_irq: + free_irq(i2c_dev->irq, i2c_dev); +err_disable_unprepare_clk: + clk_disable_unprepare(i2c_dev->bus_clk); +err_put_exclusive_rate: + clk_rate_exclusive_put(i2c_dev->bus_clk); return ret; } diff --git a/drivers/i2c/busses/i2c-meson.c b/drivers/i2c/busses/i2c-meson.c index ef73a42577cc7..07eb819072c4f 100644 --- a/drivers/i2c/busses/i2c-meson.c +++ b/drivers/i2c/busses/i2c-meson.c 
@@ -465,18 +465,18 @@ static int meson_i2c_probe(struct platform_device *pdev) */ meson_i2c_set_mask(i2c, REG_CTRL, REG_CTRL_START, 0); - ret = i2c_add_adapter(&i2c->adap); - if (ret < 0) { - clk_disable_unprepare(i2c->clk); - return ret; - } - /* Disable filtering */ meson_i2c_set_mask(i2c, REG_SLAVE_ADDR, REG_SLV_SDA_FILTER | REG_SLV_SCL_FILTER, 0); meson_i2c_set_clk_div(i2c, timings.bus_freq_hz); + ret = i2c_add_adapter(&i2c->adap); + if (ret < 0) { + clk_disable_unprepare(i2c->clk); + return ret; + } + return 0; } diff --git a/drivers/i2c/busses/i2c-pasemi-core.c b/drivers/i2c/busses/i2c-pasemi-core.c index 4e161a4089d85..9028ffb58cc07 100644 --- a/drivers/i2c/busses/i2c-pasemi-core.c +++ b/drivers/i2c/busses/i2c-pasemi-core.c @@ -137,6 +137,12 @@ static int pasemi_i2c_xfer_msg(struct i2c_adapter *adapter, TXFIFO_WR(smbus, msg->buf[msg->len-1] | (stop ? MTXFIFO_STOP : 0)); + + if (stop) { + err = pasemi_smb_waitready(smbus); + if (err) + goto reset_out; + } } return 0; @@ -333,7 +339,6 @@ int pasemi_i2c_common_probe(struct pasemi_smbus *smbus) smbus->adapter.owner = THIS_MODULE; snprintf(smbus->adapter.name, sizeof(smbus->adapter.name), "PA Semi SMBus adapter (%s)", dev_name(smbus->dev)); - smbus->adapter.class = I2C_CLASS_HWMON | I2C_CLASS_SPD; smbus->adapter.algo = &smbus_algorithm; smbus->adapter.algo_data = smbus; diff --git a/drivers/i2c/busses/i2c-pasemi-pci.c b/drivers/i2c/busses/i2c-pasemi-pci.c index 1ab1f28744fb2..cfc89e04eb94c 100644 --- a/drivers/i2c/busses/i2c-pasemi-pci.c +++ b/drivers/i2c/busses/i2c-pasemi-pci.c @@ -56,6 +56,7 @@ static int pasemi_smb_pci_probe(struct pci_dev *dev, if (!smbus->ioaddr) return -EBUSY; + smbus->adapter.class = I2C_CLASS_HWMON | I2C_CLASS_SPD; error = pasemi_i2c_common_probe(smbus); if (error) return error; diff --git a/drivers/i2c/busses/i2c-xiic.c b/drivers/i2c/busses/i2c-xiic.c index eb789cfb99739..ffefe3c482e9c 100644 --- a/drivers/i2c/busses/i2c-xiic.c +++ b/drivers/i2c/busses/i2c-xiic.c @@ -734,7 +734,6 @@ 
static const struct i2c_adapter_quirks xiic_quirks = { static const struct i2c_adapter xiic_adapter = { .owner = THIS_MODULE, - .name = DRIVER_NAME, .class = I2C_CLASS_DEPRECATED, .algo = &xiic_algorithm, .quirks = &xiic_quirks, @@ -771,6 +770,8 @@ static int xiic_i2c_probe(struct platform_device *pdev) i2c_set_adapdata(&i2c->adap, i2c); i2c->adap.dev.parent = &pdev->dev; i2c->adap.dev.of_node = pdev->dev.of_node; + snprintf(i2c->adap.name, sizeof(i2c->adap.name), + DRIVER_NAME " %s", pdev->name); mutex_init(&i2c->lock); diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index 2c59dd748a49f..121bbad56cfac 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ -1424,7 +1424,7 @@ int i2c_handle_smbus_host_notify(struct i2c_adapter *adap, unsigned short addr) if (irq <= 0) return -ENXIO; - generic_handle_irq(irq); + generic_dispatch_irq(irq); return 0; } diff --git a/drivers/i2c/i2c-dev.c b/drivers/i2c/i2c-dev.c index cf5d049342ead..6fd2b6718b086 100644 --- a/drivers/i2c/i2c-dev.c +++ b/drivers/i2c/i2c-dev.c @@ -668,16 +668,21 @@ static int i2cdev_attach_adapter(struct device *dev, void *dummy) i2c_dev->dev.class = i2c_dev_class; i2c_dev->dev.parent = &adap->dev; i2c_dev->dev.release = i2cdev_dev_release; - dev_set_name(&i2c_dev->dev, "i2c-%d", adap->nr); + + res = dev_set_name(&i2c_dev->dev, "i2c-%d", adap->nr); + if (res) + goto err_put_i2c_dev; res = cdev_device_add(&i2c_dev->cdev, &i2c_dev->dev); - if (res) { - put_i2c_dev(i2c_dev, false); - return res; - } + if (res) + goto err_put_i2c_dev; pr_debug("adapter [%s] registered as minor %d\n", adap->name, adap->nr); return 0; + +err_put_i2c_dev: + put_i2c_dev(i2c_dev, false); + return res; } static int i2cdev_detach_adapter(struct device *dev, void *dummy) diff --git a/drivers/i2c/muxes/i2c-demux-pinctrl.c b/drivers/i2c/muxes/i2c-demux-pinctrl.c index 5365199a31f41..f7a7405d4350a 100644 --- a/drivers/i2c/muxes/i2c-demux-pinctrl.c +++ b/drivers/i2c/muxes/i2c-demux-pinctrl.c 
@@ -261,7 +261,7 @@ static int i2c_demux_pinctrl_probe(struct platform_device *pdev) err = device_create_file(&pdev->dev, &dev_attr_available_masters); if (err) - goto err_rollback; + goto err_rollback_activation; err = device_create_file(&pdev->dev, &dev_attr_current_master); if (err) @@ -271,8 +271,9 @@ static int i2c_demux_pinctrl_probe(struct platform_device *pdev) err_rollback_available: device_remove_file(&pdev->dev, &dev_attr_available_masters); -err_rollback: +err_rollback_activation: i2c_demux_deactivate_master(priv); +err_rollback: for (j = 0; j < i; j++) { of_node_put(priv->chan[j].parent_np); of_changeset_destroy(&priv->chan[j].chgset); diff --git a/drivers/iio/accel/mma8452.c b/drivers/iio/accel/mma8452.c index 64b82b4503ada..a21fdb015c6c0 100644 --- a/drivers/iio/accel/mma8452.c +++ b/drivers/iio/accel/mma8452.c @@ -176,6 +176,7 @@ static const struct mma8452_event_regs trans_ev_regs = { * @enabled_events: event flags enabled and handled by this driver */ struct mma_chip_info { + const char *name; u8 chip_id; const struct iio_chan_spec *channels; int num_channels; @@ -379,8 +380,8 @@ static ssize_t mma8452_show_scale_avail(struct device *dev, struct device_attribute *attr, char *buf) { - struct mma8452_data *data = iio_priv(i2c_get_clientdata( - to_i2c_client(dev))); + struct iio_dev *indio_dev = dev_to_iio_dev(dev); + struct mma8452_data *data = iio_priv(indio_dev); return mma8452_show_int_plus_micros(buf, data->chip_info->mma_scales, ARRAY_SIZE(data->chip_info->mma_scales)); @@ -1301,6 +1302,7 @@ enum { static const struct mma_chip_info mma_chip_info_table[] = { [mma8451] = { + .name = "mma8451", .chip_id = MMA8451_DEVICE_ID, .channels = mma8451_channels, .num_channels = ARRAY_SIZE(mma8451_channels), @@ -1325,6 +1327,7 @@ static const struct mma_chip_info mma_chip_info_table[] = { MMA8452_INT_FF_MT, }, [mma8452] = { + .name = "mma8452", .chip_id = MMA8452_DEVICE_ID, .channels = mma8452_channels, .num_channels = ARRAY_SIZE(mma8452_channels), @@ 
-1341,6 +1344,7 @@ static const struct mma_chip_info mma_chip_info_table[] = { MMA8452_INT_FF_MT, }, [mma8453] = { + .name = "mma8453", .chip_id = MMA8453_DEVICE_ID, .channels = mma8453_channels, .num_channels = ARRAY_SIZE(mma8453_channels), @@ -1357,6 +1361,7 @@ static const struct mma_chip_info mma_chip_info_table[] = { MMA8452_INT_FF_MT, }, [mma8652] = { + .name = "mma8652", .chip_id = MMA8652_DEVICE_ID, .channels = mma8652_channels, .num_channels = ARRAY_SIZE(mma8652_channels), @@ -1366,6 +1371,7 @@ static const struct mma_chip_info mma_chip_info_table[] = { .enabled_events = MMA8452_INT_FF_MT, }, [mma8653] = { + .name = "mma8653", .chip_id = MMA8653_DEVICE_ID, .channels = mma8653_channels, .num_channels = ARRAY_SIZE(mma8653_channels), @@ -1380,6 +1386,7 @@ static const struct mma_chip_info mma_chip_info_table[] = { .enabled_events = MMA8452_INT_FF_MT, }, [fxls8471] = { + .name = "fxls8471", .chip_id = FXLS8471_DEVICE_ID, .channels = mma8451_channels, .num_channels = ARRAY_SIZE(mma8451_channels), @@ -1522,13 +1529,6 @@ static int mma8452_probe(struct i2c_client *client, struct mma8452_data *data; struct iio_dev *indio_dev; int ret; - const struct of_device_id *match; - - match = of_match_device(mma8452_dt_ids, &client->dev); - if (!match) { - dev_err(&client->dev, "unknown device model\n"); - return -ENODEV; - } indio_dev = devm_iio_device_alloc(&client->dev, sizeof(*data)); if (!indio_dev) @@ -1537,7 +1537,14 @@ static int mma8452_probe(struct i2c_client *client, data = iio_priv(indio_dev); data->client = client; mutex_init(&data->lock); - data->chip_info = match->data; + + data->chip_info = device_get_match_data(&client->dev); + if (!data->chip_info && id) { + data->chip_info = &mma_chip_info_table[id->driver_data]; + } else { + dev_err(&client->dev, "unknown device model\n"); + return -ENODEV; + } data->vdd_reg = devm_regulator_get(&client->dev, "vdd"); if (IS_ERR(data->vdd_reg)) @@ -1581,11 +1588,11 @@ static int mma8452_probe(struct i2c_client *client, } 
dev_info(&client->dev, "registering %s accelerometer; ID 0x%x\n", - match->compatible, data->chip_info->chip_id); + data->chip_info->name, data->chip_info->chip_id); i2c_set_clientdata(client, indio_dev); indio_dev->info = &mma8452_info; - indio_dev->name = id->name; + indio_dev->name = data->chip_info->name; indio_dev->modes = INDIO_DIRECT_MODE; indio_dev->channels = data->chip_info->channels; indio_dev->num_channels = data->chip_info->num_channels; @@ -1810,7 +1817,7 @@ MODULE_DEVICE_TABLE(i2c, mma8452_id); static struct i2c_driver mma8452_driver = { .driver = { .name = "mma8452", - .of_match_table = of_match_ptr(mma8452_dt_ids), + .of_match_table = mma8452_dt_ids, .pm = &mma8452_pm_ops, }, .probe = mma8452_probe, diff --git a/drivers/iio/adc/aspeed_adc.c b/drivers/iio/adc/aspeed_adc.c index e939b84cbb561..0793d2474cdcf 100644 --- a/drivers/iio/adc/aspeed_adc.c +++ b/drivers/iio/adc/aspeed_adc.c @@ -539,7 +539,9 @@ static int aspeed_adc_probe(struct platform_device *pdev) data->clk_scaler = devm_clk_hw_register_divider( &pdev->dev, clk_name, clk_parent_name, scaler_flags, data->base + ASPEED_REG_CLOCK_CONTROL, 0, - data->model_data->scaler_bit_width, 0, &data->clk_lock); + data->model_data->scaler_bit_width, + data->model_data->need_prescaler ? 
CLK_DIVIDER_ONE_BASED : 0, + &data->clk_lock); if (IS_ERR(data->clk_scaler)) return PTR_ERR(data->clk_scaler); diff --git a/drivers/iio/adc/twl6030-gpadc.c b/drivers/iio/adc/twl6030-gpadc.c index afdb59e0b5267..d0223e39d59af 100644 --- a/drivers/iio/adc/twl6030-gpadc.c +++ b/drivers/iio/adc/twl6030-gpadc.c @@ -911,6 +911,8 @@ static int twl6030_gpadc_probe(struct platform_device *pdev) ret = devm_request_threaded_irq(dev, irq, NULL, twl6030_gpadc_irq_handler, IRQF_ONESHOT, "twl6030_gpadc", indio_dev); + if (ret) + return ret; ret = twl6030_gpadc_enable_irq(TWL6030_GPADC_RT_SW1_EOC_MASK); if (ret < 0) { diff --git a/drivers/iio/adc/xilinx-ams.c b/drivers/iio/adc/xilinx-ams.c index 8343c5f74121e..7bf097fa10cb7 100644 --- a/drivers/iio/adc/xilinx-ams.c +++ b/drivers/iio/adc/xilinx-ams.c @@ -91,8 +91,8 @@ #define AMS_CONF1_SEQ_MASK GENMASK(15, 12) #define AMS_CONF1_SEQ_DEFAULT FIELD_PREP(AMS_CONF1_SEQ_MASK, 0) -#define AMS_CONF1_SEQ_CONTINUOUS FIELD_PREP(AMS_CONF1_SEQ_MASK, 1) -#define AMS_CONF1_SEQ_SINGLE_CHANNEL FIELD_PREP(AMS_CONF1_SEQ_MASK, 2) +#define AMS_CONF1_SEQ_CONTINUOUS FIELD_PREP(AMS_CONF1_SEQ_MASK, 2) +#define AMS_CONF1_SEQ_SINGLE_CHANNEL FIELD_PREP(AMS_CONF1_SEQ_MASK, 3) #define AMS_REG_SEQ0_MASK GENMASK(15, 0) #define AMS_REG_SEQ2_MASK GENMASK(21, 16) @@ -530,14 +530,18 @@ static int ams_enable_single_channel(struct ams *ams, unsigned int offset) return -EINVAL; } - /* set single channel, sequencer off mode */ + /* put sysmon in a soft reset to change the sequence */ ams_ps_update_reg(ams, AMS_REG_CONFIG1, AMS_CONF1_SEQ_MASK, - AMS_CONF1_SEQ_SINGLE_CHANNEL); + AMS_CONF1_SEQ_DEFAULT); /* write the channel number */ ams_ps_update_reg(ams, AMS_REG_CONFIG0, AMS_CONF0_CHANNEL_NUM_MASK, channel_num); + /* set single channel, sequencer off mode */ + ams_ps_update_reg(ams, AMS_REG_CONFIG1, AMS_CONF1_SEQ_MASK, + AMS_CONF1_SEQ_SINGLE_CHANNEL); + return 0; } @@ -551,6 +555,8 @@ static int ams_read_vcc_reg(struct ams *ams, unsigned int offset, u32 *data) if (ret) 
return ret; + /* clear end-of-conversion flag, wait for next conversion to complete */ + writel(expect, ams->base + AMS_ISR_1); ret = readl_poll_timeout(ams->base + AMS_ISR_1, reg, (reg & expect), AMS_INIT_POLL_TIME_US, AMS_INIT_TIMEOUT_US); if (ret) @@ -1224,6 +1230,7 @@ static int ams_init_module(struct iio_dev *indio_dev, /* add PS channels to iio device channels */ memcpy(channels, ams_ps_channels, sizeof(ams_ps_channels)); + num_channels = ARRAY_SIZE(ams_ps_channels); } else if (fwnode_property_match_string(fwnode, "compatible", "xlnx,zynqmp-ams-pl") == 0) { ams->pl_base = fwnode_iomap(fwnode, 0); diff --git a/drivers/iio/afe/iio-rescale.c b/drivers/iio/afe/iio-rescale.c index 774eb3044edd8..271d73e420c42 100644 --- a/drivers/iio/afe/iio-rescale.c +++ b/drivers/iio/afe/iio-rescale.c @@ -39,7 +39,7 @@ static int rescale_read_raw(struct iio_dev *indio_dev, int *val, int *val2, long mask) { struct rescale *rescale = iio_priv(indio_dev); - unsigned long long tmp; + s64 tmp; int ret; switch (mask) { @@ -77,10 +77,10 @@ static int rescale_read_raw(struct iio_dev *indio_dev, *val2 = rescale->denominator; return IIO_VAL_FRACTIONAL; case IIO_VAL_FRACTIONAL_LOG2: - tmp = *val * 1000000000LL; - do_div(tmp, rescale->denominator); + tmp = (s64)*val * 1000000000LL; + tmp = div_s64(tmp, rescale->denominator); tmp *= rescale->numerator; - do_div(tmp, 1000000000LL); + tmp = div_s64(tmp, 1000000000LL); *val = tmp; return ret; default: diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c index 93f0c6bce502c..b1d8d5a66f01f 100644 --- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c +++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c @@ -1633,7 +1633,7 @@ st_lsm6dsx_sysfs_sampling_frequency_avail(struct device *dev, struct device_attribute *attr, char *buf) { - struct st_lsm6dsx_sensor *sensor = iio_priv(dev_get_drvdata(dev)); + struct st_lsm6dsx_sensor *sensor = iio_priv(dev_to_iio_dev(dev)); const struct st_lsm6dsx_odr_table_entry 
*odr_table; int i, len = 0; @@ -1651,7 +1651,7 @@ static ssize_t st_lsm6dsx_sysfs_scale_avail(struct device *dev, struct device_attribute *attr, char *buf) { - struct st_lsm6dsx_sensor *sensor = iio_priv(dev_get_drvdata(dev)); + struct st_lsm6dsx_sensor *sensor = iio_priv(dev_to_iio_dev(dev)); const struct st_lsm6dsx_fs_table_entry *fs_table; struct st_lsm6dsx_hw *hw = sensor->hw; int i, len = 0; diff --git a/drivers/iio/inkern.c b/drivers/iio/inkern.c index 0222885b334c1..df74765d33dcb 100644 --- a/drivers/iio/inkern.c +++ b/drivers/iio/inkern.c @@ -595,28 +595,50 @@ EXPORT_SYMBOL_GPL(iio_read_channel_average_raw); static int iio_convert_raw_to_processed_unlocked(struct iio_channel *chan, int raw, int *processed, unsigned int scale) { - int scale_type, scale_val, scale_val2, offset; + int scale_type, scale_val, scale_val2; + int offset_type, offset_val, offset_val2; s64 raw64 = raw; - int ret; - ret = iio_channel_read(chan, &offset, NULL, IIO_CHAN_INFO_OFFSET); - if (ret >= 0) - raw64 += offset; + offset_type = iio_channel_read(chan, &offset_val, &offset_val2, + IIO_CHAN_INFO_OFFSET); + if (offset_type >= 0) { + switch (offset_type) { + case IIO_VAL_INT: + break; + case IIO_VAL_INT_PLUS_MICRO: + case IIO_VAL_INT_PLUS_NANO: + /* + * Both IIO_VAL_INT_PLUS_MICRO and IIO_VAL_INT_PLUS_NANO + * implicitely truncate the offset to it's integer form. + */ + break; + case IIO_VAL_FRACTIONAL: + offset_val /= offset_val2; + break; + case IIO_VAL_FRACTIONAL_LOG2: + offset_val >>= offset_val2; + break; + default: + return -EINVAL; + } + + raw64 += offset_val; + } scale_type = iio_channel_read(chan, &scale_val, &scale_val2, IIO_CHAN_INFO_SCALE); if (scale_type < 0) { /* - * Just pass raw values as processed if no scaling is - * available. + * If no channel scaling is available apply consumer scale to + * raw value and return. 
*/ - *processed = raw; + *processed = raw * scale; return 0; } switch (scale_type) { case IIO_VAL_INT: - *processed = raw64 * scale_val; + *processed = raw64 * scale_val * scale; break; case IIO_VAL_INT_PLUS_MICRO: if (scale_val2 < 0) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 35f0d5e7533d6..1c107d6d03b99 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -2824,6 +2824,7 @@ static int cm_dreq_handler(struct cm_work *work) switch (cm_id_priv->id.state) { case IB_CM_REP_SENT: case IB_CM_DREQ_SENT: + case IB_CM_MRA_REP_RCVD: ib_cancel_mad(cm_id_priv->msg); break; case IB_CM_ESTABLISHED: @@ -2831,8 +2832,6 @@ static int cm_dreq_handler(struct cm_work *work) cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD) ib_cancel_mad(cm_id_priv->msg); break; - case IB_CM_MRA_REP_RCVD: - break; case IB_CM_TIMEWAIT: atomic_long_inc(&work->port->counters[CM_RECV_DUPLICATES] [CM_DREQ_COUNTER]); diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 50c53409ceb61..fabca5e51e3d4 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -2642,7 +2642,7 @@ int rdma_set_ack_timeout(struct rdma_cm_id *id, u8 timeout) { struct rdma_id_private *id_priv; - if (id->qp_type != IB_QPT_RC) + if (id->qp_type != IB_QPT_RC && id->qp_type != IB_QPT_XRC_INI) return -EINVAL; id_priv = container_of(id, struct rdma_id_private, id); diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index f5aacaf7fb8ef..ca24ce34da766 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -1951,9 +1951,10 @@ static int nldev_stat_set_counter_dynamic_doit(struct nlattr *tb[], u32 port) { struct rdma_hw_stats *stats; - int rem, i, index, ret = 0; struct nlattr *entry_attr; unsigned long *target; + int rem, i, ret = 0; + u32 index; stats = ib_get_hw_stats_port(device, port); if (!stats) diff --git a/drivers/infiniband/core/verbs.c 
b/drivers/infiniband/core/verbs.c index c18634bec2126..e821dc94a43ed 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -2153,6 +2153,7 @@ struct ib_mr *ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, return mr; mr->device = pd->device; + mr->type = IB_MR_TYPE_USER; mr->pd = pd; mr->dm = NULL; atomic_inc(&pd->usecnt); diff --git a/drivers/infiniband/hw/hfi1/mmu_rb.c b/drivers/infiniband/hw/hfi1/mmu_rb.c index 876cc78a22cca..7333646021bb8 100644 --- a/drivers/infiniband/hw/hfi1/mmu_rb.c +++ b/drivers/infiniband/hw/hfi1/mmu_rb.c @@ -80,6 +80,9 @@ void hfi1_mmu_rb_unregister(struct mmu_rb_handler *handler) unsigned long flags; struct list_head del_list; + /* Prevent freeing of mm until we are completely finished. */ + mmgrab(handler->mn.mm); + /* Unregister first so we don't get any more notifications. */ mmu_notifier_unregister(&handler->mn, handler->mn.mm); @@ -102,6 +105,9 @@ void hfi1_mmu_rb_unregister(struct mmu_rb_handler *handler) do_remove(handler, &del_list); + /* Now the mm may be freed. */ + mmdrop(handler->mn.mm); + kfree(handler); } diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index dc9211f3a0098..99d0743133cac 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1397,8 +1397,7 @@ static int query_port(struct rvt_dev_info *rdi, u32 port_num, 4096 : hfi1_max_mtu), IB_MTU_4096); props->active_mtu = !valid_ib_mtu(ppd->ibmtu) ? props->max_mtu : mtu_to_enum(ppd->ibmtu, IB_MTU_4096); - props->phys_mtu = HFI1_CAP_IS_KSET(AIP) ? 
hfi1_max_mtu : - ib_mtu_enum_to_int(props->max_mtu); + props->phys_mtu = hfi1_max_mtu; return 0; } diff --git a/drivers/infiniband/hw/irdma/ctrl.c b/drivers/infiniband/hw/irdma/ctrl.c index 3141a9c85de5a..e7554b6043e4b 100644 --- a/drivers/infiniband/hw/irdma/ctrl.c +++ b/drivers/infiniband/hw/irdma/ctrl.c @@ -433,7 +433,7 @@ enum irdma_status_code irdma_sc_qp_create(struct irdma_sc_qp *qp, struct irdma_c cqp = qp->dev->cqp; if (qp->qp_uk.qp_id < cqp->dev->hw_attrs.min_hw_qp_id || - qp->qp_uk.qp_id > (cqp->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_QP].max_cnt - 1)) + qp->qp_uk.qp_id >= (cqp->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_QP].max_cnt)) return IRDMA_ERR_INVALID_QP_ID; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); @@ -2512,10 +2512,10 @@ static enum irdma_status_code irdma_sc_cq_create(struct irdma_sc_cq *cq, enum irdma_status_code ret_code = 0; cqp = cq->dev->cqp; - if (cq->cq_uk.cq_id > (cqp->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_CQ].max_cnt - 1)) + if (cq->cq_uk.cq_id >= (cqp->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_CQ].max_cnt)) return IRDMA_ERR_INVALID_CQ_ID; - if (cq->ceq_id > (cq->dev->hmc_fpm_misc.max_ceqs - 1)) + if (cq->ceq_id >= (cq->dev->hmc_fpm_misc.max_ceqs)) return IRDMA_ERR_INVALID_CEQ_ID; ceq = cq->dev->ceq[cq->ceq_id]; @@ -3617,7 +3617,7 @@ enum irdma_status_code irdma_sc_ceq_init(struct irdma_sc_ceq *ceq, info->elem_cnt > info->dev->hw_attrs.max_hw_ceq_size) return IRDMA_ERR_INVALID_SIZE; - if (info->ceq_id > (info->dev->hmc_fpm_misc.max_ceqs - 1)) + if (info->ceq_id >= (info->dev->hmc_fpm_misc.max_ceqs)) return IRDMA_ERR_INVALID_CEQ_ID; pble_obj_cnt = info->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].cnt; @@ -4166,7 +4166,7 @@ enum irdma_status_code irdma_sc_ccq_init(struct irdma_sc_cq *cq, info->num_elem > info->dev->hw_attrs.uk_attrs.max_hw_cq_size) return IRDMA_ERR_INVALID_SIZE; - if (info->ceq_id > (info->dev->hmc_fpm_misc.max_ceqs - 1)) + if (info->ceq_id >= (info->dev->hmc_fpm_misc.max_ceqs )) return IRDMA_ERR_INVALID_CEQ_ID; pble_obj_cnt = 
info->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].cnt; diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c index 89234d04cc652..e46e3240cc9fd 100644 --- a/drivers/infiniband/hw/irdma/hw.c +++ b/drivers/infiniband/hw/irdma/hw.c @@ -1608,7 +1608,7 @@ static enum irdma_status_code irdma_initialize_dev(struct irdma_pci_f *rf) info.fpm_commit_buf = mem.va; info.bar0 = rf->hw.hw_addr; - info.hmc_fn_id = PCI_FUNC(rf->pcidev->devfn); + info.hmc_fn_id = rf->pf_id; info.hw = &rf->hw; status = irdma_sc_dev_init(rf->rdma_ver, &rf->sc_dev, &info); if (status) diff --git a/drivers/infiniband/hw/irdma/i40iw_if.c b/drivers/infiniband/hw/irdma/i40iw_if.c index 43e962b97d6a3..0886783db647c 100644 --- a/drivers/infiniband/hw/irdma/i40iw_if.c +++ b/drivers/infiniband/hw/irdma/i40iw_if.c @@ -77,6 +77,7 @@ static void i40iw_fill_device_info(struct irdma_device *iwdev, struct i40e_info rf->rdma_ver = IRDMA_GEN_1; rf->gen_ops.request_reset = i40iw_request_reset; rf->pcidev = cdev_info->pcidev; + rf->pf_id = cdev_info->fid; rf->hw.hw_addr = cdev_info->hw_addr; rf->cdev = cdev_info; rf->msix_count = cdev_info->msix_count; diff --git a/drivers/infiniband/hw/irdma/main.c b/drivers/infiniband/hw/irdma/main.c index 9fab29039f1c0..5e8e8860686dc 100644 --- a/drivers/infiniband/hw/irdma/main.c +++ b/drivers/infiniband/hw/irdma/main.c @@ -226,6 +226,7 @@ static void irdma_fill_device_info(struct irdma_device *iwdev, struct ice_pf *pf rf->hw.hw_addr = pf->hw.hw_addr; rf->pcidev = pf->pdev; rf->msix_count = pf->num_rdma_msix; + rf->pf_id = pf->hw.pf_id; rf->msix_entries = &pf->msix_entries[pf->rdma_base_vector]; rf->default_vsi.vsi_idx = vsi->vsi_num; rf->protocol_used = pf->rdma_mode & IIDC_RDMA_PROTOCOL_ROCEV2 ? 
diff --git a/drivers/infiniband/hw/irdma/main.h b/drivers/infiniband/hw/irdma/main.h index cb218cab79ac1..fb7faa85e4c9d 100644 --- a/drivers/infiniband/hw/irdma/main.h +++ b/drivers/infiniband/hw/irdma/main.h @@ -257,6 +257,7 @@ struct irdma_pci_f { u8 *mem_rsrc; u8 rdma_ver; u8 rst_to; + u8 pf_id; enum irdma_protocol_used protocol_used; u32 sd_type; u32 msix_count; diff --git a/drivers/infiniband/hw/irdma/utils.c b/drivers/infiniband/hw/irdma/utils.c index 398736d8c78a4..e81b74a518dd0 100644 --- a/drivers/infiniband/hw/irdma/utils.c +++ b/drivers/infiniband/hw/irdma/utils.c @@ -150,31 +150,35 @@ int irdma_inetaddr_event(struct notifier_block *notifier, unsigned long event, void *ptr) { struct in_ifaddr *ifa = ptr; - struct net_device *netdev = ifa->ifa_dev->dev; + struct net_device *real_dev, *netdev = ifa->ifa_dev->dev; struct irdma_device *iwdev; struct ib_device *ibdev; u32 local_ipaddr; - ibdev = ib_device_get_by_netdev(netdev, RDMA_DRIVER_IRDMA); + real_dev = rdma_vlan_dev_real_dev(netdev); + if (!real_dev) + real_dev = netdev; + + ibdev = ib_device_get_by_netdev(real_dev, RDMA_DRIVER_IRDMA); if (!ibdev) return NOTIFY_DONE; iwdev = to_iwdev(ibdev); local_ipaddr = ntohl(ifa->ifa_address); ibdev_dbg(&iwdev->ibdev, - "DEV: netdev %p event %lu local_ip=%pI4 MAC=%pM\n", netdev, - event, &local_ipaddr, netdev->dev_addr); + "DEV: netdev %p event %lu local_ip=%pI4 MAC=%pM\n", real_dev, + event, &local_ipaddr, real_dev->dev_addr); switch (event) { case NETDEV_DOWN: - irdma_manage_arp_cache(iwdev->rf, netdev->dev_addr, + irdma_manage_arp_cache(iwdev->rf, real_dev->dev_addr, &local_ipaddr, true, IRDMA_ARP_DELETE); - irdma_if_notify(iwdev, netdev, &local_ipaddr, true, false); + irdma_if_notify(iwdev, real_dev, &local_ipaddr, true, false); irdma_gid_change_event(&iwdev->ibdev); break; case NETDEV_UP: case NETDEV_CHANGEADDR: - irdma_add_arp(iwdev->rf, &local_ipaddr, true, netdev->dev_addr); - irdma_if_notify(iwdev, netdev, &local_ipaddr, true, true); + 
irdma_add_arp(iwdev->rf, &local_ipaddr, true, real_dev->dev_addr); + irdma_if_notify(iwdev, real_dev, &local_ipaddr, true, true); irdma_gid_change_event(&iwdev->ibdev); break; default: @@ -196,32 +200,36 @@ int irdma_inet6addr_event(struct notifier_block *notifier, unsigned long event, void *ptr) { struct inet6_ifaddr *ifa = ptr; - struct net_device *netdev = ifa->idev->dev; + struct net_device *real_dev, *netdev = ifa->idev->dev; struct irdma_device *iwdev; struct ib_device *ibdev; u32 local_ipaddr6[4]; - ibdev = ib_device_get_by_netdev(netdev, RDMA_DRIVER_IRDMA); + real_dev = rdma_vlan_dev_real_dev(netdev); + if (!real_dev) + real_dev = netdev; + + ibdev = ib_device_get_by_netdev(real_dev, RDMA_DRIVER_IRDMA); if (!ibdev) return NOTIFY_DONE; iwdev = to_iwdev(ibdev); irdma_copy_ip_ntohl(local_ipaddr6, ifa->addr.in6_u.u6_addr32); ibdev_dbg(&iwdev->ibdev, - "DEV: netdev %p event %lu local_ip=%pI6 MAC=%pM\n", netdev, - event, local_ipaddr6, netdev->dev_addr); + "DEV: netdev %p event %lu local_ip=%pI6 MAC=%pM\n", real_dev, + event, local_ipaddr6, real_dev->dev_addr); switch (event) { case NETDEV_DOWN: - irdma_manage_arp_cache(iwdev->rf, netdev->dev_addr, + irdma_manage_arp_cache(iwdev->rf, real_dev->dev_addr, local_ipaddr6, false, IRDMA_ARP_DELETE); - irdma_if_notify(iwdev, netdev, local_ipaddr6, false, false); + irdma_if_notify(iwdev, real_dev, local_ipaddr6, false, false); irdma_gid_change_event(&iwdev->ibdev); break; case NETDEV_UP: case NETDEV_CHANGEADDR: irdma_add_arp(iwdev->rf, local_ipaddr6, false, - netdev->dev_addr); - irdma_if_notify(iwdev, netdev, local_ipaddr6, false, true); + real_dev->dev_addr); + irdma_if_notify(iwdev, real_dev, local_ipaddr6, false, true); irdma_gid_change_event(&iwdev->ibdev); break; default: @@ -243,14 +251,18 @@ int irdma_net_event(struct notifier_block *notifier, unsigned long event, void *ptr) { struct neighbour *neigh = ptr; + struct net_device *real_dev, *netdev = (struct net_device *)neigh->dev; struct irdma_device *iwdev; 
struct ib_device *ibdev; __be32 *p; u32 local_ipaddr[4] = {}; bool ipv4 = true; - ibdev = ib_device_get_by_netdev((struct net_device *)neigh->dev, - RDMA_DRIVER_IRDMA); + real_dev = rdma_vlan_dev_real_dev(netdev); + if (!real_dev) + real_dev = netdev; + + ibdev = ib_device_get_by_netdev(real_dev, RDMA_DRIVER_IRDMA); if (!ibdev) return NOTIFY_DONE; diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 460e757d3fe61..1bf6404ec8340 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -2509,7 +2509,7 @@ static int irdma_dealloc_mw(struct ib_mw *ibmw) cqp_info = &cqp_request->info; info = &cqp_info->in.u.dealloc_stag.info; memset(info, 0, sizeof(*info)); - info->pd_id = iwpd->sc_pd.pd_id & 0x00007fff; + info->pd_id = iwpd->sc_pd.pd_id; info->stag_idx = ibmw->rkey >> IRDMA_CQPSQ_STAG_IDX_S; info->mr = false; cqp_info->cqp_cmd = IRDMA_OP_DEALLOC_STAG; @@ -3021,7 +3021,7 @@ static int irdma_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata) cqp_info = &cqp_request->info; info = &cqp_info->in.u.dealloc_stag.info; memset(info, 0, sizeof(*info)); - info->pd_id = iwpd->sc_pd.pd_id & 0x00007fff; + info->pd_id = iwpd->sc_pd.pd_id; info->stag_idx = ib_mr->rkey >> IRDMA_CQPSQ_STAG_IDX_S; info->mr = true; if (iwpbl->pbl_allocated) diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 08b7f6bc56c37..15c0884d1f498 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -1886,8 +1886,10 @@ subscribe_event_xa_alloc(struct mlx5_devx_event_table *devx_event_table, key_level2, obj_event, GFP_KERNEL); - if (err) + if (err) { + kfree(obj_event); return err; + } INIT_LIST_HEAD(&obj_event->obj_sub_list); } diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 157d862fb8642..d40a1460ef971 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -541,8 +541,10 @@ static void 
__cache_work_func(struct mlx5_cache_ent *ent) spin_lock_irq(&ent->lock); if (ent->disabled) goto out; - if (need_delay) + if (need_delay) { queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ); + goto out; + } remove_cache_mr_locked(ent); queue_adjust_cache_locked(ent); } @@ -585,6 +587,8 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, ent = &cache->ent[entry]; spin_lock_irq(&ent->lock); if (list_empty(&ent->head)) { + queue_adjust_cache_locked(ent); + ent->miss++; spin_unlock_irq(&ent->lock); mr = create_cache_mr(ent); if (IS_ERR(mr)) @@ -628,6 +632,7 @@ static void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) { struct mlx5_cache_ent *ent = mr->cache_ent; + WRITE_ONCE(dev->cache.last_add, jiffies); spin_lock_irq(&ent->lock); list_add_tail(&mr->list, &ent->head); ent->available_mrs++; diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index ae50b56e89132..8ef112f883a77 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -3190,7 +3190,11 @@ void rvt_ruc_loopback(struct rvt_qp *sqp) spin_lock_irqsave(&sqp->s_lock, flags); rvt_send_complete(sqp, wqe, send_status); if (sqp->ibqp.qp_type == IB_QPT_RC) { - int lastwqe = rvt_error_qp(sqp, IB_WC_WR_FLUSH_ERR); + int lastwqe; + + spin_lock(&sqp->r_lock); + lastwqe = rvt_error_qp(sqp, IB_WC_WR_FLUSH_ERR); + spin_unlock(&sqp->r_lock); sqp->s_flags &= ~RVT_S_BUSY; spin_unlock_irqrestore(&sqp->s_lock, flags); diff --git a/drivers/infiniband/sw/rxe/rxe_av.c b/drivers/infiniband/sw/rxe/rxe_av.c index 38c7b6fb39d70..360a567159fe5 100644 --- a/drivers/infiniband/sw/rxe/rxe_av.c +++ b/drivers/infiniband/sw/rxe/rxe_av.c @@ -99,11 +99,14 @@ void rxe_av_fill_ip_info(struct rxe_av *av, struct rdma_ah_attr *attr) av->network_type = type; } -struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt) +struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt, struct rxe_ah **ahp) { struct rxe_ah *ah; u32 ah_num; + if (ahp) + *ahp = NULL; + if 
(!pkt || !pkt->qp) return NULL; @@ -117,10 +120,22 @@ struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt) if (ah_num) { /* only new user provider or kernel client */ ah = rxe_pool_get_index(&pkt->rxe->ah_pool, ah_num); - if (!ah || ah->ah_num != ah_num || rxe_ah_pd(ah) != pkt->qp->pd) { + if (!ah) { pr_warn("Unable to find AH matching ah_num\n"); return NULL; } + + if (rxe_ah_pd(ah) != pkt->qp->pd) { + pr_warn("PDs don't match for AH and QP\n"); + rxe_drop_ref(ah); + return NULL; + } + + if (ahp) + *ahp = ah; + else + rxe_drop_ref(ah); + return &ah->av; } diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h index b1e174afb1d49..b92bb7a152905 100644 --- a/drivers/infiniband/sw/rxe/rxe_loc.h +++ b/drivers/infiniband/sw/rxe/rxe_loc.h @@ -19,7 +19,7 @@ void rxe_av_to_attr(struct rxe_av *av, struct rdma_ah_attr *attr); void rxe_av_fill_ip_info(struct rxe_av *av, struct rdma_ah_attr *attr); -struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt); +struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt, struct rxe_ah **ahp); /* rxe_cq.c */ int rxe_cq_chk_attr(struct rxe_dev *rxe, struct rxe_cq *cq, @@ -102,7 +102,8 @@ void rxe_mw_cleanup(struct rxe_pool_elem *arg); /* rxe_net.c */ struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av, int paylen, struct rxe_pkt_info *pkt); -int rxe_prepare(struct rxe_pkt_info *pkt, struct sk_buff *skb); +int rxe_prepare(struct rxe_av *av, struct rxe_pkt_info *pkt, + struct sk_buff *skb); int rxe_xmit_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt, struct sk_buff *skb); const char *rxe_parent_name(struct rxe_dev *rxe, unsigned int port_num); diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index be72bdbfb4ba7..580cfd742dd2f 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -289,13 +289,13 @@ static void prepare_ipv6_hdr(struct dst_entry *dst, struct sk_buff *skb, ip6h->payload_len = htons(skb->len - 
sizeof(*ip6h)); } -static int prepare4(struct rxe_pkt_info *pkt, struct sk_buff *skb) +static int prepare4(struct rxe_av *av, struct rxe_pkt_info *pkt, + struct sk_buff *skb) { struct rxe_qp *qp = pkt->qp; struct dst_entry *dst; bool xnet = false; __be16 df = htons(IP_DF); - struct rxe_av *av = rxe_get_av(pkt); struct in_addr *saddr = &av->sgid_addr._sockaddr_in.sin_addr; struct in_addr *daddr = &av->dgid_addr._sockaddr_in.sin_addr; @@ -315,11 +315,11 @@ static int prepare4(struct rxe_pkt_info *pkt, struct sk_buff *skb) return 0; } -static int prepare6(struct rxe_pkt_info *pkt, struct sk_buff *skb) +static int prepare6(struct rxe_av *av, struct rxe_pkt_info *pkt, + struct sk_buff *skb) { struct rxe_qp *qp = pkt->qp; struct dst_entry *dst; - struct rxe_av *av = rxe_get_av(pkt); struct in6_addr *saddr = &av->sgid_addr._sockaddr_in6.sin6_addr; struct in6_addr *daddr = &av->dgid_addr._sockaddr_in6.sin6_addr; @@ -340,16 +340,17 @@ static int prepare6(struct rxe_pkt_info *pkt, struct sk_buff *skb) return 0; } -int rxe_prepare(struct rxe_pkt_info *pkt, struct sk_buff *skb) +int rxe_prepare(struct rxe_av *av, struct rxe_pkt_info *pkt, + struct sk_buff *skb) { int err = 0; if (skb->protocol == htons(ETH_P_IP)) - err = prepare4(pkt, skb); + err = prepare4(av, pkt, skb); else if (skb->protocol == htons(ETH_P_IPV6)) - err = prepare6(pkt, skb); + err = prepare6(av, pkt, skb); - if (ether_addr_equal(skb->dev->dev_addr, rxe_get_av(pkt)->dmac)) + if (ether_addr_equal(skb->dev->dev_addr, av->dmac)) pkt->mask |= RXE_LOOPBACK_MASK; return err; diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index 5eb89052dd668..204e31bbd61f7 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -358,14 +358,14 @@ static inline int get_mtu(struct rxe_qp *qp) } static struct sk_buff *init_req_packet(struct rxe_qp *qp, + struct rxe_av *av, struct rxe_send_wqe *wqe, - int opcode, int payload, + int opcode, u32 payload, struct 
rxe_pkt_info *pkt) { struct rxe_dev *rxe = to_rdev(qp->ibqp.device); struct sk_buff *skb; struct rxe_send_wr *ibwr = &wqe->wr; - struct rxe_av *av; int pad = (-payload) & 0x3; int paylen; int solicited; @@ -374,21 +374,9 @@ static struct sk_buff *init_req_packet(struct rxe_qp *qp, /* length from start of bth to end of icrc */ paylen = rxe_opcode[opcode].length + payload + pad + RXE_ICRC_SIZE; - - /* pkt->hdr, port_num and mask are initialized in ifc layer */ - pkt->rxe = rxe; - pkt->opcode = opcode; - pkt->qp = qp; - pkt->psn = qp->req.psn; - pkt->mask = rxe_opcode[opcode].mask; - pkt->paylen = paylen; - pkt->wqe = wqe; + pkt->paylen = paylen; /* init skb */ - av = rxe_get_av(pkt); - if (!av) - return NULL; - skb = rxe_init_packet(rxe, av, paylen, pkt); if (unlikely(!skb)) return NULL; @@ -447,13 +435,13 @@ static struct sk_buff *init_req_packet(struct rxe_qp *qp, return skb; } -static int finish_packet(struct rxe_qp *qp, struct rxe_send_wqe *wqe, - struct rxe_pkt_info *pkt, struct sk_buff *skb, - int paylen) +static int finish_packet(struct rxe_qp *qp, struct rxe_av *av, + struct rxe_send_wqe *wqe, struct rxe_pkt_info *pkt, + struct sk_buff *skb, u32 paylen) { int err; - err = rxe_prepare(pkt, skb); + err = rxe_prepare(av, pkt, skb); if (err) return err; @@ -497,7 +485,7 @@ static void update_wqe_state(struct rxe_qp *qp, static void update_wqe_psn(struct rxe_qp *qp, struct rxe_send_wqe *wqe, struct rxe_pkt_info *pkt, - int payload) + u32 payload) { /* number of packets left to send including current one */ int num_pkt = (wqe->dma.resid + payload + qp->mtu - 1) / qp->mtu; @@ -540,7 +528,7 @@ static void rollback_state(struct rxe_send_wqe *wqe, } static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe, - struct rxe_pkt_info *pkt, int payload) + struct rxe_pkt_info *pkt, u32 payload) { qp->req.opcode = pkt->opcode; @@ -608,17 +596,20 @@ static int rxe_do_local_ops(struct rxe_qp *qp, struct rxe_send_wqe *wqe) int rxe_requester(void *arg) { struct rxe_qp 
*qp = (struct rxe_qp *)arg; + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); struct rxe_pkt_info pkt; struct sk_buff *skb; struct rxe_send_wqe *wqe; enum rxe_hdr_mask mask; - int payload; + u32 payload; int mtu; int opcode; int ret; struct rxe_send_wqe rollback_wqe; u32 rollback_psn; struct rxe_queue *q = qp->sq.queue; + struct rxe_ah *ah; + struct rxe_av *av; rxe_add_ref(qp); @@ -705,14 +696,28 @@ int rxe_requester(void *arg) payload = mtu; } - skb = init_req_packet(qp, wqe, opcode, payload, &pkt); + pkt.rxe = rxe; + pkt.opcode = opcode; + pkt.qp = qp; + pkt.psn = qp->req.psn; + pkt.mask = rxe_opcode[opcode].mask; + pkt.wqe = wqe; + + av = rxe_get_av(&pkt, &ah); + if (unlikely(!av)) { + pr_err("qp#%d Failed no address vector\n", qp_num(qp)); + wqe->status = IB_WC_LOC_QP_OP_ERR; + goto err_drop_ah; + } + + skb = init_req_packet(qp, av, wqe, opcode, payload, &pkt); if (unlikely(!skb)) { pr_err("qp#%d Failed allocating skb\n", qp_num(qp)); wqe->status = IB_WC_LOC_QP_OP_ERR; - goto err; + goto err_drop_ah; } - ret = finish_packet(qp, wqe, &pkt, skb, payload); + ret = finish_packet(qp, av, wqe, &pkt, skb, payload); if (unlikely(ret)) { pr_debug("qp#%d Error during finish packet\n", qp_num(qp)); if (ret == -EFAULT) @@ -720,9 +725,12 @@ int rxe_requester(void *arg) else wqe->status = IB_WC_LOC_QP_OP_ERR; kfree_skb(skb); - goto err; + goto err_drop_ah; } + if (ah) + rxe_drop_ref(ah); + /* * To prevent a race on wqe access between requester and completer, * wqe members state and psn need to be set before calling @@ -751,6 +759,9 @@ int rxe_requester(void *arg) goto next_wqe; +err_drop_ah: + if (ah) + rxe_drop_ref(ah); err: wqe->state = wqe_state_error; __rxe_do_task(&qp->comp.task); diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index e8f435fa6e4d7..192cb9a096a14 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -632,7 +632,7 @@ static struct sk_buff *prepare_ack_packet(struct rxe_qp 
*qp, if (ack->mask & RXE_ATMACK_MASK) atmack_set_orig(ack, qp->resp.atomic_orig); - err = rxe_prepare(ack, skb); + err = rxe_prepare(&qp->pri_av, ack, skb); if (err) { kfree_skb(skb); return NULL; @@ -814,6 +814,10 @@ static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt) return RESPST_ERR_INVALIDATE_RKEY; } + if (pkt->mask & RXE_END_MASK) + /* We successfully processed this new request. */ + qp->resp.msn++; + /* next expected psn, read handles this separately */ qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK; qp->resp.ack_psn = qp->resp.psn; @@ -821,11 +825,9 @@ static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt) qp->resp.opcode = pkt->opcode; qp->resp.status = IB_WC_SUCCESS; - if (pkt->mask & RXE_COMP_MASK) { - /* We successfully processed this new request. */ - qp->resp.msn++; + if (pkt->mask & RXE_COMP_MASK) return RESPST_COMPLETE; - } else if (qp_type(qp) == IB_QPT_RC) + else if (qp_type(qp) == IB_QPT_RC) return RESPST_ACKNOWLEDGE; else return RESPST_CLEANUP; diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c index 759b85f033315..df4d06d4d183a 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c @@ -297,6 +297,7 @@ static bool rtrs_clt_change_state_from_to(struct rtrs_clt_path *clt_path, return changed; } +static void rtrs_clt_stop_and_destroy_conns(struct rtrs_clt_path *clt_path); static void rtrs_rdma_error_recovery(struct rtrs_clt_con *con) { struct rtrs_clt_path *clt_path = to_clt_path(con->c.path); @@ -304,16 +305,7 @@ static void rtrs_rdma_error_recovery(struct rtrs_clt_con *con) if (rtrs_clt_change_state_from_to(clt_path, RTRS_CLT_CONNECTED, RTRS_CLT_RECONNECTING)) { - struct rtrs_clt_sess *clt = clt_path->clt; - unsigned int delay_ms; - - /* - * Normal scenario, reconnect if we were successfully connected - */ - delay_ms = clt->reconnect_delay_sec * 1000; - queue_delayed_work(rtrs_wq, &clt_path->reconnect_dwork, - 
msecs_to_jiffies(delay_ms + - prandom_u32() % RTRS_RECONNECT_SEED)); + queue_work(rtrs_wq, &clt_path->err_recovery_work); } else { /* * Error can happen just on establishing new connection, @@ -1511,6 +1503,22 @@ static void rtrs_clt_init_hb(struct rtrs_clt_path *clt_path) static void rtrs_clt_reconnect_work(struct work_struct *work); static void rtrs_clt_close_work(struct work_struct *work); +static void rtrs_clt_err_recovery_work(struct work_struct *work) +{ + struct rtrs_clt_path *clt_path; + struct rtrs_clt_sess *clt; + int delay_ms; + + clt_path = container_of(work, struct rtrs_clt_path, err_recovery_work); + clt = clt_path->clt; + delay_ms = clt->reconnect_delay_sec * 1000; + rtrs_clt_stop_and_destroy_conns(clt_path); + queue_delayed_work(rtrs_wq, &clt_path->reconnect_dwork, + msecs_to_jiffies(delay_ms + + prandom_u32() % + RTRS_RECONNECT_SEED)); +} + static struct rtrs_clt_path *alloc_path(struct rtrs_clt_sess *clt, const struct rtrs_addr *path, size_t con_num, u32 nr_poll_queues) @@ -1562,6 +1570,7 @@ static struct rtrs_clt_path *alloc_path(struct rtrs_clt_sess *clt, clt_path->state = RTRS_CLT_CONNECTING; atomic_set(&clt_path->connected_cnt, 0); INIT_WORK(&clt_path->close_work, rtrs_clt_close_work); + INIT_WORK(&clt_path->err_recovery_work, rtrs_clt_err_recovery_work); INIT_DELAYED_WORK(&clt_path->reconnect_dwork, rtrs_clt_reconnect_work); rtrs_clt_init_hb(clt_path); @@ -2326,6 +2335,7 @@ static void rtrs_clt_close_work(struct work_struct *work) clt_path = container_of(work, struct rtrs_clt_path, close_work); + cancel_work_sync(&clt_path->err_recovery_work); cancel_delayed_work_sync(&clt_path->reconnect_dwork); rtrs_clt_stop_and_destroy_conns(clt_path); rtrs_clt_change_state_get_old(clt_path, RTRS_CLT_CLOSED, NULL); @@ -2638,7 +2648,6 @@ static void rtrs_clt_reconnect_work(struct work_struct *work) { struct rtrs_clt_path *clt_path; struct rtrs_clt_sess *clt; - unsigned int delay_ms; int err; clt_path = container_of(to_delayed_work(work), struct 
rtrs_clt_path, @@ -2655,8 +2664,6 @@ static void rtrs_clt_reconnect_work(struct work_struct *work) } clt_path->reconnect_attempts++; - /* Stop everything */ - rtrs_clt_stop_and_destroy_conns(clt_path); msleep(RTRS_RECONNECT_BACKOFF); if (rtrs_clt_change_state_get_old(clt_path, RTRS_CLT_CONNECTING, NULL)) { err = init_path(clt_path); @@ -2669,11 +2676,7 @@ static void rtrs_clt_reconnect_work(struct work_struct *work) reconnect_again: if (rtrs_clt_change_state_get_old(clt_path, RTRS_CLT_RECONNECTING, NULL)) { clt_path->stats->reconnects.fail_cnt++; - delay_ms = clt->reconnect_delay_sec * 1000; - queue_delayed_work(rtrs_wq, &clt_path->reconnect_dwork, - msecs_to_jiffies(delay_ms + - prandom_u32() % - RTRS_RECONNECT_SEED)); + queue_work(rtrs_wq, &clt_path->err_recovery_work); } } @@ -2908,6 +2911,7 @@ int rtrs_clt_reconnect_from_sysfs(struct rtrs_clt_path *clt_path) &old_state); if (changed) { clt_path->reconnect_attempts = 0; + rtrs_clt_stop_and_destroy_conns(clt_path); queue_delayed_work(rtrs_wq, &clt_path->reconnect_dwork, 0); } if (changed || old_state == RTRS_CLT_RECONNECTING) { diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.h b/drivers/infiniband/ulp/rtrs/rtrs-clt.h index d1b18a154ae03..f848c0392d982 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.h +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.h @@ -134,6 +134,7 @@ struct rtrs_clt_path { struct rtrs_clt_io_req *reqs; struct delayed_work reconnect_dwork; struct work_struct close_work; + struct work_struct err_recovery_work; unsigned int reconnect_attempts; bool established; struct rtrs_rbuf *rbufs; diff --git a/drivers/input/input.c b/drivers/input/input.c index c3139bc2aa0db..ccaeb24263854 100644 --- a/drivers/input/input.c +++ b/drivers/input/input.c @@ -2285,12 +2285,6 @@ int input_register_device(struct input_dev *dev) /* KEY_RESERVED is not supposed to be transmitted to userspace. */ __clear_bit(KEY_RESERVED, dev->keybit); - /* Buttonpads should not map BTN_RIGHT and/or BTN_MIDDLE. 
*/ - if (test_bit(INPUT_PROP_BUTTONPAD, dev->propbit)) { - __clear_bit(BTN_RIGHT, dev->keybit); - __clear_bit(BTN_MIDDLE, dev->keybit); - } - /* Make sure that bitmasks not mentioned in dev->evbit are clean. */ input_cleanse_bitmasks(dev); diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 6dc6d8b6b3686..f60381cdf1c48 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -1558,6 +1558,7 @@ static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev) dev_info(smmu->dev, "\t0x%016llx\n", (unsigned long long)evt[i]); + cond_resched(); } /* diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index b28c9435b898d..170e0f33040e8 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -95,10 +95,11 @@ __cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free) cached_iova = to_iova(iovad->cached32_node); if (free == cached_iova || (free->pfn_hi < iovad->dma_32bit_pfn && - free->pfn_lo >= cached_iova->pfn_lo)) { + free->pfn_lo >= cached_iova->pfn_lo)) iovad->cached32_node = rb_next(&free->node); + + if (free->pfn_lo < iovad->dma_32bit_pfn) iovad->max32_alloc_size = iovad->dma_32bit_pfn; - } cached_iova = to_iova(iovad->cached_node); if (free->pfn_lo >= cached_iova->pfn_lo) diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index ca752bdc710f6..61bd9a3004ede 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -1006,7 +1006,9 @@ static int ipmmu_probe(struct platform_device *pdev) bitmap_zero(mmu->ctx, IPMMU_CTX_MAX); mmu->features = of_device_get_match_data(&pdev->dev); memset(mmu->utlb_ctx, IPMMU_CTX_INVALID, mmu->features->num_utlbs); - dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(40)); + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(40)); + if (ret) + return ret; /* Map I/O memory and request IRQ. 
*/ res = platform_get_resource(pdev, IORESOURCE_MEM, 0); diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 25b834104790c..5971a11686662 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -562,22 +562,52 @@ static struct iommu_device *mtk_iommu_probe_device(struct device *dev) { struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); struct mtk_iommu_data *data; + struct device_link *link; + struct device *larbdev; + unsigned int larbid, larbidx, i; if (!fwspec || fwspec->ops != &mtk_iommu_ops) return ERR_PTR(-ENODEV); /* Not a iommu client device */ data = dev_iommu_priv_get(dev); + /* + * Link the consumer device with the smi-larb device(supplier). + * The device that connects with each a larb is a independent HW. + * All the ports in each a device should be in the same larbs. + */ + larbid = MTK_M4U_TO_LARB(fwspec->ids[0]); + for (i = 1; i < fwspec->num_ids; i++) { + larbidx = MTK_M4U_TO_LARB(fwspec->ids[i]); + if (larbid != larbidx) { + dev_err(dev, "Can only use one larb. 
Fail@larb%d-%d.\n", + larbid, larbidx); + return ERR_PTR(-EINVAL); + } + } + larbdev = data->larb_imu[larbid].dev; + link = device_link_add(dev, larbdev, + DL_FLAG_PM_RUNTIME | DL_FLAG_STATELESS); + if (!link) + dev_err(dev, "Unable to link %s\n", dev_name(larbdev)); return &data->iommu; } static void mtk_iommu_release_device(struct device *dev) { struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); + struct mtk_iommu_data *data; + struct device *larbdev; + unsigned int larbid; if (!fwspec || fwspec->ops != &mtk_iommu_ops) return; + data = dev_iommu_priv_get(dev); + larbid = MTK_M4U_TO_LARB(fwspec->ids[0]); + larbdev = data->larb_imu[larbid].dev; + device_link_remove(dev, larbdev); + iommu_fwspec_free(dev); } @@ -848,7 +878,7 @@ static int mtk_iommu_probe(struct platform_device *pdev) plarbdev = of_find_device_by_node(larbnode); if (!plarbdev) { of_node_put(larbnode); - return -EPROBE_DEFER; + return -ENODEV; } data->larb_imu[id].dev = &plarbdev->dev; diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c index be22fcf988cee..bc7ee90b9373d 100644 --- a/drivers/iommu/mtk_iommu_v1.c +++ b/drivers/iommu/mtk_iommu_v1.c @@ -423,7 +423,18 @@ static struct iommu_device *mtk_iommu_probe_device(struct device *dev) struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); struct of_phandle_args iommu_spec; struct mtk_iommu_data *data; - int err, idx = 0; + int err, idx = 0, larbid, larbidx; + struct device_link *link; + struct device *larbdev; + + /* + * In the deferred case, free the existed fwspec. + * Always initialize the fwspec internally. 
+ */ + if (fwspec) { + iommu_fwspec_free(dev); + fwspec = dev_iommu_fwspec_get(dev); + } while (!of_parse_phandle_with_args(dev->of_node, "iommus", "#iommu-cells", @@ -444,6 +455,23 @@ static struct iommu_device *mtk_iommu_probe_device(struct device *dev) data = dev_iommu_priv_get(dev); + /* Link the consumer device with the smi-larb device(supplier) */ + larbid = mt2701_m4u_to_larb(fwspec->ids[0]); + for (idx = 1; idx < fwspec->num_ids; idx++) { + larbidx = mt2701_m4u_to_larb(fwspec->ids[idx]); + if (larbid != larbidx) { + dev_err(dev, "Can only use one larb. Fail@larb%d-%d.\n", + larbid, larbidx); + return ERR_PTR(-EINVAL); + } + } + + larbdev = data->larb_imu[larbid].dev; + link = device_link_add(dev, larbdev, + DL_FLAG_PM_RUNTIME | DL_FLAG_STATELESS); + if (!link) + dev_err(dev, "Unable to link %s\n", dev_name(larbdev)); + return &data->iommu; } @@ -464,10 +492,18 @@ static void mtk_iommu_probe_finalize(struct device *dev) static void mtk_iommu_release_device(struct device *dev) { struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); + struct mtk_iommu_data *data; + struct device *larbdev; + unsigned int larbid; if (!fwspec || fwspec->ops != &mtk_iommu_ops) return; + data = dev_iommu_priv_get(dev); + larbid = mt2701_m4u_to_larb(fwspec->ids[0]); + larbdev = data->larb_imu[larbid].dev; + device_link_remove(dev, larbdev); + iommu_fwspec_free(dev); } @@ -595,7 +631,7 @@ static int mtk_iommu_probe(struct platform_device *pdev) plarbdev = of_find_device_by_node(larbnode); if (!plarbdev) { of_node_put(larbnode); - return -EPROBE_DEFER; + return -ENODEV; } data->larb_imu[i].dev = &plarbdev->dev; diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index 980e4af3f06b6..d2e82a1b56d8a 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -1661,7 +1661,7 @@ static struct iommu_device *omap_iommu_probe_device(struct device *dev) num_iommus = of_property_count_elems_of_size(dev->of_node, "iommus", sizeof(phandle)); if (num_iommus < 0) 
- return 0; + return ERR_PTR(-ENODEV); arch_data = kcalloc(num_iommus + 1, sizeof(*arch_data), GFP_KERNEL); if (!arch_data) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index cd772973114af..a0fc764ec9dc6 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -3011,18 +3011,12 @@ static int __init allocate_lpi_tables(void) return 0; } -static u64 its_clear_vpend_valid(void __iomem *vlpi_base, u64 clr, u64 set) +static u64 read_vpend_dirty_clear(void __iomem *vlpi_base) { u32 count = 1000000; /* 1s! */ bool clean; u64 val; - val = gicr_read_vpendbaser(vlpi_base + GICR_VPENDBASER); - val &= ~GICR_VPENDBASER_Valid; - val &= ~clr; - val |= set; - gicr_write_vpendbaser(val, vlpi_base + GICR_VPENDBASER); - do { val = gicr_read_vpendbaser(vlpi_base + GICR_VPENDBASER); clean = !(val & GICR_VPENDBASER_Dirty); @@ -3033,10 +3027,26 @@ static u64 its_clear_vpend_valid(void __iomem *vlpi_base, u64 clr, u64 set) } } while (!clean && count); - if (unlikely(val & GICR_VPENDBASER_Dirty)) { + if (unlikely(!clean)) pr_err_ratelimited("ITS virtual pending table not cleaning\n"); + + return val; +} + +static u64 its_clear_vpend_valid(void __iomem *vlpi_base, u64 clr, u64 set) +{ + u64 val; + + /* Make sure we wait until the RD is done with the initial scan */ + val = read_vpend_dirty_clear(vlpi_base); + val &= ~GICR_VPENDBASER_Valid; + val &= ~clr; + val |= set; + gicr_write_vpendbaser(val, vlpi_base + GICR_VPENDBASER); + + val = read_vpend_dirty_clear(vlpi_base); + if (unlikely(val & GICR_VPENDBASER_Dirty)) val |= GICR_VPENDBASER_PendingLast; - } return val; } diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 5e935d97207dc..907af63d1bba9 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -206,11 +206,11 @@ static inline void __iomem *gic_dist_base(struct irq_data *d) } } -static void gic_do_wait_for_rwp(void __iomem *base) +static void gic_do_wait_for_rwp(void 
__iomem *base, u32 bit) { u32 count = 1000000; /* 1s! */ - while (readl_relaxed(base + GICD_CTLR) & GICD_CTLR_RWP) { + while (readl_relaxed(base + GICD_CTLR) & bit) { count--; if (!count) { pr_err_ratelimited("RWP timeout, gone fishing\n"); @@ -224,13 +224,13 @@ static void gic_do_wait_for_rwp(void __iomem *base) /* Wait for completion of a distributor change */ static void gic_dist_wait_for_rwp(void) { - gic_do_wait_for_rwp(gic_data.dist_base); + gic_do_wait_for_rwp(gic_data.dist_base, GICD_CTLR_RWP); } /* Wait for completion of a redistributor change */ static void gic_redist_wait_for_rwp(void) { - gic_do_wait_for_rwp(gic_data_rdist_rd_base()); + gic_do_wait_for_rwp(gic_data_rdist_rd_base(), GICR_CTLR_RWP); } #ifdef CONFIG_ARM64 @@ -1466,6 +1466,12 @@ static int gic_irq_domain_translate(struct irq_domain *d, if(fwspec->param_count != 2) return -EINVAL; + if (fwspec->param[0] < 16) { + pr_err(FW_BUG "Illegal GSI%d translation request\n", + fwspec->param[0]); + return -EINVAL; + } + *hwirq = fwspec->param[0]; *type = fwspec->param[1]; diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index b8bb46c65a97a..3dbac62c932dd 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -1085,6 +1085,12 @@ static int gic_irq_domain_translate(struct irq_domain *d, if(fwspec->param_count != 2) return -EINVAL; + if (fwspec->param[0] < 16) { + pr_err(FW_BUG "Illegal GSI%d translation request\n", + fwspec->param[0]); + return -EINVAL; + } + *hwirq = fwspec->param[0]; *type = fwspec->param[1]; diff --git a/drivers/irqchip/irq-nvic.c b/drivers/irqchip/irq-nvic.c index ba4759b3e2693..94230306e0eee 100644 --- a/drivers/irqchip/irq-nvic.c +++ b/drivers/irqchip/irq-nvic.c @@ -107,6 +107,7 @@ static int __init nvic_of_init(struct device_node *node, if (!nvic_irq_domain) { pr_warn("Failed to allocate irq domain\n"); + iounmap(nvic_base); return -ENOMEM; } @@ -116,6 +117,7 @@ static int __init nvic_of_init(struct device_node *node, if (ret) { pr_warn("Failed 
to allocate irq chips\n"); irq_domain_remove(nvic_irq_domain); + iounmap(nvic_base); return ret; } diff --git a/drivers/irqchip/qcom-pdc.c b/drivers/irqchip/qcom-pdc.c index 173e6520e06ec..c0b457f26ec41 100644 --- a/drivers/irqchip/qcom-pdc.c +++ b/drivers/irqchip/qcom-pdc.c @@ -56,17 +56,18 @@ static u32 pdc_reg_read(int reg, u32 i) static void pdc_enable_intr(struct irq_data *d, bool on) { int pin_out = d->hwirq; + unsigned long flags; u32 index, mask; u32 enable; index = pin_out / 32; mask = pin_out % 32; - raw_spin_lock(&pdc_lock); + raw_spin_lock_irqsave(&pdc_lock, flags); enable = pdc_reg_read(IRQ_ENABLE_BANK, index); enable = on ? ENABLE_INTR(enable, mask) : CLEAR_INTR(enable, mask); pdc_reg_write(IRQ_ENABLE_BANK, index, enable); - raw_spin_unlock(&pdc_lock); + raw_spin_unlock_irqrestore(&pdc_lock, flags); } static void qcom_pdc_gic_disable(struct irq_data *d) diff --git a/drivers/mailbox/imx-mailbox.c b/drivers/mailbox/imx-mailbox.c index 544de2db64531..a0c252415c868 100644 --- a/drivers/mailbox/imx-mailbox.c +++ b/drivers/mailbox/imx-mailbox.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #define IMX_MU_CHANS 16 @@ -76,6 +77,7 @@ struct imx_mu_priv { const struct imx_mu_dcfg *dcfg; struct clk *clk; int irq; + bool suspend; u32 xcr[4]; @@ -334,6 +336,9 @@ static irqreturn_t imx_mu_isr(int irq, void *p) return IRQ_NONE; } + if (priv->suspend) + pm_system_wakeup(); + return IRQ_HANDLED; } @@ -702,6 +707,8 @@ static int __maybe_unused imx_mu_suspend_noirq(struct device *dev) priv->xcr[i] = imx_mu_read(priv, priv->dcfg->xCR[i]); } + priv->suspend = true; + return 0; } @@ -718,11 +725,13 @@ static int __maybe_unused imx_mu_resume_noirq(struct device *dev) * send failed, may lead to system freeze. This issue * is observed by testing freeze mode suspend. 
*/ - if (!imx_mu_read(priv, priv->dcfg->xCR[0]) && !priv->clk) { + if (!priv->clk && !imx_mu_read(priv, priv->dcfg->xCR[0])) { for (i = 0; i < IMX_MU_xCR_MAX; i++) imx_mu_write(priv, priv->xcr[i], priv->dcfg->xCR[i]); } + priv->suspend = false; + return 0; } diff --git a/drivers/mailbox/tegra-hsp.c b/drivers/mailbox/tegra-hsp.c index acd0675da681e..78f7265039c66 100644 --- a/drivers/mailbox/tegra-hsp.c +++ b/drivers/mailbox/tegra-hsp.c @@ -412,6 +412,11 @@ static int tegra_hsp_mailbox_flush(struct mbox_chan *chan, value = tegra_hsp_channel_readl(ch, HSP_SM_SHRD_MBOX); if ((value & HSP_SM_SHRD_MBOX_FULL) == 0) { mbox_chan_txdone(chan, 0); + + /* Wait until channel is empty */ + if (chan->active_req != NULL) + continue; + return 0; } diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 88c573eeb5982..ad9f16689419d 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -2060,9 +2060,11 @@ int bch_btree_check(struct cache_set *c) } } + /* + * Must wait for all threads to stop. + */ wait_event_interruptible(check_state->wait, - atomic_read(&check_state->started) == 0 || - test_bit(CACHE_SET_IO_DISABLE, &c->flags)); + atomic_read(&check_state->started) == 0); for (i = 0; i < check_state->total_threads; i++) { if (check_state->infos[i].result) { diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index c7560f66dca88..68d3dd6b4f119 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -998,9 +998,11 @@ void bch_sectors_dirty_init(struct bcache_device *d) } } + /* + * Must wait for all threads to stop. 
+ */ wait_event_interruptible(state->wait, - atomic_read(&state->started) == 0 || - test_bit(CACHE_SET_IO_DISABLE, &c->flags)); + atomic_read(&state->started) == 0); out: kfree(state); diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h index b855fef4f38a6..adb9604e85ac4 100644 --- a/drivers/md/dm-core.h +++ b/drivers/md/dm-core.h @@ -65,6 +65,8 @@ struct mapped_device { struct gendisk *disk; struct dax_device *dax_dev; + unsigned long __percpu *pending_io; + /* * A list of ios that arrived while we were suspended. */ diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index d4ae31558826a..f51aea71cb036 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -2590,7 +2590,7 @@ static int crypt_set_keyring_key(struct crypt_config *cc, const char *key_string static int get_key_size(char **key_string) { - return (*key_string[0] == ':') ? -EINVAL : strlen(*key_string) >> 1; + return (*key_string[0] == ':') ? -EINVAL : (int)(strlen(*key_string) >> 1); } #endif /* CONFIG_KEYS */ diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index eb4b5e52bd6ff..ffe50be8b6875 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -2473,9 +2473,11 @@ static void do_journal_write(struct dm_integrity_c *ic, unsigned write_start, dm_integrity_io_error(ic, "invalid sector in journal", -EIO); sec &= ~(sector_t)(ic->sectors_per_block - 1); } + if (unlikely(sec >= ic->provided_data_sectors)) { + journal_entry_set_unused(je); + continue; + } } - if (unlikely(sec >= ic->provided_data_sectors)) - continue; get_area_and_offset(ic, sec, &area, &offset); restore_last_bytes(ic, access_journal_data(ic, i, j), je); for (k = j + 1; k < ic->journal_section_entries; k++) { @@ -4398,6 +4400,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) } if (ic->internal_hash) { + size_t recalc_tags_size; ic->recalc_wq = alloc_workqueue("dm-integrity-recalc", WQ_MEM_RECLAIM, 1); if (!ic->recalc_wq ) { ti->error = "Cannot 
allocate workqueue"; @@ -4411,8 +4414,10 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) r = -ENOMEM; goto bad; } - ic->recalc_tags = kvmalloc_array(RECALC_SECTORS >> ic->sb->log2_sectors_per_block, - ic->tag_size, GFP_KERNEL); + recalc_tags_size = (RECALC_SECTORS >> ic->sb->log2_sectors_per_block) * ic->tag_size; + if (crypto_shash_digestsize(ic->internal_hash) > ic->tag_size) + recalc_tags_size += crypto_shash_digestsize(ic->internal_hash) - ic->tag_size; + ic->recalc_tags = kvmalloc(recalc_tags_size, GFP_KERNEL); if (!ic->recalc_tags) { ti->error = "Cannot allocate tags for recalculating"; r = -ENOMEM; diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index 21fe8652b095b..901abd6dea419 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -1788,6 +1789,7 @@ static ioctl_fn lookup_ioctl(unsigned int cmd, int *ioctl_flags) if (unlikely(cmd >= ARRAY_SIZE(_ioctls))) return NULL; + cmd = array_index_nospec(cmd, ARRAY_SIZE(_ioctls)); *ioctl_flags = _ioctls[cmd].flags; return _ioctls[cmd].fn; } diff --git a/drivers/md/dm-ps-historical-service-time.c b/drivers/md/dm-ps-historical-service-time.c index 875bca30a0dd5..82f2a06153dc0 100644 --- a/drivers/md/dm-ps-historical-service-time.c +++ b/drivers/md/dm-ps-historical-service-time.c @@ -27,7 +27,6 @@ #include #include #include -#include #define DM_MSG_PREFIX "multipath historical-service-time" @@ -433,7 +432,7 @@ static struct dm_path *hst_select_path(struct path_selector *ps, { struct selector *s = ps->context; struct path_info *pi = NULL, *best = NULL; - u64 time_now = sched_clock(); + u64 time_now = ktime_get_ns(); struct dm_path *ret = NULL; unsigned long flags; @@ -474,7 +473,7 @@ static int hst_start_io(struct path_selector *ps, struct dm_path *path, static u64 path_service_time(struct path_info *pi, u64 start_time) { - u64 sched_now = ktime_get_ns(); + u64 now = ktime_get_ns(); /* if a 
previous disk request has finished after this IO was * sent to the hardware, pretend the submission happened @@ -483,11 +482,11 @@ static u64 path_service_time(struct path_info *pi, u64 start_time) if (time_after64(pi->last_finish, start_time)) start_time = pi->last_finish; - pi->last_finish = sched_now; - if (time_before64(sched_now, start_time)) + pi->last_finish = now; + if (time_before64(now, start_time)) return 0; - return sched_now - start_time; + return now - start_time; } static int hst_end_io(struct path_selector *ps, struct dm_path *path, diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c index 579ab6183d4d8..dffeb47a9efbc 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c @@ -499,8 +499,13 @@ static blk_status_t dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx, if (unlikely(!ti)) { int srcu_idx; - struct dm_table *map = dm_get_live_table(md, &srcu_idx); + struct dm_table *map; + map = dm_get_live_table(md, &srcu_idx); + if (unlikely(!map)) { + dm_put_live_table(md, srcu_idx); + return BLK_STS_RESOURCE; + } ti = dm_table_find_target(map, 0); dm_put_live_table(md, srcu_idx); } diff --git a/drivers/md/dm-stats.c b/drivers/md/dm-stats.c index 35d368c418d03..0e039a8c0bf2e 100644 --- a/drivers/md/dm-stats.c +++ b/drivers/md/dm-stats.c @@ -195,6 +195,7 @@ void dm_stats_init(struct dm_stats *stats) mutex_init(&stats->mutex); INIT_LIST_HEAD(&stats->list); + stats->precise_timestamps = false; stats->last = alloc_percpu(struct dm_stats_last_position); for_each_possible_cpu(cpu) { last = per_cpu_ptr(stats->last, cpu); @@ -231,6 +232,22 @@ void dm_stats_cleanup(struct dm_stats *stats) mutex_destroy(&stats->mutex); } +static void dm_stats_recalc_precise_timestamps(struct dm_stats *stats) +{ + struct list_head *l; + struct dm_stat *tmp_s; + bool precise_timestamps = false; + + list_for_each(l, &stats->list) { + tmp_s = container_of(l, struct dm_stat, list_entry); + if (tmp_s->stat_flags & STAT_PRECISE_TIMESTAMPS) { + precise_timestamps = true; + break; + } + } + 
stats->precise_timestamps = precise_timestamps; +} + static int dm_stats_create(struct dm_stats *stats, sector_t start, sector_t end, sector_t step, unsigned stat_flags, unsigned n_histogram_entries, @@ -376,6 +393,9 @@ static int dm_stats_create(struct dm_stats *stats, sector_t start, sector_t end, } ret_id = s->id; list_add_tail_rcu(&s->list_entry, l); + + dm_stats_recalc_precise_timestamps(stats); + mutex_unlock(&stats->mutex); resume_callback(md); @@ -418,6 +438,9 @@ static int dm_stats_delete(struct dm_stats *stats, int id) } list_del_rcu(&s->list_entry); + + dm_stats_recalc_precise_timestamps(stats); + mutex_unlock(&stats->mutex); /* @@ -621,13 +644,14 @@ static void __dm_stat_bio(struct dm_stat *s, int bi_rw, void dm_stats_account_io(struct dm_stats *stats, unsigned long bi_rw, sector_t bi_sector, unsigned bi_sectors, bool end, - unsigned long duration_jiffies, + unsigned long start_time, struct dm_stats_aux *stats_aux) { struct dm_stat *s; sector_t end_sector; struct dm_stats_last_position *last; bool got_precise_time; + unsigned long duration_jiffies = 0; if (unlikely(!bi_sectors)) return; @@ -647,16 +671,16 @@ void dm_stats_account_io(struct dm_stats *stats, unsigned long bi_rw, )); WRITE_ONCE(last->last_sector, end_sector); WRITE_ONCE(last->last_rw, bi_rw); - } + } else + duration_jiffies = jiffies - start_time; rcu_read_lock(); got_precise_time = false; list_for_each_entry_rcu(s, &stats->list, list_entry) { if (s->stat_flags & STAT_PRECISE_TIMESTAMPS && !got_precise_time) { - if (!end) - stats_aux->duration_ns = ktime_to_ns(ktime_get()); - else + /* start (!end) duration_ns is set by DM core's alloc_io() */ + if (end) stats_aux->duration_ns = ktime_to_ns(ktime_get()) - stats_aux->duration_ns; got_precise_time = true; } diff --git a/drivers/md/dm-stats.h b/drivers/md/dm-stats.h index 2ddfae678f320..09c81a1ec057d 100644 --- a/drivers/md/dm-stats.h +++ b/drivers/md/dm-stats.h @@ -13,8 +13,7 @@ struct dm_stats { struct mutex mutex; struct list_head list; /* 
list of struct dm_stat */ struct dm_stats_last_position __percpu *last; - sector_t last_sector; - unsigned last_rw; + bool precise_timestamps; }; struct dm_stats_aux { @@ -32,7 +31,7 @@ int dm_stats_message(struct mapped_device *md, unsigned argc, char **argv, void dm_stats_account_io(struct dm_stats *stats, unsigned long bi_rw, sector_t bi_sector, unsigned bi_sectors, bool end, - unsigned long duration_jiffies, + unsigned long start_time, struct dm_stats_aux *aux); static inline bool dm_stats_used(struct dm_stats *st) @@ -40,4 +39,10 @@ static inline bool dm_stats_used(struct dm_stats *st) return !list_empty(&st->list); } +static inline void dm_stats_record_start(struct dm_stats *stats, struct dm_stats_aux *aux) +{ + if (unlikely(stats->precise_timestamps)) + aux->duration_ns = ktime_to_ns(ktime_get()); +} + #endif diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 997ace47bbd54..dcb8d8fc7877a 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -484,33 +484,48 @@ u64 dm_start_time_ns_from_clone(struct bio *bio) } EXPORT_SYMBOL_GPL(dm_start_time_ns_from_clone); -static void start_io_acct(struct dm_io *io) +static bool bio_is_flush_with_data(struct bio *bio) { - struct mapped_device *md = io->md; - struct bio *bio = io->orig_bio; - - bio_start_io_acct_time(bio, io->start_time); - if (unlikely(dm_stats_used(&md->stats))) - dm_stats_account_io(&md->stats, bio_data_dir(bio), - bio->bi_iter.bi_sector, bio_sectors(bio), - false, 0, &io->stats_aux); + return ((bio->bi_opf & REQ_PREFLUSH) && bio->bi_iter.bi_size); } -static void end_io_acct(struct mapped_device *md, struct bio *bio, - unsigned long start_time, struct dm_stats_aux *stats_aux) +static void dm_io_acct(bool end, struct mapped_device *md, struct bio *bio, + unsigned long start_time, struct dm_stats_aux *stats_aux) { - unsigned long duration = jiffies - start_time; + bool is_flush_with_data; + unsigned int bi_size; - bio_end_io_acct(bio, start_time); + /* If REQ_PREFLUSH set save any payload but do not 
account it */ + is_flush_with_data = bio_is_flush_with_data(bio); + if (is_flush_with_data) { + bi_size = bio->bi_iter.bi_size; + bio->bi_iter.bi_size = 0; + } + + if (!end) + bio_start_io_acct_time(bio, start_time); + else + bio_end_io_acct(bio, start_time); if (unlikely(dm_stats_used(&md->stats))) dm_stats_account_io(&md->stats, bio_data_dir(bio), bio->bi_iter.bi_sector, bio_sectors(bio), - true, duration, stats_aux); + end, start_time, stats_aux); + + /* Restore bio's payload so it does get accounted upon requeue */ + if (is_flush_with_data) + bio->bi_iter.bi_size = bi_size; +} + +static void start_io_acct(struct dm_io *io) +{ + dm_io_acct(false, io->md, io->orig_bio, io->start_time, &io->stats_aux); +} - /* nudge anyone waiting on suspend queue */ - if (unlikely(wq_has_sleeper(&md->wait))) - wake_up(&md->wait); +static void end_io_acct(struct mapped_device *md, struct bio *bio, + unsigned long start_time, struct dm_stats_aux *stats_aux) +{ + dm_io_acct(true, md, bio, start_time, stats_aux); } static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio) @@ -531,12 +546,15 @@ static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio) io->magic = DM_IO_MAGIC; io->status = 0; atomic_set(&io->io_count, 1); + this_cpu_inc(*md->pending_io); io->orig_bio = bio; io->md = md; spin_lock_init(&io->endio_lock); io->start_time = jiffies; + dm_stats_record_start(&md->stats, &io->stats_aux); + return io; } @@ -826,11 +844,17 @@ void dm_io_dec_pending(struct dm_io *io, blk_status_t error) stats_aux = io->stats_aux; free_io(md, io); end_io_acct(md, bio, start_time, &stats_aux); + smp_wmb(); + this_cpu_dec(*md->pending_io); + + /* nudge anyone waiting on suspend queue */ + if (unlikely(wq_has_sleeper(&md->wait))) + wake_up(&md->wait); if (io_error == BLK_STS_DM_REQUEUE) return; - if ((bio->bi_opf & REQ_PREFLUSH) && bio->bi_iter.bi_size) { + if (bio_is_flush_with_data(bio)) { /* * Preflush done for flush with data, reissue * without REQ_PREFLUSH. 
@@ -1495,15 +1519,10 @@ static void dm_submit_bio(struct bio *bio) struct dm_table *map; map = dm_get_live_table(md, &srcu_idx); - if (unlikely(!map)) { - DMERR_LIMIT("%s: mapping table unavailable, erroring io", - dm_device_name(md)); - bio_io_error(bio); - goto out; - } - /* If suspended, queue this IO for later */ - if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags))) { + /* If suspended, or map not yet available, queue this IO for later */ + if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) || + unlikely(!map)) { if (bio->bi_opf & REQ_NOWAIT) bio_wouldblock_error(bio); else if (bio->bi_opf & REQ_RAHEAD) @@ -1607,6 +1626,7 @@ static void cleanup_mapped_device(struct mapped_device *md) md->dax_dev = NULL; } + dm_cleanup_zoned_dev(md); if (md->disk) { spin_lock(&_minor_lock); md->disk->private_data = NULL; @@ -1619,6 +1639,11 @@ static void cleanup_mapped_device(struct mapped_device *md) blk_cleanup_disk(md->disk); } + if (md->pending_io) { + free_percpu(md->pending_io); + md->pending_io = NULL; + } + cleanup_srcu_struct(&md->io_barrier); mutex_destroy(&md->suspend_lock); @@ -1627,7 +1652,6 @@ static void cleanup_mapped_device(struct mapped_device *md) mutex_destroy(&md->swap_bios_lock); dm_mq_cleanup_mapped_device(md); - dm_cleanup_zoned_dev(md); } /* @@ -1721,6 +1745,10 @@ static struct mapped_device *alloc_dev(int minor) if (!md->wq) goto bad; + md->pending_io = alloc_percpu(unsigned long); + if (!md->pending_io) + goto bad; + dm_stats_init(&md->stats); /* Populate the mapping, nobody knows we exist yet */ @@ -2128,16 +2156,13 @@ void dm_put(struct mapped_device *md) } EXPORT_SYMBOL_GPL(dm_put); -static bool md_in_flight_bios(struct mapped_device *md) +static bool dm_in_flight_bios(struct mapped_device *md) { int cpu; - struct block_device *part = dm_disk(md)->part0; - long sum = 0; + unsigned long sum = 0; - for_each_possible_cpu(cpu) { - sum += part_stat_local_read_cpu(part, in_flight[0], cpu); - sum += part_stat_local_read_cpu(part, 
in_flight[1], cpu); - } + for_each_possible_cpu(cpu) + sum += *per_cpu_ptr(md->pending_io, cpu); return sum != 0; } @@ -2150,7 +2175,7 @@ static int dm_wait_for_bios_completion(struct mapped_device *md, unsigned int ta while (true) { prepare_to_wait(&md->wait, &wait, task_state); - if (!md_in_flight_bios(md)) + if (!dm_in_flight_bios(md)) break; if (signal_pending_state(task_state, current)) { @@ -2162,6 +2187,8 @@ static int dm_wait_for_bios_completion(struct mapped_device *md, unsigned int ta } finish_wait(&md->wait, &wait); + smp_rmb(); + return r; } diff --git a/drivers/media/i2c/adv7511-v4l2.c b/drivers/media/i2c/adv7511-v4l2.c index 8e13cae40ec5b..db7f41a80770d 100644 --- a/drivers/media/i2c/adv7511-v4l2.c +++ b/drivers/media/i2c/adv7511-v4l2.c @@ -522,7 +522,7 @@ static void log_infoframe(struct v4l2_subdev *sd, const struct adv7511_cfg_read_ buffer[3] = 0; buffer[3] = hdmi_infoframe_checksum(buffer, len + 4); - if (hdmi_infoframe_unpack(&frame, buffer, sizeof(buffer)) < 0) { + if (hdmi_infoframe_unpack(&frame, buffer, len + 4) < 0) { v4l2_err(sd, "%s: unpack of %s infoframe failed\n", __func__, cri->desc); return; } diff --git a/drivers/media/i2c/adv7604.c b/drivers/media/i2c/adv7604.c index a2fa408d2d9f5..bb0c8fc6d3832 100644 --- a/drivers/media/i2c/adv7604.c +++ b/drivers/media/i2c/adv7604.c @@ -2484,7 +2484,7 @@ static int adv76xx_read_infoframe(struct v4l2_subdev *sd, int index, buffer[i + 3] = infoframe_read(sd, adv76xx_cri[index].payload_addr + i); - if (hdmi_infoframe_unpack(frame, buffer, sizeof(buffer)) < 0) { + if (hdmi_infoframe_unpack(frame, buffer, len + 3) < 0) { v4l2_err(sd, "%s: unpack of %s infoframe failed\n", __func__, adv76xx_cri[index].desc); return -ENOENT; diff --git a/drivers/media/i2c/adv7842.c b/drivers/media/i2c/adv7842.c index 9d6eed0f82819..22caa070273b4 100644 --- a/drivers/media/i2c/adv7842.c +++ b/drivers/media/i2c/adv7842.c @@ -2583,7 +2583,7 @@ static void log_infoframe(struct v4l2_subdev *sd, const struct adv7842_cfg_read_ 
for (i = 0; i < len; i++) buffer[i + 3] = infoframe_read(sd, cri->payload_addr + i); - if (hdmi_infoframe_unpack(&frame, buffer, sizeof(buffer)) < 0) { + if (hdmi_infoframe_unpack(&frame, buffer, len + 3) < 0) { v4l2_err(sd, "%s: unpack of %s infoframe failed\n", __func__, cri->desc); return; } diff --git a/drivers/media/i2c/ov2740.c b/drivers/media/i2c/ov2740.c index bab720c7c1de1..d5f0eabf20c6a 100644 --- a/drivers/media/i2c/ov2740.c +++ b/drivers/media/i2c/ov2740.c @@ -1162,6 +1162,7 @@ static int ov2740_probe(struct i2c_client *client) if (!ov2740) return -ENOMEM; + v4l2_i2c_subdev_init(&ov2740->sd, client, &ov2740_subdev_ops); full_power = acpi_dev_state_d0(&client->dev); if (full_power) { ret = ov2740_identify_module(ov2740); @@ -1171,13 +1172,6 @@ static int ov2740_probe(struct i2c_client *client) } } - v4l2_i2c_subdev_init(&ov2740->sd, client, &ov2740_subdev_ops); - ret = ov2740_identify_module(ov2740); - if (ret) { - dev_err(&client->dev, "failed to find sensor: %d", ret); - return ret; - } - mutex_init(&ov2740->mutex); ov2740->cur_mode = &supported_modes[0]; ret = ov2740_init_controls(ov2740); diff --git a/drivers/media/i2c/ov5640.c b/drivers/media/i2c/ov5640.c index ddbd71394db33..db5a19babe67d 100644 --- a/drivers/media/i2c/ov5640.c +++ b/drivers/media/i2c/ov5640.c @@ -2293,7 +2293,6 @@ static int ov5640_set_fmt(struct v4l2_subdev *sd, struct ov5640_dev *sensor = to_ov5640_dev(sd); const struct ov5640_mode_info *new_mode; struct v4l2_mbus_framefmt *mbus_fmt = &format->format; - struct v4l2_mbus_framefmt *fmt; int ret; if (format->pad != 0) @@ -2311,12 +2310,10 @@ static int ov5640_set_fmt(struct v4l2_subdev *sd, if (ret) goto out; - if (format->which == V4L2_SUBDEV_FORMAT_TRY) - fmt = v4l2_subdev_get_try_format(sd, sd_state, 0); - else - fmt = &sensor->fmt; - - *fmt = *mbus_fmt; + if (format->which == V4L2_SUBDEV_FORMAT_TRY) { + *v4l2_subdev_get_try_format(sd, sd_state, 0) = *mbus_fmt; + goto out; + } if (new_mode != sensor->current_mode) { 
sensor->current_mode = new_mode; @@ -2325,6 +2322,9 @@ static int ov5640_set_fmt(struct v4l2_subdev *sd, if (mbus_fmt->code != sensor->fmt.code) sensor->pending_fmt_change = true; + /* update format even if code is unchanged, resolution might change */ + sensor->fmt = *mbus_fmt; + __v4l2_ctrl_s_ctrl_int64(sensor->ctrls.pixel_rate, ov5640_calc_pixel_rate(sensor)); out: diff --git a/drivers/media/i2c/ov5648.c b/drivers/media/i2c/ov5648.c index 947d437ed0efe..ef8b52dc9401d 100644 --- a/drivers/media/i2c/ov5648.c +++ b/drivers/media/i2c/ov5648.c @@ -639,7 +639,7 @@ struct ov5648_ctrls { struct v4l2_ctrl *pixel_rate; struct v4l2_ctrl_handler handler; -} __packed; +}; struct ov5648_sensor { struct device *dev; @@ -1778,8 +1778,14 @@ static int ov5648_state_configure(struct ov5648_sensor *sensor, static int ov5648_state_init(struct ov5648_sensor *sensor) { - return ov5648_state_configure(sensor, &ov5648_modes[0], - ov5648_mbus_codes[0]); + int ret; + + mutex_lock(&sensor->mutex); + ret = ov5648_state_configure(sensor, &ov5648_modes[0], + ov5648_mbus_codes[0]); + mutex_unlock(&sensor->mutex); + + return ret; } /* Sensor Base */ diff --git a/drivers/media/i2c/ov6650.c b/drivers/media/i2c/ov6650.c index f67412150b16b..eb59dc8bb5929 100644 --- a/drivers/media/i2c/ov6650.c +++ b/drivers/media/i2c/ov6650.c @@ -472,9 +472,16 @@ static int ov6650_get_selection(struct v4l2_subdev *sd, { struct i2c_client *client = v4l2_get_subdevdata(sd); struct ov6650 *priv = to_ov6650(client); + struct v4l2_rect *rect; - if (sel->which != V4L2_SUBDEV_FORMAT_ACTIVE) - return -EINVAL; + if (sel->which == V4L2_SUBDEV_FORMAT_TRY) { + /* pre-select try crop rectangle */ + rect = &sd_state->pads->try_crop; + + } else { + /* pre-select active crop rectangle */ + rect = &priv->rect; + } switch (sel->target) { case V4L2_SEL_TGT_CROP_BOUNDS: @@ -483,14 +490,33 @@ static int ov6650_get_selection(struct v4l2_subdev *sd, sel->r.width = W_CIF; sel->r.height = H_CIF; return 0; + case V4L2_SEL_TGT_CROP: - 
sel->r = priv->rect; + /* use selected crop rectangle */ + sel->r = *rect; return 0; + default: return -EINVAL; } } +static bool is_unscaled_ok(int width, int height, struct v4l2_rect *rect) +{ + return width > rect->width >> 1 || height > rect->height >> 1; +} + +static void ov6650_bind_align_crop_rectangle(struct v4l2_rect *rect) +{ + v4l_bound_align_image(&rect->width, 2, W_CIF, 1, + &rect->height, 2, H_CIF, 1, 0); + v4l_bound_align_image(&rect->left, DEF_HSTRT << 1, + (DEF_HSTRT << 1) + W_CIF - (__s32)rect->width, 1, + &rect->top, DEF_VSTRT << 1, + (DEF_VSTRT << 1) + H_CIF - (__s32)rect->height, + 1, 0); +} + static int ov6650_set_selection(struct v4l2_subdev *sd, struct v4l2_subdev_state *sd_state, struct v4l2_subdev_selection *sel) @@ -499,18 +525,30 @@ static int ov6650_set_selection(struct v4l2_subdev *sd, struct ov6650 *priv = to_ov6650(client); int ret; - if (sel->which != V4L2_SUBDEV_FORMAT_ACTIVE || - sel->target != V4L2_SEL_TGT_CROP) + if (sel->target != V4L2_SEL_TGT_CROP) return -EINVAL; - v4l_bound_align_image(&sel->r.width, 2, W_CIF, 1, - &sel->r.height, 2, H_CIF, 1, 0); - v4l_bound_align_image(&sel->r.left, DEF_HSTRT << 1, - (DEF_HSTRT << 1) + W_CIF - (__s32)sel->r.width, 1, - &sel->r.top, DEF_VSTRT << 1, - (DEF_VSTRT << 1) + H_CIF - (__s32)sel->r.height, - 1, 0); + ov6650_bind_align_crop_rectangle(&sel->r); + + if (sel->which == V4L2_SUBDEV_FORMAT_TRY) { + struct v4l2_rect *crop = &sd_state->pads->try_crop; + struct v4l2_mbus_framefmt *mf = &sd_state->pads->try_fmt; + /* detect current pad config scaling factor */ + bool half_scale = !is_unscaled_ok(mf->width, mf->height, crop); + + /* store new crop rectangle */ + *crop = sel->r; + /* adjust frame size */ + mf->width = crop->width >> half_scale; + mf->height = crop->height >> half_scale; + + return 0; + } + + /* V4L2_SUBDEV_FORMAT_ACTIVE */ + + /* apply new crop rectangle */ ret = ov6650_reg_write(client, REG_HSTRT, sel->r.left >> 1); if (!ret) { priv->rect.width += priv->rect.left - sel->r.left; 
@@ -562,30 +600,13 @@ static int ov6650_get_fmt(struct v4l2_subdev *sd, return 0; } -static bool is_unscaled_ok(int width, int height, struct v4l2_rect *rect) -{ - return width > rect->width >> 1 || height > rect->height >> 1; -} - #define to_clkrc(div) ((div) - 1) /* set the format we will capture in */ -static int ov6650_s_fmt(struct v4l2_subdev *sd, struct v4l2_mbus_framefmt *mf) +static int ov6650_s_fmt(struct v4l2_subdev *sd, u32 code, bool half_scale) { struct i2c_client *client = v4l2_get_subdevdata(sd); struct ov6650 *priv = to_ov6650(client); - bool half_scale = !is_unscaled_ok(mf->width, mf->height, &priv->rect); - struct v4l2_subdev_selection sel = { - .which = V4L2_SUBDEV_FORMAT_ACTIVE, - .target = V4L2_SEL_TGT_CROP, - .r.left = priv->rect.left + (priv->rect.width >> 1) - - (mf->width >> (1 - half_scale)), - .r.top = priv->rect.top + (priv->rect.height >> 1) - - (mf->height >> (1 - half_scale)), - .r.width = mf->width << half_scale, - .r.height = mf->height << half_scale, - }; - u32 code = mf->code; u8 coma_set = 0, coma_mask = 0, coml_set, coml_mask; int ret; @@ -653,9 +674,7 @@ static int ov6650_s_fmt(struct v4l2_subdev *sd, struct v4l2_mbus_framefmt *mf) coma_mask |= COMA_QCIF; } - ret = ov6650_set_selection(sd, NULL, &sel); - if (!ret) - ret = ov6650_reg_rmw(client, REG_COMA, coma_set, coma_mask); + ret = ov6650_reg_rmw(client, REG_COMA, coma_set, coma_mask); if (!ret) { priv->half_scale = half_scale; @@ -674,14 +693,12 @@ static int ov6650_set_fmt(struct v4l2_subdev *sd, struct v4l2_mbus_framefmt *mf = &format->format; struct i2c_client *client = v4l2_get_subdevdata(sd); struct ov6650 *priv = to_ov6650(client); + struct v4l2_rect *crop; + bool half_scale; if (format->pad) return -EINVAL; - if (is_unscaled_ok(mf->width, mf->height, &priv->rect)) - v4l_bound_align_image(&mf->width, 2, W_CIF, 1, - &mf->height, 2, H_CIF, 1, 0); - switch (mf->code) { case MEDIA_BUS_FMT_Y10_1X10: mf->code = MEDIA_BUS_FMT_Y8_1X8; @@ -699,10 +716,17 @@ static int 
ov6650_set_fmt(struct v4l2_subdev *sd, break; } + if (format->which == V4L2_SUBDEV_FORMAT_TRY) + crop = &sd_state->pads->try_crop; + else + crop = &priv->rect; + + half_scale = !is_unscaled_ok(mf->width, mf->height, crop); + if (format->which == V4L2_SUBDEV_FORMAT_TRY) { - /* store media bus format code and frame size in pad config */ - sd_state->pads->try_fmt.width = mf->width; - sd_state->pads->try_fmt.height = mf->height; + /* store new mbus frame format code and size in pad config */ + sd_state->pads->try_fmt.width = crop->width >> half_scale; + sd_state->pads->try_fmt.height = crop->height >> half_scale; sd_state->pads->try_fmt.code = mf->code; /* return default mbus frame format updated with pad config */ @@ -712,9 +736,11 @@ static int ov6650_set_fmt(struct v4l2_subdev *sd, mf->code = sd_state->pads->try_fmt.code; } else { - /* apply new media bus format code and frame size */ - int ret = ov6650_s_fmt(sd, mf); + int ret = 0; + /* apply new media bus frame format and scaling if changed */ + if (mf->code != priv->code || half_scale != priv->half_scale) + ret = ov6650_s_fmt(sd, mf->code, half_scale); if (ret) return ret; @@ -890,9 +916,8 @@ static int ov6650_video_probe(struct v4l2_subdev *sd) if (!ret) ret = ov6650_prog_dflt(client, xclk->clkrc); if (!ret) { - struct v4l2_mbus_framefmt mf = ov6650_def_fmt; - - ret = ov6650_s_fmt(sd, &mf); + /* driver default frame format, no scaling */ + ret = ov6650_s_fmt(sd, ov6650_def_fmt.code, false); } if (!ret) ret = v4l2_ctrl_handler_setup(&priv->hdl); diff --git a/drivers/media/i2c/ov8865.c b/drivers/media/i2c/ov8865.c index d9d016cfa9ac0..e0dd0f4849a7a 100644 --- a/drivers/media/i2c/ov8865.c +++ b/drivers/media/i2c/ov8865.c @@ -457,8 +457,8 @@ #define OV8865_NATIVE_WIDTH 3296 #define OV8865_NATIVE_HEIGHT 2528 -#define OV8865_ACTIVE_START_TOP 32 -#define OV8865_ACTIVE_START_LEFT 80 +#define OV8865_ACTIVE_START_LEFT 16 +#define OV8865_ACTIVE_START_TOP 40 #define OV8865_ACTIVE_WIDTH 3264 #define OV8865_ACTIVE_HEIGHT 2448 
diff --git a/drivers/media/pci/bt8xx/bttv-driver.c b/drivers/media/pci/bt8xx/bttv-driver.c index 8cc9bec43688e..5ca3d0cc653a8 100644 --- a/drivers/media/pci/bt8xx/bttv-driver.c +++ b/drivers/media/pci/bt8xx/bttv-driver.c @@ -3890,7 +3890,7 @@ static int bttv_register_video(struct bttv *btv) /* video */ vdev_init(btv, &btv->video_dev, &bttv_video_template, "video"); - btv->video_dev.device_caps = V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_TUNER | + btv->video_dev.device_caps = V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_READWRITE | V4L2_CAP_STREAMING; if (btv->tuner_type != TUNER_ABSENT) btv->video_dev.device_caps |= V4L2_CAP_TUNER; @@ -3911,7 +3911,7 @@ static int bttv_register_video(struct bttv *btv) /* vbi */ vdev_init(btv, &btv->vbi_dev, &bttv_video_template, "vbi"); btv->vbi_dev.device_caps = V4L2_CAP_VBI_CAPTURE | V4L2_CAP_READWRITE | - V4L2_CAP_STREAMING | V4L2_CAP_TUNER; + V4L2_CAP_STREAMING; if (btv->tuner_type != TUNER_ABSENT) btv->vbi_dev.device_caps |= V4L2_CAP_TUNER; diff --git a/drivers/media/pci/cx88/cx88-mpeg.c b/drivers/media/pci/cx88/cx88-mpeg.c index 680e1e3fe89b7..2c1d5137ac470 100644 --- a/drivers/media/pci/cx88/cx88-mpeg.c +++ b/drivers/media/pci/cx88/cx88-mpeg.c @@ -162,6 +162,9 @@ int cx8802_start_dma(struct cx8802_dev *dev, cx_write(MO_TS_GPCNTRL, GP_COUNT_CONTROL_RESET); q->count = 0; + /* clear interrupt status register */ + cx_write(MO_TS_INTSTAT, 0x1f1111); + /* enable irqs */ dprintk(1, "setting the interrupt mask\n"); cx_set(MO_PCI_INTMSK, core->pci_irqmask | PCI_INT_TSINT); diff --git a/drivers/media/pci/ivtv/ivtv-driver.h b/drivers/media/pci/ivtv/ivtv-driver.h index 4cf92dee65271..ce3a7ca51736e 100644 --- a/drivers/media/pci/ivtv/ivtv-driver.h +++ b/drivers/media/pci/ivtv/ivtv-driver.h @@ -330,7 +330,6 @@ struct ivtv_stream { struct ivtv *itv; /* for ease of use */ const char *name; /* name of the stream */ int type; /* stream type */ - u32 caps; /* V4L2 capabilities */ struct v4l2_fh *fh; /* pointer to the streaming filehandle */ spinlock_t qlock; /* 
locks access to the queues */ diff --git a/drivers/media/pci/ivtv/ivtv-ioctl.c b/drivers/media/pci/ivtv/ivtv-ioctl.c index 0cdf6b3210c2f..fee460e2ca863 100644 --- a/drivers/media/pci/ivtv/ivtv-ioctl.c +++ b/drivers/media/pci/ivtv/ivtv-ioctl.c @@ -438,7 +438,7 @@ static int ivtv_g_fmt_vid_out_overlay(struct file *file, void *fh, struct v4l2_f struct ivtv_stream *s = &itv->streams[fh2id(fh)->type]; struct v4l2_window *winfmt = &fmt->fmt.win; - if (!(s->caps & V4L2_CAP_VIDEO_OUTPUT_OVERLAY)) + if (!(s->vdev.device_caps & V4L2_CAP_VIDEO_OUTPUT_OVERLAY)) return -EINVAL; if (!itv->osd_video_pbase) return -EINVAL; @@ -549,7 +549,7 @@ static int ivtv_try_fmt_vid_out_overlay(struct file *file, void *fh, struct v4l2 u32 chromakey = fmt->fmt.win.chromakey; u8 global_alpha = fmt->fmt.win.global_alpha; - if (!(s->caps & V4L2_CAP_VIDEO_OUTPUT_OVERLAY)) + if (!(s->vdev.device_caps & V4L2_CAP_VIDEO_OUTPUT_OVERLAY)) return -EINVAL; if (!itv->osd_video_pbase) return -EINVAL; @@ -1383,7 +1383,7 @@ static int ivtv_g_fbuf(struct file *file, void *fh, struct v4l2_framebuffer *fb) 0, }; - if (!(s->caps & V4L2_CAP_VIDEO_OUTPUT_OVERLAY)) + if (!(s->vdev.device_caps & V4L2_CAP_VIDEO_OUTPUT_OVERLAY)) return -ENOTTY; if (!itv->osd_video_pbase) return -ENOTTY; @@ -1450,7 +1450,7 @@ static int ivtv_s_fbuf(struct file *file, void *fh, const struct v4l2_framebuffe struct ivtv_stream *s = &itv->streams[fh2id(fh)->type]; struct yuv_playback_info *yi = &itv->yuv_info; - if (!(s->caps & V4L2_CAP_VIDEO_OUTPUT_OVERLAY)) + if (!(s->vdev.device_caps & V4L2_CAP_VIDEO_OUTPUT_OVERLAY)) return -ENOTTY; if (!itv->osd_video_pbase) return -ENOTTY; @@ -1470,7 +1470,7 @@ static int ivtv_overlay(struct file *file, void *fh, unsigned int on) struct ivtv *itv = id->itv; struct ivtv_stream *s = &itv->streams[fh2id(fh)->type]; - if (!(s->caps & V4L2_CAP_VIDEO_OUTPUT_OVERLAY)) + if (!(s->vdev.device_caps & V4L2_CAP_VIDEO_OUTPUT_OVERLAY)) return -ENOTTY; if (!itv->osd_video_pbase) return -ENOTTY; diff --git 
a/drivers/media/pci/ivtv/ivtv-streams.c b/drivers/media/pci/ivtv/ivtv-streams.c index 6e455948cc77a..13d7d55e65949 100644 --- a/drivers/media/pci/ivtv/ivtv-streams.c +++ b/drivers/media/pci/ivtv/ivtv-streams.c @@ -176,7 +176,7 @@ static void ivtv_stream_init(struct ivtv *itv, int type) s->itv = itv; s->type = type; s->name = ivtv_stream_info[type].name; - s->caps = ivtv_stream_info[type].v4l2_caps; + s->vdev.device_caps = ivtv_stream_info[type].v4l2_caps; if (ivtv_stream_info[type].pio) s->dma = DMA_NONE; @@ -299,12 +299,9 @@ static int ivtv_reg_dev(struct ivtv *itv, int type) if (s_mpg->vdev.v4l2_dev) num = s_mpg->vdev.num + ivtv_stream_info[type].num_offset; } - s->vdev.device_caps = s->caps; - if (itv->osd_video_pbase) { - itv->streams[IVTV_DEC_STREAM_TYPE_YUV].vdev.device_caps |= - V4L2_CAP_VIDEO_OUTPUT_OVERLAY; - itv->streams[IVTV_DEC_STREAM_TYPE_MPG].vdev.device_caps |= - V4L2_CAP_VIDEO_OUTPUT_OVERLAY; + if (itv->osd_video_pbase && (type == IVTV_DEC_STREAM_TYPE_YUV || + type == IVTV_DEC_STREAM_TYPE_MPG)) { + s->vdev.device_caps |= V4L2_CAP_VIDEO_OUTPUT_OVERLAY; itv->v4l2_cap |= V4L2_CAP_VIDEO_OUTPUT_OVERLAY; } video_set_drvdata(&s->vdev, s); diff --git a/drivers/media/pci/saa7134/saa7134-alsa.c b/drivers/media/pci/saa7134/saa7134-alsa.c index fb24d2ed3621b..d3cde05a6ebab 100644 --- a/drivers/media/pci/saa7134/saa7134-alsa.c +++ b/drivers/media/pci/saa7134/saa7134-alsa.c @@ -1214,7 +1214,7 @@ static int alsa_device_exit(struct saa7134_dev *dev) static int saa7134_alsa_init(void) { - struct saa7134_dev *dev = NULL; + struct saa7134_dev *dev; saa7134_dmasound_init = alsa_device_init; saa7134_dmasound_exit = alsa_device_exit; @@ -1229,7 +1229,7 @@ static int saa7134_alsa_init(void) alsa_device_init(dev); } - if (dev == NULL) + if (list_empty(&saa7134_devlist)) pr_info("saa7134 ALSA: no saa7134 cards found\n"); return 0; diff --git a/drivers/media/platform/aspeed-video.c b/drivers/media/platform/aspeed-video.c index 7a24daf7165a4..bdeecde0d9978 100644 --- 
a/drivers/media/platform/aspeed-video.c +++ b/drivers/media/platform/aspeed-video.c @@ -153,7 +153,7 @@ #define VE_SRC_TB_EDGE_DET_BOT GENMASK(28, VE_SRC_TB_EDGE_DET_BOT_SHF) #define VE_MODE_DETECT_STATUS 0x098 -#define VE_MODE_DETECT_H_PIXELS GENMASK(11, 0) +#define VE_MODE_DETECT_H_PERIOD GENMASK(11, 0) #define VE_MODE_DETECT_V_LINES_SHF 16 #define VE_MODE_DETECT_V_LINES GENMASK(27, VE_MODE_DETECT_V_LINES_SHF) #define VE_MODE_DETECT_STATUS_VSYNC BIT(28) @@ -164,6 +164,8 @@ #define VE_SYNC_STATUS_VSYNC_SHF 16 #define VE_SYNC_STATUS_VSYNC GENMASK(27, VE_SYNC_STATUS_VSYNC_SHF) +#define VE_H_TOTAL_PIXELS 0x0A0 + #define VE_INTERRUPT_CTRL 0x304 #define VE_INTERRUPT_STATUS 0x308 #define VE_INTERRUPT_MODE_DETECT_WD BIT(0) @@ -802,6 +804,7 @@ static void aspeed_video_get_resolution(struct aspeed_video *video) u32 src_lr_edge; u32 src_tb_edge; u32 sync; + u32 htotal; struct v4l2_bt_timings *det = &video->detected_timings; det->width = MIN_WIDTH; @@ -847,6 +850,7 @@ static void aspeed_video_get_resolution(struct aspeed_video *video) src_tb_edge = aspeed_video_read(video, VE_SRC_TB_EDGE_DET); mds = aspeed_video_read(video, VE_MODE_DETECT_STATUS); sync = aspeed_video_read(video, VE_SYNC_STATUS); + htotal = aspeed_video_read(video, VE_H_TOTAL_PIXELS); video->frame_bottom = (src_tb_edge & VE_SRC_TB_EDGE_DET_BOT) >> VE_SRC_TB_EDGE_DET_BOT_SHF; @@ -863,8 +867,7 @@ static void aspeed_video_get_resolution(struct aspeed_video *video) VE_SRC_LR_EDGE_DET_RT_SHF; video->frame_left = src_lr_edge & VE_SRC_LR_EDGE_DET_LEFT; det->hfrontporch = video->frame_left; - det->hbackporch = (mds & VE_MODE_DETECT_H_PIXELS) - - video->frame_right; + det->hbackporch = htotal - video->frame_right; det->hsync = sync & VE_SYNC_STATUS_HSYNC; if (video->frame_left > video->frame_right) continue; diff --git a/drivers/media/platform/atmel/atmel-isc-base.c b/drivers/media/platform/atmel/atmel-isc-base.c index 660cd0ab6749a..24807782c9e50 100644 --- a/drivers/media/platform/atmel/atmel-isc-base.c +++ 
b/drivers/media/platform/atmel/atmel-isc-base.c @@ -1369,14 +1369,12 @@ static int isc_enum_framesizes(struct file *file, void *fh, struct v4l2_frmsizeenum *fsize) { struct isc_device *isc = video_drvdata(file); - struct v4l2_subdev_frame_size_enum fse = { - .code = isc->config.sd_format->mbus_code, - .index = fsize->index, - .which = V4L2_SUBDEV_FORMAT_ACTIVE, - }; int ret = -EINVAL; int i; + if (fsize->index) + return -EINVAL; + for (i = 0; i < isc->num_user_formats; i++) if (isc->user_formats[i]->fourcc == fsize->pixel_format) ret = 0; @@ -1388,14 +1386,14 @@ static int isc_enum_framesizes(struct file *file, void *fh, if (ret) return ret; - ret = v4l2_subdev_call(isc->current_subdev->sd, pad, enum_frame_size, - NULL, &fse); - if (ret) - return ret; + fsize->type = V4L2_FRMSIZE_TYPE_CONTINUOUS; - fsize->type = V4L2_FRMSIZE_TYPE_DISCRETE; - fsize->discrete.width = fse.max_width; - fsize->discrete.height = fse.max_height; + fsize->stepwise.min_width = 16; + fsize->stepwise.max_width = isc->max_width; + fsize->stepwise.min_height = 16; + fsize->stepwise.max_height = isc->max_height; + fsize->stepwise.step_width = 1; + fsize->stepwise.step_height = 1; return 0; } diff --git a/drivers/media/platform/atmel/atmel-sama7g5-isc.c b/drivers/media/platform/atmel/atmel-sama7g5-isc.c index 5d1c76f680f37..2b1082295c130 100644 --- a/drivers/media/platform/atmel/atmel-sama7g5-isc.c +++ b/drivers/media/platform/atmel/atmel-sama7g5-isc.c @@ -556,7 +556,6 @@ static int microchip_xisc_remove(struct platform_device *pdev) v4l2_device_unregister(&isc->v4l2_dev); - clk_disable_unprepare(isc->ispck); clk_disable_unprepare(isc->hclock); isc_clk_cleanup(isc); @@ -568,7 +567,6 @@ static int __maybe_unused xisc_runtime_suspend(struct device *dev) { struct isc_device *isc = dev_get_drvdata(dev); - clk_disable_unprepare(isc->ispck); clk_disable_unprepare(isc->hclock); return 0; @@ -583,10 +581,6 @@ static int __maybe_unused xisc_runtime_resume(struct device *dev) if (ret) return ret; - ret = 
clk_prepare_enable(isc->ispck); - if (ret) - clk_disable_unprepare(isc->hclock); - return ret; } diff --git a/drivers/media/platform/coda/coda-common.c b/drivers/media/platform/coda/coda-common.c index 3cd47ba26357e..a57822b050706 100644 --- a/drivers/media/platform/coda/coda-common.c +++ b/drivers/media/platform/coda/coda-common.c @@ -409,6 +409,7 @@ static struct vdoa_data *coda_get_vdoa_data(void) if (!vdoa_data) vdoa_data = ERR_PTR(-EPROBE_DEFER); + put_device(&vdoa_pdev->dev); out: of_node_put(vdoa_node); diff --git a/drivers/media/platform/davinci/vpif.c b/drivers/media/platform/davinci/vpif.c index 5a89d885d0e3b..4a260f4ed236b 100644 --- a/drivers/media/platform/davinci/vpif.c +++ b/drivers/media/platform/davinci/vpif.c @@ -41,6 +41,11 @@ MODULE_ALIAS("platform:" VPIF_DRIVER_NAME); #define VPIF_CH2_MAX_MODES 15 #define VPIF_CH3_MAX_MODES 2 +struct vpif_data { + struct platform_device *capture; + struct platform_device *display; +}; + DEFINE_SPINLOCK(vpif_lock); EXPORT_SYMBOL_GPL(vpif_lock); @@ -423,16 +428,31 @@ int vpif_channel_getfid(u8 channel_id) } EXPORT_SYMBOL(vpif_channel_getfid); +static void vpif_pdev_release(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + + kfree(pdev); +} + static int vpif_probe(struct platform_device *pdev) { static struct resource *res_irq; struct platform_device *pdev_capture, *pdev_display; struct device_node *endpoint = NULL; + struct vpif_data *data; + int ret; vpif_base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(vpif_base)) return PTR_ERR(vpif_base); + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + platform_set_drvdata(pdev, data); + pm_runtime_enable(&pdev->dev); pm_runtime_get(&pdev->dev); @@ -456,46 +476,79 @@ static int vpif_probe(struct platform_device *pdev) res_irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0); if (!res_irq) { dev_warn(&pdev->dev, "Missing IRQ resource.\n"); - pm_runtime_put(&pdev->dev); - return -EINVAL; + ret = -EINVAL; 
+ goto err_put_rpm; } - pdev_capture = devm_kzalloc(&pdev->dev, sizeof(*pdev_capture), - GFP_KERNEL); - if (pdev_capture) { - pdev_capture->name = "vpif_capture"; - pdev_capture->id = -1; - pdev_capture->resource = res_irq; - pdev_capture->num_resources = 1; - pdev_capture->dev.dma_mask = pdev->dev.dma_mask; - pdev_capture->dev.coherent_dma_mask = pdev->dev.coherent_dma_mask; - pdev_capture->dev.parent = &pdev->dev; - platform_device_register(pdev_capture); - } else { - dev_warn(&pdev->dev, "Unable to allocate memory for pdev_capture.\n"); + pdev_capture = kzalloc(sizeof(*pdev_capture), GFP_KERNEL); + if (!pdev_capture) { + ret = -ENOMEM; + goto err_put_rpm; } - pdev_display = devm_kzalloc(&pdev->dev, sizeof(*pdev_display), - GFP_KERNEL); - if (pdev_display) { - pdev_display->name = "vpif_display"; - pdev_display->id = -1; - pdev_display->resource = res_irq; - pdev_display->num_resources = 1; - pdev_display->dev.dma_mask = pdev->dev.dma_mask; - pdev_display->dev.coherent_dma_mask = pdev->dev.coherent_dma_mask; - pdev_display->dev.parent = &pdev->dev; - platform_device_register(pdev_display); - } else { - dev_warn(&pdev->dev, "Unable to allocate memory for pdev_display.\n"); + pdev_capture->name = "vpif_capture"; + pdev_capture->id = -1; + pdev_capture->resource = res_irq; + pdev_capture->num_resources = 1; + pdev_capture->dev.dma_mask = pdev->dev.dma_mask; + pdev_capture->dev.coherent_dma_mask = pdev->dev.coherent_dma_mask; + pdev_capture->dev.parent = &pdev->dev; + pdev_capture->dev.release = vpif_pdev_release; + + ret = platform_device_register(pdev_capture); + if (ret) + goto err_put_pdev_capture; + + pdev_display = kzalloc(sizeof(*pdev_display), GFP_KERNEL); + if (!pdev_display) { + ret = -ENOMEM; + goto err_put_pdev_capture; } + pdev_display->name = "vpif_display"; + pdev_display->id = -1; + pdev_display->resource = res_irq; + pdev_display->num_resources = 1; + pdev_display->dev.dma_mask = pdev->dev.dma_mask; + pdev_display->dev.coherent_dma_mask = 
pdev->dev.coherent_dma_mask; + pdev_display->dev.parent = &pdev->dev; + pdev_display->dev.release = vpif_pdev_release; + + ret = platform_device_register(pdev_display); + if (ret) + goto err_put_pdev_display; + + data->capture = pdev_capture; + data->display = pdev_display; + return 0; + +err_put_pdev_display: + platform_device_put(pdev_display); +err_put_pdev_capture: + platform_device_put(pdev_capture); +err_put_rpm: + pm_runtime_put(&pdev->dev); + pm_runtime_disable(&pdev->dev); + kfree(data); + + return ret; } static int vpif_remove(struct platform_device *pdev) { + struct vpif_data *data = platform_get_drvdata(pdev); + + if (data->capture) + platform_device_unregister(data->capture); + if (data->display) + platform_device_unregister(data->display); + + pm_runtime_put(&pdev->dev); pm_runtime_disable(&pdev->dev); + + kfree(data); + return 0; } diff --git a/drivers/media/platform/imx-jpeg/mxc-jpeg.c b/drivers/media/platform/imx-jpeg/mxc-jpeg.c index 4ca96cf9def76..83a2b4d13bad3 100644 --- a/drivers/media/platform/imx-jpeg/mxc-jpeg.c +++ b/drivers/media/platform/imx-jpeg/mxc-jpeg.c @@ -947,8 +947,13 @@ static void mxc_jpeg_device_run(void *priv) v4l2_m2m_buf_copy_metadata(src_buf, dst_buf, true); jpeg_src_buf = vb2_to_mxc_buf(&src_buf->vb2_buf); + if (q_data_cap->fmt->colplanes != dst_buf->vb2_buf.num_planes) { + dev_err(dev, "Capture format %s has %d planes, but capture buffer has %d planes\n", + q_data_cap->fmt->name, q_data_cap->fmt->colplanes, + dst_buf->vb2_buf.num_planes); + jpeg_src_buf->jpeg_parse_error = true; + } if (jpeg_src_buf->jpeg_parse_error) { - jpeg->slot_data[ctx->slot].used = false; v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx); v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx); v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_ERROR); diff --git a/drivers/media/platform/meson/ge2d/ge2d.c b/drivers/media/platform/meson/ge2d/ge2d.c index ccda18e5a3774..5e7b319f300df 100644 --- a/drivers/media/platform/meson/ge2d/ge2d.c +++ b/drivers/media/platform/meson/ge2d/ge2d.c @@ 
-215,35 +215,35 @@ static void ge2d_hw_start(struct meson_ge2d *ge2d) regmap_write(ge2d->map, GE2D_SRC1_CLIPY_START_END, FIELD_PREP(GE2D_START, ctx->in.crop.top) | - FIELD_PREP(GE2D_END, ctx->in.crop.top + ctx->in.crop.height)); + FIELD_PREP(GE2D_END, ctx->in.crop.top + ctx->in.crop.height - 1)); regmap_write(ge2d->map, GE2D_SRC1_CLIPX_START_END, FIELD_PREP(GE2D_START, ctx->in.crop.left) | - FIELD_PREP(GE2D_END, ctx->in.crop.left + ctx->in.crop.width)); + FIELD_PREP(GE2D_END, ctx->in.crop.left + ctx->in.crop.width - 1)); regmap_write(ge2d->map, GE2D_SRC2_CLIPY_START_END, FIELD_PREP(GE2D_START, ctx->out.crop.top) | - FIELD_PREP(GE2D_END, ctx->out.crop.top + ctx->out.crop.height)); + FIELD_PREP(GE2D_END, ctx->out.crop.top + ctx->out.crop.height - 1)); regmap_write(ge2d->map, GE2D_SRC2_CLIPX_START_END, FIELD_PREP(GE2D_START, ctx->out.crop.left) | - FIELD_PREP(GE2D_END, ctx->out.crop.left + ctx->out.crop.width)); + FIELD_PREP(GE2D_END, ctx->out.crop.left + ctx->out.crop.width - 1)); regmap_write(ge2d->map, GE2D_DST_CLIPY_START_END, FIELD_PREP(GE2D_START, ctx->out.crop.top) | - FIELD_PREP(GE2D_END, ctx->out.crop.top + ctx->out.crop.height)); + FIELD_PREP(GE2D_END, ctx->out.crop.top + ctx->out.crop.height - 1)); regmap_write(ge2d->map, GE2D_DST_CLIPX_START_END, FIELD_PREP(GE2D_START, ctx->out.crop.left) | - FIELD_PREP(GE2D_END, ctx->out.crop.left + ctx->out.crop.width)); + FIELD_PREP(GE2D_END, ctx->out.crop.left + ctx->out.crop.width - 1)); regmap_write(ge2d->map, GE2D_SRC1_Y_START_END, - FIELD_PREP(GE2D_END, ctx->in.pix_fmt.height)); + FIELD_PREP(GE2D_END, ctx->in.pix_fmt.height - 1)); regmap_write(ge2d->map, GE2D_SRC1_X_START_END, - FIELD_PREP(GE2D_END, ctx->in.pix_fmt.width)); + FIELD_PREP(GE2D_END, ctx->in.pix_fmt.width - 1)); regmap_write(ge2d->map, GE2D_SRC2_Y_START_END, - FIELD_PREP(GE2D_END, ctx->out.pix_fmt.height)); + FIELD_PREP(GE2D_END, ctx->out.pix_fmt.height - 1)); regmap_write(ge2d->map, GE2D_SRC2_X_START_END, - FIELD_PREP(GE2D_END, 
ctx->out.pix_fmt.width)); + FIELD_PREP(GE2D_END, ctx->out.pix_fmt.width - 1)); regmap_write(ge2d->map, GE2D_DST_Y_START_END, - FIELD_PREP(GE2D_END, ctx->out.pix_fmt.height)); + FIELD_PREP(GE2D_END, ctx->out.pix_fmt.height - 1)); regmap_write(ge2d->map, GE2D_DST_X_START_END, - FIELD_PREP(GE2D_END, ctx->out.pix_fmt.width)); + FIELD_PREP(GE2D_END, ctx->out.pix_fmt.width - 1)); /* Color, no blend, use source color */ reg = GE2D_ALU_DO_COLOR_OPERATION_LOGIC(LOGIC_OPERATION_COPY, diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_fw_vpu.c b/drivers/media/platform/mtk-vcodec/mtk_vcodec_fw_vpu.c index cd27f637dbe7c..cfc7ebed8fb7a 100644 --- a/drivers/media/platform/mtk-vcodec/mtk_vcodec_fw_vpu.c +++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_fw_vpu.c @@ -102,6 +102,8 @@ struct mtk_vcodec_fw *mtk_vcodec_fw_vpu_init(struct mtk_vcodec_dev *dev, vpu_wdt_reg_handler(fw_pdev, mtk_vcodec_vpu_reset_handler, dev, rst_id); fw = devm_kzalloc(&dev->plat_dev->dev, sizeof(*fw), GFP_KERNEL); + if (!fw) + return ERR_PTR(-ENOMEM); fw->type = VPU; fw->ops = &mtk_vcodec_vpu_msg; fw->pdev = fw_pdev; diff --git a/drivers/media/platform/omap3isp/ispstat.c b/drivers/media/platform/omap3isp/ispstat.c index 5b9b57f4d9bf8..68cf68dbcace2 100644 --- a/drivers/media/platform/omap3isp/ispstat.c +++ b/drivers/media/platform/omap3isp/ispstat.c @@ -512,7 +512,7 @@ int omap3isp_stat_request_statistics(struct ispstat *stat, int omap3isp_stat_request_statistics_time32(struct ispstat *stat, struct omap3isp_stat_data_time32 *data) { - struct omap3isp_stat_data data64; + struct omap3isp_stat_data data64 = { }; int ret; ret = omap3isp_stat_request_statistics(stat, &data64); @@ -521,7 +521,8 @@ int omap3isp_stat_request_statistics_time32(struct ispstat *stat, data->ts.tv_sec = data64.ts.tv_sec; data->ts.tv_usec = data64.ts.tv_usec; - memcpy(&data->buf, &data64.buf, sizeof(*data) - sizeof(data->ts)); + data->buf = (uintptr_t)data64.buf; + memcpy(&data->frame, &data64.frame, sizeof(data->frame)); return 
0; } diff --git a/drivers/media/platform/qcom/camss/camss-csid-170.c b/drivers/media/platform/qcom/camss/camss-csid-170.c index ac22ff29d2a9f..82f59933ad7b3 100644 --- a/drivers/media/platform/qcom/camss/camss-csid-170.c +++ b/drivers/media/platform/qcom/camss/camss-csid-170.c @@ -105,7 +105,8 @@ #define CSID_RDI_CTRL(rdi) ((IS_LITE ? 0x208 : 0x308)\ + 0x100 * (rdi)) #define RDI_CTRL_HALT_CMD 0 -#define ALT_CMD_RESUME_AT_FRAME_BOUNDARY 1 +#define HALT_CMD_HALT_AT_FRAME_BOUNDARY 0 +#define HALT_CMD_RESUME_AT_FRAME_BOUNDARY 1 #define RDI_CTRL_HALT_MODE 2 #define CSID_RDI_FRM_DROP_PATTERN(rdi) ((IS_LITE ? 0x20C : 0x30C)\ @@ -366,7 +367,7 @@ static void csid_configure_stream(struct csid_device *csid, u8 enable) val |= input_format->width & 0x1fff << TPG_DT_n_CFG_0_FRAME_WIDTH; writel_relaxed(val, csid->base + CSID_TPG_DT_n_CFG_0(0)); - val = DATA_TYPE_RAW_10BIT << TPG_DT_n_CFG_1_DATA_TYPE; + val = format->data_type << TPG_DT_n_CFG_1_DATA_TYPE; writel_relaxed(val, csid->base + CSID_TPG_DT_n_CFG_1(0)); val = tg->mode << TPG_DT_n_CFG_2_PAYLOAD_MODE; @@ -382,8 +383,9 @@ static void csid_configure_stream(struct csid_device *csid, u8 enable) val = 1 << RDI_CFG0_BYTE_CNTR_EN; val |= 1 << RDI_CFG0_FORMAT_MEASURE_EN; val |= 1 << RDI_CFG0_TIMESTAMP_EN; + /* note: for non-RDI path, this should be format->decode_format */ val |= DECODE_FORMAT_PAYLOAD_ONLY << RDI_CFG0_DECODE_FORMAT; - val |= DATA_TYPE_RAW_10BIT << RDI_CFG0_DATA_TYPE; + val |= format->data_type << RDI_CFG0_DATA_TYPE; val |= vc << RDI_CFG0_VIRTUAL_CHANNEL; val |= dt_id << RDI_CFG0_DT_ID; writel_relaxed(val, csid->base + CSID_RDI_CFG0(0)); @@ -443,13 +445,10 @@ static void csid_configure_stream(struct csid_device *csid, u8 enable) val |= 1 << CSI2_RX_CFG1_MISR_EN; writel_relaxed(val, csid->base + CSID_CSI2_RX_CFG1); // csi2_vc_mode_shift_val ? 
- /* error irqs start at BIT(11) */ - writel_relaxed(~0u, csid->base + CSID_CSI2_RX_IRQ_MASK); - - /* RDI irq */ - writel_relaxed(~0u, csid->base + CSID_TOP_IRQ_MASK); - - val = 1 << RDI_CTRL_HALT_CMD; + if (enable) + val = HALT_CMD_RESUME_AT_FRAME_BOUNDARY << RDI_CTRL_HALT_CMD; + else + val = HALT_CMD_HALT_AT_FRAME_BOUNDARY << RDI_CTRL_HALT_CMD; writel_relaxed(val, csid->base + CSID_RDI_CTRL(0)); } diff --git a/drivers/media/platform/qcom/camss/camss-vfe-170.c b/drivers/media/platform/qcom/camss/camss-vfe-170.c index f524af712a843..600150cfc4f70 100644 --- a/drivers/media/platform/qcom/camss/camss-vfe-170.c +++ b/drivers/media/platform/qcom/camss/camss-vfe-170.c @@ -395,17 +395,7 @@ static irqreturn_t vfe_isr(int irq, void *dev) */ static int vfe_halt(struct vfe_device *vfe) { - unsigned long time; - - reinit_completion(&vfe->halt_complete); - - time = wait_for_completion_timeout(&vfe->halt_complete, - msecs_to_jiffies(VFE_HALT_TIMEOUT_MS)); - if (!time) { - dev_err(vfe->camss->dev, "VFE halt timeout\n"); - return -EIO; - } - + /* rely on vfe_disable_output() to stop the VFE */ return 0; } diff --git a/drivers/media/platform/qcom/venus/helpers.c b/drivers/media/platform/qcom/venus/helpers.c index 84c3a511ec31e..0bca95d016507 100644 --- a/drivers/media/platform/qcom/venus/helpers.c +++ b/drivers/media/platform/qcom/venus/helpers.c @@ -189,7 +189,6 @@ int venus_helper_alloc_dpb_bufs(struct venus_inst *inst) buf->va = dma_alloc_attrs(dev, buf->size, &buf->da, GFP_KERNEL, buf->attrs); if (!buf->va) { - kfree(buf); ret = -ENOMEM; goto fail; } @@ -209,6 +208,7 @@ int venus_helper_alloc_dpb_bufs(struct venus_inst *inst) return 0; fail: + kfree(buf); venus_helper_free_dpb_bufs(inst); return ret; } diff --git a/drivers/media/platform/qcom/venus/hfi_cmds.c b/drivers/media/platform/qcom/venus/hfi_cmds.c index 5aea07307e02e..4ecd444050bb6 100644 --- a/drivers/media/platform/qcom/venus/hfi_cmds.c +++ b/drivers/media/platform/qcom/venus/hfi_cmds.c @@ -1054,6 +1054,8 @@ static 
int pkt_session_set_property_1x(struct hfi_session_set_property_pkt *pkt, pkt->shdr.hdr.size += sizeof(u32) + sizeof(*info); break; } + case HFI_PROPERTY_PARAM_VENC_HDR10_PQ_SEI: + return -ENOTSUPP; /* FOLLOWING PROPERTIES ARE NOT IMPLEMENTED IN CORE YET */ case HFI_PROPERTY_CONFIG_BUFFER_REQUIREMENTS: diff --git a/drivers/media/platform/qcom/venus/venc.c b/drivers/media/platform/qcom/venus/venc.c index 84bafc3118cc6..adea4c3b8c204 100644 --- a/drivers/media/platform/qcom/venus/venc.c +++ b/drivers/media/platform/qcom/venus/venc.c @@ -662,8 +662,8 @@ static int venc_set_properties(struct venus_inst *inst) ptype = HFI_PROPERTY_PARAM_VENC_H264_TRANSFORM_8X8; h264_transform.enable_type = 0; - if (ctr->profile.h264 == HFI_H264_PROFILE_HIGH || - ctr->profile.h264 == HFI_H264_PROFILE_CONSTRAINED_HIGH) + if (ctr->profile.h264 == V4L2_MPEG_VIDEO_H264_PROFILE_HIGH || + ctr->profile.h264 == V4L2_MPEG_VIDEO_H264_PROFILE_CONSTRAINED_HIGH) h264_transform.enable_type = ctr->h264_8x8_transform; ret = hfi_session_set_property(inst, ptype, &h264_transform); diff --git a/drivers/media/platform/qcom/venus/venc_ctrls.c b/drivers/media/platform/qcom/venus/venc_ctrls.c index 1ada42df314dc..ea5805e71c143 100644 --- a/drivers/media/platform/qcom/venus/venc_ctrls.c +++ b/drivers/media/platform/qcom/venus/venc_ctrls.c @@ -320,8 +320,8 @@ static int venc_op_s_ctrl(struct v4l2_ctrl *ctrl) ctr->intra_refresh_period = ctrl->val; break; case V4L2_CID_MPEG_VIDEO_H264_8X8_TRANSFORM: - if (ctr->profile.h264 != HFI_H264_PROFILE_HIGH && - ctr->profile.h264 != HFI_H264_PROFILE_CONSTRAINED_HIGH) + if (ctr->profile.h264 != V4L2_MPEG_VIDEO_H264_PROFILE_HIGH && + ctr->profile.h264 != V4L2_MPEG_VIDEO_H264_PROFILE_CONSTRAINED_HIGH) return -EINVAL; /* @@ -457,7 +457,7 @@ int venc_ctrl_init(struct venus_inst *inst) V4L2_CID_MPEG_VIDEO_H264_I_FRAME_MIN_QP, 1, 51, 1, 1); v4l2_ctrl_new_std(&inst->ctrl_handler, &venc_ctrl_ops, - V4L2_CID_MPEG_VIDEO_H264_8X8_TRANSFORM, 0, 1, 1, 0); + 
V4L2_CID_MPEG_VIDEO_H264_8X8_TRANSFORM, 0, 1, 1, 1); v4l2_ctrl_new_std(&inst->ctrl_handler, &venc_ctrl_ops, V4L2_CID_MPEG_VIDEO_H264_P_FRAME_MIN_QP, 1, 51, 1, 1); diff --git a/drivers/media/platform/rockchip/rga/rga.c b/drivers/media/platform/rockchip/rga/rga.c index 4de5e8d2b261b..3d3d1062e2122 100644 --- a/drivers/media/platform/rockchip/rga/rga.c +++ b/drivers/media/platform/rockchip/rga/rga.c @@ -892,7 +892,7 @@ static int rga_probe(struct platform_device *pdev) } rga->dst_mmu_pages = (unsigned int *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 3); - if (rga->dst_mmu_pages) { + if (!rga->dst_mmu_pages) { ret = -ENOMEM; goto free_src_pages; } diff --git a/drivers/media/platform/ti-vpe/cal-video.c b/drivers/media/platform/ti-vpe/cal-video.c index 7799da1cc261b..3e936a2ca36c6 100644 --- a/drivers/media/platform/ti-vpe/cal-video.c +++ b/drivers/media/platform/ti-vpe/cal-video.c @@ -823,6 +823,9 @@ static int cal_ctx_v4l2_init_formats(struct cal_ctx *ctx) /* Enumerate sub device formats and enable all matching local formats */ ctx->active_fmt = devm_kcalloc(ctx->cal->dev, cal_num_formats, sizeof(*ctx->active_fmt), GFP_KERNEL); + if (!ctx->active_fmt) + return -ENOMEM; + ctx->num_active_fmt = 0; for (j = 0, i = 0; ; ++j) { diff --git a/drivers/media/rc/gpio-ir-tx.c b/drivers/media/rc/gpio-ir-tx.c index c6cd2e6d8e654..a50701cfbbd7b 100644 --- a/drivers/media/rc/gpio-ir-tx.c +++ b/drivers/media/rc/gpio-ir-tx.c @@ -48,11 +48,29 @@ static int gpio_ir_tx_set_carrier(struct rc_dev *dev, u32 carrier) return 0; } +static void delay_until(ktime_t until) +{ + /* + * delta should never exceed 0.5 seconds (IR_MAX_DURATION) and on + * m68k ndelay(s64) does not compile; so use s32 rather than s64. 
+ */ + s32 delta; + + while (true) { + delta = ktime_us_delta(until, ktime_get()); + if (delta <= 0) + return; + + /* udelay more than 1ms may not work */ + delta = min(delta, 1000); + udelay(delta); + } +} + static void gpio_ir_tx_unmodulated(struct gpio_ir *gpio_ir, uint *txbuf, uint count) { ktime_t edge; - s32 delta; int i; local_irq_disable(); @@ -63,9 +81,7 @@ static void gpio_ir_tx_unmodulated(struct gpio_ir *gpio_ir, uint *txbuf, gpiod_set_value(gpio_ir->gpio, !(i % 2)); edge = ktime_add_us(edge, txbuf[i]); - delta = ktime_us_delta(edge, ktime_get()); - if (delta > 0) - udelay(delta); + delay_until(edge); } gpiod_set_value(gpio_ir->gpio, 0); @@ -97,9 +113,7 @@ static void gpio_ir_tx_modulated(struct gpio_ir *gpio_ir, uint *txbuf, if (i % 2) { // space edge = ktime_add_us(edge, txbuf[i]); - delta = ktime_us_delta(edge, ktime_get()); - if (delta > 0) - udelay(delta); + delay_until(edge); } else { // pulse ktime_t last = ktime_add_us(edge, txbuf[i]); diff --git a/drivers/media/rc/ir_toy.c b/drivers/media/rc/ir_toy.c index 7e98e7e3aacec..1968067092594 100644 --- a/drivers/media/rc/ir_toy.c +++ b/drivers/media/rc/ir_toy.c @@ -458,7 +458,7 @@ static int irtoy_probe(struct usb_interface *intf, err = usb_submit_urb(irtoy->urb_in, GFP_KERNEL); if (err != 0) { dev_err(irtoy->dev, "fail to submit in urb: %d\n", err); - return err; + goto free_rcdev; } err = irtoy_setup(irtoy); diff --git a/drivers/media/test-drivers/vidtv/vidtv_s302m.c b/drivers/media/test-drivers/vidtv/vidtv_s302m.c index d79b65854627c..4676083cee3b8 100644 --- a/drivers/media/test-drivers/vidtv/vidtv_s302m.c +++ b/drivers/media/test-drivers/vidtv/vidtv_s302m.c @@ -455,6 +455,9 @@ struct vidtv_encoder e->name = kstrdup(args.name, GFP_KERNEL); e->encoder_buf = vzalloc(VIDTV_S302M_BUF_SZ); + if (!e->encoder_buf) + goto out_kfree_e; + e->encoder_buf_sz = VIDTV_S302M_BUF_SZ; e->encoder_buf_offset = 0; @@ -467,10 +470,8 @@ struct vidtv_encoder e->is_video_encoder = false; ctx = kzalloc(priv_sz, 
GFP_KERNEL); - if (!ctx) { - kfree(e); - return NULL; - } + if (!ctx) + goto out_kfree_buf; e->ctx = ctx; ctx->last_duration = 0; @@ -498,6 +499,14 @@ struct vidtv_encoder e->next = NULL; return e; + +out_kfree_buf: + kfree(e->encoder_buf); + +out_kfree_e: + kfree(e->name); + kfree(e); + return NULL; } void vidtv_s302m_encoder_destroy(struct vidtv_encoder *e) diff --git a/drivers/media/tuners/si2157.c b/drivers/media/tuners/si2157.c index 47029746b89ee..0de587b412d4e 100644 --- a/drivers/media/tuners/si2157.c +++ b/drivers/media/tuners/si2157.c @@ -77,16 +77,16 @@ static int si2157_cmd_execute(struct i2c_client *client, struct si2157_cmd *cmd) } static const struct si2157_tuner_info si2157_tuners[] = { - { SI2141, false, 0x60, SI2141_60_FIRMWARE, SI2141_A10_FIRMWARE }, - { SI2141, false, 0x61, SI2141_61_FIRMWARE, SI2141_A10_FIRMWARE }, - { SI2146, false, 0x11, SI2146_11_FIRMWARE, NULL }, - { SI2147, false, 0x50, SI2147_50_FIRMWARE, NULL }, - { SI2148, true, 0x32, SI2148_32_FIRMWARE, SI2158_A20_FIRMWARE }, - { SI2148, true, 0x33, SI2148_33_FIRMWARE, SI2158_A20_FIRMWARE }, - { SI2157, false, 0x50, SI2157_50_FIRMWARE, SI2157_A30_FIRMWARE }, - { SI2158, false, 0x50, SI2158_50_FIRMWARE, SI2158_A20_FIRMWARE }, - { SI2158, false, 0x51, SI2158_51_FIRMWARE, SI2158_A20_FIRMWARE }, - { SI2177, false, 0x50, SI2177_50_FIRMWARE, SI2157_A30_FIRMWARE }, + { SI2141, 0x60, false, SI2141_60_FIRMWARE, SI2141_A10_FIRMWARE }, + { SI2141, 0x61, false, SI2141_61_FIRMWARE, SI2141_A10_FIRMWARE }, + { SI2146, 0x11, false, SI2146_11_FIRMWARE, NULL }, + { SI2147, 0x50, false, SI2147_50_FIRMWARE, NULL }, + { SI2148, 0x32, true, SI2148_32_FIRMWARE, SI2158_A20_FIRMWARE }, + { SI2148, 0x33, true, SI2148_33_FIRMWARE, SI2158_A20_FIRMWARE }, + { SI2157, 0x50, false, SI2157_50_FIRMWARE, SI2157_A30_FIRMWARE }, + { SI2158, 0x50, false, SI2158_50_FIRMWARE, SI2158_A20_FIRMWARE }, + { SI2158, 0x51, false, SI2158_51_FIRMWARE, SI2158_A20_FIRMWARE }, + { SI2177, 0x50, false, SI2177_50_FIRMWARE, 
SI2157_A30_FIRMWARE }, }; static int si2157_load_firmware(struct dvb_frontend *fe, @@ -178,7 +178,7 @@ static int si2157_find_and_load_firmware(struct dvb_frontend *fe) } } - if (!fw_name && !fw_alt_name) { + if (required && !fw_name && !fw_alt_name) { dev_err(&client->dev, "unknown chip version Si21%d-%c%c%c ROM 0x%02x\n", part_id, cmd.args[1], cmd.args[3], cmd.args[4], rom_id); diff --git a/drivers/media/usb/em28xx/em28xx-cards.c b/drivers/media/usb/em28xx/em28xx-cards.c index b451ce3cb169a..ae25d2cbfdfee 100644 --- a/drivers/media/usb/em28xx/em28xx-cards.c +++ b/drivers/media/usb/em28xx/em28xx-cards.c @@ -3936,6 +3936,8 @@ static int em28xx_usb_probe(struct usb_interface *intf, goto err_free; } + kref_init(&dev->ref); + dev->devno = nr; dev->model = id->driver_info; dev->alt = -1; @@ -4036,6 +4038,8 @@ static int em28xx_usb_probe(struct usb_interface *intf, } if (dev->board.has_dual_ts && em28xx_duplicate_dev(dev) == 0) { + kref_init(&dev->dev_next->ref); + dev->dev_next->ts = SECONDARY_TS; dev->dev_next->alt = -1; dev->dev_next->is_audio_only = has_vendor_audio && @@ -4090,12 +4094,8 @@ static int em28xx_usb_probe(struct usb_interface *intf, em28xx_write_reg(dev, 0x0b, 0x82); mdelay(100); } - - kref_init(&dev->dev_next->ref); } - kref_init(&dev->ref); - request_modules(dev); /* @@ -4150,11 +4150,8 @@ static void em28xx_usb_disconnect(struct usb_interface *intf) em28xx_close_extension(dev); - if (dev->dev_next) { - em28xx_close_extension(dev->dev_next); + if (dev->dev_next) em28xx_release_resources(dev->dev_next); - } - em28xx_release_resources(dev); if (dev->dev_next) { diff --git a/drivers/media/usb/go7007/s2250-board.c b/drivers/media/usb/go7007/s2250-board.c index c742cc88fac5c..1fa6f10ee157b 100644 --- a/drivers/media/usb/go7007/s2250-board.c +++ b/drivers/media/usb/go7007/s2250-board.c @@ -504,6 +504,7 @@ static int s2250_probe(struct i2c_client *client, u8 *data; struct go7007 *go = i2c_get_adapdata(adapter); struct go7007_usb *usb = go->hpi_context; + 
int err = -EIO; audio = i2c_new_dummy_device(adapter, TLV320_ADDRESS >> 1); if (IS_ERR(audio)) @@ -532,11 +533,8 @@ static int s2250_probe(struct i2c_client *client, V4L2_CID_HUE, -512, 511, 1, 0); sd->ctrl_handler = &state->hdl; if (state->hdl.error) { - int err = state->hdl.error; - - v4l2_ctrl_handler_free(&state->hdl); - kfree(state); - return err; + err = state->hdl.error; + goto fail; } state->std = V4L2_STD_NTSC; @@ -600,7 +598,7 @@ static int s2250_probe(struct i2c_client *client, i2c_unregister_device(audio); v4l2_ctrl_handler_free(&state->hdl); kfree(state); - return -EIO; + return err; } static int s2250_remove(struct i2c_client *client) diff --git a/drivers/media/usb/hdpvr/hdpvr-video.c b/drivers/media/usb/hdpvr/hdpvr-video.c index 563128d117317..60e57e0f19272 100644 --- a/drivers/media/usb/hdpvr/hdpvr-video.c +++ b/drivers/media/usb/hdpvr/hdpvr-video.c @@ -308,7 +308,6 @@ static int hdpvr_start_streaming(struct hdpvr_device *dev) dev->status = STATUS_STREAMING; - INIT_WORK(&dev->worker, hdpvr_transmit_buffers); schedule_work(&dev->worker); v4l2_dbg(MSG_BUFFER, hdpvr_debug, &dev->v4l2_dev, @@ -1165,6 +1164,9 @@ int hdpvr_register_videodev(struct hdpvr_device *dev, struct device *parent, bool ac3 = dev->flags & HDPVR_FLAG_AC3_CAP; int res; + // initialize dev->worker + INIT_WORK(&dev->worker, hdpvr_transmit_buffers); + dev->cur_std = V4L2_STD_525_60; dev->width = 720; dev->height = 480; diff --git a/drivers/media/usb/stk1160/stk1160-core.c b/drivers/media/usb/stk1160/stk1160-core.c index 4e1698f788187..ce717502ea4c3 100644 --- a/drivers/media/usb/stk1160/stk1160-core.c +++ b/drivers/media/usb/stk1160/stk1160-core.c @@ -403,7 +403,7 @@ static void stk1160_disconnect(struct usb_interface *interface) /* Here is the only place where isoc get released */ stk1160_uninit_isoc(dev); - stk1160_clear_queue(dev); + stk1160_clear_queue(dev, VB2_BUF_STATE_ERROR); video_unregister_device(&dev->vdev); v4l2_device_disconnect(&dev->v4l2_dev); diff --git 
a/drivers/media/usb/stk1160/stk1160-v4l.c b/drivers/media/usb/stk1160/stk1160-v4l.c index 6a4eb616d5160..1aa953469402f 100644 --- a/drivers/media/usb/stk1160/stk1160-v4l.c +++ b/drivers/media/usb/stk1160/stk1160-v4l.c @@ -258,7 +258,7 @@ static int stk1160_start_streaming(struct stk1160 *dev) stk1160_uninit_isoc(dev); out_stop_hw: usb_set_interface(dev->udev, 0, 0); - stk1160_clear_queue(dev); + stk1160_clear_queue(dev, VB2_BUF_STATE_QUEUED); mutex_unlock(&dev->v4l_lock); @@ -306,7 +306,7 @@ static int stk1160_stop_streaming(struct stk1160 *dev) stk1160_stop_hw(dev); - stk1160_clear_queue(dev); + stk1160_clear_queue(dev, VB2_BUF_STATE_ERROR); stk1160_dbg("streaming stopped\n"); @@ -745,7 +745,7 @@ static const struct video_device v4l_template = { /********************************************************************/ /* Must be called with both v4l_lock and vb_queue_lock hold */ -void stk1160_clear_queue(struct stk1160 *dev) +void stk1160_clear_queue(struct stk1160 *dev, enum vb2_buffer_state vb2_state) { struct stk1160_buffer *buf; unsigned long flags; @@ -756,7 +756,7 @@ void stk1160_clear_queue(struct stk1160 *dev) buf = list_first_entry(&dev->avail_bufs, struct stk1160_buffer, list); list_del(&buf->list); - vb2_buffer_done(&buf->vb.vb2_buf, VB2_BUF_STATE_ERROR); + vb2_buffer_done(&buf->vb.vb2_buf, vb2_state); stk1160_dbg("buffer [%p/%d] aborted\n", buf, buf->vb.vb2_buf.index); } @@ -766,7 +766,7 @@ void stk1160_clear_queue(struct stk1160 *dev) buf = dev->isoc_ctl.buf; dev->isoc_ctl.buf = NULL; - vb2_buffer_done(&buf->vb.vb2_buf, VB2_BUF_STATE_ERROR); + vb2_buffer_done(&buf->vb.vb2_buf, vb2_state); stk1160_dbg("buffer [%p/%d] aborted\n", buf, buf->vb.vb2_buf.index); } diff --git a/drivers/media/usb/stk1160/stk1160.h b/drivers/media/usb/stk1160/stk1160.h index a31ea1c80f255..a70963ce87533 100644 --- a/drivers/media/usb/stk1160/stk1160.h +++ b/drivers/media/usb/stk1160/stk1160.h @@ -166,7 +166,7 @@ struct regval { int stk1160_vb2_setup(struct stk1160 *dev); int 
stk1160_video_register(struct stk1160 *dev); void stk1160_video_unregister(struct stk1160 *dev); -void stk1160_clear_queue(struct stk1160 *dev); +void stk1160_clear_queue(struct stk1160 *dev, enum vb2_buffer_state vb2_state); /* Provided by stk1160-video.c */ int stk1160_alloc_isoc(struct stk1160 *dev); diff --git a/drivers/media/v4l2-core/Kconfig b/drivers/media/v4l2-core/Kconfig index 6ee75c6c820e3..7f08e2b5a5041 100644 --- a/drivers/media/v4l2-core/Kconfig +++ b/drivers/media/v4l2-core/Kconfig @@ -48,6 +48,11 @@ config VIDEO_TUNER config V4L2_JPEG_HELPER tristate +config V4L2_LOOPBACK + tristate "V4L2 loopback device" + help + V4L2 loopback device + # Used by drivers that need v4l2-h264.ko config V4L2_H264 tristate diff --git a/drivers/media/v4l2-core/Makefile b/drivers/media/v4l2-core/Makefile index 83fac5c746f54..9d7f1d0a331b0 100644 --- a/drivers/media/v4l2-core/Makefile +++ b/drivers/media/v4l2-core/Makefile @@ -30,6 +30,8 @@ obj-$(CONFIG_V4L2_FLASH_LED_CLASS) += v4l2-flash-led-class.o obj-$(CONFIG_V4L2_JPEG_HELPER) += v4l2-jpeg.o +obj-$(CONFIG_V4L2_LOOPBACK) += v4l2loopback.o + obj-$(CONFIG_VIDEOBUF_GEN) += videobuf-core.o obj-$(CONFIG_VIDEOBUF_DMA_SG) += videobuf-dma-sg.o obj-$(CONFIG_VIDEOBUF_DMA_CONTIG) += videobuf-dma-contig.o diff --git a/drivers/media/v4l2-core/v4l2-ctrls-core.c b/drivers/media/v4l2-core/v4l2-ctrls-core.c index 54abe5245dcc4..df8cff47a7fb5 100644 --- a/drivers/media/v4l2-core/v4l2-ctrls-core.c +++ b/drivers/media/v4l2-core/v4l2-ctrls-core.c @@ -112,7 +112,9 @@ static void std_init_compound(const struct v4l2_ctrl *ctrl, u32 idx, struct v4l2_ctrl_mpeg2_picture *p_mpeg2_picture; struct v4l2_ctrl_mpeg2_quantisation *p_mpeg2_quant; struct v4l2_ctrl_vp8_frame *p_vp8_frame; + struct v4l2_ctrl_vp9_frame *p_vp9_frame; struct v4l2_ctrl_fwht_params *p_fwht_params; + struct v4l2_ctrl_h264_scaling_matrix *p_h264_scaling_matrix; void *p = ptr.p + idx * ctrl->elem_size; if (ctrl->p_def.p_const) @@ -152,6 +154,13 @@ static void 
std_init_compound(const struct v4l2_ctrl *ctrl, u32 idx, p_vp8_frame = p; p_vp8_frame->num_dct_parts = 1; break; + case V4L2_CTRL_TYPE_VP9_FRAME: + p_vp9_frame = p; + p_vp9_frame->profile = 0; + p_vp9_frame->bit_depth = 8; + p_vp9_frame->flags |= V4L2_VP9_FRAME_FLAG_X_SUBSAMPLING | + V4L2_VP9_FRAME_FLAG_Y_SUBSAMPLING; + break; case V4L2_CTRL_TYPE_FWHT_PARAMS: p_fwht_params = p; p_fwht_params->version = V4L2_FWHT_VERSION; @@ -160,6 +169,15 @@ static void std_init_compound(const struct v4l2_ctrl *ctrl, u32 idx, p_fwht_params->flags = V4L2_FWHT_FL_PIXENC_YUV | (2 << V4L2_FWHT_FL_COMPONENTS_NUM_OFFSET); break; + case V4L2_CTRL_TYPE_H264_SCALING_MATRIX: + p_h264_scaling_matrix = p; + /* + * The default (flat) H.264 scaling matrix when none are + * specified in the bitstream, this is according to formulas + * (7-8) and (7-9) of the specification. + */ + memset(p_h264_scaling_matrix, 16, sizeof(*p_h264_scaling_matrix)); + break; } } diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c b/drivers/media/v4l2-core/v4l2-ioctl.c index 9ac557b8e1467..642cb90f457c6 100644 --- a/drivers/media/v4l2-core/v4l2-ioctl.c +++ b/drivers/media/v4l2-core/v4l2-ioctl.c @@ -279,8 +279,8 @@ static void v4l_print_format(const void *arg, bool write_only) const struct v4l2_vbi_format *vbi; const struct v4l2_sliced_vbi_format *sliced; const struct v4l2_window *win; - const struct v4l2_sdr_format *sdr; const struct v4l2_meta_format *meta; + u32 pixelformat; u32 planes; unsigned i; @@ -299,8 +299,9 @@ static void v4l_print_format(const void *arg, bool write_only) case V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE: case V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE: mp = &p->fmt.pix_mp; + pixelformat = mp->pixelformat; pr_cont(", width=%u, height=%u, format=%p4cc, field=%s, colorspace=%d, num_planes=%u, flags=0x%x, ycbcr_enc=%u, quantization=%u, xfer_func=%u\n", - mp->width, mp->height, &mp->pixelformat, + mp->width, mp->height, &pixelformat, prt_names(mp->field, v4l2_field_names), mp->colorspace, mp->num_planes, mp->flags, 
mp->ycbcr_enc, mp->quantization, mp->xfer_func); @@ -343,14 +344,15 @@ static void v4l_print_format(const void *arg, bool write_only) break; case V4L2_BUF_TYPE_SDR_CAPTURE: case V4L2_BUF_TYPE_SDR_OUTPUT: - sdr = &p->fmt.sdr; - pr_cont(", pixelformat=%p4cc\n", &sdr->pixelformat); + pixelformat = p->fmt.sdr.pixelformat; + pr_cont(", pixelformat=%p4cc\n", &pixelformat); break; case V4L2_BUF_TYPE_META_CAPTURE: case V4L2_BUF_TYPE_META_OUTPUT: meta = &p->fmt.meta; + pixelformat = meta->dataformat; pr_cont(", dataformat=%p4cc, buffersize=%u\n", - &meta->dataformat, meta->buffersize); + &pixelformat, meta->buffersize); break; } } diff --git a/drivers/media/v4l2-core/v4l2-mem2mem.c b/drivers/media/v4l2-core/v4l2-mem2mem.c index e2654b422334c..675e22895ebe6 100644 --- a/drivers/media/v4l2-core/v4l2-mem2mem.c +++ b/drivers/media/v4l2-core/v4l2-mem2mem.c @@ -585,19 +585,14 @@ int v4l2_m2m_reqbufs(struct file *file, struct v4l2_m2m_ctx *m2m_ctx, } EXPORT_SYMBOL_GPL(v4l2_m2m_reqbufs); -int v4l2_m2m_querybuf(struct file *file, struct v4l2_m2m_ctx *m2m_ctx, - struct v4l2_buffer *buf) +static void v4l2_m2m_adjust_mem_offset(struct vb2_queue *vq, + struct v4l2_buffer *buf) { - struct vb2_queue *vq; - int ret = 0; - unsigned int i; - - vq = v4l2_m2m_get_vq(m2m_ctx, buf->type); - ret = vb2_querybuf(vq, buf); - /* Adjust MMAP memory offsets for the CAPTURE queue */ if (buf->memory == V4L2_MEMORY_MMAP && V4L2_TYPE_IS_CAPTURE(vq->type)) { if (V4L2_TYPE_IS_MULTIPLANAR(vq->type)) { + unsigned int i; + for (i = 0; i < buf->length; ++i) buf->m.planes[i].m.mem_offset += DST_QUEUE_OFF_BASE; @@ -605,8 +600,23 @@ int v4l2_m2m_querybuf(struct file *file, struct v4l2_m2m_ctx *m2m_ctx, buf->m.offset += DST_QUEUE_OFF_BASE; } } +} - return ret; +int v4l2_m2m_querybuf(struct file *file, struct v4l2_m2m_ctx *m2m_ctx, + struct v4l2_buffer *buf) +{ + struct vb2_queue *vq; + int ret; + + vq = v4l2_m2m_get_vq(m2m_ctx, buf->type); + ret = vb2_querybuf(vq, buf); + if (ret) + return ret; + + /* Adjust MMAP 
memory offsets for the CAPTURE queue */ + v4l2_m2m_adjust_mem_offset(vq, buf); + + return 0; } EXPORT_SYMBOL_GPL(v4l2_m2m_querybuf); @@ -763,6 +773,9 @@ int v4l2_m2m_qbuf(struct file *file, struct v4l2_m2m_ctx *m2m_ctx, if (ret) return ret; + /* Adjust MMAP memory offsets for the CAPTURE queue */ + v4l2_m2m_adjust_mem_offset(vq, buf); + /* * If the capture queue is streaming, but streaming hasn't started * on the device, but was asked to stop, mark the previously queued @@ -784,9 +797,17 @@ int v4l2_m2m_dqbuf(struct file *file, struct v4l2_m2m_ctx *m2m_ctx, struct v4l2_buffer *buf) { struct vb2_queue *vq; + int ret; vq = v4l2_m2m_get_vq(m2m_ctx, buf->type); - return vb2_dqbuf(vq, buf, file->f_flags & O_NONBLOCK); + ret = vb2_dqbuf(vq, buf, file->f_flags & O_NONBLOCK); + if (ret) + return ret; + + /* Adjust MMAP memory offsets for the CAPTURE queue */ + v4l2_m2m_adjust_mem_offset(vq, buf); + + return 0; } EXPORT_SYMBOL_GPL(v4l2_m2m_dqbuf); @@ -795,9 +816,17 @@ int v4l2_m2m_prepare_buf(struct file *file, struct v4l2_m2m_ctx *m2m_ctx, { struct video_device *vdev = video_devdata(file); struct vb2_queue *vq; + int ret; vq = v4l2_m2m_get_vq(m2m_ctx, buf->type); - return vb2_prepare_buf(vq, vdev->v4l2_dev->mdev, buf); + ret = vb2_prepare_buf(vq, vdev->v4l2_dev->mdev, buf); + if (ret) + return ret; + + /* Adjust MMAP memory offsets for the CAPTURE queue */ + v4l2_m2m_adjust_mem_offset(vq, buf); + + return 0; } EXPORT_SYMBOL_GPL(v4l2_m2m_prepare_buf); diff --git a/drivers/media/v4l2-core/v4l2loopback.c b/drivers/media/v4l2-core/v4l2loopback.c new file mode 100644 index 0000000000000..b1c747f7d13c6 --- /dev/null +++ b/drivers/media/v4l2-core/v4l2loopback.c @@ -0,0 +1,2914 @@ +/* -*- c-file-style: "linux" -*- */ +/* + * v4l2loopback.c -- video4linux2 loopback driver + * + * Copyright (C) 2005-2009 Vasily Levin (vasaka@gmail.com) + * Copyright (C) 2010-2019 IOhannes m zmoelnig (zmoelnig@iem.at) + * Copyright (C) 2011 Stefan Diewald (stefan.diewald@mytum.de) + * Copyright (C) 
2012 Anton Novikov (random.plant@gmail.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29) +#define HAVE__V4L2_DEVICE +#include +#endif +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 36) +#define HAVE__V4L2_CTRLS +#include +#endif +#include + +#include +#include "v4l2loopback.h" + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 6, 1) +#define kstrtoul strict_strtoul +#endif + +#if defined(timer_setup) && defined(from_timer) +#define HAVE_TIMER_SETUP +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 7, 0) +#define VFL_TYPE_VIDEO VFL_TYPE_GRABBER +#endif + +#define V4L2LOOPBACK_VERSION_CODE \ + KERNEL_VERSION(V4L2LOOPBACK_VERSION_MAJOR, V4L2LOOPBACK_VERSION_MINOR, \ + V4L2LOOPBACK_VERSION_BUGFIX) + +MODULE_DESCRIPTION("V4L2 loopback video device"); +MODULE_AUTHOR("Vasily Levin, " + "IOhannes m zmoelnig ," + "Stefan Diewald," + "Anton Novikov" + "et al."); +MODULE_VERSION("0.12.5"); +MODULE_LICENSE("GPL"); + +/* + * helpers + */ +#define STRINGIFY(s) #s +#define STRINGIFY2(s) STRINGIFY(s) + +#define dprintk(fmt, args...) \ + do { \ + if (debug > 0) { \ + printk(KERN_INFO "v4l2-loopback[" STRINGIFY2( \ + __LINE__) "]: " fmt, \ + ##args); \ + } \ + } while (0) + +#define MARK() \ + do { \ + if (debug > 1) { \ + printk(KERN_INFO "%s:%d[%s]\n", __FILE__, __LINE__, \ + __func__); \ + } \ + } while (0) + +#define dprintkrw(fmt, args...) 
\ + do { \ + if (debug > 2) { \ + printk(KERN_INFO "v4l2-loopback[" STRINGIFY2( \ + __LINE__) "]: " fmt, \ + ##args); \ + } \ + } while (0) + +/* + * compatibility hacks + */ + +#ifndef HAVE__V4L2_CTRLS +struct v4l2_ctrl_handler { + int error; +}; +struct v4l2_ctrl_config { + void *ops; + u32 id; + const char *name; + int type; + s32 min; + s32 max; + u32 step; + s32 def; +}; +int v4l2_ctrl_handler_init(struct v4l2_ctrl_handler *hdl, + unsigned nr_of_controls_hint) +{ + hdl->error = 0; + return 0; +} +void v4l2_ctrl_handler_free(struct v4l2_ctrl_handler *hdl) +{ +} +void *v4l2_ctrl_new_custom(struct v4l2_ctrl_handler *hdl, + const struct v4l2_ctrl_config *conf, void *priv) +{ + return NULL; +} +#endif /* HAVE__V4L2_CTRLS */ + +#ifndef HAVE__V4L2_DEVICE +/* dummy v4l2_device struct/functions */ +#define V4L2_DEVICE_NAME_SIZE (20 + 16) +struct v4l2_device { + char name[V4L2_DEVICE_NAME_SIZE]; + struct v4l2_ctrl_handler *ctrl_handler; +}; +static inline int v4l2_device_register(void *dev, void *v4l2_dev) +{ + return 0; +} +static inline void v4l2_device_unregister(struct v4l2_device *v4l2_dev) +{ + return; +} +#endif /* HAVE__V4L2_DEVICE */ + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 29) +#define v4l2_file_operations file_operations +#endif +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 37) +void *v4l2l_vzalloc(unsigned long size) +{ + void *data = vmalloc(size); + + memset(data, 0, size); + return data; +} +#else +#define v4l2l_vzalloc vzalloc +#endif + +static inline void v4l2l_get_timestamp(struct v4l2_buffer *b) +{ + /* ktime_get_ts is considered deprecated, so use ktime_get_ts64 if possible */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0) + struct timespec ts; + ktime_get_ts(&ts); +#else + struct timespec64 ts; + ktime_get_ts64(&ts); +#endif + + b->timestamp.tv_sec = ts.tv_sec; + b->timestamp.tv_usec = (ts.tv_nsec / NSEC_PER_USEC); +} + +#if !defined(__poll_t) +typedef unsigned __poll_t; +#endif + +/* module constants + * can be overridden during he 
build process using something like + * make KCPPFLAGS="-DMAX_DEVICES=100" + */ + +/* maximum number of v4l2loopback devices that can be created */ +#ifndef MAX_DEVICES +#define MAX_DEVICES 8 +#endif + +/* whether the default is to announce capabilities exclusively or not */ +#ifndef V4L2LOOPBACK_DEFAULT_EXCLUSIVECAPS +#define V4L2LOOPBACK_DEFAULT_EXCLUSIVECAPS 0 +#endif + +/* when a producer is considered to have gone stale */ +#ifndef MAX_TIMEOUT +#define MAX_TIMEOUT (100 * 1000) /* in msecs */ +#endif + +/* max buffers that can be mapped, actually they + * are all mapped to max_buffers buffers */ +#ifndef MAX_BUFFERS +#define MAX_BUFFERS 32 +#endif + +/* module parameters */ +static int debug = 0; +module_param(debug, int, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(debug, "debugging level (higher values == more verbose)"); + +#define V4L2LOOPBACK_DEFAULT_MAX_BUFFERS 2 +static int max_buffers = V4L2LOOPBACK_DEFAULT_MAX_BUFFERS; +module_param(max_buffers, int, S_IRUGO); +MODULE_PARM_DESC(max_buffers, + "how many buffers should be allocated [DEFAULT: " STRINGIFY2( + V4L2LOOPBACK_DEFAULT_MAX_BUFFERS) "]"); + +/* how many times a device can be opened + * the per-module default value can be overridden on a per-device basis using + * the /sys/devices interface + * + * note that max_openers should be at least 2 in order to get a working system: + * one opener for the producer and one opener for the consumer + * however, we leave that to the user + */ +#define V4L2LOOPBACK_DEFAULT_MAX_OPENERS 10 +static int max_openers = V4L2LOOPBACK_DEFAULT_MAX_OPENERS; +module_param(max_openers, int, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC( + max_openers, + "how many users can open the loopback device [DEFAULT: " STRINGIFY2( + V4L2LOOPBACK_DEFAULT_MAX_OPENERS) "]"); + +static int devices = -1; +module_param(devices, int, 0); +MODULE_PARM_DESC(devices, "how many devices should be created"); + +static int video_nr[MAX_DEVICES] = { [0 ...(MAX_DEVICES - 1)] = -1 }; +module_param_array(video_nr, 
int, NULL, 0444); +MODULE_PARM_DESC(video_nr, + "video device numbers (-1=auto, 0=/dev/video0, etc.)"); + +static char *card_label[MAX_DEVICES]; +module_param_array(card_label, charp, NULL, 0000); +MODULE_PARM_DESC(card_label, "card labels for each device"); + +static bool exclusive_caps[MAX_DEVICES] = { + [0 ...(MAX_DEVICES - 1)] = V4L2LOOPBACK_DEFAULT_EXCLUSIVECAPS +}; +module_param_array(exclusive_caps, bool, NULL, 0444); +/* FIXXME: wording */ +MODULE_PARM_DESC( + exclusive_caps, + "whether to announce OUTPUT/CAPTURE capabilities exclusively or not [DEFAULT: " STRINGIFY2( + V4L2LOOPBACK_DEFAULT_EXCLUSIVECAPS) "]"); + +/* format specifications */ +#define V4L2LOOPBACK_SIZE_MIN_WIDTH 48 +#define V4L2LOOPBACK_SIZE_MIN_HEIGHT 32 +#define V4L2LOOPBACK_SIZE_DEFAULT_MAX_WIDTH 8192 +#define V4L2LOOPBACK_SIZE_DEFAULT_MAX_HEIGHT 8192 + +#define V4L2LOOPBACK_SIZE_DEFAULT_WIDTH 640 +#define V4L2LOOPBACK_SIZE_DEFAULT_HEIGHT 480 + +static int max_width = V4L2LOOPBACK_SIZE_DEFAULT_MAX_WIDTH; +module_param(max_width, int, S_IRUGO); +MODULE_PARM_DESC(max_width, "maximum allowed frame width [DEFAULT: " STRINGIFY2( + V4L2LOOPBACK_SIZE_DEFAULT_MAX_WIDTH) "]"); +static int max_height = V4L2LOOPBACK_SIZE_DEFAULT_MAX_HEIGHT; +module_param(max_height, int, S_IRUGO); +MODULE_PARM_DESC(max_height, + "maximum allowed frame height [DEFAULT: " STRINGIFY2( + V4L2LOOPBACK_SIZE_DEFAULT_MAX_HEIGHT) "]"); + +static DEFINE_IDR(v4l2loopback_index_idr); +static DEFINE_MUTEX(v4l2loopback_ctl_mutex); + +/* control IDs */ +#ifndef HAVE__V4L2_CTRLS +#define V4L2LOOPBACK_CID_BASE (V4L2_CID_PRIVATE_BASE) +#else +#define V4L2LOOPBACK_CID_BASE (V4L2_CID_USER_BASE | 0xf000) +#endif +#define CID_KEEP_FORMAT (V4L2LOOPBACK_CID_BASE + 0) +#define CID_SUSTAIN_FRAMERATE (V4L2LOOPBACK_CID_BASE + 1) +#define CID_TIMEOUT (V4L2LOOPBACK_CID_BASE + 2) +#define CID_TIMEOUT_IMAGE_IO (V4L2LOOPBACK_CID_BASE + 3) + +static int v4l2loopback_s_ctrl(struct v4l2_ctrl *ctrl); +static const struct v4l2_ctrl_ops 
v4l2loopback_ctrl_ops = { + .s_ctrl = v4l2loopback_s_ctrl, +}; +static const struct v4l2_ctrl_config v4l2loopback_ctrl_keepformat = { + // clang-format off + .ops = &v4l2loopback_ctrl_ops, + .id = CID_KEEP_FORMAT, + .name = "keep_format", + .type = V4L2_CTRL_TYPE_BOOLEAN, + .min = 0, + .max = 1, + .step = 1, + .def = 0, + // clang-format on +}; +static const struct v4l2_ctrl_config v4l2loopback_ctrl_sustainframerate = { + // clang-format off + .ops = &v4l2loopback_ctrl_ops, + .id = CID_SUSTAIN_FRAMERATE, + .name = "sustain_framerate", + .type = V4L2_CTRL_TYPE_BOOLEAN, + .min = 0, + .max = 1, + .step = 1, + .def = 0, + // clang-format on +}; +static const struct v4l2_ctrl_config v4l2loopback_ctrl_timeout = { + // clang-format off + .ops = &v4l2loopback_ctrl_ops, + .id = CID_TIMEOUT, + .name = "timeout", + .type = V4L2_CTRL_TYPE_INTEGER, + .min = 0, + .max = MAX_TIMEOUT, + .step = 1, + .def = 0, + // clang-format on +}; +static const struct v4l2_ctrl_config v4l2loopback_ctrl_timeoutimageio = { + // clang-format off + .ops = &v4l2loopback_ctrl_ops, + .id = CID_TIMEOUT_IMAGE_IO, + .name = "timeout_image_io", + .type = V4L2_CTRL_TYPE_BOOLEAN, + .min = 0, + .max = 1, + .step = 1, + .def = 0, + // clang-format on +}; + +/* module structures */ +struct v4l2loopback_private { + int device_nr; +}; + +/* TODO(vasaka) use typenames which are common to kernel, but first find out if + * it is needed */ +/* struct keeping state and settings of loopback device */ + +struct v4l2l_buffer { + struct v4l2_buffer buffer; + struct list_head list_head; + int use_count; +}; + +struct v4l2_loopback_device { + struct v4l2_device v4l2_dev; + struct v4l2_ctrl_handler ctrl_handler; + struct video_device *vdev; + /* pixel and stream format */ + struct v4l2_pix_format pix_format; + struct v4l2_captureparm capture_param; + unsigned long frame_jiffies; + + /* ctrls */ + int keep_format; /* CID_KEEP_FORMAT; stay ready_for_capture even when all + openers close() the device */ + int 
sustain_framerate; /* CID_SUSTAIN_FRAMERATE; duplicate frames to maintain + (close to) nominal framerate */ + + /* buffers stuff */ + u8 *image; /* pointer to actual buffers data */ + unsigned long int imagesize; /* size of buffers data */ + int buffers_number; /* should not be big, 4 is a good choice */ + struct v4l2l_buffer buffers[MAX_BUFFERS]; /* inner driver buffers */ + int used_buffers; /* number of the actually used buffers */ + int max_openers; /* how many times can this device be opened */ + + int write_position; /* number of last written frame + 1 */ + struct list_head outbufs_list; /* buffers in output DQBUF order */ + int bufpos2index + [MAX_BUFFERS]; /* mapping of (read/write_position % used_buffers) + * to inner buffer index */ + long buffer_size; + + /* sustain_framerate stuff */ + struct timer_list sustain_timer; + unsigned int reread_count; + + /* timeout stuff */ + unsigned long timeout_jiffies; /* CID_TIMEOUT; 0 means disabled */ + int timeout_image_io; /* CID_TIMEOUT_IMAGE_IO; next opener will + * read/write to timeout_image */ + u8 *timeout_image; /* copy of it will be captured when timeout passes */ + struct v4l2l_buffer timeout_image_buffer; + struct timer_list timeout_timer; + int timeout_happened; + + /* sync stuff */ + atomic_t open_count; + + int ready_for_capture; /* set to the number of writers that opened the + * device and negotiated format. */ + int ready_for_output; /* set to true when no writer is currently attached + * this differs slightly from !ready_for_capture, + * e.g. when using fallback images */ + int announce_all_caps; /* set to false, if device caps (OUTPUT/CAPTURE) + * should only be announced if the resp. 
"ready" + * flag is set; default=TRUE */ + + int max_width; + int max_height; + + char card_label[32]; + + wait_queue_head_t read_event; + spinlock_t lock; +}; + +/* types of opener shows what opener wants to do with loopback */ +enum opener_type { + // clang-format off + UNNEGOTIATED = 0, + READER = 1, + WRITER = 2, + // clang-format on +}; + +/* struct keeping state and type of opener */ +struct v4l2_loopback_opener { + enum opener_type type; + int vidioc_enum_frameintervals_calls; + int read_position; /* number of last processed frame + 1 or + * write_position - 1 if reader went out of sync */ + unsigned int reread_count; + struct v4l2_buffer *buffers; + int buffers_number; /* should not be big, 4 is a good choice */ + int timeout_image_io; + + struct v4l2_fh fh; +}; + +#define fh_to_opener(ptr) container_of((ptr), struct v4l2_loopback_opener, fh) + +/* this is heavily inspired by the bttv driver found in the linux kernel */ +struct v4l2l_format { + char *name; + int fourcc; /* video4linux 2 */ + int depth; /* bit/pixel */ + int flags; +}; +/* set the v4l2l_format.flags to PLANAR for non-packed formats */ +#define FORMAT_FLAGS_PLANAR 0x01 +#define FORMAT_FLAGS_COMPRESSED 0x02 + +#include "v4l2loopback_formats.h" + +static const unsigned int FORMATS = ARRAY_SIZE(formats); + +static char *fourcc2str(unsigned int fourcc, char buf[4]) +{ + buf[0] = (fourcc >> 0) & 0xFF; + buf[1] = (fourcc >> 8) & 0xFF; + buf[2] = (fourcc >> 16) & 0xFF; + buf[3] = (fourcc >> 24) & 0xFF; + + return buf; +} + +static const struct v4l2l_format *format_by_fourcc(int fourcc) +{ + unsigned int i; + + for (i = 0; i < FORMATS; i++) { + if (formats[i].fourcc == fourcc) + return formats + i; + } + + dprintk("unsupported format '%c%c%c%c'\n", (fourcc >> 0) & 0xFF, + (fourcc >> 8) & 0xFF, (fourcc >> 16) & 0xFF, + (fourcc >> 24) & 0xFF); + return NULL; +} + +static void pix_format_set_size(struct v4l2_pix_format *f, + const struct v4l2l_format *fmt, + unsigned int width, unsigned int height) +{ + 
f->width = width; + f->height = height; + + if (fmt->flags & FORMAT_FLAGS_PLANAR) { + f->bytesperline = width; /* Y plane */ + f->sizeimage = (width * height * fmt->depth) >> 3; + } else if (fmt->flags & FORMAT_FLAGS_COMPRESSED) { + /* doesn't make sense for compressed formats */ + f->bytesperline = 0; + f->sizeimage = (width * height * fmt->depth) >> 3; + } else { + f->bytesperline = (width * fmt->depth) >> 3; + f->sizeimage = height * f->bytesperline; + } +} + +static int set_timeperframe(struct v4l2_loopback_device *dev, + struct v4l2_fract *tpf) +{ + if ((tpf->denominator < 1) || (tpf->numerator < 1)) { + return -EINVAL; + } + dev->capture_param.timeperframe = *tpf; + dev->frame_jiffies = max(1UL, msecs_to_jiffies(1000) * tpf->numerator / + tpf->denominator); + return 0; +} + +static struct v4l2_loopback_device *v4l2loopback_cd2dev(struct device *cd); + +/* device attributes */ +/* available via sysfs: /sys/devices/virtual/video4linux/video* */ + +static ssize_t attr_show_format(struct device *cd, + struct device_attribute *attr, char *buf) +{ + /* gets the current format as "FOURCC:WxH@f/s", e.g. 
"YUYV:320x240@1000/30" */ + struct v4l2_loopback_device *dev = v4l2loopback_cd2dev(cd); + const struct v4l2_fract *tpf; + char buf4cc[5], buf_fps[32]; + + if (!dev || !dev->ready_for_capture) + return 0; + tpf = &dev->capture_param.timeperframe; + + fourcc2str(dev->pix_format.pixelformat, buf4cc); + buf4cc[4] = 0; + if (tpf->numerator == 1) + snprintf(buf_fps, sizeof(buf_fps), "%d", tpf->denominator); + else + snprintf(buf_fps, sizeof(buf_fps), "%d/%d", tpf->denominator, + tpf->numerator); + return sprintf(buf, "%4s:%dx%d@%s\n", buf4cc, dev->pix_format.width, + dev->pix_format.height, buf_fps); +} + +static ssize_t attr_store_format(struct device *cd, + struct device_attribute *attr, const char *buf, + size_t len) +{ + struct v4l2_loopback_device *dev = v4l2loopback_cd2dev(cd); + int fps_num = 0, fps_den = 1; + + if (!dev) + return -ENODEV; + + /* only fps changing is supported */ + if (sscanf(buf, "@%d/%d", &fps_num, &fps_den) > 0) { + struct v4l2_fract f = { .numerator = fps_den, + .denominator = fps_num }; + int err = 0; + if ((err = set_timeperframe(dev, &f)) < 0) + return err; + return len; + } + return -EINVAL; +} + +static DEVICE_ATTR(format, S_IRUGO | S_IWUSR, attr_show_format, + attr_store_format); + +static ssize_t attr_show_buffers(struct device *cd, + struct device_attribute *attr, char *buf) +{ + struct v4l2_loopback_device *dev = v4l2loopback_cd2dev(cd); + + if (!dev) + return -ENODEV; + + return sprintf(buf, "%d\n", dev->used_buffers); +} + +static DEVICE_ATTR(buffers, S_IRUGO, attr_show_buffers, NULL); + +static ssize_t attr_show_maxopeners(struct device *cd, + struct device_attribute *attr, char *buf) +{ + struct v4l2_loopback_device *dev = v4l2loopback_cd2dev(cd); + + return sprintf(buf, "%d\n", dev->max_openers); +} + +static ssize_t attr_store_maxopeners(struct device *cd, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct v4l2_loopback_device *dev = NULL; + unsigned long curr = 0; + + if (kstrtoul(buf, 0, &curr)) + 
return -EINVAL; + + dev = v4l2loopback_cd2dev(cd); + + if (dev->max_openers == curr) + return len; + + if (dev->open_count.counter > curr) { + /* request to limit to less openers as are currently attached to us */ + return -EINVAL; + } + + dev->max_openers = (int)curr; + + return len; +} + +static DEVICE_ATTR(max_openers, S_IRUGO | S_IWUSR, attr_show_maxopeners, + attr_store_maxopeners); + +static void v4l2loopback_remove_sysfs(struct video_device *vdev) +{ +#define V4L2_SYSFS_DESTROY(x) device_remove_file(&vdev->dev, &dev_attr_##x) + + if (vdev) { + V4L2_SYSFS_DESTROY(format); + V4L2_SYSFS_DESTROY(buffers); + V4L2_SYSFS_DESTROY(max_openers); + /* ... */ + } +} + +static void v4l2loopback_create_sysfs(struct video_device *vdev) +{ + int res = 0; + +#define V4L2_SYSFS_CREATE(x) \ + res = device_create_file(&vdev->dev, &dev_attr_##x); \ + if (res < 0) \ + break + if (!vdev) + return; + do { + V4L2_SYSFS_CREATE(format); + V4L2_SYSFS_CREATE(buffers); + V4L2_SYSFS_CREATE(max_openers); + /* ... */ + } while (0); + + if (res >= 0) + return; + dev_err(&vdev->dev, "%s error: %d\n", __func__, res); +} + +/* global module data */ +/* find a device based on it's device-number (e.g. 
'3' for /dev/video3) */ +struct v4l2loopback_lookup_cb_data { + int device_nr; + struct v4l2_loopback_device *device; +}; +static int v4l2loopback_lookup_cb(int id, void *ptr, void *data) +{ + struct v4l2_loopback_device *device = ptr; + struct v4l2loopback_lookup_cb_data *cbdata = data; + if (cbdata && device && device->vdev) { + if (device->vdev->num == cbdata->device_nr) { + cbdata->device = device; + cbdata->device_nr = id; + return 1; + } + } + return 0; +} +static int v4l2loopback_lookup(int device_nr, + struct v4l2_loopback_device **device) +{ + struct v4l2loopback_lookup_cb_data data = { + .device_nr = device_nr, + .device = NULL, + }; + int err = idr_for_each(&v4l2loopback_index_idr, &v4l2loopback_lookup_cb, + &data); + if (1 == err) { + if (device) + *device = data.device; + return data.device_nr; + } + return -ENODEV; +} +static struct v4l2_loopback_device *v4l2loopback_cd2dev(struct device *cd) +{ + struct video_device *loopdev = to_video_device(cd); + struct v4l2loopback_private *ptr = + (struct v4l2loopback_private *)video_get_drvdata(loopdev); + int nr = ptr->device_nr; + + return idr_find(&v4l2loopback_index_idr, nr); +} + +static struct v4l2_loopback_device *v4l2loopback_getdevice(struct file *f) +{ + struct video_device *loopdev = video_devdata(f); + struct v4l2loopback_private *ptr = + (struct v4l2loopback_private *)video_get_drvdata(loopdev); + int nr = ptr->device_nr; + + return idr_find(&v4l2loopback_index_idr, nr); +} + +/* forward declarations */ +static void init_buffers(struct v4l2_loopback_device *dev); +static int allocate_buffers(struct v4l2_loopback_device *dev); +static int free_buffers(struct v4l2_loopback_device *dev); +static void try_free_buffers(struct v4l2_loopback_device *dev); +static int allocate_timeout_image(struct v4l2_loopback_device *dev); +static void check_timers(struct v4l2_loopback_device *dev); +static const struct v4l2_file_operations v4l2_loopback_fops; +static const struct v4l2_ioctl_ops v4l2_loopback_ioctl_ops; 
+
+/* Queue helpers */
+/* the next functions set the buffer state flags (QUEUED/DONE) */
+
+/* marks a buffer as DONE (and no longer QUEUED) */
+static inline void set_done(struct v4l2l_buffer *buffer)
+{
+	buffer->buffer.flags &= ~V4L2_BUF_FLAG_QUEUED;
+	buffer->buffer.flags |= V4L2_BUF_FLAG_DONE;
+}
+
+/* marks a buffer as QUEUED (and no longer DONE) */
+static inline void set_queued(struct v4l2l_buffer *buffer)
+{
+	buffer->buffer.flags &= ~V4L2_BUF_FLAG_DONE;
+	buffer->buffer.flags |= V4L2_BUF_FLAG_QUEUED;
+}
+
+/* clears both the QUEUED and the DONE flag of a buffer */
+static inline void unset_flags(struct v4l2l_buffer *buffer)
+{
+	buffer->buffer.flags &= ~V4L2_BUF_FLAG_QUEUED;
+	buffer->buffer.flags &= ~V4L2_BUF_FLAG_DONE;
+}
+
+/* V4L2 ioctl caps and params calls */
+/* returns device capabilities
+ * called on VIDIOC_QUERYCAP
+ */
+static int vidioc_querycap(struct file *file, void *priv,
+			   struct v4l2_capability *cap)
+{
+	struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file);
+	int labellen = (sizeof(cap->card) < sizeof(dev->card_label)) ?
+			       sizeof(cap->card) :
+			       sizeof(dev->card_label);
+	int device_nr =
+		((struct v4l2loopback_private *)video_get_drvdata(dev->vdev))
+			->device_nr;
+	__u32 capabilities = V4L2_CAP_STREAMING | V4L2_CAP_READWRITE;
+
+	strlcpy(cap->driver, "v4l2 loopback", sizeof(cap->driver));
+	/* card_label must never be used as the format string itself:
+	 * any '%' in the label would be interpreted as a conversion
+	 */
+	snprintf(cap->card, labellen, "%s", dev->card_label);
+	snprintf(cap->bus_info, sizeof(cap->bus_info),
+		 "platform:v4l2loopback-%03d", device_nr);
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 1, 0)
+	/* since 3.1.0, the v4l2-core system is supposed to set the version */
+	cap->version = V4L2LOOPBACK_VERSION_CODE;
+#endif
+
+#ifdef V4L2_CAP_VIDEO_M2M
+	capabilities |= V4L2_CAP_VIDEO_M2M;
+#endif /* V4L2_CAP_VIDEO_M2M */
+
+	if (dev->announce_all_caps) {
+		capabilities |= V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_VIDEO_OUTPUT;
+	} else {
+		if (dev->ready_for_capture) {
+			capabilities |= V4L2_CAP_VIDEO_CAPTURE;
+		}
+		if (dev->ready_for_output) {
+			capabilities |= V4L2_CAP_VIDEO_OUTPUT;
+		}
+	}
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0)
+	dev->vdev->device_caps =
+#endif /* >=linux-4.7.0 */
+		cap->device_caps = cap->capabilities = capabilities;
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 3, 0)
+	cap->capabilities |= V4L2_CAP_DEVICE_CAPS;
+#endif
+
+	memset(cap->reserved, 0, sizeof(cap->reserved));
+	return 0;
+}
+
+/* returns the supported frame sizes: the negotiated size once a format has
+ * been fixed, otherwise a continuous range up to the device maximum
+ * (only index 0 is valid)
+ */
+static int vidioc_enum_framesizes(struct file *file, void *fh,
+				  struct v4l2_frmsizeenum *argp)
+{
+	struct v4l2_loopback_device *dev;
+
+	/* LATER: what does the index really mean?
+	 * if it's about enumerating formats, we can safely ignore it
+	 * (CHECK)
+	 */
+
+	/* there can be only one... */
+	if (argp->index)
+		return -EINVAL;
+
+	dev = v4l2loopback_getdevice(file);
+	if (dev->ready_for_capture) {
+		/* format has already been negotiated
+		 * cannot change during runtime
+		 */
+		argp->type = V4L2_FRMSIZE_TYPE_DISCRETE;
+
+		argp->discrete.width = dev->pix_format.width;
+		argp->discrete.height = dev->pix_format.height;
+	} else {
+		/* if the format has not been negotiated yet, we accept anything
+		 */
+		argp->type = V4L2_FRMSIZE_TYPE_CONTINUOUS;
+
+		argp->stepwise.min_width = V4L2LOOPBACK_SIZE_MIN_WIDTH;
+		argp->stepwise.min_height = V4L2LOOPBACK_SIZE_MIN_HEIGHT;
+
+		argp->stepwise.max_width = dev->max_width;
+		argp->stepwise.max_height = dev->max_height;
+
+		argp->stepwise.step_width = 1;
+		argp->stepwise.step_height = 1;
+	}
+	return 0;
+}
+
+/* returns frameinterval (fps) for the set resolution
+ * called on VIDIOC_ENUM_FRAMEINTERVALS
+ */
+static int vidioc_enum_frameintervals(struct file *file, void *fh,
+				      struct v4l2_frmivalenum *argp)
+{
+	struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file);
+	struct v4l2_loopback_opener *opener = fh_to_opener(fh);
+
+	if (dev->ready_for_capture) {
+		/* each opener gets the single interval exactly once */
+		if (opener->vidioc_enum_frameintervals_calls > 0)
+			return -EINVAL;
+		if (argp->width == dev->pix_format.width &&
+		    argp->height == dev->pix_format.height) {
+			argp->type = V4L2_FRMIVAL_TYPE_DISCRETE;
+			argp->discrete = dev->capture_param.timeperframe;
+			opener->vidioc_enum_frameintervals_calls++;
+			return 0;
+		}
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/* ------------------ CAPTURE ----------------------- */
+
+/* returns device formats
+ * called on VIDIOC_ENUM_FMT, with v4l2_buf_type set to V4L2_BUF_TYPE_VIDEO_CAPTURE
+ */
+static int vidioc_enum_fmt_cap(struct file *file, void *fh,
+			       struct v4l2_fmtdesc *f)
+{
+	struct v4l2_loopback_device *dev;
+	MARK();
+
+	dev = v4l2loopback_getdevice(file);
+
+	if (f->index)
+		return -EINVAL;
+	if (dev->ready_for_capture) {
+		const __u32 format = dev->pix_format.pixelformat;
+
+		/* describe the format by its four FOURCC characters */
+		snprintf(f->description, sizeof(f->description), "[%c%c%c%c]",
+			 (format >> 0) & 0xFF, (format >> 8) & 0xFF,
+			 (format >> 16) & 0xFF, (format >> 24) & 0xFF);
+
+		f->pixelformat = dev->pix_format.pixelformat;
+	} else {
+		return -EINVAL;
+	}
+	f->flags = 0;
+	MARK();
+	return 0;
+}
+
+/* returns current video format
+ * called on VIDIOC_G_FMT, with v4l2_buf_type set to V4L2_BUF_TYPE_VIDEO_CAPTURE
+ */
+static int vidioc_g_fmt_cap(struct file *file, void *priv,
+			    struct v4l2_format *fmt)
+{
+	struct v4l2_loopback_device *dev;
+	MARK();
+
+	dev = v4l2loopback_getdevice(file);
+
+	if (!dev->ready_for_capture)
+		return -EINVAL;
+
+	fmt->fmt.pix = dev->pix_format;
+	MARK();
+	return 0;
+}
+
+/* checks if it is OK to change to format fmt;
+ * only the pixelformat is checked against the current device format; all
+ * other parameters are overwritten with the device format and the app
+ * should obey this decision
+ * called on VIDIOC_TRY_FMT, with v4l2_buf_type set to V4L2_BUF_TYPE_VIDEO_CAPTURE
+ */
+static int vidioc_try_fmt_cap(struct file *file, void *priv,
+			      struct v4l2_format *fmt)
+{
+	struct v4l2_loopback_device *dev;
+	char buf[5];
+
+	dev = v4l2loopback_getdevice(file);
+
+	if (0 == dev->ready_for_capture) {
+		dprintk("setting fmt_cap not possible yet\n");
+		return -EBUSY;
+	}
+
+	if (fmt->fmt.pix.pixelformat != dev->pix_format.pixelformat)
+		return -EINVAL;
+
+	fmt->fmt.pix = dev->pix_format;
+
+	buf[4] = 0;
+	dprintk("capFOURCC=%s\n", fourcc2str(dev->pix_format.pixelformat, buf));
+	return 0;
+}
+
+/* sets new output format, if possible
+ * actually format is set by input and we even do not check it, just return
+ * current one, but it is possible to set subregions of input TODO(vasaka)
+ * called on VIDIOC_S_FMT, with v4l2_buf_type set to V4L2_BUF_TYPE_VIDEO_CAPTURE
+ */
+static int vidioc_s_fmt_cap(struct file *file, void *priv,
+			    struct v4l2_format *fmt)
+{
+	return vidioc_try_fmt_cap(file, priv, fmt);
+}
+
+/* ------------------ OUTPUT ----------------------- */
+
+/* returns device formats;
+ * LATER: allow all formats
+ * called on VIDIOC_ENUM_FMT, with v4l2_buf_type set to V4L2_BUF_TYPE_VIDEO_OUTPUT
+ */
+static int vidioc_enum_fmt_out(struct file *file, void *fh,
+			       struct v4l2_fmtdesc *f)
+{
+	struct v4l2_loopback_device *dev;
+	const struct v4l2l_format *fmt;
+
+	dev = v4l2loopback_getdevice(file);
+
+	if (dev->ready_for_capture) {
+		const __u32 format = dev->pix_format.pixelformat;
+
+		/* format has been fixed by the writer, so only one single format is supported */
+		if (f->index)
+			return -EINVAL;
+
+		fmt = format_by_fourcc(format);
+		if (NULL == fmt)
+			return -EINVAL;
+
+		f->type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
+		/* f->flags = ??; */
+		snprintf(f->description, sizeof(f->description), "%s",
+			 fmt->name);
+
+		f->pixelformat = dev->pix_format.pixelformat;
+	} else {
+		/* fill in a dummy format */
+		/* coverity[unsigned_compare] */
+		if (f->index < 0 || f->index >= FORMATS)
+			return -EINVAL;
+
+		fmt = &formats[f->index];
+
+		f->pixelformat = fmt->fourcc;
+		snprintf(f->description, sizeof(f->description), "%s",
+			 fmt->name);
+	}
+	f->flags = 0;
+
+	return 0;
+}
+
+/* returns current video format fmt */
+/* NOTE: this is called from the producer
+ * so if format has not been negotiated yet,
+ * it should return ALL of available formats,
+ * called on VIDIOC_G_FMT, with v4l2_buf_type set to V4L2_BUF_TYPE_VIDEO_OUTPUT
+ */
+static int vidioc_g_fmt_out(struct file *file, void *priv,
+			    struct v4l2_format *fmt)
+{
+	struct v4l2_loopback_device *dev;
+	MARK();
+
+	dev = v4l2loopback_getdevice(file);
+
+	/*
+	 * LATER: this should return the currently valid format
+	 * gstreamer doesn't like it, if this returns -EINVAL, as it
+	 * then concludes that there is _no_ valid format
+	 * CHECK whether this assumption is wrong,
+	 * or whether we have to always provide a valid format
+	 */
+
+	fmt->fmt.pix = dev->pix_format;
+	return 0;
+}
+
+/* checks if it is OK to change to format fmt;
+ * if format is negotiated do not change it
+ * called on VIDIOC_TRY_FMT with v4l2_buf_type set to V4L2_BUF_TYPE_VIDEO_OUTPUT
+ */
+static int vidioc_try_fmt_out(struct file *file, void *priv,
+			      struct v4l2_format *fmt)
+{
+	struct v4l2_loopback_device *dev;
+	MARK();
+
+	dev = v4l2loopback_getdevice(file);
+
+	/* TODO(vasaka) loopback does not care about formats writer want to set,
+	 * maybe it is a good idea to restrict format somehow */
+	if (dev->ready_for_capture) {
+		fmt->fmt.pix = dev->pix_format;
+	} else {
+		__u32 w = fmt->fmt.pix.width;
+		__u32 h = fmt->fmt.pix.height;
+		__u32 pixfmt = fmt->fmt.pix.pixelformat;
+		const struct v4l2l_format *format = format_by_fourcc(pixfmt);
+
+		/* clamp the requested size to the device maximum */
+		if (w > dev->max_width)
+			w = dev->max_width;
+		if (h > dev->max_height)
+			h = dev->max_height;
+
+		dprintk("trying image %dx%d\n", w, h);
+
+		/* a zero dimension selects the default size */
+		if (w < 1)
+			w = V4L2LOOPBACK_SIZE_DEFAULT_WIDTH;
+
+		if (h < 1)
+			h = V4L2LOOPBACK_SIZE_DEFAULT_HEIGHT;
+
+		/* unknown pixelformat: fall back to the first known format */
+		if (NULL == format)
+			format = &formats[0];
+
+		pix_format_set_size(&fmt->fmt.pix, format, w, h);
+
+		fmt->fmt.pix.pixelformat = format->fourcc;
+
+		if ((fmt->fmt.pix.colorspace == V4L2_COLORSPACE_DEFAULT) ||
+		    (fmt->fmt.pix.colorspace > V4L2_COLORSPACE_DCI_P3))
+			fmt->fmt.pix.colorspace = V4L2_COLORSPACE_SRGB;
+
+		if (V4L2_FIELD_ANY == fmt->fmt.pix.field)
+			fmt->fmt.pix.field = V4L2_FIELD_NONE;
+
+		/* FIXXME: try_fmt should never modify the device-state */
+		dev->pix_format = fmt->fmt.pix;
+	}
+	return 0;
+}
+
+/* sets new output format, if possible;
+ * allocate data here because we do not know if it will be streaming or
+ * read/write IO
+ * called on VIDIOC_S_FMT with v4l2_buf_type set to V4L2_BUF_TYPE_VIDEO_OUTPUT
+ */
+static int vidioc_s_fmt_out(struct file *file, void *priv,
+			    struct v4l2_format *fmt)
+{
+	struct v4l2_loopback_device *dev;
+	char buf[5];
+	int ret;
+	MARK();
+
+	dev = v4l2loopback_getdevice(file);
+	ret = vidioc_try_fmt_out(file, priv, fmt);
+
+	dprintk("s_fmt_out(%d) %d...%d\n", ret, dev->ready_for_capture,
+		dev->pix_format.sizeimage);
+
+	buf[4] = 0;
+	dprintk("outFOURCC=%s\n", fourcc2str(dev->pix_format.pixelformat, buf));
+
+	if (ret < 0)
+		return ret;
+
+	if (!dev->ready_for_capture) {
+		dev->buffer_size = PAGE_ALIGN(dev->pix_format.sizeimage);
+		fmt->fmt.pix.sizeimage = dev->buffer_size;
+		/* NOTE(review): the result of allocate_buffers() is ignored
+		 * here; an allocation failure is only noticed later
+		 */
+		allocate_buffers(dev);
+	}
+	return ret;
+}
+
+// #define V4L2L_OVERLAY
+#ifdef V4L2L_OVERLAY
+/* ------------------ OVERLAY ----------------------- */
+/* currently unsupported */
+/* GSTreamer's v4l2sink is buggy, as it requires the overlay to work
+ * while it should only require it, if overlay is requested
+ * once the gstreamer element is fixed, remove the overlay dummies
+ */
+#warning OVERLAY dummies
+static int vidioc_g_fmt_overlay(struct file *file, void *priv,
+				struct v4l2_format *fmt)
+{
+	return 0;
+}
+
+static int vidioc_s_fmt_overlay(struct file *file, void *priv,
+				struct v4l2_format *fmt)
+{
+	return 0;
+}
+#endif /* V4L2L_OVERLAY */
+
+/* ------------------ PARAMs ----------------------- */
+
+/* get some data flow parameters, only capability, fps and readbuffers has
+ * effect on this driver
+ * called on VIDIOC_G_PARM
+ */
+static int vidioc_g_parm(struct file *file, void *priv,
+			 struct v4l2_streamparm *parm)
+{
+	/* do not care about type of opener, hope these enums would always be
+	 * compatible */
+	struct v4l2_loopback_device *dev;
+	MARK();
+
+	dev = v4l2loopback_getdevice(file);
+	parm->parm.capture = dev->capture_param;
+	return 0;
+}
+
+/* set some data flow parameters, only capability, fps and readbuffers has
+ * effect on this driver
+ * returns -EINVAL for buffer types other than capture/output
+ * called on VIDIOC_S_PARM
+ */
+static int vidioc_s_parm(struct file *file, void *priv,
+			 struct v4l2_streamparm *parm)
+{
+	struct v4l2_loopback_device *dev;
+	int err = 0;
+	MARK();
+
+	dev = v4l2loopback_getdevice(file);
+	dprintk("vidioc_s_parm called frate=%d/%d\n",
+		parm->parm.capture.timeperframe.numerator,
+		parm->parm.capture.timeperframe.denominator);
+
+	switch (parm->type) {
+	case V4L2_BUF_TYPE_VIDEO_CAPTURE:
+	case V4L2_BUF_TYPE_VIDEO_OUTPUT:
+		/* both directions read the frame interval from the
+		 * parm.capture member, exactly as before
+		 */
+		err = set_timeperframe(dev, &parm->parm.capture.timeperframe);
+		if (err < 0)
+			return err;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	parm->parm.capture = dev->capture_param;
+	return 0;
+}
+
+#ifdef V4L2LOOPBACK_WITH_STD
+/* sets a tv standard, actually we do not need to handle this any special way
+ * added to support effecttv
+ * called on VIDIOC_S_STD
+ */
+static int vidioc_s_std(struct file *file, void *fh, v4l2_std_id *_std)
+{
+	v4l2_std_id req_std = 0, supported_std = 0;
+	const v4l2_std_id all_std = V4L2_STD_ALL, no_std = 0;
+
+	if (_std) {
+		req_std = *_std;
+		*_std = all_std;
+	}
+
+	/* we support everything in V4L2_STD_ALL, but not more... */
+	supported_std = (all_std & req_std);
+	if (no_std == supported_std)
+		return -EINVAL;
+
+	return 0;
+}
+
+/* gets a fake video standard
+ * called on VIDIOC_G_STD
+ */
+static int vidioc_g_std(struct file *file, void *fh, v4l2_std_id *norm)
+{
+	if (norm)
+		*norm = V4L2_STD_ALL;
+	return 0;
+}
+/* gets a fake video standard
+ * called on VIDIOC_QUERYSTD
+ */
+static int vidioc_querystd(struct file *file, void *fh, v4l2_std_id *norm)
+{
+	if (norm)
+		*norm = V4L2_STD_ALL;
+	return 0;
+}
+#endif /* V4L2LOOPBACK_WITH_STD */
+
+/* get ctrls info
+ * called on VIDIOC_QUERYCTRL
+ */
+static int vidioc_queryctrl(struct file *file, void *fh,
+			    struct v4l2_queryctrl *q)
+{
+	const struct v4l2_ctrl_config *cnf = NULL;
+
+	switch (q->id) {
+	case CID_KEEP_FORMAT:
+		cnf = &v4l2loopback_ctrl_keepformat;
+		break;
+	case CID_SUSTAIN_FRAMERATE:
+		cnf = &v4l2loopback_ctrl_sustainframerate;
+		break;
+	case CID_TIMEOUT:
+		cnf = &v4l2loopback_ctrl_timeout;
+		break;
+	case CID_TIMEOUT_IMAGE_IO:
+		cnf = &v4l2loopback_ctrl_timeoutimageio;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* bounded copy: q->name is a fixed-size array */
+	strlcpy(q->name, cnf->name, sizeof(q->name));
+	q->default_value = cnf->def;
+	q->type = cnf->type;
+	q->minimum = cnf->min;
+	q->maximum = cnf->max;
+	q->step = cnf->step;
+
+	memset(q->reserved, 0, sizeof(q->reserved));
+	return 0;
+}
+
+/* reads the current value of one of the driver's custom controls
+ * called on VIDIOC_G_CTRL
+ */
+static int vidioc_g_ctrl(struct file *file, void *fh, struct v4l2_control *c)
+{
+	struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file);
+
+	switch (c->id) {
+	case CID_KEEP_FORMAT:
+		c->value = dev->keep_format;
+		break;
+	case CID_SUSTAIN_FRAMERATE:
+		c->value = dev->sustain_framerate;
+		break;
+	case CID_TIMEOUT:
+		c->value = jiffies_to_msecs(dev->timeout_jiffies);
+		break;
+	case CID_TIMEOUT_IMAGE_IO:
+		c->value = dev->timeout_image_io;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/* validates and applies a new value for one of the custom controls;
+ * returns -EINVAL for unknown ids or out-of-range values
+ */
+static int v4l2loopback_set_ctrl(struct v4l2_loopback_device *dev, u32 id,
+				 s64 val)
+{
+	switch (id) {
+	case CID_KEEP_FORMAT:
+		if (val < 0 || val > 1)
+			return -EINVAL;
+		dev->keep_format = val;
+		try_free_buffers(
+			dev); /* will only free buffers if !keep_format */
+		break;
+	case CID_SUSTAIN_FRAMERATE:
+		if (val < 0 || val > 1)
+			return -EINVAL;
+		spin_lock_bh(&dev->lock);
+		dev->sustain_framerate = val;
+		check_timers(dev);
+		spin_unlock_bh(&dev->lock);
+		break;
+	case CID_TIMEOUT:
+		if (val < 0 || val > MAX_TIMEOUT)
+			return -EINVAL;
+		spin_lock_bh(&dev->lock);
+		dev->timeout_jiffies = msecs_to_jiffies(val);
+		check_timers(dev);
+		spin_unlock_bh(&dev->lock);
+		allocate_timeout_image(dev);
+		break;
+	case CID_TIMEOUT_IMAGE_IO:
+		if (val < 0 || val > 1)
+			return -EINVAL;
+		dev->timeout_image_io = val;
+		break;
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/* control-handler entry point: resolves the device from the handler and
+ * forwards to v4l2loopback_set_ctrl()
+ */
+static int v4l2loopback_s_ctrl(struct v4l2_ctrl *ctrl)
+{
+	struct v4l2_loopback_device *dev = container_of(
+		ctrl->handler, struct v4l2_loopback_device, ctrl_handler);
+	return v4l2loopback_set_ctrl(dev, ctrl->id, ctrl->val);
+}
+/* called on VIDIOC_S_CTRL; forwards to v4l2loopback_set_ctrl() */
+static int vidioc_s_ctrl(struct file *file, void *fh, struct v4l2_control *c)
+{
+	struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file);
+	return v4l2loopback_set_ctrl(dev, c->id, c->value);
+}
+
+/* returns set of device outputs, in our case there is only one
+ * called on VIDIOC_ENUMOUTPUT
+ */
+static int vidioc_enum_output(struct file *file, void *fh,
+			      struct v4l2_output *outp)
+{
+	__u32 index = outp->index;
+	struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file);
+	MARK();
+
+	if (!dev->announce_all_caps && !dev->ready_for_output)
+		return -ENOTTY;
+
+	if (0 != index)
+		return -EINVAL;
+
+	/* clear all data (including the reserved fields) */
+	memset(outp, 0, sizeof(*outp));
+
+	outp->index = index;
+	strlcpy(outp->name, "loopback in", sizeof(outp->name));
+	outp->type = V4L2_OUTPUT_TYPE_ANALOG;
+	outp->audioset = 0;
+	outp->modulator = 0;
+#ifdef V4L2LOOPBACK_WITH_STD
+	outp->std = V4L2_STD_ALL;
+#ifdef V4L2_OUT_CAP_STD
+	outp->capabilities |= V4L2_OUT_CAP_STD;
+#endif /* V4L2_OUT_CAP_STD */
+#endif /* V4L2LOOPBACK_WITH_STD */
+
+	return 0;
+}
+
+/* which output is currently active,
+ * called on VIDIOC_G_OUTPUT
+ */
+static int vidioc_g_output(struct file *file, void *fh, unsigned int *i)
+{
+	struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file);
+	if (!dev->announce_all_caps && !dev->ready_for_output)
+		return -ENOTTY;
+	if (i)
+		*i = 0;
+	return 0;
+}
+
+/* set output, can make sense if we have more than one video src,
+ * called on VIDIOC_S_OUTPUT
+ */
+static int vidioc_s_output(struct file *file, void *fh, unsigned int i)
+{
+	struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file);
+	if (!dev->announce_all_caps && !dev->ready_for_output)
+		return -ENOTTY;
+
+	if (i)
+		return -EINVAL;
+
+	return 0;
+}
+
+/* returns set of device inputs, in our case there is only one,
+ * but later I may add more
+ * called on VIDIOC_ENUMINPUT
+ */
+static int vidioc_enum_input(struct file *file, void *fh,
+			     struct v4l2_input *inp)
+{
+	__u32 index = inp->index;
+	MARK();
+
+	if (0 != index)
+		return -EINVAL;
+
+	/* clear all data (including the reserved fields) */
+	memset(inp, 0, sizeof(*inp));
+
+	inp->index = index;
+	strlcpy(inp->name, "loopback", sizeof(inp->name));
+	inp->type = V4L2_INPUT_TYPE_CAMERA;
+	inp->audioset = 0;
+	inp->tuner = 0;
+	inp->status = 0;
+
+#ifdef V4L2LOOPBACK_WITH_STD
+	inp->std = V4L2_STD_ALL;
+#ifdef V4L2_IN_CAP_STD
+	inp->capabilities |= V4L2_IN_CAP_STD;
+#endif
+#endif /* V4L2LOOPBACK_WITH_STD */
+
+	return 0;
+}
+
+/* which input is currently active,
+ * called on VIDIOC_G_INPUT
+ */
+static int vidioc_g_input(struct file *file, void *fh, unsigned int *i)
+{
+	struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file);
+	if (!dev->announce_all_caps && !dev->ready_for_capture)
+		return -ENOTTY;
+	if (i)
+		*i = 0;
+	return 0;
+}
+
+/* set input, can make sense if we have more than one video src,
+ * called on VIDIOC_S_INPUT
+ */
+static int vidioc_s_input(struct file *file, void *fh, unsigned int i)
+{
+	struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file);
+	if (!dev->announce_all_caps && !dev->ready_for_capture)
+		return -ENOTTY;
+	if (i == 0)
+		return 0;
+	return -EINVAL;
+}
+
+/* --------------- V4L2 ioctl buffer related calls ----------------- */
+
+/* negotiate buffer type
+ * only mmap streaming supported
+ * called on VIDIOC_REQBUFS
+ */
+static int vidioc_reqbufs(struct file *file, void *fh,
+			  struct v4l2_requestbuffers *b)
+{
+	struct v4l2_loopback_device *dev;
+	struct v4l2_loopback_opener *opener;
+	int i;
+	MARK();
+
+	dev = v4l2loopback_getdevice(file);
+	opener = fh_to_opener(fh);
+
+	dprintk("reqbufs: %d\t%d=%d\n", b->memory, b->count,
+		dev->buffers_number);
+	if (opener->timeout_image_io) {
+		/* timeout-image openers always get exactly one buffer */
+		if (b->memory != V4L2_MEMORY_MMAP)
+			return -EINVAL;
+		b->count = 1;
+		return 0;
+	}
+
+	init_buffers(dev);
+	switch (b->memory) {
+	case V4L2_MEMORY_MMAP:
+		/* do nothing here, buffers are always allocated */
+		if (b->count < 1 || dev->buffers_number < 1)
+			return 0;
+
+		if (b->count > dev->buffers_number)
+			b->count = dev->buffers_number;
+
+		/* make sure that outbufs_list contains buffers from 0 to used_buffers-1
+		 * actually, it will have been already populated via v4l2_loopback_init()
+		 * at this point */
+		if (list_empty(&dev->outbufs_list)) {
+			for (i = 0; i < dev->used_buffers; ++i)
+				list_add_tail(&dev->buffers[i].list_head,
+					      &dev->outbufs_list);
+		}
+
+		/* also, if dev->used_buffers is going to be decreased, we should remove
+		 * out-of-range buffers from outbufs_list, and fix bufpos2index mapping */
+		if (b->count < dev->used_buffers) {
+			struct v4l2l_buffer *pos, *n;
+
+			list_for_each_entry_safe (pos, n, &dev->outbufs_list,
+						  list_head) {
+				if (pos->buffer.index >= b->count)
+					list_del(&pos->list_head);
+			}
+
+			/* after we update dev->used_buffers, buffers in outbufs_list will
+			 * correspond to dev->write_position + [0;b->count-1] range */
+			i = dev->write_position;
+			list_for_each_entry (pos, &dev->outbufs_list,
+					     list_head) {
+				dev->bufpos2index[i % b->count] =
+					pos->buffer.index;
+				++i;
+			}
+		}
+
+		opener->buffers_number = b->count;
+		if (opener->buffers_number < dev->used_buffers)
+			dev->used_buffers = opener->buffers_number;
+		return 0;
+	default:
+		return -EINVAL;
+	}
+}
+
+/* returns buffer asked for;
+ * give app as many buffers as it wants, if it less than MAX,
+ * but map them in our inner buffers
+ * called on VIDIOC_QUERYBUF
+ */
+static int vidioc_querybuf(struct file *file, void *fh, struct v4l2_buffer *b)
+{
+	enum v4l2_buf_type type;
+	int index;
+	struct v4l2_loopback_device *dev;
+	struct v4l2_loopback_opener *opener;
+
+	MARK();
+
+	/* remember the caller's type/index: *b is overwritten below */
+	type = b->type;
+	index = b->index;
+	dev = v4l2loopback_getdevice(file);
+	opener = fh_to_opener(fh);
+
+	if ((b->type != V4L2_BUF_TYPE_VIDEO_CAPTURE) &&
+	    (b->type != V4L2_BUF_TYPE_VIDEO_OUTPUT)) {
+		return -EINVAL;
+	}
+	if (b->index > max_buffers)
+		return -EINVAL;
+
+	if (opener->timeout_image_io)
+		*b = dev->timeout_image_buffer.buffer;
+	else
+		*b = dev->buffers[b->index % dev->used_buffers].buffer;
+
+	b->type = type;
+	b->index = index;
+	dprintkrw("buffer type: %d (of %d with size=%ld)\n", b->memory,
+		  dev->buffers_number, dev->buffer_size);
+
+	/*  Hopefully fix 'DQBUF return bad index if queue bigger then 2 for capture'
+            https://github.com/umlaeute/v4l2loopback/issues/60 */
+	b->flags &= ~V4L2_BUF_FLAG_DONE;
+	b->flags |= V4L2_BUF_FLAG_QUEUED;
+
+	return 0;
+}
+
+/* bookkeeping after a frame has been written into 'buf': records the buffer
+ * at the current write position, moves it to the tail of outbufs_list,
+ * advances write_position, resets reread_count and re-evaluates the timers
+ */
+static void buffer_written(struct v4l2_loopback_device *dev,
+			   struct v4l2l_buffer *buf)
+{
+	del_timer_sync(&dev->sustain_timer);
+	del_timer_sync(&dev->timeout_timer);
+	spin_lock_bh(&dev->lock);
+
+	dev->bufpos2index[dev->write_position % dev->used_buffers] =
+		buf->buffer.index;
+	list_move_tail(&buf->list_head, &dev->outbufs_list);
+	++dev->write_position;
+	dev->reread_count = 0;
+
+	check_timers(dev);
+	spin_unlock_bh(&dev->lock);
+}
+
+/* put buffer to queue
+ * called on VIDIOC_QBUF
+ */
+static int vidioc_qbuf(struct file *file, void *fh, struct v4l2_buffer *buf)
+{
+	struct v4l2_loopback_device *dev;
+	struct v4l2_loopback_opener *opener;
+	struct v4l2l_buffer *b;
+	int index;
+
+	dev = v4l2loopback_getdevice(file);
+	opener = fh_to_opener(fh);
+
+	if (buf->index > max_buffers)
+		return -EINVAL;
+	if (opener->timeout_image_io)
+		return 0;
+
+	index = buf->index % dev->used_buffers;
+	b = &dev->buffers[index];
+
+	switch (buf->type) {
+	case V4L2_BUF_TYPE_VIDEO_CAPTURE:
+		dprintkrw("capture QBUF index: %d\n", index);
+		set_queued(b);
+		return 0;
+	case V4L2_BUF_TYPE_VIDEO_OUTPUT:
+		dprintkrw("output QBUF pos: %d index: %d\n",
+			  dev->write_position, index);
+		/* an all-zero timestamp means "not set by the writer" */
+		if (buf->timestamp.tv_sec == 0 && buf->timestamp.tv_usec == 0)
+			v4l2l_get_timestamp(&b->buffer);
+		else
+			b->buffer.timestamp = buf->timestamp;
+		b->buffer.bytesused = buf->bytesused;
+		set_done(b);
+		buffer_written(dev, b);
+
+		/*  Hopefully fix 'DQBUF return bad index if queue bigger then 2 for capture'
+                    https://github.com/umlaeute/v4l2loopback/issues/60 */
+		buf->flags &= ~V4L2_BUF_FLAG_DONE;
+		buf->flags |= V4L2_BUF_FLAG_QUEUED;
+
+		wake_up_all(&dev->read_event);
+		return 0;
+	default:
+		return -EINVAL;
+	}
+}
+
+/* returns non-zero if 'opener' has something to read: a newly written
+ * frame, a pending re-read, or a timeout that has happened
+ */
+static int can_read(struct v4l2_loopback_device *dev,
+		    struct v4l2_loopback_opener *opener)
+{
+	int ret;
+
+	spin_lock_bh(&dev->lock);
+	check_timers(dev);
+	ret = dev->write_position > opener->read_position ||
+	      dev->reread_count > opener->reread_count || dev->timeout_happened;
+	spin_unlock_bh(&dev->lock);
+	return ret;
+}
+
+/* picks the index of the next buffer this opener should read.
+ * returns the buffer index (>= 0), -EAGAIN if the file is non-blocking and
+ * nothing is readable, or the (negative) result of an interrupted wait
+ */
+static int get_capture_buffer(struct file *file)
+{
+	struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file);
+	struct v4l2_loopback_opener *opener = fh_to_opener(file->private_data);
+	int pos, ret;
+	int timeout_happened;
+
+	if ((file->f_flags & O_NONBLOCK) &&
+	    (dev->write_position <= opener->read_position &&
+	     dev->reread_count <= opener->reread_count &&
+	     !dev->timeout_happened))
+		return -EAGAIN;
+
+	/* do not hand out a stale buffer when the wait was interrupted
+	 * by a signal: propagate the error to the caller instead
+	 */
+	ret = wait_event_interruptible(dev->read_event, can_read(dev, opener));
+	if (ret < 0)
+		return ret;
+
+	spin_lock_bh(&dev->lock);
+	if (dev->write_position == opener->read_position) {
+		/* nothing new: re-read the most recently written frame */
+		if (dev->reread_count > opener->reread_count + 2)
+			opener->reread_count = dev->reread_count - 1;
+		++opener->reread_count;
+		pos = (opener->read_position + dev->used_buffers - 1) %
+		      dev->used_buffers;
+	} else {
+		opener->reread_count = 0;
+		/* reader fell too far behind: skip to the newest frame */
+		if (dev->write_position >
+		    opener->read_position + dev->used_buffers)
+			opener->read_position = dev->write_position - 1;
+		pos = opener->read_position % dev->used_buffers;
+		++opener->read_position;
+	}
+	timeout_happened = dev->timeout_happened;
+	dev->timeout_happened = 0;
+	spin_unlock_bh(&dev->lock);
+
+	ret = dev->bufpos2index[pos];
+	if (timeout_happened) {
+		/* although allocated on-demand, timeout_image is freed only
+		 * in free_buffers(), so we don't need to worry about it being
+		 * deallocated suddenly */
+		memcpy(dev->image + dev->buffers[ret].buffer.m.offset,
+		       dev->timeout_image, dev->buffer_size);
+	}
+	return ret;
+}
+
+/* put buffer to dequeue
+ * called on VIDIOC_DQBUF
+ */
+static int vidioc_dqbuf(struct file *file, void *fh, struct v4l2_buffer *buf)
+{
+	struct v4l2_loopback_device *dev;
+	struct v4l2_loopback_opener *opener;
+	int index;
+	struct v4l2l_buffer *b;
+
+	dev = v4l2loopback_getdevice(file);
+	opener = fh_to_opener(fh);
+	if (opener->timeout_image_io) {
+		*buf = dev->timeout_image_buffer.buffer;
+		return 0;
+	}
+
+	switch (buf->type) {
+	case V4L2_BUF_TYPE_VIDEO_CAPTURE:
+		index = get_capture_buffer(file);
+		if (index < 0)
+			return index;
+		dprintkrw("capture DQBUF pos: %d index: %d\n",
+			  opener->read_position - 1, index);
+		if (!(dev->buffers[index].buffer.flags &
+		      V4L2_BUF_FLAG_MAPPED)) {
+			dprintk("trying to return not mapped buf[%d]\n", index);
+			return -EINVAL;
+		}
+		unset_flags(&dev->buffers[index]);
+		*buf = dev->buffers[index].buffer;
+		return 0;
+	case V4L2_BUF_TYPE_VIDEO_OUTPUT:
+		b = list_entry(dev->outbufs_list.prev, struct v4l2l_buffer,
+			       list_head);
+		list_move_tail(&b->list_head, &dev->outbufs_list);
+		dprintkrw("output DQBUF index: %d\n", b->buffer.index);
+		unset_flags(b);
+		*buf = b->buffer;
+		buf->type = V4L2_BUF_TYPE_VIDEO_OUTPUT;
+		return 0;
+	default:
+		return -EINVAL;
+	}
+}
+
+/* ------------- STREAMING ------------------- */
+
+/* start streaming
+ * called on VIDIOC_STREAMON
+ */
+static int vidioc_streamon(struct file *file, void *fh, enum v4l2_buf_type type)
+{
+	struct v4l2_loopback_device *dev;
+	struct v4l2_loopback_opener *opener;
+	MARK();
+
+	dev = v4l2loopback_getdevice(file);
+	opener = fh_to_opener(fh);
+
+	switch (type) {
+	case V4L2_BUF_TYPE_VIDEO_OUTPUT:
+		opener->type = WRITER;
+		dev->ready_for_output = 0;
+		if (!dev->ready_for_capture) {
+			int ret = allocate_buffers(dev);
+			if (ret < 0)
+				return ret;
+		}
+		dev->ready_for_capture++;
+		return 0;
+	case V4L2_BUF_TYPE_VIDEO_CAPTURE:
+		opener->type = READER;
+		if (!dev->ready_for_capture)
+			return -EIO;
+		return 0;
+	default:
+		return -EINVAL;
+	}
+	return -EINVAL;
+}
+
+/* stop streaming
+ * called on VIDIOC_STREAMOFF
+ */
+static int vidioc_streamoff(struct file *file, void *fh,
+			    enum v4l2_buf_type type)
+{
+	struct v4l2_loopback_device *dev;
+	MARK();
+	dprintk("%d\n", type);
+
+	dev = v4l2loopback_getdevice(file);
+
+	switch (type) {
+	case V4L2_BUF_TYPE_VIDEO_OUTPUT:
+		if (dev->ready_for_capture > 0)
+			dev->ready_for_capture--;
+		return 0;
+	case V4L2_BUF_TYPE_VIDEO_CAPTURE:
+		return 0;
+	default:
+		return -EINVAL;
+	}
+	return -EINVAL;
+}
+
+#ifdef CONFIG_VIDEO_V4L1_COMPAT
+/* V4L1 compatibility: reports the number and size of the buffers */
+static int vidiocgmbuf(struct file *file, void *fh, struct video_mbuf *p)
+{
+	struct v4l2_loopback_device *dev;
+	MARK();
+
+	dev = v4l2loopback_getdevice(file);
+	p->frames = dev->buffers_number;
+	p->offsets[0] = 0;
+	p->offsets[1] = 0;
+	p->size = dev->buffer_size;
+	return 0;
+}
+#endif
+
+/* subscribes to events; only V4L2_EVENT_CTRL is supported, everything else
+ * yields -EINVAL
+ */
+static int vidioc_subscribe_event(struct v4l2_fh *fh,
+				  const struct v4l2_event_subscription *sub)
+{
+	switch (sub->type) {
+	case V4L2_EVENT_CTRL:
+		return v4l2_ctrl_subscribe_event(fh, sub);
+	}
+
+	return -EINVAL;
+}
+
+/* file operations */
+static void vm_open(struct vm_area_struct *vma) +{ + struct v4l2l_buffer *buf; + MARK(); + + buf = vma->vm_private_data; + buf->use_count++; +} + +static void vm_close(struct vm_area_struct *vma) +{ + struct v4l2l_buffer *buf; + MARK(); + + buf = vma->vm_private_data; + buf->use_count--; +} + +static struct vm_operations_struct vm_ops = { + .open = vm_open, + .close = vm_close, +}; + +static int v4l2_loopback_mmap(struct file *file, struct vm_area_struct *vma) +{ + unsigned long addr; + unsigned long start; + unsigned long size; + struct v4l2_loopback_device *dev; + struct v4l2_loopback_opener *opener; + struct v4l2l_buffer *buffer = NULL; + MARK(); + + start = (unsigned long)vma->vm_start; + size = (unsigned long)(vma->vm_end - vma->vm_start); + + dev = v4l2loopback_getdevice(file); + opener = fh_to_opener(file->private_data); + + if (size > dev->buffer_size) { + dprintk("userspace tries to mmap too much, fail\n"); + return -EINVAL; + } + if (opener->timeout_image_io) { + /* we are going to map the timeout_image_buffer */ + if ((vma->vm_pgoff << PAGE_SHIFT) != + dev->buffer_size * MAX_BUFFERS) { + dprintk("invalid mmap offset for timeout_image_io mode\n"); + return -EINVAL; + } + } else if ((vma->vm_pgoff << PAGE_SHIFT) > + dev->buffer_size * (dev->buffers_number - 1)) { + dprintk("userspace tries to mmap too far, fail\n"); + return -EINVAL; + } + + /* FIXXXXXME: allocation should not happen here! 
*/ + if (NULL == dev->image) + if (allocate_buffers(dev) < 0) + return -EINVAL; + + if (opener->timeout_image_io) { + buffer = &dev->timeout_image_buffer; + addr = (unsigned long)dev->timeout_image; + } else { + int i; + for (i = 0; i < dev->buffers_number; ++i) { + buffer = &dev->buffers[i]; + if ((buffer->buffer.m.offset >> PAGE_SHIFT) == + vma->vm_pgoff) + break; + } + + if (NULL == buffer) + return -EINVAL; + + addr = (unsigned long)dev->image + + (vma->vm_pgoff << PAGE_SHIFT); + } + + while (size > 0) { + struct page *page; + + page = (void *)vmalloc_to_page((void *)addr); + + if (vm_insert_page(vma, start, page) < 0) + return -EAGAIN; + + start += PAGE_SIZE; + addr += PAGE_SIZE; + size -= PAGE_SIZE; + } + + vma->vm_ops = &vm_ops; + vma->vm_private_data = buffer; + buffer->buffer.flags |= V4L2_BUF_FLAG_MAPPED; + + vm_open(vma); + + MARK(); + return 0; +} + +static unsigned int v4l2_loopback_poll(struct file *file, + struct poll_table_struct *pts) +{ + struct v4l2_loopback_opener *opener; + struct v4l2_loopback_device *dev; + __poll_t req_events = poll_requested_events(pts); + int ret_mask = 0; + MARK(); + + opener = fh_to_opener(file->private_data); + dev = v4l2loopback_getdevice(file); + + if (req_events & POLLPRI) { + if (!v4l2_event_pending(&opener->fh)) + poll_wait(file, &opener->fh.wait, pts); + if (v4l2_event_pending(&opener->fh)) { + ret_mask |= POLLPRI; + if (!(req_events & DEFAULT_POLLMASK)) + return ret_mask; + } + } + + switch (opener->type) { + case WRITER: + ret_mask |= POLLOUT | POLLWRNORM; + break; + case READER: + if (!can_read(dev, opener)) { + if (ret_mask) + return ret_mask; + poll_wait(file, &dev->read_event, pts); + } + if (can_read(dev, opener)) + ret_mask |= POLLIN | POLLRDNORM; + if (v4l2_event_pending(&opener->fh)) + ret_mask |= POLLPRI; + break; + default: + break; + } + + MARK(); + return ret_mask; +} + +/* do not want to limit device opens, it can be as many readers as user want, + * writers are limited by means of setting writer 
field */ +static int v4l2_loopback_open(struct file *file) +{ + struct v4l2_loopback_device *dev; + struct v4l2_loopback_opener *opener; + MARK(); + dev = v4l2loopback_getdevice(file); + if (dev->open_count.counter >= dev->max_openers) + return -EBUSY; + /* kfree on close */ + opener = kzalloc(sizeof(*opener), GFP_KERNEL); + if (opener == NULL) + return -ENOMEM; + + v4l2_fh_init(&opener->fh, video_devdata(file)); + file->private_data = &opener->fh; + atomic_inc(&dev->open_count); + + opener->timeout_image_io = dev->timeout_image_io; + dev->timeout_image_io = 0; + + if (opener->timeout_image_io) { + int r = allocate_timeout_image(dev); + + if (r < 0) { + dprintk("timeout image allocation failed\n"); + return r; + } + } + + v4l2_fh_add(&opener->fh); + dprintk("opened dev:%p with image:%p\n", dev, dev ? dev->image : NULL); + MARK(); + return 0; +} + +static int v4l2_loopback_close(struct file *file) +{ + struct v4l2_loopback_opener *opener; + struct v4l2_loopback_device *dev; + int iswriter = 0; + MARK(); + + opener = fh_to_opener(file->private_data); + dev = v4l2loopback_getdevice(file); + + if (WRITER == opener->type) + iswriter = 1; + + atomic_dec(&dev->open_count); + if (dev->open_count.counter == 0) { + del_timer_sync(&dev->sustain_timer); + del_timer_sync(&dev->timeout_timer); + } + try_free_buffers(dev); + + v4l2_fh_del(&opener->fh); + v4l2_fh_exit(&opener->fh); + + kfree(opener); + if (iswriter) { + dev->ready_for_output = 1; + } + MARK(); + return 0; +} + +static ssize_t v4l2_loopback_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + int read_index; + struct v4l2_loopback_device *dev; + struct v4l2_buffer *b; + MARK(); + + dev = v4l2loopback_getdevice(file); + + read_index = get_capture_buffer(file); + if (read_index < 0) + return read_index; + if (count > dev->buffer_size) + count = dev->buffer_size; + b = &dev->buffers[read_index].buffer; + if (count > b->bytesused) + count = b->bytesused; + if (copy_to_user((void *)buf, (void 
*)(dev->image + b->m.offset),
+			 count)) {
+		printk(KERN_ERR
+		       "v4l2-loopback: failed copy_to_user() in read buf\n");
+		return -EFAULT;
+	}
+	dprintkrw("leave v4l2_loopback_read()\n");
+	return count;
+}
+
+/*
+ * write() file operation.
+ *
+ * Copies at most dev->buffer_size bytes of user data into the buffer slot
+ * selected by write_position modulo used_buffers, stamps the buffer and
+ * wakes everything sleeping on dev->read_event.  The first write also
+ * allocates the image memory and marks the device ready for capture.
+ *
+ * Returns the (possibly clamped) number of bytes stored, -EFAULT when the
+ * user buffer cannot be read, or the error of allocate_buffers().
+ */
+static ssize_t v4l2_loopback_write(struct file *file, const char __user *buf,
+				   size_t count, loff_t *ppos)
+{
+	struct v4l2_loopback_device *dev;
+	int write_index;
+	struct v4l2_buffer *b;
+	MARK();
+
+	dev = v4l2loopback_getdevice(file);
+
+	/* there's at least one writer, so don't stop announcing output capabilities */
+	dev->ready_for_output = 0;
+
+	if (!dev->ready_for_capture) {
+		int ret = allocate_buffers(dev);
+		if (ret < 0)
+			return ret;
+		dev->ready_for_capture = 1;
+	}
+	dprintkrw("v4l2_loopback_write() trying to write %zu bytes\n", count);
+	/* a single write never spans more than one buffer */
+	if (count > dev->buffer_size)
+		count = dev->buffer_size;
+
+	/* NOTE(review): presumably used_buffers is never 0 here - confirm,
+	 * otherwise this is a division by zero */
+	write_index = dev->write_position % dev->used_buffers;
+	b = &dev->buffers[write_index].buffer;
+
+	if (copy_from_user((void *)(dev->image + b->m.offset), (void *)buf,
+			   count)) {
+		printk(KERN_ERR
+		       "v4l2-loopback: failed copy_from_user() in write buf, could not write %zu\n",
+		       count);
+		return -EFAULT;
+	}
+	v4l2l_get_timestamp(b);
+	b->bytesused = count;
+	b->sequence = dev->write_position;
+	buffer_written(dev, &dev->buffers[write_index]);
+	wake_up_all(&dev->read_event);
+	dprintkrw("leave v4l2_loopback_write()\n");
+	return count;
+}
+
+/* init functions */
+/* frees buffers, if already allocated */
+static int free_buffers(struct v4l2_loopback_device *dev)
+{
+	MARK();
+	dprintk("freeing image@%p for dev:%p\n", dev ?
dev->image : NULL, dev);
+	if (dev->image) {
+		vfree(dev->image);
+		dev->image = NULL;
+	}
+	if (dev->timeout_image) {
+		vfree(dev->timeout_image);
+		dev->timeout_image = NULL;
+	}
+	dev->imagesize = 0;
+
+	return 0;
+}
+/* frees buffers, if they are no longer needed */
+static void try_free_buffers(struct v4l2_loopback_device *dev)
+{
+	MARK();
+	/* keep everything while somebody has the device open or the
+	 * format is pinned via keep_format */
+	if (0 == dev->open_count.counter && !dev->keep_format) {
+		free_buffers(dev);
+		dev->ready_for_capture = 0;
+		dev->buffer_size = 0;
+		dev->write_position = 0;
+	}
+}
+/*
+ * allocates buffers, if buffer_size is set
+ *
+ * Allocates buffer_size * buffers_number bytes of image memory and, when a
+ * timeout is configured, the timeout image as well; then initialises the
+ * buffer descriptors.  An existing allocation of the right size is reused;
+ * one of a different size is replaced only if there is exactly one opener.
+ *
+ * Returns 0 on success, -EINVAL if buffer_size is 0 or the image is in use
+ * by several openers, -ENOMEM (or the allocate_timeout_image() error) when
+ * memory is exhausted.  On failure no image memory is left allocated and
+ * dev->imagesize is 0.
+ */
+static int allocate_buffers(struct v4l2_loopback_device *dev)
+{
+	int err;
+
+	MARK();
+	/* vfree on close file operation in case no open handles left */
+	if (0 == dev->buffer_size)
+		return -EINVAL;
+
+	if (dev->image) {
+		dprintk("allocating buffers again: %ld %ld\n",
+			dev->buffer_size * dev->buffers_number, dev->imagesize);
+		/* FIXME: prevent double allocation more intelligently! */
+		if (dev->buffer_size * dev->buffers_number == dev->imagesize)
+			return 0;
+
+		/* if there is only one writer, no problem should occur */
+		if (dev->open_count.counter == 1)
+			free_buffers(dev);
+		else
+			return -EINVAL;
+	}
+
+	dev->imagesize = dev->buffer_size * dev->buffers_number;
+
+	dprintk("allocating %ld = %ldx%d\n", dev->imagesize, dev->buffer_size,
+		dev->buffers_number);
+
+	dev->image = vmalloc(dev->imagesize);
+	if (dev->image == NULL) {
+		/* do not advertise a size for memory we do not have */
+		dev->imagesize = 0;
+		return -ENOMEM;
+	}
+
+	if (dev->timeout_jiffies > 0) {
+		/* a missing timeout image would be dereferenced later on,
+		 * so treat its allocation failure as fatal, too */
+		err = allocate_timeout_image(dev);
+		if (err < 0) {
+			free_buffers(dev);
+			return err;
+		}
+	}
+	dprintk("vmallocated %ld bytes\n", dev->imagesize);
+	MARK();
+	init_buffers(dev);
+	return 0;
+}
+
+/* init inner buffers, they are capture mode and flags are set as
+ * for capture mod buffers */
+static void init_buffers(struct v4l2_loopback_device *dev)
+{
+	int i;
+	int buffer_size;
+	int bytesused;
+	MARK();
+
+	buffer_size = dev->buffer_size;
+	bytesused = dev->pix_format.sizeimage;
+
+	for (i = 0; i < dev->buffers_number; ++i) {
+		struct v4l2_buffer *b = &dev->buffers[i].buffer;
+		b->index = i;
+
b->bytesused = bytesused;
+		b->length = buffer_size;
+		b->field = V4L2_FIELD_NONE;
+		b->flags = 0;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 6, 1)
+		b->input = 0;
+#endif
+		/* buffers are laid out back to back inside dev->image */
+		b->m.offset = i * buffer_size;
+		b->memory = V4L2_MEMORY_MMAP;
+		b->sequence = 0;
+		b->timestamp.tv_sec = 0;
+		b->timestamp.tv_usec = 0;
+		b->type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
+
+		v4l2l_get_timestamp(b);
+	}
+	/* the timeout buffer starts as a copy of buffer 0 and gets an offset
+	 * past the last slot any regular buffer can use (MAX_BUFFERS) */
+	dev->timeout_image_buffer = dev->buffers[0];
+	dev->timeout_image_buffer.buffer.m.offset = MAX_BUFFERS * buffer_size;
+	MARK();
+}
+
+/*
+ * Allocates the zero-filled timeout image (buffer_size bytes) unless it
+ * already exists.
+ *
+ * Returns 0 on success (or if already allocated), -EINVAL when buffer_size
+ * is not set, -ENOMEM when the allocation fails.
+ */
+static int allocate_timeout_image(struct v4l2_loopback_device *dev)
+{
+	MARK();
+	if (dev->buffer_size <= 0)
+		return -EINVAL;
+
+	if (dev->timeout_image == NULL) {
+		dev->timeout_image = v4l2l_vzalloc(dev->buffer_size);
+		if (dev->timeout_image == NULL)
+			return -ENOMEM;
+	}
+	return 0;
+}
+
+/* fills in the video device fields; the device is not registered here
+ * (v4l2_loopback_add() calls video_register_device() later).
+ * the 'nr' argument is not used by this function. */
+static void init_vdev(struct video_device *vdev, int nr)
+{
+	MARK();
+
+#ifdef V4L2LOOPBACK_WITH_STD
+	vdev->tvnorms = V4L2_STD_ALL;
+#endif /* V4L2LOOPBACK_WITH_STD */
+
+	vdev->vfl_type = VFL_TYPE_VIDEO;
+	vdev->fops = &v4l2_loopback_fops;
+	vdev->ioctl_ops = &v4l2_loopback_ioctl_ops;
+	vdev->release = &video_device_release;
+	vdev->minor = -1;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0)
+	vdev->device_caps = V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_VIDEO_OUTPUT |
+			    V4L2_CAP_READWRITE | V4L2_CAP_STREAMING;
+#ifdef V4L2_CAP_VIDEO_M2M
+	vdev->device_caps |= V4L2_CAP_VIDEO_M2M;
+#endif
+#endif /* >=linux-4.7.0 */
+
+	/* the 'if' guards exactly one statement; which one is picked by the
+	 * preprocessor, as the debug field was renamed between kernels */
+	if (debug > 1)
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 20, 0)
+		vdev->debug = V4L2_DEBUG_IOCTL | V4L2_DEBUG_IOCTL_ARG;
+#else
+		vdev->dev_debug =
+			V4L2_DEV_DEBUG_IOCTL | V4L2_DEV_DEBUG_IOCTL_ARG;
+#endif
+
+	/* since kernel-3.7, there is a new field 'vfl_dir' that has to be
+	 * set to VFL_DIR_M2M for bidirectional devices */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)
+	vdev->vfl_dir = VFL_DIR_M2M;
+#endif
+
+	MARK();
+}
+
+/* init default capture parameters, only fps may be changed in future */
+static void init_capture_param(struct v4l2_captureparm *capture_param)
+{
+	MARK();
+	capture_param->capability = 0;
+	capture_param->capturemode = 0;
+	capture_param->extendedmode = 0;
+	/* taken from the module-wide max_buffers setting */
+	capture_param->readbuffers = max_buffers;
+	/* default frame interval: 1/30 s */
+	capture_param->timeperframe.numerator = 1;
+	capture_param->timeperframe.denominator = 30;
+}
+
+/*
+ * Arms the timeout and sustain timers if they are enabled but not
+ * currently pending.  Does nothing until the device is ready for capture.
+ */
+static void check_timers(struct v4l2_loopback_device *dev)
+{
+	if (!dev->ready_for_capture)
+		return;
+
+	if (dev->timeout_jiffies > 0 && !timer_pending(&dev->timeout_timer))
+		mod_timer(&dev->timeout_timer, jiffies + dev->timeout_jiffies);
+	/* first sustain tick is scheduled 1.5 frame intervals from now */
+	if (dev->sustain_framerate && !timer_pending(&dev->sustain_timer))
+		mod_timer(&dev->sustain_timer,
+			  jiffies + dev->frame_jiffies * 3 / 2);
+}
+/*
+ * Sustain-framerate timer callback: counts a re-read of the current frame,
+ * re-arms itself and wakes everything sleeping on dev->read_event.
+ * The function head depends on the kernel's timer API (HAVE_TIMER_SETUP).
+ */
+#ifdef HAVE_TIMER_SETUP
+static void sustain_timer_clb(struct timer_list *t)
+{
+	struct v4l2_loopback_device *dev = from_timer(dev, t, sustain_timer);
+#else
+static void sustain_timer_clb(unsigned long nr)
+{
+	/* NOTE(review): idr_find() result is used unchecked - presumably the
+	 * timers are always stopped before a device is removed; confirm */
+	struct v4l2_loopback_device *dev =
+		idr_find(&v4l2loopback_index_idr, nr);
+#endif
+	spin_lock(&dev->lock);
+	if (dev->sustain_framerate) {
+		dev->reread_count++;
+		dprintkrw("reread: %d %d\n", dev->write_position,
+			  dev->reread_count);
+		/* after the first re-read wait only half a frame (at least
+		 * one jiffy), afterwards a full frame interval */
+		if (dev->reread_count == 1)
+			mod_timer(&dev->sustain_timer,
+				  jiffies + max(1UL, dev->frame_jiffies / 2));
+		else
+			mod_timer(&dev->sustain_timer,
+				  jiffies + dev->frame_jiffies);
+		wake_up_all(&dev->read_event);
+	}
+	spin_unlock(&dev->lock);
+}
+/*
+ * Timeout timer callback: flags that the timeout elapsed, re-arms itself
+ * and wakes everything sleeping on dev->read_event.
+ * The function head depends on the kernel's timer API (HAVE_TIMER_SETUP).
+ */
+#ifdef HAVE_TIMER_SETUP
+static void timeout_timer_clb(struct timer_list *t)
+{
+	struct v4l2_loopback_device *dev = from_timer(dev, t, timeout_timer);
+#else
+static void timeout_timer_clb(unsigned long nr)
+{
+	/* NOTE(review): same unchecked idr_find() as in sustain_timer_clb() */
+	struct v4l2_loopback_device *dev =
+		idr_find(&v4l2loopback_index_idr, nr);
+#endif
+	spin_lock(&dev->lock);
+	/* a timeout of 0 means the feature is off: do not re-arm */
+	if (dev->timeout_jiffies > 0) {
+		dev->timeout_happened = 1;
+		mod_timer(&dev->timeout_timer, jiffies + dev->timeout_jiffies);
+		wake_up_all(&dev->read_event);
+	}
+	spin_unlock(&dev->lock);
+}
+
+/* init loopback main structure */
+#define
DEFAULT_FROM_CONF(confmember, default_condition, default_value) \ + ((conf) ? \ + ((conf->confmember default_condition) ? (default_value) : \ + (conf->confmember)) : \ + default_value) + +static int v4l2_loopback_add(struct v4l2_loopback_config *conf, int *ret_nr) +{ + struct v4l2_loopback_device *dev; + struct v4l2_ctrl_handler *hdl; + + int err = -ENOMEM; + + int _max_width = DEFAULT_FROM_CONF( + max_width, <= V4L2LOOPBACK_SIZE_MIN_WIDTH, max_width); + int _max_height = DEFAULT_FROM_CONF( + max_height, <= V4L2LOOPBACK_SIZE_MIN_HEIGHT, max_height); + bool _announce_all_caps = (conf && conf->announce_all_caps >= 0) ? + (conf->announce_all_caps) : + V4L2LOOPBACK_DEFAULT_EXCLUSIVECAPS; + + int _max_buffers = DEFAULT_FROM_CONF(max_buffers, <= 0, max_buffers); + int _max_openers = DEFAULT_FROM_CONF(max_openers, <= 0, max_openers); + + int nr = -1; + if (conf) { + if (conf->capture_nr >= 0 && + conf->output_nr == conf->capture_nr) { + nr = conf->capture_nr; + } else if (conf->capture_nr < 0 && conf->output_nr < 0) { + nr = -1; + } else if (conf->capture_nr < 0) { + nr = conf->output_nr; + } else if (conf->output_nr < 0) { + nr = conf->capture_nr; + } else { + printk(KERN_ERR + "split OUTPUT and CAPTURE devices not yet supported."); + printk(KERN_INFO + "both devices must have the same number (%d != %d).", + conf->output_nr, conf->capture_nr); + return -EINVAL; + } + } + + if (idr_find(&v4l2loopback_index_idr, nr)) + return -EEXIST; + + dprintk("creating v4l2loopback-device #%d\n", nr); + dev = kzalloc(sizeof(*dev), GFP_KERNEL); + if (!dev) + return -ENOMEM; + + /* allocate id, if @id >= 0, we're requesting that specific id */ + if (nr >= 0) { + err = idr_alloc(&v4l2loopback_index_idr, dev, nr, nr + 1, + GFP_KERNEL); + if (err == -ENOSPC) + err = -EEXIST; + } else { + err = idr_alloc(&v4l2loopback_index_idr, dev, 0, 0, GFP_KERNEL); + } + if (err < 0) + goto out_free_dev; + nr = err; + err = -ENOMEM; + + if (conf && conf->card_label && *(conf->card_label)) { + 
snprintf(dev->card_label, sizeof(dev->card_label), "%s", + conf->card_label); + } else { + snprintf(dev->card_label, sizeof(dev->card_label), + "Dummy video device (0x%04X)", nr); + } + snprintf(dev->v4l2_dev.name, sizeof(dev->v4l2_dev.name), + "v4l2loopback-%03d", nr); + + err = v4l2_device_register(NULL, &dev->v4l2_dev); + if (err) + goto out_free_idr; + MARK(); + + dev->vdev = video_device_alloc(); + if (dev->vdev == NULL) { + err = -ENOMEM; + goto out_unregister; + } + video_set_drvdata(dev->vdev, + kzalloc(sizeof(struct v4l2loopback_private), + GFP_KERNEL)); + if (video_get_drvdata(dev->vdev) == NULL) { + err = -ENOMEM; + goto out_unregister; + } + + MARK(); + snprintf(dev->vdev->name, sizeof(dev->vdev->name), dev->card_label); + + ((struct v4l2loopback_private *)video_get_drvdata(dev->vdev)) + ->device_nr = nr; + + init_vdev(dev->vdev, nr); + dev->vdev->v4l2_dev = &dev->v4l2_dev; + init_capture_param(&dev->capture_param); + err = set_timeperframe(dev, &dev->capture_param.timeperframe); + if (err) + goto out_unregister; + dev->keep_format = 0; + dev->sustain_framerate = 0; + + dev->announce_all_caps = _announce_all_caps; + dev->max_width = _max_width; + dev->max_height = _max_height; + dev->max_openers = _max_openers; + dev->buffers_number = dev->used_buffers = _max_buffers; + + dev->write_position = 0; + + MARK(); + spin_lock_init(&dev->lock); + INIT_LIST_HEAD(&dev->outbufs_list); + if (list_empty(&dev->outbufs_list)) { + int i; + + for (i = 0; i < dev->used_buffers; ++i) + list_add_tail(&dev->buffers[i].list_head, + &dev->outbufs_list); + } + memset(dev->bufpos2index, 0, sizeof(dev->bufpos2index)); + atomic_set(&dev->open_count, 0); + dev->ready_for_capture = 0; + dev->ready_for_output = 1; + + dev->buffer_size = 0; + dev->image = NULL; + dev->imagesize = 0; +#ifdef HAVE_TIMER_SETUP + timer_setup(&dev->sustain_timer, sustain_timer_clb, 0); + timer_setup(&dev->timeout_timer, timeout_timer_clb, 0); +#else + setup_timer(&dev->sustain_timer, sustain_timer_clb, 
nr); + setup_timer(&dev->timeout_timer, timeout_timer_clb, nr); +#endif + dev->reread_count = 0; + dev->timeout_jiffies = 0; + dev->timeout_image = NULL; + dev->timeout_happened = 0; + + hdl = &dev->ctrl_handler; + err = v4l2_ctrl_handler_init(hdl, 4); + if (err) + goto out_unregister; + v4l2_ctrl_new_custom(hdl, &v4l2loopback_ctrl_keepformat, NULL); + v4l2_ctrl_new_custom(hdl, &v4l2loopback_ctrl_sustainframerate, NULL); + v4l2_ctrl_new_custom(hdl, &v4l2loopback_ctrl_timeout, NULL); + v4l2_ctrl_new_custom(hdl, &v4l2loopback_ctrl_timeoutimageio, NULL); + if (hdl->error) { + err = hdl->error; + goto out_free_handler; + } + dev->v4l2_dev.ctrl_handler = hdl; + + err = v4l2_ctrl_handler_setup(hdl); + if (err) + goto out_free_handler; + + /* FIXME set buffers to 0 */ + + /* Set initial format */ + dev->pix_format.width = 0; /* V4L2LOOPBACK_SIZE_DEFAULT_WIDTH; */ + dev->pix_format.height = 0; /* V4L2LOOPBACK_SIZE_DEFAULT_HEIGHT; */ + dev->pix_format.pixelformat = formats[0].fourcc; + dev->pix_format.colorspace = + V4L2_COLORSPACE_SRGB; /* do we need to set this ? 
*/
+	dev->pix_format.field = V4L2_FIELD_NONE;
+
+	dev->buffer_size = PAGE_ALIGN(dev->pix_format.sizeimage);
+	dprintk("buffer_size = %ld (=%d)\n", dev->buffer_size,
+		dev->pix_format.sizeimage);
+	err = allocate_buffers(dev);
+	if (err && dev->buffer_size)
+		goto out_free_handler;
+
+	init_waitqueue_head(&dev->read_event);
+
+	/* register the device -> it creates /dev/video* */
+	if (video_register_device(dev->vdev, VFL_TYPE_VIDEO, nr) < 0) {
+		printk(KERN_ERR
+		       "v4l2loopback: failed video_register_device()\n");
+		err = -EFAULT;
+		goto out_free_device;
+	}
+	v4l2loopback_create_sysfs(dev->vdev);
+
+	MARK();
+	if (ret_nr)
+		*ret_nr = dev->vdev->num;
+	return 0;
+
+	/* error unwinding: every label releases what was set up before the
+	 * corresponding goto, then falls through to the earlier stages */
+out_free_device:
+out_free_handler:
+	/* no-op unless allocate_buffers() succeeded above */
+	free_buffers(dev);
+	v4l2_ctrl_handler_free(&dev->ctrl_handler);
+out_unregister:
+	/* dev->vdev is still NULL only if video_device_alloc() itself
+	 * failed; otherwise free the (never registered) video_device and
+	 * its private data - kfree(NULL) is fine if that was not set */
+	if (dev->vdev) {
+		kfree(video_get_drvdata(dev->vdev));
+		video_device_release(dev->vdev);
+	}
+	v4l2_device_unregister(&dev->v4l2_dev);
+out_free_idr:
+	idr_remove(&v4l2loopback_index_idr, nr);
+out_free_dev:
+	kfree(dev);
+	return err;
+}
+
+/* tears down a loopback device; the caller has to take it out of
+ * v4l2loopback_index_idr itself */
+static void v4l2_loopback_remove(struct v4l2_loopback_device *dev)
+{
+	free_buffers(dev);
+	v4l2loopback_remove_sysfs(dev->vdev);
+	kfree(video_get_drvdata(dev->vdev));
+	video_unregister_device(dev->vdev);
+	v4l2_device_unregister(&dev->v4l2_dev);
+	v4l2_ctrl_handler_free(&dev->ctrl_handler);
+	kfree(dev);
+}
+
+/*
+ * ioctl handler of the control device.
+ *
+ * V4L2LOOPBACK_CTL_ADD:    parm is NULL or a user pointer to a
+ *                          struct v4l2_loopback_config; returns the number
+ *                          of the new device.
+ * V4L2LOOPBACK_CTL_REMOVE: parm is the device number; returns 0, or -EBUSY
+ *                          while the device is open.
+ * V4L2LOOPBACK_CTL_QUERY:  parm is a user pointer to a config naming the
+ *                          device; it is filled in and 0 is returned.
+ *
+ * Errors: -EFAULT for unreadable/unwritable user memory, -EINVAL for a
+ * missing or inconsistent config, -ENOSYS for unknown commands, otherwise
+ * the error of the called helper.  All commands run under
+ * v4l2loopback_ctl_mutex.
+ */
+static long v4l2loopback_control_ioctl(struct file *file, unsigned int cmd,
+				       unsigned long parm)
+{
+	struct v4l2_loopback_device *dev;
+	struct v4l2_loopback_config conf;
+	struct v4l2_loopback_config *confptr = &conf;
+	int device_nr;
+	int ret;
+
+	ret = mutex_lock_killable(&v4l2loopback_ctl_mutex);
+	if (ret)
+		return ret;
+
+	ret = -EINVAL;
+	switch (cmd) {
+		/* add a v4l2loopback device (pair), based on the user-provided specs */
+	case V4L2LOOPBACK_CTL_ADD:
+		if (parm) {
+			/* copy_from_user() returns the number of bytes NOT
+			 * copied, never a negative errno */
+			if (copy_from_user(&conf, (void __user *)parm,
+					   sizeof(conf))) {
+				ret = -EFAULT;
+				break;
+			}
+			/* userspace need not have NUL-terminated the label */
+			conf.card_label[sizeof(conf.card_label) - 1] = 0;
+		} else
+			confptr = NULL;
+		ret = v4l2_loopback_add(confptr, &device_nr);
+		if (ret >= 0)
+			ret = device_nr;
+		break;
+		/* remove a v4l2loopback device (both capture and output) */
+	case V4L2LOOPBACK_CTL_REMOVE:
+		ret = v4l2loopback_lookup((int)parm, &dev);
+		if (ret >= 0 && dev) {
+			int nr = ret;
+			ret = -EBUSY;
+			if (dev->open_count.counter > 0)
+				break;
+			idr_remove(&v4l2loopback_index_idr, nr);
+			v4l2_loopback_remove(dev);
+			ret = 0;
+		}
+		break;
+		/* get information for a loopback device.
+		 * this is mostly about limits (which cannot be queried directly
+		 * with VIDIOC_G_FMT and friends)
+		 */
+	case V4L2LOOPBACK_CTL_QUERY:
+		if (!parm)
+			break;
+		if (copy_from_user(&conf, (void __user *)parm, sizeof(conf))) {
+			ret = -EFAULT;
+			break;
+		}
+		device_nr =
+			(conf.output_nr < 0) ? conf.capture_nr : conf.output_nr;
+		MARK();
+		/* get the device from either capture_nr or output_nr (whatever is valid) */
+		ret = v4l2loopback_lookup(device_nr, &dev);
+		if (ret < 0)
+			break;
+		MARK();
+		/* if we got the device from output_nr and there is a valid capture_nr,
+		 * make sure that both refer to the same device (or bail out)
+		 */
+		if ((device_nr != conf.capture_nr) && (conf.capture_nr >= 0) &&
+		    (ret != v4l2loopback_lookup(conf.capture_nr, NULL))) {
+			/* bail out with an error, not with the device number
+			 * that the first lookup left in ret */
+			ret = -EINVAL;
+			break;
+		}
+		MARK();
+		/* if otoh, we got the device from capture_nr and there is a valid output_nr,
+		 * make sure that both refer to the same device (or bail out)
+		 */
+		if ((device_nr != conf.output_nr) && (conf.output_nr >= 0) &&
+		    (ret != v4l2loopback_lookup(conf.output_nr, NULL))) {
+			ret = -EINVAL;
+			break;
+		}
+		MARK();
+
+		/* v4l2_loopback_config identified a single device, so fetch the data */
+		snprintf(conf.card_label, sizeof(conf.card_label), "%s",
+			 dev->card_label);
+		MARK();
+		conf.output_nr = conf.capture_nr = dev->vdev->num;
+		conf.max_width = dev->max_width;
+		conf.max_height = dev->max_height;
+		conf.announce_all_caps = dev->announce_all_caps;
+		conf.max_buffers = dev->buffers_number;
+		conf.max_openers = dev->max_openers;
+		conf.debug = debug;
+		MARK();
+		if (copy_to_user((void __user *)parm, &conf, sizeof(conf))) {
+			ret = -EFAULT;
+			break;
+		}
+		MARK();
+		ret = 0;
+		break;
+	default:
+		ret = -ENOSYS;
+		break;
+	}
+
+
MARK(); + mutex_unlock(&v4l2loopback_ctl_mutex); + MARK(); + return ret; +} + +/* LINUX KERNEL */ + +static const struct file_operations v4l2loopback_ctl_fops = { + // clang-format off + .owner = THIS_MODULE, + .open = nonseekable_open, + .unlocked_ioctl = v4l2loopback_control_ioctl, + .compat_ioctl = v4l2loopback_control_ioctl, + .llseek = noop_llseek, + // clang-format on +}; + +static struct miscdevice v4l2loopback_misc = { + // clang-format off + .minor = MISC_DYNAMIC_MINOR, + .name = "v4l2loopback", + .fops = &v4l2loopback_ctl_fops, + // clang-format on +}; + +static const struct v4l2_file_operations v4l2_loopback_fops = { + // clang-format off + .owner = THIS_MODULE, + .open = v4l2_loopback_open, + .release = v4l2_loopback_close, + .read = v4l2_loopback_read, + .write = v4l2_loopback_write, + .poll = v4l2_loopback_poll, + .mmap = v4l2_loopback_mmap, + .unlocked_ioctl = video_ioctl2, + // clang-format on +}; + +static const struct v4l2_ioctl_ops v4l2_loopback_ioctl_ops = { + // clang-format off + .vidioc_querycap = &vidioc_querycap, +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29) + .vidioc_enum_framesizes = &vidioc_enum_framesizes, + .vidioc_enum_frameintervals = &vidioc_enum_frameintervals, +#endif + +#ifndef HAVE__V4L2_CTRLS + .vidioc_queryctrl = &vidioc_queryctrl, + .vidioc_g_ctrl = &vidioc_g_ctrl, + .vidioc_s_ctrl = &vidioc_s_ctrl, +#endif /* HAVE__V4L2_CTRLS */ + + .vidioc_enum_output = &vidioc_enum_output, + .vidioc_g_output = &vidioc_g_output, + .vidioc_s_output = &vidioc_s_output, + + .vidioc_enum_input = &vidioc_enum_input, + .vidioc_g_input = &vidioc_g_input, + .vidioc_s_input = &vidioc_s_input, + + .vidioc_enum_fmt_vid_cap = &vidioc_enum_fmt_cap, + .vidioc_g_fmt_vid_cap = &vidioc_g_fmt_cap, + .vidioc_s_fmt_vid_cap = &vidioc_s_fmt_cap, + .vidioc_try_fmt_vid_cap = &vidioc_try_fmt_cap, + + .vidioc_enum_fmt_vid_out = &vidioc_enum_fmt_out, + .vidioc_s_fmt_vid_out = &vidioc_s_fmt_out, + .vidioc_g_fmt_vid_out = &vidioc_g_fmt_out, + 
.vidioc_try_fmt_vid_out = &vidioc_try_fmt_out, + +#ifdef V4L2L_OVERLAY + .vidioc_s_fmt_vid_overlay = &vidioc_s_fmt_overlay, + .vidioc_g_fmt_vid_overlay = &vidioc_g_fmt_overlay, +#endif + +#ifdef V4L2LOOPBACK_WITH_STD + .vidioc_s_std = &vidioc_s_std, + .vidioc_g_std = &vidioc_g_std, + .vidioc_querystd = &vidioc_querystd, +#endif /* V4L2LOOPBACK_WITH_STD */ + + .vidioc_g_parm = &vidioc_g_parm, + .vidioc_s_parm = &vidioc_s_parm, + + .vidioc_reqbufs = &vidioc_reqbufs, + .vidioc_querybuf = &vidioc_querybuf, + .vidioc_qbuf = &vidioc_qbuf, + .vidioc_dqbuf = &vidioc_dqbuf, + + .vidioc_streamon = &vidioc_streamon, + .vidioc_streamoff = &vidioc_streamoff, + +#ifdef CONFIG_VIDEO_V4L1_COMPAT + .vidiocgmbuf = &vidiocgmbuf, +#endif + + .vidioc_subscribe_event = &vidioc_subscribe_event, + .vidioc_unsubscribe_event = &v4l2_event_unsubscribe, + // clang-format on +}; + +static int free_device_cb(int id, void *ptr, void *data) +{ + struct v4l2_loopback_device *dev = ptr; + v4l2_loopback_remove(dev); + return 0; +} +static void free_devices(void) +{ + idr_for_each(&v4l2loopback_index_idr, &free_device_cb, NULL); + idr_destroy(&v4l2loopback_index_idr); +} + +static int __init v4l2loopback_init_module(void) +{ + int err; + int i; + MARK(); + + err = misc_register(&v4l2loopback_misc); + if (err < 0) + return err; + + if (devices < 0) { + devices = 1; + + /* try guessing the devices from the "video_nr" parameter */ + for (i = MAX_DEVICES - 1; i >= 0; i--) { + if (video_nr[i] >= 0) { + devices = i + 1; + break; + } + } + } + + if (devices > MAX_DEVICES) { + devices = MAX_DEVICES; + printk(KERN_INFO + "v4l2loopback: number of initial devices is limited to: %d\n", + MAX_DEVICES); + } + + if (max_buffers > MAX_BUFFERS) { + max_buffers = MAX_BUFFERS; + printk(KERN_INFO + "v4l2loopback: number of buffers is limited to: %d\n", + MAX_BUFFERS); + } + + if (max_openers < 0) { + printk(KERN_INFO + "v4l2loopback: allowing %d openers rather than %d\n", + 2, max_openers); + max_openers = 2; + } + + if 
(max_width < 1) { + max_width = V4L2LOOPBACK_SIZE_DEFAULT_MAX_WIDTH; + printk(KERN_INFO "v4l2loopback: using max_width %d\n", + max_width); + } + if (max_height < 1) { + max_height = V4L2LOOPBACK_SIZE_DEFAULT_MAX_HEIGHT; + printk(KERN_INFO "v4l2loopback: using max_height %d\n", + max_height); + } + + /* kfree on module release */ + for (i = 0; i < devices; i++) { + struct v4l2_loopback_config cfg = { + // clang-format off + .output_nr = video_nr[i], + .capture_nr = video_nr[i], + .max_width = max_width, + .max_height = max_height, + .announce_all_caps = (!exclusive_caps[i]), + .max_buffers = max_buffers, + .max_openers = max_openers, + .debug = debug, + // clang-format on + }; + cfg.card_label[0] = 0; + if (card_label[i]) + snprintf(cfg.card_label, sizeof(cfg.card_label), "%s", + card_label[i]); + err = v4l2_loopback_add(&cfg, 0); + if (err) { + free_devices(); + goto error; + } + } + + dprintk("module installed\n"); + + printk(KERN_INFO "v4l2loopback driver version %d.%d.%d%s loaded\n", + // clang-format off + (V4L2LOOPBACK_VERSION_CODE >> 16) & 0xff, + (V4L2LOOPBACK_VERSION_CODE >> 8) & 0xff, + (V4L2LOOPBACK_VERSION_CODE ) & 0xff, +#ifdef SNAPSHOT_VERSION + " (" STRINGIFY2(SNAPSHOT_VERSION) ")" +#else + "" +#endif + ); + // clang-format on + + return 0; +error: + misc_deregister(&v4l2loopback_misc); + return err; +} + +#ifdef MODULE +static void v4l2loopback_cleanup_module(void) +{ + MARK(); + /* unregister the device -> it deletes /dev/video* */ + free_devices(); + /* and get rid of /dev/v4l2loopback */ + misc_deregister(&v4l2loopback_misc); + dprintk("module removed\n"); +} +#endif + +MODULE_ALIAS_MISCDEV(MISC_DYNAMIC_MINOR); + +module_init(v4l2loopback_init_module); +module_exit(v4l2loopback_cleanup_module); + +/* + * fake usage of unused functions + */ +#ifdef HAVE__V4L2_CTRLS +static int vidioc_queryctrl(struct file *file, void *fh, + struct v4l2_queryctrl *q) __attribute__((unused)); +static int vidioc_g_ctrl(struct file *file, void *fh, struct v4l2_control 
*c) + __attribute__((unused)); +static int vidioc_s_ctrl(struct file *file, void *fh, struct v4l2_control *c) + __attribute__((unused)); +#endif /* HAVE__V4L2_CTRLS */ diff --git a/drivers/media/v4l2-core/v4l2loopback.h b/drivers/media/v4l2-core/v4l2loopback.h new file mode 100644 index 0000000000000..fb7180802c3f1 --- /dev/null +++ b/drivers/media/v4l2-core/v4l2loopback.h @@ -0,0 +1,92 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +/* + * v4l2loopback.h + * + * Written by IOhannes m zmölnig, 7/1/20. + * + * Copyright 2020 by IOhannes m zmölnig. Redistribution of this file is + * permitted under the GNU General Public License. + */ +#ifndef _V4L2LOOPBACK_H +#define _V4L2LOOPBACK_H + +#define V4L2LOOPBACK_VERSION_MAJOR 0 +#define V4L2LOOPBACK_VERSION_MINOR 12 +#define V4L2LOOPBACK_VERSION_BUGFIX 5 + +/* /dev/v4l2loopback interface */ + +struct v4l2_loopback_config { + /** + * the device-number (/dev/video) + * V4L2LOOPBACK_CTL_ADD: + * setting this to a value<0, will allocate an available one + * if nr>=0 and the device already exists, the ioctl will EEXIST + * if output_nr and capture_nr are the same, only a single device will be created + * + * V4L2LOOPBACK_CTL_QUERY: + * either both output_nr and capture_nr must refer to the same loopback, + * or one (and only one) of them must be -1 + * + */ + int output_nr; + int capture_nr; + + /** + * a nice name for your device + * if (*card_label)==0, an automatic name is assigned + */ + char card_label[32]; + + /** + * maximum allowed frame size + * if too low, default values are used + */ + int max_width; + int max_height; + + /** + * whether to announce OUTPUT/CAPTURE capabilities exclusively + * for this device or not + * (!exclusive_caps) + * FIXXME: this ought to be removed (if superseded by output_nr vs capture_nr) + */ + int announce_all_caps; + + /** + * number of buffers to allocate for the queue + * if set to <=0, default values are used + */ + int max_buffers; + + /** + * how many consumers 
are allowed to open this device concurrently + * if set to <=0, default values are used + */ + int max_openers; + + /** + * set the debugging level for this device + */ + int debug; +}; + +/* a pointer to a (struct v4l2_loopback_config) that has all values you wish to impose on the + * to-be-created device set. + * if the ptr is NULL, a new device is created with default values at the driver's discretion. + * + * returns the device_nr of the OUTPUT device (which can be used with V4L2LOOPBACK_CTL_QUERY, + * to get more information on the device) + */ +#define V4L2LOOPBACK_CTL_ADD 0x4C80 + +/* a pointer to a (struct v4l2_loopback_config) that has output_nr and/or capture_nr set + * (the two values must either refer to video-devices associated with the same loopback device + * or exactly one of them must be <0 + */ +#define V4L2LOOPBACK_CTL_QUERY 0x4C82 + +/* the device-number (either CAPTURE or OUTPUT) associated with the loopback-device */ +#define V4L2LOOPBACK_CTL_REMOVE 0x4C81 + +#endif /* _V4L2LOOPBACK_H */ diff --git a/drivers/media/v4l2-core/v4l2loopback_formats.h b/drivers/media/v4l2-core/v4l2loopback_formats.h new file mode 100644 index 0000000000000..0a4e458526b37 --- /dev/null +++ b/drivers/media/v4l2-core/v4l2loopback_formats.h @@ -0,0 +1,437 @@ +static const struct v4l2l_format formats[] = { +#ifndef V4L2_PIX_FMT_VP9 +#define V4L2_PIX_FMT_VP9 v4l2_fourcc('V', 'P', '9', '0') +#endif +#ifndef V4L2_PIX_FMT_HEVC +#define V4L2_PIX_FMT_HEVC v4l2_fourcc('H', 'E', 'V', 'C') +#endif + + /* here come the packed formats */ + { + .name = "32 bpp RGB, le", + .fourcc = V4L2_PIX_FMT_BGR32, + .depth = 32, + .flags = 0, + }, + { + .name = "32 bpp RGB, be", + .fourcc = V4L2_PIX_FMT_RGB32, + .depth = 32, + .flags = 0, + }, + { + .name = "24 bpp RGB, le", + .fourcc = V4L2_PIX_FMT_BGR24, + .depth = 24, + .flags = 0, + }, + { + .name = "24 bpp RGB, be", + .fourcc = V4L2_PIX_FMT_RGB24, + .depth = 24, + .flags = 0, + }, +#ifdef V4L2_PIX_FMT_RGBA32 + { + .name = "32 bpp RGBA", + 
.fourcc = V4L2_PIX_FMT_RGBA32, + .depth = 32, + .flags = 0, + }, +#endif +#ifdef V4L2_PIX_FMT_RGB332 + { + .name = "8 bpp RGB-3-3-2", + .fourcc = V4L2_PIX_FMT_RGB332, + .depth = 8, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_RGB332 */ +#ifdef V4L2_PIX_FMT_RGB444 + { + .name = "16 bpp RGB (xxxxrrrr ggggbbbb)", + .fourcc = V4L2_PIX_FMT_RGB444, + .depth = 16, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_RGB444 */ +#ifdef V4L2_PIX_FMT_RGB555 + { + .name = "16 bpp RGB-5-5-5", + .fourcc = V4L2_PIX_FMT_RGB555, + .depth = 16, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_RGB555 */ +#ifdef V4L2_PIX_FMT_RGB565 + { + .name = "16 bpp RGB-5-6-5", + .fourcc = V4L2_PIX_FMT_RGB565, + .depth = 16, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_RGB565 */ +#ifdef V4L2_PIX_FMT_RGB555X + { + .name = "16 bpp RGB-5-5-5 BE", + .fourcc = V4L2_PIX_FMT_RGB555X, + .depth = 16, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_RGB555X */ +#ifdef V4L2_PIX_FMT_RGB565X + { + .name = "16 bpp RGB-5-6-5 BE", + .fourcc = V4L2_PIX_FMT_RGB565X, + .depth = 16, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_RGB565X */ +#ifdef V4L2_PIX_FMT_BGR666 + { + .name = "18 bpp BGR-6-6-6", + .fourcc = V4L2_PIX_FMT_BGR666, + .depth = 18, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_BGR666 */ + { + .name = "4:2:2, packed, YUYV", + .fourcc = V4L2_PIX_FMT_YUYV, + .depth = 16, + .flags = 0, + }, + { + .name = "4:2:2, packed, UYVY", + .fourcc = V4L2_PIX_FMT_UYVY, + .depth = 16, + .flags = 0, + }, +#ifdef V4L2_PIX_FMT_YVYU + { + .name = "4:2:2, packed YVYU", + .fourcc = V4L2_PIX_FMT_YVYU, + .depth = 16, + .flags = 0, + }, +#endif +#ifdef V4L2_PIX_FMT_VYUY + { + .name = "4:2:2, packed VYUY", + .fourcc = V4L2_PIX_FMT_VYUY, + .depth = 16, + .flags = 0, + }, +#endif + { + .name = "4:2:2, packed YYUV", + .fourcc = V4L2_PIX_FMT_YYUV, + .depth = 16, + .flags = 0, + }, + { + .name = "YUV-8-8-8-8", + .fourcc = V4L2_PIX_FMT_YUV32, + .depth = 32, + .flags = 0, + }, + { + .name = "8 bpp, Greyscale", + .fourcc = V4L2_PIX_FMT_GREY, + .depth = 8, + .flags = 
0, + }, +#ifdef V4L2_PIX_FMT_Y4 + { + .name = "4 bpp Greyscale", + .fourcc = V4L2_PIX_FMT_Y4, + .depth = 4, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_Y4 */ +#ifdef V4L2_PIX_FMT_Y6 + { + .name = "6 bpp Greyscale", + .fourcc = V4L2_PIX_FMT_Y6, + .depth = 6, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_Y6 */ +#ifdef V4L2_PIX_FMT_Y10 + { + .name = "10 bpp Greyscale", + .fourcc = V4L2_PIX_FMT_Y10, + .depth = 10, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_Y10 */ +#ifdef V4L2_PIX_FMT_Y12 + { + .name = "12 bpp Greyscale", + .fourcc = V4L2_PIX_FMT_Y12, + .depth = 12, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_Y12 */ + { + .name = "16 bpp, Greyscale", + .fourcc = V4L2_PIX_FMT_Y16, + .depth = 16, + .flags = 0, + }, +#ifdef V4L2_PIX_FMT_YUV444 + { + .name = "16 bpp xxxxyyyy uuuuvvvv", + .fourcc = V4L2_PIX_FMT_YUV444, + .depth = 16, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_YUV444 */ +#ifdef V4L2_PIX_FMT_YUV555 + { + .name = "16 bpp YUV-5-5-5", + .fourcc = V4L2_PIX_FMT_YUV555, + .depth = 16, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_YUV555 */ +#ifdef V4L2_PIX_FMT_YUV565 + { + .name = "16 bpp YUV-5-6-5", + .fourcc = V4L2_PIX_FMT_YUV565, + .depth = 16, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_YUV565 */ + +/* bayer formats */ +#ifdef V4L2_PIX_FMT_SRGGB8 + { + .name = "Bayer RGGB 8bit", + .fourcc = V4L2_PIX_FMT_SRGGB8, + .depth = 8, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_SRGGB8 */ +#ifdef V4L2_PIX_FMT_SGRBG8 + { + .name = "Bayer GRBG 8bit", + .fourcc = V4L2_PIX_FMT_SGRBG8, + .depth = 8, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_SGRBG8 */ +#ifdef V4L2_PIX_FMT_SGBRG8 + { + .name = "Bayer GBRG 8bit", + .fourcc = V4L2_PIX_FMT_SGBRG8, + .depth = 8, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_SGBRG8 */ +#ifdef V4L2_PIX_FMT_SBGGR8 + { + .name = "Bayer BA81 8bit", + .fourcc = V4L2_PIX_FMT_SBGGR8, + .depth = 8, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_SBGGR8 */ + + /* here come the planar formats */ + { + .name = "4:1:0, planar, Y-Cr-Cb", + .fourcc = V4L2_PIX_FMT_YVU410, + .depth = 
9, + .flags = FORMAT_FLAGS_PLANAR, + }, + { + .name = "4:2:0, planar, Y-Cr-Cb", + .fourcc = V4L2_PIX_FMT_YVU420, + .depth = 12, + .flags = FORMAT_FLAGS_PLANAR, + }, + { + .name = "4:1:0, planar, Y-Cb-Cr", + .fourcc = V4L2_PIX_FMT_YUV410, + .depth = 9, + .flags = FORMAT_FLAGS_PLANAR, + }, + { + .name = "4:2:0, planar, Y-Cb-Cr", + .fourcc = V4L2_PIX_FMT_YUV420, + .depth = 12, + .flags = FORMAT_FLAGS_PLANAR, + }, +#ifdef V4L2_PIX_FMT_YUV422P + { + .name = "16 bpp YVU422 planar", + .fourcc = V4L2_PIX_FMT_YUV422P, + .depth = 16, + .flags = FORMAT_FLAGS_PLANAR, + }, +#endif /* V4L2_PIX_FMT_YUV422P */ +#ifdef V4L2_PIX_FMT_YUV411P + { + .name = "16 bpp YVU411 planar", + .fourcc = V4L2_PIX_FMT_YUV411P, + .depth = 16, + .flags = FORMAT_FLAGS_PLANAR, + }, +#endif /* V4L2_PIX_FMT_YUV411P */ +#ifdef V4L2_PIX_FMT_Y41P + { + .name = "12 bpp YUV 4:1:1", + .fourcc = V4L2_PIX_FMT_Y41P, + .depth = 12, + .flags = FORMAT_FLAGS_PLANAR, + }, +#endif /* V4L2_PIX_FMT_Y41P */ +#ifdef V4L2_PIX_FMT_NV12 + { + .name = "12 bpp Y/CbCr 4:2:0 ", + .fourcc = V4L2_PIX_FMT_NV12, + .depth = 12, + .flags = FORMAT_FLAGS_PLANAR, + }, +#endif /* V4L2_PIX_FMT_NV12 */ + +/* here come the compressed formats */ + +#ifdef V4L2_PIX_FMT_MJPEG + { + .name = "Motion-JPEG", + .fourcc = V4L2_PIX_FMT_MJPEG, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_MJPEG */ +#ifdef V4L2_PIX_FMT_JPEG + { + .name = "JFIF JPEG", + .fourcc = V4L2_PIX_FMT_JPEG, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_JPEG */ +#ifdef V4L2_PIX_FMT_DV + { + .name = "DV1394", + .fourcc = V4L2_PIX_FMT_DV, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_DV */ +#ifdef V4L2_PIX_FMT_MPEG + { + .name = "MPEG-1/2/4 Multiplexed", + .fourcc = V4L2_PIX_FMT_MPEG, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_MPEG */ +#ifdef V4L2_PIX_FMT_H264 + { + .name = "H264 with start codes", + .fourcc = V4L2_PIX_FMT_H264, + .depth = 32, + 
.flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_H264 */ +#ifdef V4L2_PIX_FMT_H264_NO_SC + { + .name = "H264 without start codes", + .fourcc = V4L2_PIX_FMT_H264_NO_SC, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_H264_NO_SC */ +#ifdef V4L2_PIX_FMT_H264_MVC + { + .name = "H264 MVC", + .fourcc = V4L2_PIX_FMT_H264_MVC, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_H264_MVC */ +#ifdef V4L2_PIX_FMT_H263 + { + .name = "H263", + .fourcc = V4L2_PIX_FMT_H263, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_H263 */ +#ifdef V4L2_PIX_FMT_MPEG1 + { + .name = "MPEG-1 ES", + .fourcc = V4L2_PIX_FMT_MPEG1, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_MPEG1 */ +#ifdef V4L2_PIX_FMT_MPEG2 + { + .name = "MPEG-2 ES", + .fourcc = V4L2_PIX_FMT_MPEG2, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_MPEG2 */ +#ifdef V4L2_PIX_FMT_MPEG4 + { + .name = "MPEG-4 part 2 ES", + .fourcc = V4L2_PIX_FMT_MPEG4, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_MPEG4 */ +#ifdef V4L2_PIX_FMT_XVID + { + .name = "Xvid", + .fourcc = V4L2_PIX_FMT_XVID, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_XVID */ +#ifdef V4L2_PIX_FMT_VC1_ANNEX_G + { + .name = "SMPTE 421M Annex G compliant stream", + .fourcc = V4L2_PIX_FMT_VC1_ANNEX_G, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_VC1_ANNEX_G */ +#ifdef V4L2_PIX_FMT_VC1_ANNEX_L + { + .name = "SMPTE 421M Annex L compliant stream", + .fourcc = V4L2_PIX_FMT_VC1_ANNEX_L, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_VC1_ANNEX_L */ +#ifdef V4L2_PIX_FMT_VP8 + { + .name = "VP8", + .fourcc = V4L2_PIX_FMT_VP8, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_VP8 */ +#ifdef V4L2_PIX_FMT_VP9 + { + .name = "VP9", + .fourcc = 
V4L2_PIX_FMT_VP9, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_VP9 */ +#ifdef V4L2_PIX_FMT_HEVC + { + .name = "HEVC", + .fourcc = V4L2_PIX_FMT_HEVC, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_HEVC */ +}; diff --git a/drivers/memory/atmel-ebi.c b/drivers/memory/atmel-ebi.c index c267283b01fda..e749dcb3ddea9 100644 --- a/drivers/memory/atmel-ebi.c +++ b/drivers/memory/atmel-ebi.c @@ -544,20 +544,27 @@ static int atmel_ebi_probe(struct platform_device *pdev) smc_np = of_parse_phandle(dev->of_node, "atmel,smc", 0); ebi->smc.regmap = syscon_node_to_regmap(smc_np); - if (IS_ERR(ebi->smc.regmap)) - return PTR_ERR(ebi->smc.regmap); + if (IS_ERR(ebi->smc.regmap)) { + ret = PTR_ERR(ebi->smc.regmap); + goto put_node; + } ebi->smc.layout = atmel_hsmc_get_reg_layout(smc_np); - if (IS_ERR(ebi->smc.layout)) - return PTR_ERR(ebi->smc.layout); + if (IS_ERR(ebi->smc.layout)) { + ret = PTR_ERR(ebi->smc.layout); + goto put_node; + } ebi->smc.clk = of_clk_get(smc_np, 0); if (IS_ERR(ebi->smc.clk)) { - if (PTR_ERR(ebi->smc.clk) != -ENOENT) - return PTR_ERR(ebi->smc.clk); + if (PTR_ERR(ebi->smc.clk) != -ENOENT) { + ret = PTR_ERR(ebi->smc.clk); + goto put_node; + } ebi->smc.clk = NULL; } + of_node_put(smc_np); ret = clk_prepare_enable(ebi->smc.clk); if (ret) return ret; @@ -608,6 +615,10 @@ static int atmel_ebi_probe(struct platform_device *pdev) } return of_platform_populate(np, NULL, NULL, dev); + +put_node: + of_node_put(smc_np); + return ret; } static __maybe_unused int atmel_ebi_resume(struct device *dev) diff --git a/drivers/memory/emif.c b/drivers/memory/emif.c index 762d0c0f0716f..ecc78d6f89ed2 100644 --- a/drivers/memory/emif.c +++ b/drivers/memory/emif.c @@ -1025,7 +1025,7 @@ static struct emif_data *__init_or_module get_device_details( temp = devm_kzalloc(dev, sizeof(*pd), GFP_KERNEL); dev_info = devm_kzalloc(dev, sizeof(*dev_info), GFP_KERNEL); - if (!emif || !pd || !dev_info) { + if (!emif || !temp || 
!dev_info) { dev_err(dev, "%s:%d: allocation error\n", __func__, __LINE__); goto error; } @@ -1117,7 +1117,7 @@ static int __init_or_module emif_probe(struct platform_device *pdev) { struct emif_data *emif; struct resource *res; - int irq; + int irq, ret; if (pdev->dev.of_node) emif = of_get_memory_device_details(pdev->dev.of_node, &pdev->dev); @@ -1147,7 +1147,9 @@ static int __init_or_module emif_probe(struct platform_device *pdev) emif_onetime_settings(emif); emif_debugfs_init(emif); disable_and_clear_all_interrupts(emif); - setup_interrupts(emif, irq); + ret = setup_interrupts(emif, irq); + if (ret) + goto error; /* One-time actions taken on probing the first device */ if (!emif1) { diff --git a/drivers/memory/renesas-rpc-if.c b/drivers/memory/renesas-rpc-if.c index e4cc64f560196..2e545f473cc68 100644 --- a/drivers/memory/renesas-rpc-if.c +++ b/drivers/memory/renesas-rpc-if.c @@ -651,6 +651,7 @@ static int rpcif_probe(struct platform_device *pdev) struct platform_device *vdev; struct device_node *flash; const char *name; + int ret; flash = of_get_next_child(pdev->dev.of_node, NULL); if (!flash) { @@ -674,7 +675,14 @@ static int rpcif_probe(struct platform_device *pdev) return -ENOMEM; vdev->dev.parent = &pdev->dev; platform_set_drvdata(pdev, vdev); - return platform_device_add(vdev); + + ret = platform_device_add(vdev); + if (ret) { + platform_device_put(vdev); + return ret; + } + + return 0; } static int rpcif_remove(struct platform_device *pdev) diff --git a/drivers/memory/tegra/tegra20-emc.c b/drivers/memory/tegra/tegra20-emc.c index 497b6edbf3ca1..25ba3c5e4ad6a 100644 --- a/drivers/memory/tegra/tegra20-emc.c +++ b/drivers/memory/tegra/tegra20-emc.c @@ -540,7 +540,7 @@ static int emc_read_lpddr_mode_register(struct tegra_emc *emc, unsigned int register_addr, unsigned int *register_data) { - u32 memory_dev = emem_dev + 1; + u32 memory_dev = emem_dev ? 
1 : 2; u32 val, mr_mask = 0xff; int err; diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c index c0450397b6735..7ea312f0840e0 100644 --- a/drivers/memstick/core/mspro_block.c +++ b/drivers/memstick/core/mspro_block.c @@ -186,13 +186,8 @@ static int mspro_block_bd_open(struct block_device *bdev, fmode_t mode) mutex_lock(&mspro_block_disk_lock); - if (msb && msb->card) { + if (msb && msb->card) msb->usage_count++; - if ((mode & FMODE_WRITE) && msb->read_only) - rc = -EROFS; - else - rc = 0; - } mutex_unlock(&mspro_block_disk_lock); @@ -1239,6 +1234,9 @@ static int mspro_block_init_disk(struct memstick_dev *card) set_capacity(msb->disk, capacity); dev_dbg(&card->dev, "capacity set %ld\n", capacity); + if (msb->read_only) + set_disk_ro(msb->disk, true); + rc = device_add_disk(&card->dev, msb->disk, NULL); if (rc) goto out_cleanup_disk; diff --git a/drivers/mfd/asic3.c b/drivers/mfd/asic3.c index 8d58c8df46cfb..56338f9dbd0ba 100644 --- a/drivers/mfd/asic3.c +++ b/drivers/mfd/asic3.c @@ -906,14 +906,14 @@ static int __init asic3_mfd_probe(struct platform_device *pdev, ret = mfd_add_devices(&pdev->dev, pdev->id, &asic3_cell_ds1wm, 1, mem, asic->irq_base, NULL); if (ret < 0) - goto out; + goto out_unmap; } if (mem_sdio && (irq >= 0)) { ret = mfd_add_devices(&pdev->dev, pdev->id, &asic3_cell_mmc, 1, mem_sdio, irq, NULL); if (ret < 0) - goto out; + goto out_unmap; } ret = 0; @@ -927,8 +927,12 @@ static int __init asic3_mfd_probe(struct platform_device *pdev, ret = mfd_add_devices(&pdev->dev, 0, asic3_cell_leds, ASIC3_NUM_LEDS, NULL, 0, NULL); } + return ret; - out: +out_unmap: + if (asic->tmio_cnf) + iounmap(asic->tmio_cnf); +out: return ret; } diff --git a/drivers/mfd/mc13xxx-core.c b/drivers/mfd/mc13xxx-core.c index 8a4f1d90dcfd1..1000572761a84 100644 --- a/drivers/mfd/mc13xxx-core.c +++ b/drivers/mfd/mc13xxx-core.c @@ -323,8 +323,10 @@ int mc13xxx_adc_do_conversion(struct mc13xxx *mc13xxx, unsigned int mode, adc1 |= MC13783_ADC1_ATOX; 
dev_dbg(mc13xxx->dev, "%s: request irq\n", __func__); - mc13xxx_irq_request(mc13xxx, MC13XXX_IRQ_ADCDONE, + ret = mc13xxx_irq_request(mc13xxx, MC13XXX_IRQ_ADCDONE, mc13xxx_handler_adcdone, __func__, &adcdone_data); + if (ret) + goto out; mc13xxx_reg_write(mc13xxx, MC13XXX_ADC0, adc0); mc13xxx_reg_write(mc13xxx, MC13XXX_ADC1, adc1); diff --git a/drivers/misc/cardreader/alcor_pci.c b/drivers/misc/cardreader/alcor_pci.c index de6d44a158bba..3f514d77a843f 100644 --- a/drivers/misc/cardreader/alcor_pci.c +++ b/drivers/misc/cardreader/alcor_pci.c @@ -266,7 +266,7 @@ static int alcor_pci_probe(struct pci_dev *pdev, if (!priv) return -ENOMEM; - ret = ida_simple_get(&alcor_pci_idr, 0, 0, GFP_KERNEL); + ret = ida_alloc(&alcor_pci_idr, GFP_KERNEL); if (ret < 0) return ret; priv->id = ret; @@ -280,7 +280,8 @@ static int alcor_pci_probe(struct pci_dev *pdev, ret = pci_request_regions(pdev, DRV_NAME_ALCOR_PCI); if (ret) { dev_err(&pdev->dev, "Cannot request region\n"); - return -ENOMEM; + ret = -ENOMEM; + goto error_free_ida; } if (!(pci_resource_flags(pdev, bar) & IORESOURCE_MEM)) { @@ -324,6 +325,8 @@ static int alcor_pci_probe(struct pci_dev *pdev, error_release_regions: pci_release_regions(pdev); +error_free_ida: + ida_free(&alcor_pci_idr, priv->id); return ret; } @@ -337,7 +340,7 @@ static void alcor_pci_remove(struct pci_dev *pdev) mfd_remove_devices(&pdev->dev); - ida_simple_remove(&alcor_pci_idr, priv->id); + ida_free(&alcor_pci_idr, priv->id); pci_release_regions(pdev); pci_set_drvdata(pdev, NULL); diff --git a/drivers/misc/habanalabs/common/debugfs.c b/drivers/misc/habanalabs/common/debugfs.c index fc084ee5106ec..09001fd9db85f 100644 --- a/drivers/misc/habanalabs/common/debugfs.c +++ b/drivers/misc/habanalabs/common/debugfs.c @@ -890,6 +890,8 @@ static ssize_t hl_set_power_state(struct file *f, const char __user *buf, pci_set_power_state(hdev->pdev, PCI_D0); pci_restore_state(hdev->pdev); rc = pci_enable_device(hdev->pdev); + if (rc < 0) + return rc; } else if (value 
== 2) { pci_save_state(hdev->pdev); pci_disable_device(hdev->pdev); diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c index c1eefaebacb64..bcc6e547e0714 100644 --- a/drivers/misc/habanalabs/common/memory.c +++ b/drivers/misc/habanalabs/common/memory.c @@ -1967,16 +1967,15 @@ static int export_dmabuf_from_handle(struct hl_ctx *ctx, u64 handle, int flags, static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args) { struct hl_device *hdev = hpriv->hdev; - struct hl_ctx *ctx = hpriv->ctx; u64 block_handle, device_addr = 0; + struct hl_ctx *ctx = hpriv->ctx; u32 handle = 0, block_size; - int rc, dmabuf_fd = -EBADF; + int rc; switch (args->in.op) { case HL_MEM_OP_ALLOC: if (args->in.alloc.mem_size == 0) { - dev_err(hdev->dev, - "alloc size must be larger than 0\n"); + dev_err(hdev->dev, "alloc size must be larger than 0\n"); rc = -EINVAL; goto out; } @@ -1997,15 +1996,14 @@ static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args) case HL_MEM_OP_MAP: if (args->in.flags & HL_MEM_USERPTR) { - device_addr = args->in.map_host.host_virt_addr; - rc = 0; + dev_err(hdev->dev, "Failed to map host memory when MMU is disabled\n"); + rc = -EPERM; } else { - rc = get_paddr_from_handle(ctx, &args->in, - &device_addr); + rc = get_paddr_from_handle(ctx, &args->in, &device_addr); + memset(args, 0, sizeof(*args)); + args->out.device_virt_addr = device_addr; } - memset(args, 0, sizeof(*args)); - args->out.device_virt_addr = device_addr; break; case HL_MEM_OP_UNMAP: @@ -2013,20 +2011,14 @@ static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args) break; case HL_MEM_OP_MAP_BLOCK: - rc = map_block(hdev, args->in.map_block.block_addr, - &block_handle, &block_size); + rc = map_block(hdev, args->in.map_block.block_addr, &block_handle, &block_size); args->out.block_handle = block_handle; args->out.block_size = block_size; break; case HL_MEM_OP_EXPORT_DMABUF_FD: - rc = export_dmabuf_from_addr(ctx, 
- args->in.export_dmabuf_fd.handle, - args->in.export_dmabuf_fd.mem_size, - args->in.flags, - &dmabuf_fd); - memset(args, 0, sizeof(*args)); - args->out.fd = dmabuf_fd; + dev_err(hdev->dev, "Failed to export dma-buf object when MMU is disabled\n"); + rc = -EPERM; break; default: diff --git a/drivers/misc/habanalabs/common/mmu/mmu_v1.c b/drivers/misc/habanalabs/common/mmu/mmu_v1.c index 6134b6ae76157..3cadef97817d6 100644 --- a/drivers/misc/habanalabs/common/mmu/mmu_v1.c +++ b/drivers/misc/habanalabs/common/mmu/mmu_v1.c @@ -467,7 +467,7 @@ static void hl_mmu_v1_fini(struct hl_device *hdev) { /* MMU H/W fini was already done in device hw_fini() */ - if (!ZERO_OR_NULL_PTR(hdev->mmu_priv.hr.mmu_shadow_hop0)) { + if (!ZERO_OR_NULL_PTR(hdev->mmu_priv.dr.mmu_shadow_hop0)) { kvfree(hdev->mmu_priv.dr.mmu_shadow_hop0); gen_pool_destroy(hdev->mmu_priv.dr.mmu_pgt_pool); diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 013c6da2e3ca1..b4dacea801511 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -7819,6 +7819,48 @@ static void gaudi_print_fw_alive_info(struct hl_device *hdev, fw_alive->thread_id, fw_alive->uptime_seconds); } +static void gaudi_print_nic_axi_irq_info(struct hl_device *hdev, u16 event_type, + void *data) +{ + char desc[64] = "", *type; + struct eq_nic_sei_event *eq_nic_sei = data; + u16 nic_id = event_type - GAUDI_EVENT_NIC_SEI_0; + + switch (eq_nic_sei->axi_error_cause) { + case RXB: + type = "RXB"; + break; + case RXE: + type = "RXE"; + break; + case TXS: + type = "TXS"; + break; + case TXE: + type = "TXE"; + break; + case QPC_RESP: + type = "QPC_RESP"; + break; + case NON_AXI_ERR: + type = "NON_AXI_ERR"; + break; + case TMR: + type = "TMR"; + break; + default: + dev_err(hdev->dev, "unknown NIC AXI cause %d\n", + eq_nic_sei->axi_error_cause); + type = "N/A"; + break; + } + + snprintf(desc, sizeof(desc), "NIC%d_%s%d", nic_id, type, + eq_nic_sei->id); + 
dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n", + event_type, desc); +} + static int gaudi_non_hard_reset_late_init(struct hl_device *hdev) { /* GAUDI doesn't support any reset except hard-reset */ @@ -8066,6 +8108,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry) { struct gaudi_device *gaudi = hdev->asic_specific; + u64 data = le64_to_cpu(eq_entry->data[0]); u32 ctl = le32_to_cpu(eq_entry->hdr.ctl); u32 fw_fatal_err_flag = 0; u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK) @@ -8263,6 +8306,11 @@ static void gaudi_handle_eqe(struct hl_device *hdev, hl_fw_unmask_irq(hdev, event_type); break; + case GAUDI_EVENT_NIC_SEI_0 ... GAUDI_EVENT_NIC_SEI_4: + gaudi_print_nic_axi_irq_info(hdev, event_type, &data); + hl_fw_unmask_irq(hdev, event_type); + break; + case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3: gaudi_print_irq_info(hdev, event_type, false); gaudi_print_sm_sei_info(hdev, event_type, diff --git a/drivers/misc/kgdbts.c b/drivers/misc/kgdbts.c index 67c5b452dd356..88b91ad8e5413 100644 --- a/drivers/misc/kgdbts.c +++ b/drivers/misc/kgdbts.c @@ -1070,10 +1070,10 @@ static int kgdbts_option_setup(char *opt) { if (strlen(opt) >= MAX_CONFIG_LEN) { printk(KERN_ERR "kgdbts: config string too long\n"); - return -ENOSPC; + return 1; } strcpy(config, opt); - return 0; + return 1; } __setup("kgdbts=", kgdbts_option_setup); diff --git a/drivers/misc/mei/hw-me-regs.h b/drivers/misc/mei/hw-me-regs.h index 67bb6a25fd0a0..64ce3f830262b 100644 --- a/drivers/misc/mei/hw-me-regs.h +++ b/drivers/misc/mei/hw-me-regs.h @@ -107,6 +107,7 @@ #define MEI_DEV_ID_ADP_S 0x7AE8 /* Alder Lake Point S */ #define MEI_DEV_ID_ADP_LP 0x7A60 /* Alder Lake Point LP */ #define MEI_DEV_ID_ADP_P 0x51E0 /* Alder Lake Point P */ +#define MEI_DEV_ID_ADP_N 0x54E0 /* Alder Lake Point N */ /* * MEI HW Section @@ -120,6 +121,7 @@ #define PCI_CFG_HFS_2 0x48 #define PCI_CFG_HFS_3 0x60 # define PCI_CFG_HFS_3_FW_SKU_MSK 0x00000070 +# define 
PCI_CFG_HFS_3_FW_SKU_IGN 0x00000000 # define PCI_CFG_HFS_3_FW_SKU_SPS 0x00000060 #define PCI_CFG_HFS_4 0x64 #define PCI_CFG_HFS_5 0x68 diff --git a/drivers/misc/mei/hw-me.c b/drivers/misc/mei/hw-me.c index d3a6c07286451..fbc4c95818645 100644 --- a/drivers/misc/mei/hw-me.c +++ b/drivers/misc/mei/hw-me.c @@ -1405,16 +1405,16 @@ static bool mei_me_fw_type_sps_4(const struct pci_dev *pdev) .quirk_probe = mei_me_fw_type_sps_4 /** - * mei_me_fw_type_sps() - check for sps sku + * mei_me_fw_type_sps_ign() - check for sps or ign sku * - * Read ME FW Status register to check for SPS Firmware. - * The SPS FW is only signaled in pci function 0 + * Read ME FW Status register to check for SPS or IGN Firmware. + * The SPS/IGN FW is only signaled in pci function 0 * * @pdev: pci device * - * Return: true in case of SPS firmware + * Return: true in case of SPS/IGN firmware */ -static bool mei_me_fw_type_sps(const struct pci_dev *pdev) +static bool mei_me_fw_type_sps_ign(const struct pci_dev *pdev) { u32 reg; u32 fw_type; @@ -1427,14 +1427,15 @@ static bool mei_me_fw_type_sps(const struct pci_dev *pdev) dev_dbg(&pdev->dev, "fw type is %d\n", fw_type); - return fw_type == PCI_CFG_HFS_3_FW_SKU_SPS; + return fw_type == PCI_CFG_HFS_3_FW_SKU_IGN || + fw_type == PCI_CFG_HFS_3_FW_SKU_SPS; } #define MEI_CFG_KIND_ITOUCH \ .kind = "itouch" -#define MEI_CFG_FW_SPS \ - .quirk_probe = mei_me_fw_type_sps +#define MEI_CFG_FW_SPS_IGN \ + .quirk_probe = mei_me_fw_type_sps_ign #define MEI_CFG_FW_VER_SUPP \ .fw_ver_supported = 1 @@ -1535,7 +1536,7 @@ static const struct mei_cfg mei_me_pch12_sps_cfg = { MEI_CFG_PCH8_HFS, MEI_CFG_FW_VER_SUPP, MEI_CFG_DMA_128, - MEI_CFG_FW_SPS, + MEI_CFG_FW_SPS_IGN, }; /* Cannon Lake itouch with quirk for SPS 5.0 and newer Firmware exclusion @@ -1545,7 +1546,7 @@ static const struct mei_cfg mei_me_pch12_itouch_sps_cfg = { MEI_CFG_KIND_ITOUCH, MEI_CFG_PCH8_HFS, MEI_CFG_FW_VER_SUPP, - MEI_CFG_FW_SPS, + MEI_CFG_FW_SPS_IGN, }; /* Tiger Lake and newer devices */ @@ -1562,7 
+1563,7 @@ static const struct mei_cfg mei_me_pch15_sps_cfg = { MEI_CFG_FW_VER_SUPP, MEI_CFG_DMA_128, MEI_CFG_TRC, - MEI_CFG_FW_SPS, + MEI_CFG_FW_SPS_IGN, }; /* diff --git a/drivers/misc/mei/interrupt.c b/drivers/misc/mei/interrupt.c index a67f4f2d33a93..0706322154cbe 100644 --- a/drivers/misc/mei/interrupt.c +++ b/drivers/misc/mei/interrupt.c @@ -424,31 +424,26 @@ int mei_irq_read_handler(struct mei_device *dev, list_for_each_entry(cl, &dev->file_list, link) { if (mei_cl_hbm_equal(cl, mei_hdr)) { cl_dbg(dev, cl, "got a message\n"); - break; + ret = mei_cl_irq_read_msg(cl, mei_hdr, meta_hdr, cmpl_list); + goto reset_slots; } } /* if no recipient cl was found we assume corrupted header */ - if (&cl->link == &dev->file_list) { - /* A message for not connected fixed address clients - * should be silently discarded - * On power down client may be force cleaned, - * silently discard such messages - */ - if (hdr_is_fixed(mei_hdr) || - dev->dev_state == MEI_DEV_POWER_DOWN) { - mei_irq_discard_msg(dev, mei_hdr, mei_hdr->length); - ret = 0; - goto reset_slots; - } - dev_err(dev->dev, "no destination client found 0x%08X\n", - dev->rd_msg_hdr[0]); - ret = -EBADMSG; - goto end; + /* A message for not connected fixed address clients + * should be silently discarded + * On power down client may be force cleaned, + * silently discard such messages + */ + if (hdr_is_fixed(mei_hdr) || + dev->dev_state == MEI_DEV_POWER_DOWN) { + mei_irq_discard_msg(dev, mei_hdr, mei_hdr->length); + ret = 0; + goto reset_slots; } - - ret = mei_cl_irq_read_msg(cl, mei_hdr, meta_hdr, cmpl_list); - + dev_err(dev->dev, "no destination client found 0x%08X\n", dev->rd_msg_hdr[0]); + ret = -EBADMSG; + goto end; reset_slots: /* reset the number of slots and header */ diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c index 3a45aaf002ac8..a738253dbd056 100644 --- a/drivers/misc/mei/pci-me.c +++ b/drivers/misc/mei/pci-me.c @@ -113,6 +113,7 @@ static const struct pci_device_id mei_me_pci_tbl[] = 
{ {MEI_PCI_DEVICE(MEI_DEV_ID_ADP_S, MEI_ME_PCH15_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_ADP_LP, MEI_ME_PCH15_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_ADP_P, MEI_ME_PCH15_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_ADP_N, MEI_ME_PCH15_CFG)}, /* required last entry */ {0, } diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index 4e67c1403cc93..db99882c95d86 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -1880,6 +1880,31 @@ static inline bool mmc_blk_rq_error(struct mmc_blk_request *brq) brq->data.error || brq->cmd.resp[0] & CMD_ERRORS; } +static int mmc_spi_err_check(struct mmc_card *card) +{ + u32 status = 0; + int err; + + /* + * SPI does not have a TRAN state we have to wait on, instead the + * card is ready again when it no longer holds the line LOW. + * We still have to ensure two things here before we know the write + * was successful: + * 1. The card has not disconnected during busy and we actually read our + * own pull-up, thinking it was still connected, so ensure it + * still responds. + * 2. Check for any error bits, in particular R1_SPI_IDLE to catch a + * just reconnected card after being disconnected during busy. 
+ */ + err = __mmc_send_status(card, &status, 0); + if (err) + return err; + /* All R1 and R2 bits of SPI are errors in our case */ + if (status) + return -EIO; + return 0; +} + static int mmc_blk_busy_cb(void *cb_data, bool *busy) { struct mmc_blk_busy_data *data = cb_data; @@ -1903,9 +1928,16 @@ static int mmc_blk_card_busy(struct mmc_card *card, struct request *req) struct mmc_blk_busy_data cb_data; int err; - if (mmc_host_is_spi(card->host) || rq_data_dir(req) == READ) + if (rq_data_dir(req) == READ) return 0; + if (mmc_host_is_spi(card->host)) { + err = mmc_spi_err_check(card); + if (err) + mqrq->brq.data.bytes_xfered = 0; + return err; + } + cb_data.card = card; cb_data.status = 0; err = __mmc_poll_for_busy(card->host, 0, MMC_BLK_TIMEOUT_MS, @@ -2350,6 +2382,8 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card, struct mmc_blk_data *md; int devidx, ret; char cap_str[10]; + bool cache_enabled = false; + bool fua_enabled = false; devidx = ida_simple_get(&mmc_blk_ida, 0, max_devices, GFP_KERNEL); if (devidx < 0) { @@ -2429,13 +2463,17 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card, md->flags |= MMC_BLK_CMD23; } - if (mmc_card_mmc(card) && - md->flags & MMC_BLK_CMD23 && + if (md->flags & MMC_BLK_CMD23 && ((card->ext_csd.rel_param & EXT_CSD_WR_REL_PARAM_EN) || card->ext_csd.rel_sectors)) { md->flags |= MMC_BLK_REL_WR; - blk_queue_write_cache(md->queue.queue, true, true); + fua_enabled = true; + cache_enabled = true; } + if (mmc_cache_enabled(card->host)) + cache_enabled = true; + + blk_queue_write_cache(md->queue.queue, cache_enabled, fua_enabled); string_get_size((u64)size, 512, STRING_UNITS_2, cap_str, sizeof(cap_str)); diff --git a/drivers/mmc/core/bus.c b/drivers/mmc/core/bus.c index 096ae624be9aa..58a60afa650b6 100644 --- a/drivers/mmc/core/bus.c +++ b/drivers/mmc/core/bus.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -34,13 +35,13 @@ static ssize_t type_show(struct device *dev, switch 
(card->type) { case MMC_TYPE_MMC: - return sprintf(buf, "MMC\n"); + return sysfs_emit(buf, "MMC\n"); case MMC_TYPE_SD: - return sprintf(buf, "SD\n"); + return sysfs_emit(buf, "SD\n"); case MMC_TYPE_SDIO: - return sprintf(buf, "SDIO\n"); + return sysfs_emit(buf, "SDIO\n"); case MMC_TYPE_SD_COMBO: - return sprintf(buf, "SDcombo\n"); + return sysfs_emit(buf, "SDcombo\n"); default: return -EFAULT; } diff --git a/drivers/mmc/core/bus.h b/drivers/mmc/core/bus.h index 8105852c4b62f..3996b191b68d1 100644 --- a/drivers/mmc/core/bus.h +++ b/drivers/mmc/core/bus.h @@ -9,6 +9,7 @@ #define _MMC_CORE_BUS_H #include +#include struct mmc_host; struct mmc_card; @@ -17,7 +18,7 @@ struct mmc_card; static ssize_t mmc_##name##_show (struct device *dev, struct device_attribute *attr, char *buf) \ { \ struct mmc_card *card = mmc_dev_to_card(dev); \ - return sprintf(buf, fmt, args); \ + return sysfs_emit(buf, fmt, args); \ } \ static DEVICE_ATTR(name, S_IRUGO, mmc_##name##_show, NULL) diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c index cf140f4ec8643..d739e2b631fe8 100644 --- a/drivers/mmc/core/host.c +++ b/drivers/mmc/core/host.c @@ -588,6 +588,16 @@ struct mmc_host *mmc_alloc_host(int extra, struct device *dev) EXPORT_SYMBOL(mmc_alloc_host); +static int mmc_validate_host_caps(struct mmc_host *host) +{ + if (host->caps & MMC_CAP_SDIO_IRQ && !host->ops->enable_sdio_irq) { + dev_warn(host->parent, "missing ->enable_sdio_irq() ops\n"); + return -EINVAL; + } + + return 0; +} + /** * mmc_add_host - initialise host hardware * @host: mmc host @@ -600,8 +610,9 @@ int mmc_add_host(struct mmc_host *host) { int err; - WARN_ON((host->caps & MMC_CAP_SDIO_IRQ) && - !host->ops->enable_sdio_irq); + err = mmc_validate_host_caps(host); + if (err) + return err; err = device_add(&host->class_dev); if (err) diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index 8421519c2a983..43d1b9b2fa499 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -12,6 +12,7 @@ #include 
#include #include +#include #include #include @@ -812,12 +813,11 @@ static ssize_t mmc_fwrev_show(struct device *dev, { struct mmc_card *card = mmc_dev_to_card(dev); - if (card->ext_csd.rev < 7) { - return sprintf(buf, "0x%x\n", card->cid.fwrev); - } else { - return sprintf(buf, "0x%*phN\n", MMC_FIRMWARE_LEN, - card->ext_csd.fwrev); - } + if (card->ext_csd.rev < 7) + return sysfs_emit(buf, "0x%x\n", card->cid.fwrev); + else + return sysfs_emit(buf, "0x%*phN\n", MMC_FIRMWARE_LEN, + card->ext_csd.fwrev); } static DEVICE_ATTR(fwrev, S_IRUGO, mmc_fwrev_show, NULL); @@ -830,10 +830,10 @@ static ssize_t mmc_dsr_show(struct device *dev, struct mmc_host *host = card->host; if (card->csd.dsr_imp && host->dsr_req) - return sprintf(buf, "0x%x\n", host->dsr); + return sysfs_emit(buf, "0x%x\n", host->dsr); else /* return default DSR value */ - return sprintf(buf, "0x%x\n", 0x404); + return sysfs_emit(buf, "0x%x\n", 0x404); } static DEVICE_ATTR(dsr, S_IRUGO, mmc_dsr_show, NULL); diff --git a/drivers/mmc/core/quirks.h b/drivers/mmc/core/quirks.h index 20f5687272778..f879dc63d9364 100644 --- a/drivers/mmc/core/quirks.h +++ b/drivers/mmc/core/quirks.h @@ -149,6 +149,11 @@ static const struct mmc_fixup __maybe_unused sdio_fixup_methods[] = { static const struct mmc_fixup __maybe_unused sdio_card_init_methods[] = { SDIO_FIXUP_COMPATIBLE("ti,wl1251", wl1251_quirk, 0), + SDIO_FIXUP_COMPATIBLE("silabs,wf200", add_quirk, + MMC_QUIRK_BROKEN_BYTE_MODE_512 | + MMC_QUIRK_LENIENT_FN0 | + MMC_QUIRK_BLKSZ_FOR_BYTE_MODE), + END_FIXUP }; diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c index bfbfed30dc4d8..68df6b2f49cc7 100644 --- a/drivers/mmc/core/sd.c +++ b/drivers/mmc/core/sd.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -708,18 +709,16 @@ MMC_DEV_ATTR(ocr, "0x%08x\n", card->ocr); MMC_DEV_ATTR(rca, "0x%04x\n", card->rca); -static ssize_t mmc_dsr_show(struct device *dev, - struct device_attribute *attr, - char *buf) +static ssize_t mmc_dsr_show(struct 
device *dev, struct device_attribute *attr, + char *buf) { - struct mmc_card *card = mmc_dev_to_card(dev); - struct mmc_host *host = card->host; - - if (card->csd.dsr_imp && host->dsr_req) - return sprintf(buf, "0x%x\n", host->dsr); - else - /* return default DSR value */ - return sprintf(buf, "0x%x\n", 0x404); + struct mmc_card *card = mmc_dev_to_card(dev); + struct mmc_host *host = card->host; + + if (card->csd.dsr_imp && host->dsr_req) + return sysfs_emit(buf, "0x%x\n", host->dsr); + /* return default DSR value */ + return sysfs_emit(buf, "0x%x\n", 0x404); } static DEVICE_ATTR(dsr, S_IRUGO, mmc_dsr_show, NULL); @@ -735,9 +734,9 @@ static ssize_t info##num##_show(struct device *dev, struct device_attribute *att \ if (num > card->num_info) \ return -ENODATA; \ - if (!card->info[num-1][0]) \ + if (!card->info[num - 1][0]) \ return 0; \ - return sprintf(buf, "%s\n", card->info[num-1]); \ + return sysfs_emit(buf, "%s\n", card->info[num - 1]); \ } \ static DEVICE_ATTR_RO(info##num) diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c index 41164748723d2..25799accf8a02 100644 --- a/drivers/mmc/core/sdio.c +++ b/drivers/mmc/core/sdio.c @@ -7,6 +7,7 @@ #include #include +#include #include #include @@ -40,9 +41,9 @@ static ssize_t info##num##_show(struct device *dev, struct device_attribute *att \ if (num > card->num_info) \ return -ENODATA; \ - if (!card->info[num-1][0]) \ + if (!card->info[num - 1][0]) \ return 0; \ - return sprintf(buf, "%s\n", card->info[num-1]); \ + return sysfs_emit(buf, "%s\n", card->info[num - 1]); \ } \ static DEVICE_ATTR_RO(info##num) diff --git a/drivers/mmc/core/sdio_bus.c b/drivers/mmc/core/sdio_bus.c index fda03b35c14a5..c6268c38c69e5 100644 --- a/drivers/mmc/core/sdio_bus.c +++ b/drivers/mmc/core/sdio_bus.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -35,7 +36,7 @@ field##_show(struct device *dev, struct device_attribute *attr, char *buf) \ struct sdio_func *func; \ \ func = dev_to_sdio_func (dev); 
\ - return sprintf(buf, format_string, args); \ + return sysfs_emit(buf, format_string, args); \ } \ static DEVICE_ATTR_RO(field) @@ -52,9 +53,9 @@ static ssize_t info##num##_show(struct device *dev, struct device_attribute *att \ if (num > func->num_info) \ return -ENODATA; \ - if (!func->info[num-1][0]) \ + if (!func->info[num - 1][0]) \ return 0; \ - return sprintf(buf, "%s\n", func->info[num-1]); \ + return sysfs_emit(buf, "%s\n", func->info[num - 1]); \ } \ static DEVICE_ATTR_RO(info##num) diff --git a/drivers/mmc/host/davinci_mmc.c b/drivers/mmc/host/davinci_mmc.c index 2a757c88f9d21..80de660027d89 100644 --- a/drivers/mmc/host/davinci_mmc.c +++ b/drivers/mmc/host/davinci_mmc.c @@ -1375,8 +1375,12 @@ static int davinci_mmcsd_suspend(struct device *dev) static int davinci_mmcsd_resume(struct device *dev) { struct mmc_davinci_host *host = dev_get_drvdata(dev); + int ret; + + ret = clk_enable(host->clk); + if (ret) + return ret; - clk_enable(host->clk); mmc_davinci_reset_ctrl(host, 0); return 0; diff --git a/drivers/mmc/host/mmci_stm32_sdmmc.c b/drivers/mmc/host/mmci_stm32_sdmmc.c index 9c13f2c313658..4566d7fc9055a 100644 --- a/drivers/mmc/host/mmci_stm32_sdmmc.c +++ b/drivers/mmc/host/mmci_stm32_sdmmc.c @@ -62,8 +62,8 @@ static int sdmmc_idma_validate_data(struct mmci_host *host, * excepted the last element which has no constraint on idmasize */ for_each_sg(data->sg, sg, data->sg_len - 1, i) { - if (!IS_ALIGNED(data->sg->offset, sizeof(u32)) || - !IS_ALIGNED(data->sg->length, SDMMC_IDMA_BURST)) { + if (!IS_ALIGNED(sg->offset, sizeof(u32)) || + !IS_ALIGNED(sg->length, SDMMC_IDMA_BURST)) { dev_err(mmc_dev(host->mmc), "unaligned scatterlist: ofst:%x length:%d\n", data->sg->offset, data->sg->length); @@ -71,7 +71,7 @@ static int sdmmc_idma_validate_data(struct mmci_host *host, } } - if (!IS_ALIGNED(data->sg->offset, sizeof(u32))) { + if (!IS_ALIGNED(sg->offset, sizeof(u32))) { dev_err(mmc_dev(host->mmc), "unaligned last scatterlist: ofst:%x length:%d\n", 
data->sg->offset, data->sg->length); diff --git a/drivers/mmc/host/renesas_sdhi_core.c b/drivers/mmc/host/renesas_sdhi_core.c index 2797a9c0f17d8..ddb5ca2f559e2 100644 --- a/drivers/mmc/host/renesas_sdhi_core.c +++ b/drivers/mmc/host/renesas_sdhi_core.c @@ -144,9 +144,9 @@ static unsigned int renesas_sdhi_clk_update(struct tmio_mmc_host *host, return clk_get_rate(priv->clk); if (priv->clkh) { + /* HS400 with 4TAP needs different clock settings */ bool use_4tap = priv->quirks && priv->quirks->hs400_4taps; - bool need_slow_clkh = (host->mmc->ios.timing == MMC_TIMING_UHS_SDR104) || - (host->mmc->ios.timing == MMC_TIMING_MMC_HS400); + bool need_slow_clkh = host->mmc->ios.timing == MMC_TIMING_MMC_HS400; clkh_shift = use_4tap && need_slow_clkh ? 1 : 2; ref_clk = priv->clkh; } @@ -396,10 +396,10 @@ static void renesas_sdhi_hs400_complete(struct mmc_host *mmc) SH_MOBILE_SDHI_SCC_TMPPORT2_HS400OSEL) | sd_scc_read32(host, priv, SH_MOBILE_SDHI_SCC_TMPPORT2)); - /* Set the sampling clock selection range of HS400 mode */ sd_scc_write32(host, priv, SH_MOBILE_SDHI_SCC_DTCNTL, SH_MOBILE_SDHI_SCC_DTCNTL_TAPEN | - 0x4 << SH_MOBILE_SDHI_SCC_DTCNTL_TAPNUM_SHIFT); + sd_scc_read32(host, priv, + SH_MOBILE_SDHI_SCC_DTCNTL)); /* Avoid bad TAP */ if (bad_taps & BIT(priv->tap_set)) { diff --git a/drivers/mmc/host/rtsx_pci_sdmmc.c b/drivers/mmc/host/rtsx_pci_sdmmc.c index 58cfaffa3c2d8..f7c384db89bf3 100644 --- a/drivers/mmc/host/rtsx_pci_sdmmc.c +++ b/drivers/mmc/host/rtsx_pci_sdmmc.c @@ -1495,12 +1495,12 @@ static int rtsx_pci_sdmmc_drv_probe(struct platform_device *pdev) realtek_init_host(host); - if (pcr->rtd3_en) { - pm_runtime_set_autosuspend_delay(&pdev->dev, 5000); - pm_runtime_use_autosuspend(&pdev->dev); - pm_runtime_enable(&pdev->dev); - } - + pm_runtime_no_callbacks(&pdev->dev); + pm_runtime_set_active(&pdev->dev); + pm_runtime_enable(&pdev->dev); + pm_runtime_set_autosuspend_delay(&pdev->dev, 200); + pm_runtime_mark_last_busy(&pdev->dev); + pm_runtime_use_autosuspend(&pdev->dev); 
mmc_add_host(mmc); @@ -1521,11 +1521,6 @@ static int rtsx_pci_sdmmc_drv_remove(struct platform_device *pdev) pcr->slots[RTSX_SD_CARD].card_event = NULL; mmc = host->mmc; - if (pcr->rtd3_en) { - pm_runtime_dont_use_autosuspend(&pdev->dev); - pm_runtime_disable(&pdev->dev); - } - cancel_work_sync(&host->work); mutex_lock(&host->host_mutex); @@ -1548,6 +1543,9 @@ static int rtsx_pci_sdmmc_drv_remove(struct platform_device *pdev) flush_work(&host->work); + pm_runtime_dont_use_autosuspend(&pdev->dev); + pm_runtime_disable(&pdev->dev); + mmc_free_host(mmc); dev_dbg(&(pdev->dev), diff --git a/drivers/mmc/host/sdhci-xenon.c b/drivers/mmc/host/sdhci-xenon.c index 666cee4c7f7c6..08e838400b526 100644 --- a/drivers/mmc/host/sdhci-xenon.c +++ b/drivers/mmc/host/sdhci-xenon.c @@ -241,16 +241,6 @@ static void xenon_voltage_switch(struct sdhci_host *host) { /* Wait for 5ms after set 1.8V signal enable bit */ usleep_range(5000, 5500); - - /* - * For some reason the controller's Host Control2 register reports - * the bit representing 1.8V signaling as 0 when read after it was - * written as 1. Subsequent read reports 1. - * - * Since this may cause some issues, do an empty read of the Host - * Control2 register here to circumvent this. 
- */ - sdhci_readw(host, SDHCI_HOST_CONTROL2); } static unsigned int xenon_get_max_clock(struct sdhci_host *host) diff --git a/drivers/mmc/host/sdhci_am654.c b/drivers/mmc/host/sdhci_am654.c index f654afbe8e83c..b4891bb266485 100644 --- a/drivers/mmc/host/sdhci_am654.c +++ b/drivers/mmc/host/sdhci_am654.c @@ -514,26 +514,6 @@ static const struct sdhci_am654_driver_data sdhci_j721e_4bit_drvdata = { .flags = IOMUX_PRESENT, }; -static const struct sdhci_pltfm_data sdhci_am64_8bit_pdata = { - .ops = &sdhci_j721e_8bit_ops, - .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN, -}; - -static const struct sdhci_am654_driver_data sdhci_am64_8bit_drvdata = { - .pdata = &sdhci_am64_8bit_pdata, - .flags = DLL_PRESENT | DLL_CALIB, -}; - -static const struct sdhci_pltfm_data sdhci_am64_4bit_pdata = { - .ops = &sdhci_j721e_4bit_ops, - .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN, -}; - -static const struct sdhci_am654_driver_data sdhci_am64_4bit_drvdata = { - .pdata = &sdhci_am64_4bit_pdata, - .flags = IOMUX_PRESENT, -}; - static const struct soc_device_attribute sdhci_am654_devices[] = { { .family = "AM65X", .revision = "SR1.0", @@ -759,11 +739,11 @@ static const struct of_device_id sdhci_am654_of_match[] = { }, { .compatible = "ti,am64-sdhci-8bit", - .data = &sdhci_am64_8bit_drvdata, + .data = &sdhci_j721e_8bit_drvdata, }, { .compatible = "ti,am64-sdhci-4bit", - .data = &sdhci_am64_4bit_drvdata, + .data = &sdhci_j721e_4bit_drvdata, }, { /* sentinel */ } }; diff --git a/drivers/mtd/devices/mchp23k256.c b/drivers/mtd/devices/mchp23k256.c index a8b31bddf14b8..1a840db207b5a 100644 --- a/drivers/mtd/devices/mchp23k256.c +++ b/drivers/mtd/devices/mchp23k256.c @@ -231,6 +231,19 @@ static const struct of_device_id mchp23k256_of_table[] = { }; MODULE_DEVICE_TABLE(of, mchp23k256_of_table); +static const struct spi_device_id mchp23k256_spi_ids[] = { + { + .name = "mchp23k256", + .driver_data = (kernel_ulong_t)&mchp23k256_caps, + }, + { + .name = "mchp23lcv1024", + .driver_data = 
(kernel_ulong_t)&mchp23lcv1024_caps, + }, + {} +}; +MODULE_DEVICE_TABLE(spi, mchp23k256_spi_ids); + static struct spi_driver mchp23k256_driver = { .driver = { .name = "mchp23k256", @@ -238,6 +251,7 @@ static struct spi_driver mchp23k256_driver = { }, .probe = mchp23k256_probe, .remove = mchp23k256_remove, + .id_table = mchp23k256_spi_ids, }; module_spi_driver(mchp23k256_driver); diff --git a/drivers/mtd/devices/mchp48l640.c b/drivers/mtd/devices/mchp48l640.c index 231a107901960..b9cf2b4415a54 100644 --- a/drivers/mtd/devices/mchp48l640.c +++ b/drivers/mtd/devices/mchp48l640.c @@ -359,6 +359,15 @@ static const struct of_device_id mchp48l640_of_table[] = { }; MODULE_DEVICE_TABLE(of, mchp48l640_of_table); +static const struct spi_device_id mchp48l640_spi_ids[] = { + { + .name = "48l640", + .driver_data = (kernel_ulong_t)&mchp48l640_caps, + }, + {} +}; +MODULE_DEVICE_TABLE(spi, mchp48l640_spi_ids); + static struct spi_driver mchp48l640_driver = { .driver = { .name = "mchp48l640", @@ -366,6 +375,7 @@ static struct spi_driver mchp48l640_driver = { }, .probe = mchp48l640_probe, .remove = mchp48l640_remove, + .id_table = mchp48l640_spi_ids, }; module_spi_driver(mchp48l640_driver); diff --git a/drivers/mtd/nand/onenand/generic.c b/drivers/mtd/nand/onenand/generic.c index 8b6f4da5d7201..a4b8b65fe15f5 100644 --- a/drivers/mtd/nand/onenand/generic.c +++ b/drivers/mtd/nand/onenand/generic.c @@ -53,7 +53,12 @@ static int generic_onenand_probe(struct platform_device *pdev) } info->onenand.mmcontrol = pdata ? 
pdata->mmcontrol : NULL; - info->onenand.irq = platform_get_irq(pdev, 0); + + err = platform_get_irq(pdev, 0); + if (err < 0) + goto out_iounmap; + + info->onenand.irq = err; info->mtd.dev.parent = &pdev->dev; info->mtd.priv = &info->onenand; diff --git a/drivers/mtd/nand/raw/atmel/nand-controller.c b/drivers/mtd/nand/raw/atmel/nand-controller.c index f3276ee9e4fe7..ddd93bc38ea6c 100644 --- a/drivers/mtd/nand/raw/atmel/nand-controller.c +++ b/drivers/mtd/nand/raw/atmel/nand-controller.c @@ -2060,13 +2060,15 @@ static int atmel_nand_controller_init(struct atmel_nand_controller *nc, nc->mck = of_clk_get(dev->parent->of_node, 0); if (IS_ERR(nc->mck)) { dev_err(dev, "Failed to retrieve MCK clk\n"); - return PTR_ERR(nc->mck); + ret = PTR_ERR(nc->mck); + goto out_release_dma; } np = of_parse_phandle(dev->parent->of_node, "atmel,smc", 0); if (!np) { dev_err(dev, "Missing or invalid atmel,smc property\n"); - return -EINVAL; + ret = -EINVAL; + goto out_release_dma; } nc->smc = syscon_node_to_regmap(np); @@ -2074,10 +2076,16 @@ static int atmel_nand_controller_init(struct atmel_nand_controller *nc, if (IS_ERR(nc->smc)) { ret = PTR_ERR(nc->smc); dev_err(dev, "Could not get SMC regmap (err = %d)\n", ret); - return ret; + goto out_release_dma; } return 0; + +out_release_dma: + if (nc->dmac) + dma_release_channel(nc->dmac); + + return ret; } static int diff --git a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c index ded4df4739280..e50db25e5ddcb 100644 --- a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c +++ b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c @@ -648,6 +648,7 @@ static void gpmi_nfc_compute_timings(struct gpmi_nand_data *this, const struct nand_sdr_timings *sdr) { struct gpmi_nfc_hardware_timing *hw = &this->hw; + struct resources *r = &this->resources; unsigned int dll_threshold_ps = this->devdata->max_chain_delay; unsigned int period_ps, reference_period_ps; unsigned int data_setup_cycles, data_hold_cycles, addr_setup_cycles; @@ 
-671,6 +672,8 @@ static void gpmi_nfc_compute_timings(struct gpmi_nand_data *this, wrn_dly_sel = BV_GPMI_CTRL1_WRN_DLY_SEL_NO_DELAY; } + hw->clk_rate = clk_round_rate(r->clock[0], hw->clk_rate); + /* SDR core timings are given in picoseconds */ period_ps = div_u64((u64)NSEC_PER_SEC * 1000, hw->clk_rate); diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c index e7b2ba016d8c6..8daaba96edb2c 100644 --- a/drivers/mtd/nand/raw/nand_base.c +++ b/drivers/mtd/nand/raw/nand_base.c @@ -338,16 +338,19 @@ static int nand_isbad_bbm(struct nand_chip *chip, loff_t ofs) * * Return: -EBUSY if the chip has been suspended, 0 otherwise */ -static int nand_get_device(struct nand_chip *chip) +static void nand_get_device(struct nand_chip *chip) { - mutex_lock(&chip->lock); - if (chip->suspended) { + /* Wait until the device is resumed. */ + while (1) { + mutex_lock(&chip->lock); + if (!chip->suspended) { + mutex_lock(&chip->controller->lock); + return; + } mutex_unlock(&chip->lock); - return -EBUSY; - } - mutex_lock(&chip->controller->lock); - return 0; + wait_event(chip->resume_wq, !chip->suspended); + } } /** @@ -576,9 +579,7 @@ static int nand_block_markbad_lowlevel(struct nand_chip *chip, loff_t ofs) nand_erase_nand(chip, &einfo, 0); /* Write bad block marker to OOB */ - ret = nand_get_device(chip); - if (ret) - return ret; + nand_get_device(chip); ret = nand_markbad_bbm(chip, ofs); nand_release_device(chip); @@ -3826,9 +3827,7 @@ static int nand_read_oob(struct mtd_info *mtd, loff_t from, ops->mode != MTD_OPS_RAW) return -ENOTSUPP; - ret = nand_get_device(chip); - if (ret) - return ret; + nand_get_device(chip); if (!ops->datbuf) ret = nand_do_read_oob(chip, from, ops); @@ -4415,13 +4414,11 @@ static int nand_write_oob(struct mtd_info *mtd, loff_t to, struct mtd_oob_ops *ops) { struct nand_chip *chip = mtd_to_nand(mtd); - int ret; + int ret = 0; ops->retlen = 0; - ret = nand_get_device(chip); - if (ret) - return ret; + nand_get_device(chip); switch 
(ops->mode) { case MTD_OPS_PLACE_OOB: @@ -4481,9 +4478,7 @@ int nand_erase_nand(struct nand_chip *chip, struct erase_info *instr, return -EIO; /* Grab the lock and see if the device is available */ - ret = nand_get_device(chip); - if (ret) - return ret; + nand_get_device(chip); /* Shift to get first page */ page = (int)(instr->addr >> chip->page_shift); @@ -4570,7 +4565,7 @@ static void nand_sync(struct mtd_info *mtd) pr_debug("%s: called\n", __func__); /* Grab the lock and see if the device is available */ - WARN_ON(nand_get_device(chip)); + nand_get_device(chip); /* Release it and go back */ nand_release_device(chip); } @@ -4587,9 +4582,7 @@ static int nand_block_isbad(struct mtd_info *mtd, loff_t offs) int ret; /* Select the NAND device */ - ret = nand_get_device(chip); - if (ret) - return ret; + nand_get_device(chip); nand_select_target(chip, chipnr); @@ -4660,6 +4653,8 @@ static void nand_resume(struct mtd_info *mtd) __func__); } mutex_unlock(&chip->lock); + + wake_up_all(&chip->resume_wq); } /** @@ -5437,6 +5432,7 @@ static int nand_scan_ident(struct nand_chip *chip, unsigned int maxchips, chip->cur_cs = -1; mutex_init(&chip->lock); + init_waitqueue_head(&chip->resume_wq); /* Enforce the right timings for reset/detection */ chip->current_interface_config = nand_get_reset_interface_config(); diff --git a/drivers/mtd/nand/raw/pl35x-nand-controller.c b/drivers/mtd/nand/raw/pl35x-nand-controller.c index 8a91e069ee2e9..3c6f6aff649f8 100644 --- a/drivers/mtd/nand/raw/pl35x-nand-controller.c +++ b/drivers/mtd/nand/raw/pl35x-nand-controller.c @@ -1062,7 +1062,7 @@ static int pl35x_nand_chip_init(struct pl35x_nandc *nfc, chip->controller = &nfc->controller; mtd = nand_to_mtd(chip); mtd->dev.parent = nfc->dev; - nand_set_flash_node(chip, nfc->dev->of_node); + nand_set_flash_node(chip, np); if (!mtd->name) { mtd->name = devm_kasprintf(nfc->dev, GFP_KERNEL, "%s", PL35X_NANDC_DRIVER_NAME); diff --git a/drivers/mtd/spi-nor/core.c b/drivers/mtd/spi-nor/core.c index 
04ea180118e33..cc155f6c6c68c 100644 --- a/drivers/mtd/spi-nor/core.c +++ b/drivers/mtd/spi-nor/core.c @@ -3181,10 +3181,11 @@ static void spi_nor_set_mtd_info(struct spi_nor *nor) mtd->flags = MTD_CAP_NORFLASH; if (nor->info->flags & SPI_NOR_NO_ERASE) mtd->flags |= MTD_NO_ERASE; + else + mtd->_erase = spi_nor_erase; mtd->writesize = nor->params->writesize; mtd->writebufsize = nor->params->page_size; mtd->size = nor->params->size; - mtd->_erase = spi_nor_erase; mtd->_read = spi_nor_read; /* Might be already set by some SST flashes. */ if (!mtd->_write) diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c index a7e3eb9befb62..a32050fecabf3 100644 --- a/drivers/mtd/ubi/build.c +++ b/drivers/mtd/ubi/build.c @@ -351,9 +351,6 @@ static ssize_t dev_attribute_show(struct device *dev, * we still can use 'ubi->ubi_num'. */ ubi = container_of(dev, struct ubi_device, dev); - ubi = ubi_get_device(ubi->ubi_num); - if (!ubi) - return -ENODEV; if (attr == &dev_eraseblock_size) ret = sprintf(buf, "%d\n", ubi->leb_size); @@ -382,7 +379,6 @@ static ssize_t dev_attribute_show(struct device *dev, else ret = -EINVAL; - ubi_put_device(ubi); return ret; } @@ -979,9 +975,6 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, goto out_detach; } - /* Make device "available" before it becomes accessible via sysfs */ - ubi_devices[ubi_num] = ubi; - err = uif_init(ubi); if (err) goto out_detach; @@ -1026,6 +1019,7 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, wake_up_process(ubi->bgt_thread); spin_unlock(&ubi->wl_lock); + ubi_devices[ubi_num] = ubi; ubi_notify_all(ubi, UBI_VOLUME_ADDED, NULL); return ubi_num; @@ -1034,7 +1028,6 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, out_uif: uif_close(ubi); out_detach: - ubi_devices[ubi_num] = NULL; ubi_wl_close(ubi); ubi_free_all_volumes(ubi); vfree(ubi->vtbl); diff --git a/drivers/mtd/ubi/fastmap.c b/drivers/mtd/ubi/fastmap.c index 022af59906aa9..6b5f1ffd961b9 100644 --- a/drivers/mtd/ubi/fastmap.c +++ 
b/drivers/mtd/ubi/fastmap.c @@ -468,7 +468,9 @@ static int scan_pool(struct ubi_device *ubi, struct ubi_attach_info *ai, if (err == UBI_IO_FF_BITFLIPS) scrub = 1; - add_aeb(ai, free, pnum, ec, scrub); + ret = add_aeb(ai, free, pnum, ec, scrub); + if (ret) + goto out; continue; } else if (err == 0 || err == UBI_IO_BITFLIPS) { dbg_bld("Found non empty PEB:%i in pool", pnum); @@ -638,8 +640,10 @@ static int ubi_attach_fastmap(struct ubi_device *ubi, if (fm_pos >= fm_size) goto fail_bad; - add_aeb(ai, &ai->free, be32_to_cpu(fmec->pnum), - be32_to_cpu(fmec->ec), 0); + ret = add_aeb(ai, &ai->free, be32_to_cpu(fmec->pnum), + be32_to_cpu(fmec->ec), 0); + if (ret) + goto fail; } /* read EC values from used list */ @@ -649,8 +653,10 @@ static int ubi_attach_fastmap(struct ubi_device *ubi, if (fm_pos >= fm_size) goto fail_bad; - add_aeb(ai, &used, be32_to_cpu(fmec->pnum), - be32_to_cpu(fmec->ec), 0); + ret = add_aeb(ai, &used, be32_to_cpu(fmec->pnum), + be32_to_cpu(fmec->ec), 0); + if (ret) + goto fail; } /* read EC values from scrub list */ @@ -660,8 +666,10 @@ static int ubi_attach_fastmap(struct ubi_device *ubi, if (fm_pos >= fm_size) goto fail_bad; - add_aeb(ai, &used, be32_to_cpu(fmec->pnum), - be32_to_cpu(fmec->ec), 1); + ret = add_aeb(ai, &used, be32_to_cpu(fmec->pnum), + be32_to_cpu(fmec->ec), 1); + if (ret) + goto fail; } /* read EC values from erase list */ @@ -671,8 +679,10 @@ static int ubi_attach_fastmap(struct ubi_device *ubi, if (fm_pos >= fm_size) goto fail_bad; - add_aeb(ai, &ai->erase, be32_to_cpu(fmec->pnum), - be32_to_cpu(fmec->ec), 1); + ret = add_aeb(ai, &ai->erase, be32_to_cpu(fmec->pnum), + be32_to_cpu(fmec->ec), 1); + if (ret) + goto fail; } ai->mean_ec = div_u64(ai->ec_sum, ai->ec_count); diff --git a/drivers/mtd/ubi/vmt.c b/drivers/mtd/ubi/vmt.c index 139ee132bfbcf..1bc7b3a056046 100644 --- a/drivers/mtd/ubi/vmt.c +++ b/drivers/mtd/ubi/vmt.c @@ -56,16 +56,11 @@ static ssize_t vol_attribute_show(struct device *dev, { int ret; struct ubi_volume *vol = 
container_of(dev, struct ubi_volume, dev); - struct ubi_device *ubi; - - ubi = ubi_get_device(vol->ubi->ubi_num); - if (!ubi) - return -ENODEV; + struct ubi_device *ubi = vol->ubi; spin_lock(&ubi->volumes_lock); if (!ubi->volumes[vol->vol_id]) { spin_unlock(&ubi->volumes_lock); - ubi_put_device(ubi); return -ENODEV; } /* Take a reference to prevent volume removal */ @@ -103,7 +98,6 @@ static ssize_t vol_attribute_show(struct device *dev, vol->ref_count -= 1; ubi_assert(vol->ref_count >= 0); spin_unlock(&ubi->volumes_lock); - ubi_put_device(ubi); return ret; } diff --git a/drivers/net/bareudp.c b/drivers/net/bareudp.c index ba587e5fc24fc..683203f87ae2b 100644 --- a/drivers/net/bareudp.c +++ b/drivers/net/bareudp.c @@ -148,14 +148,14 @@ static int bareudp_udp_encap_recv(struct sock *sk, struct sk_buff *skb) skb_reset_network_header(skb); skb_reset_mac_header(skb); - if (!IS_ENABLED(CONFIG_IPV6) || family == AF_INET) + if (!ipv6_mod_enabled() || family == AF_INET) err = IP_ECN_decapsulate(oiph, skb); else err = IP6_ECN_decapsulate(oiph, skb); if (unlikely(err)) { if (log_ecn_error) { - if (!IS_ENABLED(CONFIG_IPV6) || family == AF_INET) + if (!ipv6_mod_enabled() || family == AF_INET) net_info_ratelimited("non-ECT from %pI4 " "with TOS=%#x\n", &((struct iphdr *)oiph)->saddr, @@ -221,11 +221,12 @@ static struct socket *bareudp_create_sock(struct net *net, __be16 port) int err; memset(&udp_conf, 0, sizeof(udp_conf)); -#if IS_ENABLED(CONFIG_IPV6) - udp_conf.family = AF_INET6; -#else - udp_conf.family = AF_INET; -#endif + + if (ipv6_mod_enabled()) + udp_conf.family = AF_INET6; + else + udp_conf.family = AF_INET; + udp_conf.local_udp_port = port; /* Open UDP socket */ err = udp_sock_create(net, &udp_conf, &sock); @@ -448,7 +449,7 @@ static netdev_tx_t bareudp_xmit(struct sk_buff *skb, struct net_device *dev) } rcu_read_lock(); - if (IS_ENABLED(CONFIG_IPV6) && info->mode & IP_TUNNEL_INFO_IPV6) + if (ipv6_mod_enabled() && info->mode & IP_TUNNEL_INFO_IPV6) err = 
bareudp6_xmit_skb(skb, dev, bareudp, info); else err = bareudp_xmit_skb(skb, dev, bareudp, info); @@ -478,7 +479,7 @@ static int bareudp_fill_metadata_dst(struct net_device *dev, use_cache = ip_tunnel_dst_cache_usable(skb, info); - if (!IS_ENABLED(CONFIG_IPV6) || ip_tunnel_info_af(info) == AF_INET) { + if (!ipv6_mod_enabled() || ip_tunnel_info_af(info) == AF_INET) { struct rtable *rt; __be32 saddr; diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 1a4b56f6fa8c6..b3b5bc1c803b3 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -1637,8 +1637,6 @@ static netdev_tx_t m_can_tx_handler(struct m_can_classdev *cdev) if (err) goto out_fail; - can_put_echo_skb(skb, dev, 0, 0); - if (cdev->can.ctrlmode & CAN_CTRLMODE_FD) { cccr = m_can_read(cdev, M_CAN_CCCR); cccr &= ~CCCR_CMR_MASK; @@ -1655,6 +1653,9 @@ static netdev_tx_t m_can_tx_handler(struct m_can_classdev *cdev) m_can_write(cdev, M_CAN_CCCR, cccr); } m_can_write(cdev, M_CAN_TXBTIE, 0x1); + + can_put_echo_skb(skb, dev, 0, 0); + m_can_write(cdev, M_CAN_TXBAR, 0x1); /* End of xmit function for version 3.0.x */ } else { diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c index b5986df6eca0b..1c192554209a3 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c @@ -1657,7 +1657,7 @@ mcp251xfd_register_get_dev_id(const struct mcp251xfd_priv *priv, out_kfree_buf_rx: kfree(buf_rx); - return 0; + return err; } #define MCP251XFD_QUIRK_ACTIVE(quirk) \ diff --git a/drivers/net/can/usb/ems_usb.c b/drivers/net/can/usb/ems_usb.c index 7bedceffdfa36..bbec3311d8934 100644 --- a/drivers/net/can/usb/ems_usb.c +++ b/drivers/net/can/usb/ems_usb.c @@ -819,7 +819,6 @@ static netdev_tx_t ems_usb_start_xmit(struct sk_buff *skb, struct net_device *ne usb_unanchor_urb(urb); usb_free_coherent(dev->udev, size, buf, urb->transfer_dma); - dev_kfree_skb(skb); 
atomic_dec(&dev->active_tx_urbs); diff --git a/drivers/net/can/usb/etas_es58x/es58x_fd.c b/drivers/net/can/usb/etas_es58x/es58x_fd.c index ec87126e1a7df..8ccda748fd084 100644 --- a/drivers/net/can/usb/etas_es58x/es58x_fd.c +++ b/drivers/net/can/usb/etas_es58x/es58x_fd.c @@ -172,12 +172,11 @@ static int es58x_fd_rx_event_msg(struct net_device *netdev, const struct es58x_fd_rx_event_msg *rx_event_msg; int ret; + rx_event_msg = &es58x_fd_urb_cmd->rx_event_msg; ret = es58x_check_msg_len(es58x_dev->dev, *rx_event_msg, msg_len); if (ret) return ret; - rx_event_msg = &es58x_fd_urb_cmd->rx_event_msg; - return es58x_rx_err_msg(netdev, rx_event_msg->error_code, rx_event_msg->event_code, get_unaligned_le64(&rx_event_msg->timestamp)); diff --git a/drivers/net/can/usb/mcba_usb.c b/drivers/net/can/usb/mcba_usb.c index 77bddff86252b..c45a814e1de2f 100644 --- a/drivers/net/can/usb/mcba_usb.c +++ b/drivers/net/can/usb/mcba_usb.c @@ -33,10 +33,6 @@ #define MCBA_USB_RX_BUFF_SIZE 64 #define MCBA_USB_TX_BUFF_SIZE (sizeof(struct mcba_usb_msg)) -/* MCBA endpoint numbers */ -#define MCBA_USB_EP_IN 1 -#define MCBA_USB_EP_OUT 1 - /* Microchip command id */ #define MBCA_CMD_RECEIVE_MESSAGE 0xE3 #define MBCA_CMD_I_AM_ALIVE_FROM_CAN 0xF5 @@ -83,6 +79,8 @@ struct mcba_priv { atomic_t free_ctx_cnt; void *rxbuf[MCBA_MAX_RX_URBS]; dma_addr_t rxbuf_dma[MCBA_MAX_RX_URBS]; + int rx_pipe; + int tx_pipe; }; /* CAN frame */ @@ -268,10 +266,8 @@ static netdev_tx_t mcba_usb_xmit(struct mcba_priv *priv, memcpy(buf, usb_msg, MCBA_USB_TX_BUFF_SIZE); - usb_fill_bulk_urb(urb, priv->udev, - usb_sndbulkpipe(priv->udev, MCBA_USB_EP_OUT), buf, - MCBA_USB_TX_BUFF_SIZE, mcba_usb_write_bulk_callback, - ctx); + usb_fill_bulk_urb(urb, priv->udev, priv->tx_pipe, buf, MCBA_USB_TX_BUFF_SIZE, + mcba_usb_write_bulk_callback, ctx); urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; usb_anchor_urb(urb, &priv->tx_submitted); @@ -364,7 +360,6 @@ static netdev_tx_t mcba_usb_start_xmit(struct sk_buff *skb, xmit_failed: 
can_free_echo_skb(priv->netdev, ctx->ndx, NULL); mcba_usb_free_ctx(ctx); - dev_kfree_skb(skb); stats->tx_dropped++; return NETDEV_TX_OK; @@ -608,7 +603,7 @@ static void mcba_usb_read_bulk_callback(struct urb *urb) resubmit_urb: usb_fill_bulk_urb(urb, priv->udev, - usb_rcvbulkpipe(priv->udev, MCBA_USB_EP_OUT), + priv->rx_pipe, urb->transfer_buffer, MCBA_USB_RX_BUFF_SIZE, mcba_usb_read_bulk_callback, priv); @@ -653,7 +648,7 @@ static int mcba_usb_start(struct mcba_priv *priv) urb->transfer_dma = buf_dma; usb_fill_bulk_urb(urb, priv->udev, - usb_rcvbulkpipe(priv->udev, MCBA_USB_EP_IN), + priv->rx_pipe, buf, MCBA_USB_RX_BUFF_SIZE, mcba_usb_read_bulk_callback, priv); urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; @@ -807,6 +802,13 @@ static int mcba_usb_probe(struct usb_interface *intf, struct mcba_priv *priv; int err; struct usb_device *usbdev = interface_to_usbdev(intf); + struct usb_endpoint_descriptor *in, *out; + + err = usb_find_common_endpoints(intf->cur_altsetting, &in, &out, NULL, NULL); + if (err) { + dev_err(&intf->dev, "Can't find endpoints\n"); + return err; + } netdev = alloc_candev(sizeof(struct mcba_priv), MCBA_MAX_TX_URBS); if (!netdev) { @@ -852,6 +854,9 @@ static int mcba_usb_probe(struct usb_interface *intf, goto cleanup_free_candev; } + priv->rx_pipe = usb_rcvbulkpipe(priv->udev, in->bEndpointAddress); + priv->tx_pipe = usb_sndbulkpipe(priv->udev, out->bEndpointAddress); + devm_can_led_init(netdev); /* Start USB dev only if we have successfully registered CAN device */ diff --git a/drivers/net/can/usb/usb_8dev.c b/drivers/net/can/usb/usb_8dev.c index 431af1ec1e3ca..b638604bf1eef 100644 --- a/drivers/net/can/usb/usb_8dev.c +++ b/drivers/net/can/usb/usb_8dev.c @@ -663,9 +663,20 @@ static netdev_tx_t usb_8dev_start_xmit(struct sk_buff *skb, atomic_inc(&priv->active_tx_urbs); err = usb_submit_urb(urb, GFP_ATOMIC); - if (unlikely(err)) - goto failed; - else if (atomic_read(&priv->active_tx_urbs) >= MAX_TX_URBS) + if (unlikely(err)) { + 
can_free_echo_skb(netdev, context->echo_index, NULL); + + usb_unanchor_urb(urb); + usb_free_coherent(priv->udev, size, buf, urb->transfer_dma); + + atomic_dec(&priv->active_tx_urbs); + + if (err == -ENODEV) + netif_device_detach(netdev); + else + netdev_warn(netdev, "failed tx_urb %d\n", err); + stats->tx_dropped++; + } else if (atomic_read(&priv->active_tx_urbs) >= MAX_TX_URBS) /* Slow down tx path */ netif_stop_queue(netdev); @@ -684,19 +695,6 @@ static netdev_tx_t usb_8dev_start_xmit(struct sk_buff *skb, return NETDEV_TX_BUSY; -failed: - can_free_echo_skb(netdev, context->echo_index, NULL); - - usb_unanchor_urb(urb); - usb_free_coherent(priv->udev, size, buf, urb->transfer_dma); - - atomic_dec(&priv->active_tx_urbs); - - if (err == -ENODEV) - netif_device_detach(netdev); - else - netdev_warn(netdev, "failed tx_urb %d\n", err); - nomembuf: usb_free_urb(urb); diff --git a/drivers/net/can/vxcan.c b/drivers/net/can/vxcan.c index 47ccc15a3486b..191ffa7776e8d 100644 --- a/drivers/net/can/vxcan.c +++ b/drivers/net/can/vxcan.c @@ -148,7 +148,7 @@ static void vxcan_setup(struct net_device *dev) dev->hard_header_len = 0; dev->addr_len = 0; dev->tx_queue_len = 0; - dev->flags = (IFF_NOARP|IFF_ECHO); + dev->flags = IFF_NOARP; dev->netdev_ops = &vxcan_netdev_ops; dev->needs_free_netdev = true; diff --git a/drivers/net/dsa/Kconfig b/drivers/net/dsa/Kconfig index 0029d279616fd..37a3dabdce313 100644 --- a/drivers/net/dsa/Kconfig +++ b/drivers/net/dsa/Kconfig @@ -68,17 +68,7 @@ config NET_DSA_QCA8K This enables support for the Qualcomm Atheros QCA8K Ethernet switch chips. -config NET_DSA_REALTEK_SMI - tristate "Realtek SMI Ethernet switch family support" - select NET_DSA_TAG_RTL4_A - select NET_DSA_TAG_RTL8_4 - select FIXED_PHY - select IRQ_DOMAIN - select REALTEK_PHY - select REGMAP - help - This enables support for the Realtek SMI-based switch - chips, currently only RTL8366RB. 
+source "drivers/net/dsa/realtek/Kconfig" config NET_DSA_SMSC_LAN9303 tristate diff --git a/drivers/net/dsa/Makefile b/drivers/net/dsa/Makefile index 8da1569a34e6e..e73838c122560 100644 --- a/drivers/net/dsa/Makefile +++ b/drivers/net/dsa/Makefile @@ -9,8 +9,6 @@ obj-$(CONFIG_NET_DSA_LANTIQ_GSWIP) += lantiq_gswip.o obj-$(CONFIG_NET_DSA_MT7530) += mt7530.o obj-$(CONFIG_NET_DSA_MV88E6060) += mv88e6060.o obj-$(CONFIG_NET_DSA_QCA8K) += qca8k.o -obj-$(CONFIG_NET_DSA_REALTEK_SMI) += realtek-smi.o -realtek-smi-objs := realtek-smi-core.o rtl8366.o rtl8366rb.o rtl8365mb.o obj-$(CONFIG_NET_DSA_SMSC_LAN9303) += lan9303-core.o obj-$(CONFIG_NET_DSA_SMSC_LAN9303_I2C) += lan9303_i2c.o obj-$(CONFIG_NET_DSA_SMSC_LAN9303_MDIO) += lan9303_mdio.o @@ -23,5 +21,6 @@ obj-y += microchip/ obj-y += mv88e6xxx/ obj-y += ocelot/ obj-y += qca/ +obj-y += realtek/ obj-y += sja1105/ obj-y += xrs700x/ diff --git a/drivers/net/dsa/bcm_sf2_cfp.c b/drivers/net/dsa/bcm_sf2_cfp.c index a7e2fcf2df2c9..edbe5e7f1cb6b 100644 --- a/drivers/net/dsa/bcm_sf2_cfp.c +++ b/drivers/net/dsa/bcm_sf2_cfp.c @@ -567,14 +567,14 @@ static void bcm_sf2_cfp_slice_ipv6(struct bcm_sf2_priv *priv, static struct cfp_rule *bcm_sf2_cfp_rule_find(struct bcm_sf2_priv *priv, int port, u32 location) { - struct cfp_rule *rule = NULL; + struct cfp_rule *rule; list_for_each_entry(rule, &priv->cfp.rules_list, next) { if (rule->port == port && rule->fs.location == location) - break; + return rule; } - return rule; + return NULL; } static int bcm_sf2_cfp_rule_cmp(struct bcm_sf2_priv *priv, int port, diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index ab1676553714c..cf7754dddad78 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -3639,6 +3639,7 @@ static const struct mv88e6xxx_ops mv88e6097_ops = { .port_sync_link = mv88e6185_port_sync_link, .port_set_speed_duplex = mv88e6185_port_set_speed_duplex, .port_tag_remap = mv88e6095_port_tag_remap, + .port_set_policy = 
mv88e6352_port_set_policy, .port_set_frame_mode = mv88e6351_port_set_frame_mode, .port_set_ucast_flood = mv88e6352_port_set_ucast_flood, .port_set_mcast_flood = mv88e6352_port_set_mcast_flood, diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c index 9957772201d58..c414d9e9d7c09 100644 --- a/drivers/net/dsa/ocelot/felix.c +++ b/drivers/net/dsa/ocelot/felix.c @@ -599,6 +599,8 @@ static int felix_change_tag_protocol(struct dsa_switch *ds, int cpu, struct ocelot *ocelot = ds->priv; struct felix *felix = ocelot_to_felix(ocelot); enum dsa_tag_protocol old_proto = felix->tag_proto; + bool cpu_port_active = false; + struct dsa_port *dp; int err; if (proto != DSA_TAG_PROTO_SEVILLE && @@ -606,6 +608,27 @@ static int felix_change_tag_protocol(struct dsa_switch *ds, int cpu, proto != DSA_TAG_PROTO_OCELOT_8021Q) return -EPROTONOSUPPORT; + /* We don't support multiple CPU ports, yet the DT blob may have + * multiple CPU ports defined. The first CPU port is the active one, + * the others are inactive. In this case, DSA will call + * ->change_tag_protocol() multiple times, once per CPU port. + * Since we implement the tagging protocol change towards "ocelot" or + * "seville" as effectively initializing the NPI port, what we are + * doing is effectively changing who the NPI port is to the last @cpu + * argument passed, which is an unused DSA CPU port and not the one + * that should actively pass traffic. + * Suppress DSA's calls on CPU ports that are inactive. 
+ */ + dsa_switch_for_each_user_port(dp, ds) { + if (dp->cpu_dp->index == cpu) { + cpu_port_active = true; + break; + } + } + + if (!cpu_port_active) + return 0; + felix_del_tag_protocol(ds, cpu, old_proto); err = felix_set_tag_protocol(ds, cpu, proto); diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c index 33f0ceae381d4..443d34ce2853f 100644 --- a/drivers/net/dsa/ocelot/felix_vsc9959.c +++ b/drivers/net/dsa/ocelot/felix_vsc9959.c @@ -1940,6 +1940,10 @@ static int vsc9959_psfp_filter_add(struct ocelot *ocelot, int port, case FLOW_ACTION_GATE: size = struct_size(sgi, entries, a->gate.num_entries); sgi = kzalloc(size, GFP_KERNEL); + if (!sgi) { + ret = -ENOMEM; + goto err; + } vsc9959_psfp_parse_gate(a, sgi); ret = vsc9959_psfp_sgi_table_add(ocelot, sgi); if (ret) { @@ -2324,7 +2328,7 @@ static int felix_pci_probe(struct pci_dev *pdev, err = dsa_register_switch(ds); if (err) { - dev_err(&pdev->dev, "Failed to register DSA switch: %d\n", err); + dev_err_probe(&pdev->dev, err, "Failed to register DSA switch\n"); goto err_register_ds; } diff --git a/drivers/net/dsa/realtek/Kconfig b/drivers/net/dsa/realtek/Kconfig new file mode 100644 index 0000000000000..1315896ed6e2a --- /dev/null +++ b/drivers/net/dsa/realtek/Kconfig @@ -0,0 +1,21 @@ +# SPDX-License-Identifier: GPL-2.0-only +menuconfig NET_DSA_REALTEK + tristate "Realtek Ethernet switch family support" + depends on NET_DSA + select NET_DSA_TAG_RTL4_A + select NET_DSA_TAG_RTL8_4 + select FIXED_PHY + select IRQ_DOMAIN + select REALTEK_PHY + select REGMAP + help + Select to enable support for Realtek Ethernet switch chips. + +config NET_DSA_REALTEK_SMI + tristate "Realtek SMI connected switch driver" + depends on NET_DSA_REALTEK + depends on OF + default y + help + Select to enable support for registering switches connected + through SMI. 
diff --git a/drivers/net/dsa/realtek/Makefile b/drivers/net/dsa/realtek/Makefile new file mode 100644 index 0000000000000..323b921bfce0f --- /dev/null +++ b/drivers/net/dsa/realtek/Makefile @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_NET_DSA_REALTEK_SMI) += realtek-smi.o +realtek-smi-objs := realtek-smi-core.o rtl8366.o rtl8366rb.o rtl8365mb.o diff --git a/drivers/net/dsa/realtek-smi-core.c b/drivers/net/dsa/realtek/realtek-smi-core.c similarity index 91% rename from drivers/net/dsa/realtek-smi-core.c rename to drivers/net/dsa/realtek/realtek-smi-core.c index aae46ada8d839..a9c21f9e33709 100644 --- a/drivers/net/dsa/realtek-smi-core.c +++ b/drivers/net/dsa/realtek/realtek-smi-core.c @@ -315,7 +315,21 @@ static int realtek_smi_read(void *ctx, u32 reg, u32 *val) return realtek_smi_read_reg(smi, reg, val); } -static const struct regmap_config realtek_smi_mdio_regmap_config = { +static void realtek_smi_lock(void *ctx) +{ + struct realtek_smi *smi = ctx; + + mutex_lock(&smi->map_lock); +} + +static void realtek_smi_unlock(void *ctx) +{ + struct realtek_smi *smi = ctx; + + mutex_unlock(&smi->map_lock); +} + +static const struct regmap_config realtek_smi_regmap_config = { .reg_bits = 10, /* A4..A0 R4..R0 */ .val_bits = 16, .reg_stride = 1, @@ -325,6 +339,21 @@ static const struct regmap_config realtek_smi_mdio_regmap_config = { .reg_read = realtek_smi_read, .reg_write = realtek_smi_write, .cache_type = REGCACHE_NONE, + .lock = realtek_smi_lock, + .unlock = realtek_smi_unlock, +}; + +static const struct regmap_config realtek_smi_nolock_regmap_config = { + .reg_bits = 10, /* A4..A0 R4..R0 */ + .val_bits = 16, + .reg_stride = 1, + /* PHY regs are at 0x8000 */ + .max_register = 0xffff, + .reg_format_endian = REGMAP_ENDIAN_BIG, + .reg_read = realtek_smi_read, + .reg_write = realtek_smi_write, + .cache_type = REGCACHE_NONE, + .disable_locking = true, }; static int realtek_smi_mdio_read(struct mii_bus *bus, int addr, int regnum) @@ -388,6 +417,7 @@ static 
int realtek_smi_probe(struct platform_device *pdev) const struct realtek_smi_variant *var; struct device *dev = &pdev->dev; struct realtek_smi *smi; + struct regmap_config rc; struct device_node *np; int ret; @@ -398,14 +428,26 @@ static int realtek_smi_probe(struct platform_device *pdev) if (!smi) return -ENOMEM; smi->chip_data = (void *)smi + sizeof(*smi); - smi->map = devm_regmap_init(dev, NULL, smi, - &realtek_smi_mdio_regmap_config); + + mutex_init(&smi->map_lock); + + rc = realtek_smi_regmap_config; + rc.lock_arg = smi; + smi->map = devm_regmap_init(dev, NULL, smi, &rc); if (IS_ERR(smi->map)) { ret = PTR_ERR(smi->map); dev_err(dev, "regmap init failed: %d\n", ret); return ret; } + rc = realtek_smi_nolock_regmap_config; + smi->map_nolock = devm_regmap_init(dev, NULL, smi, &rc); + if (IS_ERR(smi->map_nolock)) { + ret = PTR_ERR(smi->map_nolock); + dev_err(dev, "regmap init failed: %d\n", ret); + return ret; + } + /* Link forward and backward */ smi->dev = dev; smi->clk_delay = var->clk_delay; diff --git a/drivers/net/dsa/realtek-smi-core.h b/drivers/net/dsa/realtek/realtek-smi-core.h similarity index 98% rename from drivers/net/dsa/realtek-smi-core.h rename to drivers/net/dsa/realtek/realtek-smi-core.h index 5bfa53e2480ae..5fcad51e1984f 100644 --- a/drivers/net/dsa/realtek-smi-core.h +++ b/drivers/net/dsa/realtek/realtek-smi-core.h @@ -25,7 +25,7 @@ struct rtl8366_mib_counter { const char *name; }; -/** +/* * struct rtl8366_vlan_mc - Virtual LAN member configuration */ struct rtl8366_vlan_mc { @@ -49,6 +49,8 @@ struct realtek_smi { struct gpio_desc *mdc; struct gpio_desc *mdio; struct regmap *map; + struct regmap *map_nolock; + struct mutex map_lock; struct mii_bus *slave_mii_bus; unsigned int clk_delay; @@ -74,7 +76,7 @@ struct realtek_smi { void *chip_data; /* Per-chip extra variant data */ }; -/** +/* * struct realtek_smi_ops - vtable for the per-SMI-chiptype operations * @detect: detects the chiptype */ diff --git a/drivers/net/dsa/rtl8365mb.c 
b/drivers/net/dsa/realtek/rtl8365mb.c similarity index 98% rename from drivers/net/dsa/rtl8365mb.c rename to drivers/net/dsa/realtek/rtl8365mb.c index 3b729544798b1..696c8906c74cb 100644 --- a/drivers/net/dsa/rtl8365mb.c +++ b/drivers/net/dsa/realtek/rtl8365mb.c @@ -565,7 +565,7 @@ static int rtl8365mb_phy_poll_busy(struct realtek_smi *smi) { u32 val; - return regmap_read_poll_timeout(smi->map, + return regmap_read_poll_timeout(smi->map_nolock, RTL8365MB_INDIRECT_ACCESS_STATUS_REG, val, !val, 10, 100); } @@ -579,7 +579,7 @@ static int rtl8365mb_phy_ocp_prepare(struct realtek_smi *smi, int phy, /* Set OCP prefix */ val = FIELD_GET(RTL8365MB_PHY_OCP_ADDR_PREFIX_MASK, ocp_addr); ret = regmap_update_bits( - smi->map, RTL8365MB_GPHY_OCP_MSB_0_REG, + smi->map_nolock, RTL8365MB_GPHY_OCP_MSB_0_REG, RTL8365MB_GPHY_OCP_MSB_0_CFG_CPU_OCPADR_MASK, FIELD_PREP(RTL8365MB_GPHY_OCP_MSB_0_CFG_CPU_OCPADR_MASK, val)); if (ret) @@ -592,8 +592,8 @@ static int rtl8365mb_phy_ocp_prepare(struct realtek_smi *smi, int phy, ocp_addr >> 1); val |= FIELD_PREP(RTL8365MB_INDIRECT_ACCESS_ADDRESS_OCPADR_9_6_MASK, ocp_addr >> 6); - ret = regmap_write(smi->map, RTL8365MB_INDIRECT_ACCESS_ADDRESS_REG, - val); + ret = regmap_write(smi->map_nolock, + RTL8365MB_INDIRECT_ACCESS_ADDRESS_REG, val); if (ret) return ret; @@ -606,36 +606,42 @@ static int rtl8365mb_phy_ocp_read(struct realtek_smi *smi, int phy, u32 val; int ret; + mutex_lock(&smi->map_lock); + ret = rtl8365mb_phy_poll_busy(smi); if (ret) - return ret; + goto out; ret = rtl8365mb_phy_ocp_prepare(smi, phy, ocp_addr); if (ret) - return ret; + goto out; /* Execute read operation */ val = FIELD_PREP(RTL8365MB_INDIRECT_ACCESS_CTRL_CMD_MASK, RTL8365MB_INDIRECT_ACCESS_CTRL_CMD_VALUE) | FIELD_PREP(RTL8365MB_INDIRECT_ACCESS_CTRL_RW_MASK, RTL8365MB_INDIRECT_ACCESS_CTRL_RW_READ); - ret = regmap_write(smi->map, RTL8365MB_INDIRECT_ACCESS_CTRL_REG, val); + ret = regmap_write(smi->map_nolock, RTL8365MB_INDIRECT_ACCESS_CTRL_REG, + val); if (ret) - return ret; + 
goto out; ret = rtl8365mb_phy_poll_busy(smi); if (ret) - return ret; + goto out; /* Get PHY register data */ - ret = regmap_read(smi->map, RTL8365MB_INDIRECT_ACCESS_READ_DATA_REG, - &val); + ret = regmap_read(smi->map_nolock, + RTL8365MB_INDIRECT_ACCESS_READ_DATA_REG, &val); if (ret) - return ret; + goto out; *data = val & 0xFFFF; - return 0; +out: + mutex_unlock(&smi->map_lock); + + return ret; } static int rtl8365mb_phy_ocp_write(struct realtek_smi *smi, int phy, @@ -644,32 +650,38 @@ static int rtl8365mb_phy_ocp_write(struct realtek_smi *smi, int phy, u32 val; int ret; + mutex_lock(&smi->map_lock); + ret = rtl8365mb_phy_poll_busy(smi); if (ret) - return ret; + goto out; ret = rtl8365mb_phy_ocp_prepare(smi, phy, ocp_addr); if (ret) - return ret; + goto out; /* Set PHY register data */ - ret = regmap_write(smi->map, RTL8365MB_INDIRECT_ACCESS_WRITE_DATA_REG, - data); + ret = regmap_write(smi->map_nolock, + RTL8365MB_INDIRECT_ACCESS_WRITE_DATA_REG, data); if (ret) - return ret; + goto out; /* Execute write operation */ val = FIELD_PREP(RTL8365MB_INDIRECT_ACCESS_CTRL_CMD_MASK, RTL8365MB_INDIRECT_ACCESS_CTRL_CMD_VALUE) | FIELD_PREP(RTL8365MB_INDIRECT_ACCESS_CTRL_RW_MASK, RTL8365MB_INDIRECT_ACCESS_CTRL_RW_WRITE); - ret = regmap_write(smi->map, RTL8365MB_INDIRECT_ACCESS_CTRL_REG, val); + ret = regmap_write(smi->map_nolock, RTL8365MB_INDIRECT_ACCESS_CTRL_REG, + val); if (ret) - return ret; + goto out; ret = rtl8365mb_phy_poll_busy(smi); if (ret) - return ret; + goto out; + +out: + mutex_unlock(&smi->map_lock); return 0; } diff --git a/drivers/net/dsa/rtl8366.c b/drivers/net/dsa/realtek/rtl8366.c similarity index 100% rename from drivers/net/dsa/rtl8366.c rename to drivers/net/dsa/realtek/rtl8366.c diff --git a/drivers/net/dsa/rtl8366rb.c b/drivers/net/dsa/realtek/rtl8366rb.c similarity index 99% rename from drivers/net/dsa/rtl8366rb.c rename to drivers/net/dsa/realtek/rtl8366rb.c index ecc19bd5115f0..4f8c06d7ab3a9 100644 --- a/drivers/net/dsa/rtl8366rb.c +++ 
b/drivers/net/dsa/realtek/rtl8366rb.c @@ -1252,6 +1252,8 @@ rtl8366rb_port_bridge_leave(struct dsa_switch *ds, int port, * @smi: SMI state container * @port: the port to drop untagged and C-tagged frames on * @drop: whether to drop or pass untagged and C-tagged frames + * + * Return: zero for success, a negative number on error. */ static int rtl8366rb_drop_untagged(struct realtek_smi *smi, int port, bool drop) { diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c index ff2d099aab218..53dc8d5fede86 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c @@ -696,6 +696,12 @@ static int xgene_enet_rx_frame(struct xgene_enet_desc_ring *rx_ring, buf_pool->rx_skb[skb_index] = NULL; datalen = xgene_enet_get_data_len(le64_to_cpu(raw_desc->m1)); + + /* strip off CRC as HW isn't doing this */ + nv = GET_VAL(NV, le64_to_cpu(raw_desc->m0)); + if (!nv) + datalen -= 4; + skb_put(skb, datalen); prefetch(skb->data - NET_IP_ALIGN); skb->protocol = eth_type_trans(skb, ndev); @@ -717,12 +723,8 @@ static int xgene_enet_rx_frame(struct xgene_enet_desc_ring *rx_ring, } } - nv = GET_VAL(NV, le64_to_cpu(raw_desc->m0)); - if (!nv) { - /* strip off CRC as HW isn't doing this */ - datalen -= 4; + if (!nv) goto skip_jumbo; - } slots = page_pool->slots - 1; head = page_pool->head; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index b1c98d1408b82..6af0ae1d0c462 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -3224,6 +3224,7 @@ static int bnxt_alloc_tx_rings(struct bnxt *bp) } qidx = bp->tc_to_qidx[j]; ring->queue_id = bp->q_info[qidx].queue_id; + spin_lock_init(&txr->xdp_tx_lock); if (i < bp->tx_nr_rings_xdp) continue; if (i % bp->tx_nr_rings_per_tc == (bp->tx_nr_rings_per_tc - 1)) @@ -10294,6 +10295,12 @@ static int __bnxt_open_nic(struct bnxt *bp, bool 
irq_re_init, bool link_re_init) if (irq_re_init) udp_tunnel_nic_reset_ntf(bp->dev); + if (bp->tx_nr_rings_xdp < num_possible_cpus()) { + if (!static_key_enabled(&bnxt_xdp_locking_key)) + static_branch_enable(&bnxt_xdp_locking_key); + } else if (static_key_enabled(&bnxt_xdp_locking_key)) { + static_branch_disable(&bnxt_xdp_locking_key); + } set_bit(BNXT_STATE_OPEN, &bp->state); bnxt_enable_int(bp); /* Enable TX queues */ diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 666fc1e7a7d2f..d57bff46b5878 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -593,7 +593,8 @@ struct nqe_cn { #define BNXT_MAX_MTU 9500 #define BNXT_MAX_PAGE_MODE_MTU \ ((unsigned int)PAGE_SIZE - VLAN_ETH_HLEN - NET_IP_ALIGN - \ - XDP_PACKET_HEADROOM) + XDP_PACKET_HEADROOM - \ + SKB_DATA_ALIGN((unsigned int)sizeof(struct skb_shared_info))) #define BNXT_MIN_PKT_SIZE 52 @@ -800,6 +801,8 @@ struct bnxt_tx_ring_info { u32 dev_state; struct bnxt_ring_struct tx_ring_struct; + /* Synchronize simultaneous xdp_xmit on same ring */ + spinlock_t xdp_tx_lock; }; #define BNXT_LEGACY_COAL_CMPL_PARAMS \ diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 8aaa2335f848a..f09b04556c32e 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -2101,9 +2101,7 @@ static int bnxt_set_pauseparam(struct net_device *dev, } link_info->autoneg |= BNXT_AUTONEG_FLOW_CTRL; - if (bp->hwrm_spec_code >= 0x10201) - link_info->req_flow_ctrl = - PORT_PHY_CFG_REQ_AUTO_PAUSE_AUTONEG_PAUSE; + link_info->req_flow_ctrl = 0; } else { /* when transition from auto pause to force pause, * force a link change diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c index 48520967746ff..c75c5ae64d5d8 100644 --- 
a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c @@ -329,7 +329,7 @@ static int bnxt_ptp_enable(struct ptp_clock_info *ptp_info, struct bnxt_ptp_cfg *ptp = container_of(ptp_info, struct bnxt_ptp_cfg, ptp_info); struct bnxt *bp = ptp->bp; - u8 pin_id; + int pin_id; int rc; switch (rq->type) { @@ -337,6 +337,8 @@ static int bnxt_ptp_enable(struct ptp_clock_info *ptp_info, /* Configure an External PPS IN */ pin_id = ptp_find_pin(ptp->ptp_clock, PTP_PF_EXTTS, rq->extts.index); + if (!TSIO_PIN_VALID(pin_id)) + return -EOPNOTSUPP; if (!on) break; rc = bnxt_ptp_cfg_pin(bp, pin_id, BNXT_PPS_PIN_PPS_IN); @@ -350,6 +352,8 @@ static int bnxt_ptp_enable(struct ptp_clock_info *ptp_info, /* Configure a Periodic PPS OUT */ pin_id = ptp_find_pin(ptp->ptp_clock, PTP_PF_PEROUT, rq->perout.index); + if (!TSIO_PIN_VALID(pin_id)) + return -EOPNOTSUPP; if (!on) break; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h index 7c528e1f8713e..8205140db829e 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h @@ -31,7 +31,7 @@ struct pps_pin { u8 state; }; -#define TSIO_PIN_VALID(pin) ((pin) < (BNXT_MAX_TSIO_PINS)) +#define TSIO_PIN_VALID(pin) ((pin) >= 0 && (pin) < (BNXT_MAX_TSIO_PINS)) #define EVENT_DATA2_PPS_EVENT_TYPE(data2) \ ((data2) & ASYNC_EVENT_CMPL_PPS_TIMESTAMP_EVENT_DATA2_EVENT_TYPE) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c index 52fad0fdeacf3..03b1d6c045048 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c @@ -20,6 +20,8 @@ #include "bnxt.h" #include "bnxt_xdp.h" +DEFINE_STATIC_KEY_FALSE(bnxt_xdp_locking_key); + struct bnxt_sw_tx_bd *bnxt_xmit_bd(struct bnxt *bp, struct bnxt_tx_ring_info *txr, dma_addr_t mapping, u32 len) @@ -227,11 +229,16 @@ int bnxt_xdp_xmit(struct net_device *dev, int 
num_frames, ring = smp_processor_id() % bp->tx_nr_rings_xdp; txr = &bp->tx_ring[ring]; + if (READ_ONCE(txr->dev_state) == BNXT_DEV_STATE_CLOSING) + return -EINVAL; + + if (static_branch_unlikely(&bnxt_xdp_locking_key)) + spin_lock(&txr->xdp_tx_lock); + for (i = 0; i < num_frames; i++) { struct xdp_frame *xdp = frames[i]; - if (!txr || !bnxt_tx_avail(bp, txr) || - !(bp->bnapi[ring]->flags & BNXT_NAPI_FLAG_XDP)) + if (!bnxt_tx_avail(bp, txr)) break; mapping = dma_map_single(&pdev->dev, xdp->data, xdp->len, @@ -250,6 +257,9 @@ int bnxt_xdp_xmit(struct net_device *dev, int num_frames, bnxt_db_write(bp, &txr->tx_db, txr->tx_prod); } + if (static_branch_unlikely(&bnxt_xdp_locking_key)) + spin_unlock(&txr->xdp_tx_lock); + return nxmit; } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h index 0df40c3beb050..067bb5e821f54 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h @@ -10,6 +10,8 @@ #ifndef BNXT_XDP_H #define BNXT_XDP_H +DECLARE_STATIC_KEY_FALSE(bnxt_xdp_locking_key); + struct bnxt_sw_tx_bd *bnxt_xmit_bd(struct bnxt *bp, struct bnxt_tx_ring_info *txr, dma_addr_t mapping, u32 len); diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index d5356db7539a4..caf48023f8ea5 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -1835,11 +1835,6 @@ static int ftgmac100_probe(struct platform_device *pdev) priv->rxdes0_edorr_mask = BIT(30); priv->txdes0_edotr_mask = BIT(30); priv->is_aspeed = true; - /* Disable ast2600 problematic HW arbitration */ - if (of_device_is_compatible(np, "aspeed,ast2600-mac")) { - iowrite32(FTGMAC100_TM_DEFAULT, - priv->base + FTGMAC100_OFFSET_TM); - } } else { priv->rxdes0_edorr_mask = BIT(15); priv->txdes0_edotr_mask = BIT(15); @@ -1911,6 +1906,11 @@ static int ftgmac100_probe(struct platform_device *pdev) err = ftgmac100_setup_clk(priv); 
if (err) goto err_phy_connect; + + /* Disable ast2600 problematic HW arbitration */ + if (of_device_is_compatible(np, "aspeed,ast2600-mac")) + iowrite32(FTGMAC100_TM_DEFAULT, + priv->base + FTGMAC100_OFFSET_TM); } /* Default ring sizes */ diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.c index 5f5f8c53c4a0f..c8cb541572ffe 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.c @@ -167,7 +167,7 @@ static int dpaa2_ptp_probe(struct fsl_mc_device *mc_dev) base = of_iomap(node, 0); if (!base) { err = -ENOMEM; - goto err_close; + goto err_put; } err = fsl_mc_allocate_irqs(mc_dev); @@ -210,6 +210,8 @@ static int dpaa2_ptp_probe(struct fsl_mc_device *mc_dev) fsl_mc_free_irqs(mc_dev); err_unmap: iounmap(base); +err_put: + of_node_put(node); err_close: dprtc_close(mc_dev->mc_io, 0, mc_dev->mc_handle); err_free_mcp: diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c index fa5b4f885b177..60ec64bfb3f0b 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c @@ -674,7 +674,10 @@ static int enetc_get_ts_info(struct net_device *ndev, #ifdef CONFIG_FSL_ENETC_PTP_CLOCK info->so_timestamping = SOF_TIMESTAMPING_TX_HARDWARE | SOF_TIMESTAMPING_RX_HARDWARE | - SOF_TIMESTAMPING_RAW_HARDWARE; + SOF_TIMESTAMPING_RAW_HARDWARE | + SOF_TIMESTAMPING_TX_SOFTWARE | + SOF_TIMESTAMPING_RX_SOFTWARE | + SOF_TIMESTAMPING_SOFTWARE; info->tx_types = (1 << HWTSTAMP_TX_OFF) | (1 << HWTSTAMP_TX_ON) | diff --git a/drivers/net/ethernet/freescale/enetc/enetc_qos.c b/drivers/net/ethernet/freescale/enetc/enetc_qos.c index 3555c12edb45a..d3d7172e0fcc5 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_qos.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_qos.c @@ -45,6 +45,7 @@ void enetc_sched_speed_set(struct enetc_ndev_priv *priv, int speed) | 
pspeed); } +#define ENETC_QOS_ALIGN 64 static int enetc_setup_taprio(struct net_device *ndev, struct tc_taprio_qopt_offload *admin_conf) { @@ -52,10 +53,11 @@ static int enetc_setup_taprio(struct net_device *ndev, struct enetc_cbd cbd = {.cmd = 0}; struct tgs_gcl_conf *gcl_config; struct tgs_gcl_data *gcl_data; + dma_addr_t dma, dma_align; struct gce *gce; - dma_addr_t dma; u16 data_size; u16 gcl_len; + void *tmp; u32 tge; int err; int i; @@ -82,9 +84,16 @@ static int enetc_setup_taprio(struct net_device *ndev, gcl_config = &cbd.gcl_conf; data_size = struct_size(gcl_data, entry, gcl_len); - gcl_data = kzalloc(data_size, __GFP_DMA | GFP_KERNEL); - if (!gcl_data) + tmp = dma_alloc_coherent(&priv->si->pdev->dev, + data_size + ENETC_QOS_ALIGN, + &dma, GFP_KERNEL); + if (!tmp) { + dev_err(&priv->si->pdev->dev, + "DMA mapping of taprio gate list failed!\n"); return -ENOMEM; + } + dma_align = ALIGN(dma, ENETC_QOS_ALIGN); + gcl_data = (struct tgs_gcl_data *)PTR_ALIGN(tmp, ENETC_QOS_ALIGN); gce = (struct gce *)(gcl_data + 1); @@ -110,16 +119,8 @@ static int enetc_setup_taprio(struct net_device *ndev, cbd.length = cpu_to_le16(data_size); cbd.status_flags = 0; - dma = dma_map_single(&priv->si->pdev->dev, gcl_data, - data_size, DMA_TO_DEVICE); - if (dma_mapping_error(&priv->si->pdev->dev, dma)) { - netdev_err(priv->si->ndev, "DMA mapping failed!\n"); - kfree(gcl_data); - return -ENOMEM; - } - - cbd.addr[0] = cpu_to_le32(lower_32_bits(dma)); - cbd.addr[1] = cpu_to_le32(upper_32_bits(dma)); + cbd.addr[0] = cpu_to_le32(lower_32_bits(dma_align)); + cbd.addr[1] = cpu_to_le32(upper_32_bits(dma_align)); cbd.cls = BDCR_CMD_PORT_GCL; cbd.status_flags = 0; @@ -132,8 +133,8 @@ static int enetc_setup_taprio(struct net_device *ndev, ENETC_QBV_PTGCR_OFFSET, tge & (~ENETC_QBV_TGE)); - dma_unmap_single(&priv->si->pdev->dev, dma, data_size, DMA_TO_DEVICE); - kfree(gcl_data); + dma_free_coherent(&priv->si->pdev->dev, data_size + ENETC_QOS_ALIGN, + tmp, dma); return err; } @@ -463,8 +464,9 @@ 
static int enetc_streamid_hw_set(struct enetc_ndev_priv *priv, struct enetc_cbd cbd = {.cmd = 0}; struct streamid_data *si_data; struct streamid_conf *si_conf; + dma_addr_t dma, dma_align; u16 data_size; - dma_addr_t dma; + void *tmp; int port; int err; @@ -485,21 +487,20 @@ static int enetc_streamid_hw_set(struct enetc_ndev_priv *priv, cbd.status_flags = 0; data_size = sizeof(struct streamid_data); - si_data = kzalloc(data_size, __GFP_DMA | GFP_KERNEL); - if (!si_data) + tmp = dma_alloc_coherent(&priv->si->pdev->dev, + data_size + ENETC_QOS_ALIGN, + &dma, GFP_KERNEL); + if (!tmp) { + dev_err(&priv->si->pdev->dev, + "DMA mapping of stream identify failed!\n"); return -ENOMEM; - cbd.length = cpu_to_le16(data_size); - - dma = dma_map_single(&priv->si->pdev->dev, si_data, - data_size, DMA_FROM_DEVICE); - if (dma_mapping_error(&priv->si->pdev->dev, dma)) { - netdev_err(priv->si->ndev, "DMA mapping failed!\n"); - err = -ENOMEM; - goto out; } + dma_align = ALIGN(dma, ENETC_QOS_ALIGN); + si_data = (struct streamid_data *)PTR_ALIGN(tmp, ENETC_QOS_ALIGN); - cbd.addr[0] = cpu_to_le32(lower_32_bits(dma)); - cbd.addr[1] = cpu_to_le32(upper_32_bits(dma)); + cbd.length = cpu_to_le16(data_size); + cbd.addr[0] = cpu_to_le32(lower_32_bits(dma_align)); + cbd.addr[1] = cpu_to_le32(upper_32_bits(dma_align)); eth_broadcast_addr(si_data->dmac); si_data->vid_vidm_tg = (ENETC_CBDR_SID_VID_MASK + ((0x3 << 14) | ENETC_CBDR_SID_VIDM)); @@ -539,8 +540,8 @@ static int enetc_streamid_hw_set(struct enetc_ndev_priv *priv, cbd.length = cpu_to_le16(data_size); - cbd.addr[0] = cpu_to_le32(lower_32_bits(dma)); - cbd.addr[1] = cpu_to_le32(upper_32_bits(dma)); + cbd.addr[0] = cpu_to_le32(lower_32_bits(dma_align)); + cbd.addr[1] = cpu_to_le32(upper_32_bits(dma_align)); /* VIDM default to be 1. * VID Match. 
If set (b1) then the VID must match, otherwise @@ -561,10 +562,8 @@ static int enetc_streamid_hw_set(struct enetc_ndev_priv *priv, err = enetc_send_cmd(priv->si, &cbd); out: - if (!dma_mapping_error(&priv->si->pdev->dev, dma)) - dma_unmap_single(&priv->si->pdev->dev, dma, data_size, DMA_FROM_DEVICE); - - kfree(si_data); + dma_free_coherent(&priv->si->pdev->dev, data_size + ENETC_QOS_ALIGN, + tmp, dma); return err; } @@ -633,8 +632,9 @@ static int enetc_streamcounter_hw_get(struct enetc_ndev_priv *priv, { struct enetc_cbd cbd = { .cmd = 2 }; struct sfi_counter_data *data_buf; - dma_addr_t dma; + dma_addr_t dma, dma_align; u16 data_size; + void *tmp; int err; cbd.index = cpu_to_le16((u16)index); @@ -643,19 +643,19 @@ static int enetc_streamcounter_hw_get(struct enetc_ndev_priv *priv, cbd.status_flags = 0; data_size = sizeof(struct sfi_counter_data); - data_buf = kzalloc(data_size, __GFP_DMA | GFP_KERNEL); - if (!data_buf) + tmp = dma_alloc_coherent(&priv->si->pdev->dev, + data_size + ENETC_QOS_ALIGN, + &dma, GFP_KERNEL); + if (!tmp) { + dev_err(&priv->si->pdev->dev, + "DMA mapping of stream counter failed!\n"); return -ENOMEM; - - dma = dma_map_single(&priv->si->pdev->dev, data_buf, - data_size, DMA_FROM_DEVICE); - if (dma_mapping_error(&priv->si->pdev->dev, dma)) { - netdev_err(priv->si->ndev, "DMA mapping failed!\n"); - err = -ENOMEM; - goto exit; } - cbd.addr[0] = cpu_to_le32(lower_32_bits(dma)); - cbd.addr[1] = cpu_to_le32(upper_32_bits(dma)); + dma_align = ALIGN(dma, ENETC_QOS_ALIGN); + data_buf = (struct sfi_counter_data *)PTR_ALIGN(tmp, ENETC_QOS_ALIGN); + + cbd.addr[0] = cpu_to_le32(lower_32_bits(dma_align)); + cbd.addr[1] = cpu_to_le32(upper_32_bits(dma_align)); cbd.length = cpu_to_le16(data_size); @@ -684,7 +684,9 @@ static int enetc_streamcounter_hw_get(struct enetc_ndev_priv *priv, data_buf->flow_meter_dropl; exit: - kfree(data_buf); + dma_free_coherent(&priv->si->pdev->dev, data_size + ENETC_QOS_ALIGN, + tmp, dma); + return err; } @@ -723,9 +725,10 @@ 
static int enetc_streamgate_hw_set(struct enetc_ndev_priv *priv, struct sgcl_conf *sgcl_config; struct sgcl_data *sgcl_data; struct sgce *sgce; - dma_addr_t dma; + dma_addr_t dma, dma_align; u16 data_size; int err, i; + void *tmp; u64 now; cbd.index = cpu_to_le16(sgi->index); @@ -772,24 +775,20 @@ static int enetc_streamgate_hw_set(struct enetc_ndev_priv *priv, sgcl_config->acl_len = (sgi->num_entries - 1) & 0x3; data_size = struct_size(sgcl_data, sgcl, sgi->num_entries); - - sgcl_data = kzalloc(data_size, __GFP_DMA | GFP_KERNEL); - if (!sgcl_data) - return -ENOMEM; - - cbd.length = cpu_to_le16(data_size); - - dma = dma_map_single(&priv->si->pdev->dev, - sgcl_data, data_size, - DMA_FROM_DEVICE); - if (dma_mapping_error(&priv->si->pdev->dev, dma)) { - netdev_err(priv->si->ndev, "DMA mapping failed!\n"); - kfree(sgcl_data); + tmp = dma_alloc_coherent(&priv->si->pdev->dev, + data_size + ENETC_QOS_ALIGN, + &dma, GFP_KERNEL); + if (!tmp) { + dev_err(&priv->si->pdev->dev, + "DMA mapping of stream counter failed!\n"); return -ENOMEM; } + dma_align = ALIGN(dma, ENETC_QOS_ALIGN); + sgcl_data = (struct sgcl_data *)PTR_ALIGN(tmp, ENETC_QOS_ALIGN); - cbd.addr[0] = cpu_to_le32(lower_32_bits(dma)); - cbd.addr[1] = cpu_to_le32(upper_32_bits(dma)); + cbd.length = cpu_to_le16(data_size); + cbd.addr[0] = cpu_to_le32(lower_32_bits(dma_align)); + cbd.addr[1] = cpu_to_le32(upper_32_bits(dma_align)); sgce = &sgcl_data->sgcl[0]; @@ -844,7 +843,8 @@ static int enetc_streamgate_hw_set(struct enetc_ndev_priv *priv, err = enetc_send_cmd(priv->si, &cbd); exit: - kfree(sgcl_data); + dma_free_coherent(&priv->si->pdev->dev, data_size + ENETC_QOS_ALIGN, + tmp, dma); return err; } diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h index 9298fbecb31ac..8184a954f6481 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -536,6 +536,8 @@ struct hnae3_ae_dev { * Get 1588 rx hwstamp * get_ts_info 
* Get phc info + * clean_vf_config + * Clean residual vf info after disable sriov */ struct hnae3_ae_ops { int (*init_ae_dev)(struct hnae3_ae_dev *ae_dev); @@ -729,6 +731,7 @@ struct hnae3_ae_ops { struct ethtool_ts_info *info); int (*get_link_diagnosis_info)(struct hnae3_handle *handle, u32 *status_code); + void (*clean_vf_config)(struct hnae3_ae_dev *ae_dev, int num_vfs); }; struct hnae3_dcb_ops { @@ -841,6 +844,7 @@ struct hnae3_handle { struct dentry *hnae3_dbgfs; /* protects concurrent contention between debugfs commands */ struct mutex dbgfs_lock; + char **dbgfs_buf; /* Network interface message level enabled bits */ u32 msg_enable; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c index f726a5b70f9e2..44d9b560b3374 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c @@ -1227,7 +1227,7 @@ static ssize_t hns3_dbg_read(struct file *filp, char __user *buffer, return ret; mutex_lock(&handle->dbgfs_lock); - save_buf = &hns3_dbg_cmd[index].buf; + save_buf = &handle->dbgfs_buf[index]; if (!test_bit(HNS3_NIC_STATE_INITED, &priv->state) || test_bit(HNS3_NIC_STATE_RESETTING, &priv->state)) { @@ -1332,6 +1332,13 @@ int hns3_dbg_init(struct hnae3_handle *handle) int ret; u32 i; + handle->dbgfs_buf = devm_kcalloc(&handle->pdev->dev, + ARRAY_SIZE(hns3_dbg_cmd), + sizeof(*handle->dbgfs_buf), + GFP_KERNEL); + if (!handle->dbgfs_buf) + return -ENOMEM; + hns3_dbg_dentry[HNS3_DBG_DENTRY_COMMON].dentry = debugfs_create_dir(name, hns3_dbgfs_root); handle->hnae3_dbgfs = hns3_dbg_dentry[HNS3_DBG_DENTRY_COMMON].dentry; @@ -1380,9 +1387,9 @@ void hns3_dbg_uninit(struct hnae3_handle *handle) u32 i; for (i = 0; i < ARRAY_SIZE(hns3_dbg_cmd); i++) - if (hns3_dbg_cmd[i].buf) { - kvfree(hns3_dbg_cmd[i].buf); - hns3_dbg_cmd[i].buf = NULL; + if (handle->dbgfs_buf[i]) { + kvfree(handle->dbgfs_buf[i]); + handle->dbgfs_buf[i] = NULL; } 
mutex_destroy(&handle->dbgfs_lock); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.h b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.h index 83aa1450ab9fe..97578eabb7d8b 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.h @@ -49,7 +49,6 @@ struct hns3_dbg_cmd_info { enum hnae3_dbg_cmd cmd; enum hns3_dbg_dentry_type dentry; u32 buf_len; - char *buf; int (*init)(struct hnae3_handle *handle, unsigned int cmd); }; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index babc5d7a3b526..f6082be7481c1 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -1028,46 +1028,56 @@ static bool hns3_can_use_tx_sgl(struct hns3_enet_ring *ring, static void hns3_init_tx_spare_buffer(struct hns3_enet_ring *ring) { + u32 alloc_size = ring->tqp->handle->kinfo.tx_spare_buf_size; struct hns3_tx_spare *tx_spare; struct page *page; - u32 alloc_size; dma_addr_t dma; int order; - alloc_size = ring->tqp->handle->kinfo.tx_spare_buf_size; if (!alloc_size) return; order = get_order(alloc_size); + if (order >= MAX_ORDER) { + if (net_ratelimit()) + dev_warn(ring_to_dev(ring), "failed to allocate tx spare buffer, exceed to max order\n"); + return; + } + tx_spare = devm_kzalloc(ring_to_dev(ring), sizeof(*tx_spare), GFP_KERNEL); if (!tx_spare) { /* The driver still work without the tx spare buffer */ dev_warn(ring_to_dev(ring), "failed to allocate hns3_tx_spare\n"); - return; + goto devm_kzalloc_error; } page = alloc_pages_node(dev_to_node(ring_to_dev(ring)), GFP_KERNEL, order); if (!page) { dev_warn(ring_to_dev(ring), "failed to allocate tx spare pages\n"); - devm_kfree(ring_to_dev(ring), tx_spare); - return; + goto alloc_pages_error; } dma = dma_map_page(ring_to_dev(ring), page, 0, PAGE_SIZE << order, DMA_TO_DEVICE); if (dma_mapping_error(ring_to_dev(ring), dma)) { 
dev_warn(ring_to_dev(ring), "failed to map pages for tx spare\n"); - put_page(page); - devm_kfree(ring_to_dev(ring), tx_spare); - return; + goto dma_mapping_error; } tx_spare->dma = dma; tx_spare->buf = page_address(page); tx_spare->len = PAGE_SIZE << order; ring->tx_spare = tx_spare; + return; + +dma_mapping_error: + put_page(page); +alloc_pages_error: + devm_kfree(ring_to_dev(ring), tx_spare); +devm_kzalloc_error: + ring->tqp->handle->kinfo.tx_spare_buf_size = 0; } /* Use hns3_tx_spare_space() to make sure there is enough buffer @@ -2982,6 +2992,21 @@ static int hns3_probe(struct pci_dev *pdev, const struct pci_device_id *ent) return ret; } +/** + * hns3_clean_vf_config + * @pdev: pointer to a pci_dev structure + * @num_vfs: number of VFs allocated + * + * Clean residual vf config after disable sriov + **/ +static void hns3_clean_vf_config(struct pci_dev *pdev, int num_vfs) +{ + struct hnae3_ae_dev *ae_dev = pci_get_drvdata(pdev); + + if (ae_dev->ops->clean_vf_config) + ae_dev->ops->clean_vf_config(ae_dev, num_vfs); +} + /* hns3_remove - Device removal routine * @pdev: PCI device information struct */ @@ -3020,7 +3045,10 @@ static int hns3_pci_sriov_configure(struct pci_dev *pdev, int num_vfs) else return num_vfs; } else if (!pci_vfs_assigned(pdev)) { + int num_vfs_pre = pci_num_vf(pdev); + pci_disable_sriov(pdev); + hns3_clean_vf_config(pdev, num_vfs_pre); } else { dev_warn(&pdev->dev, "Unable to free VFs because some are assigned to VMs.\n"); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c index c06c39ece80da..cbf36cc86803a 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c @@ -651,8 +651,8 @@ static void hns3_get_ringparam(struct net_device *netdev, struct hnae3_handle *h = priv->ae_handle; int rx_queue_index = h->kinfo.num_tqps; - if (hns3_nic_resetting(netdev)) { - netdev_err(netdev, "dev resetting!"); + if 
(hns3_nic_resetting(netdev) || !priv->ring) { + netdev_err(netdev, "failed to get ringparam value, due to dev resetting or uninited\n"); return; } @@ -1072,8 +1072,14 @@ static int hns3_check_ringparam(struct net_device *ndev, { #define RX_BUF_LEN_2K 2048 #define RX_BUF_LEN_4K 4096 - if (hns3_nic_resetting(ndev)) + + struct hns3_nic_priv *priv = netdev_priv(ndev); + + if (hns3_nic_resetting(ndev) || !priv->ring) { + netdev_err(ndev, "failed to set ringparam value, due to dev resetting or uninited\n"); return -EBUSY; + } + if (param->rx_mini_pending || param->rx_jumbo_pending) return -EINVAL; @@ -1764,9 +1770,6 @@ static int hns3_set_tx_spare_buf_size(struct net_device *netdev, struct hnae3_handle *h = priv->ae_handle; int ret; - if (hns3_nic_resetting(netdev)) - return -EBUSY; - h->kinfo.tx_spare_buf_size = data; ret = hns3_reset_notify(h, HNAE3_DOWN_CLIENT); @@ -1797,6 +1800,11 @@ static int hns3_set_tunable(struct net_device *netdev, struct hnae3_handle *h = priv->ae_handle; int i, ret = 0; + if (hns3_nic_resetting(netdev) || !priv->ring) { + netdev_err(netdev, "failed to set tunable value, dev resetting!"); + return -EBUSY; + } + switch (tuna->id) { case ETHTOOL_TX_COPYBREAK: priv->tx_copybreak = *(u32 *)data; @@ -1816,7 +1824,8 @@ static int hns3_set_tunable(struct net_device *netdev, old_tx_spare_buf_size = h->kinfo.tx_spare_buf_size; new_tx_spare_buf_size = *(u32 *)data; ret = hns3_set_tx_spare_buf_size(netdev, new_tx_spare_buf_size); - if (ret) { + if (ret || + (!priv->ring->tx_spare && new_tx_spare_buf_size != 0)) { int ret1; netdev_warn(netdev, diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 24f7afacae028..e96bc61a0a877 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -1863,6 +1863,7 @@ static int hclge_alloc_vport(struct hclge_dev *hdev) vport->vf_info.link_state = 
IFLA_VF_LINK_STATE_AUTO; vport->mps = HCLGE_MAC_DEFAULT_FRAME; vport->port_base_vlan_cfg.state = HNAE3_PORT_BASE_VLAN_DISABLE; + vport->port_base_vlan_cfg.tbl_sta = true; vport->rxvlan_cfg.rx_vlan_offload_en = true; vport->req_vlan_fltr_en = true; INIT_LIST_HEAD(&vport->vlan_list); @@ -8429,12 +8430,11 @@ int hclge_rm_uc_addr_common(struct hclge_vport *vport, hnae3_set_bit(req.entry_type, HCLGE_MAC_VLAN_BIT0_EN_B, 0); hclge_prepare_mac_addr(&req, addr, false); ret = hclge_remove_mac_vlan_tbl(vport, &req); - if (!ret) { + if (!ret || ret == -ENOENT) { mutex_lock(&hdev->vport_lock); hclge_update_umv_space(vport, true); mutex_unlock(&hdev->vport_lock); - } else if (ret == -ENOENT) { - ret = 0; + return 0; } return ret; @@ -8984,11 +8984,16 @@ static int hclge_set_vf_mac(struct hnae3_handle *handle, int vf, ether_addr_copy(vport->vf_info.mac, mac_addr); + /* there is a timewindow for PF to know VF unalive, it may + * cause send mailbox fail, but it doesn't matter, VF will + * query it when reinit. 
+ */ if (test_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state)) { dev_info(&hdev->pdev->dev, "MAC of VF %d has been set to %s, and it will be reinitialized!\n", vf, format_mac_addr); - return hclge_inform_reset_assert_to_vf(vport); + (void)hclge_inform_reset_assert_to_vf(vport); + return 0; } dev_info(&hdev->pdev->dev, "MAC of VF %d has been set to %s\n", @@ -9809,19 +9814,28 @@ static void hclge_add_vport_vlan_table(struct hclge_vport *vport, u16 vlan_id, bool writen_to_tbl) { struct hclge_vport_vlan_cfg *vlan, *tmp; + struct hclge_dev *hdev = vport->back; - list_for_each_entry_safe(vlan, tmp, &vport->vlan_list, node) - if (vlan->vlan_id == vlan_id) + mutex_lock(&hdev->vport_lock); + + list_for_each_entry_safe(vlan, tmp, &vport->vlan_list, node) { + if (vlan->vlan_id == vlan_id) { + mutex_unlock(&hdev->vport_lock); return; + } + } vlan = kzalloc(sizeof(*vlan), GFP_KERNEL); - if (!vlan) + if (!vlan) { + mutex_unlock(&hdev->vport_lock); return; + } vlan->hd_tbl_status = writen_to_tbl; vlan->vlan_id = vlan_id; list_add_tail(&vlan->node, &vport->vlan_list); + mutex_unlock(&hdev->vport_lock); } static int hclge_add_vport_all_vlan_table(struct hclge_vport *vport) @@ -9830,6 +9844,8 @@ static int hclge_add_vport_all_vlan_table(struct hclge_vport *vport) struct hclge_dev *hdev = vport->back; int ret; + mutex_lock(&hdev->vport_lock); + list_for_each_entry_safe(vlan, tmp, &vport->vlan_list, node) { if (!vlan->hd_tbl_status) { ret = hclge_set_vlan_filter_hw(hdev, htons(ETH_P_8021Q), @@ -9839,12 +9855,16 @@ static int hclge_add_vport_all_vlan_table(struct hclge_vport *vport) dev_err(&hdev->pdev->dev, "restore vport vlan list failed, ret=%d\n", ret); + + mutex_unlock(&hdev->vport_lock); return ret; } } vlan->hd_tbl_status = true; } + mutex_unlock(&hdev->vport_lock); + return 0; } @@ -9854,6 +9874,8 @@ static void hclge_rm_vport_vlan_table(struct hclge_vport *vport, u16 vlan_id, struct hclge_vport_vlan_cfg *vlan, *tmp; struct hclge_dev *hdev = vport->back; + 
mutex_lock(&hdev->vport_lock); + list_for_each_entry_safe(vlan, tmp, &vport->vlan_list, node) { if (vlan->vlan_id == vlan_id) { if (is_write_tbl && vlan->hd_tbl_status) @@ -9868,6 +9890,8 @@ static void hclge_rm_vport_vlan_table(struct hclge_vport *vport, u16 vlan_id, break; } } + + mutex_unlock(&hdev->vport_lock); } void hclge_rm_vport_all_vlan_table(struct hclge_vport *vport, bool is_del_list) @@ -9875,6 +9899,8 @@ void hclge_rm_vport_all_vlan_table(struct hclge_vport *vport, bool is_del_list) struct hclge_vport_vlan_cfg *vlan, *tmp; struct hclge_dev *hdev = vport->back; + mutex_lock(&hdev->vport_lock); + list_for_each_entry_safe(vlan, tmp, &vport->vlan_list, node) { if (vlan->hd_tbl_status) hclge_set_vlan_filter_hw(hdev, @@ -9890,6 +9916,7 @@ void hclge_rm_vport_all_vlan_table(struct hclge_vport *vport, bool is_del_list) } } clear_bit(vport->vport_id, hdev->vf_vlan_full); + mutex_unlock(&hdev->vport_lock); } void hclge_uninit_vport_vlan_table(struct hclge_dev *hdev) @@ -9898,6 +9925,8 @@ void hclge_uninit_vport_vlan_table(struct hclge_dev *hdev) struct hclge_vport *vport; int i; + mutex_lock(&hdev->vport_lock); + for (i = 0; i < hdev->num_alloc_vport; i++) { vport = &hdev->vport[i]; list_for_each_entry_safe(vlan, tmp, &vport->vlan_list, node) { @@ -9905,37 +9934,61 @@ void hclge_uninit_vport_vlan_table(struct hclge_dev *hdev) kfree(vlan); } } + + mutex_unlock(&hdev->vport_lock); } -void hclge_restore_vport_vlan_table(struct hclge_vport *vport) +void hclge_restore_vport_port_base_vlan_config(struct hclge_dev *hdev) { - struct hclge_vport_vlan_cfg *vlan, *tmp; - struct hclge_dev *hdev = vport->back; + struct hclge_vlan_info *vlan_info; + struct hclge_vport *vport; u16 vlan_proto; u16 vlan_id; u16 state; + int vf_id; int ret; - vlan_proto = vport->port_base_vlan_cfg.vlan_info.vlan_proto; - vlan_id = vport->port_base_vlan_cfg.vlan_info.vlan_tag; - state = vport->port_base_vlan_cfg.state; + /* PF should restore all vfs port base vlan */ + for (vf_id = 0; vf_id < 
hdev->num_alloc_vfs; vf_id++) { + vport = &hdev->vport[vf_id + HCLGE_VF_VPORT_START_NUM]; + vlan_info = vport->port_base_vlan_cfg.tbl_sta ? + &vport->port_base_vlan_cfg.vlan_info : + &vport->port_base_vlan_cfg.old_vlan_info; - if (state != HNAE3_PORT_BASE_VLAN_DISABLE) { - clear_bit(vport->vport_id, hdev->vlan_table[vlan_id]); - hclge_set_vlan_filter_hw(hdev, htons(vlan_proto), - vport->vport_id, vlan_id, - false); - return; + vlan_id = vlan_info->vlan_tag; + vlan_proto = vlan_info->vlan_proto; + state = vport->port_base_vlan_cfg.state; + + if (state != HNAE3_PORT_BASE_VLAN_DISABLE) { + clear_bit(vport->vport_id, hdev->vlan_table[vlan_id]); + ret = hclge_set_vlan_filter_hw(hdev, htons(vlan_proto), + vport->vport_id, + vlan_id, false); + vport->port_base_vlan_cfg.tbl_sta = ret == 0; + } } +} - list_for_each_entry_safe(vlan, tmp, &vport->vlan_list, node) { - ret = hclge_set_vlan_filter_hw(hdev, htons(ETH_P_8021Q), - vport->vport_id, - vlan->vlan_id, false); - if (ret) - break; - vlan->hd_tbl_status = true; +void hclge_restore_vport_vlan_table(struct hclge_vport *vport) +{ + struct hclge_vport_vlan_cfg *vlan, *tmp; + struct hclge_dev *hdev = vport->back; + int ret; + + mutex_lock(&hdev->vport_lock); + + if (vport->port_base_vlan_cfg.state == HNAE3_PORT_BASE_VLAN_DISABLE) { + list_for_each_entry_safe(vlan, tmp, &vport->vlan_list, node) { + ret = hclge_set_vlan_filter_hw(hdev, htons(ETH_P_8021Q), + vport->vport_id, + vlan->vlan_id, false); + if (ret) + break; + vlan->hd_tbl_status = true; + } } + + mutex_unlock(&hdev->vport_lock); } /* For global reset and imp reset, hardware will clear the mac table, @@ -9975,6 +10028,7 @@ static void hclge_restore_hw_table(struct hclge_dev *hdev) struct hnae3_handle *handle = &vport->nic; hclge_restore_mac_table_common(vport); + hclge_restore_vport_port_base_vlan_config(hdev); hclge_restore_vport_vlan_table(vport); set_bit(HCLGE_STATE_FD_USER_DEF_CHANGED, &hdev->state); hclge_restore_fd_entries(handle); @@ -10031,6 +10085,8 @@ static 
int hclge_update_vlan_filter_entries(struct hclge_vport *vport, false); } + vport->port_base_vlan_cfg.tbl_sta = false; + /* force add VLAN 0 */ ret = hclge_set_vf_vlan_common(hdev, vport->vport_id, false, 0); if (ret) @@ -10120,7 +10176,9 @@ int hclge_update_port_base_vlan_cfg(struct hclge_vport *vport, u16 state, else nic->port_base_vlan_state = HNAE3_PORT_BASE_VLAN_ENABLE; + vport->port_base_vlan_cfg.old_vlan_info = *old_vlan_info; vport->port_base_vlan_cfg.vlan_info = *vlan_info; + vport->port_base_vlan_cfg.tbl_sta = true; hclge_set_vport_vlan_fltr_change(vport); return 0; @@ -10188,14 +10246,17 @@ static int hclge_set_vf_vlan_filter(struct hnae3_handle *handle, int vfid, return ret; } - /* for DEVICE_VERSION_V3, vf doesn't need to know about the port based + /* there is a timewindow for PF to know VF unalive, it may + * cause send mailbox fail, but it doesn't matter, VF will + * query it when reinit. + * for DEVICE_VERSION_V3, vf doesn't need to know about the port based * VLAN state. 
*/ if (ae_dev->dev_version < HNAE3_DEVICE_VERSION_V3 && test_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state)) - hclge_push_vf_port_base_vlan_info(&hdev->vport[0], - vport->vport_id, state, - &vlan_info); + (void)hclge_push_vf_port_base_vlan_info(&hdev->vport[0], + vport->vport_id, + state, &vlan_info); return 0; } @@ -10253,11 +10314,11 @@ int hclge_set_vlan_filter(struct hnae3_handle *handle, __be16 proto, } if (!ret) { - if (is_kill) - hclge_rm_vport_vlan_table(vport, vlan_id, false); - else + if (!is_kill) hclge_add_vport_vlan_table(vport, vlan_id, writen_to_tbl); + else if (is_kill && vlan_id != 0) + hclge_rm_vport_vlan_table(vport, vlan_id, false); } else if (is_kill) { /* when remove hw vlan filter failed, record the vlan id, * and try to remove it from hw later, to be consistence @@ -11831,8 +11892,8 @@ static void hclge_uninit_ae_dev(struct hnae3_ae_dev *ae_dev) hclge_misc_irq_uninit(hdev); hclge_devlink_uninit(hdev); hclge_pci_uninit(hdev); - mutex_destroy(&hdev->vport_lock); hclge_uninit_vport_vlan_table(hdev); + mutex_destroy(&hdev->vport_lock); ae_dev->priv = NULL; } @@ -12656,6 +12717,55 @@ static int hclge_get_link_diagnosis_info(struct hnae3_handle *handle, return 0; } +/* After disable sriov, VF still has some config and info need clean, + * which configed by PF. 
+ */ +static void hclge_clear_vport_vf_info(struct hclge_vport *vport, int vfid) +{ + struct hclge_dev *hdev = vport->back; + struct hclge_vlan_info vlan_info; + int ret; + + /* after disable sriov, clean VF rate configured by PF */ + ret = hclge_tm_qs_shaper_cfg(vport, 0); + if (ret) + dev_err(&hdev->pdev->dev, + "failed to clean vf%d rate config, ret = %d\n", + vfid, ret); + + vlan_info.vlan_tag = 0; + vlan_info.qos = 0; + vlan_info.vlan_proto = ETH_P_8021Q; + ret = hclge_update_port_base_vlan_cfg(vport, + HNAE3_PORT_BASE_VLAN_DISABLE, + &vlan_info); + if (ret) + dev_err(&hdev->pdev->dev, + "failed to clean vf%d port base vlan, ret = %d\n", + vfid, ret); + + ret = hclge_set_vf_spoofchk_hw(hdev, vport->vport_id, false); + if (ret) + dev_err(&hdev->pdev->dev, + "failed to clean vf%d spoof config, ret = %d\n", + vfid, ret); + + memset(&vport->vf_info, 0, sizeof(vport->vf_info)); +} + +static void hclge_clean_vport_config(struct hnae3_ae_dev *ae_dev, int num_vfs) +{ + struct hclge_dev *hdev = ae_dev->priv; + struct hclge_vport *vport; + int i; + + for (i = 0; i < num_vfs; i++) { + vport = &hdev->vport[i + HCLGE_VF_VPORT_START_NUM]; + + hclge_clear_vport_vf_info(vport, i); + } +} + static const struct hnae3_ae_ops hclge_ops = { .init_ae_dev = hclge_init_ae_dev, .uninit_ae_dev = hclge_uninit_ae_dev, @@ -12757,6 +12867,7 @@ static const struct hnae3_ae_ops hclge_ops = { .get_rx_hwts = hclge_ptp_get_rx_hwts, .get_ts_info = hclge_ptp_get_ts_info, .get_link_diagnosis_info = hclge_get_link_diagnosis_info, + .clean_vf_config = hclge_clean_vport_config, }; static struct hnae3_ae_algo ae_algo = { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index adfb26e792621..63197257dd4e4 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -977,7 +977,9 @@ struct hclge_vlan_info { struct hclge_port_base_vlan_config { u16 state; + 
bool tbl_sta; struct hclge_vlan_info vlan_info; + struct hclge_vlan_info old_vlan_info; }; struct hclge_vf_info { @@ -1023,6 +1025,7 @@ struct hclge_vport { spinlock_t mac_list_lock; /* protect mac address need to add/detele */ struct list_head uc_mac_list; /* Store VF unicast table */ struct list_head mc_mac_list; /* Store VF multicast table */ + struct list_head vlan_list; /* Store VF vlan table */ }; @@ -1097,6 +1100,7 @@ void hclge_rm_vport_all_mac_table(struct hclge_vport *vport, bool is_del_list, void hclge_rm_vport_all_vlan_table(struct hclge_vport *vport, bool is_del_list); void hclge_uninit_vport_vlan_table(struct hclge_dev *hdev); void hclge_restore_mac_table_common(struct hclge_vport *vport); +void hclge_restore_vport_port_base_vlan_config(struct hclge_dev *hdev); void hclge_restore_vport_vlan_table(struct hclge_vport *vport); int hclge_update_port_base_vlan_cfg(struct hclge_vport *vport, u16 state, struct hclge_vlan_info *vlan_info); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c index 63d2be4349e3e..03d63b6a9b2bc 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c @@ -48,7 +48,7 @@ static int hclge_mdio_write(struct mii_bus *bus, int phyid, int regnum, int ret; if (test_bit(HCLGE_COMM_STATE_CMD_DISABLE, &hdev->hw.hw.comm_state)) - return 0; + return -EBUSY; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MDIO_CONFIG, false); @@ -86,7 +86,7 @@ static int hclge_mdio_read(struct mii_bus *bus, int phyid, int regnum) int ret; if (test_bit(HCLGE_COMM_STATE_CMD_DISABLE, &hdev->hw.hw.comm_state)) - return 0; + return -EBUSY; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MDIO_CONFIG, true); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 21442a9bb9961..90c6197d9374c 100644 --- 
a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -2855,6 +2855,11 @@ static int hclgevf_reset_hdev(struct hclgevf_dev *hdev) return ret; } + /* get current port based vlan state from PF */ + ret = hclgevf_get_port_base_vlan_filter_state(hdev); + if (ret) + return ret; + set_bit(HCLGEVF_STATE_PROMISC_CHANGED, &hdev->state); hclgevf_init_rxd_adv_layout(hdev); diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index b423e94956f10..b4804ce63151f 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -1429,6 +1429,15 @@ static int __ibmvnic_open(struct net_device *netdev) return rc; } + adapter->tx_queues_active = true; + + /* Since queues were stopped until now, there shouldn't be any + * one in ibmvnic_complete_tx() or ibmvnic_xmit() so maybe we + * don't need the synchronize_rcu()? Leaving it for consistency + * with setting ->tx_queues_active = false. + */ + synchronize_rcu(); + netif_tx_start_all_queues(netdev); if (prev_state == VNIC_CLOSED) { @@ -1603,6 +1612,14 @@ static void ibmvnic_cleanup(struct net_device *netdev) struct ibmvnic_adapter *adapter = netdev_priv(netdev); /* ensure that transmissions are stopped if called by do_reset */ + + adapter->tx_queues_active = false; + + /* Ensure complete_tx() and ibmvnic_xmit() see ->tx_queues_active + * update so they don't restart a queue after we stop it below. 
+ */ + synchronize_rcu(); + if (test_bit(0, &adapter->resetting)) netif_tx_disable(netdev); else @@ -1842,14 +1859,21 @@ static void ibmvnic_tx_scrq_clean_buffer(struct ibmvnic_adapter *adapter, tx_buff->skb = NULL; adapter->netdev->stats.tx_dropped++; } + ind_bufp->index = 0; + if (atomic_sub_return(entries, &tx_scrq->used) <= (adapter->req_tx_entries_per_subcrq / 2) && - __netif_subqueue_stopped(adapter->netdev, queue_num) && - !test_bit(0, &adapter->resetting)) { - netif_wake_subqueue(adapter->netdev, queue_num); - netdev_dbg(adapter->netdev, "Started queue %d\n", - queue_num); + __netif_subqueue_stopped(adapter->netdev, queue_num)) { + rcu_read_lock(); + + if (adapter->tx_queues_active) { + netif_wake_subqueue(adapter->netdev, queue_num); + netdev_dbg(adapter->netdev, "Started queue %d\n", + queue_num); + } + + rcu_read_unlock(); } } @@ -1904,11 +1928,12 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) int index = 0; u8 proto = 0; - tx_scrq = adapter->tx_scrq[queue_num]; - txq = netdev_get_tx_queue(netdev, queue_num); - ind_bufp = &tx_scrq->ind_buf; - - if (test_bit(0, &adapter->resetting)) { + /* If a reset is in progress, drop the packet since + * the scrqs may get torn down. Otherwise use the + * rcu to ensure reset waits for us to complete. 
+ */ + rcu_read_lock(); + if (!adapter->tx_queues_active) { dev_kfree_skb_any(skb); tx_send_failed++; @@ -1917,6 +1942,10 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) goto out; } + tx_scrq = adapter->tx_scrq[queue_num]; + txq = netdev_get_tx_queue(netdev, queue_num); + ind_bufp = &tx_scrq->ind_buf; + if (ibmvnic_xmit_workarounds(skb, netdev)) { tx_dropped++; tx_send_failed++; @@ -1924,6 +1953,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) ibmvnic_tx_scrq_flush(adapter, tx_scrq); goto out; } + if (skb_is_gso(skb)) tx_pool = &adapter->tso_pool[queue_num]; else @@ -2078,6 +2108,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) netif_carrier_off(netdev); } out: + rcu_read_unlock(); netdev->stats.tx_dropped += tx_dropped; netdev->stats.tx_bytes += tx_bytes; netdev->stats.tx_packets += tx_packets; @@ -3732,9 +3763,15 @@ static int ibmvnic_complete_tx(struct ibmvnic_adapter *adapter, (adapter->req_tx_entries_per_subcrq / 2) && __netif_subqueue_stopped(adapter->netdev, scrq->pool_index)) { - netif_wake_subqueue(adapter->netdev, scrq->pool_index); - netdev_dbg(adapter->netdev, "Started queue %d\n", - scrq->pool_index); + rcu_read_lock(); + if (adapter->tx_queues_active) { + netif_wake_subqueue(adapter->netdev, + scrq->pool_index); + netdev_dbg(adapter->netdev, + "Started queue %d\n", + scrq->pool_index); + } + rcu_read_unlock(); } } diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h index fa2d607a7b1b9..8f5cefb932dd1 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.h +++ b/drivers/net/ethernet/ibm/ibmvnic.h @@ -1006,11 +1006,14 @@ struct ibmvnic_adapter { struct work_struct ibmvnic_reset; struct delayed_work ibmvnic_delayed_reset; unsigned long resetting; - bool napi_enabled, from_passive_init; - bool login_pending; /* last device reset time */ unsigned long last_reset_time; + bool napi_enabled; + bool from_passive_init; + bool 
login_pending; + /* protected by rcu */ + bool tx_queues_active; bool failover_pending; bool force_reset_recovery; diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c index 9ddeb015eb7eb..e830987a8c6dd 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_common.c +++ b/drivers/net/ethernet/intel/i40e/i40e_common.c @@ -1899,8 +1899,9 @@ i40e_status i40e_aq_add_vsi(struct i40e_hw *hw, desc.flags |= cpu_to_le16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD)); - status = i40e_asq_send_command(hw, &desc, &vsi_ctx->info, - sizeof(vsi_ctx->info), cmd_details); + status = i40e_asq_send_command_atomic(hw, &desc, &vsi_ctx->info, + sizeof(vsi_ctx->info), + cmd_details, true); if (status) goto aq_add_vsi_exit; @@ -2287,8 +2288,9 @@ i40e_status i40e_aq_update_vsi_params(struct i40e_hw *hw, desc.flags |= cpu_to_le16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD)); - status = i40e_asq_send_command(hw, &desc, &vsi_ctx->info, - sizeof(vsi_ctx->info), cmd_details); + status = i40e_asq_send_command_atomic(hw, &desc, &vsi_ctx->info, + sizeof(vsi_ctx->info), + cmd_details, true); vsi_ctx->vsis_allocated = le16_to_cpu(resp->vsi_used); vsi_ctx->vsis_unallocated = le16_to_cpu(resp->vsi_free); @@ -2673,8 +2675,8 @@ i40e_status i40e_aq_add_macvlan(struct i40e_hw *hw, u16 seid, if (buf_size > I40E_AQ_LARGE_BUF) desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB); - status = i40e_asq_send_command(hw, &desc, mv_list, buf_size, - cmd_details); + status = i40e_asq_send_command_atomic(hw, &desc, mv_list, buf_size, + cmd_details, true); return status; } @@ -2715,8 +2717,8 @@ i40e_status i40e_aq_remove_macvlan(struct i40e_hw *hw, u16 seid, if (buf_size > I40E_AQ_LARGE_BUF) desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB); - status = i40e_asq_send_command(hw, &desc, mv_list, buf_size, - cmd_details); + status = i40e_asq_send_command_atomic(hw, &desc, mv_list, buf_size, + cmd_details, true); return status; } @@ -3868,7 +3870,8 @@ i40e_status 
i40e_aq_delete_element(struct i40e_hw *hw, u16 seid, cmd->seid = cpu_to_le16(seid); - status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details); + status = i40e_asq_send_command_atomic(hw, &desc, NULL, 0, + cmd_details, true); return status; } diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c index 945b1bb9c6f40..e5e72b5bb6196 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c @@ -218,7 +218,6 @@ bool i40e_alloc_rx_buffers_zc(struct i40e_ring *rx_ring, u16 count) ntu += nb_buffs; if (ntu == rx_ring->count) { rx_desc = I40E_RX_DESC(rx_ring, 0); - xdp = i40e_rx_bi(rx_ring, 0); ntu = 0; } @@ -241,21 +240,25 @@ bool i40e_alloc_rx_buffers_zc(struct i40e_ring *rx_ring, u16 count) static struct sk_buff *i40e_construct_skb_zc(struct i40e_ring *rx_ring, struct xdp_buff *xdp) { + unsigned int totalsize = xdp->data_end - xdp->data_meta; unsigned int metasize = xdp->data - xdp->data_meta; - unsigned int datasize = xdp->data_end - xdp->data; struct sk_buff *skb; + net_prefetch(xdp->data_meta); + /* allocate a skb to store the frags */ - skb = __napi_alloc_skb(&rx_ring->q_vector->napi, - xdp->data_end - xdp->data_hard_start, + skb = __napi_alloc_skb(&rx_ring->q_vector->napi, totalsize, GFP_ATOMIC | __GFP_NOWARN); if (unlikely(!skb)) goto out; - skb_reserve(skb, xdp->data - xdp->data_hard_start); - memcpy(__skb_put(skb, datasize), xdp->data, datasize); - if (metasize) + memcpy(__skb_put(skb, totalsize), xdp->data_meta, + ALIGN(totalsize, sizeof(long))); + + if (metasize) { skb_metadata_set(skb, metasize); + __skb_pull(skb, metasize); + } out: xsk_buff_free(xdp); @@ -324,11 +327,11 @@ static void i40e_handle_xdp_result_zc(struct i40e_ring *rx_ring, int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget) { unsigned int total_rx_bytes = 0, total_rx_packets = 0; - u16 cleaned_count = I40E_DESC_UNUSED(rx_ring); u16 next_to_clean = rx_ring->next_to_clean; u16 
count_mask = rx_ring->count - 1; unsigned int xdp_res, xdp_xmit = 0; bool failure = false; + u16 cleaned_count; while (likely(total_rx_packets < (unsigned int)budget)) { union i40e_rx_desc *rx_desc; diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h index 4babe4705a552..358a9b3031d5c 100644 --- a/drivers/net/ethernet/intel/iavf/iavf.h +++ b/drivers/net/ethernet/intel/iavf/iavf.h @@ -44,6 +44,9 @@ #define DEFAULT_DEBUG_LEVEL_SHIFT 3 #define PFX "iavf: " +int iavf_status_to_errno(enum iavf_status status); +int virtchnl_status_to_errno(enum virtchnl_status_code v_status); + /* VSI state flags shared with common code */ enum iavf_vsi_state_t { __IAVF_VSI_DOWN, @@ -515,7 +518,7 @@ void iavf_add_vlans(struct iavf_adapter *adapter); void iavf_del_vlans(struct iavf_adapter *adapter); void iavf_set_promiscuous(struct iavf_adapter *adapter, int flags); void iavf_request_stats(struct iavf_adapter *adapter); -void iavf_request_reset(struct iavf_adapter *adapter); +int iavf_request_reset(struct iavf_adapter *adapter); void iavf_get_hena(struct iavf_adapter *adapter); void iavf_set_hena(struct iavf_adapter *adapter); void iavf_set_rss_key(struct iavf_adapter *adapter); diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 0e178a0a59c5d..f55ecb6727684 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -51,6 +51,113 @@ MODULE_LICENSE("GPL v2"); static const struct net_device_ops iavf_netdev_ops; struct workqueue_struct *iavf_wq; +int iavf_status_to_errno(enum iavf_status status) +{ + switch (status) { + case IAVF_SUCCESS: + return 0; + case IAVF_ERR_PARAM: + case IAVF_ERR_MAC_TYPE: + case IAVF_ERR_INVALID_MAC_ADDR: + case IAVF_ERR_INVALID_LINK_SETTINGS: + case IAVF_ERR_INVALID_PD_ID: + case IAVF_ERR_INVALID_QP_ID: + case IAVF_ERR_INVALID_CQ_ID: + case IAVF_ERR_INVALID_CEQ_ID: + case IAVF_ERR_INVALID_AEQ_ID: + case 
IAVF_ERR_INVALID_SIZE: + case IAVF_ERR_INVALID_ARP_INDEX: + case IAVF_ERR_INVALID_FPM_FUNC_ID: + case IAVF_ERR_QP_INVALID_MSG_SIZE: + case IAVF_ERR_INVALID_FRAG_COUNT: + case IAVF_ERR_INVALID_ALIGNMENT: + case IAVF_ERR_INVALID_PUSH_PAGE_INDEX: + case IAVF_ERR_INVALID_IMM_DATA_SIZE: + case IAVF_ERR_INVALID_VF_ID: + case IAVF_ERR_INVALID_HMCFN_ID: + case IAVF_ERR_INVALID_PBLE_INDEX: + case IAVF_ERR_INVALID_SD_INDEX: + case IAVF_ERR_INVALID_PAGE_DESC_INDEX: + case IAVF_ERR_INVALID_SD_TYPE: + case IAVF_ERR_INVALID_HMC_OBJ_INDEX: + case IAVF_ERR_INVALID_HMC_OBJ_COUNT: + case IAVF_ERR_INVALID_SRQ_ARM_LIMIT: + return -EINVAL; + case IAVF_ERR_NVM: + case IAVF_ERR_NVM_CHECKSUM: + case IAVF_ERR_PHY: + case IAVF_ERR_CONFIG: + case IAVF_ERR_UNKNOWN_PHY: + case IAVF_ERR_LINK_SETUP: + case IAVF_ERR_ADAPTER_STOPPED: + case IAVF_ERR_MASTER_REQUESTS_PENDING: + case IAVF_ERR_AUTONEG_NOT_COMPLETE: + case IAVF_ERR_RESET_FAILED: + case IAVF_ERR_BAD_PTR: + case IAVF_ERR_SWFW_SYNC: + case IAVF_ERR_QP_TOOMANY_WRS_POSTED: + case IAVF_ERR_QUEUE_EMPTY: + case IAVF_ERR_FLUSHED_QUEUE: + case IAVF_ERR_OPCODE_MISMATCH: + case IAVF_ERR_CQP_COMPL_ERROR: + case IAVF_ERR_BACKING_PAGE_ERROR: + case IAVF_ERR_NO_PBLCHUNKS_AVAILABLE: + case IAVF_ERR_MEMCPY_FAILED: + case IAVF_ERR_SRQ_ENABLED: + case IAVF_ERR_ADMIN_QUEUE_ERROR: + case IAVF_ERR_ADMIN_QUEUE_FULL: + case IAVF_ERR_BAD_IWARP_CQE: + case IAVF_ERR_NVM_BLANK_MODE: + case IAVF_ERR_PE_DOORBELL_NOT_ENABLED: + case IAVF_ERR_DIAG_TEST_FAILED: + case IAVF_ERR_FIRMWARE_API_VERSION: + case IAVF_ERR_ADMIN_QUEUE_CRITICAL_ERROR: + return -EIO; + case IAVF_ERR_DEVICE_NOT_SUPPORTED: + return -ENODEV; + case IAVF_ERR_NO_AVAILABLE_VSI: + case IAVF_ERR_RING_FULL: + return -ENOSPC; + case IAVF_ERR_NO_MEMORY: + return -ENOMEM; + case IAVF_ERR_TIMEOUT: + case IAVF_ERR_ADMIN_QUEUE_TIMEOUT: + return -ETIMEDOUT; + case IAVF_ERR_NOT_IMPLEMENTED: + case IAVF_NOT_SUPPORTED: + return -EOPNOTSUPP; + case IAVF_ERR_ADMIN_QUEUE_NO_WORK: + return -EALREADY; + case 
IAVF_ERR_NOT_READY: + return -EBUSY; + case IAVF_ERR_BUF_TOO_SHORT: + return -EMSGSIZE; + } + + return -EIO; +} + +int virtchnl_status_to_errno(enum virtchnl_status_code v_status) +{ + switch (v_status) { + case VIRTCHNL_STATUS_SUCCESS: + return 0; + case VIRTCHNL_STATUS_ERR_PARAM: + case VIRTCHNL_STATUS_ERR_INVALID_VF_ID: + return -EINVAL; + case VIRTCHNL_STATUS_ERR_NO_MEMORY: + return -ENOMEM; + case VIRTCHNL_STATUS_ERR_OPCODE_MISMATCH: + case VIRTCHNL_STATUS_ERR_CQP_COMPL_ERROR: + case VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR: + return -EIO; + case VIRTCHNL_STATUS_ERR_NOT_SUPPORTED: + return -EOPNOTSUPP; + } + + return -EIO; +} + /** * iavf_pdev_to_adapter - go from pci_dev to adapter * @pdev: pci_dev pointer @@ -1421,7 +1528,7 @@ static int iavf_config_rss_aq(struct iavf_adapter *adapter) struct iavf_aqc_get_set_rss_key_data *rss_key = (struct iavf_aqc_get_set_rss_key_data *)adapter->rss_key; struct iavf_hw *hw = &adapter->hw; - int ret = 0; + enum iavf_status status; if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { /* bail because we already have a command pending */ @@ -1430,24 +1537,25 @@ static int iavf_config_rss_aq(struct iavf_adapter *adapter) return -EBUSY; } - ret = iavf_aq_set_rss_key(hw, adapter->vsi.id, rss_key); - if (ret) { + status = iavf_aq_set_rss_key(hw, adapter->vsi.id, rss_key); + if (status) { dev_err(&adapter->pdev->dev, "Cannot set RSS key, err %s aq_err %s\n", - iavf_stat_str(hw, ret), + iavf_stat_str(hw, status), iavf_aq_str(hw, hw->aq.asq_last_status)); - return ret; + return iavf_status_to_errno(status); } - ret = iavf_aq_set_rss_lut(hw, adapter->vsi.id, false, - adapter->rss_lut, adapter->rss_lut_size); - if (ret) { + status = iavf_aq_set_rss_lut(hw, adapter->vsi.id, false, + adapter->rss_lut, adapter->rss_lut_size); + if (status) { dev_err(&adapter->pdev->dev, "Cannot set RSS lut, err %s aq_err %s\n", - iavf_stat_str(hw, ret), + iavf_stat_str(hw, status), iavf_aq_str(hw, hw->aq.asq_last_status)); + return iavf_status_to_errno(status); 
} - return ret; + return 0; } @@ -2003,23 +2111,24 @@ static void iavf_startup(struct iavf_adapter *adapter) { struct pci_dev *pdev = adapter->pdev; struct iavf_hw *hw = &adapter->hw; - int err; + enum iavf_status status; + int ret; WARN_ON(adapter->state != __IAVF_STARTUP); /* driver loaded, probe complete */ adapter->flags &= ~IAVF_FLAG_PF_COMMS_FAILED; adapter->flags &= ~IAVF_FLAG_RESET_PENDING; - err = iavf_set_mac_type(hw); - if (err) { - dev_err(&pdev->dev, "Failed to set MAC type (%d)\n", err); + status = iavf_set_mac_type(hw); + if (status) { + dev_err(&pdev->dev, "Failed to set MAC type (%d)\n", status); goto err; } - err = iavf_check_reset_complete(hw); - if (err) { + ret = iavf_check_reset_complete(hw); + if (ret) { dev_info(&pdev->dev, "Device is still in reset (%d), retrying\n", - err); + ret); goto err; } hw->aq.num_arq_entries = IAVF_AQ_LEN; @@ -2027,14 +2136,15 @@ static void iavf_startup(struct iavf_adapter *adapter) hw->aq.arq_buf_size = IAVF_MAX_AQ_BUF_SIZE; hw->aq.asq_buf_size = IAVF_MAX_AQ_BUF_SIZE; - err = iavf_init_adminq(hw); - if (err) { - dev_err(&pdev->dev, "Failed to init Admin Queue (%d)\n", err); + status = iavf_init_adminq(hw); + if (status) { + dev_err(&pdev->dev, "Failed to init Admin Queue (%d)\n", + status); goto err; } - err = iavf_send_api_ver(adapter); - if (err) { - dev_err(&pdev->dev, "Unable to send to PF (%d)\n", err); + ret = iavf_send_api_ver(adapter); + if (ret) { + dev_err(&pdev->dev, "Unable to send to PF (%d)\n", ret); iavf_shutdown_adminq(hw); goto err; } @@ -2070,7 +2180,7 @@ static void iavf_init_version_check(struct iavf_adapter *adapter) /* aq msg sent, awaiting reply */ err = iavf_verify_api_ver(adapter); if (err) { - if (err == IAVF_ERR_ADMIN_QUEUE_NO_WORK) + if (err == -EALREADY) err = iavf_send_api_ver(adapter); else dev_err(&pdev->dev, "Unsupported PF API version %d.%d, expected %d.%d\n", @@ -2171,11 +2281,11 @@ static void iavf_init_get_resources(struct iavf_adapter *adapter) } } err = 
iavf_get_vf_config(adapter); - if (err == IAVF_ERR_ADMIN_QUEUE_NO_WORK) { + if (err == -EALREADY) { err = iavf_send_vf_config_msg(adapter); goto err_alloc; - } else if (err == IAVF_ERR_PARAM) { - /* We only get ERR_PARAM if the device is in a very bad + } else if (err == -EINVAL) { + /* We only get -EINVAL if the device is in a very bad * state or if we've been disabled for previous bad * behavior. Either way, we're done now. */ @@ -2626,6 +2736,7 @@ static void iavf_reset_task(struct work_struct *work) struct iavf_hw *hw = &adapter->hw; struct iavf_mac_filter *f, *ftmp; struct iavf_cloud_filter *cf; + enum iavf_status status; u32 reg_val; int i = 0, err; bool running; @@ -2706,7 +2817,6 @@ static void iavf_reset_task(struct work_struct *work) running = adapter->state == __IAVF_RUNNING; if (running) { - netdev->flags &= ~IFF_UP; netif_carrier_off(netdev); netif_tx_stop_all_queues(netdev); adapter->link_up = false; @@ -2727,10 +2837,12 @@ static void iavf_reset_task(struct work_struct *work) /* kill and reinit the admin queue */ iavf_shutdown_adminq(hw); adapter->current_op = VIRTCHNL_OP_UNKNOWN; - err = iavf_init_adminq(hw); - if (err) + status = iavf_init_adminq(hw); + if (status) { dev_info(&adapter->pdev->dev, "Failed to init adminq: %d\n", - err); + status); + goto reset_err; + } adapter->aq_required = 0; if ((adapter->flags & IAVF_FLAG_REINIT_MSIX_NEEDED) || @@ -2821,7 +2933,7 @@ static void iavf_reset_task(struct work_struct *work) * to __IAVF_RUNNING */ iavf_up_complete(adapter); - netdev->flags |= IFF_UP; + iavf_irq_enable(adapter, true); } else { iavf_change_state(adapter, __IAVF_DOWN); @@ -2837,10 +2949,8 @@ static void iavf_reset_task(struct work_struct *work) reset_err: mutex_unlock(&adapter->client_lock); mutex_unlock(&adapter->crit_lock); - if (running) { + if (running) iavf_change_state(adapter, __IAVF_RUNNING); - netdev->flags |= IFF_UP; - } dev_err(&adapter->pdev->dev, "failed to allocate resources during reinit\n"); iavf_close(netdev); } diff 
--git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c index 5263cefe46f5f..b8c5837f8b505 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c +++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c @@ -22,17 +22,17 @@ static int iavf_send_pf_msg(struct iavf_adapter *adapter, enum virtchnl_ops op, u8 *msg, u16 len) { struct iavf_hw *hw = &adapter->hw; - enum iavf_status err; + enum iavf_status status; if (adapter->flags & IAVF_FLAG_PF_COMMS_FAILED) return 0; /* nothing to see here, move along */ - err = iavf_aq_send_msg_to_pf(hw, op, 0, msg, len, NULL); - if (err) - dev_dbg(&adapter->pdev->dev, "Unable to send opcode %d to PF, err %s, aq_err %s\n", - op, iavf_stat_str(hw, err), + status = iavf_aq_send_msg_to_pf(hw, op, 0, msg, len, NULL); + if (status) + dev_dbg(&adapter->pdev->dev, "Unable to send opcode %d to PF, status %s, aq_err %s\n", + op, iavf_stat_str(hw, status), iavf_aq_str(hw, hw->aq.asq_last_status)); - return err; + return iavf_status_to_errno(status); } /** @@ -1827,11 +1827,13 @@ void iavf_del_adv_rss_cfg(struct iavf_adapter *adapter) * * Request that the PF reset this VF. No response is expected. 
**/ -void iavf_request_reset(struct iavf_adapter *adapter) +int iavf_request_reset(struct iavf_adapter *adapter) { + int err; /* Don't check CURRENT_OP - this is always higher priority */ - iavf_send_pf_msg(adapter, VIRTCHNL_OP_RESET_VF, NULL, 0); + err = iavf_send_pf_msg(adapter, VIRTCHNL_OP_RESET_VF, NULL, 0); adapter->current_op = VIRTCHNL_OP_UNKNOWN; + return err; } /** diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index bea1d1e39fa27..9c04a71a9fca3 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -290,6 +290,7 @@ enum ice_pf_state { ICE_LINK_DEFAULT_OVERRIDE_PENDING, ICE_PHY_INIT_COMPLETE, ICE_FD_VF_FLUSH_CTX, /* set at FD Rx IRQ or timeout */ + ICE_AUX_ERR_PENDING, ICE_STATE_NBITS /* must be last */ }; @@ -559,6 +560,7 @@ struct ice_pf { wait_queue_head_t reset_wait_queue; u32 hw_csum_rx_error; + u32 oicr_err_reg; u16 oicr_idx; /* Other interrupt cause MSIX vector index */ u16 num_avail_sw_msix; /* remaining MSIX SW vectors left unclaimed */ u16 max_pf_txqs; /* Total Tx queues PF wide */ @@ -672,7 +674,7 @@ static inline struct ice_pf *ice_netdev_to_pf(struct net_device *netdev) static inline bool ice_is_xdp_ena_vsi(struct ice_vsi *vsi) { - return !!vsi->xdp_prog; + return !!READ_ONCE(vsi->xdp_prog); } static inline void ice_set_ring_xdp(struct ice_tx_ring *ring) @@ -710,7 +712,7 @@ static inline struct xsk_buff_pool *ice_tx_xsk_pool(struct ice_tx_ring *ring) struct ice_vsi *vsi = ring->vsi; u16 qid; - qid = ring->q_index - vsi->num_xdp_txq; + qid = ring->q_index - vsi->alloc_txq; if (!ice_is_xdp_ena_vsi(vsi) || !test_bit(qid, vsi->af_xdp_zc_qps)) return NULL; diff --git a/drivers/net/ethernet/intel/ice/ice_arfs.c b/drivers/net/ethernet/intel/ice/ice_arfs.c index 5daade32ea625..fba178e076009 100644 --- a/drivers/net/ethernet/intel/ice/ice_arfs.c +++ b/drivers/net/ethernet/intel/ice/ice_arfs.c @@ -577,7 +577,7 @@ void ice_free_cpu_rx_rmap(struct ice_vsi *vsi) { struct 
net_device *netdev; - if (!vsi || vsi->type != ICE_VSI_PF || !vsi->arfs_fltr_list) + if (!vsi || vsi->type != ICE_VSI_PF) return; netdev = vsi->netdev; @@ -599,7 +599,7 @@ int ice_set_cpu_rx_rmap(struct ice_vsi *vsi) int base_idx, i; if (!vsi || vsi->type != ICE_VSI_PF) - return -EINVAL; + return 0; pf = vsi->back; netdev = vsi->netdev; @@ -636,7 +636,6 @@ void ice_remove_arfs(struct ice_pf *pf) if (!pf_vsi) return; - ice_free_cpu_rx_rmap(pf_vsi); ice_clear_arfs(pf_vsi); } @@ -653,9 +652,5 @@ void ice_rebuild_arfs(struct ice_pf *pf) return; ice_remove_arfs(pf); - if (ice_set_cpu_rx_rmap(pf_vsi)) { - dev_err(ice_pf_to_dev(pf), "Failed to rebuild aRFS\n"); - return; - } ice_init_arfs(pf_vsi); } diff --git a/drivers/net/ethernet/intel/ice/ice_idc.c b/drivers/net/ethernet/intel/ice/ice_idc.c index fc3580167e7b5..5559230eff8b5 100644 --- a/drivers/net/ethernet/intel/ice/ice_idc.c +++ b/drivers/net/ethernet/intel/ice/ice_idc.c @@ -34,6 +34,9 @@ void ice_send_event_to_aux(struct ice_pf *pf, struct iidc_event *event) { struct iidc_auxiliary_drv *iadrv; + if (WARN_ON_ONCE(!in_task())) + return; + if (!pf->adev) return; diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 53256aca27c78..15bb6f001a04f 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -1452,6 +1452,7 @@ static int ice_vsi_alloc_rings(struct ice_vsi *vsi) ring->tx_tstamps = &pf->ptp.port.tx; ring->dev = dev; ring->count = vsi->num_tx_desc; + ring->txq_teid = ICE_INVAL_TEID; WRITE_ONCE(vsi->tx_rings[i], ring); } @@ -2868,6 +2869,8 @@ void ice_vsi_free_irq(struct ice_vsi *vsi) return; vsi->irqs_ready = false; + ice_free_cpu_rx_rmap(vsi); + ice_for_each_q_vector(vsi, i) { u16 vector = i + base; int irq_num; @@ -2881,7 +2884,8 @@ void ice_vsi_free_irq(struct ice_vsi *vsi) continue; /* clear the affinity notifier in the IRQ descriptor */ - irq_set_affinity_notifier(irq_num, NULL); + if 
(!IS_ENABLED(CONFIG_RFS_ACCEL)) + irq_set_affinity_notifier(irq_num, NULL); /* clear the affinity_mask in the IRQ descriptor */ irq_set_affinity_hint(irq_num, NULL); @@ -3147,6 +3151,8 @@ int ice_vsi_release(struct ice_vsi *vsi) } } + if (ice_is_vsi_dflt_vsi(pf->first_sw, vsi)) + ice_clear_dflt_vsi(pf->first_sw); ice_fltr_remove_all(vsi); ice_rm_vsi_lan_cfg(vsi->port_info, vsi->idx); err = ice_rm_vsi_rdma_cfg(vsi->port_info, vsi->idx); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index b7e8744b0c0a6..2de2bbbca1e97 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -2255,6 +2255,19 @@ static void ice_service_task(struct work_struct *work) return; } + if (test_and_clear_bit(ICE_AUX_ERR_PENDING, pf->state)) { + struct iidc_event *event; + + event = kzalloc(sizeof(*event), GFP_KERNEL); + if (event) { + set_bit(IIDC_EVENT_CRIT_ERR, event->type); + /* report the entire OICR value to AUX driver */ + swap(event->reg, pf->oicr_err_reg); + ice_send_event_to_aux(pf, event); + kfree(event); + } + } + if (test_bit(ICE_FLAG_PLUG_AUX_DEV, pf->flags)) { /* Plug aux device per request */ ice_plug_aux_dev(pf); @@ -2481,6 +2494,13 @@ static int ice_vsi_req_irq_msix(struct ice_vsi *vsi, char *basename) irq_set_affinity_hint(irq_num, &q_vector->affinity_mask); } + err = ice_set_cpu_rx_rmap(vsi); + if (err) { + netdev_err(vsi->netdev, "Failed to setup CPU RMAP on VSI %u: %pe\n", + vsi->vsi_num, ERR_PTR(err)); + goto free_q_irqs; + } + vsi->irqs_ready = true; return 0; @@ -2533,7 +2553,7 @@ static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi) spin_lock_init(&xdp_ring->tx_lock); for (j = 0; j < xdp_ring->count; j++) { tx_desc = ICE_TX_DESC(xdp_ring, j); - tx_desc->cmd_type_offset_bsz = cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE); + tx_desc->cmd_type_offset_bsz = 0; } } @@ -2729,8 +2749,10 @@ int ice_destroy_xdp_rings(struct ice_vsi *vsi) ice_for_each_xdp_txq(vsi, i) if 
(vsi->xdp_rings[i]) { - if (vsi->xdp_rings[i]->desc) + if (vsi->xdp_rings[i]->desc) { + synchronize_rcu(); ice_free_tx_ring(vsi->xdp_rings[i]); + } kfree_rcu(vsi->xdp_rings[i], rcu); vsi->xdp_rings[i] = NULL; } @@ -3041,17 +3063,9 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) #define ICE_AUX_CRIT_ERR (PFINT_OICR_PE_CRITERR_M | PFINT_OICR_HMC_ERR_M | PFINT_OICR_PE_PUSH_M) if (oicr & ICE_AUX_CRIT_ERR) { - struct iidc_event *event; - + pf->oicr_err_reg |= oicr; + set_bit(ICE_AUX_ERR_PENDING, pf->state); ena_mask &= ~ICE_AUX_CRIT_ERR; - event = kzalloc(sizeof(*event), GFP_ATOMIC); - if (event) { - set_bit(IIDC_EVENT_CRIT_ERR, event->type); - /* report the entire OICR value to AUX driver */ - event->reg = oicr; - ice_send_event_to_aux(pf, event); - kfree(event); - } } /* Report any remaining unexpected interrupts */ @@ -3598,20 +3612,12 @@ static int ice_setup_pf_sw(struct ice_pf *pf) */ ice_napi_add(vsi); - status = ice_set_cpu_rx_rmap(vsi); - if (status) { - dev_err(dev, "Failed to set CPU Rx map VSI %d error %d\n", - vsi->vsi_num, status); - goto unroll_napi_add; - } status = ice_init_mac_fltr(pf); if (status) - goto free_cpu_rx_map; + goto unroll_napi_add; return 0; -free_cpu_rx_map: - ice_free_cpu_rx_rmap(vsi); unroll_napi_add: ice_tc_indir_block_unregister(vsi); unroll_cfg_netdev: @@ -5069,7 +5075,6 @@ static int __maybe_unused ice_suspend(struct device *dev) continue; ice_vsi_free_q_vectors(pf->vsi[v]); } - ice_free_cpu_rx_rmap(ice_get_main_vsi(pf)); ice_clear_interrupt_scheme(pf); pci_save_state(pdev); @@ -5427,16 +5432,19 @@ static int ice_set_mac_address(struct net_device *netdev, void *pi) /* Add filter for new MAC. If filter exists, return success */ err = ice_fltr_add_mac(vsi, mac, ICE_FWD_TO_VSI); - if (err == -EEXIST) + if (err == -EEXIST) { /* Although this MAC filter is already present in hardware it's * possible in some cases (e.g. 
bonding) that dev_addr was * modified outside of the driver and needs to be restored back * to this value. */ netdev_dbg(netdev, "filter for MAC %pM already exists\n", mac); - else if (err) + + return 0; + } else if (err) { /* error if the new filter addition failed */ err = -EADDRNOTAVAIL; + } err_update_filters: if (err) { diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c index 1be3cd4b2bef7..2bee8f10ad89c 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c @@ -3351,9 +3351,9 @@ static int ice_vc_dis_qs_msg(struct ice_vf *vf, u8 *msg) goto error_param; } - /* Skip queue if not enabled */ if (!test_bit(vf_q_id, vf->txq_ena)) - continue; + dev_dbg(ice_pf_to_dev(vsi->back), "Queue %u on VSI %u is not enabled, but stopping it anyway\n", + vf_q_id, vsi->vsi_num); ice_fill_txq_meta(vsi, ring, &txq_meta); diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index 2388837d6d6c9..30620b942fa09 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -41,8 +41,10 @@ static void ice_qp_reset_stats(struct ice_vsi *vsi, u16 q_idx) static void ice_qp_clean_rings(struct ice_vsi *vsi, u16 q_idx) { ice_clean_tx_ring(vsi->tx_rings[q_idx]); - if (ice_is_xdp_ena_vsi(vsi)) + if (ice_is_xdp_ena_vsi(vsi)) { + synchronize_rcu(); ice_clean_tx_ring(vsi->xdp_rings[q_idx]); + } ice_clean_rx_ring(vsi->rx_rings[q_idx]); } @@ -428,20 +430,24 @@ static void ice_bump_ntc(struct ice_rx_ring *rx_ring) static struct sk_buff * ice_construct_skb_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp) { - unsigned int datasize_hard = xdp->data_end - xdp->data_hard_start; + unsigned int totalsize = xdp->data_end - xdp->data_meta; unsigned int metasize = xdp->data - xdp->data_meta; - unsigned int datasize = xdp->data_end - xdp->data; struct sk_buff *skb; - skb = 
__napi_alloc_skb(&rx_ring->q_vector->napi, datasize_hard, + net_prefetch(xdp->data_meta); + + skb = __napi_alloc_skb(&rx_ring->q_vector->napi, totalsize, GFP_ATOMIC | __GFP_NOWARN); if (unlikely(!skb)) return NULL; - skb_reserve(skb, xdp->data - xdp->data_hard_start); - memcpy(__skb_put(skb, datasize), xdp->data, datasize); - if (metasize) + memcpy(__skb_put(skb, totalsize), xdp->data_meta, + ALIGN(totalsize, sizeof(long))); + + if (metasize) { skb_metadata_set(skb, metasize); + __skb_pull(skb, metasize); + } xsk_buff_free(xdp); return skb; @@ -759,7 +765,7 @@ ice_xsk_wakeup(struct net_device *netdev, u32 queue_id, struct ice_vsi *vsi = np->vsi; struct ice_tx_ring *ring; - if (test_bit(ICE_DOWN, vsi->state)) + if (test_bit(ICE_VSI_DOWN, vsi->state)) return -ENETDOWN; if (!ice_is_xdp_ena_vsi(vsi)) diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index 51a2dcaf553de..2a5782063f4c8 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -965,10 +965,6 @@ static int igb_set_ringparam(struct net_device *netdev, memcpy(&temp_ring[i], adapter->rx_ring[i], sizeof(struct igb_ring)); - /* Clear copied XDP RX-queue info */ - memset(&temp_ring[i].xdp_rxq, 0, - sizeof(temp_ring[i].xdp_rxq)); - temp_ring[i].count = new_rx_count; err = igb_setup_rx_resources(&temp_ring[i]); if (err) { diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 38ba92022cd45..c1e4ad65b02de 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -4352,7 +4352,18 @@ int igb_setup_rx_resources(struct igb_ring *rx_ring) { struct igb_adapter *adapter = netdev_priv(rx_ring->netdev); struct device *dev = rx_ring->dev; - int size; + int size, res; + + /* XDP RX-queue info */ + if (xdp_rxq_info_is_reg(&rx_ring->xdp_rxq)) + xdp_rxq_info_unreg(&rx_ring->xdp_rxq); + res = 
xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev, + rx_ring->queue_index, 0); + if (res < 0) { + dev_err(dev, "Failed to register xdp_rxq index %u\n", + rx_ring->queue_index); + return res; + } size = sizeof(struct igb_rx_buffer) * rx_ring->count; @@ -4375,14 +4386,10 @@ int igb_setup_rx_resources(struct igb_ring *rx_ring) rx_ring->xdp_prog = adapter->xdp_prog; - /* XDP RX-queue info */ - if (xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev, - rx_ring->queue_index, 0) < 0) - goto err; - return 0; err: + xdp_rxq_info_unreg(&rx_ring->xdp_rxq); vfree(rx_ring->rx_buffer_info); rx_ring->rx_buffer_info = NULL; dev_err(dev, "Unable to allocate memory for the Rx descriptor ring\n"); diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 2f17f36e94fde..2a9ae53238f73 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -505,6 +505,9 @@ int igc_setup_rx_resources(struct igc_ring *rx_ring) u8 index = rx_ring->queue_index; int size, desc_len, res; + /* XDP RX-queue info */ + if (xdp_rxq_info_is_reg(&rx_ring->xdp_rxq)) + xdp_rxq_info_unreg(&rx_ring->xdp_rxq); res = xdp_rxq_info_reg(&rx_ring->xdp_rxq, ndev, index, rx_ring->q_vector->napi.napi_id); if (res < 0) { @@ -2446,19 +2449,20 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget) static struct sk_buff *igc_construct_skb_zc(struct igc_ring *ring, struct xdp_buff *xdp) { + unsigned int totalsize = xdp->data_end - xdp->data_meta; unsigned int metasize = xdp->data - xdp->data_meta; - unsigned int datasize = xdp->data_end - xdp->data; - unsigned int totalsize = metasize + datasize; struct sk_buff *skb; - skb = __napi_alloc_skb(&ring->q_vector->napi, - xdp->data_end - xdp->data_hard_start, + net_prefetch(xdp->data_meta); + + skb = __napi_alloc_skb(&ring->q_vector->napi, totalsize, GFP_ATOMIC | __GFP_NOWARN); if (unlikely(!skb)) return NULL; - skb_reserve(skb, xdp->data_meta - 
xdp->data_hard_start); - memcpy(__skb_put(skb, totalsize), xdp->data_meta, totalsize); + memcpy(__skb_put(skb, totalsize), xdp->data_meta, + ALIGN(totalsize, sizeof(long))); + if (metasize) { skb_metadata_set(skb, metasize); __skb_pull(skb, metasize); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c index 6a5e9cf6b5dac..dd7ff66d422f0 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c @@ -207,26 +207,28 @@ bool ixgbe_alloc_rx_buffers_zc(struct ixgbe_ring *rx_ring, u16 count) } static struct sk_buff *ixgbe_construct_skb_zc(struct ixgbe_ring *rx_ring, - struct ixgbe_rx_buffer *bi) + const struct xdp_buff *xdp) { - unsigned int metasize = bi->xdp->data - bi->xdp->data_meta; - unsigned int datasize = bi->xdp->data_end - bi->xdp->data; + unsigned int totalsize = xdp->data_end - xdp->data_meta; + unsigned int metasize = xdp->data - xdp->data_meta; struct sk_buff *skb; + net_prefetch(xdp->data_meta); + /* allocate a skb to store the frags */ - skb = __napi_alloc_skb(&rx_ring->q_vector->napi, - bi->xdp->data_end - bi->xdp->data_hard_start, + skb = __napi_alloc_skb(&rx_ring->q_vector->napi, totalsize, GFP_ATOMIC | __GFP_NOWARN); if (unlikely(!skb)) return NULL; - skb_reserve(skb, bi->xdp->data - bi->xdp->data_hard_start); - memcpy(__skb_put(skb, datasize), bi->xdp->data, datasize); - if (metasize) + memcpy(__skb_put(skb, totalsize), xdp->data_meta, + ALIGN(totalsize, sizeof(long))); + + if (metasize) { skb_metadata_set(skb, metasize); + __skb_pull(skb, metasize); + } - xsk_buff_free(bi->xdp); - bi->xdp = NULL; return skb; } @@ -317,12 +319,15 @@ int ixgbe_clean_rx_irq_zc(struct ixgbe_q_vector *q_vector, } /* XDP_PASS path */ - skb = ixgbe_construct_skb_zc(rx_ring, bi); + skb = ixgbe_construct_skb_zc(rx_ring, bi->xdp); if (!skb) { rx_ring->rx_stats.alloc_rx_buff_failed++; break; } + xsk_buff_free(bi->xdp); + bi->xdp = NULL; + cleaned_count++; 
ixgbe_inc_ntc(rx_ring); diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c index 143ca8be5eb59..4008596963be4 100644 --- a/drivers/net/ethernet/marvell/mv643xx_eth.c +++ b/drivers/net/ethernet/marvell/mv643xx_eth.c @@ -2751,7 +2751,7 @@ static int mv643xx_eth_shared_of_add_port(struct platform_device *pdev, } ret = of_get_mac_address(pnp, ppd.mac_addr); - if (ret) + if (ret == -EPROBE_DEFER) return ret; mv643xx_eth_property(pnp, "tx-queue-size", ppd.tx_queue_size); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c index 91f86d77cd41b..3a31fb8cc1554 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c @@ -605,7 +605,7 @@ void rvu_npc_install_ucast_entry(struct rvu *rvu, u16 pcifunc, struct npc_install_flow_req req = { 0 }; struct npc_install_flow_rsp rsp = { 0 }; struct npc_mcam *mcam = &rvu->hw->mcam; - struct nix_rx_action action; + struct nix_rx_action action = { 0 }; int blkaddr, index; /* AF's and SDP VFs work in promiscuous mode */ @@ -626,7 +626,6 @@ void rvu_npc_install_ucast_entry(struct rvu *rvu, u16 pcifunc, *(u64 *)&action = npc_get_mcam_action(rvu, mcam, blkaddr, index); } else { - *(u64 *)&action = 0x00; action.op = NIX_RX_ACTIONOP_UCAST; action.pf_func = pcifunc; } @@ -657,7 +656,7 @@ void rvu_npc_install_promisc_entry(struct rvu *rvu, u16 pcifunc, struct npc_mcam *mcam = &rvu->hw->mcam; struct rvu_hwinfo *hw = rvu->hw; int blkaddr, ucast_idx, index; - struct nix_rx_action action; + struct nix_rx_action action = { 0 }; u64 relaxed_mask; if (!hw->cap.nix_rx_multicast && is_cgx_vf(rvu, pcifunc)) @@ -685,14 +684,14 @@ void rvu_npc_install_promisc_entry(struct rvu *rvu, u16 pcifunc, blkaddr, ucast_idx); if (action.op != NIX_RX_ACTIONOP_RSS) { - *(u64 *)&action = 0x00; + *(u64 *)&action = 0; action.op = NIX_RX_ACTIONOP_UCAST; } /* RX_ACTION set to MCAST for CGX PF's */ 
if (hw->cap.nix_rx_multicast && pfvf->use_mce_list && is_pf_cgxmapped(rvu, rvu_get_pf(pcifunc))) { - *(u64 *)&action = 0x00; + *(u64 *)&action = 0; action.op = NIX_RX_ACTIONOP_MCAST; pfvf = rvu_get_pfvf(rvu, pcifunc & ~RVU_PFVF_FUNC_MASK); action.index = pfvf->promisc_mce_idx; @@ -832,7 +831,7 @@ void rvu_npc_install_allmulti_entry(struct rvu *rvu, u16 pcifunc, int nixlf, struct rvu_hwinfo *hw = rvu->hw; int blkaddr, ucast_idx, index; u8 mac_addr[ETH_ALEN] = { 0 }; - struct nix_rx_action action; + struct nix_rx_action action = { 0 }; struct rvu_pfvf *pfvf; u16 vf_func; @@ -861,14 +860,14 @@ void rvu_npc_install_allmulti_entry(struct rvu *rvu, u16 pcifunc, int nixlf, blkaddr, ucast_idx); if (action.op != NIX_RX_ACTIONOP_RSS) { - *(u64 *)&action = 0x00; + *(u64 *)&action = 0; action.op = NIX_RX_ACTIONOP_UCAST; action.pf_func = pcifunc; } /* RX_ACTION set to MCAST for CGX PF's */ if (hw->cap.nix_rx_multicast && pfvf->use_mce_list) { - *(u64 *)&action = 0x00; + *(u64 *)&action = 0; action.op = NIX_RX_ACTIONOP_MCAST; action.index = pfvf->mcast_mce_idx; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.c index 6699bdf5cf012..b895c378cfaf3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.c @@ -27,11 +27,7 @@ tc_act_parse_sample(struct mlx5e_tc_act_parse_state *parse_state, struct mlx5e_priv *priv, struct mlx5_flow_attr *attr) { - struct mlx5e_sample_attr *sample_attr; - - sample_attr = kzalloc(sizeof(*attr->sample_attr), GFP_KERNEL); - if (!sample_attr) - return -ENOMEM; + struct mlx5e_sample_attr *sample_attr = &attr->sample_attr; sample_attr->rate = act->sample.rate; sample_attr->group_num = act->sample.psample_group->group_num; @@ -39,7 +35,6 @@ tc_act_parse_sample(struct mlx5e_tc_act_parse_state *parse_state, if (act->sample.truncate) sample_attr->trunc_size = act->sample.trunc_size; - 
attr->sample_attr = sample_attr; flow_flag_set(parse_state->flow, SAMPLE); return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c index ff4b4f8a5a9db..0faaf9a4b5317 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c @@ -513,7 +513,7 @@ mlx5e_tc_sample_offload(struct mlx5e_tc_psample *tc_psample, sample_flow = kzalloc(sizeof(*sample_flow), GFP_KERNEL); if (!sample_flow) return ERR_PTR(-ENOMEM); - sample_attr = attr->sample_attr; + sample_attr = &attr->sample_attr; sample_attr->sample_flow = sample_flow; /* For NICs with reg_c_preserve support or decap action, use @@ -546,6 +546,7 @@ mlx5e_tc_sample_offload(struct mlx5e_tc_psample *tc_psample, err = PTR_ERR(sample_flow->sampler); goto err_sampler; } + sample_attr->sampler_id = sample_flow->sampler->sampler_id; /* Create an id mapping reg_c0 value to sample object. */ restore_obj.type = MLX5_MAPPED_OBJ_SAMPLE; @@ -585,8 +586,7 @@ mlx5e_tc_sample_offload(struct mlx5e_tc_psample *tc_psample, pre_attr->outer_match_level = attr->outer_match_level; pre_attr->chain = attr->chain; pre_attr->prio = attr->prio; - pre_attr->sample_attr = attr->sample_attr; - sample_attr->sampler_id = sample_flow->sampler->sampler_id; + pre_attr->sample_attr = *sample_attr; pre_esw_attr = pre_attr->esw_attr; pre_esw_attr->in_mdev = esw_attr->in_mdev; pre_esw_attr->in_rep = esw_attr->in_rep; @@ -633,11 +633,11 @@ mlx5e_tc_sample_unoffload(struct mlx5e_tc_psample *tc_psample, * will hit fw syndromes. 
*/ esw = tc_psample->esw; - sample_flow = attr->sample_attr->sample_flow; + sample_flow = attr->sample_attr.sample_flow; mlx5_eswitch_del_offloaded_rule(esw, sample_flow->pre_rule, sample_flow->pre_attr); sample_restore_put(tc_psample, sample_flow->restore); - mapping_remove(esw->offloads.reg_c0_obj_pool, attr->sample_attr->restore_obj_id); + mapping_remove(esw->offloads.reg_c0_obj_pool, attr->sample_attr.restore_obj_id); sampler_put(tc_psample, sample_flow->sampler); if (sample_flow->post_act_handle) mlx5e_tc_post_act_del(tc_psample->post_act, sample_flow->post_act_handle); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 3667f5ef5990f..169e3524bb1c7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -5345,6 +5345,7 @@ mlx5e_create_netdev(struct mlx5_core_dev *mdev, const struct mlx5e_profile *prof } netif_carrier_off(netdev); + netif_tx_disable(netdev); dev_net_set(netdev, mlx5_core_net(mdev)); return netdev; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index b27532a9301e7..7e5c00349ccf9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1634,7 +1634,6 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, if (flow_flag_test(flow, L3_TO_L2_DECAP)) mlx5e_detach_decap(priv, flow); - kfree(attr->sample_attr); kvfree(attr->esw_attr->rx_tun_attr); kvfree(attr->parse_attr); kfree(flow->attr); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h index 5ffae9b130665..2f09e34db9ffe 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h @@ -71,7 +71,7 @@ struct mlx5_flow_attr { struct mlx5_fc *counter; struct mlx5_modify_hdr *modify_hdr; struct mlx5_ct_attr ct_attr; - struct 
mlx5e_sample_attr *sample_attr; + struct mlx5e_sample_attr sample_attr; struct mlx5e_tc_flow_parse_attr *parse_attr; u32 chain; u16 prio; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index cfcd72bad9af6..e7e7b4b0dcdb5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -201,12 +201,12 @@ esw_cleanup_decap_indir(struct mlx5_eswitch *esw, static int esw_setup_sampler_dest(struct mlx5_flow_destination *dest, struct mlx5_flow_act *flow_act, - struct mlx5_flow_attr *attr, + u32 sampler_id, int i) { flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER; - dest[i].sampler_id = attr->sample_attr->sampler_id; + dest[i].sampler_id = sampler_id; return 0; } @@ -466,7 +466,7 @@ esw_setup_dests(struct mlx5_flow_destination *dest, attr->flags |= MLX5_ESW_ATTR_FLAG_SRC_REWRITE; if (attr->flags & MLX5_ESW_ATTR_FLAG_SAMPLE) { - esw_setup_sampler_dest(dest, flow_act, attr, *i); + esw_setup_sampler_dest(dest, flow_act, attr->sample_attr.sampler_id, *i); (*i)++; } else if (attr->dest_ft) { esw_setup_ft_dest(dest, flow_act, esw, attr, spec, *i); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index 7b16a1188aabb..fd79860de723b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -433,35 +433,12 @@ int mlx5_query_module_eeprom_by_page(struct mlx5_core_dev *dev, struct mlx5_module_eeprom_query_params *params, u8 *data) { - u8 module_id; int err; err = mlx5_query_module_num(dev, ¶ms->module_number); if (err) return err; - err = mlx5_query_module_id(dev, params->module_number, &module_id); - if (err) - return err; - - switch (module_id) { - case MLX5_MODULE_ID_SFP: - if (params->page > 0) - return -EINVAL; - break; - case MLX5_MODULE_ID_QSFP: - 
case MLX5_MODULE_ID_QSFP28: - case MLX5_MODULE_ID_QSFP_PLUS: - if (params->page > 3) - return -EINVAL; - break; - case MLX5_MODULE_ID_DSFP: - break; - default: - mlx5_core_err(dev, "Module ID not recognized: 0x%x\n", module_id); - return -EINVAL; - } - if (params->i2c_address != MLX5_I2C_ADDR_HIGH && params->i2c_address != MLX5_I2C_ADDR_LOW) { mlx5_core_err(dev, "I2C address not recognized: 0x%x\n", params->i2c_address); diff --git a/drivers/net/ethernet/mellanox/mlxsw/i2c.c b/drivers/net/ethernet/mellanox/mlxsw/i2c.c index 939b692ffc335..ce843ea914646 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/i2c.c +++ b/drivers/net/ethernet/mellanox/mlxsw/i2c.c @@ -650,6 +650,7 @@ static int mlxsw_i2c_probe(struct i2c_client *client, return 0; errout: + mutex_destroy(&mlxsw_i2c->cmd.lock); i2c_set_clientdata(client, NULL); return err; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index aa411dec62f00..eb1319d63613e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -2148,13 +2148,11 @@ static void mlxsw_sp_pude_event_func(const struct mlxsw_reg_info *reg, struct mlxsw_sp *mlxsw_sp = priv; struct mlxsw_sp_port *mlxsw_sp_port; enum mlxsw_reg_pude_oper_status status; - unsigned int max_ports; u16 local_port; - max_ports = mlxsw_core_max_ports(mlxsw_sp->core); local_port = mlxsw_reg_pude_local_port_get(pude_pl); - if (WARN_ON_ONCE(!local_port || local_port >= max_ports)) + if (WARN_ON_ONCE(!mlxsw_sp_local_port_is_valid(mlxsw_sp, local_port))) return; mlxsw_sp_port = mlxsw_sp->ports[local_port]; if (!mlxsw_sp_port) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index bb2442e1f7052..30942b6ffcf99 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -481,6 +481,13 @@ int mlxsw_sp_port_vlan_classification_set(struct mlxsw_sp_port 
*mlxsw_sp_port, bool is_8021ad_tagged, bool is_8021q_tagged); +static inline bool +mlxsw_sp_local_port_is_valid(struct mlxsw_sp *mlxsw_sp, u16 local_port) +{ + unsigned int max_ports = mlxsw_core_max_ports(mlxsw_sp->core); + + return local_port < max_ports && local_port; +} /* spectrum_buffers.c */ struct mlxsw_sp_hdroom_prio { diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c index 0ff163fbc7750..35422e64d89fc 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c @@ -568,12 +568,11 @@ void mlxsw_sp1_ptp_got_timestamp(struct mlxsw_sp *mlxsw_sp, bool ingress, u8 domain_number, u16 sequence_id, u64 timestamp) { - unsigned int max_ports = mlxsw_core_max_ports(mlxsw_sp->core); struct mlxsw_sp_port *mlxsw_sp_port; struct mlxsw_sp1_ptp_key key; u8 types; - if (WARN_ON_ONCE(local_port >= max_ports)) + if (WARN_ON_ONCE(!mlxsw_sp_local_port_is_valid(mlxsw_sp, local_port))) return; mlxsw_sp_port = mlxsw_sp->ports[local_port]; if (!mlxsw_sp_port) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 65c1724c63b0a..bffdb41fc4edc 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -2616,7 +2616,6 @@ static void mlxsw_sp_fdb_notify_mac_process(struct mlxsw_sp *mlxsw_sp, char *sfn_pl, int rec_index, bool adding) { - unsigned int max_ports = mlxsw_core_max_ports(mlxsw_sp->core); struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; struct mlxsw_sp_bridge_device *bridge_device; struct mlxsw_sp_bridge_port *bridge_port; @@ -2630,7 +2629,7 @@ static void mlxsw_sp_fdb_notify_mac_process(struct mlxsw_sp *mlxsw_sp, mlxsw_reg_sfn_mac_unpack(sfn_pl, rec_index, mac, &fid, &local_port); - if (WARN_ON_ONCE(local_port >= max_ports)) + if (WARN_ON_ONCE(!mlxsw_sp_local_port_is_valid(mlxsw_sp, local_port))) 
return; mlxsw_sp_port = mlxsw_sp->ports[local_port]; if (!mlxsw_sp_port) { diff --git a/drivers/net/ethernet/micrel/Kconfig b/drivers/net/ethernet/micrel/Kconfig index 93df3049cdc05..1b632cdd76309 100644 --- a/drivers/net/ethernet/micrel/Kconfig +++ b/drivers/net/ethernet/micrel/Kconfig @@ -39,6 +39,7 @@ config KS8851 config KS8851_MLL tristate "Micrel KS8851 MLL" depends on HAS_IOMEM + depends on PTP_1588_CLOCK_OPTIONAL select MII select CRC32 select EEPROM_93CX6 diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_mac.c b/drivers/net/ethernet/microchip/lan966x/lan966x_mac.c index ce5970bdcc6a0..2679111ef6696 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_mac.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_mac.c @@ -346,7 +346,8 @@ static void lan966x_mac_irq_process(struct lan966x *lan966x, u32 row, lan966x_mac_process_raw_entry(&raw_entries[column], mac, &vid, &dest_idx); - WARN_ON(dest_idx > lan966x->num_phys_ports); + if (WARN_ON(dest_idx > lan966x->num_phys_ports)) + continue; /* If the entry in SW is found, then there is nothing * to do @@ -392,7 +393,8 @@ static void lan966x_mac_irq_process(struct lan966x *lan966x, u32 row, lan966x_mac_process_raw_entry(&raw_entries[column], mac, &vid, &dest_idx); - WARN_ON(dest_idx > lan966x->num_phys_ports); + if (WARN_ON(dest_idx > lan966x->num_phys_ports)) + continue; mac_entry = lan966x_mac_alloc_entry(mac, vid, dest_idx); if (!mac_entry) diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c b/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c index 7de55f6a4da80..3c987fd6b9e23 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c @@ -261,8 +261,7 @@ static int lan966x_port_prechangeupper(struct net_device *dev, if (netif_is_bridge_master(info->upper_dev) && !info->linking) switchdev_bridge_port_unoffload(port->dev, port, - &lan966x_switchdev_nb, - &lan966x_switchdev_blocking_nb); + 
NULL, NULL); return NOTIFY_DONE; } diff --git a/drivers/net/ethernet/microchip/sparx5/Kconfig b/drivers/net/ethernet/microchip/sparx5/Kconfig index 7bdbb2d09a148..cc5e48e1bb4c3 100644 --- a/drivers/net/ethernet/microchip/sparx5/Kconfig +++ b/drivers/net/ethernet/microchip/sparx5/Kconfig @@ -4,6 +4,8 @@ config SPARX5_SWITCH depends on HAS_IOMEM depends on OF depends on ARCH_SPARX5 || COMPILE_TEST + depends on PTP_1588_CLOCK_OPTIONAL + depends on BRIDGE || BRIDGE=n select PHYLINK select PHY_SPARX5_SERDES select RESET_CONTROLLER diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_fdma.c b/drivers/net/ethernet/microchip/sparx5/sparx5_fdma.c index 7436f62fa1525..174ad95e746a3 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_fdma.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_fdma.c @@ -420,6 +420,8 @@ static int sparx5_fdma_tx_alloc(struct sparx5 *sparx5) db_hw->dataptr = phys; db_hw->status = 0; db = devm_kzalloc(sparx5->dev, sizeof(*db), GFP_KERNEL); + if (!db) + return -ENOMEM; db->cpu_addr = cpu_addr; list_add_tail(&db->list, &tx->db_list); } diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c index 50ac3ee2577a2..21d2645885cef 100644 --- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c +++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c @@ -2903,11 +2903,9 @@ static netdev_tx_t myri10ge_sw_tso(struct sk_buff *skb, status = myri10ge_xmit(curr, dev); if (status != 0) { dev_kfree_skb_any(curr); - if (segs != NULL) { - curr = segs; - segs = next; + skb_list_walk_safe(next, curr, next) { curr->next = NULL; - dev_kfree_skb_any(segs); + dev_kfree_skb_any(curr); } goto drop; } diff --git a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c index 7e296fa71b368..40fa5bce2ac2c 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c @@ -331,6 +331,9 @@ static int 
ionic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_out_deregister_lifs; } + mod_timer(&ionic->watchdog_timer, + round_jiffies(jiffies + ionic->watchdog_period)); + return 0; err_out_deregister_lifs: @@ -348,7 +351,6 @@ static int ionic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) err_out_reset: ionic_reset(ionic); err_out_teardown: - del_timer_sync(&ionic->watchdog_timer); pci_clear_master(pdev); /* Don't fail the probe for these errors, keep * the hw interface around for inspection diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.c b/drivers/net/ethernet/pensando/ionic/ionic_dev.c index d57e80d44c9df..2c7ce820a1fa7 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_dev.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.c @@ -122,9 +122,6 @@ int ionic_dev_setup(struct ionic *ionic) idev->fw_generation = IONIC_FW_STS_F_GENERATION & ioread8(&idev->dev_info_regs->fw_status); - mod_timer(&ionic->watchdog_timer, - round_jiffies(jiffies + ionic->watchdog_period)); - idev->db_pages = bar->vaddr; idev->phy_db_pages = bar->bus_addr; @@ -132,6 +129,16 @@ int ionic_dev_setup(struct ionic *ionic) } /* Devcmd Interface */ +bool ionic_is_fw_running(struct ionic_dev *idev) +{ + u8 fw_status = ioread8(&idev->dev_info_regs->fw_status); + + /* firmware is useful only if the running bit is set and + * fw_status != 0xff (bad PCI read) + */ + return (fw_status != 0xff) && (fw_status & IONIC_FW_STS_F_RUNNING); +} + int ionic_heartbeat_check(struct ionic *ionic) { struct ionic_dev *idev = &ionic->idev; @@ -155,13 +162,10 @@ int ionic_heartbeat_check(struct ionic *ionic) goto do_check_time; } - /* firmware is useful only if the running bit is set and - * fw_status != 0xff (bad PCI read) - * If fw_status is not ready don't bother with the generation. 
- */ fw_status = ioread8(&idev->dev_info_regs->fw_status); - if (fw_status == 0xff || !(fw_status & IONIC_FW_STS_F_RUNNING)) { + /* If fw_status is not ready don't bother with the generation */ + if (!ionic_is_fw_running(idev)) { fw_status_ready = false; } else { fw_generation = fw_status & IONIC_FW_STS_F_GENERATION; diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.h b/drivers/net/ethernet/pensando/ionic/ionic_dev.h index e5acf3bd62b2d..73b950ac12722 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_dev.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.h @@ -353,5 +353,6 @@ void ionic_q_rewind(struct ionic_queue *q, struct ionic_desc_info *start); void ionic_q_service(struct ionic_queue *q, struct ionic_cq_info *cq_info, unsigned int stop_index); int ionic_heartbeat_check(struct ionic *ionic); +bool ionic_is_fw_running(struct ionic_dev *idev); #endif /* _IONIC_DEV_H_ */ diff --git a/drivers/net/ethernet/pensando/ionic/ionic_main.c b/drivers/net/ethernet/pensando/ionic/ionic_main.c index 875f4ec42efee..a0f9136b2d899 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_main.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_main.c @@ -215,9 +215,13 @@ static void ionic_adminq_flush(struct ionic_lif *lif) void ionic_adminq_netdev_err_print(struct ionic_lif *lif, u8 opcode, u8 status, int err) { + const char *stat_str; + + stat_str = (err == -ETIMEDOUT) ? 
"TIMEOUT" : + ionic_error_to_str(status); + netdev_err(lif->netdev, "%s (%d) failed: %s (%d)\n", - ionic_opcode_to_str(opcode), opcode, - ionic_error_to_str(status), err); + ionic_opcode_to_str(opcode), opcode, stat_str, err); } static int ionic_adminq_check_err(struct ionic_lif *lif, @@ -318,6 +322,7 @@ int ionic_adminq_wait(struct ionic_lif *lif, struct ionic_admin_ctx *ctx, if (do_msg && !test_bit(IONIC_LIF_F_FW_RESET, lif->state)) netdev_err(netdev, "Posting of %s (%d) failed: %d\n", name, ctx->cmd.cmd.opcode, err); + ctx->comp.comp.status = IONIC_RC_ERROR; return err; } @@ -336,6 +341,7 @@ int ionic_adminq_wait(struct ionic_lif *lif, struct ionic_admin_ctx *ctx, if (do_msg) netdev_err(netdev, "%s (%d) interrupted, FW in reset\n", name, ctx->cmd.cmd.opcode); + ctx->comp.comp.status = IONIC_RC_ERROR; return -ENXIO; } @@ -370,10 +376,10 @@ int ionic_adminq_post_wait_nomsg(struct ionic_lif *lif, struct ionic_admin_ctx * static void ionic_dev_cmd_clean(struct ionic *ionic) { - union __iomem ionic_dev_cmd_regs *regs = ionic->idev.dev_cmd_regs; + struct ionic_dev *idev = &ionic->idev; - iowrite32(0, ®s->doorbell); - memset_io(®s->cmd, 0, sizeof(regs->cmd)); + iowrite32(0, &idev->dev_cmd_regs->doorbell); + memset_io(&idev->dev_cmd_regs->cmd, 0, sizeof(idev->dev_cmd_regs->cmd)); } int ionic_dev_cmd_wait(struct ionic *ionic, unsigned long max_seconds) @@ -540,6 +546,9 @@ int ionic_reset(struct ionic *ionic) struct ionic_dev *idev = &ionic->idev; int err; + if (!ionic_is_fw_running(idev)) + return 0; + mutex_lock(&ionic->dev_cmd_lock); ionic_dev_cmd_reset(idev); err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT); @@ -612,15 +621,17 @@ int ionic_port_init(struct ionic *ionic) int ionic_port_reset(struct ionic *ionic) { struct ionic_dev *idev = &ionic->idev; - int err; + int err = 0; if (!idev->port_info) return 0; - mutex_lock(&ionic->dev_cmd_lock); - ionic_dev_cmd_port_reset(idev); - err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT); - mutex_unlock(&ionic->dev_cmd_lock); + if 
(ionic_is_fw_running(idev)) { + mutex_lock(&ionic->dev_cmd_lock); + ionic_dev_cmd_port_reset(idev); + err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT); + mutex_unlock(&ionic->dev_cmd_lock); + } dma_free_coherent(ionic->dev, idev->port_info_sz, idev->port_info, idev->port_info_pa); @@ -628,9 +639,6 @@ int ionic_port_reset(struct ionic *ionic) idev->port_info = NULL; idev->port_info_pa = 0; - if (err) - dev_err(ionic->dev, "Failed to reset port\n"); - return err; } diff --git a/drivers/net/ethernet/qlogic/qed/qed_debug.c b/drivers/net/ethernet/qlogic/qed/qed_debug.c index e3edca187ddfa..5250d1d1e49ca 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_debug.c +++ b/drivers/net/ethernet/qlogic/qed/qed_debug.c @@ -489,7 +489,7 @@ struct split_type_defs { #define STATIC_DEBUG_LINE_DWORDS 9 -#define NUM_COMMON_GLOBAL_PARAMS 11 +#define NUM_COMMON_GLOBAL_PARAMS 10 #define MAX_RECURSION_DEPTH 10 diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c index 48cf4355bc47a..0848b5529d48a 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c +++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c @@ -2984,12 +2984,16 @@ static int qed_iov_pre_update_vport(struct qed_hwfn *hwfn, u8 mask = QED_ACCEPT_UCAST_UNMATCHED | QED_ACCEPT_MCAST_UNMATCHED; struct qed_filter_accept_flags *flags = ¶ms->accept_flags; struct qed_public_vf_info *vf_info; + u16 tlv_mask; + + tlv_mask = BIT(QED_IOV_VP_UPDATE_ACCEPT_PARAM) | + BIT(QED_IOV_VP_UPDATE_ACCEPT_ANY_VLAN); /* Untrusted VFs can't even be trusted to know that fact. * Simply indicate everything is configured fine, and trace * configuration 'behind their back'. 
*/ - if (!(*tlvs & BIT(QED_IOV_VP_UPDATE_ACCEPT_PARAM))) + if (!(*tlvs & tlv_mask)) return 0; vf_info = qed_iov_get_public_vf_info(hwfn, vfid, true); @@ -3006,6 +3010,13 @@ static int qed_iov_pre_update_vport(struct qed_hwfn *hwfn, flags->tx_accept_filter &= ~mask; } + if (params->update_accept_any_vlan_flg) { + vf_info->accept_any_vlan = params->accept_any_vlan; + + if (vf_info->forced_vlan && !vf_info->is_trusted_configured) + params->accept_any_vlan = false; + } + return 0; } @@ -4719,6 +4730,7 @@ static int qed_get_vf_config(struct qed_dev *cdev, tx_rate = vf_info->tx_rate; ivi->max_tx_rate = tx_rate ? tx_rate : link.speed; ivi->min_tx_rate = qed_iov_get_vf_min_rate(hwfn, vf_id); + ivi->trusted = vf_info->is_trusted_request; return 0; } @@ -5149,6 +5161,12 @@ static void qed_iov_handle_trust_change(struct qed_hwfn *hwfn) params.update_ctl_frame_check = 1; params.mac_chk_en = !vf_info->is_trusted_configured; + params.update_accept_any_vlan_flg = 0; + + if (vf_info->accept_any_vlan && vf_info->forced_vlan) { + params.update_accept_any_vlan_flg = 1; + params.accept_any_vlan = vf_info->accept_any_vlan; + } if (vf_info->rx_accept_mode & mask) { flags->update_rx_mode_config = 1; @@ -5164,13 +5182,20 @@ static void qed_iov_handle_trust_change(struct qed_hwfn *hwfn) if (!vf_info->is_trusted_configured) { flags->rx_accept_filter &= ~mask; flags->tx_accept_filter &= ~mask; + params.accept_any_vlan = false; } if (flags->update_rx_mode_config || flags->update_tx_mode_config || - params.update_ctl_frame_check) + params.update_ctl_frame_check || + params.update_accept_any_vlan_flg) { + DP_VERBOSE(hwfn, QED_MSG_IOV, + "vport update config for %s VF[abs 0x%x rel 0x%x]\n", + vf_info->is_trusted_configured ? 
"trusted" : "untrusted", + vf->abs_vf_id, vf->relative_vf_id); qed_sp_vport_update(hwfn, ¶ms, QED_SPQ_MODE_EBLOCK, NULL); + } } } diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.h b/drivers/net/ethernet/qlogic/qed/qed_sriov.h index f448e3dd6c8ba..6ee2493de1642 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sriov.h +++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.h @@ -62,6 +62,7 @@ struct qed_public_vf_info { bool is_trusted_request; u8 rx_accept_mode; u8 tx_accept_mode; + bool accept_any_vlan; }; struct qed_iov_vf_init_params { diff --git a/drivers/net/ethernet/qlogic/qede/qede_fp.c b/drivers/net/ethernet/qlogic/qede/qede_fp.c index b242000a77fd8..b7cc36589f592 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_fp.c +++ b/drivers/net/ethernet/qlogic/qede/qede_fp.c @@ -748,6 +748,9 @@ qede_build_skb(struct qede_rx_queue *rxq, buf = page_address(bd->data) + bd->page_offset; skb = build_skb(buf, rxq->rx_buf_seg_size); + if (unlikely(!skb)) + return NULL; + skb_reserve(skb, pad); skb_put(skb, len); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.h index 5d79ee4370bcd..7519773eaca6e 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.h +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.h @@ -51,7 +51,7 @@ static inline int qlcnic_dcb_get_hw_capability(struct qlcnic_dcb *dcb) if (dcb && dcb->ops->get_hw_capability) return dcb->ops->get_hw_capability(dcb); - return 0; + return -EOPNOTSUPP; } static inline void qlcnic_dcb_free(struct qlcnic_dcb *dcb) @@ -65,7 +65,7 @@ static inline int qlcnic_dcb_attach(struct qlcnic_dcb *dcb) if (dcb && dcb->ops->attach) return dcb->ops->attach(dcb); - return 0; + return -EOPNOTSUPP; } static inline int @@ -74,7 +74,7 @@ qlcnic_dcb_query_hw_capability(struct qlcnic_dcb *dcb, char *buf) if (dcb && dcb->ops->query_hw_capability) return dcb->ops->query_hw_capability(dcb, buf); - return 0; + return -EOPNOTSUPP; } static inline void qlcnic_dcb_get_info(struct 
qlcnic_dcb *dcb) @@ -89,7 +89,7 @@ qlcnic_dcb_query_cee_param(struct qlcnic_dcb *dcb, char *buf, u8 type) if (dcb && dcb->ops->query_cee_param) return dcb->ops->query_cee_param(dcb, buf, type); - return 0; + return -EOPNOTSUPP; } static inline int qlcnic_dcb_get_cee_cfg(struct qlcnic_dcb *dcb) @@ -97,7 +97,7 @@ static inline int qlcnic_dcb_get_cee_cfg(struct qlcnic_dcb *dcb) if (dcb && dcb->ops->get_cee_cfg) return dcb->ops->get_cee_cfg(dcb); - return 0; + return -EOPNOTSUPP; } static inline void qlcnic_dcb_aen_handler(struct qlcnic_dcb *dcb, void *msg) diff --git a/drivers/net/ethernet/sfc/efx_channels.c b/drivers/net/ethernet/sfc/efx_channels.c index ead550ae27097..40bfd0ad7d053 100644 --- a/drivers/net/ethernet/sfc/efx_channels.c +++ b/drivers/net/ethernet/sfc/efx_channels.c @@ -764,6 +764,85 @@ void efx_remove_channels(struct efx_nic *efx) kfree(efx->xdp_tx_queues); } +static int efx_set_xdp_tx_queue(struct efx_nic *efx, int xdp_queue_number, + struct efx_tx_queue *tx_queue) +{ + if (xdp_queue_number >= efx->xdp_tx_queue_count) + return -EINVAL; + + netif_dbg(efx, drv, efx->net_dev, + "Channel %u TXQ %u is XDP %u, HW %u\n", + tx_queue->channel->channel, tx_queue->label, + xdp_queue_number, tx_queue->queue); + efx->xdp_tx_queues[xdp_queue_number] = tx_queue; + return 0; +} + +static void efx_set_xdp_channels(struct efx_nic *efx) +{ + struct efx_tx_queue *tx_queue; + struct efx_channel *channel; + unsigned int next_queue = 0; + int xdp_queue_number = 0; + int rc; + + /* We need to mark which channels really have RX and TX + * queues, and adjust the TX queue numbers if we have separate + * RX-only and TX-only channels. 
+ */ + efx_for_each_channel(channel, efx) { + if (channel->channel < efx->tx_channel_offset) + continue; + + if (efx_channel_is_xdp_tx(channel)) { + efx_for_each_channel_tx_queue(tx_queue, channel) { + tx_queue->queue = next_queue++; + rc = efx_set_xdp_tx_queue(efx, xdp_queue_number, + tx_queue); + if (rc == 0) + xdp_queue_number++; + } + } else { + efx_for_each_channel_tx_queue(tx_queue, channel) { + tx_queue->queue = next_queue++; + netif_dbg(efx, drv, efx->net_dev, + "Channel %u TXQ %u is HW %u\n", + channel->channel, tx_queue->label, + tx_queue->queue); + } + + /* If XDP is borrowing queues from net stack, it must + * use the queue with no csum offload, which is the + * first one of the channel + * (note: tx_queue_by_type is not initialized yet) + */ + if (efx->xdp_txq_queues_mode == + EFX_XDP_TX_QUEUES_BORROWED) { + tx_queue = &channel->tx_queue[0]; + rc = efx_set_xdp_tx_queue(efx, xdp_queue_number, + tx_queue); + if (rc == 0) + xdp_queue_number++; + } + } + } + WARN_ON(efx->xdp_txq_queues_mode == EFX_XDP_TX_QUEUES_DEDICATED && + xdp_queue_number != efx->xdp_tx_queue_count); + WARN_ON(efx->xdp_txq_queues_mode != EFX_XDP_TX_QUEUES_DEDICATED && + xdp_queue_number > efx->xdp_tx_queue_count); + + /* If we have more CPUs than assigned XDP TX queues, assign the already + * existing queues to the exceeding CPUs + */ + next_queue = 0; + while (xdp_queue_number < efx->xdp_tx_queue_count) { + tx_queue = efx->xdp_tx_queues[next_queue++]; + rc = efx_set_xdp_tx_queue(efx, xdp_queue_number, tx_queue); + if (rc == 0) + xdp_queue_number++; + } +} + int efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 txq_entries) { struct efx_channel *other_channel[EFX_MAX_CHANNELS], *channel; @@ -835,6 +914,7 @@ int efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 txq_entries) efx_init_napi_channel(efx->channel[i]); } + efx_set_xdp_channels(efx); out: /* Destroy unused channel structures */ for (i = 0; i < efx->n_channels; i++) { @@ -867,26 +947,9 @@ int 
efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 txq_entries) goto out; } -static inline int -efx_set_xdp_tx_queue(struct efx_nic *efx, int xdp_queue_number, - struct efx_tx_queue *tx_queue) -{ - if (xdp_queue_number >= efx->xdp_tx_queue_count) - return -EINVAL; - - netif_dbg(efx, drv, efx->net_dev, "Channel %u TXQ %u is XDP %u, HW %u\n", - tx_queue->channel->channel, tx_queue->label, - xdp_queue_number, tx_queue->queue); - efx->xdp_tx_queues[xdp_queue_number] = tx_queue; - return 0; -} - int efx_set_channels(struct efx_nic *efx) { - struct efx_tx_queue *tx_queue; struct efx_channel *channel; - unsigned int next_queue = 0; - int xdp_queue_number; int rc; efx->tx_channel_offset = @@ -904,61 +967,14 @@ int efx_set_channels(struct efx_nic *efx) return -ENOMEM; } - /* We need to mark which channels really have RX and TX - * queues, and adjust the TX queue numbers if we have separate - * RX-only and TX-only channels. - */ - xdp_queue_number = 0; efx_for_each_channel(channel, efx) { if (channel->channel < efx->n_rx_channels) channel->rx_queue.core_index = channel->channel; else channel->rx_queue.core_index = -1; - - if (channel->channel >= efx->tx_channel_offset) { - if (efx_channel_is_xdp_tx(channel)) { - efx_for_each_channel_tx_queue(tx_queue, channel) { - tx_queue->queue = next_queue++; - rc = efx_set_xdp_tx_queue(efx, xdp_queue_number, tx_queue); - if (rc == 0) - xdp_queue_number++; - } - } else { - efx_for_each_channel_tx_queue(tx_queue, channel) { - tx_queue->queue = next_queue++; - netif_dbg(efx, drv, efx->net_dev, "Channel %u TXQ %u is HW %u\n", - channel->channel, tx_queue->label, - tx_queue->queue); - } - - /* If XDP is borrowing queues from net stack, it must use the queue - * with no csum offload, which is the first one of the channel - * (note: channel->tx_queue_by_type is not initialized yet) - */ - if (efx->xdp_txq_queues_mode == EFX_XDP_TX_QUEUES_BORROWED) { - tx_queue = &channel->tx_queue[0]; - rc = efx_set_xdp_tx_queue(efx, 
xdp_queue_number, tx_queue); - if (rc == 0) - xdp_queue_number++; - } - } - } } - WARN_ON(efx->xdp_txq_queues_mode == EFX_XDP_TX_QUEUES_DEDICATED && - xdp_queue_number != efx->xdp_tx_queue_count); - WARN_ON(efx->xdp_txq_queues_mode != EFX_XDP_TX_QUEUES_DEDICATED && - xdp_queue_number > efx->xdp_tx_queue_count); - /* If we have more CPUs than assigned XDP TX queues, assign the already - * existing queues to the exceeding CPUs - */ - next_queue = 0; - while (xdp_queue_number < efx->xdp_tx_queue_count) { - tx_queue = efx->xdp_tx_queues[next_queue++]; - rc = efx_set_xdp_tx_queue(efx, xdp_queue_number, tx_queue); - if (rc == 0) - xdp_queue_number++; - } + efx_set_xdp_channels(efx); rc = netif_set_real_num_tx_queues(efx->net_dev, efx->n_tx_channels); if (rc) @@ -1102,7 +1118,7 @@ void efx_start_channels(struct efx_nic *efx) struct efx_rx_queue *rx_queue; struct efx_channel *channel; - efx_for_each_channel(channel, efx) { + efx_for_each_channel_rev(channel, efx) { efx_for_each_channel_tx_queue(tx_queue, channel) { efx_init_tx_queue(tx_queue); atomic_inc(&efx->active_queues); diff --git a/drivers/net/ethernet/sfc/rx_common.c b/drivers/net/ethernet/sfc/rx_common.c index 633ca77a26fd1..b925de9b43028 100644 --- a/drivers/net/ethernet/sfc/rx_common.c +++ b/drivers/net/ethernet/sfc/rx_common.c @@ -166,6 +166,9 @@ static void efx_fini_rx_recycle_ring(struct efx_rx_queue *rx_queue) struct efx_nic *efx = rx_queue->efx; int i; + if (unlikely(!rx_queue->page_ring)) + return; + /* Unmap and release the pages in the recycle ring. Remove the ring. 
*/ for (i = 0; i <= rx_queue->page_ptr_mask; i++) { struct page *page = rx_queue->page_ring[i]; diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c index d16e031e95f44..6983799e1c05d 100644 --- a/drivers/net/ethernet/sfc/tx.c +++ b/drivers/net/ethernet/sfc/tx.c @@ -443,6 +443,9 @@ int efx_xdp_tx_buffers(struct efx_nic *efx, int n, struct xdp_frame **xdpfs, if (unlikely(!tx_queue)) return -EINVAL; + if (!tx_queue->initialised) + return -EINVAL; + if (efx->xdp_txq_queues_mode != EFX_XDP_TX_QUEUES_DEDICATED) HARD_TX_LOCK(efx->net_dev, tx_queue->core_txq, cpu); diff --git a/drivers/net/ethernet/sfc/tx_common.c b/drivers/net/ethernet/sfc/tx_common.c index d530cde2b8648..9bc8281b7f5bd 100644 --- a/drivers/net/ethernet/sfc/tx_common.c +++ b/drivers/net/ethernet/sfc/tx_common.c @@ -101,6 +101,8 @@ void efx_fini_tx_queue(struct efx_tx_queue *tx_queue) netif_dbg(tx_queue->efx, drv, tx_queue->efx->net_dev, "shutting down TX queue %d\n", tx_queue->queue); + tx_queue->initialised = false; + if (!tx_queue->buffer) return; diff --git a/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c b/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c index cd478d2cd871a..00f6d347eaf75 100644 --- a/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c +++ b/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c @@ -57,10 +57,6 @@ #define TSE_PCS_USE_SGMII_ENA BIT(0) #define TSE_PCS_IF_USE_SGMII 0x03 -#define SGMII_ADAPTER_CTRL_REG 0x00 -#define SGMII_ADAPTER_DISABLE 0x0001 -#define SGMII_ADAPTER_ENABLE 0x0000 - #define AUTONEGO_LINK_TIMER 20 static int tse_pcs_reset(void __iomem *base, struct tse_pcs *pcs) @@ -202,12 +198,8 @@ void tse_pcs_fix_mac_speed(struct tse_pcs *pcs, struct phy_device *phy_dev, unsigned int speed) { void __iomem *tse_pcs_base = pcs->tse_pcs_base; - void __iomem *sgmii_adapter_base = pcs->sgmii_adapter_base; u32 val; - writew(SGMII_ADAPTER_ENABLE, - sgmii_adapter_base + SGMII_ADAPTER_CTRL_REG); - pcs->autoneg = phy_dev->autoneg; if (phy_dev->autoneg == 
AUTONEG_ENABLE) { diff --git a/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.h b/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.h index 442812c0a4bdc..694ac25ef426b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.h +++ b/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.h @@ -10,6 +10,10 @@ #include #include +#define SGMII_ADAPTER_CTRL_REG 0x00 +#define SGMII_ADAPTER_ENABLE 0x0000 +#define SGMII_ADAPTER_DISABLE 0x0001 + struct tse_pcs { struct device *dev; void __iomem *tse_pcs_base; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c index 2ffa0a11eea56..569683f33804c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -460,6 +460,13 @@ static int ethqos_clks_config(void *priv, bool enabled) dev_err(ðqos->pdev->dev, "rgmii_clk enable failed\n"); return ret; } + + /* Enable functional clock to prevent DMA reset to timeout due + * to lacking PHY clock after the hardware block has been power + * cycled. The actual configuration will be adjusted once + * ethqos_fix_mac_speed() is invoked. 
+ */ + ethqos_set_func_clk_en(ethqos); } else { clk_disable_unprepare(ethqos->rgmii_clk); } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c index b7c2579c963b6..ac9e6c7a33b55 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c @@ -18,9 +18,6 @@ #include "altr_tse_pcs.h" -#define SGMII_ADAPTER_CTRL_REG 0x00 -#define SGMII_ADAPTER_DISABLE 0x0001 - #define SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_GMII_MII 0x0 #define SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_RGMII 0x1 #define SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_RMII 0x2 @@ -62,16 +59,14 @@ static void socfpga_dwmac_fix_mac_speed(void *priv, unsigned int speed) { struct socfpga_dwmac *dwmac = (struct socfpga_dwmac *)priv; void __iomem *splitter_base = dwmac->splitter_base; - void __iomem *tse_pcs_base = dwmac->pcs.tse_pcs_base; void __iomem *sgmii_adapter_base = dwmac->pcs.sgmii_adapter_base; struct device *dev = dwmac->dev; struct net_device *ndev = dev_get_drvdata(dev); struct phy_device *phy_dev = ndev->phydev; u32 val; - if ((tse_pcs_base) && (sgmii_adapter_base)) - writew(SGMII_ADAPTER_DISABLE, - sgmii_adapter_base + SGMII_ADAPTER_CTRL_REG); + writew(SGMII_ADAPTER_DISABLE, + sgmii_adapter_base + SGMII_ADAPTER_CTRL_REG); if (splitter_base) { val = readl(splitter_base + EMAC_SPLITTER_CTRL_REG); @@ -93,7 +88,9 @@ static void socfpga_dwmac_fix_mac_speed(void *priv, unsigned int speed) writel(val, splitter_base + EMAC_SPLITTER_CTRL_REG); } - if (tse_pcs_base && sgmii_adapter_base) + writew(SGMII_ADAPTER_ENABLE, + sgmii_adapter_base + SGMII_ADAPTER_CTRL_REG); + if (phy_dev) tse_pcs_fix_mac_speed(&dwmac->pcs, phy_dev, speed); } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index 5d29f336315b7..11e1055e8260f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ 
b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -431,8 +431,7 @@ stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac) plat->phylink_node = np; /* Get max speed of operation from device tree */ - if (of_property_read_u32(np, "max-speed", &plat->max_speed)) - plat->max_speed = -1; + of_property_read_u32(np, "max-speed", &plat->max_speed); plat->bus_id = of_alias_get_id(np, "ethernet"); if (plat->bus_id < 0) diff --git a/drivers/net/ethernet/ti/cpsw_ethtool.c b/drivers/net/ethernet/ti/cpsw_ethtool.c index aa42141be3c0e..a557a477d0393 100644 --- a/drivers/net/ethernet/ti/cpsw_ethtool.c +++ b/drivers/net/ethernet/ti/cpsw_ethtool.c @@ -364,11 +364,9 @@ int cpsw_ethtool_op_begin(struct net_device *ndev) struct cpsw_common *cpsw = priv->cpsw; int ret; - ret = pm_runtime_get_sync(cpsw->dev); - if (ret < 0) { + ret = pm_runtime_resume_and_get(cpsw->dev); + if (ret < 0) cpsw_err(priv, drv, "ethtool begin failed %d\n", ret); - pm_runtime_put_noidle(cpsw->dev); - } return ret; } diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 377c94ec24869..a960227f61da4 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -857,46 +857,53 @@ static void axienet_recv(struct net_device *ndev) while ((cur_p->status & XAXIDMA_BD_STS_COMPLETE_MASK)) { dma_addr_t phys; - tail_p = lp->rx_bd_p + sizeof(*lp->rx_bd_v) * lp->rx_bd_ci; - /* Ensure we see complete descriptor update */ dma_rmb(); - phys = desc_get_phys_addr(lp, cur_p); - dma_unmap_single(ndev->dev.parent, phys, lp->max_frm_size, - DMA_FROM_DEVICE); skb = cur_p->skb; cur_p->skb = NULL; - length = cur_p->app4 & 0x0000FFFF; - - skb_put(skb, length); - skb->protocol = eth_type_trans(skb, ndev); - /*skb_checksum_none_assert(skb);*/ - skb->ip_summed = CHECKSUM_NONE; - - /* if we're doing Rx csum offload, set it up */ - if (lp->features & XAE_FEATURE_FULL_RX_CSUM) { - csumstatus = 
(cur_p->app2 & - XAE_FULL_CSUM_STATUS_MASK) >> 3; - if ((csumstatus == XAE_IP_TCP_CSUM_VALIDATED) || - (csumstatus == XAE_IP_UDP_CSUM_VALIDATED)) { - skb->ip_summed = CHECKSUM_UNNECESSARY; + + /* skb could be NULL if a previous pass already received the + * packet for this slot in the ring, but failed to refill it + * with a newly allocated buffer. In this case, don't try to + * receive it again. + */ + if (likely(skb)) { + length = cur_p->app4 & 0x0000FFFF; + + phys = desc_get_phys_addr(lp, cur_p); + dma_unmap_single(ndev->dev.parent, phys, lp->max_frm_size, + DMA_FROM_DEVICE); + + skb_put(skb, length); + skb->protocol = eth_type_trans(skb, ndev); + /*skb_checksum_none_assert(skb);*/ + skb->ip_summed = CHECKSUM_NONE; + + /* if we're doing Rx csum offload, set it up */ + if (lp->features & XAE_FEATURE_FULL_RX_CSUM) { + csumstatus = (cur_p->app2 & + XAE_FULL_CSUM_STATUS_MASK) >> 3; + if (csumstatus == XAE_IP_TCP_CSUM_VALIDATED || + csumstatus == XAE_IP_UDP_CSUM_VALIDATED) { + skb->ip_summed = CHECKSUM_UNNECESSARY; + } + } else if ((lp->features & XAE_FEATURE_PARTIAL_RX_CSUM) != 0 && + skb->protocol == htons(ETH_P_IP) && + skb->len > 64) { + skb->csum = be32_to_cpu(cur_p->app3 & 0xFFFF); + skb->ip_summed = CHECKSUM_COMPLETE; } - } else if ((lp->features & XAE_FEATURE_PARTIAL_RX_CSUM) != 0 && - skb->protocol == htons(ETH_P_IP) && - skb->len > 64) { - skb->csum = be32_to_cpu(cur_p->app3 & 0xFFFF); - skb->ip_summed = CHECKSUM_COMPLETE; - } - netif_rx(skb); + netif_rx(skb); - size += length; - packets++; + size += length; + packets++; + } new_skb = netdev_alloc_skb_ip_align(ndev, lp->max_frm_size); if (!new_skb) - return; + break; phys = dma_map_single(ndev->dev.parent, new_skb->data, lp->max_frm_size, @@ -905,7 +912,7 @@ static void axienet_recv(struct net_device *ndev) if (net_ratelimit()) netdev_err(ndev, "RX DMA mapping error\n"); dev_kfree_skb(new_skb); - return; + break; } desc_set_phys_addr(lp, phys, cur_p); @@ -913,6 +920,11 @@ static void axienet_recv(struct 
net_device *ndev) cur_p->status = 0; cur_p->skb = new_skb; + /* Only update tail_p to mark this slot as usable after it has + * been successfully refilled. + */ + tail_p = lp->rx_bd_p + sizeof(*lp->rx_bd_v) * lp->rx_bd_ci; + if (++lp->rx_bd_ci >= lp->rx_bd_num) lp->rx_bd_ci = 0; cur_p = &lp->rx_bd_v[lp->rx_bd_ci]; @@ -2060,15 +2072,14 @@ static int axienet_probe(struct platform_device *pdev) if (ret) goto cleanup_clk; - lp->phy_node = of_parse_phandle(pdev->dev.of_node, "phy-handle", 0); - if (lp->phy_node) { - ret = axienet_mdio_setup(lp); - if (ret) - dev_warn(&pdev->dev, - "error registering MDIO bus: %d\n", ret); - } + ret = axienet_mdio_setup(lp); + if (ret) + dev_warn(&pdev->dev, + "error registering MDIO bus: %d\n", ret); + if (lp->phy_mode == PHY_INTERFACE_MODE_SGMII || lp->phy_mode == PHY_INTERFACE_MODE_1000BASEX) { + lp->phy_node = of_parse_phandle(pdev->dev.of_node, "phy-handle", 0); if (!lp->phy_node) { dev_err(&pdev->dev, "phy-handle required for 1000BaseX/SGMII\n"); ret = -EINVAL; diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index afa81a9480ccd..e675d1016c3c8 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -154,19 +154,15 @@ static void free_netvsc_device(struct rcu_head *head) kfree(nvdev->extension); - if (nvdev->recv_original_buf) { - hv_unmap_memory(nvdev->recv_buf); + if (nvdev->recv_original_buf) vfree(nvdev->recv_original_buf); - } else { + else vfree(nvdev->recv_buf); - } - if (nvdev->send_original_buf) { - hv_unmap_memory(nvdev->send_buf); + if (nvdev->send_original_buf) vfree(nvdev->send_original_buf); - } else { + else vfree(nvdev->send_buf); - } bitmap_free(nvdev->send_section_map); @@ -765,6 +761,12 @@ void netvsc_device_remove(struct hv_device *device) netvsc_teardown_send_gpadl(device, net_device, ndev); } + if (net_device->recv_original_buf) + hv_unmap_memory(net_device->recv_buf); + + if (net_device->send_original_buf) + hv_unmap_memory(net_device->send_buf); + /* Release all 
resources */ free_netvsc_device_rcu(net_device); } @@ -1821,6 +1823,12 @@ struct netvsc_device *netvsc_device_add(struct hv_device *device, netif_napi_del(&net_device->chan_table[0].napi); cleanup2: + if (net_device->recv_original_buf) + hv_unmap_memory(net_device->recv_buf); + + if (net_device->send_original_buf) + hv_unmap_memory(net_device->send_buf); + free_netvsc_device(&net_device->rcu); return ERR_PTR(ret); diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 6ef5f77be4d0a..c83664b28d890 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -460,8 +460,10 @@ static rx_handler_result_t macvlan_handle_frame(struct sk_buff **pskb) return RX_HANDLER_CONSUMED; *pskb = skb; eth = eth_hdr(skb); - if (macvlan_forward_source(skb, port, eth->h_source)) + if (macvlan_forward_source(skb, port, eth->h_source)) { + kfree_skb(skb); return RX_HANDLER_CONSUMED; + } src = macvlan_hash_lookup(port, eth->h_source); if (src && src->mode != MACVLAN_MODE_VEPA && src->mode != MACVLAN_MODE_BRIDGE) { @@ -480,8 +482,10 @@ static rx_handler_result_t macvlan_handle_frame(struct sk_buff **pskb) return RX_HANDLER_PASS; } - if (macvlan_forward_source(skb, port, eth->h_source)) + if (macvlan_forward_source(skb, port, eth->h_source)) { + kfree_skb(skb); return RX_HANDLER_CONSUMED; + } if (macvlan_passthru(port)) vlan = list_first_or_null_rcu(&port->vlans, struct macvlan_dev, list); diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 6b12902a803f0..cecf8c63096cd 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -133,11 +133,17 @@ static void macvtap_setup(struct net_device *dev) dev->tx_queue_len = TUN_READQ_SIZE; } +static struct net *macvtap_link_net(const struct net_device *dev) +{ + return dev_net(macvlan_dev_real_dev(dev)); +} + static struct rtnl_link_ops macvtap_link_ops __read_mostly = { .kind = "macvtap", .setup = macvtap_setup, .newlink = macvtap_newlink, .dellink = macvtap_dellink, + .get_link_net = macvtap_link_net, .priv_size = 
sizeof(struct macvtap_dev), }; diff --git a/drivers/net/mdio/fwnode_mdio.c b/drivers/net/mdio/fwnode_mdio.c index 1becb1a731f67..1c1584fca6327 100644 --- a/drivers/net/mdio/fwnode_mdio.c +++ b/drivers/net/mdio/fwnode_mdio.c @@ -43,6 +43,11 @@ int fwnode_mdiobus_phy_device_register(struct mii_bus *mdio, int rc; rc = fwnode_irq_get(child, 0); + /* Don't wait forever if the IRQ provider doesn't become available, + * just fall back to poll mode + */ + if (rc == -EPROBE_DEFER) + rc = driver_deferred_probe_check_state(&phy->mdio.dev); if (rc == -EPROBE_DEFER) return rc; diff --git a/drivers/net/mdio/mdio-mscc-miim.c b/drivers/net/mdio/mdio-mscc-miim.c index 64fb76c1e3959..08381038810d6 100644 --- a/drivers/net/mdio/mdio-mscc-miim.c +++ b/drivers/net/mdio/mdio-mscc-miim.c @@ -93,6 +93,9 @@ static int mscc_miim_read(struct mii_bus *bus, int mii_id, int regnum) u32 val; int ret; + if (regnum & MII_ADDR_C45) + return -EOPNOTSUPP; + ret = mscc_miim_wait_pending(bus); if (ret) goto out; @@ -136,6 +139,9 @@ static int mscc_miim_write(struct mii_bus *bus, int mii_id, struct mscc_miim_dev *miim = bus->priv; int ret; + if (regnum & MII_ADDR_C45) + return -EOPNOTSUPP; + ret = mscc_miim_wait_pending(bus); if (ret < 0) goto out; diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c index 29aa811af430f..a8794065b250b 100644 --- a/drivers/net/phy/at803x.c +++ b/drivers/net/phy/at803x.c @@ -784,25 +784,7 @@ static int at803x_probe(struct phy_device *phydev) return ret; } - /* Some bootloaders leave the fiber page selected. - * Switch to the copper page, as otherwise we read - * the PHY capabilities from the fiber side. 
- */ - if (phydev->drv->phy_id == ATH8031_PHY_ID) { - phy_lock_mdio_bus(phydev); - ret = at803x_write_page(phydev, AT803X_PAGE_COPPER); - phy_unlock_mdio_bus(phydev); - if (ret) - goto err; - } - return 0; - -err: - if (priv->vddio) - regulator_disable(priv->vddio); - - return ret; } static void at803x_remove(struct phy_device *phydev) @@ -912,6 +894,22 @@ static int at803x_config_init(struct phy_device *phydev) { int ret; + if (phydev->drv->phy_id == ATH8031_PHY_ID) { + /* Some bootloaders leave the fiber page selected. + * Switch to the copper page, as otherwise we read + * the PHY capabilities from the fiber side. + */ + phy_lock_mdio_bus(phydev); + ret = at803x_write_page(phydev, AT803X_PAGE_COPPER); + phy_unlock_mdio_bus(phydev); + if (ret) + return ret; + + ret = at8031_pll_config(phydev); + if (ret < 0) + return ret; + } + /* The RX and TX delay default is: * after HW reset: RX delay enabled and TX delay disabled * after SW reset: RX delay enabled, while TX delay retains the @@ -941,12 +939,6 @@ static int at803x_config_init(struct phy_device *phydev) if (ret < 0) return ret; - if (phydev->drv->phy_id == ATH8031_PHY_ID) { - ret = at8031_pll_config(phydev); - if (ret < 0) - return ret; - } - /* Ar803x extended next page bit is enabled by default. Cisco * multigig switches read this bit and attempt to negotiate 10Gbps * rates even if the next page bit is disabled. This is incorrect diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c index 3c683e0e40e9e..e36809aa6d300 100644 --- a/drivers/net/phy/broadcom.c +++ b/drivers/net/phy/broadcom.c @@ -11,6 +11,7 @@ */ #include "bcm-phy-lib.h" +#include #include #include #include @@ -602,6 +603,26 @@ static int brcm_fet_config_init(struct phy_device *phydev) if (err < 0) return err; + /* The datasheet indicates the PHY needs up to 1us to complete a reset, + * build some slack here. 
+ */ + usleep_range(1000, 2000); + + /* The PHY requires 65 MDC clock cycles to complete a write operation + * and turnaround the line properly. + * + * We ignore -EIO here as the MDIO controller (e.g.: mdio-bcm-unimac) + * may flag the lack of turn-around as a read failure. This is + * particularly true with this combination since the MDIO controller + * only used 64 MDC cycles. This is not a critical failure in this + * specific case and it has no functional impact otherwise, so we let + * that one go through. If there is a genuine bus error, the next read + * of MII_BRCM_FET_INTREG will error out. + */ + err = phy_read(phydev, MII_BMCR); + if (err < 0 && err != -EIO) + return err; + reg = phy_read(phydev, MII_BRCM_FET_INTREG); if (reg < 0) return reg; diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index a7ebcdab415b5..281cebc3d00cc 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -1596,11 +1596,13 @@ static int lanphy_read_page_reg(struct phy_device *phydev, int page, u32 addr) { u32 data; - phy_write(phydev, LAN_EXT_PAGE_ACCESS_CONTROL, page); - phy_write(phydev, LAN_EXT_PAGE_ACCESS_ADDRESS_DATA, addr); - phy_write(phydev, LAN_EXT_PAGE_ACCESS_CONTROL, - (page | LAN_EXT_PAGE_ACCESS_CTRL_EP_FUNC)); - data = phy_read(phydev, LAN_EXT_PAGE_ACCESS_ADDRESS_DATA); + phy_lock_mdio_bus(phydev); + __phy_write(phydev, LAN_EXT_PAGE_ACCESS_CONTROL, page); + __phy_write(phydev, LAN_EXT_PAGE_ACCESS_ADDRESS_DATA, addr); + __phy_write(phydev, LAN_EXT_PAGE_ACCESS_CONTROL, + (page | LAN_EXT_PAGE_ACCESS_CTRL_EP_FUNC)); + data = __phy_read(phydev, LAN_EXT_PAGE_ACCESS_ADDRESS_DATA); + phy_unlock_mdio_bus(phydev); return data; } @@ -1608,18 +1610,18 @@ static int lanphy_read_page_reg(struct phy_device *phydev, int page, u32 addr) static int lanphy_write_page_reg(struct phy_device *phydev, int page, u16 addr, u16 val) { - phy_write(phydev, LAN_EXT_PAGE_ACCESS_CONTROL, page); - phy_write(phydev, LAN_EXT_PAGE_ACCESS_ADDRESS_DATA, addr); - 
phy_write(phydev, LAN_EXT_PAGE_ACCESS_CONTROL, - (page | LAN_EXT_PAGE_ACCESS_CTRL_EP_FUNC)); + phy_lock_mdio_bus(phydev); + __phy_write(phydev, LAN_EXT_PAGE_ACCESS_CONTROL, page); + __phy_write(phydev, LAN_EXT_PAGE_ACCESS_ADDRESS_DATA, addr); + __phy_write(phydev, LAN_EXT_PAGE_ACCESS_CONTROL, + page | LAN_EXT_PAGE_ACCESS_CTRL_EP_FUNC); - val = phy_write(phydev, LAN_EXT_PAGE_ACCESS_ADDRESS_DATA, val); - if (val) { + val = __phy_write(phydev, LAN_EXT_PAGE_ACCESS_ADDRESS_DATA, val); + if (val != 0) phydev_err(phydev, "Error: phy_write has returned error %d\n", val); - return val; - } - return 0; + phy_unlock_mdio_bus(phydev); + return val; } static int lan8814_config_init(struct phy_device *phydev) diff --git a/drivers/net/phy/sfp-bus.c b/drivers/net/phy/sfp-bus.c index c1512c9925a66..15aa5ac1ff49c 100644 --- a/drivers/net/phy/sfp-bus.c +++ b/drivers/net/phy/sfp-bus.c @@ -74,6 +74,12 @@ static const struct sfp_quirk sfp_quirks[] = { .vendor = "HUAWEI", .part = "MA5671A", .modes = sfp_quirk_2500basex, + }, { + // Lantech 8330-262D-E can operate at 2500base-X, but + // incorrectly report 2500MBd NRZ in their EEPROM + .vendor = "Lantech", + .part = "8330-262D-E", + .modes = sfp_quirk_2500basex, }, { .vendor = "UBNT", .part = "UF-INSTANT", diff --git a/drivers/net/slip/slip.c b/drivers/net/slip/slip.c index 98f586f910fb1..8ed4fcf70b9b2 100644 --- a/drivers/net/slip/slip.c +++ b/drivers/net/slip/slip.c @@ -469,7 +469,7 @@ static void sl_tx_timeout(struct net_device *dev, unsigned int txqueue) spin_lock(&sl->lock); if (netif_queue_stopped(dev)) { - if (!netif_running(dev)) + if (!netif_running(dev) || !sl->tty) goto out; /* May be we must check transmitter timeout here ? 
diff --git a/drivers/net/tap.c b/drivers/net/tap.c index 8e3a28ba6b282..ba2ef5437e167 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -1198,7 +1198,8 @@ static int tap_sendmsg(struct socket *sock, struct msghdr *m, struct xdp_buff *xdp; int i; - if (ctl && (ctl->type == TUN_MSG_PTR)) { + if (m->msg_controllen == sizeof(struct tun_msg_ctl) && + ctl && ctl->type == TUN_MSG_PTR) { for (i = 0; i < ctl->num; i++) { xdp = &((struct xdp_buff *)ctl->ptr)[i]; tap_get_user_xdp(q, xdp); diff --git a/drivers/net/tun.c b/drivers/net/tun.c index fed85447701a5..aa78d7e00289a 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1106,7 +1106,7 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) /* NETIF_F_LLTX requires to do our own update of trans_start */ queue = netdev_get_tx_queue(dev, txq); - queue->trans_start = jiffies; + txq_trans_cond_update(queue); /* Notify and wake up reader process */ if (tfile->flags & TUN_FASYNC) @@ -2489,7 +2489,8 @@ static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len) if (!tun) return -EBADFD; - if (ctl && (ctl->type == TUN_MSG_PTR)) { + if (m->msg_controllen == sizeof(struct tun_msg_ctl) && + ctl && ctl->type == TUN_MSG_PTR) { struct tun_page tpage; int n = ctl->num; int flush = 0; diff --git a/drivers/net/usb/aqc111.c b/drivers/net/usb/aqc111.c index ea06d10e1c21a..ca409d450a296 100644 --- a/drivers/net/usb/aqc111.c +++ b/drivers/net/usb/aqc111.c @@ -1102,10 +1102,15 @@ static int aqc111_rx_fixup(struct usbnet *dev, struct sk_buff *skb) if (start_of_descs != desc_offset) goto err; - /* self check desc_offset from header*/ - if (desc_offset >= skb_len) + /* self check desc_offset from header and make sure that the + * bounds of the metadata array are inside the SKB + */ + if (pkt_count * 2 + desc_offset >= skb_len) goto err; + /* Packets must not overlap the metadata array */ + skb_trim(skb, desc_offset); + if (pkt_count == 0) goto err; diff --git a/drivers/net/usb/asix.h 
b/drivers/net/usb/asix.h index 2a1e31defe718..4334aafab59a4 100644 --- a/drivers/net/usb/asix.h +++ b/drivers/net/usb/asix.h @@ -192,8 +192,8 @@ extern const struct driver_info ax88172a_info; /* ASIX specific flags */ #define FLAG_EEPROM_MAC (1UL << 0) /* init device MAC from eeprom */ -int asix_read_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index, - u16 size, void *data, int in_pm); +int __must_check asix_read_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index, + u16 size, void *data, int in_pm); int asix_write_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index, u16 size, void *data, int in_pm); diff --git a/drivers/net/usb/asix_common.c b/drivers/net/usb/asix_common.c index 71682970be584..524805285019a 100644 --- a/drivers/net/usb/asix_common.c +++ b/drivers/net/usb/asix_common.c @@ -11,8 +11,8 @@ #define AX_HOST_EN_RETRIES 30 -int asix_read_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index, - u16 size, void *data, int in_pm) +int __must_check asix_read_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index, + u16 size, void *data, int in_pm) { int ret; int (*fn)(struct usbnet *, u8, u8, u16, u16, void *, u16); @@ -27,9 +27,12 @@ int asix_read_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index, ret = fn(dev, cmd, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, value, index, data, size); - if (unlikely(ret < 0)) + if (unlikely(ret < size)) { + ret = ret < 0 ? 
ret : -ENODATA; + netdev_warn(dev->net, "Failed to read reg index 0x%04x: %d\n", index, ret); + } return ret; } @@ -79,7 +82,7 @@ static int asix_check_host_enable(struct usbnet *dev, int in_pm) 0, 0, 1, &smsr, in_pm); if (ret == -ENODEV) break; - else if (ret < sizeof(smsr)) + else if (ret < 0) continue; else if (smsr & AX_HOST_EN) break; @@ -579,8 +582,12 @@ int asix_mdio_read_nopm(struct net_device *netdev, int phy_id, int loc) return ret; } - asix_read_cmd(dev, AX_CMD_READ_MII_REG, phy_id, - (__u16)loc, 2, &res, 1); + ret = asix_read_cmd(dev, AX_CMD_READ_MII_REG, phy_id, + (__u16)loc, 2, &res, 1); + if (ret < 0) { + mutex_unlock(&dev->phy_mutex); + return ret; + } asix_set_hw_mii(dev, 1); mutex_unlock(&dev->phy_mutex); diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c index 4514d35ef4c48..6b2fbdf4e0fde 100644 --- a/drivers/net/usb/asix_devices.c +++ b/drivers/net/usb/asix_devices.c @@ -755,7 +755,12 @@ static int ax88772_bind(struct usbnet *dev, struct usb_interface *intf) priv->phy_addr = ret; priv->embd_phy = ((priv->phy_addr & 0x1f) == 0x10); - asix_read_cmd(dev, AX_CMD_STATMNGSTS_REG, 0, 0, 1, &chipcode, 0); + ret = asix_read_cmd(dev, AX_CMD_STATMNGSTS_REG, 0, 0, 1, &chipcode, 0); + if (ret < 0) { + netdev_dbg(dev->net, "Failed to read STATMNGSTS_REG: %d\n", ret); + return ret; + } + chipcode &= AX_CHIPCODE_MASK; ret = (chipcode == AX_AX88772_CHIPCODE) ? 
ax88772_hw_reset(dev, 0) : @@ -920,11 +925,21 @@ static int ax88178_reset(struct usbnet *dev) int gpio0 = 0; u32 phyid; - asix_read_cmd(dev, AX_CMD_READ_GPIOS, 0, 0, 1, &status, 0); + ret = asix_read_cmd(dev, AX_CMD_READ_GPIOS, 0, 0, 1, &status, 0); + if (ret < 0) { + netdev_dbg(dev->net, "Failed to read GPIOS: %d\n", ret); + return ret; + } + netdev_dbg(dev->net, "GPIO Status: 0x%04x\n", status); asix_write_cmd(dev, AX_CMD_WRITE_ENABLE, 0, 0, 0, NULL, 0); - asix_read_cmd(dev, AX_CMD_READ_EEPROM, 0x0017, 0, 2, &eeprom, 0); + ret = asix_read_cmd(dev, AX_CMD_READ_EEPROM, 0x0017, 0, 2, &eeprom, 0); + if (ret < 0) { + netdev_dbg(dev->net, "Failed to read EEPROM: %d\n", ret); + return ret; + } + asix_write_cmd(dev, AX_CMD_WRITE_DISABLE, 0, 0, 0, NULL, 0); netdev_dbg(dev->net, "EEPROM index 0x17 is 0x%04x\n", eeprom); diff --git a/drivers/net/veth.c b/drivers/net/veth.c index d29fb9759cc95..6c8f4f4dfc8a9 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -320,7 +320,7 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev) rcu_read_lock(); rcv = rcu_dereference(priv->peer); - if (unlikely(!rcv)) { + if (unlikely(!rcv) || !pskb_may_pull(skb, ETH_HLEN)) { kfree_skb(skb); goto drop; } diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index e0b1ab99a359e..f37adcef4bef3 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -1266,6 +1266,7 @@ static int vrf_prepare_mac_header(struct sk_buff *skb, eth = (struct ethhdr *)skb->data; skb_reset_mac_header(skb); + skb_reset_mac_len(skb); /* we set the ethernet destination and the source addresses to the * address of the VRF device. 
@@ -1295,9 +1296,9 @@ static int vrf_prepare_mac_header(struct sk_buff *skb, */ static int vrf_add_mac_header_if_unset(struct sk_buff *skb, struct net_device *vrf_dev, - u16 proto) + u16 proto, struct net_device *orig_dev) { - if (skb_mac_header_was_set(skb)) + if (skb_mac_header_was_set(skb) && dev_has_header(orig_dev)) return 0; return vrf_prepare_mac_header(skb, vrf_dev, proto); @@ -1403,6 +1404,8 @@ static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev, /* if packet is NDISC then keep the ingress interface */ if (!is_ndisc) { + struct net_device *orig_dev = skb->dev; + vrf_rx_stats(vrf_dev, skb->len); skb->dev = vrf_dev; skb->skb_iif = vrf_dev->ifindex; @@ -1411,7 +1414,8 @@ static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev, int err; err = vrf_add_mac_header_if_unset(skb, vrf_dev, - ETH_P_IPV6); + ETH_P_IPV6, + orig_dev); if (likely(!err)) { skb_push(skb, skb->mac_len); dev_queue_xmit_nit(skb, vrf_dev); @@ -1441,6 +1445,8 @@ static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev, static struct sk_buff *vrf_ip_rcv(struct net_device *vrf_dev, struct sk_buff *skb) { + struct net_device *orig_dev = skb->dev; + skb->dev = vrf_dev; skb->skb_iif = vrf_dev->ifindex; IPCB(skb)->flags |= IPSKB_L3SLAVE; @@ -1461,7 +1467,8 @@ static struct sk_buff *vrf_ip_rcv(struct net_device *vrf_dev, if (!list_empty(&vrf_dev->ptype_all)) { int err; - err = vrf_add_mac_header_if_unset(skb, vrf_dev, ETH_P_IP); + err = vrf_add_mac_header_if_unset(skb, vrf_dev, ETH_P_IP, + orig_dev); if (likely(!err)) { skb_push(skb, skb->mac_len); dev_queue_xmit_nit(skb, vrf_dev); diff --git a/drivers/net/wireguard/queueing.c b/drivers/net/wireguard/queueing.c index 1de413b19e342..8084e7408c0ae 100644 --- a/drivers/net/wireguard/queueing.c +++ b/drivers/net/wireguard/queueing.c @@ -4,6 +4,7 @@ */ #include "queueing.h" +#include struct multicore_worker __percpu * wg_packet_percpu_multicore_worker_alloc(work_func_t function, void *ptr) @@ -42,7 +43,7 @@ void 
wg_packet_queue_free(struct crypt_queue *queue, bool purge) { free_percpu(queue->worker); WARN_ON(!purge && !__ptr_ring_empty(&queue->ring)); - ptr_ring_cleanup(&queue->ring, purge ? (void(*)(void*))kfree_skb : NULL); + ptr_ring_cleanup(&queue->ring, purge ? __skb_array_destroy_skb : NULL); } #define NEXT(skb) ((skb)->prev) diff --git a/drivers/net/wireguard/socket.c b/drivers/net/wireguard/socket.c index 6f07b949cb81d..0414d7a6ce741 100644 --- a/drivers/net/wireguard/socket.c +++ b/drivers/net/wireguard/socket.c @@ -160,6 +160,7 @@ static int send6(struct wg_device *wg, struct sk_buff *skb, rcu_read_unlock_bh(); return ret; #else + kfree_skb(skb); return -EAFNOSUPPORT; #endif } @@ -241,7 +242,7 @@ int wg_socket_endpoint_from_skb(struct endpoint *endpoint, endpoint->addr4.sin_addr.s_addr = ip_hdr(skb)->saddr; endpoint->src4.s_addr = ip_hdr(skb)->daddr; endpoint->src_if4 = skb->skb_iif; - } else if (skb->protocol == htons(ETH_P_IPV6)) { + } else if (IS_ENABLED(CONFIG_IPV6) && skb->protocol == htons(ETH_P_IPV6)) { endpoint->addr6.sin6_family = AF_INET6; endpoint->addr6.sin6_port = udp_hdr(skb)->source; endpoint->addr6.sin6_addr = ipv6_hdr(skb)->saddr; @@ -284,7 +285,7 @@ void wg_socket_set_peer_endpoint(struct wg_peer *peer, peer->endpoint.addr4 = endpoint->addr4; peer->endpoint.src4 = endpoint->src4; peer->endpoint.src_if4 = endpoint->src_if4; - } else if (endpoint->addr.sa_family == AF_INET6) { + } else if (IS_ENABLED(CONFIG_IPV6) && endpoint->addr.sa_family == AF_INET6) { peer->endpoint.addr6 = endpoint->addr6; peer->endpoint.src6 = endpoint->src6; } else { diff --git a/drivers/net/wireless/ath/ath10k/snoc.c b/drivers/net/wireless/ath/ath10k/snoc.c index 9513ab696fff1..f79dd9a716906 100644 --- a/drivers/net/wireless/ath/ath10k/snoc.c +++ b/drivers/net/wireless/ath/ath10k/snoc.c @@ -1556,11 +1556,11 @@ static int ath10k_setup_msa_resources(struct ath10k *ar, u32 msa_size) node = of_parse_phandle(dev->of_node, "memory-region", 0); if (node) { ret = 
of_address_to_resource(node, 0, &r); + of_node_put(node); if (ret) { dev_err(dev, "failed to resolve msa fixed region\n"); return ret; } - of_node_put(node); ar->msa.paddr = r.start; ar->msa.mem_size = resource_size(&r); diff --git a/drivers/net/wireless/ath/ath10k/wow.c b/drivers/net/wireless/ath/ath10k/wow.c index 7d65c115669fe..20b9aa8ddf7d5 100644 --- a/drivers/net/wireless/ath/ath10k/wow.c +++ b/drivers/net/wireless/ath/ath10k/wow.c @@ -337,14 +337,15 @@ static int ath10k_vif_wow_set_wakeups(struct ath10k_vif *arvif, if (patterns[i].mask[j / 8] & BIT(j % 8)) bitmask[j] = 0xff; old_pattern.mask = bitmask; - new_pattern = old_pattern; if (ar->wmi.rx_decap_mode == ATH10K_HW_TXRX_NATIVE_WIFI) { - if (patterns[i].pkt_offset < ETH_HLEN) + if (patterns[i].pkt_offset < ETH_HLEN) { ath10k_wow_convert_8023_to_80211(&new_pattern, &old_pattern); - else + } else { + new_pattern = old_pattern; new_pattern.pkt_offset += WOW_HDR_LEN - ETH_HLEN; + } } if (WARN_ON(new_pattern.pattern_len > WOW_MAX_PATTERN_SIZE)) diff --git a/drivers/net/wireless/ath/ath11k/ahb.c b/drivers/net/wireless/ath/ath11k/ahb.c index 3fb0aa0008259..24bd0520926bf 100644 --- a/drivers/net/wireless/ath/ath11k/ahb.c +++ b/drivers/net/wireless/ath/ath11k/ahb.c @@ -391,6 +391,8 @@ static void ath11k_ahb_free_ext_irq(struct ath11k_base *ab) for (j = 0; j < irq_grp->num_irq; j++) free_irq(ab->irq_num[irq_grp->irqs[j]], irq_grp); + + netif_napi_del(&irq_grp->napi); } } diff --git a/drivers/net/wireless/ath/ath11k/dp_rx.c b/drivers/net/wireless/ath/ath11k/dp_rx.c index c212a789421ee..e432f8dc05d61 100644 --- a/drivers/net/wireless/ath/ath11k/dp_rx.c +++ b/drivers/net/wireless/ath/ath11k/dp_rx.c @@ -2642,9 +2642,9 @@ int ath11k_dp_process_rx(struct ath11k_base *ab, int ring_id, spin_lock_bh(&srng->lock); +try_again: ath11k_hal_srng_access_begin(ab, srng); -try_again: while (likely(desc = (struct hal_reo_dest_ring *)ath11k_hal_srng_dst_get_next_entry(ab, srng))) { diff --git a/drivers/net/wireless/ath/ath11k/dp_tx.c 
b/drivers/net/wireless/ath/ath11k/dp_tx.c index 91d6244b65435..8402961c66887 100644 --- a/drivers/net/wireless/ath/ath11k/dp_tx.c +++ b/drivers/net/wireless/ath/ath11k/dp_tx.c @@ -426,7 +426,7 @@ void ath11k_dp_tx_update_txcompl(struct ath11k *ar, struct hal_tx_status *ts) struct ath11k_sta *arsta; struct ieee80211_sta *sta; u16 rate, ru_tones; - u8 mcs, rate_idx, ofdma; + u8 mcs, rate_idx = 0, ofdma; int ret; spin_lock_bh(&ab->base_lock); diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index 07f499d5ec92b..f54d5819477a4 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -2319,6 +2319,9 @@ static void ath11k_peer_assoc_h_he_6ghz(struct ath11k *ar, if (!arg->he_flag || band != NL80211_BAND_6GHZ || !sta->he_6ghz_capa.capa) return; + if (sta->bandwidth == IEEE80211_STA_RX_BW_40) + arg->bw_40 = true; + if (sta->bandwidth == IEEE80211_STA_RX_BW_80) arg->bw_80 = true; @@ -3128,6 +3131,20 @@ static void ath11k_mac_op_bss_info_changed(struct ieee80211_hw *hw, arvif->do_not_send_tmpl = true; else arvif->do_not_send_tmpl = false; + + if (vif->bss_conf.he_support) { + ret = ath11k_wmi_vdev_set_param_cmd(ar, arvif->vdev_id, + WMI_VDEV_PARAM_BA_MODE, + WMI_BA_MODE_BUFFER_SIZE_256); + if (ret) + ath11k_warn(ar->ab, + "failed to set BA BUFFER SIZE 256 for vdev: %d\n", + arvif->vdev_id); + else + ath11k_dbg(ar->ab, ATH11K_DBG_MAC, + "Set BA BUFFER SIZE 256 for VDEV: %d\n", + arvif->vdev_id); + } } if (changed & (BSS_CHANGED_BEACON_INFO | BSS_CHANGED_BEACON)) { @@ -3163,14 +3180,6 @@ static void ath11k_mac_op_bss_info_changed(struct ieee80211_hw *hw, if (arvif->is_up && vif->bss_conf.he_support && vif->bss_conf.he_oper.params) { - ret = ath11k_wmi_vdev_set_param_cmd(ar, arvif->vdev_id, - WMI_VDEV_PARAM_BA_MODE, - WMI_BA_MODE_BUFFER_SIZE_256); - if (ret) - ath11k_warn(ar->ab, - "failed to set BA BUFFER SIZE 256 for vdev: %d\n", - arvif->vdev_id); - param_id = WMI_VDEV_PARAM_HEOPS_0_31; 
param_value = vif->bss_conf.he_oper.params; ret = ath11k_wmi_vdev_set_param_cmd(ar, arvif->vdev_id, @@ -4504,24 +4513,30 @@ static int ath11k_mac_op_sta_state(struct ieee80211_hw *hw, sta->addr, arvif->vdev_id); } else if ((old_state == IEEE80211_STA_NONE && new_state == IEEE80211_STA_NOTEXIST)) { - ath11k_dp_peer_cleanup(ar, arvif->vdev_id, sta->addr); + bool skip_peer_delete = ar->ab->hw_params.vdev_start_delay && + vif->type == NL80211_IFTYPE_STATION; - if (ar->ab->hw_params.vdev_start_delay && - vif->type == NL80211_IFTYPE_STATION) - goto free; + ath11k_dp_peer_cleanup(ar, arvif->vdev_id, sta->addr); - ret = ath11k_peer_delete(ar, arvif->vdev_id, sta->addr); - if (ret) - ath11k_warn(ar->ab, "Failed to delete peer: %pM for VDEV: %d\n", - sta->addr, arvif->vdev_id); - else - ath11k_dbg(ar->ab, ATH11K_DBG_MAC, "Removed peer: %pM for VDEV: %d\n", - sta->addr, arvif->vdev_id); + if (!skip_peer_delete) { + ret = ath11k_peer_delete(ar, arvif->vdev_id, sta->addr); + if (ret) + ath11k_warn(ar->ab, + "Failed to delete peer: %pM for VDEV: %d\n", + sta->addr, arvif->vdev_id); + else + ath11k_dbg(ar->ab, + ATH11K_DBG_MAC, + "Removed peer: %pM for VDEV: %d\n", + sta->addr, arvif->vdev_id); + } ath11k_mac_dec_num_stations(arvif, sta); spin_lock_bh(&ar->ab->base_lock); peer = ath11k_peer_find(ar->ab, arvif->vdev_id, sta->addr); - if (peer && peer->sta == sta) { + if (skip_peer_delete && peer) { + peer->sta = NULL; + } else if (peer && peer->sta == sta) { ath11k_warn(ar->ab, "Found peer entry %pM n vdev %i after it was supposedly removed\n", vif->addr, arvif->vdev_id); peer->sta = NULL; @@ -4531,7 +4546,6 @@ static int ath11k_mac_op_sta_state(struct ieee80211_hw *hw, } spin_unlock_bh(&ar->ab->base_lock); -free: kfree(arsta->tx_stats); arsta->tx_stats = NULL; @@ -5566,7 +5580,7 @@ static int ath11k_mac_mgmt_tx(struct ath11k *ar, struct sk_buff *skb, skb_queue_tail(q, skb); atomic_inc(&ar->num_pending_mgmt_tx); - ieee80211_queue_work(ar->hw, &ar->wmi_mgmt_tx_work); + 
queue_work(ar->ab->workqueue, &ar->wmi_mgmt_tx_work); return 0; } diff --git a/drivers/net/wireless/ath/ath11k/mhi.c b/drivers/net/wireless/ath/ath11k/mhi.c index e4250ba8dfee2..8b21438028169 100644 --- a/drivers/net/wireless/ath/ath11k/mhi.c +++ b/drivers/net/wireless/ath/ath11k/mhi.c @@ -332,6 +332,7 @@ static int ath11k_mhi_read_addr_from_dt(struct mhi_controller *mhi_ctrl) return -ENOENT; ret = of_address_to_resource(np, 0, &res); + of_node_put(np); if (ret) return ret; @@ -560,7 +561,7 @@ static int ath11k_mhi_set_state(struct ath11k_pci *ab_pci, ret = 0; break; case ATH11K_MHI_POWER_ON: - ret = mhi_async_power_up(ab_pci->mhi_ctrl); + ret = mhi_sync_power_up(ab_pci->mhi_ctrl); break; case ATH11K_MHI_POWER_OFF: mhi_power_down(ab_pci->mhi_ctrl, true); diff --git a/drivers/net/wireless/ath/ath11k/pci.c b/drivers/net/wireless/ath/ath11k/pci.c index de71ad594f347..903758751c99a 100644 --- a/drivers/net/wireless/ath/ath11k/pci.c +++ b/drivers/net/wireless/ath/ath11k/pci.c @@ -1571,6 +1571,11 @@ static __maybe_unused int ath11k_pci_pm_suspend(struct device *dev) struct ath11k_base *ab = dev_get_drvdata(dev); int ret; + if (test_bit(ATH11K_FLAG_QMI_FAIL, &ab->dev_flags)) { + ath11k_dbg(ab, ATH11K_DBG_BOOT, "boot skipping pci suspend as qmi is not initialised\n"); + return 0; + } + ret = ath11k_core_suspend(ab); if (ret) ath11k_warn(ab, "failed to suspend core: %d\n", ret); @@ -1583,6 +1588,11 @@ static __maybe_unused int ath11k_pci_pm_resume(struct device *dev) struct ath11k_base *ab = dev_get_drvdata(dev); int ret; + if (test_bit(ATH11K_FLAG_QMI_FAIL, &ab->dev_flags)) { + ath11k_dbg(ab, ATH11K_DBG_BOOT, "boot skipping pci resume as qmi is not initialised\n"); + return 0; + } + ret = ath11k_core_resume(ab); if (ret) ath11k_warn(ab, "failed to resume core: %d\n", ret); diff --git a/drivers/net/wireless/ath/ath11k/qmi.c b/drivers/net/wireless/ath/ath11k/qmi.c index 65d3c6ba35ae6..d0701e8eca9c0 100644 --- a/drivers/net/wireless/ath/ath11k/qmi.c +++ 
b/drivers/net/wireless/ath/ath11k/qmi.c @@ -1932,10 +1932,11 @@ static int ath11k_qmi_assign_target_mem_chunk(struct ath11k_base *ab) if (!hremote_node) { ath11k_dbg(ab, ATH11K_DBG_QMI, "qmi fail to get hremote_node\n"); - return ret; + return -ENODEV; } ret = of_address_to_resource(hremote_node, 0, &res); + of_node_put(hremote_node); if (ret) { ath11k_dbg(ab, ATH11K_DBG_QMI, "qmi fail to get reg from hremote\n"); diff --git a/drivers/net/wireless/ath/ath5k/eeprom.c b/drivers/net/wireless/ath/ath5k/eeprom.c index 1fbc2c19848f2..d444b3d70ba2e 100644 --- a/drivers/net/wireless/ath/ath5k/eeprom.c +++ b/drivers/net/wireless/ath/ath5k/eeprom.c @@ -746,6 +746,9 @@ ath5k_eeprom_convert_pcal_info_5111(struct ath5k_hw *ah, int mode, } } + if (idx == AR5K_EEPROM_N_PD_CURVES) + goto err_out; + ee->ee_pd_gains[mode] = 1; pd = &chinfo[pier].pd_curves[idx]; diff --git a/drivers/net/wireless/ath/ath9k/htc_hst.c b/drivers/net/wireless/ath/ath9k/htc_hst.c index 510e61e97dbcb..994ec48b2f669 100644 --- a/drivers/net/wireless/ath/ath9k/htc_hst.c +++ b/drivers/net/wireless/ath/ath9k/htc_hst.c @@ -30,6 +30,7 @@ static int htc_issue_send(struct htc_target *target, struct sk_buff* skb, hdr->endpoint_id = epid; hdr->flags = flags; hdr->payload_len = cpu_to_be16(len); + memset(hdr->control, 0, sizeof(hdr->control)); status = target->hif->send(target->hif_dev, endpoint->ul_pipeid, skb); @@ -272,6 +273,10 @@ int htc_connect_service(struct htc_target *target, conn_msg->dl_pipeid = endpoint->dl_pipeid; conn_msg->ul_pipeid = endpoint->ul_pipeid; + /* To prevent infoleak */ + conn_msg->svc_meta_len = 0; + conn_msg->pad = 0; + ret = htc_issue_send(target, skb, skb->len, 0, ENDPOINT0); if (ret) goto err; diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index 98090e40e1cf4..e2791d45f5f59 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -839,7 +839,7 @@ static bool ath9k_txq_list_has_key(struct list_head 
*txq_list, u32 keyix) continue; txinfo = IEEE80211_SKB_CB(bf->bf_mpdu); - fi = (struct ath_frame_info *)&txinfo->rate_driver_data[0]; + fi = (struct ath_frame_info *)&txinfo->status.status_driver_data[0]; if (fi->keyix == keyix) return true; } diff --git a/drivers/net/wireless/ath/ath9k/xmit.c b/drivers/net/wireless/ath/ath9k/xmit.c index d0caf1de2bdec..db83cc4ba810a 100644 --- a/drivers/net/wireless/ath/ath9k/xmit.c +++ b/drivers/net/wireless/ath/ath9k/xmit.c @@ -141,8 +141,8 @@ static struct ath_frame_info *get_frame_info(struct sk_buff *skb) { struct ieee80211_tx_info *tx_info = IEEE80211_SKB_CB(skb); BUILD_BUG_ON(sizeof(struct ath_frame_info) > - sizeof(tx_info->rate_driver_data)); - return (struct ath_frame_info *) &tx_info->rate_driver_data[0]; + sizeof(tx_info->status.status_driver_data)); + return (struct ath_frame_info *) &tx_info->status.status_driver_data[0]; } static void ath_send_bar(struct ath_atx_tid *tid, u16 seqno) @@ -2542,6 +2542,16 @@ static void ath_tx_complete_buf(struct ath_softc *sc, struct ath_buf *bf, spin_unlock_irqrestore(&sc->tx.txbuflock, flags); } +static void ath_clear_tx_status(struct ieee80211_tx_info *tx_info) +{ + void *ptr = &tx_info->status; + + memset(ptr + sizeof(tx_info->status.rates), 0, + sizeof(tx_info->status) - + sizeof(tx_info->status.rates) - + sizeof(tx_info->status.status_driver_data)); +} + static void ath_tx_rc_status(struct ath_softc *sc, struct ath_buf *bf, struct ath_tx_status *ts, int nframes, int nbad, int txok) @@ -2553,6 +2563,8 @@ static void ath_tx_rc_status(struct ath_softc *sc, struct ath_buf *bf, struct ath_hw *ah = sc->sc_ah; u8 i, tx_rateindex; + ath_clear_tx_status(tx_info); + if (txok) tx_info->status.ack_signal = ts->ts_rssi; @@ -2567,6 +2579,13 @@ static void ath_tx_rc_status(struct ath_softc *sc, struct ath_buf *bf, tx_info->status.ampdu_len = nframes; tx_info->status.ampdu_ack_len = nframes - nbad; + tx_info->status.rates[tx_rateindex].count = ts->ts_longretry + 1; + + for (i = tx_rateindex + 
1; i < hw->max_rates; i++) { + tx_info->status.rates[i].count = 0; + tx_info->status.rates[i].idx = -1; + } + if ((ts->ts_status & ATH9K_TXERR_FILT) == 0 && (tx_info->flags & IEEE80211_TX_CTL_NO_ACK) == 0) { /* @@ -2588,16 +2607,6 @@ static void ath_tx_rc_status(struct ath_softc *sc, struct ath_buf *bf, tx_info->status.rates[tx_rateindex].count = hw->max_rate_tries; } - - for (i = tx_rateindex + 1; i < hw->max_rates; i++) { - tx_info->status.rates[i].count = 0; - tx_info->status.rates[i].idx = -1; - } - - tx_info->status.rates[tx_rateindex].count = ts->ts_longretry + 1; - - /* we report airtime in ath_tx_count_airtime(), don't report twice */ - tx_info->status.tx_time = 0; } static void ath_tx_processq(struct ath_softc *sc, struct ath_txq *txq) diff --git a/drivers/net/wireless/ath/carl9170/main.c b/drivers/net/wireless/ath/carl9170/main.c index 49f7ee1c912b8..2208ec8004821 100644 --- a/drivers/net/wireless/ath/carl9170/main.c +++ b/drivers/net/wireless/ath/carl9170/main.c @@ -1914,7 +1914,7 @@ static int carl9170_parse_eeprom(struct ar9170 *ar) WARN_ON(!(tx_streams >= 1 && tx_streams <= IEEE80211_HT_MCS_TX_MAX_STREAMS)); - tx_params = (tx_streams - 1) << + tx_params |= (tx_streams - 1) << IEEE80211_HT_MCS_TX_MAX_STREAMS_SHIFT; carl9170_band_2GHz.ht_cap.mcs.tx_params |= tx_params; diff --git a/drivers/net/wireless/ath/regd.c b/drivers/net/wireless/ath/regd.c index b2400e2417a55..f15e7bd690b5b 100644 --- a/drivers/net/wireless/ath/regd.c +++ b/drivers/net/wireless/ath/regd.c @@ -667,14 +667,14 @@ ath_regd_init_wiphy(struct ath_regulatory *reg, /* * Some users have reported their EEPROM programmed with - * 0x8000 or 0x0 set, this is not a supported regulatory - * domain but since we have more than one user with it we - * need a solution for them. We default to 0x64, which is - * the default Atheros world regulatory domain. + * 0x8000 set, this is not a supported regulatory domain + * but since we have more than one user with it we need + * a solution for them. 
We default to 0x64, which is the + * default Atheros world regulatory domain. */ static void ath_regd_sanitize(struct ath_regulatory *reg) { - if (reg->current_rd != COUNTRY_ERD_FLAG && reg->current_rd != 0) + if (reg->current_rd != COUNTRY_ERD_FLAG) return; printk(KERN_DEBUG "ath: EEPROM regdomain sanitized\n"); reg->current_rd = 0x64; diff --git a/drivers/net/wireless/ath/wcn36xx/main.c b/drivers/net/wireless/ath/wcn36xx/main.c index 9575d7373bf27..ac2813ed851c4 100644 --- a/drivers/net/wireless/ath/wcn36xx/main.c +++ b/drivers/net/wireless/ath/wcn36xx/main.c @@ -1513,6 +1513,9 @@ static int wcn36xx_platform_get_resources(struct wcn36xx *wcn, if (iris_node) { if (of_device_is_compatible(iris_node, "qcom,wcn3620")) wcn->rf_id = RF_IRIS_WCN3620; + if (of_device_is_compatible(iris_node, "qcom,wcn3660") || + of_device_is_compatible(iris_node, "qcom,wcn3660b")) + wcn->rf_id = RF_IRIS_WCN3660; if (of_device_is_compatible(iris_node, "qcom,wcn3680")) wcn->rf_id = RF_IRIS_WCN3680; of_node_put(iris_node); diff --git a/drivers/net/wireless/ath/wcn36xx/wcn36xx.h b/drivers/net/wireless/ath/wcn36xx/wcn36xx.h index fbd0558c2c196..5d3f8f56e5681 100644 --- a/drivers/net/wireless/ath/wcn36xx/wcn36xx.h +++ b/drivers/net/wireless/ath/wcn36xx/wcn36xx.h @@ -97,6 +97,7 @@ enum wcn36xx_ampdu_state { #define RF_UNKNOWN 0x0000 #define RF_IRIS_WCN3620 0x3620 +#define RF_IRIS_WCN3660 0x3660 #define RF_IRIS_WCN3680 0x3680 static inline void buff_to_be(u32 *buf, size_t len) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c index d99140960a820..dcbe55b56e437 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c @@ -207,6 +207,8 @@ static int brcmf_init_nvram_parser(struct nvram_parser *nvp, size = BRCMF_FW_MAX_NVRAM_SIZE; else size = data_len; + /* Add space for properties we may add */ + size += 
strlen(BRCMF_FW_DEFAULT_BOARDREV) + 1; /* Alloc for extra 0 byte + roundup by 4 + length field */ size += 1 + 3 + sizeof(u32); nvp->nvram = kzalloc(size, GFP_KERNEL); diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c index 8b149996fc000..3ff4997e1c97a 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -59,6 +60,13 @@ BRCMF_FW_DEF(4366B, "brcmfmac4366b-pcie"); BRCMF_FW_DEF(4366C, "brcmfmac4366c-pcie"); BRCMF_FW_DEF(4371, "brcmfmac4371-pcie"); +/* firmware config files */ +MODULE_FIRMWARE(BRCMF_FW_DEFAULT_PATH "brcmfmac*-pcie.txt"); +MODULE_FIRMWARE(BRCMF_FW_DEFAULT_PATH "brcmfmac*-pcie.*.txt"); + +/* per-board firmware binaries */ +MODULE_FIRMWARE(BRCMF_FW_DEFAULT_PATH "brcmfmac*-pcie.*.bin"); + static const struct brcmf_firmware_mapping brcmf_pcie_fwnames[] = { BRCMF_FW_ENTRY(BRCM_CC_43602_CHIP_ID, 0xFFFFFFFF, 43602), BRCMF_FW_ENTRY(BRCM_CC_43465_CHIP_ID, 0xFFFFFFF0, 4366C), @@ -447,47 +455,6 @@ brcmf_pcie_write_ram32(struct brcmf_pciedev_info *devinfo, u32 mem_offset, } -static void -brcmf_pcie_copy_mem_todev(struct brcmf_pciedev_info *devinfo, u32 mem_offset, - void *srcaddr, u32 len) -{ - void __iomem *address = devinfo->tcm + mem_offset; - __le32 *src32; - __le16 *src16; - u8 *src8; - - if (((ulong)address & 4) || ((ulong)srcaddr & 4) || (len & 4)) { - if (((ulong)address & 2) || ((ulong)srcaddr & 2) || (len & 2)) { - src8 = (u8 *)srcaddr; - while (len) { - iowrite8(*src8, address); - address++; - src8++; - len--; - } - } else { - len = len / 2; - src16 = (__le16 *)srcaddr; - while (len) { - iowrite16(le16_to_cpu(*src16), address); - address += 2; - src16++; - len--; - } - } - } else { - len = len / 4; - src32 = (__le32 *)srcaddr; - while (len) { - iowrite32(le32_to_cpu(*src32), address); - address += 4; - src32++; - len--; - } - } 
-} - - static void brcmf_pcie_copy_dev_tomem(struct brcmf_pciedev_info *devinfo, u32 mem_offset, void *dstaddr, u32 len) @@ -1348,6 +1315,18 @@ static void brcmf_pcie_down(struct device *dev) { } +static int brcmf_pcie_preinit(struct device *dev) +{ + struct brcmf_bus *bus_if = dev_get_drvdata(dev); + struct brcmf_pciedev *buspub = bus_if->bus_priv.pcie; + + brcmf_dbg(PCIE, "Enter\n"); + + brcmf_pcie_intr_enable(buspub->devinfo); + brcmf_pcie_hostready(buspub->devinfo); + + return 0; +} static int brcmf_pcie_tx(struct device *dev, struct sk_buff *skb) { @@ -1456,6 +1435,7 @@ static int brcmf_pcie_reset(struct device *dev) } static const struct brcmf_bus_ops brcmf_pcie_bus_ops = { + .preinit = brcmf_pcie_preinit, .txdata = brcmf_pcie_tx, .stop = brcmf_pcie_down, .txctl = brcmf_pcie_tx_ctlpkt, @@ -1563,8 +1543,8 @@ static int brcmf_pcie_download_fw_nvram(struct brcmf_pciedev_info *devinfo, return err; brcmf_dbg(PCIE, "Download FW %s\n", devinfo->fw_name); - brcmf_pcie_copy_mem_todev(devinfo, devinfo->ci->rambase, - (void *)fw->data, fw->size); + memcpy_toio(devinfo->tcm + devinfo->ci->rambase, + (void *)fw->data, fw->size); resetintr = get_unaligned_le32(fw->data); release_firmware(fw); @@ -1578,7 +1558,7 @@ static int brcmf_pcie_download_fw_nvram(struct brcmf_pciedev_info *devinfo, brcmf_dbg(PCIE, "Download NVRAM %s\n", devinfo->nvram_name); address = devinfo->ci->rambase + devinfo->ci->ramsize - nvram_len; - brcmf_pcie_copy_mem_todev(devinfo, address, nvram, nvram_len); + memcpy_toio(devinfo->tcm + address, nvram, nvram_len); brcmf_fw_nvram_free(nvram); } else { brcmf_dbg(PCIE, "No matching NVRAM file found %s\n", @@ -1777,6 +1757,8 @@ static void brcmf_pcie_setup(struct device *dev, int ret, ret = brcmf_chip_get_raminfo(devinfo->ci); if (ret) { brcmf_err(bus, "Failed to get RAM info\n"); + release_firmware(fw); + brcmf_fw_nvram_free(nvram); goto fail; } @@ -1826,9 +1808,6 @@ static void brcmf_pcie_setup(struct device *dev, int ret, 
init_waitqueue_head(&devinfo->mbdata_resp_wait); - brcmf_pcie_intr_enable(devinfo); - brcmf_pcie_hostready(devinfo); - ret = brcmf_attach(&devinfo->pdev->dev); if (ret) goto fail; diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c index 8effeb7a7269b..5d156e591b35c 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c @@ -629,7 +629,6 @@ BRCMF_FW_CLM_DEF(43752, "brcmfmac43752-sdio"); /* firmware config files */ MODULE_FIRMWARE(BRCMF_FW_DEFAULT_PATH "brcmfmac*-sdio.*.txt"); -MODULE_FIRMWARE(BRCMF_FW_DEFAULT_PATH "brcmfmac*-pcie.*.txt"); /* per-board firmware binaries */ MODULE_FIRMWARE(BRCMF_FW_DEFAULT_PATH "brcmfmac*-sdio.*.bin"); diff --git a/drivers/net/wireless/intel/iwlwifi/Kconfig b/drivers/net/wireless/intel/iwlwifi/Kconfig index 85e7042837556..a647a406b87be 100644 --- a/drivers/net/wireless/intel/iwlwifi/Kconfig +++ b/drivers/net/wireless/intel/iwlwifi/Kconfig @@ -139,6 +139,7 @@ config IWLMEI tristate "Intel Management Engine communication over WLAN" depends on INTEL_MEI depends on PM + depends on CFG80211 help Enables the iwlmei kernel module. 
diff --git a/drivers/net/wireless/intel/iwlwifi/dvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/dvm/mac80211.c index 754876cd27ce8..e8bd4f0e3d2dc 100644 --- a/drivers/net/wireless/intel/iwlwifi/dvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/dvm/mac80211.c @@ -299,7 +299,7 @@ static int iwlagn_mac_start(struct ieee80211_hw *hw) priv->is_open = 1; IWL_DEBUG_MAC80211(priv, "leave\n"); - return 0; + return ret; } static void iwlagn_mac_stop(struct ieee80211_hw *hw) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/dbg-tlv.h b/drivers/net/wireless/intel/iwlwifi/fw/api/dbg-tlv.h index 456b7eaac5700..061fe6cc6cf5b 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/dbg-tlv.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/dbg-tlv.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ /* - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation */ #ifndef __iwl_fw_dbg_tlv_h__ #define __iwl_fw_dbg_tlv_h__ @@ -249,11 +249,10 @@ struct iwl_fw_ini_hcmd_tlv { } __packed; /* FW_TLV_DEBUG_HCMD_API_S_VER_1 */ /** -* struct iwl_fw_ini_conf_tlv - preset configuration TLV +* struct iwl_fw_ini_addr_val - Address and value to set it to * * @address: the base address * @value: value to set at address - */ struct iwl_fw_ini_addr_val { __le32 address; diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c index 7ad9cee925da5..372cc950cc884 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c @@ -1561,8 +1561,6 @@ iwl_dump_ini_dbgi_sram_iter(struct iwl_fw_runtime *fwrt, return -EBUSY; range->range_data_size = reg->dev_addr.size; - iwl_write_prph_no_grab(fwrt->trans, DBGI_SRAM_TARGET_ACCESS_CFG, - DBGI_SRAM_TARGET_ACCESS_CFG_RESET_ADDRESS_MSK); for (i = 0; i < (le32_to_cpu(reg->dev_addr.size) / 4); i++) { prph_data = iwl_read_prph_no_grab(fwrt->trans, (i % 2) ? 
DBGI_SRAM_TARGET_ACCESS_RDATA_MSB : diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c b/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c index c73672d613562..42f6f8bb83be9 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c @@ -861,11 +861,18 @@ static void iwl_dbg_tlv_apply_config(struct iwl_fw_runtime *fwrt, case IWL_FW_INI_CONFIG_SET_TYPE_DBGC_DRAM_ADDR: { struct iwl_dbgc1_info dram_info = {}; struct iwl_dram_data *frags = &fwrt->trans->dbg.fw_mon_ini[1].frags[0]; - __le64 dram_base_addr = cpu_to_le64(frags->physical); - __le32 dram_size = cpu_to_le32(frags->size); - u64 dram_addr = le64_to_cpu(dram_base_addr); + __le64 dram_base_addr; + __le32 dram_size; + u64 dram_addr; u32 ret; + if (!frags) + break; + + dram_base_addr = cpu_to_le64(frags->physical); + dram_size = cpu_to_le32(frags->size); + dram_addr = le64_to_cpu(dram_base_addr); + IWL_DEBUG_FW(fwrt, "WRT: dram_base_addr 0x%016llx, dram_size 0x%x\n", dram_base_addr, dram_size); IWL_DEBUG_FW(fwrt, "WRT: config_list->addr_offset: %u\n", diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-prph.h b/drivers/net/wireless/intel/iwlwifi/iwl-prph.h index 95b3dae7b504b..9331a6b6bf36c 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-prph.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-prph.h @@ -354,8 +354,6 @@ #define WFPM_GP2 0xA030B4 /* DBGI SRAM Register details */ -#define DBGI_SRAM_TARGET_ACCESS_CFG 0x00A2E14C -#define DBGI_SRAM_TARGET_ACCESS_CFG_RESET_ADDRESS_MSK 0x10000 #define DBGI_SRAM_TARGET_ACCESS_RDATA_LSB 0x00A2E154 #define DBGI_SRAM_TARGET_ACCESS_RDATA_MSB 0x00A2E158 diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c index b400867e94f0a..3f284836e7076 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c @@ -2704,7 +2704,9 @@ static int iwl_mvm_d3_test_open(struct inode *inode, struct file *file) /* start pseudo D3 */ 
rtnl_lock(); + wiphy_lock(mvm->hw->wiphy); err = __iwl_mvm_suspend(mvm->hw, mvm->hw->wiphy->wowlan_config, true); + wiphy_unlock(mvm->hw->wiphy); rtnl_unlock(); if (err > 0) err = -EINVAL; @@ -2760,7 +2762,9 @@ static int iwl_mvm_d3_test_release(struct inode *inode, struct file *file) iwl_fw_dbg_read_d3_debug_data(&mvm->fwrt); rtnl_lock(); + wiphy_lock(mvm->hw->wiphy); __iwl_mvm_resume(mvm, true); + wiphy_unlock(mvm->hw->wiphy); rtnl_unlock(); iwl_mvm_resume_tcm(mvm); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c index ae589b3b8c46e..ee031a5897140 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c @@ -1658,8 +1658,10 @@ int iwl_mvm_up(struct iwl_mvm *mvm) while (!sband && i < NUM_NL80211_BANDS) sband = mvm->hw->wiphy->bands[i++]; - if (WARN_ON_ONCE(!sband)) + if (WARN_ON_ONCE(!sband)) { + ret = -ENODEV; goto error; + } chan = &sband->channels[0]; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c index 1f8b97995b943..069d54501e30e 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c @@ -235,7 +235,8 @@ static void iwl_mvm_rx_thermal_dual_chain_req(struct iwl_mvm *mvm, */ mvm->fw_static_smps_request = req->event == cpu_to_le32(THERMAL_DUAL_CHAIN_REQ_DISABLE); - ieee80211_iterate_interfaces(mvm->hw, IEEE80211_IFACE_ITER_NORMAL, + ieee80211_iterate_interfaces(mvm->hw, + IEEE80211_IFACE_SKIP_SDATA_NOT_IN_DRIVER, iwl_mvm_intf_dual_chain_req, NULL); } diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/phy-ctxt.c b/drivers/net/wireless/intel/iwlwifi/mvm/phy-ctxt.c index 9af40b0fa37ae..a6e6673bf4ee0 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/phy-ctxt.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/phy-ctxt.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* - * Copyright (C) 2012-2014, 2018-2021 Intel Corporation + * 
Copyright (C) 2012-2014, 2018-2022 Intel Corporation * Copyright (C) 2013-2014 Intel Mobile Communications GmbH * Copyright (C) 2017 Intel Deutschland GmbH */ @@ -349,18 +349,31 @@ void iwl_mvm_phy_ctxt_unref(struct iwl_mvm *mvm, struct iwl_mvm_phy_ctxt *ctxt) * otherwise we might not be able to reuse this phy. */ if (ctxt->ref == 0) { - struct ieee80211_channel *chan; + struct ieee80211_channel *chan = NULL; struct cfg80211_chan_def chandef; - struct ieee80211_supported_band *sband = NULL; - enum nl80211_band band = NL80211_BAND_2GHZ; + struct ieee80211_supported_band *sband; + enum nl80211_band band; + int channel; - while (!sband && band < NUM_NL80211_BANDS) - sband = mvm->hw->wiphy->bands[band++]; + for (band = NL80211_BAND_2GHZ; band < NUM_NL80211_BANDS; band++) { + sband = mvm->hw->wiphy->bands[band]; - if (WARN_ON(!sband)) - return; + if (!sband) + continue; + + for (channel = 0; channel < sband->n_channels; channel++) + if (!(sband->channels[channel].flags & + IEEE80211_CHAN_DISABLED)) { + chan = &sband->channels[channel]; + break; + } - chan = &sband->channels[0]; + if (chan) + break; + } + + if (WARN_ON(!chan)) + return; cfg80211_chandef_create(&chandef, chan, NL80211_CHAN_NO_HT); iwl_mvm_phy_ctxt_changed(mvm, ctxt, &chandef, 1, 1); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rx.c b/drivers/net/wireless/intel/iwlwifi/mvm/rx.c index 64446a11ef980..9a46468bd4345 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rx.c @@ -640,7 +640,7 @@ static void iwl_mvm_stat_iterator_all_macs(void *_data, u8 *mac, struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); u16 vif_id = mvmvif->id; - if (WARN_ONCE(vif_id > MAC_INDEX_AUX, "invalid vif id: %d", vif_id)) + if (WARN_ONCE(vif_id >= MAC_INDEX_AUX, "invalid vif id: %d", vif_id)) return; if (vif->type != NL80211_IFTYPE_STATION) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c index 
5f92a09db3742..4cd507cb412de 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c @@ -1893,7 +1893,10 @@ static u8 iwl_mvm_scan_umac_chan_flags_v2(struct iwl_mvm *mvm, IWL_SCAN_CHANNEL_FLAG_CACHE_ADD; /* set fragmented ebs for fragmented scan on HB channels */ - if (iwl_mvm_is_scan_fragmented(params->hb_type)) + if ((!iwl_mvm_is_cdb_supported(mvm) && + iwl_mvm_is_scan_fragmented(params->type)) || + (iwl_mvm_is_cdb_supported(mvm) && + iwl_mvm_is_scan_fragmented(params->hb_type))) flags |= IWL_SCAN_CHANNEL_FLAG_EBS_FRAG; return flags; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c index 9213f8518f10d..40daced97b9e8 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c @@ -318,15 +318,14 @@ static u32 iwl_mvm_get_tx_rate(struct iwl_mvm *mvm, /* info->control is only relevant for non HW rate control */ if (!ieee80211_hw_check(mvm->hw, HAS_RATE_CONTROL)) { - struct iwl_mvm_sta *mvmsta = iwl_mvm_sta_from_mac80211(sta); - /* HT rate doesn't make sense for a non data frame */ WARN_ONCE(info->control.rates[0].flags & IEEE80211_TX_RC_MCS && !ieee80211_is_data(fc), "Got a HT rate (flags:0x%x/mcs:%d/fc:0x%x/state:%d) for a non data frame\n", info->control.rates[0].flags, info->control.rates[0].idx, - le16_to_cpu(fc), sta ? mvmsta->sta_state : -1); + le16_to_cpu(fc), + sta ? 
iwl_mvm_sta_from_mac80211(sta)->sta_state : -1); rate_idx = info->control.rates[0].idx; } diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index ef14584fc0a17..4b08eb46617c7 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -1112,7 +1112,7 @@ static const struct iwl_causes_list causes_list_pre_bz[] = { }; static const struct iwl_causes_list causes_list_bz[] = { - {MSIX_HW_INT_CAUSES_REG_SW_ERR_BZ, CSR_MSIX_HW_INT_MASK_AD, 0x29}, + {MSIX_HW_INT_CAUSES_REG_SW_ERR_BZ, CSR_MSIX_HW_INT_MASK_AD, 0x15}, }; static void iwl_pcie_map_list(struct iwl_trans *trans, diff --git a/drivers/net/wireless/mediatek/mt76/dma.c b/drivers/net/wireless/mediatek/mt76/dma.c index 3a9af8931c35a..3d644925a4e04 100644 --- a/drivers/net/wireless/mediatek/mt76/dma.c +++ b/drivers/net/wireless/mediatek/mt76/dma.c @@ -465,6 +465,7 @@ mt76_dma_rx_fill(struct mt76_dev *dev, struct mt76_queue *q) qbuf.addr = addr + offset; qbuf.len = len - offset; + qbuf.skip_unmap = false; mt76_dma_add_buf(dev, q, &qbuf, 1, 0, buf, NULL); frames++; } diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h index 404c3d1a70d69..5197fcb066492 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76.h +++ b/drivers/net/wireless/mediatek/mt76/mt76.h @@ -19,7 +19,7 @@ #define MT_MCU_RING_SIZE 32 #define MT_RX_BUF_SIZE 2048 -#define MT_SKB_HEAD_LEN 128 +#define MT_SKB_HEAD_LEN 256 #define MT_MAX_NON_AQL_PKT 16 #define MT_TXQ_FREE_THR 32 @@ -224,7 +224,7 @@ enum mt76_wcid_flags { MT_WCID_FLAG_HDR_TRANS, }; -#define MT76_N_WCIDS 288 +#define MT76_N_WCIDS 544 /* stored in ieee80211_tx_info::hw_queue */ #define MT_TX_HW_QUEUE_EXT_PHY BIT(3) diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/main.c b/drivers/net/wireless/mediatek/mt76/mt7603/main.c index 2b546bc05d822..83c5eec5b1633 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7603/main.c +++ 
b/drivers/net/wireless/mediatek/mt76/mt7603/main.c @@ -641,6 +641,9 @@ mt7603_sta_rate_tbl_update(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_sta_rates *sta_rates = rcu_dereference(sta->rates); int i; + if (!sta_rates) + return; + spin_lock_bh(&dev->mt76.lock); for (i = 0; i < ARRAY_SIZE(msta->rates); i++) { msta->rates[i].idx = sta_rates->rate[i].idx; diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c index ec25e5a95d442..5d69e77814c9d 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c @@ -253,13 +253,13 @@ static void mt7615_mac_fill_tm_rx(struct mt7615_phy *phy, __le32 *rxv) static int mt7615_reverse_frag0_hdr_trans(struct sk_buff *skb, u16 hdr_gap) { struct mt76_rx_status *status = (struct mt76_rx_status *)skb->cb; + struct ethhdr *eth_hdr = (struct ethhdr *)(skb->data + hdr_gap); struct mt7615_sta *msta = (struct mt7615_sta *)status->wcid; + __le32 *rxd = (__le32 *)skb->data; struct ieee80211_sta *sta; struct ieee80211_vif *vif; struct ieee80211_hdr hdr; - struct ethhdr eth_hdr; - __le32 *rxd = (__le32 *)skb->data; - __le32 qos_ctrl, ht_ctrl; + u16 frame_control; if (FIELD_GET(MT_RXD1_NORMAL_ADDR_TYPE, le32_to_cpu(rxd[1])) != MT_RXD1_NORMAL_U2M) @@ -275,47 +275,53 @@ static int mt7615_reverse_frag0_hdr_trans(struct sk_buff *skb, u16 hdr_gap) vif = container_of((void *)msta->vif, struct ieee80211_vif, drv_priv); /* store the info from RXD and ethhdr to avoid being overridden */ - memcpy(&eth_hdr, skb->data + hdr_gap, sizeof(eth_hdr)); - hdr.frame_control = FIELD_GET(MT_RXD4_FRAME_CONTROL, rxd[4]); - hdr.seq_ctrl = FIELD_GET(MT_RXD6_SEQ_CTRL, rxd[6]); - qos_ctrl = FIELD_GET(MT_RXD6_QOS_CTL, rxd[6]); - ht_ctrl = FIELD_GET(MT_RXD7_HT_CONTROL, rxd[7]); - + frame_control = le32_get_bits(rxd[4], MT_RXD4_FRAME_CONTROL); + hdr.frame_control = cpu_to_le16(frame_control); + hdr.seq_ctrl = cpu_to_le16(le32_get_bits(rxd[6], 
MT_RXD6_SEQ_CTRL)); hdr.duration_id = 0; + ether_addr_copy(hdr.addr1, vif->addr); ether_addr_copy(hdr.addr2, sta->addr); - switch (le16_to_cpu(hdr.frame_control) & - (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) { + switch (frame_control & (IEEE80211_FCTL_TODS | + IEEE80211_FCTL_FROMDS)) { case 0: ether_addr_copy(hdr.addr3, vif->bss_conf.bssid); break; case IEEE80211_FCTL_FROMDS: - ether_addr_copy(hdr.addr3, eth_hdr.h_source); + ether_addr_copy(hdr.addr3, eth_hdr->h_source); break; case IEEE80211_FCTL_TODS: - ether_addr_copy(hdr.addr3, eth_hdr.h_dest); + ether_addr_copy(hdr.addr3, eth_hdr->h_dest); break; case IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS: - ether_addr_copy(hdr.addr3, eth_hdr.h_dest); - ether_addr_copy(hdr.addr4, eth_hdr.h_source); + ether_addr_copy(hdr.addr3, eth_hdr->h_dest); + ether_addr_copy(hdr.addr4, eth_hdr->h_source); break; default: break; } skb_pull(skb, hdr_gap + sizeof(struct ethhdr) - 2); - if (eth_hdr.h_proto == htons(ETH_P_AARP) || - eth_hdr.h_proto == htons(ETH_P_IPX)) + if (eth_hdr->h_proto == cpu_to_be16(ETH_P_AARP) || + eth_hdr->h_proto == cpu_to_be16(ETH_P_IPX)) ether_addr_copy(skb_push(skb, ETH_ALEN), bridge_tunnel_header); - else if (eth_hdr.h_proto >= htons(ETH_P_802_3_MIN)) + else if (be16_to_cpu(eth_hdr->h_proto) >= ETH_P_802_3_MIN) ether_addr_copy(skb_push(skb, ETH_ALEN), rfc1042_header); else skb_pull(skb, 2); if (ieee80211_has_order(hdr.frame_control)) - memcpy(skb_push(skb, 2), &ht_ctrl, 2); - if (ieee80211_is_data_qos(hdr.frame_control)) - memcpy(skb_push(skb, 2), &qos_ctrl, 2); + memcpy(skb_push(skb, IEEE80211_HT_CTL_LEN), &rxd[7], + IEEE80211_HT_CTL_LEN); + + if (ieee80211_is_data_qos(hdr.frame_control)) { + __le16 qos_ctrl; + + qos_ctrl = cpu_to_le16(le32_get_bits(rxd[6], MT_RXD6_QOS_CTL)); + memcpy(skb_push(skb, IEEE80211_QOS_CTL_LEN), &qos_ctrl, + IEEE80211_QOS_CTL_LEN); + } + if (ieee80211_has_a4(hdr.frame_control)) memcpy(skb_push(skb, sizeof(hdr)), &hdr, sizeof(hdr)); else @@ -1835,7 +1841,7 @@ 
mt7615_mac_adjust_sensitivity(struct mt7615_phy *phy, struct mt7615_dev *dev = phy->dev; int false_cca = ofdm ? phy->false_cca_ofdm : phy->false_cca_cck; bool ext_phy = phy != &dev->phy; - u16 def_th = ofdm ? -98 : -110; + s16 def_th = ofdm ? -98 : -110; bool update = false; s8 *sensitivity; int signal; @@ -2103,6 +2109,14 @@ void mt7615_pm_power_save_work(struct work_struct *work) test_bit(MT76_HW_SCHED_SCANNING, &dev->mphy.state)) goto out; + if (mutex_is_locked(&dev->mt76.mutex)) + /* if mt76 mutex is held we should not put the device + * to sleep since we are currently accessing device + * register map. We need to wait for the next power_save + * trigger. + */ + goto out; + if (time_is_after_jiffies(dev->pm.last_activity + delta)) { delta = dev->pm.last_activity + delta - jiffies; goto out; diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/main.c b/drivers/net/wireless/mediatek/mt76/mt7615/main.c index 82d625a16a62c..ce902b107ce33 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7615/main.c @@ -683,6 +683,9 @@ static void mt7615_sta_rate_tbl_update(struct ieee80211_hw *hw, struct ieee80211_sta_rates *sta_rates = rcu_dereference(sta->rates); int i; + if (!sta_rates) + return; + spin_lock_bh(&dev->mt76.lock); for (i = 0; i < ARRAY_SIZE(msta->rates); i++) { msta->rates[i].idx = sta_rates->rate[i].idx; diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c index f79e3d5084f39..5664f119447bc 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c @@ -310,7 +310,7 @@ mt76_connac_mcu_alloc_wtbl_req(struct mt76_dev *dev, struct mt76_wcid *wcid, } if (sta_hdr) - sta_hdr->len = cpu_to_le16(sizeof(hdr)); + le16_add_cpu(&sta_hdr->len, sizeof(hdr)); return skb_put_data(nskb, &hdr, sizeof(hdr)); } diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h 
b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h index 5baf8370b7bd8..93c783a3af7c5 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h +++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h @@ -996,7 +996,8 @@ enum { MCU_CE_CMD_SET_BSS_CONNECTED = 0x16, MCU_CE_CMD_SET_BSS_ABORT = 0x17, MCU_CE_CMD_CANCEL_HW_SCAN = 0x1b, - MCU_CE_CMD_SET_ROC = 0x1d, + MCU_CE_CMD_SET_ROC = 0x1c, + MCU_CE_CMD_SET_EDCA_PARMS = 0x1d, MCU_CE_CMD_SET_P2P_OPPPS = 0x33, MCU_CE_CMD_SET_RATE_TX_POWER = 0x5d, MCU_CE_CMD_SCHED_SCAN_ENABLE = 0x61, diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/init.c b/drivers/net/wireless/mediatek/mt76/mt7915/init.c index d054cdecd5f70..29517ca08de0c 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/init.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/init.c @@ -399,7 +399,7 @@ static void mt7915_mac_init(struct mt7915_dev *dev) /* enable hardware de-agg */ mt76_set(dev, MT_MDP_DCR0, MT_MDP_DCR0_DAMSDU_EN); - for (i = 0; i < MT7915_WTBL_SIZE; i++) + for (i = 0; i < mt7915_wtbl_size(dev); i++) mt7915_mac_wtbl_update(dev, i, MT_WTBL_UPDATE_ADM_COUNT_CLEAR); for (i = 0; i < 2; i++) diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c index 48f1155022823..e4c300aa15260 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c @@ -391,13 +391,13 @@ mt7915_mac_decode_he_radiotap(struct sk_buff *skb, __le32 *rxv, u32 mode) static int mt7915_reverse_frag0_hdr_trans(struct sk_buff *skb, u16 hdr_gap) { struct mt76_rx_status *status = (struct mt76_rx_status *)skb->cb; + struct ethhdr *eth_hdr = (struct ethhdr *)(skb->data + hdr_gap); struct mt7915_sta *msta = (struct mt7915_sta *)status->wcid; + __le32 *rxd = (__le32 *)skb->data; struct ieee80211_sta *sta; struct ieee80211_vif *vif; struct ieee80211_hdr hdr; - struct ethhdr eth_hdr; - __le32 *rxd = (__le32 *)skb->data; - __le32 qos_ctrl, ht_ctrl; + u16 frame_control; if 
(FIELD_GET(MT_RXD3_NORMAL_ADDR_TYPE, le32_to_cpu(rxd[3])) != MT_RXD3_NORMAL_U2M) @@ -413,47 +413,52 @@ static int mt7915_reverse_frag0_hdr_trans(struct sk_buff *skb, u16 hdr_gap) vif = container_of((void *)msta->vif, struct ieee80211_vif, drv_priv); /* store the info from RXD and ethhdr to avoid being overridden */ - memcpy(&eth_hdr, skb->data + hdr_gap, sizeof(eth_hdr)); - hdr.frame_control = FIELD_GET(MT_RXD6_FRAME_CONTROL, rxd[6]); - hdr.seq_ctrl = FIELD_GET(MT_RXD8_SEQ_CTRL, rxd[8]); - qos_ctrl = FIELD_GET(MT_RXD8_QOS_CTL, rxd[8]); - ht_ctrl = FIELD_GET(MT_RXD9_HT_CONTROL, rxd[9]); - + frame_control = le32_get_bits(rxd[6], MT_RXD6_FRAME_CONTROL); + hdr.frame_control = cpu_to_le16(frame_control); + hdr.seq_ctrl = cpu_to_le16(le32_get_bits(rxd[8], MT_RXD8_SEQ_CTRL)); hdr.duration_id = 0; + ether_addr_copy(hdr.addr1, vif->addr); ether_addr_copy(hdr.addr2, sta->addr); - switch (le16_to_cpu(hdr.frame_control) & - (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) { + switch (frame_control & (IEEE80211_FCTL_TODS | + IEEE80211_FCTL_FROMDS)) { case 0: ether_addr_copy(hdr.addr3, vif->bss_conf.bssid); break; case IEEE80211_FCTL_FROMDS: - ether_addr_copy(hdr.addr3, eth_hdr.h_source); + ether_addr_copy(hdr.addr3, eth_hdr->h_source); break; case IEEE80211_FCTL_TODS: - ether_addr_copy(hdr.addr3, eth_hdr.h_dest); + ether_addr_copy(hdr.addr3, eth_hdr->h_dest); break; case IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS: - ether_addr_copy(hdr.addr3, eth_hdr.h_dest); - ether_addr_copy(hdr.addr4, eth_hdr.h_source); + ether_addr_copy(hdr.addr3, eth_hdr->h_dest); + ether_addr_copy(hdr.addr4, eth_hdr->h_source); break; default: break; } skb_pull(skb, hdr_gap + sizeof(struct ethhdr) - 2); - if (eth_hdr.h_proto == htons(ETH_P_AARP) || - eth_hdr.h_proto == htons(ETH_P_IPX)) + if (eth_hdr->h_proto == cpu_to_be16(ETH_P_AARP) || + eth_hdr->h_proto == cpu_to_be16(ETH_P_IPX)) ether_addr_copy(skb_push(skb, ETH_ALEN), bridge_tunnel_header); - else if (eth_hdr.h_proto >= htons(ETH_P_802_3_MIN)) + else if 
(be16_to_cpu(eth_hdr->h_proto) >= ETH_P_802_3_MIN) ether_addr_copy(skb_push(skb, ETH_ALEN), rfc1042_header); else skb_pull(skb, 2); if (ieee80211_has_order(hdr.frame_control)) - memcpy(skb_push(skb, 2), &ht_ctrl, 2); - if (ieee80211_is_data_qos(hdr.frame_control)) - memcpy(skb_push(skb, 2), &qos_ctrl, 2); + memcpy(skb_push(skb, IEEE80211_HT_CTL_LEN), &rxd[9], + IEEE80211_HT_CTL_LEN); + if (ieee80211_is_data_qos(hdr.frame_control)) { + __le16 qos_ctrl; + + qos_ctrl = cpu_to_le16(le32_get_bits(rxd[8], MT_RXD8_QOS_CTL)); + memcpy(skb_push(skb, IEEE80211_QOS_CTL_LEN), &qos_ctrl, + IEEE80211_QOS_CTL_LEN); + } + if (ieee80211_has_a4(hdr.frame_control)) memcpy(skb_push(skb, sizeof(hdr)), &hdr, sizeof(hdr)); else @@ -1080,6 +1085,7 @@ mt7915_mac_write_txwi_80211(struct mt7915_dev *dev, __le32 *txwi, val = MT_TXD3_SN_VALID | FIELD_PREP(MT_TXD3_SEQ, IEEE80211_SEQ_TO_SN(seqno)); txwi[3] |= cpu_to_le32(val); + txwi[7] &= ~cpu_to_le32(MT_TXD7_HW_AMSDU); } val = FIELD_PREP(MT_TXD7_TYPE, fc_type) | @@ -1512,7 +1518,6 @@ mt7915_mac_add_txs_skb(struct mt7915_dev *dev, struct mt76_wcid *wcid, int pid, break; case MT_PHY_TYPE_HT: case MT_PHY_TYPE_HT_GF: - rate.mcs += (rate.nss - 1) * 8; if (rate.mcs > 31) goto out; @@ -1594,7 +1599,7 @@ static void mt7915_mac_add_txs(struct mt7915_dev *dev, void *data) if (pid < MT_PACKET_ID_FIRST) return; - if (wcidx >= MT7915_WTBL_SIZE) + if (wcidx >= mt7915_wtbl_size(dev)) return; rcu_read_lock(); diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c index 0911b6f973b5a..31634d7ed1737 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c @@ -211,24 +211,12 @@ mt7915_mcu_get_sta_nss(u16 mcs_map) static void mt7915_mcu_set_sta_he_mcs(struct ieee80211_sta *sta, __le16 *he_mcs, - const u16 *mask) + u16 mcs_map) { struct mt7915_sta *msta = (struct mt7915_sta *)sta->drv_priv; - struct cfg80211_chan_def *chandef = 
&msta->vif->phy->mt76->chandef; + enum nl80211_band band = msta->vif->phy->mt76->chandef.chan->band; + const u16 *mask = msta->vif->bitrate_mask.control[band].he_mcs; int nss, max_nss = sta->rx_nss > 3 ? 4 : sta->rx_nss; - u16 mcs_map; - - switch (chandef->width) { - case NL80211_CHAN_WIDTH_80P80: - mcs_map = le16_to_cpu(sta->he_cap.he_mcs_nss_supp.rx_mcs_80p80); - break; - case NL80211_CHAN_WIDTH_160: - mcs_map = le16_to_cpu(sta->he_cap.he_mcs_nss_supp.rx_mcs_160); - break; - default: - mcs_map = le16_to_cpu(sta->he_cap.he_mcs_nss_supp.rx_mcs_80); - break; - } for (nss = 0; nss < max_nss; nss++) { int mcs; @@ -1264,8 +1252,11 @@ mt7915_mcu_wtbl_generic_tlv(struct sk_buff *skb, struct ieee80211_vif *vif, generic = (struct wtbl_generic *)tlv; if (sta) { + if (vif->type == NL80211_IFTYPE_STATION) + generic->partial_aid = cpu_to_le16(vif->bss_conf.aid); + else + generic->partial_aid = cpu_to_le16(sta->aid); memcpy(generic->peer_addr, sta->addr, ETH_ALEN); - generic->partial_aid = cpu_to_le16(sta->aid); generic->muar_idx = mvif->mt76.omac_idx; generic->qos = sta->wme; } else { @@ -1319,12 +1310,15 @@ mt7915_mcu_sta_basic_tlv(struct sk_buff *skb, struct ieee80211_vif *vif, case NL80211_IFTYPE_MESH_POINT: case NL80211_IFTYPE_AP: basic->conn_type = cpu_to_le32(CONNECTION_INFRA_STA); + basic->aid = cpu_to_le16(sta->aid); break; case NL80211_IFTYPE_STATION: basic->conn_type = cpu_to_le32(CONNECTION_INFRA_AP); + basic->aid = cpu_to_le16(vif->bss_conf.aid); break; case NL80211_IFTYPE_ADHOC: basic->conn_type = cpu_to_le32(CONNECTION_IBSS_ADHOC); + basic->aid = cpu_to_le16(sta->aid); break; default: WARN_ON(1); @@ -1332,7 +1326,6 @@ mt7915_mcu_sta_basic_tlv(struct sk_buff *skb, struct ieee80211_vif *vif, } memcpy(basic->peer_addr, sta->addr, ETH_ALEN); - basic->aid = cpu_to_le16(sta->aid); basic->qos = sta->wme; } @@ -1340,11 +1333,9 @@ static void mt7915_mcu_sta_he_tlv(struct sk_buff *skb, struct ieee80211_sta *sta, struct ieee80211_vif *vif) { - struct mt7915_sta *msta = 
(struct mt7915_sta *)sta->drv_priv; struct mt7915_vif *mvif = (struct mt7915_vif *)vif->drv_priv; struct ieee80211_he_cap_elem *elem = &sta->he_cap.he_cap_elem; - enum nl80211_band band = msta->vif->phy->mt76->chandef.chan->band; - const u16 *mcs_mask = msta->vif->bitrate_mask.control[band].he_mcs; + struct ieee80211_he_mcs_nss_supp mcs_map; struct sta_rec_he *he; struct tlv *tlv; u32 cap = 0; @@ -1434,22 +1425,23 @@ mt7915_mcu_sta_he_tlv(struct sk_buff *skb, struct ieee80211_sta *sta, he->he_cap = cpu_to_le32(cap); + mcs_map = sta->he_cap.he_mcs_nss_supp; switch (sta->bandwidth) { case IEEE80211_STA_RX_BW_160: if (elem->phy_cap_info[0] & IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_80PLUS80_MHZ_IN_5G) mt7915_mcu_set_sta_he_mcs(sta, &he->max_nss_mcs[CMD_HE_MCS_BW8080], - mcs_mask); + le16_to_cpu(mcs_map.rx_mcs_80p80)); mt7915_mcu_set_sta_he_mcs(sta, &he->max_nss_mcs[CMD_HE_MCS_BW160], - mcs_mask); + le16_to_cpu(mcs_map.rx_mcs_160)); fallthrough; default: mt7915_mcu_set_sta_he_mcs(sta, &he->max_nss_mcs[CMD_HE_MCS_BW80], - mcs_mask); + le16_to_cpu(mcs_map.rx_mcs_80)); break; } @@ -1524,9 +1516,6 @@ mt7915_mcu_sta_muru_tlv(struct sk_buff *skb, struct ieee80211_sta *sta, vif->type != NL80211_IFTYPE_AP) return; - if (!sta->vht_cap.vht_supported) - return; - tlv = mt7915_mcu_add_tlv(skb, STA_REC_MURU, sizeof(*muru)); muru = (struct sta_rec_muru *)tlv; @@ -1534,9 +1523,12 @@ mt7915_mcu_sta_muru_tlv(struct sk_buff *skb, struct ieee80211_sta *sta, muru->cfg.mimo_dl_en = mvif->cap.he_mu_ebfer || mvif->cap.vht_mu_ebfer || mvif->cap.vht_mu_ebfee; + muru->cfg.mimo_ul_en = true; + muru->cfg.ofdma_dl_en = true; - muru->mimo_dl.vht_mu_bfee = - !!(sta->vht_cap.cap & IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE); + if (sta->vht_cap.vht_supported) + muru->mimo_dl.vht_mu_bfee = + !!(sta->vht_cap.cap & IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE); if (!sta->he_cap.has_he) return; @@ -1544,13 +1536,11 @@ mt7915_mcu_sta_muru_tlv(struct sk_buff *skb, struct ieee80211_sta *sta, 
muru->mimo_dl.partial_bw_dl_mimo = HE_PHY(CAP6_PARTIAL_BANDWIDTH_DL_MUMIMO, elem->phy_cap_info[6]); - muru->cfg.mimo_ul_en = true; muru->mimo_ul.full_ul_mimo = HE_PHY(CAP2_UL_MU_FULL_MU_MIMO, elem->phy_cap_info[2]); muru->mimo_ul.partial_ul_mimo = HE_PHY(CAP2_UL_MU_PARTIAL_MU_MIMO, elem->phy_cap_info[2]); - muru->cfg.ofdma_dl_en = true; muru->ofdma_dl.punc_pream_rx = HE_PHY(CAP1_PREAMBLE_PUNC_RX_MASK, elem->phy_cap_info[1]); muru->ofdma_dl.he_20m_in_40m_2g = @@ -2134,9 +2124,12 @@ mt7915_mcu_add_rate_ctrl_fixed(struct mt7915_dev *dev, phy.sgi |= gi << (i << (_he)); \ phy.he_ltf |= mask->control[band].he_ltf << (i << (_he));\ } \ - for (i = 0; i < ARRAY_SIZE(mask->control[band]._mcs); i++) \ - nrates += hweight16(mask->control[band]._mcs[i]); \ - phy.mcs = ffs(mask->control[band]._mcs[0]) - 1; \ + for (i = 0; i < ARRAY_SIZE(mask->control[band]._mcs); i++) { \ + if (!mask->control[band]._mcs[i]) \ + continue; \ + nrates += hweight16(mask->control[band]._mcs[i]); \ + phy.mcs = ffs(mask->control[band]._mcs[i]) - 1; \ + } \ } while (0) if (sta->he_cap.has_he) { @@ -2394,8 +2387,10 @@ int mt7915_mcu_add_sta(struct mt7915_dev *dev, struct ieee80211_vif *vif, } ret = mt7915_mcu_sta_wtbl_tlv(dev, skb, vif, sta); - if (ret) + if (ret) { + dev_kfree_skb(skb); return ret; + } if (sta && sta->ht_cap.ht_supported) { /* starec amsdu */ @@ -2409,8 +2404,10 @@ int mt7915_mcu_add_sta(struct mt7915_dev *dev, struct ieee80211_vif *vif, } ret = mt7915_mcu_add_group(dev, vif, sta); - if (ret) + if (ret) { + dev_kfree_skb(skb); return ret; + } out: return mt76_mcu_skb_send_msg(&dev->mt76, skb, MCU_EXT_CMD(STA_REC_UPDATE), true); diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h b/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h index 42d887383e8d8..12ca545664614 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h +++ b/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h @@ -12,7 +12,8 @@ #define MT7915_MAX_INTERFACES 19 #define MT7915_MAX_WMM_SETS 4 #define 
MT7915_WTBL_SIZE 288 -#define MT7915_WTBL_RESERVED (MT7915_WTBL_SIZE - 1) +#define MT7916_WTBL_SIZE 544 +#define MT7915_WTBL_RESERVED (mt7915_wtbl_size(dev) - 1) #define MT7915_WTBL_STA (MT7915_WTBL_RESERVED - \ MT7915_MAX_INTERFACES) @@ -449,6 +450,11 @@ static inline bool is_mt7915(struct mt76_dev *dev) return mt76_chip(dev) == 0x7915; } +static inline u16 mt7915_wtbl_size(struct mt7915_dev *dev) +{ + return is_mt7915(&dev->mt76) ? MT7915_WTBL_SIZE : MT7916_WTBL_SIZE; +} + void mt7915_dual_hif_set_irq_mask(struct mt7915_dev *dev, bool write_reg, u32 clear, u32 set); diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/debugfs.c b/drivers/net/wireless/mediatek/mt76/mt7921/debugfs.c index 86fd7292b229f..196b50e616fe0 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/debugfs.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/debugfs.c @@ -129,23 +129,22 @@ mt7921_queues_acq(struct seq_file *s, void *data) mt7921_mutex_acquire(dev); - for (i = 0; i < 16; i++) { - int j, acs = i / 4, index = i % 4; + for (i = 0; i < 4; i++) { u32 ctrl, val, qlen = 0; + int j; - val = mt76_rr(dev, MT_PLE_AC_QEMPTY(acs, index)); - ctrl = BIT(31) | BIT(15) | (acs << 8); + val = mt76_rr(dev, MT_PLE_AC_QEMPTY(i)); + ctrl = BIT(31) | BIT(11) | (i << 24); for (j = 0; j < 32; j++) { if (val & BIT(j)) continue; - mt76_wr(dev, MT_PLE_FL_Q0_CTRL, - ctrl | (j + (index << 5))); + mt76_wr(dev, MT_PLE_FL_Q0_CTRL, ctrl | j); qlen += mt76_get_field(dev, MT_PLE_FL_Q3_CTRL, GENMASK(11, 0)); } - seq_printf(s, "AC%d%d: queued=%d\n", acs, index, qlen); + seq_printf(s, "AC%d: queued=%d\n", i, qlen); } mt7921_mutex_release(dev); @@ -291,13 +290,12 @@ mt7921_pm_set(void *data, u64 val) pm->enable = false; mt76_connac_pm_wake(&dev->mphy, pm); + pm->enable = val; ieee80211_iterate_active_interfaces(mt76_hw(dev), IEEE80211_IFACE_ITER_RESUME_ALL, mt7921_pm_interface_iter, dev); mt76_connac_mcu_set_deep_sleep(&dev->mt76, pm->ds_enable); - - pm->enable = val; mt76_connac_power_save_sched(&dev->mphy, pm); out: 
mutex_unlock(&dev->mt76.mutex); diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/dma.c b/drivers/net/wireless/mediatek/mt76/mt7921/dma.c index cdff1fd52d93a..39d6ce4ecddd7 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/dma.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/dma.c @@ -78,110 +78,6 @@ static void mt7921_dma_prefetch(struct mt7921_dev *dev) mt76_wr(dev, MT_WFDMA0_TX_RING17_EXT_CTRL, PREFETCH(0x380, 0x4)); } -static u32 __mt7921_reg_addr(struct mt7921_dev *dev, u32 addr) -{ - static const struct { - u32 phys; - u32 mapped; - u32 size; - } fixed_map[] = { - { 0x820d0000, 0x30000, 0x10000 }, /* WF_LMAC_TOP (WF_WTBLON) */ - { 0x820ed000, 0x24800, 0x0800 }, /* WF_LMAC_TOP BN0 (WF_MIB) */ - { 0x820e4000, 0x21000, 0x0400 }, /* WF_LMAC_TOP BN0 (WF_TMAC) */ - { 0x820e7000, 0x21e00, 0x0200 }, /* WF_LMAC_TOP BN0 (WF_DMA) */ - { 0x820eb000, 0x24200, 0x0400 }, /* WF_LMAC_TOP BN0 (WF_LPON) */ - { 0x820e2000, 0x20800, 0x0400 }, /* WF_LMAC_TOP BN0 (WF_AGG) */ - { 0x820e3000, 0x20c00, 0x0400 }, /* WF_LMAC_TOP BN0 (WF_ARB) */ - { 0x820e5000, 0x21400, 0x0800 }, /* WF_LMAC_TOP BN0 (WF_RMAC) */ - { 0x00400000, 0x80000, 0x10000 }, /* WF_MCU_SYSRAM */ - { 0x00410000, 0x90000, 0x10000 }, /* WF_MCU_SYSRAM (configure register) */ - { 0x40000000, 0x70000, 0x10000 }, /* WF_UMAC_SYSRAM */ - { 0x54000000, 0x02000, 0x1000 }, /* WFDMA PCIE0 MCU DMA0 */ - { 0x55000000, 0x03000, 0x1000 }, /* WFDMA PCIE0 MCU DMA1 */ - { 0x58000000, 0x06000, 0x1000 }, /* WFDMA PCIE1 MCU DMA0 (MEM_DMA) */ - { 0x59000000, 0x07000, 0x1000 }, /* WFDMA PCIE1 MCU DMA1 */ - { 0x7c000000, 0xf0000, 0x10000 }, /* CONN_INFRA */ - { 0x7c020000, 0xd0000, 0x10000 }, /* CONN_INFRA, WFDMA */ - { 0x7c060000, 0xe0000, 0x10000 }, /* CONN_INFRA, conn_host_csr_top */ - { 0x80020000, 0xb0000, 0x10000 }, /* WF_TOP_MISC_OFF */ - { 0x81020000, 0xc0000, 0x10000 }, /* WF_TOP_MISC_ON */ - { 0x820c0000, 0x08000, 0x4000 }, /* WF_UMAC_TOP (PLE) */ - { 0x820c8000, 0x0c000, 0x2000 }, /* WF_UMAC_TOP (PSE) */ - { 0x820cc000, 
0x0e000, 0x1000 }, /* WF_UMAC_TOP (PP) */ - { 0x820cd000, 0x0f000, 0x1000 }, /* WF_MDP_TOP */ - { 0x820ce000, 0x21c00, 0x0200 }, /* WF_LMAC_TOP (WF_SEC) */ - { 0x820cf000, 0x22000, 0x1000 }, /* WF_LMAC_TOP (WF_PF) */ - { 0x820e0000, 0x20000, 0x0400 }, /* WF_LMAC_TOP BN0 (WF_CFG) */ - { 0x820e1000, 0x20400, 0x0200 }, /* WF_LMAC_TOP BN0 (WF_TRB) */ - { 0x820e9000, 0x23400, 0x0200 }, /* WF_LMAC_TOP BN0 (WF_WTBLOFF) */ - { 0x820ea000, 0x24000, 0x0200 }, /* WF_LMAC_TOP BN0 (WF_ETBF) */ - { 0x820ec000, 0x24600, 0x0200 }, /* WF_LMAC_TOP BN0 (WF_INT) */ - { 0x820f0000, 0xa0000, 0x0400 }, /* WF_LMAC_TOP BN1 (WF_CFG) */ - { 0x820f1000, 0xa0600, 0x0200 }, /* WF_LMAC_TOP BN1 (WF_TRB) */ - { 0x820f2000, 0xa0800, 0x0400 }, /* WF_LMAC_TOP BN1 (WF_AGG) */ - { 0x820f3000, 0xa0c00, 0x0400 }, /* WF_LMAC_TOP BN1 (WF_ARB) */ - { 0x820f4000, 0xa1000, 0x0400 }, /* WF_LMAC_TOP BN1 (WF_TMAC) */ - { 0x820f5000, 0xa1400, 0x0800 }, /* WF_LMAC_TOP BN1 (WF_RMAC) */ - { 0x820f7000, 0xa1e00, 0x0200 }, /* WF_LMAC_TOP BN1 (WF_DMA) */ - { 0x820f9000, 0xa3400, 0x0200 }, /* WF_LMAC_TOP BN1 (WF_WTBLOFF) */ - { 0x820fa000, 0xa4000, 0x0200 }, /* WF_LMAC_TOP BN1 (WF_ETBF) */ - { 0x820fb000, 0xa4200, 0x0400 }, /* WF_LMAC_TOP BN1 (WF_LPON) */ - { 0x820fc000, 0xa4600, 0x0200 }, /* WF_LMAC_TOP BN1 (WF_INT) */ - { 0x820fd000, 0xa4800, 0x0800 }, /* WF_LMAC_TOP BN1 (WF_MIB) */ - }; - int i; - - if (addr < 0x100000) - return addr; - - for (i = 0; i < ARRAY_SIZE(fixed_map); i++) { - u32 ofs; - - if (addr < fixed_map[i].phys) - continue; - - ofs = addr - fixed_map[i].phys; - if (ofs > fixed_map[i].size) - continue; - - return fixed_map[i].mapped + ofs; - } - - if ((addr >= 0x18000000 && addr < 0x18c00000) || - (addr >= 0x70000000 && addr < 0x78000000) || - (addr >= 0x7c000000 && addr < 0x7c400000)) - return mt7921_reg_map_l1(dev, addr); - - dev_err(dev->mt76.dev, "Access currently unsupported address %08x\n", - addr); - - return 0; -} - -static u32 mt7921_rr(struct mt76_dev *mdev, u32 offset) -{ - struct mt7921_dev 
*dev = container_of(mdev, struct mt7921_dev, mt76); - u32 addr = __mt7921_reg_addr(dev, offset); - - return dev->bus_ops->rr(mdev, addr); -} - -static void mt7921_wr(struct mt76_dev *mdev, u32 offset, u32 val) -{ - struct mt7921_dev *dev = container_of(mdev, struct mt7921_dev, mt76); - u32 addr = __mt7921_reg_addr(dev, offset); - - dev->bus_ops->wr(mdev, addr, val); -} - -static u32 mt7921_rmw(struct mt76_dev *mdev, u32 offset, u32 mask, u32 val) -{ - struct mt7921_dev *dev = container_of(mdev, struct mt7921_dev, mt76); - u32 addr = __mt7921_reg_addr(dev, offset); - - return dev->bus_ops->rmw(mdev, addr, mask, val); -} - static int mt7921_dma_disable(struct mt7921_dev *dev, bool force) { if (force) { @@ -341,23 +237,8 @@ int mt7921_wpdma_reinit_cond(struct mt7921_dev *dev) int mt7921_dma_init(struct mt7921_dev *dev) { - struct mt76_bus_ops *bus_ops; int ret; - dev->phy.dev = dev; - dev->phy.mt76 = &dev->mt76.phy; - dev->mt76.phy.priv = &dev->phy; - dev->bus_ops = dev->mt76.bus; - bus_ops = devm_kmemdup(dev->mt76.dev, dev->bus_ops, sizeof(*bus_ops), - GFP_KERNEL); - if (!bus_ops) - return -ENOMEM; - - bus_ops->rr = mt7921_rr; - bus_ops->wr = mt7921_wr; - bus_ops->rmw = mt7921_rmw; - dev->mt76.bus = bus_ops; - mt76_dma_attach(&dev->mt76); ret = mt7921_dma_disable(dev, true); diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c index ec10f95a46495..84f72dd1bf930 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c @@ -402,13 +402,13 @@ mt7921_mac_assoc_rssi(struct mt7921_dev *dev, struct sk_buff *skb) static int mt7921_reverse_frag0_hdr_trans(struct sk_buff *skb, u16 hdr_gap) { struct mt76_rx_status *status = (struct mt76_rx_status *)skb->cb; + struct ethhdr *eth_hdr = (struct ethhdr *)(skb->data + hdr_gap); struct mt7921_sta *msta = (struct mt7921_sta *)status->wcid; + __le32 *rxd = (__le32 *)skb->data; struct ieee80211_sta *sta; struct 
ieee80211_vif *vif; struct ieee80211_hdr hdr; - struct ethhdr eth_hdr; - __le32 *rxd = (__le32 *)skb->data; - __le32 qos_ctrl, ht_ctrl; + u16 frame_control; if (FIELD_GET(MT_RXD3_NORMAL_ADDR_TYPE, le32_to_cpu(rxd[3])) != MT_RXD3_NORMAL_U2M) @@ -424,47 +424,52 @@ static int mt7921_reverse_frag0_hdr_trans(struct sk_buff *skb, u16 hdr_gap) vif = container_of((void *)msta->vif, struct ieee80211_vif, drv_priv); /* store the info from RXD and ethhdr to avoid being overridden */ - memcpy(&eth_hdr, skb->data + hdr_gap, sizeof(eth_hdr)); - hdr.frame_control = FIELD_GET(MT_RXD6_FRAME_CONTROL, rxd[6]); - hdr.seq_ctrl = FIELD_GET(MT_RXD8_SEQ_CTRL, rxd[8]); - qos_ctrl = FIELD_GET(MT_RXD8_QOS_CTL, rxd[8]); - ht_ctrl = FIELD_GET(MT_RXD9_HT_CONTROL, rxd[9]); - + frame_control = le32_get_bits(rxd[6], MT_RXD6_FRAME_CONTROL); + hdr.frame_control = cpu_to_le16(frame_control); + hdr.seq_ctrl = cpu_to_le16(le32_get_bits(rxd[8], MT_RXD8_SEQ_CTRL)); hdr.duration_id = 0; + ether_addr_copy(hdr.addr1, vif->addr); ether_addr_copy(hdr.addr2, sta->addr); - switch (le16_to_cpu(hdr.frame_control) & - (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) { + switch (frame_control & (IEEE80211_FCTL_TODS | + IEEE80211_FCTL_FROMDS)) { case 0: ether_addr_copy(hdr.addr3, vif->bss_conf.bssid); break; case IEEE80211_FCTL_FROMDS: - ether_addr_copy(hdr.addr3, eth_hdr.h_source); + ether_addr_copy(hdr.addr3, eth_hdr->h_source); break; case IEEE80211_FCTL_TODS: - ether_addr_copy(hdr.addr3, eth_hdr.h_dest); + ether_addr_copy(hdr.addr3, eth_hdr->h_dest); break; case IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS: - ether_addr_copy(hdr.addr3, eth_hdr.h_dest); - ether_addr_copy(hdr.addr4, eth_hdr.h_source); + ether_addr_copy(hdr.addr3, eth_hdr->h_dest); + ether_addr_copy(hdr.addr4, eth_hdr->h_source); break; default: break; } skb_pull(skb, hdr_gap + sizeof(struct ethhdr) - 2); - if (eth_hdr.h_proto == htons(ETH_P_AARP) || - eth_hdr.h_proto == htons(ETH_P_IPX)) + if (eth_hdr->h_proto == cpu_to_be16(ETH_P_AARP) || + 
eth_hdr->h_proto == cpu_to_be16(ETH_P_IPX)) ether_addr_copy(skb_push(skb, ETH_ALEN), bridge_tunnel_header); - else if (eth_hdr.h_proto >= htons(ETH_P_802_3_MIN)) + else if (be16_to_cpu(eth_hdr->h_proto) >= ETH_P_802_3_MIN) ether_addr_copy(skb_push(skb, ETH_ALEN), rfc1042_header); else skb_pull(skb, 2); if (ieee80211_has_order(hdr.frame_control)) - memcpy(skb_push(skb, 2), &ht_ctrl, 2); - if (ieee80211_is_data_qos(hdr.frame_control)) - memcpy(skb_push(skb, 2), &qos_ctrl, 2); + memcpy(skb_push(skb, IEEE80211_HT_CTL_LEN), &rxd[9], + IEEE80211_HT_CTL_LEN); + if (ieee80211_is_data_qos(hdr.frame_control)) { + __le16 qos_ctrl; + + qos_ctrl = cpu_to_le16(le32_get_bits(rxd[8], MT_RXD8_QOS_CTL)); + memcpy(skb_push(skb, IEEE80211_QOS_CTL_LEN), &qos_ctrl, + IEEE80211_QOS_CTL_LEN); + } + if (ieee80211_has_a4(hdr.frame_control)) memcpy(skb_push(skb, sizeof(hdr)), &hdr, sizeof(hdr)); else @@ -914,9 +919,15 @@ mt7921_mac_write_txwi_80211(struct mt7921_dev *dev, __le32 *txwi, txwi[3] |= cpu_to_le32(val); } - val = FIELD_PREP(MT_TXD7_TYPE, fc_type) | - FIELD_PREP(MT_TXD7_SUB_TYPE, fc_stype); - txwi[7] |= cpu_to_le32(val); + if (mt76_is_mmio(&dev->mt76)) { + val = FIELD_PREP(MT_TXD7_TYPE, fc_type) | + FIELD_PREP(MT_TXD7_SUB_TYPE, fc_stype); + txwi[7] |= cpu_to_le32(val); + } else { + val = FIELD_PREP(MT_TXD8_L_TYPE, fc_type) | + FIELD_PREP(MT_TXD8_L_SUB_TYPE, fc_stype); + txwi[8] |= cpu_to_le32(val); + } } void mt7921_mac_write_txwi(struct mt7921_dev *dev, __le32 *txwi, @@ -1092,7 +1103,6 @@ mt7921_mac_add_txs_skb(struct mt7921_dev *dev, struct mt76_wcid *wcid, int pid, break; case MT_PHY_TYPE_HT: case MT_PHY_TYPE_HT_GF: - rate.mcs += (rate.nss - 1) * 8; if (rate.mcs > 31) goto out; @@ -1551,6 +1561,14 @@ void mt7921_pm_power_save_work(struct work_struct *work) test_bit(MT76_HW_SCHED_SCANNING, &mphy->state)) goto out; + if (mutex_is_locked(&dev->mt76.mutex)) + /* if mt76 mutex is held we should not put the device + * to sleep since we are currently accessing device + * register map. 
We need to wait for the next power_save + * trigger. + */ + goto out; + if (time_is_after_jiffies(dev->pm.last_activity + delta)) { delta = dev->pm.last_activity + delta - jiffies; goto out; diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mac.h b/drivers/net/wireless/mediatek/mt76/mt7921/mac.h index 544a1c33126a4..12e1cf8abe6ea 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/mac.h +++ b/drivers/net/wireless/mediatek/mt76/mt7921/mac.h @@ -284,6 +284,9 @@ enum tx_mcu_port_q_idx { #define MT_TXD7_HW_AMSDU BIT(10) #define MT_TXD7_TX_TIME GENMASK(9, 0) +#define MT_TXD8_L_TYPE GENMASK(5, 4) +#define MT_TXD8_L_SUB_TYPE GENMASK(3, 0) + #define MT_TX_RATE_STBC BIT(13) #define MT_TX_RATE_NSS GENMASK(12, 10) #define MT_TX_RATE_MODE GENMASK(9, 6) diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/main.c b/drivers/net/wireless/mediatek/mt76/mt7921/main.c index 7a8d2596c2265..4abb7a6e775af 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/main.c @@ -273,6 +273,7 @@ static void mt7921_stop(struct ieee80211_hw *hw) cancel_delayed_work_sync(&dev->pm.ps_work); cancel_work_sync(&dev->pm.wake_work); + cancel_work_sync(&dev->reset_work); mt76_connac_free_pending_tx_skbs(&dev->pm, NULL); mt7921_mutex_acquire(dev); diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c index ef1e1ef91611b..e82545a7fcc11 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c @@ -707,12 +707,8 @@ static int mt7921_load_patch(struct mt7921_dev *dev) if (mt76_is_sdio(&dev->mt76)) { /* activate again */ ret = __mt7921_mcu_fw_pmctrl(dev); - if (ret) - return ret; - - ret = __mt7921_mcu_drv_pmctrl(dev); - if (ret) - return ret; + if (!ret) + ret = __mt7921_mcu_drv_pmctrl(dev); } out: @@ -920,33 +916,28 @@ EXPORT_SYMBOL_GPL(mt7921_mcu_exit); int mt7921_mcu_set_tx(struct mt7921_dev *dev, struct ieee80211_vif *vif) { 
-#define WMM_AIFS_SET BIT(0) -#define WMM_CW_MIN_SET BIT(1) -#define WMM_CW_MAX_SET BIT(2) -#define WMM_TXOP_SET BIT(3) -#define WMM_PARAM_SET GENMASK(3, 0) -#define TX_CMD_MODE 1 + struct mt7921_vif *mvif = (struct mt7921_vif *)vif->drv_priv; + struct edca { - u8 queue; - u8 set; - u8 aifs; - u8 cw_min; + __le16 cw_min; __le16 cw_max; __le16 txop; - }; + __le16 aifs; + u8 guardtime; + u8 acm; + } __packed; struct mt7921_mcu_tx { - u8 total; - u8 action; - u8 valid; - u8 mode; - struct edca edca[IEEE80211_NUM_ACS]; + u8 bss_idx; + u8 qos; + u8 wmm_idx; + u8 pad; } __packed req = { - .valid = true, - .mode = TX_CMD_MODE, - .total = IEEE80211_NUM_ACS, + .bss_idx = mvif->mt76.idx, + .qos = vif->bss_conf.qos, + .wmm_idx = mvif->mt76.wmm_idx, }; - struct mt7921_vif *mvif = (struct mt7921_vif *)vif->drv_priv; + struct mu_edca { u8 cw_min; u8 cw_max; @@ -970,30 +961,29 @@ int mt7921_mcu_set_tx(struct mt7921_dev *dev, struct ieee80211_vif *vif) .qos = vif->bss_conf.qos, .wmm_idx = mvif->mt76.wmm_idx, }; + int to_aci[] = {1, 0, 2, 3}; int ac, ret; for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { struct ieee80211_tx_queue_params *q = &mvif->queue_params[ac]; - struct edca *e = &req.edca[ac]; + struct edca *e = &req.edca[to_aci[ac]]; - e->set = WMM_PARAM_SET; - e->queue = ac + mvif->mt76.wmm_idx * MT7921_MAX_WMM_SETS; e->aifs = q->aifs; e->txop = cpu_to_le16(q->txop); if (q->cw_min) - e->cw_min = fls(q->cw_min); + e->cw_min = cpu_to_le16(q->cw_min); else e->cw_min = 5; if (q->cw_max) - e->cw_max = cpu_to_le16(fls(q->cw_max)); + e->cw_max = cpu_to_le16(q->cw_max); else e->cw_max = cpu_to_le16(10); } - ret = mt76_mcu_send_msg(&dev->mt76, MCU_EXT_CMD(EDCA_UPDATE), - &req, sizeof(req), true); + ret = mt76_mcu_send_msg(&dev->mt76, MCU_CE_CMD(SET_EDCA_PARMS), &req, + sizeof(req), false); if (ret) return ret; @@ -1003,7 +993,6 @@ int mt7921_mcu_set_tx(struct mt7921_dev *dev, struct ieee80211_vif *vif) for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { struct ieee80211_he_mu_edca_param_ac_rec 
*q; struct mu_edca *e; - int to_aci[] = {1, 0, 2, 3}; if (!mvif->queue_params[ac].mu_edca) break; diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h index 96647801850a5..33f8e5b541b35 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h +++ b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h @@ -452,6 +452,7 @@ int mt7921e_mcu_init(struct mt7921_dev *dev); int mt7921s_wfsys_reset(struct mt7921_dev *dev); int mt7921s_mac_reset(struct mt7921_dev *dev); int mt7921s_init_reset(struct mt7921_dev *dev); +int __mt7921e_mcu_drv_pmctrl(struct mt7921_dev *dev); int mt7921e_mcu_drv_pmctrl(struct mt7921_dev *dev); int mt7921e_mcu_fw_pmctrl(struct mt7921_dev *dev); diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/pci.c b/drivers/net/wireless/mediatek/mt76/mt7921/pci.c index 9dae2f5972bf9..9a71a5d864819 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/pci.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/pci.c @@ -121,6 +121,110 @@ static void mt7921e_unregister_device(struct mt7921_dev *dev) mt76_free_device(&dev->mt76); } +static u32 __mt7921_reg_addr(struct mt7921_dev *dev, u32 addr) +{ + static const struct { + u32 phys; + u32 mapped; + u32 size; + } fixed_map[] = { + { 0x820d0000, 0x30000, 0x10000 }, /* WF_LMAC_TOP (WF_WTBLON) */ + { 0x820ed000, 0x24800, 0x0800 }, /* WF_LMAC_TOP BN0 (WF_MIB) */ + { 0x820e4000, 0x21000, 0x0400 }, /* WF_LMAC_TOP BN0 (WF_TMAC) */ + { 0x820e7000, 0x21e00, 0x0200 }, /* WF_LMAC_TOP BN0 (WF_DMA) */ + { 0x820eb000, 0x24200, 0x0400 }, /* WF_LMAC_TOP BN0 (WF_LPON) */ + { 0x820e2000, 0x20800, 0x0400 }, /* WF_LMAC_TOP BN0 (WF_AGG) */ + { 0x820e3000, 0x20c00, 0x0400 }, /* WF_LMAC_TOP BN0 (WF_ARB) */ + { 0x820e5000, 0x21400, 0x0800 }, /* WF_LMAC_TOP BN0 (WF_RMAC) */ + { 0x00400000, 0x80000, 0x10000 }, /* WF_MCU_SYSRAM */ + { 0x00410000, 0x90000, 0x10000 }, /* WF_MCU_SYSRAM (configure register) */ + { 0x40000000, 0x70000, 0x10000 }, /* WF_UMAC_SYSRAM */ + { 
0x54000000, 0x02000, 0x1000 }, /* WFDMA PCIE0 MCU DMA0 */ + { 0x55000000, 0x03000, 0x1000 }, /* WFDMA PCIE0 MCU DMA1 */ + { 0x58000000, 0x06000, 0x1000 }, /* WFDMA PCIE1 MCU DMA0 (MEM_DMA) */ + { 0x59000000, 0x07000, 0x1000 }, /* WFDMA PCIE1 MCU DMA1 */ + { 0x7c000000, 0xf0000, 0x10000 }, /* CONN_INFRA */ + { 0x7c020000, 0xd0000, 0x10000 }, /* CONN_INFRA, WFDMA */ + { 0x7c060000, 0xe0000, 0x10000 }, /* CONN_INFRA, conn_host_csr_top */ + { 0x80020000, 0xb0000, 0x10000 }, /* WF_TOP_MISC_OFF */ + { 0x81020000, 0xc0000, 0x10000 }, /* WF_TOP_MISC_ON */ + { 0x820c0000, 0x08000, 0x4000 }, /* WF_UMAC_TOP (PLE) */ + { 0x820c8000, 0x0c000, 0x2000 }, /* WF_UMAC_TOP (PSE) */ + { 0x820cc000, 0x0e000, 0x1000 }, /* WF_UMAC_TOP (PP) */ + { 0x820cd000, 0x0f000, 0x1000 }, /* WF_MDP_TOP */ + { 0x820ce000, 0x21c00, 0x0200 }, /* WF_LMAC_TOP (WF_SEC) */ + { 0x820cf000, 0x22000, 0x1000 }, /* WF_LMAC_TOP (WF_PF) */ + { 0x820e0000, 0x20000, 0x0400 }, /* WF_LMAC_TOP BN0 (WF_CFG) */ + { 0x820e1000, 0x20400, 0x0200 }, /* WF_LMAC_TOP BN0 (WF_TRB) */ + { 0x820e9000, 0x23400, 0x0200 }, /* WF_LMAC_TOP BN0 (WF_WTBLOFF) */ + { 0x820ea000, 0x24000, 0x0200 }, /* WF_LMAC_TOP BN0 (WF_ETBF) */ + { 0x820ec000, 0x24600, 0x0200 }, /* WF_LMAC_TOP BN0 (WF_INT) */ + { 0x820f0000, 0xa0000, 0x0400 }, /* WF_LMAC_TOP BN1 (WF_CFG) */ + { 0x820f1000, 0xa0600, 0x0200 }, /* WF_LMAC_TOP BN1 (WF_TRB) */ + { 0x820f2000, 0xa0800, 0x0400 }, /* WF_LMAC_TOP BN1 (WF_AGG) */ + { 0x820f3000, 0xa0c00, 0x0400 }, /* WF_LMAC_TOP BN1 (WF_ARB) */ + { 0x820f4000, 0xa1000, 0x0400 }, /* WF_LMAC_TOP BN1 (WF_TMAC) */ + { 0x820f5000, 0xa1400, 0x0800 }, /* WF_LMAC_TOP BN1 (WF_RMAC) */ + { 0x820f7000, 0xa1e00, 0x0200 }, /* WF_LMAC_TOP BN1 (WF_DMA) */ + { 0x820f9000, 0xa3400, 0x0200 }, /* WF_LMAC_TOP BN1 (WF_WTBLOFF) */ + { 0x820fa000, 0xa4000, 0x0200 }, /* WF_LMAC_TOP BN1 (WF_ETBF) */ + { 0x820fb000, 0xa4200, 0x0400 }, /* WF_LMAC_TOP BN1 (WF_LPON) */ + { 0x820fc000, 0xa4600, 0x0200 }, /* WF_LMAC_TOP BN1 (WF_INT) */ + { 0x820fd000, 0xa4800, 
0x0800 }, /* WF_LMAC_TOP BN1 (WF_MIB) */ + }; + int i; + + if (addr < 0x100000) + return addr; + + for (i = 0; i < ARRAY_SIZE(fixed_map); i++) { + u32 ofs; + + if (addr < fixed_map[i].phys) + continue; + + ofs = addr - fixed_map[i].phys; + if (ofs > fixed_map[i].size) + continue; + + return fixed_map[i].mapped + ofs; + } + + if ((addr >= 0x18000000 && addr < 0x18c00000) || + (addr >= 0x70000000 && addr < 0x78000000) || + (addr >= 0x7c000000 && addr < 0x7c400000)) + return mt7921_reg_map_l1(dev, addr); + + dev_err(dev->mt76.dev, "Access currently unsupported address %08x\n", + addr); + + return 0; +} + +static u32 mt7921_rr(struct mt76_dev *mdev, u32 offset) +{ + struct mt7921_dev *dev = container_of(mdev, struct mt7921_dev, mt76); + u32 addr = __mt7921_reg_addr(dev, offset); + + return dev->bus_ops->rr(mdev, addr); +} + +static void mt7921_wr(struct mt76_dev *mdev, u32 offset, u32 val) +{ + struct mt7921_dev *dev = container_of(mdev, struct mt7921_dev, mt76); + u32 addr = __mt7921_reg_addr(dev, offset); + + dev->bus_ops->wr(mdev, addr, val); +} + +static u32 mt7921_rmw(struct mt76_dev *mdev, u32 offset, u32 mask, u32 val) +{ + struct mt7921_dev *dev = container_of(mdev, struct mt7921_dev, mt76); + u32 addr = __mt7921_reg_addr(dev, offset); + + return dev->bus_ops->rmw(mdev, addr, mask, val); +} + static int mt7921_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { @@ -151,6 +255,7 @@ static int mt7921_pci_probe(struct pci_dev *pdev, .fw_own = mt7921e_mcu_fw_pmctrl, }; + struct mt76_bus_ops *bus_ops; struct mt7921_dev *dev; struct mt76_dev *mdev; int ret; @@ -188,6 +293,25 @@ static int mt7921_pci_probe(struct pci_dev *pdev, mt76_mmio_init(&dev->mt76, pcim_iomap_table(pdev)[0]); tasklet_init(&dev->irq_tasklet, mt7921_irq_tasklet, (unsigned long)dev); + + dev->phy.dev = dev; + dev->phy.mt76 = &dev->mt76.phy; + dev->mt76.phy.priv = &dev->phy; + dev->bus_ops = dev->mt76.bus; + bus_ops = devm_kmemdup(dev->mt76.dev, dev->bus_ops, sizeof(*bus_ops), + 
GFP_KERNEL); + if (!bus_ops) + return -ENOMEM; + + bus_ops->rr = mt7921_rr; + bus_ops->wr = mt7921_wr; + bus_ops->rmw = mt7921_rmw; + dev->mt76.bus = bus_ops; + + ret = __mt7921e_mcu_drv_pmctrl(dev); + if (ret) + return ret; + mdev->rev = (mt7921_l1_rr(dev, MT_HW_CHIPID) << 16) | (mt7921_l1_rr(dev, MT_HW_REV) & 0xff); dev_info(mdev->dev, "ASIC revision: %04x\n", mdev->rev); diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/pci_mcu.c b/drivers/net/wireless/mediatek/mt76/mt7921/pci_mcu.c index a020352122a12..daa73c92426ca 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/pci_mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/pci_mcu.c @@ -59,10 +59,8 @@ int mt7921e_mcu_init(struct mt7921_dev *dev) return err; } -int mt7921e_mcu_drv_pmctrl(struct mt7921_dev *dev) +int __mt7921e_mcu_drv_pmctrl(struct mt7921_dev *dev) { - struct mt76_phy *mphy = &dev->mt76.phy; - struct mt76_connac_pm *pm = &dev->pm; int i, err = 0; for (i = 0; i < MT7921_DRV_OWN_RETRY_COUNT; i++) { @@ -75,9 +73,21 @@ int mt7921e_mcu_drv_pmctrl(struct mt7921_dev *dev) if (i == MT7921_DRV_OWN_RETRY_COUNT) { dev_err(dev->mt76.dev, "driver own failed\n"); err = -EIO; - goto out; } + return err; +} + +int mt7921e_mcu_drv_pmctrl(struct mt7921_dev *dev) +{ + struct mt76_phy *mphy = &dev->mt76.phy; + struct mt76_connac_pm *pm = &dev->pm; + int err; + + err = __mt7921e_mcu_drv_pmctrl(dev); + if (err < 0) + goto out; + mt7921_wpdma_reinit_cond(dev); clear_bit(MT76_STATE_PM, &mphy->state); diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/regs.h b/drivers/net/wireless/mediatek/mt76/mt7921/regs.h index cbd38122c510f..c8c92faa4624f 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/regs.h +++ b/drivers/net/wireless/mediatek/mt76/mt7921/regs.h @@ -17,13 +17,12 @@ #define MT_PLE_BASE 0x820c0000 #define MT_PLE(ofs) (MT_PLE_BASE + (ofs)) -#define MT_PLE_FL_Q0_CTRL MT_PLE(0x1b0) -#define MT_PLE_FL_Q1_CTRL MT_PLE(0x1b4) -#define MT_PLE_FL_Q2_CTRL MT_PLE(0x1b8) -#define MT_PLE_FL_Q3_CTRL 
MT_PLE(0x1bc) +#define MT_PLE_FL_Q0_CTRL MT_PLE(0x3e0) +#define MT_PLE_FL_Q1_CTRL MT_PLE(0x3e4) +#define MT_PLE_FL_Q2_CTRL MT_PLE(0x3e8) +#define MT_PLE_FL_Q3_CTRL MT_PLE(0x3ec) -#define MT_PLE_AC_QEMPTY(ac, n) MT_PLE(0x300 + 0x10 * (ac) + \ - ((n) << 2)) +#define MT_PLE_AC_QEMPTY(_n) MT_PLE(0x500 + 0x40 * (_n)) #define MT_PLE_AMSDU_PACK_MSDU_CNT(n) MT_PLE(0x10e0 + ((n) << 2)) #define MT_MDP_BASE 0x820cd000 diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/sdio_mcu.c b/drivers/net/wireless/mediatek/mt76/mt7921/sdio_mcu.c index d20f2ff01be17..5d8af18c70267 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/sdio_mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/sdio_mcu.c @@ -49,6 +49,26 @@ mt7921s_mcu_send_message(struct mt76_dev *mdev, struct sk_buff *skb, return ret; } +static u32 mt7921s_read_rm3r(struct mt7921_dev *dev) +{ + struct mt76_sdio *sdio = &dev->mt76.sdio; + + return sdio_readl(sdio->func, MCR_D2HRM3R, NULL); +} + +static u32 mt7921s_clear_rm3r_drv_own(struct mt7921_dev *dev) +{ + struct mt76_sdio *sdio = &dev->mt76.sdio; + u32 val; + + val = sdio_readl(sdio->func, MCR_D2HRM3R, NULL); + if (val) + sdio_writel(sdio->func, H2D_SW_INT_CLEAR_MAILBOX_ACK, + MCR_WSICR, NULL); + + return val; +} + int mt7921s_mcu_init(struct mt7921_dev *dev) { static const struct mt76_mcu_ops mt7921s_mcu_ops = { @@ -88,6 +108,12 @@ int mt7921s_mcu_drv_pmctrl(struct mt7921_dev *dev) err = readx_poll_timeout(mt76s_read_pcr, &dev->mt76, status, status & WHLPCR_IS_DRIVER_OWN, 2000, 1000000); + + if (!err && test_bit(MT76_STATE_MCU_RUNNING, &dev->mphy.state)) + err = readx_poll_timeout(mt7921s_read_rm3r, dev, status, + status & D2HRM3R_IS_DRIVER_OWN, + 2000, 1000000); + sdio_release_host(func); if (err < 0) { @@ -115,12 +141,24 @@ int mt7921s_mcu_fw_pmctrl(struct mt7921_dev *dev) sdio_claim_host(func); + if (test_bit(MT76_STATE_MCU_RUNNING, &dev->mphy.state)) { + err = readx_poll_timeout(mt7921s_clear_rm3r_drv_own, + dev, status, + !(status & D2HRM3R_IS_DRIVER_OWN), + 
2000, 1000000); + if (err < 0) { + dev_err(dev->mt76.dev, "mailbox ACK not cleared\n"); + goto err; + } + } + sdio_writel(func, WHLPCR_FW_OWN_REQ_SET, MCR_WHLPCR, NULL); err = readx_poll_timeout(mt76s_read_pcr, &dev->mt76, status, !(status & WHLPCR_IS_DRIVER_OWN), 2000, 1000000); sdio_release_host(func); +err: if (err < 0) { dev_err(dev->mt76.dev, "firmware own failed\n"); clear_bit(MT76_STATE_PM, &mphy->state); diff --git a/drivers/net/wireless/mediatek/mt76/sdio.h b/drivers/net/wireless/mediatek/mt76/sdio.h index 99db4ad93b7c7..27d5d2077ebae 100644 --- a/drivers/net/wireless/mediatek/mt76/sdio.h +++ b/drivers/net/wireless/mediatek/mt76/sdio.h @@ -65,6 +65,7 @@ #define MCR_H2DSM0R 0x0070 #define H2D_SW_INT_READ BIT(16) #define H2D_SW_INT_WRITE BIT(17) +#define H2D_SW_INT_CLEAR_MAILBOX_ACK BIT(22) #define MCR_H2DSM1R 0x0074 #define MCR_D2HRM0R 0x0078 @@ -109,6 +110,7 @@ #define MCR_H2DSM2R 0x0160 /* supported in CONNAC2 */ #define MCR_H2DSM3R 0x0164 /* supported in CONNAC2 */ #define MCR_D2HRM3R 0x0174 /* supported in CONNAC2 */ +#define D2HRM3R_IS_DRIVER_OWN BIT(0) #define MCR_WTQCR8 0x0190 /* supported in CONNAC2 */ #define MCR_WTQCR9 0x0194 /* supported in CONNAC2 */ #define MCR_WTQCR10 0x0198 /* supported in CONNAC2 */ diff --git a/drivers/net/wireless/ray_cs.c b/drivers/net/wireless/ray_cs.c index 2987ad9271f64..87e98ab068ed7 100644 --- a/drivers/net/wireless/ray_cs.c +++ b/drivers/net/wireless/ray_cs.c @@ -382,6 +382,8 @@ static int ray_config(struct pcmcia_device *link) goto failed; local->sram = ioremap(link->resource[2]->start, resource_size(link->resource[2])); + if (!local->sram) + goto failed; /*** Set up 16k window for shared memory (receive buffer) ***************/ link->resource[3]->flags |= @@ -396,6 +398,8 @@ static int ray_config(struct pcmcia_device *link) goto failed; local->rmem = ioremap(link->resource[3]->start, resource_size(link->resource[3])); + if (!local->rmem) + goto failed; /*** Set up window for attribute memory 
***********************************/ link->resource[4]->flags |= @@ -410,6 +414,8 @@ static int ray_config(struct pcmcia_device *link) goto failed; local->amem = ioremap(link->resource[4]->start, resource_size(link->resource[4])); + if (!local->amem) + goto failed; dev_dbg(&link->dev, "ray_config sram=%p\n", local->sram); dev_dbg(&link->dev, "ray_config rmem=%p\n", local->rmem); diff --git a/drivers/net/wireless/realtek/rtw88/debug.c b/drivers/net/wireless/realtek/rtw88/debug.c index e429428232c15..e7e9f17df96a3 100644 --- a/drivers/net/wireless/realtek/rtw88/debug.c +++ b/drivers/net/wireless/realtek/rtw88/debug.c @@ -390,7 +390,7 @@ static ssize_t rtw_debugfs_set_h2c(struct file *filp, &param[0], &param[1], &param[2], &param[3], &param[4], &param[5], &param[6], &param[7]); if (num != 8) { - rtw_info(rtwdev, "invalid H2C command format for debug\n"); + rtw_warn(rtwdev, "invalid H2C command format for debug\n"); return -EINVAL; } diff --git a/drivers/net/wireless/realtek/rtw88/debug.h b/drivers/net/wireless/realtek/rtw88/debug.h index 61f8369fe2d61..066792dd96afb 100644 --- a/drivers/net/wireless/realtek/rtw88/debug.h +++ b/drivers/net/wireless/realtek/rtw88/debug.h @@ -23,6 +23,7 @@ enum rtw_debug_mask { RTW_DBG_PATH_DIV = 0x00004000, RTW_DBG_ADAPTIVITY = 0x00008000, RTW_DBG_HW_SCAN = 0x00010000, + RTW_DBG_STATE = 0x00020000, RTW_DBG_ALL = 0xffffffff }; diff --git a/drivers/net/wireless/realtek/rtw88/fw.c b/drivers/net/wireless/realtek/rtw88/fw.c index 2f7c036f90221..db90d75a86339 100644 --- a/drivers/net/wireless/realtek/rtw88/fw.c +++ b/drivers/net/wireless/realtek/rtw88/fw.c @@ -1784,9 +1784,9 @@ void rtw_fw_scan_notify(struct rtw_dev *rtwdev, bool start) rtw_fw_send_h2c_command(rtwdev, h2c_pkt); } -static void rtw_append_probe_req_ie(struct rtw_dev *rtwdev, struct sk_buff *skb, - struct sk_buff_head *list, - struct rtw_vif *rtwvif) +static int rtw_append_probe_req_ie(struct rtw_dev *rtwdev, struct sk_buff *skb, + struct sk_buff_head *list, u8 *bands, + struct rtw_vif *rtwvif) { struct 
ieee80211_scan_ies *ies = rtwvif->scan_ies; struct rtw_chip_info *chip = rtwdev->chip; @@ -1797,19 +1797,24 @@ static void rtw_append_probe_req_ie(struct rtw_dev *rtwdev, struct sk_buff *skb, if (!(BIT(idx) & chip->band)) continue; new = skb_copy(skb, GFP_KERNEL); + if (!new) + return -ENOMEM; skb_put_data(new, ies->ies[idx], ies->len[idx]); skb_put_data(new, ies->common_ies, ies->common_ie_len); skb_queue_tail(list, new); + (*bands)++; } + + return 0; } -static int _rtw_hw_scan_update_probe_req(struct rtw_dev *rtwdev, u8 num_ssids, +static int _rtw_hw_scan_update_probe_req(struct rtw_dev *rtwdev, u8 num_probes, struct sk_buff_head *probe_req_list) { struct rtw_chip_info *chip = rtwdev->chip; struct sk_buff *skb, *tmp; u8 page_offset = 1, *buf, page_size = chip->page_size; - u8 pages = page_offset + num_ssids * RTW_PROBE_PG_CNT; + u8 pages = page_offset + num_probes * RTW_PROBE_PG_CNT; u16 pg_addr = rtwdev->fifo.rsvd_h2c_info_addr, loc; u16 buf_offset = page_size * page_offset; u8 tx_desc_sz = chip->tx_pkt_desc_sz; @@ -1848,6 +1853,8 @@ static int _rtw_hw_scan_update_probe_req(struct rtw_dev *rtwdev, u8 num_ssids, rtwdev->scan_info.probe_pg_size = page_offset; out: kfree(buf); + skb_queue_walk_safe(probe_req_list, skb, tmp) + kfree_skb(skb); return ret; } @@ -1857,8 +1864,9 @@ static int rtw_hw_scan_update_probe_req(struct rtw_dev *rtwdev, { struct cfg80211_scan_request *req = rtwvif->scan_req; struct sk_buff_head list; - struct sk_buff *skb; - u8 num = req->n_ssids, i; + struct sk_buff *skb, *tmp; + u8 num = req->n_ssids, i, bands = 0; + int ret; skb_queue_head_init(&list); for (i = 0; i < num; i++) { @@ -1866,11 +1874,25 @@ static int rtw_hw_scan_update_probe_req(struct rtw_dev *rtwdev, req->ssids[i].ssid, req->ssids[i].ssid_len, req->ie_len); - rtw_append_probe_req_ie(rtwdev, skb, &list, rtwvif); + if (!skb) { + ret = -ENOMEM; + goto out; + } + ret = rtw_append_probe_req_ie(rtwdev, skb, &list, &bands, + rtwvif); + if (ret) + goto out; + kfree_skb(skb); } - 
return _rtw_hw_scan_update_probe_req(rtwdev, num, &list); + return _rtw_hw_scan_update_probe_req(rtwdev, num * bands, &list); + +out: + skb_queue_walk_safe(&list, skb, tmp) + kfree_skb(skb); + + return ret; } static int rtw_add_chan_info(struct rtw_dev *rtwdev, struct rtw_chan_info *info, @@ -2022,7 +2044,7 @@ void rtw_hw_scan_complete(struct rtw_dev *rtwdev, struct ieee80211_vif *vif, rtwdev->hal.rcr |= BIT_CBSSID_BCN; rtw_write32(rtwdev, REG_RCR, rtwdev->hal.rcr); - rtw_core_scan_complete(rtwdev, vif); + rtw_core_scan_complete(rtwdev, vif, true); ieee80211_wake_queues(rtwdev->hw); ieee80211_scan_completed(rtwdev->hw, &info); @@ -2109,7 +2131,7 @@ void rtw_hw_scan_status_report(struct rtw_dev *rtwdev, struct sk_buff *skb) rtw_hw_scan_complete(rtwdev, vif, aborted); if (aborted) - rtw_info(rtwdev, "HW scan aborted with code: %d\n", rc); + rtw_dbg(rtwdev, RTW_DBG_HW_SCAN, "HW scan aborted with code: %d\n", rc); } void rtw_store_op_chan(struct rtw_dev *rtwdev) diff --git a/drivers/net/wireless/realtek/rtw88/mac80211.c b/drivers/net/wireless/realtek/rtw88/mac80211.c index ae7d97de5fdf4..5cdc54c9a9aae 100644 --- a/drivers/net/wireless/realtek/rtw88/mac80211.c +++ b/drivers/net/wireless/realtek/rtw88/mac80211.c @@ -72,6 +72,9 @@ static int rtw_ops_config(struct ieee80211_hw *hw, u32 changed) struct rtw_dev *rtwdev = hw->priv; int ret = 0; + /* let previous ips work finish to ensure we don't leave ips twice */ + cancel_work_sync(&rtwdev->ips_work); + mutex_lock(&rtwdev->mutex); rtw_leave_lps_deep(rtwdev); @@ -205,7 +208,7 @@ static int rtw_ops_add_interface(struct ieee80211_hw *hw, mutex_unlock(&rtwdev->mutex); - rtw_info(rtwdev, "start vif %pM on port %d\n", vif->addr, rtwvif->port); + rtw_dbg(rtwdev, RTW_DBG_STATE, "start vif %pM on port %d\n", vif->addr, rtwvif->port); return 0; } @@ -216,7 +219,7 @@ static void rtw_ops_remove_interface(struct ieee80211_hw *hw, struct rtw_vif *rtwvif = (struct rtw_vif *)vif->drv_priv; u32 config = 0; - rtw_info(rtwdev, "stop vif %pM 
on port %d\n", vif->addr, rtwvif->port); + rtw_dbg(rtwdev, RTW_DBG_STATE, "stop vif %pM on port %d\n", vif->addr, rtwvif->port); mutex_lock(&rtwdev->mutex); @@ -242,8 +245,8 @@ static int rtw_ops_change_interface(struct ieee80211_hw *hw, { struct rtw_dev *rtwdev = hw->priv; - rtw_info(rtwdev, "change vif %pM (%d)->(%d), p2p (%d)->(%d)\n", - vif->addr, vif->type, type, vif->p2p, p2p); + rtw_dbg(rtwdev, RTW_DBG_STATE, "change vif %pM (%d)->(%d), p2p (%d)->(%d)\n", + vif->addr, vif->type, type, vif->p2p, p2p); rtw_ops_remove_interface(hw, vif); @@ -614,7 +617,7 @@ static void rtw_ops_sw_scan_complete(struct ieee80211_hw *hw, struct rtw_dev *rtwdev = hw->priv; mutex_lock(&rtwdev->mutex); - rtw_core_scan_complete(rtwdev, vif); + rtw_core_scan_complete(rtwdev, vif, false); mutex_unlock(&rtwdev->mutex); } diff --git a/drivers/net/wireless/realtek/rtw88/main.c b/drivers/net/wireless/realtek/rtw88/main.c index 38252113c4a87..b00200f81db7d 100644 --- a/drivers/net/wireless/realtek/rtw88/main.c +++ b/drivers/net/wireless/realtek/rtw88/main.c @@ -272,6 +272,15 @@ static void rtw_c2h_work(struct work_struct *work) } } +static void rtw_ips_work(struct work_struct *work) +{ + struct rtw_dev *rtwdev = container_of(work, struct rtw_dev, ips_work); + + mutex_lock(&rtwdev->mutex); + rtw_enter_ips(rtwdev); + mutex_unlock(&rtwdev->mutex); +} + static u8 rtw_acquire_macid(struct rtw_dev *rtwdev) { unsigned long mac_id; @@ -305,8 +314,8 @@ int rtw_sta_add(struct rtw_dev *rtwdev, struct ieee80211_sta *sta, rtwdev->sta_cnt++; rtwdev->beacon_loss = false; - rtw_info(rtwdev, "sta %pM joined with macid %d\n", - sta->addr, si->mac_id); + rtw_dbg(rtwdev, RTW_DBG_STATE, "sta %pM joined with macid %d\n", + sta->addr, si->mac_id); return 0; } @@ -327,8 +336,8 @@ void rtw_sta_remove(struct rtw_dev *rtwdev, struct ieee80211_sta *sta, kfree(si->mask); rtwdev->sta_cnt--; - rtw_info(rtwdev, "sta %pM with macid %d left\n", - sta->addr, si->mac_id); + rtw_dbg(rtwdev, RTW_DBG_STATE, "sta %pM with macid %d 
left\n", + sta->addr, si->mac_id); } struct rtw_fwcd_hdr { @@ -1339,7 +1348,8 @@ void rtw_core_scan_start(struct rtw_dev *rtwdev, struct rtw_vif *rtwvif, set_bit(RTW_FLAG_SCANNING, rtwdev->flags); } -void rtw_core_scan_complete(struct rtw_dev *rtwdev, struct ieee80211_vif *vif) +void rtw_core_scan_complete(struct rtw_dev *rtwdev, struct ieee80211_vif *vif, + bool hw_scan) { struct rtw_vif *rtwvif = (struct rtw_vif *)vif->drv_priv; u32 config = 0; @@ -1354,6 +1364,9 @@ void rtw_core_scan_complete(struct rtw_dev *rtwdev, struct ieee80211_vif *vif) rtw_vif_port_config(rtwdev, rtwvif, config); rtw_coex_scan_notify(rtwdev, COEX_SCAN_FINISH); + + if (rtwvif->net_type == RTW_NET_NO_LINK && hw_scan) + ieee80211_queue_work(rtwdev->hw, &rtwdev->ips_work); } int rtw_core_start(struct rtw_dev *rtwdev) @@ -1919,6 +1932,7 @@ int rtw_core_init(struct rtw_dev *rtwdev) INIT_DELAYED_WORK(&coex->wl_ccklock_work, rtw_coex_wl_ccklock_work); INIT_WORK(&rtwdev->tx_work, rtw_tx_work); INIT_WORK(&rtwdev->c2h_work, rtw_c2h_work); + INIT_WORK(&rtwdev->ips_work, rtw_ips_work); INIT_WORK(&rtwdev->fw_recovery_work, rtw_fw_recovery_work); INIT_WORK(&rtwdev->ba_work, rtw_txq_ba_work); skb_queue_head_init(&rtwdev->c2h_queue); diff --git a/drivers/net/wireless/realtek/rtw88/main.h b/drivers/net/wireless/realtek/rtw88/main.h index dc1cd9bd4b8a3..36e1e408933db 100644 --- a/drivers/net/wireless/realtek/rtw88/main.h +++ b/drivers/net/wireless/realtek/rtw88/main.h @@ -1960,6 +1960,7 @@ struct rtw_dev { /* c2h cmd queue & handler work */ struct sk_buff_head c2h_queue; struct work_struct c2h_work; + struct work_struct ips_work; struct work_struct fw_recovery_work; /* used to protect txqs list */ @@ -2101,7 +2102,8 @@ void rtw_tx_report_purge_timer(struct timer_list *t); void rtw_update_sta_info(struct rtw_dev *rtwdev, struct rtw_sta_info *si); void rtw_core_scan_start(struct rtw_dev *rtwdev, struct rtw_vif *rtwvif, const u8 *mac_addr, bool hw_scan); -void rtw_core_scan_complete(struct rtw_dev *rtwdev, 
struct ieee80211_vif *vif); +void rtw_core_scan_complete(struct rtw_dev *rtwdev, struct ieee80211_vif *vif, + bool hw_scan); int rtw_core_start(struct rtw_dev *rtwdev); void rtw_core_stop(struct rtw_dev *rtwdev); int rtw_chip_info_setup(struct rtw_dev *rtwdev); diff --git a/drivers/net/wireless/realtek/rtw88/rtw8821c.c b/drivers/net/wireless/realtek/rtw88/rtw8821c.c index db078df63f855..80d4761796b15 100644 --- a/drivers/net/wireless/realtek/rtw88/rtw8821c.c +++ b/drivers/net/wireless/realtek/rtw88/rtw8821c.c @@ -499,7 +499,7 @@ static s8 get_cck_rx_pwr(struct rtw_dev *rtwdev, u8 lna_idx, u8 vga_idx) } if (lna_idx >= lna_gain_table_size) { - rtw_info(rtwdev, "incorrect lna index (%d)\n", lna_idx); + rtw_warn(rtwdev, "incorrect lna index (%d)\n", lna_idx); return -120; } diff --git a/drivers/net/wireless/realtek/rtw88/rtw8822b.c b/drivers/net/wireless/realtek/rtw88/rtw8822b.c index dd4fbb82750d5..a23806b69b0fa 100644 --- a/drivers/net/wireless/realtek/rtw88/rtw8822b.c +++ b/drivers/net/wireless/realtek/rtw88/rtw8822b.c @@ -1012,12 +1012,12 @@ static int rtw8822b_set_antenna(struct rtw_dev *rtwdev, antenna_tx, antenna_rx); if (!rtw8822b_check_rf_path(antenna_tx)) { - rtw_info(rtwdev, "unsupported tx path 0x%x\n", antenna_tx); + rtw_warn(rtwdev, "unsupported tx path 0x%x\n", antenna_tx); return -EINVAL; } if (!rtw8822b_check_rf_path(antenna_rx)) { - rtw_info(rtwdev, "unsupported rx path 0x%x\n", antenna_rx); + rtw_warn(rtwdev, "unsupported rx path 0x%x\n", antenna_rx); return -EINVAL; } diff --git a/drivers/net/wireless/realtek/rtw88/rtw8822c.c b/drivers/net/wireless/realtek/rtw88/rtw8822c.c index 35c46e5209de3..ddf4d1a23e605 100644 --- a/drivers/net/wireless/realtek/rtw88/rtw8822c.c +++ b/drivers/net/wireless/realtek/rtw88/rtw8822c.c @@ -2798,7 +2798,7 @@ static int rtw8822c_set_antenna(struct rtw_dev *rtwdev, case BB_PATH_AB: break; default: - rtw_info(rtwdev, "unsupported tx path 0x%x\n", antenna_tx); + rtw_warn(rtwdev, "unsupported tx path 0x%x\n", antenna_tx); 
return -EINVAL; } @@ -2808,7 +2808,7 @@ static int rtw8822c_set_antenna(struct rtw_dev *rtwdev, case BB_PATH_AB: break; default: - rtw_info(rtwdev, "unsupported rx path 0x%x\n", antenna_rx); + rtw_warn(rtwdev, "unsupported rx path 0x%x\n", antenna_rx); return -EINVAL; } diff --git a/drivers/net/wireless/realtek/rtw88/sar.c b/drivers/net/wireless/realtek/rtw88/sar.c index 3383726c4d90f..c472f1502b82a 100644 --- a/drivers/net/wireless/realtek/rtw88/sar.c +++ b/drivers/net/wireless/realtek/rtw88/sar.c @@ -91,10 +91,10 @@ int rtw_set_sar_specs(struct rtw_dev *rtwdev, return -EINVAL; power = sar->sub_specs[i].power; - rtw_info(rtwdev, "On freq %u to %u, set SAR %d in 1/%lu dBm\n", - rtw_common_sar_freq_ranges[idx].start_freq, - rtw_common_sar_freq_ranges[idx].end_freq, - power, BIT(RTW_COMMON_SAR_FCT)); + rtw_dbg(rtwdev, RTW_DBG_REGD, "On freq %u to %u, set SAR %d in 1/%lu dBm\n", + rtw_common_sar_freq_ranges[idx].start_freq, + rtw_common_sar_freq_ranges[idx].end_freq, + power, BIT(RTW_COMMON_SAR_FCT)); for (j = 0; j < RTW_RF_PATH_MAX; j++) { for (k = 0; k < RTW_RATE_SECTION_MAX; k++) { diff --git a/drivers/net/wireless/realtek/rtw89/core.c b/drivers/net/wireless/realtek/rtw89/core.c index a0737eea9f81d..9632e7f218dda 100644 --- a/drivers/net/wireless/realtek/rtw89/core.c +++ b/drivers/net/wireless/realtek/rtw89/core.c @@ -1509,11 +1509,12 @@ static void rtw89_core_txq_push(struct rtw89_dev *rtwdev, unsigned long i; int ret; + rcu_read_lock(); for (i = 0; i < frame_cnt; i++) { skb = ieee80211_tx_dequeue_ni(rtwdev->hw, txq); if (!skb) { rtw89_debug(rtwdev, RTW89_DBG_TXRX, "dequeue a NULL skb\n"); - return; + goto out; } rtw89_core_txq_check_agg(rtwdev, rtwtxq, skb); ret = rtw89_core_tx_write(rtwdev, vif, sta, skb, NULL); @@ -1523,6 +1524,8 @@ static void rtw89_core_txq_push(struct rtw89_dev *rtwdev, break; } } +out: + rcu_read_unlock(); } static u32 rtw89_check_and_reclaim_tx_resource(struct rtw89_dev *rtwdev, u8 tid) diff --git a/drivers/net/wwan/qcom_bam_dmux.c 
b/drivers/net/wwan/qcom_bam_dmux.c index 5dfa2eba6014c..17d46f4d29139 100644 --- a/drivers/net/wwan/qcom_bam_dmux.c +++ b/drivers/net/wwan/qcom_bam_dmux.c @@ -755,7 +755,7 @@ static int __maybe_unused bam_dmux_runtime_resume(struct device *dev) return 0; dmux->tx = dma_request_chan(dev, "tx"); - if (IS_ERR(dmux->rx)) { + if (IS_ERR(dmux->tx)) { dev_err(dev, "Failed to request TX DMA channel: %pe\n", dmux->tx); dmux->tx = NULL; bam_dmux_runtime_suspend(dev); diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index 9ccf3d6087993..70ad891a76bae 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -1025,6 +1025,9 @@ static unsigned long default_align(struct nd_region *nd_region) } } + if (nd_region->ndr_size < MEMREMAP_COMPAT_ALIGN_MAX) + align = PAGE_SIZE; + mappings = max_t(u16, 1, nd_region->ndr_mappings); div_u64_rem(align, mappings, &remainder); if (remainder) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index fd4720d37cc0b..6215d50ed3e7d 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1683,13 +1683,6 @@ static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns) blk_queue_max_write_zeroes_sectors(queue, UINT_MAX); } -static bool nvme_ns_ids_valid(struct nvme_ns_ids *ids) -{ - return !uuid_is_null(&ids->uuid) || - memchr_inv(ids->nguid, 0, sizeof(ids->nguid)) || - memchr_inv(ids->eui64, 0, sizeof(ids->eui64)); -} - static bool nvme_ns_ids_equal(struct nvme_ns_ids *a, struct nvme_ns_ids *b) { return uuid_equal(&a->uuid, &b->uuid) && @@ -1864,9 +1857,6 @@ static void nvme_update_disk_info(struct gendisk *disk, nvme_config_discard(disk, ns); blk_queue_max_write_zeroes_sectors(disk->queue, ns->ctrl->max_zeroes_sectors); - - set_disk_ro(disk, (id->nsattr & NVME_NS_ATTR_RO) || - test_bit(NVME_NS_FORCE_RO, &ns->flags)); } static inline bool nvme_first_scan(struct gendisk *disk) @@ -1925,6 +1915,8 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct 
nvme_id_ns *id) goto out_unfreeze; } + set_disk_ro(ns->disk, (id->nsattr & NVME_NS_ATTR_RO) || + test_bit(NVME_NS_FORCE_RO, &ns->flags)); set_bit(NVME_NS_READY, &ns->flags); blk_mq_unfreeze_queue(ns->disk->queue); @@ -1937,6 +1929,9 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_id_ns *id) if (nvme_ns_head_multipath(ns->head)) { blk_mq_freeze_queue(ns->head->disk->queue); nvme_update_disk_info(ns->head->disk, ns, id); + set_disk_ro(ns->head->disk, + (id->nsattr & NVME_NS_ATTR_RO) || + test_bit(NVME_NS_FORCE_RO, &ns->flags)); nvme_mpath_revalidate_paths(ns); blk_stack_limits(&ns->head->disk->queue->limits, &ns->queue->limits, 0); @@ -3581,15 +3576,20 @@ static const struct attribute_group *nvme_dev_attr_groups[] = { NULL, }; -static struct nvme_ns_head *nvme_find_ns_head(struct nvme_subsystem *subsys, +static struct nvme_ns_head *nvme_find_ns_head(struct nvme_ctrl *ctrl, unsigned nsid) { struct nvme_ns_head *h; - lockdep_assert_held(&subsys->lock); + lockdep_assert_held(&ctrl->subsys->lock); - list_for_each_entry(h, &subsys->nsheads, entry) { - if (h->ns_id != nsid) + list_for_each_entry(h, &ctrl->subsys->nsheads, entry) { + /* + * Private namespaces can share NSIDs under some conditions. + * In that case we can't use the same ns_head for namespaces + * with the same NSID. 
+ */ + if (h->ns_id != nsid || !nvme_is_unique_nsid(ctrl, h)) continue; if (!list_empty(&h->list) && nvme_tryget_ns_head(h)) return h; @@ -3598,16 +3598,24 @@ static struct nvme_ns_head *nvme_find_ns_head(struct nvme_subsystem *subsys, return NULL; } -static int __nvme_check_ids(struct nvme_subsystem *subsys, - struct nvme_ns_head *new) +static int nvme_subsys_check_duplicate_ids(struct nvme_subsystem *subsys, + struct nvme_ns_ids *ids) { + bool has_uuid = !uuid_is_null(&ids->uuid); + bool has_nguid = memchr_inv(ids->nguid, 0, sizeof(ids->nguid)); + bool has_eui64 = memchr_inv(ids->eui64, 0, sizeof(ids->eui64)); struct nvme_ns_head *h; lockdep_assert_held(&subsys->lock); list_for_each_entry(h, &subsys->nsheads, entry) { - if (nvme_ns_ids_valid(&new->ids) && - nvme_ns_ids_equal(&new->ids, &h->ids)) + if (has_uuid && uuid_equal(&ids->uuid, &h->ids.uuid)) + return -EINVAL; + if (has_nguid && + memcmp(&ids->nguid, &h->ids.nguid, sizeof(ids->nguid)) == 0) + return -EINVAL; + if (has_eui64 && + memcmp(&ids->eui64, &h->ids.eui64, sizeof(ids->eui64)) == 0) return -EINVAL; } @@ -3706,7 +3714,7 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl, head->ids = *ids; kref_init(&head->ref); - ret = __nvme_check_ids(ctrl->subsys, head); + ret = nvme_subsys_check_duplicate_ids(ctrl->subsys, &head->ids); if (ret) { dev_err(ctrl->device, "duplicate IDs for nsid %d\n", nsid); @@ -3749,7 +3757,7 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid, int ret = 0; mutex_lock(&ctrl->subsys->lock); - head = nvme_find_ns_head(ctrl->subsys, nsid); + head = nvme_find_ns_head(ctrl, nsid); if (!head) { head = nvme_alloc_ns_head(ctrl, nsid, ids); if (IS_ERR(head)) { diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index ff775235534cf..a703f1f5fb64c 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -504,10 +504,11 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head) /* * Add a 
multipath node if the subsystems supports multiple controllers. - * We also do this for private namespaces as the namespace sharing data could - * change after a rescan. + * We also do this for private namespaces as the namespace sharing flag + * could change after a rescan. */ - if (!(ctrl->subsys->cmic & NVME_CTRL_CMIC_MULTI_CTRL) || !multipath) + if (!(ctrl->subsys->cmic & NVME_CTRL_CMIC_MULTI_CTRL) || + !nvme_is_unique_nsid(ctrl, head) || !multipath) return 0; head->disk = blk_alloc_disk(ctrl->numa_node); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index a162f6c6da6e1..730cc80d84ff7 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -716,6 +716,25 @@ static inline bool nvme_check_ready(struct nvme_ctrl *ctrl, struct request *rq, return queue_live; return __nvme_check_ready(ctrl, rq, queue_live); } + +/* + * NSID shall be unique for all shared namespaces, or if at least one of the + * following conditions is met: + * 1. Namespace Management is supported by the controller + * 2. ANA is supported by the controller + * 3. NVM Set are supported by the controller + * + * In other case, private namespace are not required to report a unique NSID. 
+ */ +static inline bool nvme_is_unique_nsid(struct nvme_ctrl *ctrl, + struct nvme_ns_head *head) +{ + return head->shared || + (ctrl->oacs & NVME_CTRL_OACS_NS_MNGT_SUPP) || + (ctrl->subsys->cmic & NVME_CTRL_CMIC_ANA) || + (ctrl->ctratt & NVME_CTRL_CTRATT_NVM_SETS); +} + int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, void *buf, unsigned bufflen); int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 65e00c64a588b..d66e2de044e0a 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -30,6 +30,44 @@ static int so_priority; module_param(so_priority, int, 0644); MODULE_PARM_DESC(so_priority, "nvme tcp socket optimize priority"); +#ifdef CONFIG_DEBUG_LOCK_ALLOC +/* lockdep can detect a circular dependency of the form + * sk_lock -> mmap_lock (page fault) -> fs locks -> sk_lock + * because dependencies are tracked for both nvme-tcp and user contexts. Using + * a separate class prevents lockdep from conflating nvme-tcp socket use with + * user-space socket API use. 
+ */ +static struct lock_class_key nvme_tcp_sk_key[2]; +static struct lock_class_key nvme_tcp_slock_key[2]; + +static void nvme_tcp_reclassify_socket(struct socket *sock) +{ + struct sock *sk = sock->sk; + + if (WARN_ON_ONCE(!sock_allow_reclassification(sk))) + return; + + switch (sk->sk_family) { + case AF_INET: + sock_lock_init_class_and_name(sk, "slock-AF_INET-NVME", + &nvme_tcp_slock_key[0], + "sk_lock-AF_INET-NVME", + &nvme_tcp_sk_key[0]); + break; + case AF_INET6: + sock_lock_init_class_and_name(sk, "slock-AF_INET6-NVME", + &nvme_tcp_slock_key[1], + "sk_lock-AF_INET6-NVME", + &nvme_tcp_sk_key[1]); + break; + default: + WARN_ON_ONCE(1); + } +} +#else +static void nvme_tcp_reclassify_socket(struct socket *sock) { } +#endif + enum nvme_tcp_send_state { NVME_TCP_SEND_CMD_PDU = 0, NVME_TCP_SEND_H2C_PDU, @@ -1469,6 +1507,8 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, goto err_destroy_mutex; } + nvme_tcp_reclassify_socket(queue->sock); + /* Single syn retry */ tcp_sock_set_syncnt(queue->sock->sk, 1); diff --git a/drivers/opp/debugfs.c b/drivers/opp/debugfs.c index 596c185b5dda4..b5f2f9f393926 100644 --- a/drivers/opp/debugfs.c +++ b/drivers/opp/debugfs.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -131,9 +132,13 @@ void opp_debug_create_one(struct dev_pm_opp *opp, struct opp_table *opp_table) debugfs_create_bool("suspend", S_IRUGO, d, &opp->suspend); debugfs_create_u32("performance_state", S_IRUGO, d, &opp->pstate); debugfs_create_ulong("rate_hz", S_IRUGO, d, &opp->rate); + debugfs_create_u32("level", S_IRUGO, d, &opp->level); debugfs_create_ulong("clock_latency_ns", S_IRUGO, d, &opp->clock_latency_ns); + opp->of_name = of_node_full_name(opp->np); + debugfs_create_str("of_name", S_IRUGO, d, (char **)&opp->of_name); + opp_debug_create_supplies(opp, opp_table, d); opp_debug_create_bw(opp, opp_table, d); diff --git a/drivers/opp/opp.h b/drivers/opp/opp.h index 407c3bfe51d96..45e3a55239a13 100644 --- 
a/drivers/opp/opp.h +++ b/drivers/opp/opp.h @@ -96,6 +96,7 @@ struct dev_pm_opp { #ifdef CONFIG_DEBUG_FS struct dentry *dentry; + const char *of_name; #endif }; diff --git a/drivers/parisc/dino.c b/drivers/parisc/dino.c index 952a92504df69..e33036281327d 100644 --- a/drivers/parisc/dino.c +++ b/drivers/parisc/dino.c @@ -142,9 +142,8 @@ struct dino_device { struct pci_hba_data hba; /* 'C' inheritance - must be first */ spinlock_t dinosaur_pen; - unsigned long txn_addr; /* EIR addr to generate interrupt */ - u32 txn_data; /* EIR data assign to each dino */ u32 imr; /* IRQ's which are enabled */ + struct gsc_irq gsc_irq; int global_irq[DINO_LOCAL_IRQS]; /* map IMR bit to global irq */ #ifdef DINO_DEBUG unsigned int dino_irr0; /* save most recent IRQ line stat */ @@ -339,14 +338,43 @@ static void dino_unmask_irq(struct irq_data *d) if (tmp & DINO_MASK_IRQ(local_irq)) { DBG(KERN_WARNING "%s(): IRQ asserted! (ILR 0x%x)\n", __func__, tmp); - gsc_writel(dino_dev->txn_data, dino_dev->txn_addr); + gsc_writel(dino_dev->gsc_irq.txn_data, dino_dev->gsc_irq.txn_addr); } } +#ifdef CONFIG_SMP +static int dino_set_affinity_irq(struct irq_data *d, const struct cpumask *dest, + bool force) +{ + struct dino_device *dino_dev = irq_data_get_irq_chip_data(d); + struct cpumask tmask; + int cpu_irq; + u32 eim; + + if (!cpumask_and(&tmask, dest, cpu_online_mask)) + return -EINVAL; + + cpu_irq = cpu_check_affinity(d, &tmask); + if (cpu_irq < 0) + return cpu_irq; + + dino_dev->gsc_irq.txn_addr = txn_affinity_addr(d->irq, cpu_irq); + eim = ((u32) dino_dev->gsc_irq.txn_addr) | dino_dev->gsc_irq.txn_data; + __raw_writel(eim, dino_dev->hba.base_addr+DINO_IAR0); + + irq_data_update_effective_affinity(d, &tmask); + + return IRQ_SET_MASK_OK; +} +#endif + static struct irq_chip dino_interrupt_type = { .name = "GSC-PCI", .irq_unmask = dino_unmask_irq, .irq_mask = dino_mask_irq, +#ifdef CONFIG_SMP + .irq_set_affinity = dino_set_affinity_irq, +#endif }; @@ -806,7 +834,6 @@ static int __init 
dino_common_init(struct parisc_device *dev, { int status; u32 eim; - struct gsc_irq gsc_irq; struct resource *res; pcibios_register_hba(&dino_dev->hba); @@ -821,10 +848,8 @@ static int __init dino_common_init(struct parisc_device *dev, ** still only has 11 IRQ input lines - just map some of them ** to a different processor. */ - dev->irq = gsc_alloc_irq(&gsc_irq); - dino_dev->txn_addr = gsc_irq.txn_addr; - dino_dev->txn_data = gsc_irq.txn_data; - eim = ((u32) gsc_irq.txn_addr) | gsc_irq.txn_data; + dev->irq = gsc_alloc_irq(&dino_dev->gsc_irq); + eim = ((u32) dino_dev->gsc_irq.txn_addr) | dino_dev->gsc_irq.txn_data; /* ** Dino needs a PA "IRQ" to get a processor's attention. diff --git a/drivers/parisc/gsc.c b/drivers/parisc/gsc.c index ed9371acf37eb..ec175ae998733 100644 --- a/drivers/parisc/gsc.c +++ b/drivers/parisc/gsc.c @@ -135,10 +135,41 @@ static void gsc_asic_unmask_irq(struct irq_data *d) */ } +#ifdef CONFIG_SMP +static int gsc_set_affinity_irq(struct irq_data *d, const struct cpumask *dest, + bool force) +{ + struct gsc_asic *gsc_dev = irq_data_get_irq_chip_data(d); + struct cpumask tmask; + int cpu_irq; + + if (!cpumask_and(&tmask, dest, cpu_online_mask)) + return -EINVAL; + + cpu_irq = cpu_check_affinity(d, &tmask); + if (cpu_irq < 0) + return cpu_irq; + + gsc_dev->gsc_irq.txn_addr = txn_affinity_addr(d->irq, cpu_irq); + gsc_dev->eim = ((u32) gsc_dev->gsc_irq.txn_addr) | gsc_dev->gsc_irq.txn_data; + + /* switch IRQ's for devices below LASI/WAX to other CPU */ + gsc_writel(gsc_dev->eim, gsc_dev->hpa + OFFSET_IAR); + + irq_data_update_effective_affinity(d, &tmask); + + return IRQ_SET_MASK_OK; +} +#endif + + static struct irq_chip gsc_asic_interrupt_type = { .name = "GSC-ASIC", .irq_unmask = gsc_asic_unmask_irq, .irq_mask = gsc_asic_mask_irq, +#ifdef CONFIG_SMP + .irq_set_affinity = gsc_set_affinity_irq, +#endif }; int gsc_assign_irq(struct irq_chip *type, void *data) diff --git a/drivers/parisc/gsc.h b/drivers/parisc/gsc.h index 
86abad3fa2150..73cbd0bb1975a 100644 --- a/drivers/parisc/gsc.h +++ b/drivers/parisc/gsc.h @@ -31,6 +31,7 @@ struct gsc_asic { int version; int type; int eim; + struct gsc_irq gsc_irq; int global_irq[32]; }; diff --git a/drivers/parisc/lasi.c b/drivers/parisc/lasi.c index 4e4fd12c2112e..6ef621adb63a8 100644 --- a/drivers/parisc/lasi.c +++ b/drivers/parisc/lasi.c @@ -163,7 +163,6 @@ static int __init lasi_init_chip(struct parisc_device *dev) { extern void (*chassis_power_off)(void); struct gsc_asic *lasi; - struct gsc_irq gsc_irq; int ret; lasi = kzalloc(sizeof(*lasi), GFP_KERNEL); @@ -185,7 +184,7 @@ static int __init lasi_init_chip(struct parisc_device *dev) lasi_init_irq(lasi); /* the IRQ lasi should use */ - dev->irq = gsc_alloc_irq(&gsc_irq); + dev->irq = gsc_alloc_irq(&lasi->gsc_irq); if (dev->irq < 0) { printk(KERN_ERR "%s(): cannot get GSC irq\n", __func__); @@ -193,9 +192,9 @@ static int __init lasi_init_chip(struct parisc_device *dev) return -EBUSY; } - lasi->eim = ((u32) gsc_irq.txn_addr) | gsc_irq.txn_data; + lasi->eim = ((u32) lasi->gsc_irq.txn_addr) | lasi->gsc_irq.txn_data; - ret = request_irq(gsc_irq.irq, gsc_asic_intr, 0, "lasi", lasi); + ret = request_irq(lasi->gsc_irq.irq, gsc_asic_intr, 0, "lasi", lasi); if (ret < 0) { kfree(lasi); return ret; diff --git a/drivers/parisc/wax.c b/drivers/parisc/wax.c index 5b6df15162354..73a2b01f8d9ca 100644 --- a/drivers/parisc/wax.c +++ b/drivers/parisc/wax.c @@ -68,7 +68,6 @@ static int __init wax_init_chip(struct parisc_device *dev) { struct gsc_asic *wax; struct parisc_device *parent; - struct gsc_irq gsc_irq; int ret; wax = kzalloc(sizeof(*wax), GFP_KERNEL); @@ -85,7 +84,7 @@ static int __init wax_init_chip(struct parisc_device *dev) wax_init_irq(wax); /* the IRQ wax should use */ - dev->irq = gsc_claim_irq(&gsc_irq, WAX_GSC_IRQ); + dev->irq = gsc_claim_irq(&wax->gsc_irq, WAX_GSC_IRQ); if (dev->irq < 0) { printk(KERN_ERR "%s(): cannot get GSC irq\n", __func__); @@ -93,9 +92,9 @@ static int __init 
wax_init_chip(struct parisc_device *dev) return -EBUSY; } - wax->eim = ((u32) gsc_irq.txn_addr) | gsc_irq.txn_data; + wax->eim = ((u32) wax->gsc_irq.txn_addr) | wax->gsc_irq.txn_data; - ret = request_irq(gsc_irq.irq, gsc_asic_intr, 0, "wax", wax); + ret = request_irq(wax->gsc_irq.irq, gsc_asic_intr, 0, "wax", wax); if (ret < 0) { kfree(wax); return ret; diff --git a/drivers/pci/access.c b/drivers/pci/access.c index 0d9f6b21babb1..708c7529647fd 100644 --- a/drivers/pci/access.c +++ b/drivers/pci/access.c @@ -159,9 +159,12 @@ int pci_generic_config_write32(struct pci_bus *bus, unsigned int devfn, * write happen to have any RW1C (write-one-to-clear) bits set, we * just inadvertently cleared something we shouldn't have. */ - dev_warn_ratelimited(&bus->dev, "%d-byte config write to %04x:%02x:%02x.%d offset %#x may corrupt adjacent RW1C bits\n", - size, pci_domain_nr(bus), bus->number, - PCI_SLOT(devfn), PCI_FUNC(devfn), where); + if (!bus->unsafe_warn) { + dev_warn(&bus->dev, "%d-byte config write to %04x:%02x:%02x.%d offset %#x may corrupt adjacent RW1C bits\n", + size, pci_domain_nr(bus), bus->number, + PCI_SLOT(devfn), PCI_FUNC(devfn), where); + bus->unsafe_warn = 1; + } mask = ~(((1 << (size * 8)) - 1) << ((where & 0x3) * 8)); tmp = readl(addr) & mask; diff --git a/drivers/pci/controller/dwc/pci-imx6.c b/drivers/pci/controller/dwc/pci-imx6.c index 6974bd5aa1165..343fe1429e3c2 100644 --- a/drivers/pci/controller/dwc/pci-imx6.c +++ b/drivers/pci/controller/dwc/pci-imx6.c @@ -453,10 +453,6 @@ static int imx6_pcie_enable_ref_clk(struct imx6_pcie *imx6_pcie) case IMX7D: break; case IMX8MM: - ret = clk_prepare_enable(imx6_pcie->pcie_aux); - if (ret) - dev_err(dev, "unable to enable pcie_aux clock\n"); - break; case IMX8MQ: ret = clk_prepare_enable(imx6_pcie->pcie_aux); if (ret) { @@ -809,9 +805,7 @@ static int imx6_pcie_start_link(struct dw_pcie *pci) /* Start LTSSM. 
*/ imx6_pcie_ltssm_enable(dev); - ret = dw_pcie_wait_for_link(pci); - if (ret) - goto err_reset_phy; + dw_pcie_wait_for_link(pci); if (pci->link_gen == 2) { /* Allow Gen2 mode after the link is up. */ @@ -847,11 +841,7 @@ static int imx6_pcie_start_link(struct dw_pcie *pci) } /* Make sure link training is finished as well! */ - ret = dw_pcie_wait_for_link(pci); - if (ret) { - dev_err(dev, "Failed to bring link up!\n"); - goto err_reset_phy; - } + dw_pcie_wait_for_link(pci); } else { dev_info(dev, "Link: Gen2 disabled\n"); } @@ -983,6 +973,7 @@ static int imx6_pcie_suspend_noirq(struct device *dev) case IMX8MM: if (phy_power_off(imx6_pcie->phy)) dev_err(dev, "unable to power off PHY\n"); + phy_exit(imx6_pcie->phy); break; default: break; diff --git a/drivers/pci/controller/dwc/pcie-fu740.c b/drivers/pci/controller/dwc/pcie-fu740.c index 00cde9a248b5a..78d002be4f821 100644 --- a/drivers/pci/controller/dwc/pcie-fu740.c +++ b/drivers/pci/controller/dwc/pcie-fu740.c @@ -181,10 +181,59 @@ static int fu740_pcie_start_link(struct dw_pcie *pci) { struct device *dev = pci->dev; struct fu740_pcie *afp = dev_get_drvdata(dev); + u8 cap_exp = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP); + int ret; + u32 orig, tmp; + + /* + * Force 2.5GT/s when starting the link, due to some devices not + * probing at higher speeds. This happens with the PCIe switch + * on the Unmatched board when U-Boot has not initialised the PCIe. + * The fix in U-Boot is to force 2.5GT/s, which then gets cleared + * by the soft reset done by this driver. 
+ */ + dev_dbg(dev, "cap_exp at %x\n", cap_exp); + dw_pcie_dbi_ro_wr_en(pci); + + tmp = dw_pcie_readl_dbi(pci, cap_exp + PCI_EXP_LNKCAP); + orig = tmp & PCI_EXP_LNKCAP_SLS; + tmp &= ~PCI_EXP_LNKCAP_SLS; + tmp |= PCI_EXP_LNKCAP_SLS_2_5GB; + dw_pcie_writel_dbi(pci, cap_exp + PCI_EXP_LNKCAP, tmp); /* Enable LTSSM */ writel_relaxed(0x1, afp->mgmt_base + PCIEX8MGMT_APP_LTSSM_ENABLE); - return 0; + + ret = dw_pcie_wait_for_link(pci); + if (ret) { + dev_err(dev, "error: link did not start\n"); + goto err; + } + + tmp = dw_pcie_readl_dbi(pci, cap_exp + PCI_EXP_LNKCAP); + if ((tmp & PCI_EXP_LNKCAP_SLS) != orig) { + dev_dbg(dev, "changing speed back to original\n"); + + tmp &= ~PCI_EXP_LNKCAP_SLS; + tmp |= orig; + dw_pcie_writel_dbi(pci, cap_exp + PCI_EXP_LNKCAP, tmp); + + tmp = dw_pcie_readl_dbi(pci, PCIE_LINK_WIDTH_SPEED_CONTROL); + tmp |= PORT_LOGIC_SPEED_CHANGE; + dw_pcie_writel_dbi(pci, PCIE_LINK_WIDTH_SPEED_CONTROL, tmp); + + ret = dw_pcie_wait_for_link(pci); + if (ret) { + dev_err(dev, "error: link did not start at new speed\n"); + goto err; + } + } + + ret = 0; +err: + WARN_ON(ret); /* we assume that errors will be very rare */ + dw_pcie_dbi_ro_wr_dis(pci); + return ret; } static int fu740_pcie_host_init(struct pcie_port *pp) diff --git a/drivers/pci/controller/pci-aardvark.c b/drivers/pci/controller/pci-aardvark.c index 4f5b44827d213..15348be1a8aa5 100644 --- a/drivers/pci/controller/pci-aardvark.c +++ b/drivers/pci/controller/pci-aardvark.c @@ -846,7 +846,9 @@ advk_pci_bridge_emul_pcie_conf_read(struct pci_bridge_emul *bridge, case PCI_EXP_RTSTA: { u32 isr0 = advk_readl(pcie, PCIE_ISR0_REG); u32 msglog = advk_readl(pcie, PCIE_MSG_LOG_REG); - *value = (isr0 & PCIE_MSG_PM_PME_MASK) << 16 | (msglog >> 16); + *value = msglog >> 16; + if (isr0 & PCIE_MSG_PM_PME_MASK) + *value |= PCI_EXP_RTSTA_PME; return PCI_BRIDGE_EMUL_HANDLED; } @@ -1184,7 +1186,7 @@ static void advk_msi_irq_compose_msi_msg(struct irq_data *data, msg->address_lo = lower_32_bits(msi_msg); 
msg->address_hi = upper_32_bits(msi_msg); - msg->data = data->irq; + msg->data = data->hwirq; } static int advk_msi_set_affinity(struct irq_data *irq_data, @@ -1201,15 +1203,11 @@ static int advk_msi_irq_domain_alloc(struct irq_domain *domain, int hwirq, i; mutex_lock(&pcie->msi_used_lock); - hwirq = bitmap_find_next_zero_area(pcie->msi_used, MSI_IRQ_NUM, - 0, nr_irqs, 0); - if (hwirq >= MSI_IRQ_NUM) { - mutex_unlock(&pcie->msi_used_lock); - return -ENOSPC; - } - - bitmap_set(pcie->msi_used, hwirq, nr_irqs); + hwirq = bitmap_find_free_region(pcie->msi_used, MSI_IRQ_NUM, + order_base_2(nr_irqs)); mutex_unlock(&pcie->msi_used_lock); + if (hwirq < 0) + return -ENOSPC; for (i = 0; i < nr_irqs; i++) irq_domain_set_info(domain, virq + i, hwirq + i, @@ -1227,7 +1225,7 @@ static void advk_msi_irq_domain_free(struct irq_domain *domain, struct advk_pcie *pcie = domain->host_data; mutex_lock(&pcie->msi_used_lock); - bitmap_clear(pcie->msi_used, d->hwirq, nr_irqs); + bitmap_release_region(pcie->msi_used, d->hwirq, order_base_2(nr_irqs)); mutex_unlock(&pcie->msi_used_lock); } @@ -1388,7 +1386,6 @@ static void advk_pcie_remove_irq_domain(struct advk_pcie *pcie) static void advk_pcie_handle_msi(struct advk_pcie *pcie) { u32 msi_val, msi_mask, msi_status, msi_idx; - u16 msi_data; msi_mask = advk_readl(pcie, PCIE_MSI_MASK_REG); msi_val = advk_readl(pcie, PCIE_MSI_STATUS_REG); @@ -1398,13 +1395,9 @@ static void advk_pcie_handle_msi(struct advk_pcie *pcie) if (!(BIT(msi_idx) & msi_status)) continue; - /* - * msi_idx contains bits [4:0] of the msi_data and msi_data - * contains 16bit MSI interrupt number - */ advk_writel(pcie, BIT(msi_idx), PCIE_MSI_STATUS_REG); - msi_data = advk_readl(pcie, PCIE_MSI_PAYLOAD_REG) & PCIE_MSI_DATA_MASK; - generic_handle_irq(msi_data); + if (generic_handle_domain_irq(pcie->msi_inner_domain, msi_idx) == -EINVAL) + dev_err_ratelimited(&pcie->pdev->dev, "unexpected MSI 0x%02x\n", msi_idx); } advk_writel(pcie, PCIE_ISR0_MSI_INT_PENDING, diff --git 
a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index ae0bc2fee4ca8..88b3b56d05228 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -3404,6 +3404,15 @@ static int hv_pci_probe(struct hv_device *hdev, hbus->bridge->domain_nr = dom; #ifdef CONFIG_X86 hbus->sysdata.domain = dom; +#elif defined(CONFIG_ARM64) + /* + * Set the PCI bus parent to be the corresponding VMbus + * device. Then the VMbus device will be assigned as the + * ACPI companion in pcibios_root_bridge_prepare() and + * pci_dma_configure() will propagate device coherence + * information to devices created on the bus. + */ + hbus->sysdata.parent = hdev->device.parent; #endif hbus->hdev = hdev; diff --git a/drivers/pci/controller/pci-xgene.c b/drivers/pci/controller/pci-xgene.c index 0d5acbfc7143f..7c763d820c52c 100644 --- a/drivers/pci/controller/pci-xgene.c +++ b/drivers/pci/controller/pci-xgene.c @@ -465,7 +465,7 @@ static int xgene_pcie_select_ib_reg(u8 *ib_reg_mask, u64 size) return 1; } - if ((size > SZ_1K) && (size < SZ_4G) && !(*ib_reg_mask & (1 << 0))) { + if ((size > SZ_1K) && (size < SZ_1T) && !(*ib_reg_mask & (1 << 0))) { *ib_reg_mask |= (1 << 0); return 0; } @@ -479,28 +479,27 @@ static int xgene_pcie_select_ib_reg(u8 *ib_reg_mask, u64 size) } static void xgene_pcie_setup_ib_reg(struct xgene_pcie *port, - struct resource_entry *entry, - u8 *ib_reg_mask) + struct of_pci_range *range, u8 *ib_reg_mask) { void __iomem *cfg_base = port->cfg_base; struct device *dev = port->dev; void __iomem *bar_addr; u32 pim_reg; - u64 cpu_addr = entry->res->start; - u64 pci_addr = cpu_addr - entry->offset; - u64 size = resource_size(entry->res); + u64 cpu_addr = range->cpu_addr; + u64 pci_addr = range->pci_addr; + u64 size = range->size; u64 mask = ~(size - 1) | EN_REG; u32 flags = PCI_BASE_ADDRESS_MEM_TYPE_64; u32 bar_low; int region; - region = xgene_pcie_select_ib_reg(ib_reg_mask, size); + region = xgene_pcie_select_ib_reg(ib_reg_mask, 
range->size); if (region < 0) { dev_warn(dev, "invalid pcie dma-range config\n"); return; } - if (entry->res->flags & IORESOURCE_PREFETCH) + if (range->flags & IORESOURCE_PREFETCH) flags |= PCI_BASE_ADDRESS_MEM_PREFETCH; bar_low = pcie_bar_low_val((u32)cpu_addr, flags); @@ -531,13 +530,25 @@ static void xgene_pcie_setup_ib_reg(struct xgene_pcie *port, static int xgene_pcie_parse_map_dma_ranges(struct xgene_pcie *port) { - struct pci_host_bridge *bridge = pci_host_bridge_from_priv(port); - struct resource_entry *entry; + struct device_node *np = port->node; + struct of_pci_range range; + struct of_pci_range_parser parser; + struct device *dev = port->dev; u8 ib_reg_mask = 0; - resource_list_for_each_entry(entry, &bridge->dma_ranges) - xgene_pcie_setup_ib_reg(port, entry, &ib_reg_mask); + if (of_pci_dma_range_parser_init(&parser, np)) { + dev_err(dev, "missing dma-ranges property\n"); + return -EINVAL; + } + + /* Get the dma-ranges from DT */ + for_each_of_pci_range(&parser, &range) { + u64 end = range.cpu_addr + range.size - 1; + dev_dbg(dev, "0x%08x 0x%016llx..0x%016llx -> 0x%016llx\n", + range.flags, range.cpu_addr, end, range.pci_addr); + xgene_pcie_setup_ib_reg(port, &range, &ib_reg_mask); + } return 0; } diff --git a/drivers/pci/endpoint/functions/pci-epf-test.c b/drivers/pci/endpoint/functions/pci-epf-test.c index 90d84d3bc868f..5b833f00e9800 100644 --- a/drivers/pci/endpoint/functions/pci-epf-test.c +++ b/drivers/pci/endpoint/functions/pci-epf-test.c @@ -285,7 +285,17 @@ static int pci_epf_test_copy(struct pci_epf_test *epf_test) if (ret) dev_err(dev, "Data transfer failed\n"); } else { - memcpy(dst_addr, src_addr, reg->size); + void *buf; + + buf = kzalloc(reg->size, GFP_KERNEL); + if (!buf) { + ret = -ENOMEM; + goto err_map_addr; + } + + memcpy_fromio(buf, src_addr, reg->size); + memcpy_toio(dst_addr, buf, reg->size); + kfree(buf); } ktime_get_ts64(&end); pci_epf_test_print_rate("COPY", reg->size, &start, &end, use_dma); @@ -441,7 +451,7 @@ static int 
pci_epf_test_write(struct pci_epf_test *epf_test) if (!epf_test->dma_supported) { dev_err(dev, "Cannot transfer data using DMA\n"); ret = -EINVAL; - goto err_map_addr; + goto err_dma_map; } src_phys_addr = dma_map_single(dma_dev, buf, reg->size, diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index 1c1ebf3dad43c..040ae076ec0e9 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -98,6 +98,8 @@ static int pcie_poll_cmd(struct controller *ctrl, int timeout) if (slot_status & PCI_EXP_SLTSTA_CC) { pcie_capability_write_word(pdev, PCI_EXP_SLTSTA, PCI_EXP_SLTSTA_CC); + ctrl->cmd_busy = 0; + smp_mb(); return 1; } msleep(10); @@ -1084,6 +1086,8 @@ static void quirk_cmd_compl(struct pci_dev *pdev) } DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, PCI_CLASS_BRIDGE_PCI, 8, quirk_cmd_compl); +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_QCOM, 0x0110, + PCI_CLASS_BRIDGE_PCI, 8, quirk_cmd_compl); DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_QCOM, 0x0400, PCI_CLASS_BRIDGE_PCI, 8, quirk_cmd_compl); DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_QCOM, 0x0401, diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 65f7f6b0576c6..da829274fc66d 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -1811,6 +1811,18 @@ static void quirk_alder_ioapic(struct pci_dev *pdev) DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_EESSC, quirk_alder_ioapic); #endif +static void quirk_no_msi(struct pci_dev *dev) +{ + pci_info(dev, "avoiding MSI to work around a hardware defect\n"); + dev->no_msi = 1; +} +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x4386, quirk_no_msi); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x4387, quirk_no_msi); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x4388, quirk_no_msi); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x4389, quirk_no_msi); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x438a, quirk_no_msi); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x438b, 
quirk_no_msi); + static void quirk_pcie_mch(struct pci_dev *pdev) { pdev->no_msi = 1; diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig index e1a0c44bc6864..7d6ffdf44a415 100644 --- a/drivers/perf/Kconfig +++ b/drivers/perf/Kconfig @@ -141,7 +141,7 @@ config ARM_DMC620_PMU config MARVELL_CN10K_TAD_PMU tristate "Marvell CN10K LLC-TAD PMU" - depends on ARM64 || (COMPILE_TEST && 64BIT) + depends on ARCH_THUNDER || (COMPILE_TEST && 64BIT) help Provides support for Last-Level cache Tag-and-data Units (LLC-TAD) performance monitors on CN10K family silicons. diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c index 0e48adce57ef3..71448229bc5e9 100644 --- a/drivers/perf/arm-cmn.c +++ b/drivers/perf/arm-cmn.c @@ -71,9 +71,11 @@ #define CMN_DTM_WPn(n) (0x1A0 + (n) * 0x18) #define CMN_DTM_WPn_CONFIG(n) (CMN_DTM_WPn(n) + 0x00) #define CMN_DTM_WPn_CONFIG_WP_DEV_SEL2 GENMASK_ULL(18,17) -#define CMN_DTM_WPn_CONFIG_WP_COMBINE BIT(6) -#define CMN_DTM_WPn_CONFIG_WP_EXCLUSIVE BIT(5) -#define CMN_DTM_WPn_CONFIG_WP_GRP BIT(4) +#define CMN_DTM_WPn_CONFIG_WP_COMBINE BIT(9) +#define CMN_DTM_WPn_CONFIG_WP_EXCLUSIVE BIT(8) +#define CMN600_WPn_CONFIG_WP_COMBINE BIT(6) +#define CMN600_WPn_CONFIG_WP_EXCLUSIVE BIT(5) +#define CMN_DTM_WPn_CONFIG_WP_GRP GENMASK_ULL(5, 4) #define CMN_DTM_WPn_CONFIG_WP_CHN_SEL GENMASK_ULL(3, 1) #define CMN_DTM_WPn_CONFIG_WP_DEV_SEL BIT(0) #define CMN_DTM_WPn_VAL(n) (CMN_DTM_WPn(n) + 0x08) @@ -155,6 +157,7 @@ #define CMN_CONFIG_WP_COMBINE GENMASK_ULL(27, 24) #define CMN_CONFIG_WP_DEV_SEL GENMASK_ULL(50, 48) #define CMN_CONFIG_WP_CHN_SEL GENMASK_ULL(55, 51) +/* Note that we don't yet support the tertiary match group on newer IPs */ #define CMN_CONFIG_WP_GRP BIT_ULL(56) #define CMN_CONFIG_WP_EXCLUSIVE BIT_ULL(57) #define CMN_CONFIG1_WP_VAL GENMASK_ULL(63, 0) @@ -595,6 +598,9 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj, if ((intf & 4) && !(cmn->ports_used & BIT(intf & 3))) return 0; + if (chan == 4 && cmn->model == CMN600) + 
return 0; + if ((chan == 5 && cmn->rsp_vc_num < 2) || (chan == 6 && cmn->dat_vc_num < 2)) return 0; @@ -905,15 +911,18 @@ static u32 arm_cmn_wp_config(struct perf_event *event) u32 grp = CMN_EVENT_WP_GRP(event); u32 exc = CMN_EVENT_WP_EXCLUSIVE(event); u32 combine = CMN_EVENT_WP_COMBINE(event); + bool is_cmn600 = to_cmn(event->pmu)->model == CMN600; config = FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_DEV_SEL, dev) | FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_CHN_SEL, chn) | FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_GRP, grp) | - FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_EXCLUSIVE, exc) | FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_DEV_SEL2, dev >> 1); + if (exc) + config |= is_cmn600 ? CMN600_WPn_CONFIG_WP_EXCLUSIVE : + CMN_DTM_WPn_CONFIG_WP_EXCLUSIVE; if (combine && !grp) - config |= CMN_DTM_WPn_CONFIG_WP_COMBINE; - + config |= is_cmn600 ? CMN600_WPn_CONFIG_WP_COMBINE : + CMN_DTM_WPn_CONFIG_WP_COMBINE; return config; } diff --git a/drivers/perf/fsl_imx8_ddr_perf.c b/drivers/perf/fsl_imx8_ddr_perf.c index 94ebc1ecace7c..b1b2a55de77fc 100644 --- a/drivers/perf/fsl_imx8_ddr_perf.c +++ b/drivers/perf/fsl_imx8_ddr_perf.c @@ -29,7 +29,7 @@ #define CNTL_OVER_MASK 0xFFFFFFFE #define CNTL_CSV_SHIFT 24 -#define CNTL_CSV_MASK (0xFF << CNTL_CSV_SHIFT) +#define CNTL_CSV_MASK (0xFFU << CNTL_CSV_SHIFT) #define EVENT_CYCLES_ID 0 #define EVENT_CYCLES_COUNTER 0 diff --git a/drivers/perf/qcom_l2_pmu.c b/drivers/perf/qcom_l2_pmu.c index 7640491aab123..30234c261b05c 100644 --- a/drivers/perf/qcom_l2_pmu.c +++ b/drivers/perf/qcom_l2_pmu.c @@ -736,7 +736,7 @@ static struct cluster_pmu *l2_cache_associate_cpu_with_cluster( { u64 mpidr; int cpu_cluster_id; - struct cluster_pmu *cluster = NULL; + struct cluster_pmu *cluster; /* * This assumes that the cluster_id is in MPIDR[aff1] for @@ -758,10 +758,10 @@ static struct cluster_pmu *l2_cache_associate_cpu_with_cluster( cluster->cluster_id); cpumask_set_cpu(cpu, &cluster->cluster_cpus); *per_cpu_ptr(l2cache_pmu->pmu_cluster, cpu) = cluster; - break; + return cluster; } - return cluster; + 
return NULL; } static int l2cache_pmu_online_cpu(unsigned int cpu, struct hlist_node *node) diff --git a/drivers/phy/amlogic/phy-meson-gxl-usb2.c b/drivers/phy/amlogic/phy-meson-gxl-usb2.c index 2b3c0d730f20f..db17c3448bfed 100644 --- a/drivers/phy/amlogic/phy-meson-gxl-usb2.c +++ b/drivers/phy/amlogic/phy-meson-gxl-usb2.c @@ -114,8 +114,10 @@ static int phy_meson_gxl_usb2_init(struct phy *phy) return ret; ret = clk_prepare_enable(priv->clk); - if (ret) + if (ret) { + reset_control_rearm(priv->reset); return ret; + } return 0; } @@ -125,6 +127,7 @@ static int phy_meson_gxl_usb2_exit(struct phy *phy) struct phy_meson_gxl_usb2_priv *priv = phy_get_drvdata(phy); clk_disable_unprepare(priv->clk); + reset_control_rearm(priv->reset); return 0; } diff --git a/drivers/phy/amlogic/phy-meson8b-usb2.c b/drivers/phy/amlogic/phy-meson8b-usb2.c index cf10bed40528a..dd96763911b8b 100644 --- a/drivers/phy/amlogic/phy-meson8b-usb2.c +++ b/drivers/phy/amlogic/phy-meson8b-usb2.c @@ -154,6 +154,7 @@ static int phy_meson8b_usb2_power_on(struct phy *phy) ret = clk_prepare_enable(priv->clk_usb_general); if (ret) { dev_err(&phy->dev, "Failed to enable USB general clock\n"); + reset_control_rearm(priv->reset); return ret; } @@ -161,6 +162,7 @@ static int phy_meson8b_usb2_power_on(struct phy *phy) if (ret) { dev_err(&phy->dev, "Failed to enable USB DDR clock\n"); clk_disable_unprepare(priv->clk_usb_general); + reset_control_rearm(priv->reset); return ret; } @@ -199,6 +201,7 @@ static int phy_meson8b_usb2_power_on(struct phy *phy) dev_warn(&phy->dev, "USB ID detect failed!\n"); clk_disable_unprepare(priv->clk_usb); clk_disable_unprepare(priv->clk_usb_general); + reset_control_rearm(priv->reset); return -EINVAL; } } @@ -218,6 +221,7 @@ static int phy_meson8b_usb2_power_off(struct phy *phy) clk_disable_unprepare(priv->clk_usb); clk_disable_unprepare(priv->clk_usb_general); + reset_control_rearm(priv->reset); /* power off the PHY by putting it into reset mode */ regmap_update_bits(priv->regmap, 
REG_CTRL, REG_CTRL_POWER_ON_RESET, @@ -265,8 +269,9 @@ static int phy_meson8b_usb2_probe(struct platform_device *pdev) return PTR_ERR(priv->clk_usb); priv->reset = devm_reset_control_get_optional_shared(&pdev->dev, NULL); - if (PTR_ERR(priv->reset) == -EPROBE_DEFER) - return PTR_ERR(priv->reset); + if (IS_ERR(priv->reset)) + return dev_err_probe(&pdev->dev, PTR_ERR(priv->reset), + "Failed to get the reset line"); priv->dr_mode = of_usb_get_dr_mode_by_phy(pdev->dev.of_node, -1); if (priv->dr_mode == USB_DR_MODE_UNKNOWN) { diff --git a/drivers/phy/broadcom/phy-brcm-usb-init.c b/drivers/phy/broadcom/phy-brcm-usb-init.c index 9391ab42a12b3..dd0f66288fbdd 100644 --- a/drivers/phy/broadcom/phy-brcm-usb-init.c +++ b/drivers/phy/broadcom/phy-brcm-usb-init.c @@ -79,6 +79,7 @@ enum brcm_family_type { BRCM_FAMILY_3390A0, + BRCM_FAMILY_4908, BRCM_FAMILY_7250B0, BRCM_FAMILY_7271A0, BRCM_FAMILY_7364A0, @@ -96,6 +97,7 @@ enum brcm_family_type { static const char *family_names[BRCM_FAMILY_COUNT] = { USB_BRCM_FAMILY(3390A0), + USB_BRCM_FAMILY(4908), USB_BRCM_FAMILY(7250B0), USB_BRCM_FAMILY(7271A0), USB_BRCM_FAMILY(7364A0), @@ -203,6 +205,27 @@ usb_reg_bits_map_table[BRCM_FAMILY_COUNT][USB_CTRL_SELECTOR_COUNT] = { USB_CTRL_USB_PM_USB20_HC_RESETB_VAR_MASK, ENDIAN_SETTINGS, /* USB_CTRL_SETUP ENDIAN bits */ }, + /* 4908 */ + [BRCM_FAMILY_4908] = { + 0, /* USB_CTRL_SETUP_SCB1_EN_MASK */ + 0, /* USB_CTRL_SETUP_SCB2_EN_MASK */ + 0, /* USB_CTRL_SETUP_SS_EHCI64BIT_EN_MASK */ + 0, /* USB_CTRL_SETUP_STRAP_IPP_SEL_MASK */ + 0, /* USB_CTRL_SETUP_OC3_DISABLE_MASK */ + 0, /* USB_CTRL_PLL_CTL_PLL_IDDQ_PWRDN_MASK */ + 0, /* USB_CTRL_USB_PM_BDC_SOFT_RESETB_MASK */ + USB_CTRL_USB_PM_XHC_SOFT_RESETB_MASK, + USB_CTRL_USB_PM_USB_PWRDN_MASK, + 0, /* USB_CTRL_USB30_CTL1_XHC_SOFT_RESETB_MASK */ + 0, /* USB_CTRL_USB30_CTL1_USB3_IOC_MASK */ + 0, /* USB_CTRL_USB30_CTL1_USB3_IPP_MASK */ + 0, /* USB_CTRL_USB_DEVICE_CTL1_PORT_MODE_MASK */ + 0, /* USB_CTRL_USB_PM_SOFT_RESET_MASK */ + 0, /* 
USB_CTRL_SETUP_CC_DRD_MODE_ENABLE_MASK */ + 0, /* USB_CTRL_SETUP_STRAP_CC_DRD_MODE_ENABLE_SEL_MASK */ + 0, /* USB_CTRL_USB_PM_USB20_HC_RESETB_VAR_MASK */ + 0, /* USB_CTRL_SETUP ENDIAN bits */ + }, /* 7250b0 */ [BRCM_FAMILY_7250B0] = { USB_CTRL_SETUP_SCB1_EN_MASK, @@ -559,6 +582,7 @@ static void brcmusb_usb3_pll_54mhz(struct brcm_usb_init_params *params) */ switch (params->selected_family) { case BRCM_FAMILY_3390A0: + case BRCM_FAMILY_4908: case BRCM_FAMILY_7250B0: case BRCM_FAMILY_7366C0: case BRCM_FAMILY_74371A0: @@ -1004,6 +1028,18 @@ static const struct brcm_usb_init_ops bcm7445_ops = { .set_dual_select = usb_set_dual_select, }; +void brcm_usb_dvr_init_4908(struct brcm_usb_init_params *params) +{ + int fam; + + fam = BRCM_FAMILY_4908; + params->selected_family = fam; + params->usb_reg_bits_map = + &usb_reg_bits_map_table[fam][0]; + params->family_name = family_names[fam]; + params->ops = &bcm7445_ops; +} + void brcm_usb_dvr_init_7445(struct brcm_usb_init_params *params) { int fam; diff --git a/drivers/phy/broadcom/phy-brcm-usb-init.h b/drivers/phy/broadcom/phy-brcm-usb-init.h index a39f30fa2e991..1ccb5ddab865c 100644 --- a/drivers/phy/broadcom/phy-brcm-usb-init.h +++ b/drivers/phy/broadcom/phy-brcm-usb-init.h @@ -64,6 +64,7 @@ struct brcm_usb_init_params { bool suspend_with_clocks; }; +void brcm_usb_dvr_init_4908(struct brcm_usb_init_params *params); void brcm_usb_dvr_init_7445(struct brcm_usb_init_params *params); void brcm_usb_dvr_init_7216(struct brcm_usb_init_params *params); void brcm_usb_dvr_init_7211b0(struct brcm_usb_init_params *params); diff --git a/drivers/phy/broadcom/phy-brcm-usb.c b/drivers/phy/broadcom/phy-brcm-usb.c index 0f1deb6e0eabf..2cb3779fcdf82 100644 --- a/drivers/phy/broadcom/phy-brcm-usb.c +++ b/drivers/phy/broadcom/phy-brcm-usb.c @@ -283,6 +283,15 @@ static const struct attribute_group brcm_usb_phy_group = { .attrs = brcm_usb_phy_attrs, }; +static const struct match_chip_info chip_info_4908 = { + .init_func = &brcm_usb_dvr_init_4908, + 
.required_regs = { + BRCM_REGS_CTRL, + BRCM_REGS_XHCI_EC, + -1, + }, +}; + static const struct match_chip_info chip_info_7216 = { .init_func = &brcm_usb_dvr_init_7216, .required_regs = { @@ -318,7 +327,7 @@ static const struct match_chip_info chip_info_7445 = { static const struct of_device_id brcm_usb_dt_ids[] = { { .compatible = "brcm,bcm4908-usb-phy", - .data = &chip_info_7445, + .data = &chip_info_4908, }, { .compatible = "brcm,bcm7216-usb-phy", diff --git a/drivers/phy/phy-core-mipi-dphy.c b/drivers/phy/phy-core-mipi-dphy.c index ccb4045685cdd..929e86d6558e0 100644 --- a/drivers/phy/phy-core-mipi-dphy.c +++ b/drivers/phy/phy-core-mipi-dphy.c @@ -64,10 +64,10 @@ int phy_mipi_dphy_get_default_config(unsigned long pixel_clock, cfg->hs_trail = max(4 * 8 * ui, 60000 + 4 * 4 * ui); cfg->init = 100; - cfg->lpx = 60000; + cfg->lpx = 50000; cfg->ta_get = 5 * cfg->lpx; cfg->ta_go = 4 * cfg->lpx; - cfg->ta_sure = 2 * cfg->lpx; + cfg->ta_sure = cfg->lpx; cfg->wakeup = 1000; cfg->hs_clk_rate = hs_clk_rate; diff --git a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c index 5f7c421ab6e76..334cb85855a93 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c +++ b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c @@ -1038,6 +1038,7 @@ int mtk_pctrl_init(struct platform_device *pdev, node = of_parse_phandle(np, "mediatek,pctl-regmap", 0); if (node) { pctl->regmap1 = syscon_node_to_regmap(node); + of_node_put(node); if (IS_ERR(pctl->regmap1)) return PTR_ERR(pctl->regmap1); } else if (regmap) { @@ -1051,6 +1052,7 @@ int mtk_pctrl_init(struct platform_device *pdev, node = of_parse_phandle(np, "mediatek,pctl-regmap", 1); if (node) { pctl->regmap2 = syscon_node_to_regmap(node); + of_node_put(node); if (IS_ERR(pctl->regmap2)) return PTR_ERR(pctl->regmap2); } diff --git a/drivers/pinctrl/mediatek/pinctrl-paris.c b/drivers/pinctrl/mediatek/pinctrl-paris.c index f9f9110f2107d..fe6cf068c4f41 100644 --- 
a/drivers/pinctrl/mediatek/pinctrl-paris.c +++ b/drivers/pinctrl/mediatek/pinctrl-paris.c @@ -96,20 +96,16 @@ static int mtk_pinconf_get(struct pinctrl_dev *pctldev, err = hw->soc->bias_get_combo(hw, desc, &pullup, &ret); if (err) goto out; + if (ret == MTK_PUPD_SET_R1R0_00) + ret = MTK_DISABLE; if (param == PIN_CONFIG_BIAS_DISABLE) { - if (ret == MTK_PUPD_SET_R1R0_00) - ret = MTK_DISABLE; + if (ret != MTK_DISABLE) + err = -EINVAL; } else if (param == PIN_CONFIG_BIAS_PULL_UP) { - /* When desire to get pull-up value, return - * error if current setting is pull-down - */ - if (!pullup) + if (!pullup || ret == MTK_DISABLE) err = -EINVAL; } else if (param == PIN_CONFIG_BIAS_PULL_DOWN) { - /* When desire to get pull-down value, return - * error if current setting is pull-up - */ - if (pullup) + if (pullup || ret == MTK_DISABLE) err = -EINVAL; } } else { @@ -188,8 +184,7 @@ static int mtk_pinconf_get(struct pinctrl_dev *pctldev, } static int mtk_pinconf_set(struct pinctrl_dev *pctldev, unsigned int pin, - enum pin_config_param param, - enum pin_config_param arg) + enum pin_config_param param, u32 arg) { struct mtk_pinctrl *hw = pinctrl_dev_get_drvdata(pctldev); const struct mtk_pin_desc *desc; @@ -586,6 +581,9 @@ ssize_t mtk_pctrl_show_one_pin(struct mtk_pinctrl *hw, if (gpio >= hw->soc->npins) return -EINVAL; + if (mtk_is_virt_gpio(hw, gpio)) + return -EINVAL; + desc = (const struct mtk_pin_desc *)&hw->soc->pins[gpio]; pinmux = mtk_pctrl_get_pinmux(hw, gpio); if (pinmux >= hw->soc->nfuncs) @@ -737,10 +735,10 @@ static int mtk_pconf_group_get(struct pinctrl_dev *pctldev, unsigned group, unsigned long *config) { struct mtk_pinctrl *hw = pinctrl_dev_get_drvdata(pctldev); + struct mtk_pinctrl_group *grp = &hw->groups[group]; - *config = hw->groups[group].config; - - return 0; + /* One pin per group only */ + return mtk_pinconf_get(pctldev, grp->pin, config); } static int mtk_pconf_group_set(struct pinctrl_dev *pctldev, unsigned group, @@ -756,8 +754,6 @@ static int 
mtk_pconf_group_set(struct pinctrl_dev *pctldev, unsigned group, pinconf_to_config_argument(configs[i])); if (ret < 0) return ret; - - grp->config = configs[i]; } return 0; @@ -988,7 +984,7 @@ int mtk_paris_pinctrl_probe(struct platform_device *pdev, hw->nbase = hw->soc->nbase_names; if (of_find_property(hw->dev->of_node, - "mediatek,rsel_resistance_in_si_unit", NULL)) + "mediatek,rsel-resistance-in-si-unit", NULL)) hw->rsel_si_unit = true; else hw->rsel_si_unit = false; diff --git a/drivers/pinctrl/nomadik/pinctrl-nomadik.c b/drivers/pinctrl/nomadik/pinctrl-nomadik.c index 39828e9c3120a..4757bf964d3cd 100644 --- a/drivers/pinctrl/nomadik/pinctrl-nomadik.c +++ b/drivers/pinctrl/nomadik/pinctrl-nomadik.c @@ -1883,8 +1883,10 @@ static int nmk_pinctrl_probe(struct platform_device *pdev) } prcm_np = of_parse_phandle(np, "prcm", 0); - if (prcm_np) + if (prcm_np) { npct->prcm_base = of_iomap(prcm_np, 0); + of_node_put(prcm_np); + } if (!npct->prcm_base) { if (version == PINCTRL_NMK_STN8815) { dev_info(&pdev->dev, diff --git a/drivers/pinctrl/nuvoton/pinctrl-npcm7xx.c b/drivers/pinctrl/nuvoton/pinctrl-npcm7xx.c index 4d81908d6725d..41136f63014a4 100644 --- a/drivers/pinctrl/nuvoton/pinctrl-npcm7xx.c +++ b/drivers/pinctrl/nuvoton/pinctrl-npcm7xx.c @@ -78,7 +78,6 @@ struct npcm7xx_gpio { struct gpio_chip gc; int irqbase; int irq; - void *priv; struct irq_chip irq_chip; u32 pinctrl_id; int (*direction_input)(struct gpio_chip *chip, unsigned offset); @@ -226,7 +225,7 @@ static void npcmgpio_irq_handler(struct irq_desc *desc) chained_irq_enter(chip, desc); sts = ioread32(bank->base + NPCM7XX_GP_N_EVST); en = ioread32(bank->base + NPCM7XX_GP_N_EVEN); - dev_dbg(chip->parent_device, "==> got irq sts %.8x %.8x\n", sts, + dev_dbg(bank->gc.parent, "==> got irq sts %.8x %.8x\n", sts, en); sts &= en; @@ -241,33 +240,33 @@ static int npcmgpio_set_irq_type(struct irq_data *d, unsigned int type) gpiochip_get_data(irq_data_get_irq_chip_data(d)); unsigned int gpio = BIT(d->hwirq); - 
dev_dbg(d->chip->parent_device, "setirqtype: %u.%u = %u\n", gpio, + dev_dbg(bank->gc.parent, "setirqtype: %u.%u = %u\n", gpio, d->irq, type); switch (type) { case IRQ_TYPE_EDGE_RISING: - dev_dbg(d->chip->parent_device, "edge.rising\n"); + dev_dbg(bank->gc.parent, "edge.rising\n"); npcm_gpio_clr(&bank->gc, bank->base + NPCM7XX_GP_N_EVBE, gpio); npcm_gpio_clr(&bank->gc, bank->base + NPCM7XX_GP_N_POL, gpio); break; case IRQ_TYPE_EDGE_FALLING: - dev_dbg(d->chip->parent_device, "edge.falling\n"); + dev_dbg(bank->gc.parent, "edge.falling\n"); npcm_gpio_clr(&bank->gc, bank->base + NPCM7XX_GP_N_EVBE, gpio); npcm_gpio_set(&bank->gc, bank->base + NPCM7XX_GP_N_POL, gpio); break; case IRQ_TYPE_EDGE_BOTH: - dev_dbg(d->chip->parent_device, "edge.both\n"); + dev_dbg(bank->gc.parent, "edge.both\n"); npcm_gpio_set(&bank->gc, bank->base + NPCM7XX_GP_N_EVBE, gpio); break; case IRQ_TYPE_LEVEL_LOW: - dev_dbg(d->chip->parent_device, "level.low\n"); + dev_dbg(bank->gc.parent, "level.low\n"); npcm_gpio_set(&bank->gc, bank->base + NPCM7XX_GP_N_POL, gpio); break; case IRQ_TYPE_LEVEL_HIGH: - dev_dbg(d->chip->parent_device, "level.high\n"); + dev_dbg(bank->gc.parent, "level.high\n"); npcm_gpio_clr(&bank->gc, bank->base + NPCM7XX_GP_N_POL, gpio); break; default: - dev_dbg(d->chip->parent_device, "invalid irq type\n"); + dev_dbg(bank->gc.parent, "invalid irq type\n"); return -EINVAL; } @@ -289,7 +288,7 @@ static void npcmgpio_irq_ack(struct irq_data *d) gpiochip_get_data(irq_data_get_irq_chip_data(d)); unsigned int gpio = d->hwirq; - dev_dbg(d->chip->parent_device, "irq_ack: %u.%u\n", gpio, d->irq); + dev_dbg(bank->gc.parent, "irq_ack: %u.%u\n", gpio, d->irq); iowrite32(BIT(gpio), bank->base + NPCM7XX_GP_N_EVST); } @@ -301,7 +300,7 @@ static void npcmgpio_irq_mask(struct irq_data *d) unsigned int gpio = d->hwirq; /* Clear events */ - dev_dbg(d->chip->parent_device, "irq_mask: %u.%u\n", gpio, d->irq); + dev_dbg(bank->gc.parent, "irq_mask: %u.%u\n", gpio, d->irq); iowrite32(BIT(gpio), bank->base 
+ NPCM7XX_GP_N_EVENC); } @@ -313,7 +312,7 @@ static void npcmgpio_irq_unmask(struct irq_data *d) unsigned int gpio = d->hwirq; /* Enable events */ - dev_dbg(d->chip->parent_device, "irq_unmask: %u.%u\n", gpio, d->irq); + dev_dbg(bank->gc.parent, "irq_unmask: %u.%u\n", gpio, d->irq); iowrite32(BIT(gpio), bank->base + NPCM7XX_GP_N_EVENS); } @@ -323,7 +322,7 @@ static unsigned int npcmgpio_irq_startup(struct irq_data *d) unsigned int gpio = d->hwirq; /* active-high, input, clear interrupt, enable interrupt */ - dev_dbg(d->chip->parent_device, "startup: %u.%u\n", gpio, d->irq); + dev_dbg(gc->parent, "startup: %u.%u\n", gpio, d->irq); npcmgpio_direction_input(gc, gpio); npcmgpio_irq_ack(d); npcmgpio_irq_unmask(d); @@ -905,7 +904,7 @@ static struct npcm7xx_func npcm7xx_funcs[] = { #define DRIVE_STRENGTH_HI_SHIFT 12 #define DRIVE_STRENGTH_MASK 0x0000FF00 -#define DS(lo, hi) (((lo) << DRIVE_STRENGTH_LO_SHIFT) | \ +#define DSTR(lo, hi) (((lo) << DRIVE_STRENGTH_LO_SHIFT) | \ ((hi) << DRIVE_STRENGTH_HI_SHIFT)) #define DSLO(x) (((x) >> DRIVE_STRENGTH_LO_SHIFT) & 0xF) #define DSHI(x) (((x) >> DRIVE_STRENGTH_HI_SHIFT) & 0xF) @@ -925,31 +924,31 @@ struct npcm7xx_pincfg { static const struct npcm7xx_pincfg pincfg[] = { /* PIN FUNCTION 1 FUNCTION 2 FUNCTION 3 FLAGS */ NPCM7XX_PINCFG(0, iox1, MFSEL1, 30, none, NONE, 0, none, NONE, 0, 0), - NPCM7XX_PINCFG(1, iox1, MFSEL1, 30, none, NONE, 0, none, NONE, 0, DS(8, 12)), - NPCM7XX_PINCFG(2, iox1, MFSEL1, 30, none, NONE, 0, none, NONE, 0, DS(8, 12)), + NPCM7XX_PINCFG(1, iox1, MFSEL1, 30, none, NONE, 0, none, NONE, 0, DSTR(8, 12)), + NPCM7XX_PINCFG(2, iox1, MFSEL1, 30, none, NONE, 0, none, NONE, 0, DSTR(8, 12)), NPCM7XX_PINCFG(3, iox1, MFSEL1, 30, none, NONE, 0, none, NONE, 0, 0), NPCM7XX_PINCFG(4, iox2, MFSEL3, 14, smb1d, I2CSEGSEL, 7, none, NONE, 0, SLEW), NPCM7XX_PINCFG(5, iox2, MFSEL3, 14, smb1d, I2CSEGSEL, 7, none, NONE, 0, SLEW), NPCM7XX_PINCFG(6, iox2, MFSEL3, 14, smb2d, I2CSEGSEL, 10, none, NONE, 0, SLEW), NPCM7XX_PINCFG(7, iox2, 
MFSEL3, 14, smb2d, I2CSEGSEL, 10, none, NONE, 0, SLEW), - NPCM7XX_PINCFG(8, lkgpo1, FLOCKR1, 4, none, NONE, 0, none, NONE, 0, DS(8, 12)), - NPCM7XX_PINCFG(9, lkgpo2, FLOCKR1, 8, none, NONE, 0, none, NONE, 0, DS(8, 12)), - NPCM7XX_PINCFG(10, ioxh, MFSEL3, 18, none, NONE, 0, none, NONE, 0, DS(8, 12)), - NPCM7XX_PINCFG(11, ioxh, MFSEL3, 18, none, NONE, 0, none, NONE, 0, DS(8, 12)), + NPCM7XX_PINCFG(8, lkgpo1, FLOCKR1, 4, none, NONE, 0, none, NONE, 0, DSTR(8, 12)), + NPCM7XX_PINCFG(9, lkgpo2, FLOCKR1, 8, none, NONE, 0, none, NONE, 0, DSTR(8, 12)), + NPCM7XX_PINCFG(10, ioxh, MFSEL3, 18, none, NONE, 0, none, NONE, 0, DSTR(8, 12)), + NPCM7XX_PINCFG(11, ioxh, MFSEL3, 18, none, NONE, 0, none, NONE, 0, DSTR(8, 12)), NPCM7XX_PINCFG(12, gspi, MFSEL1, 24, smb5b, I2CSEGSEL, 19, none, NONE, 0, SLEW), NPCM7XX_PINCFG(13, gspi, MFSEL1, 24, smb5b, I2CSEGSEL, 19, none, NONE, 0, SLEW), NPCM7XX_PINCFG(14, gspi, MFSEL1, 24, smb5c, I2CSEGSEL, 20, none, NONE, 0, SLEW), NPCM7XX_PINCFG(15, gspi, MFSEL1, 24, smb5c, I2CSEGSEL, 20, none, NONE, 0, SLEW), - NPCM7XX_PINCFG(16, lkgpo0, FLOCKR1, 0, none, NONE, 0, none, NONE, 0, DS(8, 12)), - NPCM7XX_PINCFG(17, pspi2, MFSEL3, 13, smb4den, I2CSEGSEL, 23, none, NONE, 0, DS(8, 12)), - NPCM7XX_PINCFG(18, pspi2, MFSEL3, 13, smb4b, I2CSEGSEL, 14, none, NONE, 0, DS(8, 12)), - NPCM7XX_PINCFG(19, pspi2, MFSEL3, 13, smb4b, I2CSEGSEL, 14, none, NONE, 0, DS(8, 12)), + NPCM7XX_PINCFG(16, lkgpo0, FLOCKR1, 0, none, NONE, 0, none, NONE, 0, DSTR(8, 12)), + NPCM7XX_PINCFG(17, pspi2, MFSEL3, 13, smb4den, I2CSEGSEL, 23, none, NONE, 0, DSTR(8, 12)), + NPCM7XX_PINCFG(18, pspi2, MFSEL3, 13, smb4b, I2CSEGSEL, 14, none, NONE, 0, DSTR(8, 12)), + NPCM7XX_PINCFG(19, pspi2, MFSEL3, 13, smb4b, I2CSEGSEL, 14, none, NONE, 0, DSTR(8, 12)), NPCM7XX_PINCFG(20, smb4c, I2CSEGSEL, 15, smb15, MFSEL3, 8, none, NONE, 0, 0), NPCM7XX_PINCFG(21, smb4c, I2CSEGSEL, 15, smb15, MFSEL3, 8, none, NONE, 0, 0), NPCM7XX_PINCFG(22, smb4d, I2CSEGSEL, 16, smb14, MFSEL3, 7, none, NONE, 0, 0), 
NPCM7XX_PINCFG(23, smb4d, I2CSEGSEL, 16, smb14, MFSEL3, 7, none, NONE, 0, 0), - NPCM7XX_PINCFG(24, ioxh, MFSEL3, 18, none, NONE, 0, none, NONE, 0, DS(8, 12)), - NPCM7XX_PINCFG(25, ioxh, MFSEL3, 18, none, NONE, 0, none, NONE, 0, DS(8, 12)), + NPCM7XX_PINCFG(24, ioxh, MFSEL3, 18, none, NONE, 0, none, NONE, 0, DSTR(8, 12)), + NPCM7XX_PINCFG(25, ioxh, MFSEL3, 18, none, NONE, 0, none, NONE, 0, DSTR(8, 12)), NPCM7XX_PINCFG(26, smb5, MFSEL1, 2, none, NONE, 0, none, NONE, 0, 0), NPCM7XX_PINCFG(27, smb5, MFSEL1, 2, none, NONE, 0, none, NONE, 0, 0), NPCM7XX_PINCFG(28, smb4, MFSEL1, 1, none, NONE, 0, none, NONE, 0, 0), @@ -965,12 +964,12 @@ static const struct npcm7xx_pincfg pincfg[] = { NPCM7XX_PINCFG(39, smb3b, I2CSEGSEL, 11, none, NONE, 0, none, NONE, 0, SLEW), NPCM7XX_PINCFG(40, smb3b, I2CSEGSEL, 11, none, NONE, 0, none, NONE, 0, SLEW), NPCM7XX_PINCFG(41, bmcuart0a, MFSEL1, 9, none, NONE, 0, none, NONE, 0, 0), - NPCM7XX_PINCFG(42, bmcuart0a, MFSEL1, 9, none, NONE, 0, none, NONE, 0, DS(2, 4) | GPO), + NPCM7XX_PINCFG(42, bmcuart0a, MFSEL1, 9, none, NONE, 0, none, NONE, 0, DSTR(2, 4) | GPO), NPCM7XX_PINCFG(43, uart1, MFSEL1, 10, jtag2, MFSEL4, 0, bmcuart1, MFSEL3, 24, 0), NPCM7XX_PINCFG(44, uart1, MFSEL1, 10, jtag2, MFSEL4, 0, bmcuart1, MFSEL3, 24, 0), NPCM7XX_PINCFG(45, uart1, MFSEL1, 10, jtag2, MFSEL4, 0, none, NONE, 0, 0), - NPCM7XX_PINCFG(46, uart1, MFSEL1, 10, jtag2, MFSEL4, 0, none, NONE, 0, DS(2, 8)), - NPCM7XX_PINCFG(47, uart1, MFSEL1, 10, jtag2, MFSEL4, 0, none, NONE, 0, DS(2, 8)), + NPCM7XX_PINCFG(46, uart1, MFSEL1, 10, jtag2, MFSEL4, 0, none, NONE, 0, DSTR(2, 8)), + NPCM7XX_PINCFG(47, uart1, MFSEL1, 10, jtag2, MFSEL4, 0, none, NONE, 0, DSTR(2, 8)), NPCM7XX_PINCFG(48, uart2, MFSEL1, 11, bmcuart0b, MFSEL4, 1, none, NONE, 0, GPO), NPCM7XX_PINCFG(49, uart2, MFSEL1, 11, bmcuart0b, MFSEL4, 1, none, NONE, 0, 0), NPCM7XX_PINCFG(50, uart2, MFSEL1, 11, none, NONE, 0, none, NONE, 0, 0), @@ -980,8 +979,8 @@ static const struct npcm7xx_pincfg pincfg[] = { NPCM7XX_PINCFG(54, 
uart2, MFSEL1, 11, none, NONE, 0, none, NONE, 0, 0), NPCM7XX_PINCFG(55, uart2, MFSEL1, 11, none, NONE, 0, none, NONE, 0, 0), NPCM7XX_PINCFG(56, r1err, MFSEL1, 12, none, NONE, 0, none, NONE, 0, 0), - NPCM7XX_PINCFG(57, r1md, MFSEL1, 13, none, NONE, 0, none, NONE, 0, DS(2, 4)), - NPCM7XX_PINCFG(58, r1md, MFSEL1, 13, none, NONE, 0, none, NONE, 0, DS(2, 4)), + NPCM7XX_PINCFG(57, r1md, MFSEL1, 13, none, NONE, 0, none, NONE, 0, DSTR(2, 4)), + NPCM7XX_PINCFG(58, r1md, MFSEL1, 13, none, NONE, 0, none, NONE, 0, DSTR(2, 4)), NPCM7XX_PINCFG(59, smb3d, I2CSEGSEL, 13, none, NONE, 0, none, NONE, 0, 0), NPCM7XX_PINCFG(60, smb3d, I2CSEGSEL, 13, none, NONE, 0, none, NONE, 0, 0), NPCM7XX_PINCFG(61, uart1, MFSEL1, 10, none, NONE, 0, none, NONE, 0, GPO), @@ -1004,19 +1003,19 @@ static const struct npcm7xx_pincfg pincfg[] = { NPCM7XX_PINCFG(77, fanin13, MFSEL2, 13, none, NONE, 0, none, NONE, 0, 0), NPCM7XX_PINCFG(78, fanin14, MFSEL2, 14, none, NONE, 0, none, NONE, 0, 0), NPCM7XX_PINCFG(79, fanin15, MFSEL2, 15, none, NONE, 0, none, NONE, 0, 0), - NPCM7XX_PINCFG(80, pwm0, MFSEL2, 16, none, NONE, 0, none, NONE, 0, DS(4, 8)), - NPCM7XX_PINCFG(81, pwm1, MFSEL2, 17, none, NONE, 0, none, NONE, 0, DS(4, 8)), - NPCM7XX_PINCFG(82, pwm2, MFSEL2, 18, none, NONE, 0, none, NONE, 0, DS(4, 8)), - NPCM7XX_PINCFG(83, pwm3, MFSEL2, 19, none, NONE, 0, none, NONE, 0, DS(4, 8)), - NPCM7XX_PINCFG(84, r2, MFSEL1, 14, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), - NPCM7XX_PINCFG(85, r2, MFSEL1, 14, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), - NPCM7XX_PINCFG(86, r2, MFSEL1, 14, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), + NPCM7XX_PINCFG(80, pwm0, MFSEL2, 16, none, NONE, 0, none, NONE, 0, DSTR(4, 8)), + NPCM7XX_PINCFG(81, pwm1, MFSEL2, 17, none, NONE, 0, none, NONE, 0, DSTR(4, 8)), + NPCM7XX_PINCFG(82, pwm2, MFSEL2, 18, none, NONE, 0, none, NONE, 0, DSTR(4, 8)), + NPCM7XX_PINCFG(83, pwm3, MFSEL2, 19, none, NONE, 0, none, NONE, 0, DSTR(4, 8)), + NPCM7XX_PINCFG(84, r2, MFSEL1, 14, none, NONE, 0, 
none, NONE, 0, DSTR(8, 12) | SLEW), + NPCM7XX_PINCFG(85, r2, MFSEL1, 14, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW), + NPCM7XX_PINCFG(86, r2, MFSEL1, 14, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW), NPCM7XX_PINCFG(87, r2, MFSEL1, 14, none, NONE, 0, none, NONE, 0, 0), NPCM7XX_PINCFG(88, r2, MFSEL1, 14, none, NONE, 0, none, NONE, 0, 0), NPCM7XX_PINCFG(89, r2, MFSEL1, 14, none, NONE, 0, none, NONE, 0, 0), NPCM7XX_PINCFG(90, r2err, MFSEL1, 15, none, NONE, 0, none, NONE, 0, 0), - NPCM7XX_PINCFG(91, r2md, MFSEL1, 16, none, NONE, 0, none, NONE, 0, DS(2, 4)), - NPCM7XX_PINCFG(92, r2md, MFSEL1, 16, none, NONE, 0, none, NONE, 0, DS(2, 4)), + NPCM7XX_PINCFG(91, r2md, MFSEL1, 16, none, NONE, 0, none, NONE, 0, DSTR(2, 4)), + NPCM7XX_PINCFG(92, r2md, MFSEL1, 16, none, NONE, 0, none, NONE, 0, DSTR(2, 4)), NPCM7XX_PINCFG(93, ga20kbc, MFSEL1, 17, smb5d, I2CSEGSEL, 21, none, NONE, 0, 0), NPCM7XX_PINCFG(94, ga20kbc, MFSEL1, 17, smb5d, I2CSEGSEL, 21, none, NONE, 0, 0), NPCM7XX_PINCFG(95, lpc, NONE, 0, espi, MFSEL4, 8, gpio, MFSEL1, 26, 0), @@ -1062,34 +1061,34 @@ static const struct npcm7xx_pincfg pincfg[] = { NPCM7XX_PINCFG(133, smb10, MFSEL4, 13, none, NONE, 0, none, NONE, 0, 0), NPCM7XX_PINCFG(134, smb11, MFSEL4, 14, none, NONE, 0, none, NONE, 0, 0), NPCM7XX_PINCFG(135, smb11, MFSEL4, 14, none, NONE, 0, none, NONE, 0, 0), - NPCM7XX_PINCFG(136, sd1, MFSEL3, 12, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), - NPCM7XX_PINCFG(137, sd1, MFSEL3, 12, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), - NPCM7XX_PINCFG(138, sd1, MFSEL3, 12, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), - NPCM7XX_PINCFG(139, sd1, MFSEL3, 12, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), - NPCM7XX_PINCFG(140, sd1, MFSEL3, 12, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), + NPCM7XX_PINCFG(136, sd1, MFSEL3, 12, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW), + NPCM7XX_PINCFG(137, sd1, MFSEL3, 12, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW), + NPCM7XX_PINCFG(138, sd1, MFSEL3, 12, 
none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW), + NPCM7XX_PINCFG(139, sd1, MFSEL3, 12, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW), + NPCM7XX_PINCFG(140, sd1, MFSEL3, 12, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW), NPCM7XX_PINCFG(141, sd1, MFSEL3, 12, none, NONE, 0, none, NONE, 0, 0), - NPCM7XX_PINCFG(142, sd1, MFSEL3, 12, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), + NPCM7XX_PINCFG(142, sd1, MFSEL3, 12, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW), NPCM7XX_PINCFG(143, sd1, MFSEL3, 12, sd1pwr, MFSEL4, 5, none, NONE, 0, 0), - NPCM7XX_PINCFG(144, pwm4, MFSEL2, 20, none, NONE, 0, none, NONE, 0, DS(4, 8)), - NPCM7XX_PINCFG(145, pwm5, MFSEL2, 21, none, NONE, 0, none, NONE, 0, DS(4, 8)), - NPCM7XX_PINCFG(146, pwm6, MFSEL2, 22, none, NONE, 0, none, NONE, 0, DS(4, 8)), - NPCM7XX_PINCFG(147, pwm7, MFSEL2, 23, none, NONE, 0, none, NONE, 0, DS(4, 8)), - NPCM7XX_PINCFG(148, mmc8, MFSEL3, 11, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), - NPCM7XX_PINCFG(149, mmc8, MFSEL3, 11, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), - NPCM7XX_PINCFG(150, mmc8, MFSEL3, 11, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), - NPCM7XX_PINCFG(151, mmc8, MFSEL3, 11, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), - NPCM7XX_PINCFG(152, mmc, MFSEL3, 10, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), + NPCM7XX_PINCFG(144, pwm4, MFSEL2, 20, none, NONE, 0, none, NONE, 0, DSTR(4, 8)), + NPCM7XX_PINCFG(145, pwm5, MFSEL2, 21, none, NONE, 0, none, NONE, 0, DSTR(4, 8)), + NPCM7XX_PINCFG(146, pwm6, MFSEL2, 22, none, NONE, 0, none, NONE, 0, DSTR(4, 8)), + NPCM7XX_PINCFG(147, pwm7, MFSEL2, 23, none, NONE, 0, none, NONE, 0, DSTR(4, 8)), + NPCM7XX_PINCFG(148, mmc8, MFSEL3, 11, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW), + NPCM7XX_PINCFG(149, mmc8, MFSEL3, 11, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW), + NPCM7XX_PINCFG(150, mmc8, MFSEL3, 11, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW), + NPCM7XX_PINCFG(151, mmc8, MFSEL3, 11, none, NONE, 0, none, NONE, 0, 
DSTR(8, 12) | SLEW), + NPCM7XX_PINCFG(152, mmc, MFSEL3, 10, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW), NPCM7XX_PINCFG(153, mmcwp, FLOCKR1, 24, none, NONE, 0, none, NONE, 0, 0), /* Z1/A1 */ - NPCM7XX_PINCFG(154, mmc, MFSEL3, 10, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), + NPCM7XX_PINCFG(154, mmc, MFSEL3, 10, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW), NPCM7XX_PINCFG(155, mmccd, MFSEL3, 25, mmcrst, MFSEL4, 6, none, NONE, 0, 0), /* Z1/A1 */ - NPCM7XX_PINCFG(156, mmc, MFSEL3, 10, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), - NPCM7XX_PINCFG(157, mmc, MFSEL3, 10, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), - NPCM7XX_PINCFG(158, mmc, MFSEL3, 10, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), - NPCM7XX_PINCFG(159, mmc, MFSEL3, 10, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), - - NPCM7XX_PINCFG(160, clkout, MFSEL1, 21, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), - NPCM7XX_PINCFG(161, lpc, NONE, 0, espi, MFSEL4, 8, gpio, MFSEL1, 26, DS(8, 12)), - NPCM7XX_PINCFG(162, serirq, NONE, 0, gpio, MFSEL1, 31, none, NONE, 0, DS(8, 12)), + NPCM7XX_PINCFG(156, mmc, MFSEL3, 10, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW), + NPCM7XX_PINCFG(157, mmc, MFSEL3, 10, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW), + NPCM7XX_PINCFG(158, mmc, MFSEL3, 10, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW), + NPCM7XX_PINCFG(159, mmc, MFSEL3, 10, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW), + + NPCM7XX_PINCFG(160, clkout, MFSEL1, 21, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW), + NPCM7XX_PINCFG(161, lpc, NONE, 0, espi, MFSEL4, 8, gpio, MFSEL1, 26, DSTR(8, 12)), + NPCM7XX_PINCFG(162, serirq, NONE, 0, gpio, MFSEL1, 31, none, NONE, 0, DSTR(8, 12)), NPCM7XX_PINCFG(163, lpc, NONE, 0, espi, MFSEL4, 8, gpio, MFSEL1, 26, 0), NPCM7XX_PINCFG(164, lpc, NONE, 0, espi, MFSEL4, 8, gpio, MFSEL1, 26, SLEWLPC), NPCM7XX_PINCFG(165, lpc, NONE, 0, espi, MFSEL4, 8, gpio, MFSEL1, 26, SLEWLPC), @@ -1102,25 +1101,25 @@ static const struct npcm7xx_pincfg 
pincfg[] = { NPCM7XX_PINCFG(172, smb6, MFSEL3, 1, none, NONE, 0, none, NONE, 0, 0), NPCM7XX_PINCFG(173, smb7, MFSEL3, 2, none, NONE, 0, none, NONE, 0, 0), NPCM7XX_PINCFG(174, smb7, MFSEL3, 2, none, NONE, 0, none, NONE, 0, 0), - NPCM7XX_PINCFG(175, pspi1, MFSEL3, 4, faninx, MFSEL3, 3, none, NONE, 0, DS(8, 12)), - NPCM7XX_PINCFG(176, pspi1, MFSEL3, 4, faninx, MFSEL3, 3, none, NONE, 0, DS(8, 12)), - NPCM7XX_PINCFG(177, pspi1, MFSEL3, 4, faninx, MFSEL3, 3, none, NONE, 0, DS(8, 12)), - NPCM7XX_PINCFG(178, r1, MFSEL3, 9, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), - NPCM7XX_PINCFG(179, r1, MFSEL3, 9, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), - NPCM7XX_PINCFG(180, r1, MFSEL3, 9, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), + NPCM7XX_PINCFG(175, pspi1, MFSEL3, 4, faninx, MFSEL3, 3, none, NONE, 0, DSTR(8, 12)), + NPCM7XX_PINCFG(176, pspi1, MFSEL3, 4, faninx, MFSEL3, 3, none, NONE, 0, DSTR(8, 12)), + NPCM7XX_PINCFG(177, pspi1, MFSEL3, 4, faninx, MFSEL3, 3, none, NONE, 0, DSTR(8, 12)), + NPCM7XX_PINCFG(178, r1, MFSEL3, 9, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW), + NPCM7XX_PINCFG(179, r1, MFSEL3, 9, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW), + NPCM7XX_PINCFG(180, r1, MFSEL3, 9, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW), NPCM7XX_PINCFG(181, r1, MFSEL3, 9, none, NONE, 0, none, NONE, 0, 0), NPCM7XX_PINCFG(182, r1, MFSEL3, 9, none, NONE, 0, none, NONE, 0, 0), - NPCM7XX_PINCFG(183, spi3, MFSEL4, 16, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), - NPCM7XX_PINCFG(184, spi3, MFSEL4, 16, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW | GPO), - NPCM7XX_PINCFG(185, spi3, MFSEL4, 16, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW | GPO), - NPCM7XX_PINCFG(186, spi3, MFSEL4, 16, none, NONE, 0, none, NONE, 0, DS(8, 12)), - NPCM7XX_PINCFG(187, spi3cs1, MFSEL4, 17, none, NONE, 0, none, NONE, 0, DS(8, 12)), - NPCM7XX_PINCFG(188, spi3quad, MFSEL4, 20, spi3cs2, MFSEL4, 18, none, NONE, 0, DS(8, 12) | SLEW), - NPCM7XX_PINCFG(189, spi3quad, MFSEL4, 20, 
spi3cs3, MFSEL4, 19, none, NONE, 0, DS(8, 12) | SLEW), - NPCM7XX_PINCFG(190, gpio, FLOCKR1, 20, nprd_smi, NONE, 0, none, NONE, 0, DS(2, 4)), - NPCM7XX_PINCFG(191, none, NONE, 0, none, NONE, 0, none, NONE, 0, DS(8, 12)), /* XX */ - - NPCM7XX_PINCFG(192, none, NONE, 0, none, NONE, 0, none, NONE, 0, DS(8, 12)), /* XX */ + NPCM7XX_PINCFG(183, spi3, MFSEL4, 16, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW), + NPCM7XX_PINCFG(184, spi3, MFSEL4, 16, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW | GPO), + NPCM7XX_PINCFG(185, spi3, MFSEL4, 16, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW | GPO), + NPCM7XX_PINCFG(186, spi3, MFSEL4, 16, none, NONE, 0, none, NONE, 0, DSTR(8, 12)), + NPCM7XX_PINCFG(187, spi3cs1, MFSEL4, 17, none, NONE, 0, none, NONE, 0, DSTR(8, 12)), + NPCM7XX_PINCFG(188, spi3quad, MFSEL4, 20, spi3cs2, MFSEL4, 18, none, NONE, 0, DSTR(8, 12) | SLEW), + NPCM7XX_PINCFG(189, spi3quad, MFSEL4, 20, spi3cs3, MFSEL4, 19, none, NONE, 0, DSTR(8, 12) | SLEW), + NPCM7XX_PINCFG(190, gpio, FLOCKR1, 20, nprd_smi, NONE, 0, none, NONE, 0, DSTR(2, 4)), + NPCM7XX_PINCFG(191, none, NONE, 0, none, NONE, 0, none, NONE, 0, DSTR(8, 12)), /* XX */ + + NPCM7XX_PINCFG(192, none, NONE, 0, none, NONE, 0, none, NONE, 0, DSTR(8, 12)), /* XX */ NPCM7XX_PINCFG(193, r1, MFSEL3, 9, none, NONE, 0, none, NONE, 0, 0), NPCM7XX_PINCFG(194, smb0b, I2CSEGSEL, 0, none, NONE, 0, none, NONE, 0, 0), NPCM7XX_PINCFG(195, smb0b, I2CSEGSEL, 0, none, NONE, 0, none, NONE, 0, 0), @@ -1131,11 +1130,11 @@ static const struct npcm7xx_pincfg pincfg[] = { NPCM7XX_PINCFG(200, r2, MFSEL1, 14, none, NONE, 0, none, NONE, 0, 0), NPCM7XX_PINCFG(201, r1, MFSEL3, 9, none, NONE, 0, none, NONE, 0, 0), NPCM7XX_PINCFG(202, smb0c, I2CSEGSEL, 1, none, NONE, 0, none, NONE, 0, 0), - NPCM7XX_PINCFG(203, faninx, MFSEL3, 3, none, NONE, 0, none, NONE, 0, DS(8, 12)), + NPCM7XX_PINCFG(203, faninx, MFSEL3, 3, none, NONE, 0, none, NONE, 0, DSTR(8, 12)), NPCM7XX_PINCFG(204, ddc, NONE, 0, gpio, MFSEL3, 22, none, NONE, 0, SLEW), 
NPCM7XX_PINCFG(205, ddc, NONE, 0, gpio, MFSEL3, 22, none, NONE, 0, SLEW), - NPCM7XX_PINCFG(206, ddc, NONE, 0, gpio, MFSEL3, 22, none, NONE, 0, DS(4, 8)), - NPCM7XX_PINCFG(207, ddc, NONE, 0, gpio, MFSEL3, 22, none, NONE, 0, DS(4, 8)), + NPCM7XX_PINCFG(206, ddc, NONE, 0, gpio, MFSEL3, 22, none, NONE, 0, DSTR(4, 8)), + NPCM7XX_PINCFG(207, ddc, NONE, 0, gpio, MFSEL3, 22, none, NONE, 0, DSTR(4, 8)), NPCM7XX_PINCFG(208, rg2, MFSEL4, 24, ddr, MFSEL3, 26, none, NONE, 0, 0), NPCM7XX_PINCFG(209, rg2, MFSEL4, 24, ddr, MFSEL3, 26, none, NONE, 0, 0), NPCM7XX_PINCFG(210, rg2, MFSEL4, 24, ddr, MFSEL3, 26, none, NONE, 0, 0), @@ -1147,20 +1146,20 @@ static const struct npcm7xx_pincfg pincfg[] = { NPCM7XX_PINCFG(216, rg2mdio, MFSEL4, 23, ddr, MFSEL3, 26, none, NONE, 0, 0), NPCM7XX_PINCFG(217, rg2mdio, MFSEL4, 23, ddr, MFSEL3, 26, none, NONE, 0, 0), NPCM7XX_PINCFG(218, wdog1, MFSEL3, 19, none, NONE, 0, none, NONE, 0, 0), - NPCM7XX_PINCFG(219, wdog2, MFSEL3, 20, none, NONE, 0, none, NONE, 0, DS(4, 8)), + NPCM7XX_PINCFG(219, wdog2, MFSEL3, 20, none, NONE, 0, none, NONE, 0, DSTR(4, 8)), NPCM7XX_PINCFG(220, smb12, MFSEL3, 5, none, NONE, 0, none, NONE, 0, 0), NPCM7XX_PINCFG(221, smb12, MFSEL3, 5, none, NONE, 0, none, NONE, 0, 0), NPCM7XX_PINCFG(222, smb13, MFSEL3, 6, none, NONE, 0, none, NONE, 0, 0), NPCM7XX_PINCFG(223, smb13, MFSEL3, 6, none, NONE, 0, none, NONE, 0, 0), NPCM7XX_PINCFG(224, spix, MFSEL4, 27, none, NONE, 0, none, NONE, 0, SLEW), - NPCM7XX_PINCFG(225, spix, MFSEL4, 27, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW | GPO), - NPCM7XX_PINCFG(226, spix, MFSEL4, 27, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW | GPO), - NPCM7XX_PINCFG(227, spix, MFSEL4, 27, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), - NPCM7XX_PINCFG(228, spixcs1, MFSEL4, 28, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), - NPCM7XX_PINCFG(229, spix, MFSEL4, 27, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), - NPCM7XX_PINCFG(230, spix, MFSEL4, 27, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), - 
NPCM7XX_PINCFG(231, clkreq, MFSEL4, 9, none, NONE, 0, none, NONE, 0, DS(8, 12)), + NPCM7XX_PINCFG(225, spix, MFSEL4, 27, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW | GPO), + NPCM7XX_PINCFG(226, spix, MFSEL4, 27, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW | GPO), + NPCM7XX_PINCFG(227, spix, MFSEL4, 27, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW), + NPCM7XX_PINCFG(228, spixcs1, MFSEL4, 28, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW), + NPCM7XX_PINCFG(229, spix, MFSEL4, 27, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW), + NPCM7XX_PINCFG(230, spix, MFSEL4, 27, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW), + NPCM7XX_PINCFG(231, clkreq, MFSEL4, 9, none, NONE, 0, none, NONE, 0, DSTR(8, 12)), NPCM7XX_PINCFG(253, none, NONE, 0, none, NONE, 0, none, NONE, 0, GPI), /* SDHC1 power */ NPCM7XX_PINCFG(254, none, NONE, 0, none, NONE, 0, none, NONE, 0, GPI), /* SDHC2 power */ NPCM7XX_PINCFG(255, none, NONE, 0, none, NONE, 0, none, NONE, 0, GPI), /* DACOSEL */ @@ -1561,7 +1560,7 @@ static int npcm7xx_get_groups_count(struct pinctrl_dev *pctldev) { struct npcm7xx_pinctrl *npcm = pinctrl_dev_get_drvdata(pctldev); - dev_dbg(npcm->dev, "group size: %d\n", ARRAY_SIZE(npcm7xx_groups)); + dev_dbg(npcm->dev, "group size: %zu\n", ARRAY_SIZE(npcm7xx_groups)); return ARRAY_SIZE(npcm7xx_groups); } diff --git a/drivers/pinctrl/pinconf-generic.c b/drivers/pinctrl/pinconf-generic.c index f8edcc88ac013..415d1df8f46a5 100644 --- a/drivers/pinctrl/pinconf-generic.c +++ b/drivers/pinctrl/pinconf-generic.c @@ -30,10 +30,10 @@ static const struct pin_config_item conf_items[] = { PCONFDUMP(PIN_CONFIG_BIAS_BUS_HOLD, "input bias bus hold", NULL, false), PCONFDUMP(PIN_CONFIG_BIAS_DISABLE, "input bias disabled", NULL, false), PCONFDUMP(PIN_CONFIG_BIAS_HIGH_IMPEDANCE, "input bias high impedance", NULL, false), - PCONFDUMP(PIN_CONFIG_BIAS_PULL_DOWN, "input bias pull down", NULL, false), + PCONFDUMP(PIN_CONFIG_BIAS_PULL_DOWN, "input bias pull down", "ohms", true), 
PCONFDUMP(PIN_CONFIG_BIAS_PULL_PIN_DEFAULT, - "input bias pull to pin specific state", NULL, false), - PCONFDUMP(PIN_CONFIG_BIAS_PULL_UP, "input bias pull up", NULL, false), + "input bias pull to pin specific state", "ohms", true), + PCONFDUMP(PIN_CONFIG_BIAS_PULL_UP, "input bias pull up", "ohms", true), PCONFDUMP(PIN_CONFIG_DRIVE_OPEN_DRAIN, "output drive open drain", NULL, false), PCONFDUMP(PIN_CONFIG_DRIVE_OPEN_SOURCE, "output drive open source", NULL, false), PCONFDUMP(PIN_CONFIG_DRIVE_PUSH_PULL, "output drive push pull", NULL, false), diff --git a/drivers/pinctrl/pinctrl-ingenic.c b/drivers/pinctrl/pinctrl-ingenic.c index 2712f51eb2381..fa6becca17889 100644 --- a/drivers/pinctrl/pinctrl-ingenic.c +++ b/drivers/pinctrl/pinctrl-ingenic.c @@ -119,6 +119,8 @@ struct ingenic_chip_info { unsigned int num_functions; const u32 *pull_ups, *pull_downs; + + const struct regmap_access_table *access_table; }; struct ingenic_pinctrl { @@ -2179,6 +2181,17 @@ static const struct function_desc x1000_functions[] = { { "mac", x1000_mac_groups, ARRAY_SIZE(x1000_mac_groups), }, }; +static const struct regmap_range x1000_access_ranges[] = { + regmap_reg_range(0x000, 0x400 - 4), + regmap_reg_range(0x700, 0x800 - 4), +}; + +/* shared with X1500 */ +static const struct regmap_access_table x1000_access_table = { + .yes_ranges = x1000_access_ranges, + .n_yes_ranges = ARRAY_SIZE(x1000_access_ranges), +}; + static const struct ingenic_chip_info x1000_chip_info = { .num_chips = 4, .reg_offset = 0x100, @@ -2189,6 +2202,7 @@ static const struct ingenic_chip_info x1000_chip_info = { .num_functions = ARRAY_SIZE(x1000_functions), .pull_ups = x1000_pull_ups, .pull_downs = x1000_pull_downs, + .access_table = &x1000_access_table, }; static int x1500_uart0_data_pins[] = { 0x4a, 0x4b, }; @@ -2300,6 +2314,7 @@ static const struct ingenic_chip_info x1500_chip_info = { .num_functions = ARRAY_SIZE(x1500_functions), .pull_ups = x1000_pull_ups, .pull_downs = x1000_pull_downs, + .access_table = 
&x1000_access_table, }; static const u32 x1830_pull_ups[4] = { @@ -2506,6 +2521,16 @@ static const struct function_desc x1830_functions[] = { { "mac", x1830_mac_groups, ARRAY_SIZE(x1830_mac_groups), }, }; +static const struct regmap_range x1830_access_ranges[] = { + regmap_reg_range(0x0000, 0x4000 - 4), + regmap_reg_range(0x7000, 0x8000 - 4), +}; + +static const struct regmap_access_table x1830_access_table = { + .yes_ranges = x1830_access_ranges, + .n_yes_ranges = ARRAY_SIZE(x1830_access_ranges), +}; + static const struct ingenic_chip_info x1830_chip_info = { .num_chips = 4, .reg_offset = 0x1000, @@ -2516,6 +2541,7 @@ static const struct ingenic_chip_info x1830_chip_info = { .num_functions = ARRAY_SIZE(x1830_functions), .pull_ups = x1830_pull_ups, .pull_downs = x1830_pull_downs, + .access_table = &x1830_access_table, }; static const u32 x2000_pull_ups[5] = { @@ -2969,6 +2995,17 @@ static const struct function_desc x2000_functions[] = { { "otg", x2000_otg_groups, ARRAY_SIZE(x2000_otg_groups), }, }; +static const struct regmap_range x2000_access_ranges[] = { + regmap_reg_range(0x000, 0x500 - 4), + regmap_reg_range(0x700, 0x800 - 4), +}; + +/* shared with X2100 */ +static const struct regmap_access_table x2000_access_table = { + .yes_ranges = x2000_access_ranges, + .n_yes_ranges = ARRAY_SIZE(x2000_access_ranges), +}; + static const struct ingenic_chip_info x2000_chip_info = { .num_chips = 5, .reg_offset = 0x100, @@ -2979,6 +3016,7 @@ static const struct ingenic_chip_info x2000_chip_info = { .num_functions = ARRAY_SIZE(x2000_functions), .pull_ups = x2000_pull_ups, .pull_downs = x2000_pull_downs, + .access_table = &x2000_access_table, }; static const u32 x2100_pull_ups[5] = { @@ -3189,6 +3227,7 @@ static const struct ingenic_chip_info x2100_chip_info = { .num_functions = ARRAY_SIZE(x2100_functions), .pull_ups = x2100_pull_ups, .pull_downs = x2100_pull_downs, + .access_table = &x2000_access_table, }; static u32 ingenic_gpio_read_reg(struct ingenic_gpio_chip *jzgc, u8 
reg) @@ -4168,7 +4207,12 @@ static int __init ingenic_pinctrl_probe(struct platform_device *pdev) return PTR_ERR(base); regmap_config = ingenic_pinctrl_regmap_config; - regmap_config.max_register = chip_info->num_chips * chip_info->reg_offset; + if (chip_info->access_table) { + regmap_config.rd_table = chip_info->access_table; + regmap_config.wr_table = chip_info->access_table; + } else { + regmap_config.max_register = chip_info->num_chips * chip_info->reg_offset - 4; + } jzpc->map = devm_regmap_init_mmio(dev, base, &regmap_config); if (IS_ERR(jzpc->map)) { diff --git a/drivers/pinctrl/pinctrl-microchip-sgpio.c b/drivers/pinctrl/pinctrl-microchip-sgpio.c index 639f1130e9892..666f1e3889e00 100644 --- a/drivers/pinctrl/pinctrl-microchip-sgpio.c +++ b/drivers/pinctrl/pinctrl-microchip-sgpio.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "core.h" #include "pinconf.h" @@ -116,6 +117,7 @@ struct sgpio_priv { u32 clock; struct regmap *regs; const struct sgpio_properties *properties; + spinlock_t lock; }; struct sgpio_port_addr { @@ -229,6 +231,7 @@ static void sgpio_output_set(struct sgpio_priv *priv, int value) { unsigned int bit = SGPIO_SRC_BITS * addr->bit; + unsigned long flags; u32 clr, set; switch (priv->properties->arch) { @@ -247,7 +250,10 @@ static void sgpio_output_set(struct sgpio_priv *priv, default: return; } + + spin_lock_irqsave(&priv->lock, flags); sgpio_clrsetbits(priv, REG_PORT_CONFIG, addr->port, clr, set); + spin_unlock_irqrestore(&priv->lock, flags); } static int sgpio_output_get(struct sgpio_priv *priv, @@ -575,10 +581,13 @@ static void microchip_sgpio_irq_settype(struct irq_data *data, struct sgpio_bank *bank = gpiochip_get_data(chip); unsigned int gpio = irqd_to_hwirq(data); struct sgpio_port_addr addr; + unsigned long flags; u32 ena; sgpio_pin_to_addr(bank->priv, gpio, &addr); + spin_lock_irqsave(&bank->priv->lock, flags); + /* Disable interrupt while changing type */ ena = sgpio_readl(bank->priv, REG_INT_ENABLE, addr.bit);
sgpio_writel(bank->priv, ena & ~BIT(addr.port), REG_INT_ENABLE, addr.bit); @@ -595,6 +604,8 @@ static void microchip_sgpio_irq_settype(struct irq_data *data, /* Possibly re-enable interrupts */ sgpio_writel(bank->priv, ena, REG_INT_ENABLE, addr.bit); + + spin_unlock_irqrestore(&bank->priv->lock, flags); } static void microchip_sgpio_irq_setreg(struct irq_data *data, @@ -605,13 +616,16 @@ static void microchip_sgpio_irq_setreg(struct irq_data *data, struct sgpio_bank *bank = gpiochip_get_data(chip); unsigned int gpio = irqd_to_hwirq(data); struct sgpio_port_addr addr; + unsigned long flags; sgpio_pin_to_addr(bank->priv, gpio, &addr); + spin_lock_irqsave(&bank->priv->lock, flags); if (clear) sgpio_clrsetbits(bank->priv, reg, addr.bit, BIT(addr.port), 0); else sgpio_clrsetbits(bank->priv, reg, addr.bit, 0, BIT(addr.port)); + spin_unlock_irqrestore(&bank->priv->lock, flags); } static void microchip_sgpio_irq_mask(struct irq_data *data) @@ -833,6 +847,7 @@ static int microchip_sgpio_probe(struct platform_device *pdev) return -ENOMEM; priv->dev = dev; + spin_lock_init(&priv->lock); reset = devm_reset_control_get_optional_shared(&pdev->dev, "switch"); if (IS_ERR(reset)) diff --git a/drivers/pinctrl/pinctrl-ocelot.c b/drivers/pinctrl/pinctrl-ocelot.c index fc969208d904c..370459243007b 100644 --- a/drivers/pinctrl/pinctrl-ocelot.c +++ b/drivers/pinctrl/pinctrl-ocelot.c @@ -1750,8 +1750,8 @@ static int ocelot_gpiochip_register(struct platform_device *pdev, gc->base = -1; gc->label = "ocelot-gpio"; - irq = irq_of_parse_and_map(gc->of_node, 0); - if (irq) { + irq = platform_get_irq_optional(pdev, 0); + if (irq > 0) { girq = &gc->irq; girq->chip = &ocelot_irqchip; girq->parent_handler = ocelot_irq_handler; @@ -1788,9 +1788,10 @@ static struct regmap *ocelot_pinctrl_create_pincfg(struct platform_device *pdev) .val_bits = 32, .reg_stride = 4, .max_register = 32, + .name = "pincfg", }; - base = devm_platform_ioremap_resource(pdev, 0); + base = devm_platform_ioremap_resource(pdev, 
1); if (IS_ERR(base)) { dev_dbg(&pdev->dev, "Failed to ioremap config registers (no extended pinconf)\n"); return NULL; diff --git a/drivers/pinctrl/pinctrl-rockchip.c b/drivers/pinctrl/pinctrl-rockchip.c index d8dd8415fa81b..a1b598b86aa9f 100644 --- a/drivers/pinctrl/pinctrl-rockchip.c +++ b/drivers/pinctrl/pinctrl-rockchip.c @@ -2693,6 +2693,7 @@ static int rockchip_pinctrl_probe(struct platform_device *pdev) node = of_parse_phandle(np, "rockchip,grf", 0); if (node) { info->regmap_base = syscon_node_to_regmap(node); + of_node_put(node); if (IS_ERR(info->regmap_base)) return PTR_ERR(info->regmap_base); } else { @@ -2725,6 +2726,7 @@ static int rockchip_pinctrl_probe(struct platform_device *pdev) node = of_parse_phandle(np, "rockchip,pmu", 0); if (node) { info->regmap_pmu = syscon_node_to_regmap(node); + of_node_put(node); if (IS_ERR(info->regmap_pmu)) return PTR_ERR(info->regmap_pmu); } diff --git a/drivers/pinctrl/renesas/core.c b/drivers/pinctrl/renesas/core.c index 0d4ea2e22a535..12d41ac017b53 100644 --- a/drivers/pinctrl/renesas/core.c +++ b/drivers/pinctrl/renesas/core.c @@ -741,7 +741,7 @@ static int sh_pfc_suspend_init(struct sh_pfc *pfc) { return 0; } #ifdef DEBUG #define SH_PFC_MAX_REGS 300 -#define SH_PFC_MAX_ENUMS 3000 +#define SH_PFC_MAX_ENUMS 5000 static unsigned int sh_pfc_errors __initdata; static unsigned int sh_pfc_warnings __initdata; @@ -865,7 +865,8 @@ static void __init sh_pfc_check_cfg_reg(const char *drvname, GENMASK(cfg_reg->reg_width - 1, 0)); if (cfg_reg->field_width) { - n = cfg_reg->reg_width / cfg_reg->field_width; + fw = cfg_reg->field_width; + n = (cfg_reg->reg_width / fw) << fw; /* Skip field checks (done at build time) */ goto check_enum_ids; } diff --git a/drivers/pinctrl/renesas/pfc-r8a77470.c b/drivers/pinctrl/renesas/pfc-r8a77470.c index e6e5487691c16..cf7153d06a953 100644 --- a/drivers/pinctrl/renesas/pfc-r8a77470.c +++ b/drivers/pinctrl/renesas/pfc-r8a77470.c @@ -2140,7 +2140,7 @@ static const unsigned int vin0_clk_mux[] = { 
VI0_CLK_MARK, }; /* - VIN1 ------------------------------------------------------------------- */ -static const union vin_data vin1_data_pins = { +static const union vin_data12 vin1_data_pins = { .data12 = { RCAR_GP_PIN(3, 1), RCAR_GP_PIN(3, 2), RCAR_GP_PIN(3, 3), RCAR_GP_PIN(3, 4), @@ -2150,7 +2150,7 @@ static const union vin_data vin1_data_pins = { RCAR_GP_PIN(3, 15), RCAR_GP_PIN(3, 16), }, }; -static const union vin_data vin1_data_mux = { +static const union vin_data12 vin1_data_mux = { .data12 = { VI1_DATA0_MARK, VI1_DATA1_MARK, VI1_DATA2_MARK, VI1_DATA3_MARK, diff --git a/drivers/pinctrl/samsung/pinctrl-exynos-arm64.c b/drivers/pinctrl/samsung/pinctrl-exynos-arm64.c index 2e490e7696f47..4102ce955bd7f 100644 --- a/drivers/pinctrl/samsung/pinctrl-exynos-arm64.c +++ b/drivers/pinctrl/samsung/pinctrl-exynos-arm64.c @@ -585,13 +585,11 @@ static const struct samsung_pin_ctrl exynos850_pin_ctrl[] __initconst = { /* pin-controller instance 0 ALIVE data */ .pin_banks = exynos850_pin_banks0, .nr_banks = ARRAY_SIZE(exynos850_pin_banks0), - .eint_gpio_init = exynos_eint_gpio_init, .eint_wkup_init = exynos_eint_wkup_init, }, { /* pin-controller instance 1 CMGP data */ .pin_banks = exynos850_pin_banks1, .nr_banks = ARRAY_SIZE(exynos850_pin_banks1), - .eint_gpio_init = exynos_eint_gpio_init, .eint_wkup_init = exynos_eint_wkup_init, }, { /* pin-controller instance 2 AUD data */ diff --git a/drivers/pinctrl/samsung/pinctrl-samsung.c b/drivers/pinctrl/samsung/pinctrl-samsung.c index 0f6e9305fec58..c4175fea7d741 100644 --- a/drivers/pinctrl/samsung/pinctrl-samsung.c +++ b/drivers/pinctrl/samsung/pinctrl-samsung.c @@ -1002,6 +1002,16 @@ samsung_pinctrl_get_soc_data_for_of_alias(struct platform_device *pdev) return &(of_data->ctrl[id]); } +static void samsung_banks_of_node_put(struct samsung_pinctrl_drv_data *d) +{ + struct samsung_pin_bank *bank; + unsigned int i; + + bank = d->pin_banks; + for (i = 0; i < d->nr_banks; ++i, ++bank) + of_node_put(bank->of_node); +} + /* retrieve 
the soc specific data */ static const struct samsung_pin_ctrl * samsung_pinctrl_get_soc_data(struct samsung_pinctrl_drv_data *d, @@ -1117,19 +1127,19 @@ static int samsung_pinctrl_probe(struct platform_device *pdev) if (ctrl->retention_data) { drvdata->retention_ctrl = ctrl->retention_data->init(drvdata, ctrl->retention_data); - if (IS_ERR(drvdata->retention_ctrl)) - return PTR_ERR(drvdata->retention_ctrl); + if (IS_ERR(drvdata->retention_ctrl)) { + ret = PTR_ERR(drvdata->retention_ctrl); + goto err_put_banks; + } } ret = samsung_pinctrl_register(pdev, drvdata); if (ret) - return ret; + goto err_put_banks; ret = samsung_gpiolib_register(pdev, drvdata); - if (ret) { - samsung_pinctrl_unregister(pdev, drvdata); - return ret; - } + if (ret) + goto err_unregister; if (ctrl->eint_gpio_init) ctrl->eint_gpio_init(drvdata); @@ -1139,6 +1149,12 @@ static int samsung_pinctrl_probe(struct platform_device *pdev) platform_set_drvdata(pdev, drvdata); return 0; + +err_unregister: + samsung_pinctrl_unregister(pdev, drvdata); +err_put_banks: + samsung_banks_of_node_put(drvdata); + return ret; } /* diff --git a/drivers/platform/chrome/Makefile b/drivers/platform/chrome/Makefile index f901d2e43166c..88cbc434c06b2 100644 --- a/drivers/platform/chrome/Makefile +++ b/drivers/platform/chrome/Makefile @@ -2,6 +2,7 @@ # tell define_trace.h where to find the cros ec trace header CFLAGS_cros_ec_trace.o:= -I$(src) +CFLAGS_cros_ec_sensorhub_ring.o:= -I$(src) obj-$(CONFIG_CHROMEOS_LAPTOP) += chromeos_laptop.o obj-$(CONFIG_CHROMEOS_PSTORE) += chromeos_pstore.o @@ -20,7 +21,7 @@ obj-$(CONFIG_CROS_EC_CHARDEV) += cros_ec_chardev.o obj-$(CONFIG_CROS_EC_LIGHTBAR) += cros_ec_lightbar.o obj-$(CONFIG_CROS_EC_VBC) += cros_ec_vbc.o obj-$(CONFIG_CROS_EC_DEBUGFS) += cros_ec_debugfs.o -cros-ec-sensorhub-objs := cros_ec_sensorhub.o cros_ec_sensorhub_ring.o cros_ec_trace.o +cros-ec-sensorhub-objs := cros_ec_sensorhub.o cros_ec_sensorhub_ring.o obj-$(CONFIG_CROS_EC_SENSORHUB) += cros-ec-sensorhub.o 
obj-$(CONFIG_CROS_EC_SYSFS) += cros_ec_sysfs.o obj-$(CONFIG_CROS_USBPD_LOGGER) += cros_usbpd_logger.o diff --git a/drivers/platform/chrome/cros_ec_sensorhub_ring.c b/drivers/platform/chrome/cros_ec_sensorhub_ring.c index 98e37080f7609..71948dade0e2a 100644 --- a/drivers/platform/chrome/cros_ec_sensorhub_ring.c +++ b/drivers/platform/chrome/cros_ec_sensorhub_ring.c @@ -17,7 +17,8 @@ #include #include -#include "cros_ec_trace.h" +#define CREATE_TRACE_POINTS +#include "cros_ec_sensorhub_trace.h" /* Precision of fixed point for the m values from the filter */ #define M_PRECISION BIT(23) diff --git a/drivers/platform/chrome/cros_ec_sensorhub_trace.h b/drivers/platform/chrome/cros_ec_sensorhub_trace.h new file mode 100644 index 0000000000000..57d9b47859692 --- /dev/null +++ b/drivers/platform/chrome/cros_ec_sensorhub_trace.h @@ -0,0 +1,123 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Trace events for the ChromeOS Sensorhub kernel module + * + * Copyright 2021 Google LLC. + */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM cros_ec + +#if !defined(_CROS_EC_SENSORHUB_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ) +#define _CROS_EC_SENSORHUB_TRACE_H_ + +#include +#include + +#include + +TRACE_EVENT(cros_ec_sensorhub_timestamp, + TP_PROTO(u32 ec_sample_timestamp, u32 ec_fifo_timestamp, s64 fifo_timestamp, + s64 current_timestamp, s64 current_time), + TP_ARGS(ec_sample_timestamp, ec_fifo_timestamp, fifo_timestamp, current_timestamp, + current_time), + TP_STRUCT__entry( + __field(u32, ec_sample_timestamp) + __field(u32, ec_fifo_timestamp) + __field(s64, fifo_timestamp) + __field(s64, current_timestamp) + __field(s64, current_time) + __field(s64, delta) + ), + TP_fast_assign( + __entry->ec_sample_timestamp = ec_sample_timestamp; + __entry->ec_fifo_timestamp = ec_fifo_timestamp; + __entry->fifo_timestamp = fifo_timestamp; + __entry->current_timestamp = current_timestamp; + __entry->current_time = current_time; + __entry->delta = current_timestamp - current_time; + ), + 
TP_printk("ec_ts: %9u, ec_fifo_ts: %9u, fifo_ts: %12lld, curr_ts: %12lld, curr_time: %12lld, delta %12lld", + __entry->ec_sample_timestamp, + __entry->ec_fifo_timestamp, + __entry->fifo_timestamp, + __entry->current_timestamp, + __entry->current_time, + __entry->delta + ) +); + +TRACE_EVENT(cros_ec_sensorhub_data, + TP_PROTO(u32 ec_sensor_num, u32 ec_fifo_timestamp, s64 fifo_timestamp, + s64 current_timestamp, s64 current_time), + TP_ARGS(ec_sensor_num, ec_fifo_timestamp, fifo_timestamp, current_timestamp, current_time), + TP_STRUCT__entry( + __field(u32, ec_sensor_num) + __field(u32, ec_fifo_timestamp) + __field(s64, fifo_timestamp) + __field(s64, current_timestamp) + __field(s64, current_time) + __field(s64, delta) + ), + TP_fast_assign( + __entry->ec_sensor_num = ec_sensor_num; + __entry->ec_fifo_timestamp = ec_fifo_timestamp; + __entry->fifo_timestamp = fifo_timestamp; + __entry->current_timestamp = current_timestamp; + __entry->current_time = current_time; + __entry->delta = current_timestamp - current_time; + ), + TP_printk("ec_num: %4u, ec_fifo_ts: %9u, fifo_ts: %12lld, curr_ts: %12lld, curr_time: %12lld, delta %12lld", + __entry->ec_sensor_num, + __entry->ec_fifo_timestamp, + __entry->fifo_timestamp, + __entry->current_timestamp, + __entry->current_time, + __entry->delta + ) +); + +TRACE_EVENT(cros_ec_sensorhub_filter, + TP_PROTO(struct cros_ec_sensors_ts_filter_state *state, s64 dx, s64 dy), + TP_ARGS(state, dx, dy), + TP_STRUCT__entry( + __field(s64, dx) + __field(s64, dy) + __field(s64, median_m) + __field(s64, median_error) + __field(s64, history_len) + __field(s64, x) + __field(s64, y) + ), + TP_fast_assign( + __entry->dx = dx; + __entry->dy = dy; + __entry->median_m = state->median_m; + __entry->median_error = state->median_error; + __entry->history_len = state->history_len; + __entry->x = state->x_offset; + __entry->y = state->y_offset; + ), + TP_printk("dx: %12lld. 
dy: %12lld median_m: %12lld median_error: %12lld len: %lld x: %12lld y: %12lld", + __entry->dx, + __entry->dy, + __entry->median_m, + __entry->median_error, + __entry->history_len, + __entry->x, + __entry->y + ) +); + + +#endif /* _CROS_EC_SENSORHUB_TRACE_H_ */ + +/* this part must be outside header guard */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . + +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE cros_ec_sensorhub_trace + +#include diff --git a/drivers/platform/chrome/cros_ec_trace.h b/drivers/platform/chrome/cros_ec_trace.h index 7e7cfc98657a4..9bb5cd2c98b8b 100644 --- a/drivers/platform/chrome/cros_ec_trace.h +++ b/drivers/platform/chrome/cros_ec_trace.h @@ -15,7 +15,6 @@ #include #include #include -#include #include @@ -71,100 +70,6 @@ TRACE_EVENT(cros_ec_request_done, __entry->retval) ); -TRACE_EVENT(cros_ec_sensorhub_timestamp, - TP_PROTO(u32 ec_sample_timestamp, u32 ec_fifo_timestamp, s64 fifo_timestamp, - s64 current_timestamp, s64 current_time), - TP_ARGS(ec_sample_timestamp, ec_fifo_timestamp, fifo_timestamp, current_timestamp, - current_time), - TP_STRUCT__entry( - __field(u32, ec_sample_timestamp) - __field(u32, ec_fifo_timestamp) - __field(s64, fifo_timestamp) - __field(s64, current_timestamp) - __field(s64, current_time) - __field(s64, delta) - ), - TP_fast_assign( - __entry->ec_sample_timestamp = ec_sample_timestamp; - __entry->ec_fifo_timestamp = ec_fifo_timestamp; - __entry->fifo_timestamp = fifo_timestamp; - __entry->current_timestamp = current_timestamp; - __entry->current_time = current_time; - __entry->delta = current_timestamp - current_time; - ), - TP_printk("ec_ts: %9u, ec_fifo_ts: %9u, fifo_ts: %12lld, curr_ts: %12lld, curr_time: %12lld, delta %12lld", - __entry->ec_sample_timestamp, - __entry->ec_fifo_timestamp, - __entry->fifo_timestamp, - __entry->current_timestamp, - __entry->current_time, - __entry->delta - ) -); - -TRACE_EVENT(cros_ec_sensorhub_data, - TP_PROTO(u32 ec_sensor_num, u32 ec_fifo_timestamp, s64 
fifo_timestamp, - s64 current_timestamp, s64 current_time), - TP_ARGS(ec_sensor_num, ec_fifo_timestamp, fifo_timestamp, current_timestamp, current_time), - TP_STRUCT__entry( - __field(u32, ec_sensor_num) - __field(u32, ec_fifo_timestamp) - __field(s64, fifo_timestamp) - __field(s64, current_timestamp) - __field(s64, current_time) - __field(s64, delta) - ), - TP_fast_assign( - __entry->ec_sensor_num = ec_sensor_num; - __entry->ec_fifo_timestamp = ec_fifo_timestamp; - __entry->fifo_timestamp = fifo_timestamp; - __entry->current_timestamp = current_timestamp; - __entry->current_time = current_time; - __entry->delta = current_timestamp - current_time; - ), - TP_printk("ec_num: %4u, ec_fifo_ts: %9u, fifo_ts: %12lld, curr_ts: %12lld, curr_time: %12lld, delta %12lld", - __entry->ec_sensor_num, - __entry->ec_fifo_timestamp, - __entry->fifo_timestamp, - __entry->current_timestamp, - __entry->current_time, - __entry->delta - ) -); - -TRACE_EVENT(cros_ec_sensorhub_filter, - TP_PROTO(struct cros_ec_sensors_ts_filter_state *state, s64 dx, s64 dy), - TP_ARGS(state, dx, dy), - TP_STRUCT__entry( - __field(s64, dx) - __field(s64, dy) - __field(s64, median_m) - __field(s64, median_error) - __field(s64, history_len) - __field(s64, x) - __field(s64, y) - ), - TP_fast_assign( - __entry->dx = dx; - __entry->dy = dy; - __entry->median_m = state->median_m; - __entry->median_error = state->median_error; - __entry->history_len = state->history_len; - __entry->x = state->x_offset; - __entry->y = state->y_offset; - ), - TP_printk("dx: %12lld. 
dy: %12lld median_m: %12lld median_error: %12lld len: %lld x: %12lld y: %12lld", - __entry->dx, - __entry->dy, - __entry->median_m, - __entry->median_error, - __entry->history_len, - __entry->x, - __entry->y - ) -); - - #endif /* _CROS_EC_TRACE_H_ */ /* this part must be outside header guard */ diff --git a/drivers/platform/chrome/cros_ec_typec.c b/drivers/platform/chrome/cros_ec_typec.c index 5de0bfb0bc4d9..952c1756f59ee 100644 --- a/drivers/platform/chrome/cros_ec_typec.c +++ b/drivers/platform/chrome/cros_ec_typec.c @@ -1075,7 +1075,13 @@ static int cros_typec_probe(struct platform_device *pdev) return -ENOMEM; typec->dev = dev; + typec->ec = dev_get_drvdata(pdev->dev.parent); + if (!typec->ec) { + dev_err(dev, "couldn't find parent EC device\n"); + return -ENODEV; + } + platform_set_drvdata(pdev, typec); ret = cros_typec_get_cmd_version(typec); diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index 24deeeb29af21..53abd553b842e 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -1027,7 +1027,7 @@ config TOUCHSCREEN_DMI config X86_ANDROID_TABLETS tristate "X86 Android tablet support" - depends on I2C && SERIAL_DEV_BUS && ACPI && GPIOLIB + depends on I2C && SPI && SERIAL_DEV_BUS && ACPI && EFI && GPIOLIB help X86 tablets which ship with Android as (part of) the factory image typically have various problems with their DSDTs. 
The factory kernels diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c index 2104a2621e507..adab31b52f2af 100644 --- a/drivers/platform/x86/asus-wmi.c +++ b/drivers/platform/x86/asus-wmi.c @@ -2059,7 +2059,7 @@ static int fan_boost_mode_check_present(struct asus_wmi *asus) err = asus_wmi_get_devstate(asus, ASUS_WMI_DEVID_FAN_BOOST_MODE, &result); if (err) { - if (err == -ENODEV) + if (err == -ENODEV || err == -ENODATA) return 0; else return err; diff --git a/drivers/platform/x86/hp-wmi.c b/drivers/platform/x86/hp-wmi.c index 48a46466f0862..88f0bfd6ecf1a 100644 --- a/drivers/platform/x86/hp-wmi.c +++ b/drivers/platform/x86/hp-wmi.c @@ -35,10 +35,6 @@ MODULE_LICENSE("GPL"); MODULE_ALIAS("wmi:95F24279-4D7B-4334-9387-ACCDC67EF61C"); MODULE_ALIAS("wmi:5FB7F034-2C63-45e9-BE91-3D44E2C707E4"); -static int enable_tablet_mode_sw = -1; -module_param(enable_tablet_mode_sw, int, 0444); -MODULE_PARM_DESC(enable_tablet_mode_sw, "Enable SW_TABLET_MODE reporting (-1=auto, 0=no, 1=yes)"); - #define HPWMI_EVENT_GUID "95F24279-4D7B-4334-9387-ACCDC67EF61C" #define HPWMI_BIOS_GUID "5FB7F034-2C63-45e9-BE91-3D44E2C707E4" #define HP_OMEN_EC_THERMAL_PROFILE_OFFSET 0x95 @@ -107,6 +103,7 @@ enum hp_wmi_commandtype { HPWMI_FEATURE2_QUERY = 0x0d, HPWMI_WIRELESS2_QUERY = 0x1b, HPWMI_POSTCODEERROR_QUERY = 0x2a, + HPWMI_SYSTEM_DEVICE_MODE = 0x40, HPWMI_THERMAL_PROFILE_QUERY = 0x4c, }; @@ -217,6 +214,19 @@ struct rfkill2_device { static int rfkill2_count; static struct rfkill2_device rfkill2[HPWMI_MAX_RFKILL2_DEVICES]; +/* + * Chassis Types values were obtained from SMBIOS reference + * specification version 3.00. A complete list of system enclosures + * and chassis types is available on Table 17. 
+ */ +static const char * const tablet_chassis_types[] = { + "30", /* Tablet*/ + "31", /* Convertible */ + "32" /* Detachable */ +}; + +#define DEVICE_MODE_TABLET 0x06 + /* map output size to the corresponding WMI method id */ static inline int encode_outsize_for_pvsz(int outsize) { @@ -320,7 +330,7 @@ static int hp_wmi_get_fan_speed(int fan) char fan_data[4] = { fan, 0, 0, 0 }; int ret = hp_wmi_perform_query(HPWMI_FAN_SPEED_GET_QUERY, HPWMI_GM, - &fan_data, sizeof(fan_data), + &fan_data, sizeof(char), sizeof(fan_data)); if (ret != 0) @@ -345,14 +355,39 @@ static int hp_wmi_read_int(int query) return val; } -static int hp_wmi_hw_state(int mask) +static int hp_wmi_get_dock_state(void) { int state = hp_wmi_read_int(HPWMI_HARDWARE_QUERY); if (state < 0) return state; - return !!(state & mask); + return !!(state & HPWMI_DOCK_MASK); +} + +static int hp_wmi_get_tablet_mode(void) +{ + char system_device_mode[4] = { 0 }; + const char *chassis_type; + bool tablet_found; + int ret; + + chassis_type = dmi_get_system_info(DMI_CHASSIS_TYPE); + if (!chassis_type) + return -ENODEV; + + tablet_found = match_string(tablet_chassis_types, + ARRAY_SIZE(tablet_chassis_types), + chassis_type) >= 0; + if (!tablet_found) + return -ENODEV; + + ret = hp_wmi_perform_query(HPWMI_SYSTEM_DEVICE_MODE, HPWMI_READ, + system_device_mode, 0, sizeof(system_device_mode)); + if (ret < 0) + return ret; + + return system_device_mode[0] == DEVICE_MODE_TABLET; } static int omen_thermal_profile_set(int mode) @@ -364,7 +399,7 @@ static int omen_thermal_profile_set(int mode) return -EINVAL; ret = hp_wmi_perform_query(HPWMI_SET_PERFORMANCE_MODE, HPWMI_GM, - &buffer, sizeof(buffer), sizeof(buffer)); + &buffer, sizeof(buffer), 0); if (ret) return ret < 0 ? 
ret : -EINVAL; @@ -401,7 +436,7 @@ static int hp_wmi_fan_speed_max_set(int enabled) int ret; ret = hp_wmi_perform_query(HPWMI_FAN_SPEED_MAX_SET_QUERY, HPWMI_GM, - &enabled, sizeof(enabled), sizeof(enabled)); + &enabled, sizeof(enabled), 0); if (ret) return ret < 0 ? ret : -EINVAL; @@ -414,7 +449,7 @@ static int hp_wmi_fan_speed_max_get(void) int val = 0, ret; ret = hp_wmi_perform_query(HPWMI_FAN_SPEED_MAX_GET_QUERY, HPWMI_GM, - &val, sizeof(val), sizeof(val)); + &val, 0, sizeof(val)); if (ret) return ret < 0 ? ret : -EINVAL; @@ -426,7 +461,7 @@ static int __init hp_wmi_bios_2008_later(void) { int state = 0; int ret = hp_wmi_perform_query(HPWMI_FEATURE_QUERY, HPWMI_READ, &state, - sizeof(state), sizeof(state)); + 0, sizeof(state)); if (!ret) return 1; @@ -437,7 +472,7 @@ static int __init hp_wmi_bios_2009_later(void) { u8 state[128]; int ret = hp_wmi_perform_query(HPWMI_FEATURE2_QUERY, HPWMI_READ, &state, - sizeof(state), sizeof(state)); + 0, sizeof(state)); if (!ret) return 1; @@ -515,7 +550,7 @@ static int hp_wmi_rfkill2_refresh(void) int err, i; err = hp_wmi_perform_query(HPWMI_WIRELESS2_QUERY, HPWMI_READ, &state, - sizeof(state), sizeof(state)); + 0, sizeof(state)); if (err) return err; @@ -568,7 +603,7 @@ static ssize_t als_show(struct device *dev, struct device_attribute *attr, static ssize_t dock_show(struct device *dev, struct device_attribute *attr, char *buf) { - int value = hp_wmi_hw_state(HPWMI_DOCK_MASK); + int value = hp_wmi_get_dock_state(); if (value < 0) return value; return sprintf(buf, "%d\n", value); @@ -577,7 +612,7 @@ static ssize_t dock_show(struct device *dev, struct device_attribute *attr, static ssize_t tablet_show(struct device *dev, struct device_attribute *attr, char *buf) { - int value = hp_wmi_hw_state(HPWMI_TABLET_MASK); + int value = hp_wmi_get_tablet_mode(); if (value < 0) return value; return sprintf(buf, "%d\n", value); @@ -604,7 +639,7 @@ static ssize_t als_store(struct device *dev, struct device_attribute *attr, return ret; ret 
= hp_wmi_perform_query(HPWMI_ALS_QUERY, HPWMI_WRITE, &tmp, - sizeof(tmp), sizeof(tmp)); + sizeof(tmp), 0); if (ret) return ret < 0 ? ret : -EINVAL; @@ -625,9 +660,9 @@ static ssize_t postcode_store(struct device *dev, struct device_attribute *attr, if (clear == false) return -EINVAL; - /* Clear the POST error code. It is kept until until cleared. */ + /* Clear the POST error code. It is kept until cleared. */ ret = hp_wmi_perform_query(HPWMI_POSTCODEERROR_QUERY, HPWMI_WRITE, &tmp, - sizeof(tmp), sizeof(tmp)); + sizeof(tmp), 0); if (ret) return ret < 0 ? ret : -EINVAL; @@ -699,10 +734,10 @@ static void hp_wmi_notify(u32 value, void *context) case HPWMI_DOCK_EVENT: if (test_bit(SW_DOCK, hp_wmi_input_dev->swbit)) input_report_switch(hp_wmi_input_dev, SW_DOCK, - hp_wmi_hw_state(HPWMI_DOCK_MASK)); + hp_wmi_get_dock_state()); if (test_bit(SW_TABLET_MODE, hp_wmi_input_dev->swbit)) input_report_switch(hp_wmi_input_dev, SW_TABLET_MODE, - hp_wmi_hw_state(HPWMI_TABLET_MASK)); + hp_wmi_get_tablet_mode()); input_sync(hp_wmi_input_dev); break; case HPWMI_PARK_HDD: @@ -780,19 +815,17 @@ static int __init hp_wmi_input_setup(void) __set_bit(EV_SW, hp_wmi_input_dev->evbit); /* Dock */ - val = hp_wmi_hw_state(HPWMI_DOCK_MASK); + val = hp_wmi_get_dock_state(); if (!(val < 0)) { __set_bit(SW_DOCK, hp_wmi_input_dev->swbit); input_report_switch(hp_wmi_input_dev, SW_DOCK, val); } /* Tablet mode */ - if (enable_tablet_mode_sw > 0) { - val = hp_wmi_hw_state(HPWMI_TABLET_MASK); - if (val >= 0) { - __set_bit(SW_TABLET_MODE, hp_wmi_input_dev->swbit); - input_report_switch(hp_wmi_input_dev, SW_TABLET_MODE, val); - } + val = hp_wmi_get_tablet_mode(); + if (!(val < 0)) { + __set_bit(SW_TABLET_MODE, hp_wmi_input_dev->swbit); + input_report_switch(hp_wmi_input_dev, SW_TABLET_MODE, val); } err = sparse_keymap_setup(hp_wmi_input_dev, hp_wmi_keymap, NULL); @@ -919,7 +952,7 @@ static int __init hp_wmi_rfkill2_setup(struct platform_device *device) int err, i; err = 
hp_wmi_perform_query(HPWMI_WIRELESS2_QUERY, HPWMI_READ, &state, - sizeof(state), sizeof(state)); + 0, sizeof(state)); if (err) return err < 0 ? err : -EINVAL; @@ -1227,10 +1260,10 @@ static int hp_wmi_resume_handler(struct device *device) if (hp_wmi_input_dev) { if (test_bit(SW_DOCK, hp_wmi_input_dev->swbit)) input_report_switch(hp_wmi_input_dev, SW_DOCK, - hp_wmi_hw_state(HPWMI_DOCK_MASK)); + hp_wmi_get_dock_state()); if (test_bit(SW_TABLET_MODE, hp_wmi_input_dev->swbit)) input_report_switch(hp_wmi_input_dev, SW_TABLET_MODE, - hp_wmi_hw_state(HPWMI_TABLET_MASK)); + hp_wmi_get_tablet_mode()); input_sync(hp_wmi_input_dev); } diff --git a/drivers/platform/x86/huawei-wmi.c b/drivers/platform/x86/huawei-wmi.c index a2d846c4a7eef..eac3e6b4ea113 100644 --- a/drivers/platform/x86/huawei-wmi.c +++ b/drivers/platform/x86/huawei-wmi.c @@ -470,10 +470,17 @@ static DEVICE_ATTR_RW(charge_control_thresholds); static int huawei_wmi_battery_add(struct power_supply *battery) { - device_create_file(&battery->dev, &dev_attr_charge_control_start_threshold); - device_create_file(&battery->dev, &dev_attr_charge_control_end_threshold); + int err = 0; - return 0; + err = device_create_file(&battery->dev, &dev_attr_charge_control_start_threshold); + if (err) + return err; + + err = device_create_file(&battery->dev, &dev_attr_charge_control_end_threshold); + if (err) + device_remove_file(&battery->dev, &dev_attr_charge_control_start_threshold); + + return err; } static int huawei_wmi_battery_remove(struct power_supply *battery) diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 3424b080db772..3fb8cda31eb9e 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -8699,10 +8699,7 @@ static const struct tpacpi_quirk fan_quirk_table[] __initconst = { TPACPI_Q_LNV3('N', '2', 'N', TPACPI_FAN_2CTL), /* P53 / P73 */ TPACPI_Q_LNV3('N', '2', 'E', TPACPI_FAN_2CTL), /* P1 / X1 Extreme (1st gen) */ TPACPI_Q_LNV3('N', 
'2', 'O', TPACPI_FAN_2CTL), /* P1 / X1 Extreme (2nd gen) */ - TPACPI_Q_LNV3('N', '2', 'V', TPACPI_FAN_2CTL), /* P1 / X1 Extreme (3nd gen) */ - TPACPI_Q_LNV3('N', '4', '0', TPACPI_FAN_2CTL), /* P1 / X1 Extreme (4nd gen) */ TPACPI_Q_LNV3('N', '3', '0', TPACPI_FAN_2CTL), /* P15 (1st gen) / P15v (1st gen) */ - TPACPI_Q_LNV3('N', '3', '2', TPACPI_FAN_2CTL), /* X1 Carbon (9th gen) */ TPACPI_Q_LNV3('N', '3', '7', TPACPI_FAN_2CTL), /* T15g (2nd gen) */ TPACPI_Q_LNV3('N', '1', 'O', TPACPI_FAN_NOFAN), /* X1 Tablet (2nd gen) */ }; @@ -8746,6 +8743,9 @@ static int __init fan_init(struct ibm_init_struct *iibm) * ThinkPad ECs supports the fan control register */ if (likely(acpi_ec_read(fan_status_offset, &fan_control_initial_status))) { + int res; + unsigned int speed; + fan_status_access_mode = TPACPI_FAN_RD_TPEC; if (quirks & TPACPI_FAN_Q1) fan_quirk1_setup(); @@ -8758,6 +8758,15 @@ static int __init fan_init(struct ibm_init_struct *iibm) tp_features.second_fan_ctl = 1; pr_info("secondary fan control enabled\n"); } + /* Try and probe the 2nd fan */ + res = fan2_get_speed(&speed); + if (res >= 0) { + /* It responded - so let's assume it's there */ + tp_features.second_fan = 1; + tp_features.second_fan_ctl = 1; + pr_info("secondary fan control detected & enabled\n"); + } + } else { pr_err("ThinkPad ACPI EC access misbehaving, fan status and control unavailable\n"); return -ENODEV; diff --git a/drivers/power/reset/gemini-poweroff.c b/drivers/power/reset/gemini-poweroff.c index 90e35c07240ae..b7f7a8225f22e 100644 --- a/drivers/power/reset/gemini-poweroff.c +++ b/drivers/power/reset/gemini-poweroff.c @@ -107,8 +107,8 @@ static int gemini_poweroff_probe(struct platform_device *pdev) return PTR_ERR(gpw->base); irq = platform_get_irq(pdev, 0); - if (!irq) - return -EINVAL; + if (irq < 0) + return irq; gpw->dev = dev; diff --git a/drivers/power/supply/Kconfig b/drivers/power/supply/Kconfig index b366e2fd8e97f..5e4a693528111 100644 --- a/drivers/power/supply/Kconfig +++ 
b/drivers/power/supply/Kconfig @@ -351,14 +351,14 @@ config AXP20X_POWER config AXP288_CHARGER tristate "X-Powers AXP288 Charger" - depends on MFD_AXP20X && EXTCON_AXP288 && IOSF_MBI + depends on MFD_AXP20X && EXTCON_AXP288 && IOSF_MBI && ACPI help Say yes here to have support X-Power AXP288 power management IC (PMIC) integrated charger. config AXP288_FUEL_GAUGE tristate "X-Powers AXP288 Fuel Gauge" - depends on MFD_AXP20X && IIO && IOSF_MBI + depends on MFD_AXP20X && IIO && IOSF_MBI && ACPI help Say yes here to have support for X-Power power management IC (PMIC) Fuel Gauge. The device provides battery statistics and status diff --git a/drivers/power/supply/ab8500_bmdata.c b/drivers/power/supply/ab8500_bmdata.c index 7ae95f5375801..9a8334a65de1b 100644 --- a/drivers/power/supply/ab8500_bmdata.c +++ b/drivers/power/supply/ab8500_bmdata.c @@ -188,13 +188,11 @@ int ab8500_bm_of_probe(struct power_supply *psy, * fall back to safe defaults. */ if ((bi->voltage_min_design_uv < 0) || - (bi->voltage_max_design_uv < 0) || - (bi->overvoltage_limit_uv < 0)) { + (bi->voltage_max_design_uv < 0)) { /* Nominal voltage is 3.7V for unknown batteries */ bi->voltage_min_design_uv = 3700000; - bi->voltage_max_design_uv = 3700000; - /* Termination voltage (overcharge limit) 4.05V */ - bi->overvoltage_limit_uv = 4050000; + /* Termination voltage 4.05V */ + bi->voltage_max_design_uv = 4050000; } if (bi->constant_charge_current_max_ua < 0) diff --git a/drivers/power/supply/ab8500_chargalg.c b/drivers/power/supply/ab8500_chargalg.c index c4a2fe07126c3..da490e090ce48 100644 --- a/drivers/power/supply/ab8500_chargalg.c +++ b/drivers/power/supply/ab8500_chargalg.c @@ -802,7 +802,7 @@ static void ab8500_chargalg_end_of_charge(struct ab8500_chargalg *di) if (di->charge_status == POWER_SUPPLY_STATUS_CHARGING && di->charge_state == STATE_NORMAL && !di->maintenance_chg && (di->batt_data.volt_uv >= - di->bm->bi->overvoltage_limit_uv || + di->bm->bi->voltage_max_design_uv || di->events.usb_cv_active 
|| di->events.ac_cv_active) && di->batt_data.avg_curr_ua < di->bm->bi->charge_term_current_ua && @@ -2020,11 +2020,11 @@ static int ab8500_chargalg_probe(struct platform_device *pdev) psy_cfg.drv_data = di; /* Initilialize safety timer */ - hrtimer_init(&di->safety_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); + hrtimer_init(&di->safety_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); di->safety_timer.function = ab8500_chargalg_safety_timer_expired; /* Initilialize maintenance timer */ - hrtimer_init(&di->maintenance_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); + hrtimer_init(&di->maintenance_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); di->maintenance_timer.function = ab8500_chargalg_maintenance_timer_expired; diff --git a/drivers/power/supply/ab8500_fg.c b/drivers/power/supply/ab8500_fg.c index b0919a6a65878..09a4cbd69676a 100644 --- a/drivers/power/supply/ab8500_fg.c +++ b/drivers/power/supply/ab8500_fg.c @@ -2263,7 +2263,13 @@ static int ab8500_fg_init_hw_registers(struct ab8500_fg *di) { int ret; - /* Set VBAT OVV threshold */ + /* + * Set VBAT OVV (overvoltage) threshold to 4.75V (typ) this is what + * the hardware supports, nothing else can be configured in hardware. + * See this as an "outer limit" where the charger will certainly + * shut down. Other (lower) overvoltage levels need to be implemented + * in software. 
+ */ ret = abx500_mask_and_set_register_interruptible(di->dev, AB8500_CHARGER, AB8500_BATT_OVV, @@ -2521,8 +2527,10 @@ static int ab8500_fg_sysfs_init(struct ab8500_fg *di) ret = kobject_init_and_add(&di->fg_kobject, &ab8500_fg_ktype, NULL, "battery"); - if (ret < 0) + if (ret < 0) { + kobject_put(&di->fg_kobject); dev_err(di->dev, "failed to create sysfs entry\n"); + } return ret; } diff --git a/drivers/power/supply/axp20x_battery.c b/drivers/power/supply/axp20x_battery.c index 5d197141f4760..9106077c0dbb4 100644 --- a/drivers/power/supply/axp20x_battery.c +++ b/drivers/power/supply/axp20x_battery.c @@ -186,7 +186,6 @@ static int axp20x_battery_get_prop(struct power_supply *psy, union power_supply_propval *val) { struct axp20x_batt_ps *axp20x_batt = power_supply_get_drvdata(psy); - struct iio_channel *chan; int ret = 0, reg, val1; switch (psp) { @@ -266,12 +265,12 @@ static int axp20x_battery_get_prop(struct power_supply *psy, if (ret) return ret; - if (reg & AXP20X_PWR_STATUS_BAT_CHARGING) - chan = axp20x_batt->batt_chrg_i; - else - chan = axp20x_batt->batt_dischrg_i; - - ret = iio_read_channel_processed(chan, &val->intval); + if (reg & AXP20X_PWR_STATUS_BAT_CHARGING) { + ret = iio_read_channel_processed(axp20x_batt->batt_chrg_i, &val->intval); + } else { + ret = iio_read_channel_processed(axp20x_batt->batt_dischrg_i, &val1); + val->intval = -val1; + } if (ret) return ret; diff --git a/drivers/power/supply/axp288_charger.c b/drivers/power/supply/axp288_charger.c index ec41f6cd3f93f..19746e658a6a8 100644 --- a/drivers/power/supply/axp288_charger.c +++ b/drivers/power/supply/axp288_charger.c @@ -42,11 +42,11 @@ #define VBUS_ISPOUT_CUR_LIM_1500MA 0x1 /* 1500mA */ #define VBUS_ISPOUT_CUR_LIM_2000MA 0x2 /* 2000mA */ #define VBUS_ISPOUT_CUR_NO_LIM 0x3 /* 2500mA */ -#define VBUS_ISPOUT_VHOLD_SET_MASK 0x31 +#define VBUS_ISPOUT_VHOLD_SET_MASK 0x38 #define VBUS_ISPOUT_VHOLD_SET_BIT_POS 0x3 #define VBUS_ISPOUT_VHOLD_SET_OFFSET 4000 /* 4000mV */ #define 
VBUS_ISPOUT_VHOLD_SET_LSB_RES 100 /* 100mV */ -#define VBUS_ISPOUT_VHOLD_SET_4300MV 0x3 /* 4300mV */ +#define VBUS_ISPOUT_VHOLD_SET_4400MV 0x4 /* 4400mV */ #define VBUS_ISPOUT_VBUS_PATH_DIS BIT(7) #define CHRG_CCCV_CC_MASK 0xf /* 4 bits */ @@ -769,6 +769,16 @@ static int charger_init_hw_regs(struct axp288_chrg_info *info) ret = axp288_charger_vbus_path_select(info, true); if (ret < 0) return ret; + } else { + /* Set Vhold to the factory default / recommended 4.4V */ + val = VBUS_ISPOUT_VHOLD_SET_4400MV << VBUS_ISPOUT_VHOLD_SET_BIT_POS; + ret = regmap_update_bits(info->regmap, AXP20X_VBUS_IPSOUT_MGMT, + VBUS_ISPOUT_VHOLD_SET_MASK, val); + if (ret < 0) { + dev_err(&info->pdev->dev, "register(%x) write error(%d)\n", + AXP20X_VBUS_IPSOUT_MGMT, ret); + return ret; + } } /* Read current charge voltage and current limit */ @@ -828,6 +838,13 @@ static int axp288_charger_probe(struct platform_device *pdev) struct power_supply_config charger_cfg = {}; unsigned int val; + /* + * Normally the native AXP288 fg/charger drivers are preferred but + * on some devices the ACPI drivers should be used instead. + */ + if (!acpi_quirk_skip_acpi_ac_and_battery()) + return -ENODEV; + /* * On some devices the fuelgauge and charger parts of the axp288 are * not used, check that the fuelgauge is enabled (CC_CTRL != 0). 
diff --git a/drivers/power/supply/axp288_fuel_gauge.c b/drivers/power/supply/axp288_fuel_gauge.c index c1da217fdb0e2..ce8ffd0a41b5a 100644 --- a/drivers/power/supply/axp288_fuel_gauge.c +++ b/drivers/power/supply/axp288_fuel_gauge.c @@ -9,6 +9,7 @@ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ +#include #include #include #include @@ -560,12 +561,6 @@ static const struct dmi_system_id axp288_no_battery_list[] = { DMI_EXACT_MATCH(DMI_BIOS_VERSION, "1.000"), }, }, - { - /* ECS EF20EA */ - .matches = { - DMI_MATCH(DMI_PRODUCT_NAME, "EF20EA"), - }, - }, { /* Intel Cherry Trail Compute Stick, Windows version */ .matches = { @@ -624,6 +619,13 @@ static int axp288_fuel_gauge_probe(struct platform_device *pdev) }; unsigned int val; + /* + * Normally the native AXP288 fg/charger drivers are preferred but + * on some devices the ACPI drivers should be used instead. + */ + if (!acpi_quirk_skip_acpi_ac_and_battery()) + return -ENODEV; + if (dmi_check_system(axp288_no_battery_list)) return -ENODEV; diff --git a/drivers/power/supply/bq24190_charger.c b/drivers/power/supply/bq24190_charger.c index 06c34b09349ca..8ad1b3b02490c 100644 --- a/drivers/power/supply/bq24190_charger.c +++ b/drivers/power/supply/bq24190_charger.c @@ -39,6 +39,7 @@ #define BQ24190_REG_POC_CHG_CONFIG_DISABLE 0x0 #define BQ24190_REG_POC_CHG_CONFIG_CHARGE 0x1 #define BQ24190_REG_POC_CHG_CONFIG_OTG 0x2 +#define BQ24190_REG_POC_CHG_CONFIG_OTG_ALT 0x3 #define BQ24190_REG_POC_SYS_MIN_MASK (BIT(3) | BIT(2) | BIT(1)) #define BQ24190_REG_POC_SYS_MIN_SHIFT 1 #define BQ24190_REG_POC_SYS_MIN_MIN 3000 @@ -550,7 +551,11 @@ static int bq24190_vbus_is_enabled(struct regulator_dev *dev) pm_runtime_mark_last_busy(bdi->dev); pm_runtime_put_autosuspend(bdi->dev); - return ret ? 
ret : val == BQ24190_REG_POC_CHG_CONFIG_OTG; + if (ret) + return ret; + + return (val == BQ24190_REG_POC_CHG_CONFIG_OTG || + val == BQ24190_REG_POC_CHG_CONFIG_OTG_ALT); } static const struct regulator_ops bq24190_vbus_ops = { diff --git a/drivers/power/supply/sbs-charger.c b/drivers/power/supply/sbs-charger.c index 6fa65d118ec12..b08f7d0c41815 100644 --- a/drivers/power/supply/sbs-charger.c +++ b/drivers/power/supply/sbs-charger.c @@ -18,6 +18,7 @@ #include #include #include +#include #define SBS_CHARGER_REG_SPEC_INFO 0x11 #define SBS_CHARGER_REG_STATUS 0x13 @@ -209,7 +210,12 @@ static int sbs_probe(struct i2c_client *client, if (ret) return dev_err_probe(&client->dev, ret, "Failed to request irq\n"); } else { - INIT_DELAYED_WORK(&chip->work, sbs_delayed_work); + ret = devm_delayed_work_autocancel(&client->dev, &chip->work, + sbs_delayed_work); + if (ret) + return dev_err_probe(&client->dev, ret, + "Failed to init work for polling\n"); + schedule_delayed_work(&chip->work, msecs_to_jiffies(SBS_CHARGER_POLL_TIME)); } @@ -220,15 +226,6 @@ static int sbs_probe(struct i2c_client *client, return 0; } -static int sbs_remove(struct i2c_client *client) -{ - struct sbs_info *chip = i2c_get_clientdata(client); - - cancel_delayed_work_sync(&chip->work); - - return 0; -} - #ifdef CONFIG_OF static const struct of_device_id sbs_dt_ids[] = { { .compatible = "sbs,sbs-charger" }, @@ -245,7 +242,6 @@ MODULE_DEVICE_TABLE(i2c, sbs_id); static struct i2c_driver sbs_driver = { .probe = sbs_probe, - .remove = sbs_remove, .id_table = sbs_id, .driver = { .name = "sbs-charger", diff --git a/drivers/power/supply/wm8350_power.c b/drivers/power/supply/wm8350_power.c index e05cee457471b..908cfd45d2624 100644 --- a/drivers/power/supply/wm8350_power.c +++ b/drivers/power/supply/wm8350_power.c @@ -408,44 +408,112 @@ static const struct power_supply_desc wm8350_usb_desc = { * Initialisation *********************************************************************/ -static void wm8350_init_charger(struct 
wm8350 *wm8350) +static int wm8350_init_charger(struct wm8350 *wm8350) { + int ret; + /* register our interest in charger events */ - wm8350_register_irq(wm8350, WM8350_IRQ_CHG_BAT_HOT, + ret = wm8350_register_irq(wm8350, WM8350_IRQ_CHG_BAT_HOT, wm8350_charger_handler, 0, "Battery hot", wm8350); - wm8350_register_irq(wm8350, WM8350_IRQ_CHG_BAT_COLD, + if (ret) + goto err; + + ret = wm8350_register_irq(wm8350, WM8350_IRQ_CHG_BAT_COLD, wm8350_charger_handler, 0, "Battery cold", wm8350); - wm8350_register_irq(wm8350, WM8350_IRQ_CHG_BAT_FAIL, + if (ret) + goto free_chg_bat_hot; + + ret = wm8350_register_irq(wm8350, WM8350_IRQ_CHG_BAT_FAIL, wm8350_charger_handler, 0, "Battery fail", wm8350); - wm8350_register_irq(wm8350, WM8350_IRQ_CHG_TO, + if (ret) + goto free_chg_bat_cold; + + ret = wm8350_register_irq(wm8350, WM8350_IRQ_CHG_TO, wm8350_charger_handler, 0, "Charger timeout", wm8350); - wm8350_register_irq(wm8350, WM8350_IRQ_CHG_END, + if (ret) + goto free_chg_bat_fail; + + ret = wm8350_register_irq(wm8350, WM8350_IRQ_CHG_END, wm8350_charger_handler, 0, "Charge end", wm8350); - wm8350_register_irq(wm8350, WM8350_IRQ_CHG_START, + if (ret) + goto free_chg_to; + + ret = wm8350_register_irq(wm8350, WM8350_IRQ_CHG_START, wm8350_charger_handler, 0, "Charge start", wm8350); - wm8350_register_irq(wm8350, WM8350_IRQ_CHG_FAST_RDY, + if (ret) + goto free_chg_end; + + ret = wm8350_register_irq(wm8350, WM8350_IRQ_CHG_FAST_RDY, wm8350_charger_handler, 0, "Fast charge ready", wm8350); - wm8350_register_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_3P9, + if (ret) + goto free_chg_start; + + ret = wm8350_register_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_3P9, wm8350_charger_handler, 0, "Battery <3.9V", wm8350); - wm8350_register_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_3P1, + if (ret) + goto free_chg_fast_rdy; + + ret = wm8350_register_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_3P1, wm8350_charger_handler, 0, "Battery <3.1V", wm8350); - wm8350_register_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_2P85, + if (ret) + goto 
free_chg_vbatt_lt_3p9; + + ret = wm8350_register_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_2P85, wm8350_charger_handler, 0, "Battery <2.85V", wm8350); + if (ret) + goto free_chg_vbatt_lt_3p1; /* and supply change events */ - wm8350_register_irq(wm8350, WM8350_IRQ_EXT_USB_FB, + ret = wm8350_register_irq(wm8350, WM8350_IRQ_EXT_USB_FB, wm8350_charger_handler, 0, "USB", wm8350); - wm8350_register_irq(wm8350, WM8350_IRQ_EXT_WALL_FB, + if (ret) + goto free_chg_vbatt_lt_2p85; + + ret = wm8350_register_irq(wm8350, WM8350_IRQ_EXT_WALL_FB, wm8350_charger_handler, 0, "Wall", wm8350); - wm8350_register_irq(wm8350, WM8350_IRQ_EXT_BAT_FB, + if (ret) + goto free_ext_usb_fb; + + ret = wm8350_register_irq(wm8350, WM8350_IRQ_EXT_BAT_FB, wm8350_charger_handler, 0, "Battery", wm8350); + if (ret) + goto free_ext_wall_fb; + + return 0; + +free_ext_wall_fb: + wm8350_free_irq(wm8350, WM8350_IRQ_EXT_WALL_FB, wm8350); +free_ext_usb_fb: + wm8350_free_irq(wm8350, WM8350_IRQ_EXT_USB_FB, wm8350); +free_chg_vbatt_lt_2p85: + wm8350_free_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_2P85, wm8350); +free_chg_vbatt_lt_3p1: + wm8350_free_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_3P1, wm8350); +free_chg_vbatt_lt_3p9: + wm8350_free_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_3P9, wm8350); +free_chg_fast_rdy: + wm8350_free_irq(wm8350, WM8350_IRQ_CHG_FAST_RDY, wm8350); +free_chg_start: + wm8350_free_irq(wm8350, WM8350_IRQ_CHG_START, wm8350); +free_chg_end: + wm8350_free_irq(wm8350, WM8350_IRQ_CHG_END, wm8350); +free_chg_to: + wm8350_free_irq(wm8350, WM8350_IRQ_CHG_TO, wm8350); +free_chg_bat_fail: + wm8350_free_irq(wm8350, WM8350_IRQ_CHG_BAT_FAIL, wm8350); +free_chg_bat_cold: + wm8350_free_irq(wm8350, WM8350_IRQ_CHG_BAT_COLD, wm8350); +free_chg_bat_hot: + wm8350_free_irq(wm8350, WM8350_IRQ_CHG_BAT_HOT, wm8350); +err: + return ret; } static void free_charger_irq(struct wm8350 *wm8350) @@ -456,6 +524,7 @@ static void free_charger_irq(struct wm8350 *wm8350) wm8350_free_irq(wm8350, WM8350_IRQ_CHG_TO, wm8350); wm8350_free_irq(wm8350, 
WM8350_IRQ_CHG_END, wm8350); wm8350_free_irq(wm8350, WM8350_IRQ_CHG_START, wm8350); + wm8350_free_irq(wm8350, WM8350_IRQ_CHG_FAST_RDY, wm8350); wm8350_free_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_3P9, wm8350); wm8350_free_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_3P1, wm8350); wm8350_free_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_2P85, wm8350); diff --git a/drivers/powercap/dtpm_cpu.c b/drivers/powercap/dtpm_cpu.c index b740866b228d9..1e8cac699646c 100644 --- a/drivers/powercap/dtpm_cpu.c +++ b/drivers/powercap/dtpm_cpu.c @@ -150,10 +150,17 @@ static int update_pd_power_uw(struct dtpm *dtpm) static void pd_release(struct dtpm *dtpm) { struct dtpm_cpu *dtpm_cpu = to_dtpm_cpu(dtpm); + struct cpufreq_policy *policy; if (freq_qos_request_active(&dtpm_cpu->qos_req)) freq_qos_remove_request(&dtpm_cpu->qos_req); + policy = cpufreq_cpu_get(dtpm_cpu->cpu); + if (policy) { + for_each_cpu(dtpm_cpu->cpu, policy->related_cpus) + per_cpu(dtpm_per_cpu, dtpm_cpu->cpu) = NULL; + } + kfree(dtpm_cpu); } diff --git a/drivers/pps/clients/pps-gpio.c b/drivers/pps/clients/pps-gpio.c index 35799e6401c99..2f4b11b4dfcd9 100644 --- a/drivers/pps/clients/pps-gpio.c +++ b/drivers/pps/clients/pps-gpio.c @@ -169,7 +169,7 @@ static int pps_gpio_probe(struct platform_device *pdev) /* GPIO setup */ ret = pps_gpio_setup(dev); if (ret) - return -EINVAL; + return ret; /* IRQ setup */ ret = gpiod_to_irq(data->gpio_pin); diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c index 0e4bc8b9329dd..b6f2cfd15dd2d 100644 --- a/drivers/ptp/ptp_clock.c +++ b/drivers/ptp/ptp_clock.c @@ -317,11 +317,18 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info, } EXPORT_SYMBOL(ptp_clock_register); +static int unregister_vclock(struct device *dev, void *data) +{ + struct ptp_clock *ptp = dev_get_drvdata(dev); + + ptp_vclock_unregister(info_to_vclock(ptp->info)); + return 0; +} + int ptp_clock_unregister(struct ptp_clock *ptp) { if (ptp_vclock_in_use(ptp)) { - pr_err("ptp: virtual clock in use\n"); - return 
-EBUSY; + device_for_each_child(&ptp->dev, NULL, unregister_vclock); } ptp->defunct = 1; diff --git a/drivers/ptp/ptp_sysfs.c b/drivers/ptp/ptp_sysfs.c index 41b92dc2f011a..9233bfedeb174 100644 --- a/drivers/ptp/ptp_sysfs.c +++ b/drivers/ptp/ptp_sysfs.c @@ -14,7 +14,7 @@ static ssize_t clock_name_show(struct device *dev, struct device_attribute *attr, char *page) { struct ptp_clock *ptp = dev_get_drvdata(dev); - return snprintf(page, PAGE_SIZE-1, "%s\n", ptp->info->name); + return sysfs_emit(page, "%s\n", ptp->info->name); } static DEVICE_ATTR_RO(clock_name); @@ -387,7 +387,7 @@ static ssize_t ptp_pin_show(struct device *dev, struct device_attribute *attr, mutex_unlock(&ptp->pincfg_mux); - return snprintf(page, PAGE_SIZE, "%u %u\n", func, chan); + return sysfs_emit(page, "%u %u\n", func, chan); } static ssize_t ptp_pin_store(struct device *dev, struct device_attribute *attr, diff --git a/drivers/pwm/pwm-lpc18xx-sct.c b/drivers/pwm/pwm-lpc18xx-sct.c index 8e461f3baa05a..8cc8ae16553cf 100644 --- a/drivers/pwm/pwm-lpc18xx-sct.c +++ b/drivers/pwm/pwm-lpc18xx-sct.c @@ -395,12 +395,6 @@ static int lpc18xx_pwm_probe(struct platform_device *pdev) lpc18xx_pwm_writel(lpc18xx_pwm, LPC18XX_PWM_LIMIT, BIT(lpc18xx_pwm->period_event)); - ret = pwmchip_add(&lpc18xx_pwm->chip); - if (ret < 0) { - dev_err(&pdev->dev, "pwmchip_add failed: %d\n", ret); - goto disable_pwmclk; - } - for (i = 0; i < lpc18xx_pwm->chip.npwm; i++) { struct lpc18xx_pwm_data *data; @@ -410,14 +404,12 @@ static int lpc18xx_pwm_probe(struct platform_device *pdev) GFP_KERNEL); if (!data) { ret = -ENOMEM; - goto remove_pwmchip; + goto disable_pwmclk; } pwm_set_chip_data(pwm, data); } - platform_set_drvdata(pdev, lpc18xx_pwm); - val = lpc18xx_pwm_readl(lpc18xx_pwm, LPC18XX_PWM_CTRL); val &= ~LPC18XX_PWM_BIDIR; val &= ~LPC18XX_PWM_CTRL_HALT; @@ -425,10 +417,16 @@ static int lpc18xx_pwm_probe(struct platform_device *pdev) val |= LPC18XX_PWM_PRE(0); lpc18xx_pwm_writel(lpc18xx_pwm, LPC18XX_PWM_CTRL, val); + ret = 
pwmchip_add(&lpc18xx_pwm->chip); + if (ret < 0) { + dev_err(&pdev->dev, "pwmchip_add failed: %d\n", ret); + goto disable_pwmclk; + } + + platform_set_drvdata(pdev, lpc18xx_pwm); + return 0; -remove_pwmchip: - pwmchip_remove(&lpc18xx_pwm->chip); disable_pwmclk: clk_disable_unprepare(lpc18xx_pwm->pwm_clk); return ret; diff --git a/drivers/regulator/atc260x-regulator.c b/drivers/regulator/atc260x-regulator.c index 05147d2c38428..485e58b264c04 100644 --- a/drivers/regulator/atc260x-regulator.c +++ b/drivers/regulator/atc260x-regulator.c @@ -292,6 +292,7 @@ enum atc2603c_reg_ids { .bypass_mask = BIT(5), \ .active_discharge_reg = ATC2603C_PMU_SWITCH_CTL, \ .active_discharge_mask = BIT(1), \ + .active_discharge_on = BIT(1), \ .owner = THIS_MODULE, \ } diff --git a/drivers/regulator/qcom_smd-regulator.c b/drivers/regulator/qcom_smd-regulator.c index 9fc666107a06c..8490aa8eecb1a 100644 --- a/drivers/regulator/qcom_smd-regulator.c +++ b/drivers/regulator/qcom_smd-regulator.c @@ -1317,8 +1317,10 @@ static int rpm_reg_probe(struct platform_device *pdev) for_each_available_child_of_node(dev->of_node, node) { vreg = devm_kzalloc(&pdev->dev, sizeof(*vreg), GFP_KERNEL); - if (!vreg) + if (!vreg) { + of_node_put(node); return -ENOMEM; + } ret = rpm_regulator_init_vreg(vreg, dev, node, rpm, vreg_data); diff --git a/drivers/regulator/rpi-panel-attiny-regulator.c b/drivers/regulator/rpi-panel-attiny-regulator.c index ee46bfbf5eee7..991b4730d7687 100644 --- a/drivers/regulator/rpi-panel-attiny-regulator.c +++ b/drivers/regulator/rpi-panel-attiny-regulator.c @@ -37,11 +37,24 @@ static const struct regmap_config attiny_regmap_config = { static int attiny_lcd_power_enable(struct regulator_dev *rdev) { unsigned int data; + int ret, i; regmap_write(rdev->regmap, REG_POWERON, 1); + msleep(80); + /* Wait for nPWRDWN to go low to indicate poweron is done. 
*/ - regmap_read_poll_timeout(rdev->regmap, REG_PORTB, data, - data & BIT(0), 10, 1000000); + for (i = 0; i < 20; i++) { + ret = regmap_read(rdev->regmap, REG_PORTB, &data); + if (!ret) { + if (data & BIT(0)) + break; + } + usleep_range(10000, 12000); + } + usleep_range(10000, 12000); + + if (ret) + pr_err("%s: regmap_read_poll_timeout failed %d\n", __func__, ret); /* Default to the same orientation as the closed source * firmware used for the panel. Runtime rotation @@ -57,23 +70,34 @@ static int attiny_lcd_power_disable(struct regulator_dev *rdev) { regmap_write(rdev->regmap, REG_PWM, 0); regmap_write(rdev->regmap, REG_POWERON, 0); - udelay(1); + msleep(30); return 0; } static int attiny_lcd_power_is_enabled(struct regulator_dev *rdev) { unsigned int data; - int ret; + int ret, i; - ret = regmap_read(rdev->regmap, REG_POWERON, &data); + for (i = 0; i < 10; i++) { + ret = regmap_read(rdev->regmap, REG_POWERON, &data); + if (!ret) + break; + usleep_range(10000, 12000); + } if (ret < 0) return ret; if (!(data & BIT(0))) return 0; - ret = regmap_read(rdev->regmap, REG_PORTB, &data); + for (i = 0; i < 10; i++) { + ret = regmap_read(rdev->regmap, REG_PORTB, &data); + if (!ret) + break; + usleep_range(10000, 12000); + } + if (ret < 0) return ret; @@ -103,20 +127,32 @@ static int attiny_update_status(struct backlight_device *bl) { struct regmap *regmap = bl_get_data(bl); int brightness = bl->props.brightness; + int ret, i; if (bl->props.power != FB_BLANK_UNBLANK || bl->props.fb_blank != FB_BLANK_UNBLANK) brightness = 0; - return regmap_write(regmap, REG_PWM, brightness); + for (i = 0; i < 10; i++) { + ret = regmap_write(regmap, REG_PWM, brightness); + if (!ret) + break; + } + + return ret; } static int attiny_get_brightness(struct backlight_device *bl) { struct regmap *regmap = bl_get_data(bl); - int ret, brightness; + int ret, brightness, i; + + for (i = 0; i < 10; i++) { + ret = regmap_read(regmap, REG_PWM, &brightness); + if (!ret) + break; + } - ret = 
regmap_read(regmap, REG_PWM, &brightness); if (ret) return ret; @@ -166,7 +202,7 @@ static int attiny_i2c_probe(struct i2c_client *i2c, } regmap_write(regmap, REG_POWERON, 0); - mdelay(1); + msleep(30); config.dev = &i2c->dev; config.regmap = regmap; diff --git a/drivers/regulator/rtq2134-regulator.c b/drivers/regulator/rtq2134-regulator.c index f21e3f8b21f23..8e13dea354a21 100644 --- a/drivers/regulator/rtq2134-regulator.c +++ b/drivers/regulator/rtq2134-regulator.c @@ -285,6 +285,7 @@ static const unsigned int rtq2134_buck_ramp_delay_table[] = { .enable_mask = RTQ2134_VOUTEN_MASK, \ .active_discharge_reg = RTQ2134_REG_BUCK##_id##_CFG0, \ .active_discharge_mask = RTQ2134_ACTDISCHG_MASK, \ + .active_discharge_on = RTQ2134_ACTDISCHG_MASK, \ .ramp_reg = RTQ2134_REG_BUCK##_id##_RSPCFG, \ .ramp_mask = RTQ2134_RSPUP_MASK, \ .ramp_delay_table = rtq2134_buck_ramp_delay_table, \ diff --git a/drivers/regulator/wm8994-regulator.c b/drivers/regulator/wm8994-regulator.c index cadea0344486f..40befdd9dfa92 100644 --- a/drivers/regulator/wm8994-regulator.c +++ b/drivers/regulator/wm8994-regulator.c @@ -71,6 +71,35 @@ static const struct regulator_ops wm8994_ldo2_ops = { }; static const struct regulator_desc wm8994_ldo_desc[] = { + { + .name = "LDO1", + .id = 1, + .type = REGULATOR_VOLTAGE, + .n_voltages = WM8994_LDO1_MAX_SELECTOR + 1, + .vsel_reg = WM8994_LDO_1, + .vsel_mask = WM8994_LDO1_VSEL_MASK, + .ops = &wm8994_ldo1_ops, + .min_uV = 2400000, + .uV_step = 100000, + .enable_time = 3000, + .off_on_delay = 36000, + .owner = THIS_MODULE, + }, + { + .name = "LDO2", + .id = 2, + .type = REGULATOR_VOLTAGE, + .n_voltages = WM8994_LDO2_MAX_SELECTOR + 1, + .vsel_reg = WM8994_LDO_2, + .vsel_mask = WM8994_LDO2_VSEL_MASK, + .ops = &wm8994_ldo2_ops, + .enable_time = 3000, + .off_on_delay = 36000, + .owner = THIS_MODULE, + }, +}; + +static const struct regulator_desc wm8958_ldo_desc[] = { { .name = "LDO1", .id = 1, @@ -172,9 +201,16 @@ static int wm8994_ldo_probe(struct platform_device 
*pdev) * regulator core and we need not worry about it on the * error path. */ - ldo->regulator = devm_regulator_register(&pdev->dev, - &wm8994_ldo_desc[id], - &config); + if (ldo->wm8994->type == WM8994) { + ldo->regulator = devm_regulator_register(&pdev->dev, + &wm8994_ldo_desc[id], + &config); + } else { + ldo->regulator = devm_regulator_register(&pdev->dev, + &wm8958_ldo_desc[id], + &config); + } + if (IS_ERR(ldo->regulator)) { ret = PTR_ERR(ldo->regulator); dev_err(wm8994->dev, "Failed to register LDO%d: %d\n", diff --git a/drivers/remoteproc/qcom_q6v5_adsp.c b/drivers/remoteproc/qcom_q6v5_adsp.c index 098362e6e233b..7c02bc1322479 100644 --- a/drivers/remoteproc/qcom_q6v5_adsp.c +++ b/drivers/remoteproc/qcom_q6v5_adsp.c @@ -408,6 +408,7 @@ static int adsp_alloc_memory_region(struct qcom_adsp *adsp) } ret = of_address_to_resource(node, 0, &r); + of_node_put(node); if (ret) return ret; diff --git a/drivers/remoteproc/qcom_q6v5_mss.c b/drivers/remoteproc/qcom_q6v5_mss.c index 43ea8455546ca..b9ab91540b00d 100644 --- a/drivers/remoteproc/qcom_q6v5_mss.c +++ b/drivers/remoteproc/qcom_q6v5_mss.c @@ -1806,18 +1806,20 @@ static int q6v5_alloc_memory_region(struct q6v5 *qproc) * reserved memory regions from device's memory-region property. 
*/ child = of_get_child_by_name(qproc->dev->of_node, "mba"); - if (!child) + if (!child) { node = of_parse_phandle(qproc->dev->of_node, "memory-region", 0); - else + } else { node = of_parse_phandle(child, "memory-region", 0); + of_node_put(child); + } ret = of_address_to_resource(node, 0, &r); + of_node_put(node); if (ret) { dev_err(qproc->dev, "unable to resolve mba region\n"); return ret; } - of_node_put(node); qproc->mba_phys = r.start; qproc->mba_size = resource_size(&r); @@ -1828,14 +1830,15 @@ static int q6v5_alloc_memory_region(struct q6v5 *qproc) } else { child = of_get_child_by_name(qproc->dev->of_node, "mpss"); node = of_parse_phandle(child, "memory-region", 0); + of_node_put(child); } ret = of_address_to_resource(node, 0, &r); + of_node_put(node); if (ret) { dev_err(qproc->dev, "unable to resolve mpss region\n"); return ret; } - of_node_put(node); qproc->mpss_phys = qproc->mpss_reloc = r.start; qproc->mpss_size = resource_size(&r); diff --git a/drivers/remoteproc/qcom_wcnss.c b/drivers/remoteproc/qcom_wcnss.c index 80bbafee98463..9a223d394087f 100644 --- a/drivers/remoteproc/qcom_wcnss.c +++ b/drivers/remoteproc/qcom_wcnss.c @@ -500,6 +500,7 @@ static int wcnss_alloc_memory_region(struct qcom_wcnss *wcnss) } ret = of_address_to_resource(node, 0, &r); + of_node_put(node); if (ret) return ret; diff --git a/drivers/remoteproc/remoteproc_debugfs.c b/drivers/remoteproc/remoteproc_debugfs.c index b5a1e3b697d9f..581930483ef84 100644 --- a/drivers/remoteproc/remoteproc_debugfs.c +++ b/drivers/remoteproc/remoteproc_debugfs.c @@ -76,7 +76,7 @@ static ssize_t rproc_coredump_write(struct file *filp, int ret, err = 0; char buf[20]; - if (count > sizeof(buf)) + if (count < 1 || count > sizeof(buf)) return -EINVAL; ret = copy_from_user(buf, user_buf, count); diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c index d8e8357981537..9edd662c69ace 100644 --- a/drivers/rtc/interface.c +++ b/drivers/rtc/interface.c @@ -804,9 +804,13 @@ static int 
rtc_timer_enqueue(struct rtc_device *rtc, struct rtc_timer *timer) struct timerqueue_node *next = timerqueue_getnext(&rtc->timerqueue); struct rtc_time tm; ktime_t now; + int err; + + err = __rtc_read_time(rtc, &tm); + if (err) + return err; timer->enabled = 1; - __rtc_read_time(rtc, &tm); now = rtc_tm_to_ktime(tm); /* Skip over expired timers */ @@ -820,7 +824,6 @@ static int rtc_timer_enqueue(struct rtc_device *rtc, struct rtc_timer *timer) trace_rtc_timer_enqueue(timer); if (!next || ktime_before(timer->node.expires, next->expires)) { struct rtc_wkalrm alarm; - int err; alarm.time = rtc_ktime_to_tm(timer->node.expires); alarm.enabled = 1; diff --git a/drivers/rtc/rtc-gamecube.c b/drivers/rtc/rtc-gamecube.c index f717b36f4738c..18ca3b38b2d04 100644 --- a/drivers/rtc/rtc-gamecube.c +++ b/drivers/rtc/rtc-gamecube.c @@ -235,6 +235,7 @@ static int gamecube_rtc_read_offset_from_sram(struct priv *d) } ret = of_address_to_resource(np, 0, &res); + of_node_put(np); if (ret) { pr_err("no io memory range found\n"); return -1; diff --git a/drivers/rtc/rtc-mc146818-lib.c b/drivers/rtc/rtc-mc146818-lib.c index ae9f131b43c0c..562f99b664a24 100644 --- a/drivers/rtc/rtc-mc146818-lib.c +++ b/drivers/rtc/rtc-mc146818-lib.c @@ -232,8 +232,10 @@ int mc146818_set_time(struct rtc_time *time) if (yrs >= 100) yrs -= 100; - if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) - || RTC_ALWAYS_BCD) { + spin_lock_irqsave(&rtc_lock, flags); + save_control = CMOS_READ(RTC_CONTROL); + spin_unlock_irqrestore(&rtc_lock, flags); + if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) { sec = bin2bcd(sec); min = bin2bcd(min); hrs = bin2bcd(hrs); diff --git a/drivers/rtc/rtc-pl031.c b/drivers/rtc/rtc-pl031.c index e38ee88483855..bad6a5d9c6839 100644 --- a/drivers/rtc/rtc-pl031.c +++ b/drivers/rtc/rtc-pl031.c @@ -350,9 +350,6 @@ static int pl031_probe(struct amba_device *adev, const struct amba_id *id) } } - if (!adev->irq[0]) - clear_bit(RTC_FEATURE_ALARM, ldata->rtc->features); - 
device_init_wakeup(&adev->dev, true); ldata->rtc = devm_rtc_allocate_device(&adev->dev); if (IS_ERR(ldata->rtc)) { @@ -360,6 +357,9 @@ static int pl031_probe(struct amba_device *adev, const struct amba_id *id) goto out; } + if (!adev->irq[0]) + clear_bit(RTC_FEATURE_ALARM, ldata->rtc->features); + ldata->rtc->ops = ops; ldata->rtc->range_min = vendor->range_min; ldata->rtc->range_max = vendor->range_max; diff --git a/drivers/rtc/rtc-wm8350.c b/drivers/rtc/rtc-wm8350.c index 2018614f258f6..6eaa9321c0741 100644 --- a/drivers/rtc/rtc-wm8350.c +++ b/drivers/rtc/rtc-wm8350.c @@ -432,14 +432,21 @@ static int wm8350_rtc_probe(struct platform_device *pdev) return ret; } - wm8350_register_irq(wm8350, WM8350_IRQ_RTC_SEC, + ret = wm8350_register_irq(wm8350, WM8350_IRQ_RTC_SEC, wm8350_rtc_update_handler, 0, "RTC Seconds", wm8350); + if (ret) + return ret; + wm8350_mask_irq(wm8350, WM8350_IRQ_RTC_SEC); - wm8350_register_irq(wm8350, WM8350_IRQ_RTC_ALM, + ret = wm8350_register_irq(wm8350, WM8350_IRQ_RTC_ALM, wm8350_rtc_alarm_handler, 0, "RTC Alarm", wm8350); + if (ret) { + wm8350_free_irq(wm8350, WM8350_IRQ_RTC_SEC, wm8350); + return ret; + } return 0; } diff --git a/drivers/scsi/aha152x.c b/drivers/scsi/aha152x.c index d17880b57d17b..2449b4215b32d 100644 --- a/drivers/scsi/aha152x.c +++ b/drivers/scsi/aha152x.c @@ -3375,13 +3375,11 @@ static int __init aha152x_setup(char *str) setup[setup_count].synchronous = ints[0] >= 6 ? ints[6] : 1; setup[setup_count].delay = ints[0] >= 7 ? ints[7] : DELAY_DEFAULT; setup[setup_count].ext_trans = ints[0] >= 8 ? 
ints[8] : 0; - if (ints[0] > 8) { /*}*/ + if (ints[0] > 8) printk(KERN_NOTICE "aha152x: usage: aha152x=[,[," "[,[,[,[,[,]]]]]]]\n"); - } else { + else setup_count++; - return 0; - } return 1; } diff --git a/drivers/scsi/bfa/bfad_attr.c b/drivers/scsi/bfa/bfad_attr.c index f46989bd083cc..5a85401e9e2d3 100644 --- a/drivers/scsi/bfa/bfad_attr.c +++ b/drivers/scsi/bfa/bfad_attr.c @@ -711,7 +711,7 @@ bfad_im_serial_num_show(struct device *dev, struct device_attribute *attr, char serial_num[BFA_ADAPTER_SERIAL_NUM_LEN]; bfa_get_adapter_serial_num(&bfad->bfa, serial_num); - return snprintf(buf, PAGE_SIZE, "%s\n", serial_num); + return sysfs_emit(buf, "%s\n", serial_num); } static ssize_t @@ -725,7 +725,7 @@ bfad_im_model_show(struct device *dev, struct device_attribute *attr, char model[BFA_ADAPTER_MODEL_NAME_LEN]; bfa_get_adapter_model(&bfad->bfa, model); - return snprintf(buf, PAGE_SIZE, "%s\n", model); + return sysfs_emit(buf, "%s\n", model); } static ssize_t @@ -805,7 +805,7 @@ bfad_im_model_desc_show(struct device *dev, struct device_attribute *attr, snprintf(model_descr, BFA_ADAPTER_MODEL_DESCR_LEN, "Invalid Model"); - return snprintf(buf, PAGE_SIZE, "%s\n", model_descr); + return sysfs_emit(buf, "%s\n", model_descr); } static ssize_t @@ -819,7 +819,7 @@ bfad_im_node_name_show(struct device *dev, struct device_attribute *attr, u64 nwwn; nwwn = bfa_fcs_lport_get_nwwn(port->fcs_port); - return snprintf(buf, PAGE_SIZE, "0x%llx\n", cpu_to_be64(nwwn)); + return sysfs_emit(buf, "0x%llx\n", cpu_to_be64(nwwn)); } static ssize_t @@ -836,7 +836,7 @@ bfad_im_symbolic_name_show(struct device *dev, struct device_attribute *attr, bfa_fcs_lport_get_attr(&bfad->bfa_fcs.fabric.bport, &port_attr); strlcpy(symname, port_attr.port_cfg.sym_name.symname, BFA_SYMNAME_MAXLEN); - return snprintf(buf, PAGE_SIZE, "%s\n", symname); + return sysfs_emit(buf, "%s\n", symname); } static ssize_t @@ -850,14 +850,14 @@ bfad_im_hw_version_show(struct device *dev, struct device_attribute *attr, char 
hw_ver[BFA_VERSION_LEN]; bfa_get_pci_chip_rev(&bfad->bfa, hw_ver); - return snprintf(buf, PAGE_SIZE, "%s\n", hw_ver); + return sysfs_emit(buf, "%s\n", hw_ver); } static ssize_t bfad_im_drv_version_show(struct device *dev, struct device_attribute *attr, char *buf) { - return snprintf(buf, PAGE_SIZE, "%s\n", BFAD_DRIVER_VERSION); + return sysfs_emit(buf, "%s\n", BFAD_DRIVER_VERSION); } static ssize_t @@ -871,7 +871,7 @@ bfad_im_optionrom_version_show(struct device *dev, char optrom_ver[BFA_VERSION_LEN]; bfa_get_adapter_optrom_ver(&bfad->bfa, optrom_ver); - return snprintf(buf, PAGE_SIZE, "%s\n", optrom_ver); + return sysfs_emit(buf, "%s\n", optrom_ver); } static ssize_t @@ -885,7 +885,7 @@ bfad_im_fw_version_show(struct device *dev, struct device_attribute *attr, char fw_ver[BFA_VERSION_LEN]; bfa_get_adapter_fw_ver(&bfad->bfa, fw_ver); - return snprintf(buf, PAGE_SIZE, "%s\n", fw_ver); + return sysfs_emit(buf, "%s\n", fw_ver); } static ssize_t @@ -897,7 +897,7 @@ bfad_im_num_of_ports_show(struct device *dev, struct device_attribute *attr, (struct bfad_im_port_s *) shost->hostdata[0]; struct bfad_s *bfad = im_port->bfad; - return snprintf(buf, PAGE_SIZE, "%d\n", + return sysfs_emit(buf, "%d\n", bfa_get_nports(&bfad->bfa)); } @@ -905,7 +905,7 @@ static ssize_t bfad_im_drv_name_show(struct device *dev, struct device_attribute *attr, char *buf) { - return snprintf(buf, PAGE_SIZE, "%s\n", BFAD_DRIVER_NAME); + return sysfs_emit(buf, "%s\n", BFAD_DRIVER_NAME); } static ssize_t @@ -924,14 +924,14 @@ bfad_im_num_of_discovered_ports_show(struct device *dev, rports = kcalloc(nrports, sizeof(struct bfa_rport_qualifier_s), GFP_ATOMIC); if (rports == NULL) - return snprintf(buf, PAGE_SIZE, "Failed\n"); + return sysfs_emit(buf, "Failed\n"); spin_lock_irqsave(&bfad->bfad_lock, flags); bfa_fcs_lport_get_rport_quals(port->fcs_port, rports, &nrports); spin_unlock_irqrestore(&bfad->bfad_lock, flags); kfree(rports); - return snprintf(buf, PAGE_SIZE, "%d\n", nrports); + return 
sysfs_emit(buf, "%d\n", nrports); } static DEVICE_ATTR(serial_number, S_IRUGO, diff --git a/drivers/scsi/fnic/fnic_scsi.c b/drivers/scsi/fnic/fnic_scsi.c index 40a52feb315da..65047806a5410 100644 --- a/drivers/scsi/fnic/fnic_scsi.c +++ b/drivers/scsi/fnic/fnic_scsi.c @@ -604,7 +604,7 @@ static int fnic_queuecommand_lck(struct scsi_cmnd *sc) FNIC_TRACE(fnic_queuecommand, sc->device->host->host_no, tag, sc, io_req, sg_count, cmd_trace, - (((u64)CMD_FLAGS(sc) >> 32) | CMD_STATE(sc))); + (((u64)CMD_FLAGS(sc) << 32) | CMD_STATE(sc))); /* if only we issued IO, will we have the io lock */ if (io_lock_acquired) diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index a01a3a7b706b5..52089538e9de6 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -530,7 +530,7 @@ MODULE_PARM_DESC(intr_conv, "interrupt converge enable (0-1)"); /* permit overriding the host protection capabilities mask (EEDP/T10 PI) */ static int prot_mask; -module_param(prot_mask, int, 0); +module_param(prot_mask, int, 0444); MODULE_PARM_DESC(prot_mask, " host protection capabilities mask, def=0x0 "); static void debugfs_work_handler_v3_hw(struct work_struct *work); @@ -2398,17 +2398,25 @@ static irqreturn_t cq_interrupt_v3_hw(int irq_no, void *p) return IRQ_WAKE_THREAD; } +static void hisi_sas_v3_free_vectors(void *data) +{ + struct pci_dev *pdev = data; + + pci_free_irq_vectors(pdev); +} + static int interrupt_preinit_v3_hw(struct hisi_hba *hisi_hba) { int vectors; int max_msi = HISI_SAS_MSI_COUNT_V3_HW, min_msi; struct Scsi_Host *shost = hisi_hba->shost; + struct pci_dev *pdev = hisi_hba->pci_dev; struct irq_affinity desc = { .pre_vectors = BASE_VECTORS_V3_HW, }; min_msi = MIN_AFFINE_VECTORS_V3_HW; - vectors = pci_alloc_irq_vectors_affinity(hisi_hba->pci_dev, + vectors = pci_alloc_irq_vectors_affinity(pdev, min_msi, max_msi, PCI_IRQ_MSI | PCI_IRQ_AFFINITY, @@ -2420,6 +2428,7 @@ static int 
interrupt_preinit_v3_hw(struct hisi_hba *hisi_hba) hisi_hba->cq_nvecs = vectors - BASE_VECTORS_V3_HW; shost->nr_hw_queues = hisi_hba->cq_nvecs; + devm_add_action(&pdev->dev, hisi_sas_v3_free_vectors, pdev); return 0; } @@ -3967,6 +3976,54 @@ static const struct file_operations debugfs_bist_phy_v3_hw_fops = { .owner = THIS_MODULE, }; +static ssize_t debugfs_bist_cnt_v3_hw_write(struct file *filp, + const char __user *buf, + size_t count, loff_t *ppos) +{ + struct seq_file *m = filp->private_data; + struct hisi_hba *hisi_hba = m->private; + unsigned int cnt; + int val; + + if (hisi_hba->debugfs_bist_enable) + return -EPERM; + + val = kstrtouint_from_user(buf, count, 0, &cnt); + if (val) + return val; + + if (cnt) + return -EINVAL; + + hisi_hba->debugfs_bist_cnt = 0; + return count; +} + +static int debugfs_bist_cnt_v3_hw_show(struct seq_file *s, void *p) +{ + struct hisi_hba *hisi_hba = s->private; + + seq_printf(s, "%u\n", hisi_hba->debugfs_bist_cnt); + + return 0; +} + +static int debugfs_bist_cnt_v3_hw_open(struct inode *inode, + struct file *filp) +{ + return single_open(filp, debugfs_bist_cnt_v3_hw_show, + inode->i_private); +} + +static const struct file_operations debugfs_bist_cnt_v3_hw_ops = { + .open = debugfs_bist_cnt_v3_hw_open, + .read = seq_read, + .write = debugfs_bist_cnt_v3_hw_write, + .llseek = seq_lseek, + .release = single_release, + .owner = THIS_MODULE, +}; + static const struct { int value; char *name; @@ -4604,8 +4661,8 @@ static void debugfs_bist_init_v3_hw(struct hisi_hba *hisi_hba) debugfs_create_file("phy_id", 0600, hisi_hba->debugfs_bist_dentry, hisi_hba, &debugfs_bist_phy_v3_hw_fops); - debugfs_create_u32("cnt", 0600, hisi_hba->debugfs_bist_dentry, - &hisi_hba->debugfs_bist_cnt); + debugfs_create_file("cnt", 0600, hisi_hba->debugfs_bist_dentry, + hisi_hba, &debugfs_bist_cnt_v3_hw_ops); debugfs_create_file("loopback_mode", 0600, hisi_hba->debugfs_bist_dentry, @@ -4769,7 +4826,7 @@ hisi_sas_v3_probe(struct pci_dev *pdev, const struct 
pci_device_id *id) rc = scsi_add_host(shost, dev); if (rc) - goto err_out_free_irq_vectors; + goto err_out_debugfs; rc = sas_register_ha(sha); if (rc) @@ -4800,8 +4857,6 @@ hisi_sas_v3_probe(struct pci_dev *pdev, const struct pci_device_id *id) sas_unregister_ha(sha); err_out_register_ha: scsi_remove_host(shost); -err_out_free_irq_vectors: - pci_free_irq_vectors(pdev); err_out_debugfs: debugfs_exit_v3_hw(hisi_hba); err_out_ha: @@ -4825,7 +4880,6 @@ hisi_sas_v3_destroy_irqs(struct pci_dev *pdev, struct hisi_hba *hisi_hba) devm_free_irq(&pdev->dev, pci_irq_vector(pdev, nr), cq); } - pci_free_irq_vectors(pdev); } static void hisi_sas_v3_remove(struct pci_dev *pdev) diff --git a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c index 61f06f6885a56..89b9fbce7488a 100644 --- a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c +++ b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c @@ -36,7 +36,7 @@ #define IBMVSCSIS_VERSION "v0.2" -#define INITIAL_SRP_LIMIT 800 +#define INITIAL_SRP_LIMIT 1024 #define DEFAULT_MAX_SECTORS 256 #define MAX_TXU 1024 * 1024 diff --git a/drivers/scsi/libfc/fc_exch.c b/drivers/scsi/libfc/fc_exch.c index 841000445b9a1..aa223db4cf53c 100644 --- a/drivers/scsi/libfc/fc_exch.c +++ b/drivers/scsi/libfc/fc_exch.c @@ -1701,6 +1701,7 @@ static void fc_exch_abts_resp(struct fc_exch *ep, struct fc_frame *fp) if (cancel_delayed_work_sync(&ep->timeout_work)) { FC_EXCH_DBG(ep, "Exchange timer canceled due to ABTS response\n"); fc_exch_release(ep); /* release from pending timer hold */ + return; } spin_lock_bh(&ep->ex_lock); diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c index a315715b36227..7e0cde710fc3c 100644 --- a/drivers/scsi/libsas/sas_ata.c +++ b/drivers/scsi/libsas/sas_ata.c @@ -197,7 +197,7 @@ static unsigned int sas_ata_qc_issue(struct ata_queued_cmd *qc) task->total_xfer_len = qc->nbytes; task->num_scatter = qc->n_elem; task->data_dir = qc->dma_dir; - } else if (qc->tf.protocol == ATA_PROT_NODATA) { + } 
else if (!ata_is_data(qc->tf.protocol)) { task->data_dir = DMA_NONE; } else { for_each_sg(qc->sg, sg, qc->n_elem, si) diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index 98cabe09c0404..8748c5996478f 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -897,6 +897,11 @@ enum lpfc_irq_chann_mode { NHT_MODE, }; +enum lpfc_hba_bit_flags { + FABRIC_COMANDS_BLOCKED, + HBA_PCI_ERR, +}; + struct lpfc_hba { /* SCSI interface function jump table entries */ struct lpfc_io_buf * (*lpfc_get_scsi_buf) @@ -1025,7 +1030,6 @@ struct lpfc_hba { * Firmware supports Forced Link Speed * capability */ -#define HBA_PCI_ERR 0x80000 /* The PCI slot is offline */ #define HBA_FLOGI_ISSUED 0x100000 /* FLOGI was issued */ #define HBA_SHORT_CMF 0x200000 /* shorter CMF timer routine */ #define HBA_CGN_DAY_WRAP 0x400000 /* HBA Congestion info day wraps */ @@ -1335,7 +1339,6 @@ struct lpfc_hba { atomic_t fabric_iocb_count; struct timer_list fabric_block_timer; unsigned long bit_flags; -#define FABRIC_COMANDS_BLOCKED 0 atomic_t num_rsrc_err; atomic_t num_cmd_success; unsigned long last_rsrc_error_time; diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h index 89e36bf14d8f8..d4340e5a3aac2 100644 --- a/drivers/scsi/lpfc/lpfc_crtn.h +++ b/drivers/scsi/lpfc/lpfc_crtn.h @@ -652,3 +652,6 @@ struct lpfc_vmid *lpfc_get_vmid_from_hashtable(struct lpfc_vport *vport, uint32_t hash, uint8_t *buf); void lpfc_vmid_vport_cleanup(struct lpfc_vport *vport); int lpfc_issue_els_qfpa(struct lpfc_vport *vport); + +void lpfc_sli_rpi_release(struct lpfc_vport *vport, + struct lpfc_nodelist *ndlp); diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index 816fc406135b3..0cba306de0dbf 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -109,8 +109,8 @@ lpfc_rport_invalid(struct fc_rport *rport) ndlp = rdata->pnode; if (!rdata->pnode) { - pr_err("**** %s: NULL ndlp on rport x%px SID x%x\n", - 
__func__, rport, rport->scsi_target_id); + pr_info("**** %s: NULL ndlp on rport x%px SID x%x\n", + __func__, rport, rport->scsi_target_id); return -EINVAL; } @@ -169,9 +169,10 @@ lpfc_dev_loss_tmo_callbk(struct fc_rport *rport) lpfc_printf_vlog(ndlp->vport, KERN_INFO, LOG_NODE, "3181 dev_loss_callbk x%06x, rport x%px flg x%x " - "load_flag x%x refcnt %d\n", + "load_flag x%x refcnt %d state %d xpt x%x\n", ndlp->nlp_DID, ndlp->rport, ndlp->nlp_flag, - vport->load_flag, kref_read(&ndlp->kref)); + vport->load_flag, kref_read(&ndlp->kref), + ndlp->nlp_state, ndlp->fc4_xpt_flags); /* Don't schedule a worker thread event if the vport is going down. * The teardown process cleans up the node via lpfc_drop_node. @@ -181,6 +182,11 @@ lpfc_dev_loss_tmo_callbk(struct fc_rport *rport) ndlp->rport = NULL; ndlp->fc4_xpt_flags &= ~SCSI_XPT_REGD; + /* clear the NLP_XPT_REGD if the node is not registered + * with nvme-fc + */ + if (ndlp->fc4_xpt_flags == NLP_XPT_REGD) + ndlp->fc4_xpt_flags &= ~NLP_XPT_REGD; /* Remove the node reference from remote_port_add now. * The driver will not call remote_port_delete. 
@@ -225,18 +231,36 @@ lpfc_dev_loss_tmo_callbk(struct fc_rport *rport) ndlp->rport = NULL; spin_unlock_irqrestore(&ndlp->lock, iflags); - /* We need to hold the node by incrementing the reference - * count until this queued work is done - */ - evtp->evt_arg1 = lpfc_nlp_get(ndlp); + if (phba->worker_thread) { + /* We need to hold the node by incrementing the reference + * count until this queued work is done + */ + evtp->evt_arg1 = lpfc_nlp_get(ndlp); + + spin_lock_irqsave(&phba->hbalock, iflags); + if (evtp->evt_arg1) { + evtp->evt = LPFC_EVT_DEV_LOSS; + list_add_tail(&evtp->evt_listp, &phba->work_list); + lpfc_worker_wake_up(phba); + } + spin_unlock_irqrestore(&phba->hbalock, iflags); + } else { + lpfc_printf_vlog(ndlp->vport, KERN_INFO, LOG_NODE, + "3188 worker thread is stopped %s x%06x, " + " rport x%px flg x%x load_flag x%x refcnt " + "%d\n", __func__, ndlp->nlp_DID, + ndlp->rport, ndlp->nlp_flag, + vport->load_flag, kref_read(&ndlp->kref)); + if (!(ndlp->fc4_xpt_flags & NVME_XPT_REGD)) { + spin_lock_irqsave(&ndlp->lock, iflags); + /* Node is in dev loss. No further transaction. 
*/ + ndlp->nlp_flag &= ~NLP_IN_DEV_LOSS; + spin_unlock_irqrestore(&ndlp->lock, iflags); + lpfc_disc_state_machine(vport, ndlp, NULL, + NLP_EVT_DEVICE_RM); + } - spin_lock_irqsave(&phba->hbalock, iflags); - if (evtp->evt_arg1) { - evtp->evt = LPFC_EVT_DEV_LOSS; - list_add_tail(&evtp->evt_listp, &phba->work_list); - lpfc_worker_wake_up(phba); } - spin_unlock_irqrestore(&phba->hbalock, iflags); return; } @@ -503,11 +527,12 @@ lpfc_dev_loss_tmo_handler(struct lpfc_nodelist *ndlp) lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT, "0203 Devloss timeout on " "WWPN %02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x " - "NPort x%06x Data: x%x x%x x%x\n", + "NPort x%06x Data: x%x x%x x%x refcnt %d\n", *name, *(name+1), *(name+2), *(name+3), *(name+4), *(name+5), *(name+6), *(name+7), ndlp->nlp_DID, ndlp->nlp_flag, - ndlp->nlp_state, ndlp->nlp_rpi); + ndlp->nlp_state, ndlp->nlp_rpi, + kref_read(&ndlp->kref)); } else { lpfc_printf_vlog(vport, KERN_INFO, LOG_TRACE_EVENT, "0204 Devloss timeout on " @@ -755,18 +780,22 @@ lpfc_work_list_done(struct lpfc_hba *phba) int free_evt; int fcf_inuse; uint32_t nlp_did; + bool hba_pci_err; spin_lock_irq(&phba->hbalock); while (!list_empty(&phba->work_list)) { list_remove_head((&phba->work_list), evtp, typeof(*evtp), evt_listp); spin_unlock_irq(&phba->hbalock); + hba_pci_err = test_bit(HBA_PCI_ERR, &phba->bit_flags); free_evt = 1; switch (evtp->evt) { case LPFC_EVT_ELS_RETRY: ndlp = (struct lpfc_nodelist *) (evtp->evt_arg1); - lpfc_els_retry_delay_handler(ndlp); - free_evt = 0; /* evt is part of ndlp */ + if (!hba_pci_err) { + lpfc_els_retry_delay_handler(ndlp); + free_evt = 0; /* evt is part of ndlp */ + } /* decrement the node reference count held * for this queued work */ @@ -788,8 +817,10 @@ lpfc_work_list_done(struct lpfc_hba *phba) break; case LPFC_EVT_RECOVER_PORT: ndlp = (struct lpfc_nodelist *)(evtp->evt_arg1); - lpfc_sli_abts_recover_port(ndlp->vport, ndlp); - free_evt = 0; + if (!hba_pci_err) { + lpfc_sli_abts_recover_port(ndlp->vport, 
ndlp); + free_evt = 0; + } /* decrement the node reference count held for * this queued work */ @@ -859,14 +890,18 @@ lpfc_work_done(struct lpfc_hba *phba) struct lpfc_vport **vports; struct lpfc_vport *vport; int i; + bool hba_pci_err; + hba_pci_err = test_bit(HBA_PCI_ERR, &phba->bit_flags); spin_lock_irq(&phba->hbalock); ha_copy = phba->work_ha; phba->work_ha = 0; spin_unlock_irq(&phba->hbalock); + if (hba_pci_err) + ha_copy = 0; /* First, try to post the next mailbox command to SLI4 device */ - if (phba->pci_dev_grp == LPFC_PCI_DEV_OC) + if (phba->pci_dev_grp == LPFC_PCI_DEV_OC && !hba_pci_err) lpfc_sli4_post_async_mbox(phba); if (ha_copy & HA_ERATT) { @@ -886,7 +921,7 @@ lpfc_work_done(struct lpfc_hba *phba) lpfc_handle_latt(phba); /* Handle VMID Events */ - if (lpfc_is_vmid_enabled(phba)) { + if (lpfc_is_vmid_enabled(phba) && !hba_pci_err) { if (phba->pport->work_port_events & WORKER_CHECK_VMID_ISSUE_QFPA) { lpfc_check_vmid_qfpa_issue(phba); @@ -936,6 +971,8 @@ lpfc_work_done(struct lpfc_hba *phba) work_port_events = vport->work_port_events; vport->work_port_events &= ~work_port_events; spin_unlock_irq(&vport->work_port_lock); + if (hba_pci_err) + continue; if (work_port_events & WORKER_DISC_TMO) lpfc_disc_timeout_handler(vport); if (work_port_events & WORKER_ELS_TMO) @@ -1173,12 +1210,14 @@ lpfc_linkdown(struct lpfc_hba *phba) struct lpfc_vport **vports; LPFC_MBOXQ_t *mb; int i; + int offline; if (phba->link_state == LPFC_LINK_DOWN) return 0; /* Block all SCSI stack I/Os */ lpfc_scsi_dev_block(phba); + offline = pci_channel_offline(phba->pcidev); phba->defer_flogi_acc_flag = false; @@ -1219,7 +1258,7 @@ lpfc_linkdown(struct lpfc_hba *phba) lpfc_destroy_vport_work_array(phba, vports); /* Clean up any SLI3 firmware default rpi's */ - if (phba->sli_rev > LPFC_SLI_REV3) + if (phba->sli_rev > LPFC_SLI_REV3 || offline) goto skip_unreg_did; mb = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); @@ -4712,6 +4751,11 @@ lpfc_nlp_unreg_node(struct lpfc_vport *vport, 
struct lpfc_nodelist *ndlp) spin_lock_irqsave(&ndlp->lock, iflags); if (!(ndlp->fc4_xpt_flags & NLP_XPT_REGD)) { spin_unlock_irqrestore(&ndlp->lock, iflags); + lpfc_printf_vlog(vport, KERN_INFO, LOG_SLI, + "0999 %s Not regd: ndlp x%px rport x%px DID " + "x%x FLG x%x XPT x%x\n", + __func__, ndlp, ndlp->rport, ndlp->nlp_DID, + ndlp->nlp_flag, ndlp->fc4_xpt_flags); return; } @@ -4722,6 +4766,13 @@ lpfc_nlp_unreg_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) ndlp->fc4_xpt_flags & SCSI_XPT_REGD) { vport->phba->nport_event_cnt++; lpfc_unregister_remote_port(ndlp); + } else if (!ndlp->rport) { + lpfc_printf_vlog(vport, KERN_INFO, LOG_SLI, + "1999 %s NDLP in devloss x%px DID x%x FLG x%x" + " XPT x%x refcnt %d\n", + __func__, ndlp, ndlp->nlp_DID, ndlp->nlp_flag, + ndlp->fc4_xpt_flags, + kref_read(&ndlp->kref)); } if (ndlp->fc4_xpt_flags & NVME_XPT_REGD) { @@ -5365,6 +5416,7 @@ lpfc_unreg_rpi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) ndlp->nlp_flag &= ~NLP_UNREG_INP; mempool_free(mbox, phba->mbox_mem_pool); acc_plogi = 1; + lpfc_nlp_put(ndlp); } } else { lpfc_printf_vlog(vport, KERN_INFO, @@ -6089,12 +6141,34 @@ lpfc_disc_flush_list(struct lpfc_vport *vport) } } +/* + * lpfc_notify_xport_npr - notifies xport of node disappearance + * @vport: Pointer to Virtual Port object. + * + * Transitions all ndlps to NPR state. When lpfc_nlp_set_state + * calls lpfc_nlp_state_cleanup, the ndlp->rport is unregistered + * and transport notified that the node is gone. 
+ * Return Code: + * none + */ +static void +lpfc_notify_xport_npr(struct lpfc_vport *vport) +{ + struct lpfc_nodelist *ndlp, *next_ndlp; + + list_for_each_entry_safe(ndlp, next_ndlp, &vport->fc_nodes, + nlp_listp) { + lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE); + } +} void lpfc_cleanup_discovery_resources(struct lpfc_vport *vport) { lpfc_els_flush_rscn(vport); lpfc_els_flush_cmd(vport); lpfc_disc_flush_list(vport); + if (pci_channel_offline(vport->phba->pcidev)) + lpfc_notify_xport_npr(vport); } /*****************************************************************************/ diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 558f7d2559c4d..9569a7390f9d5 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -95,6 +95,7 @@ static void lpfc_sli4_oas_verify(struct lpfc_hba *phba); static uint16_t lpfc_find_cpu_handle(struct lpfc_hba *, uint16_t, int); static void lpfc_setup_bg(struct lpfc_hba *, struct Scsi_Host *); static int lpfc_sli4_cgn_parm_chg_evt(struct lpfc_hba *); +static void lpfc_sli4_prep_dev_for_reset(struct lpfc_hba *phba); static struct scsi_transport_template *lpfc_transport_template = NULL; static struct scsi_transport_template *lpfc_vport_transport_template = NULL; @@ -1652,7 +1653,7 @@ lpfc_sli4_offline_eratt(struct lpfc_hba *phba) { spin_lock_irq(&phba->hbalock); if (phba->link_state == LPFC_HBA_ERROR && - phba->hba_flag & HBA_PCI_ERR) { + test_bit(HBA_PCI_ERR, &phba->bit_flags)) { spin_unlock_irq(&phba->hbalock); return; } @@ -1995,6 +1996,7 @@ lpfc_handle_eratt_s4(struct lpfc_hba *phba) if (pci_channel_offline(phba->pcidev)) { lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT, "3166 pci channel is offline\n"); + lpfc_sli_flush_io_rings(phba); return; } @@ -2983,6 +2985,22 @@ lpfc_cleanup(struct lpfc_vport *vport) NLP_EVT_DEVICE_RM); } + /* This is a special case flush to return all + * IOs before entering this loop. 
There are + * two points in the code where a flush is + * avoided if the FC_UNLOADING flag is set. + * one is in the multipool destroy, + * (this prevents a crash) and the other is + * in the nvme abort handler, ( also prevents + * a crash). Both of these exceptions are + * cases where the slot is still accessible. + * The flush here is only when the pci slot + * is offline. + */ + if (vport->load_flag & FC_UNLOADING && + pci_channel_offline(phba->pcidev)) + lpfc_sli_flush_io_rings(vport->phba); + /* At this point, ALL ndlp's should be gone * because of the previous NLP_EVT_DEVICE_RM. * Lets wait for this to happen, if needed. @@ -2995,7 +3013,7 @@ lpfc_cleanup(struct lpfc_vport *vport) list_for_each_entry_safe(ndlp, next_ndlp, &vport->fc_nodes, nlp_listp) { lpfc_printf_vlog(ndlp->vport, KERN_ERR, - LOG_TRACE_EVENT, + LOG_DISCOVERY, "0282 did:x%x ndlp:x%px " "refcnt:%d xflags x%x nflag x%x\n", ndlp->nlp_DID, (void *)ndlp, @@ -3692,7 +3710,8 @@ lpfc_offline_prep(struct lpfc_hba *phba, int mbx_action) struct lpfc_vport **vports; struct Scsi_Host *shost; int i; - int offline = 0; + int offline; + bool hba_pci_err; if (vport->fc_flag & FC_OFFLINE_MODE) return; @@ -3702,6 +3721,7 @@ lpfc_offline_prep(struct lpfc_hba *phba, int mbx_action) lpfc_linkdown(phba); offline = pci_channel_offline(phba->pcidev); + hba_pci_err = test_bit(HBA_PCI_ERR, &phba->bit_flags); /* Issue an unreg_login to all nodes on all vports */ vports = lpfc_create_vport_work_array(phba); @@ -3725,11 +3745,14 @@ lpfc_offline_prep(struct lpfc_hba *phba, int mbx_action) ndlp->nlp_flag &= ~NLP_NPR_ADISC; spin_unlock_irq(&ndlp->lock); - if (offline) { + if (offline || hba_pci_err) { spin_lock_irq(&ndlp->lock); ndlp->nlp_flag &= ~(NLP_UNREG_INP | NLP_RPI_REGISTERED); spin_unlock_irq(&ndlp->lock); + if (phba->sli_rev == LPFC_SLI_REV4) + lpfc_sli_rpi_release(vports[i], + ndlp); } else { lpfc_unreg_rpi(vports[i], ndlp); } @@ -13366,8 +13389,9 @@ lpfc_sli4_hba_unset(struct lpfc_hba *phba) /* Abort all iocbs 
associated with the hba */ lpfc_sli_hba_iocb_abort(phba); - /* Wait for completion of device XRI exchange busy */ - lpfc_sli4_xri_exchange_busy_wait(phba); + if (!pci_channel_offline(phba->pcidev)) + /* Wait for completion of device XRI exchange busy */ + lpfc_sli4_xri_exchange_busy_wait(phba); /* per-phba callback de-registration for hotplug event */ if (phba->pport) @@ -13386,15 +13410,12 @@ lpfc_sli4_hba_unset(struct lpfc_hba *phba) /* Disable FW logging to host memory */ lpfc_ras_stop_fwlog(phba); - /* Unset the queues shared with the hardware then release all - * allocated resources. - */ - lpfc_sli4_queue_unset(phba); - lpfc_sli4_queue_destroy(phba); - /* Reset SLI4 HBA FCoE function */ lpfc_pci_function_reset(phba); + /* release all queue allocated resources. */ + lpfc_sli4_queue_destroy(phba); + /* Free RAS DMA memory */ if (phba->ras_fwlog.ras_enabled) lpfc_sli4_ras_dma_free(phba); @@ -14274,6 +14295,7 @@ lpfc_sli_prep_dev_for_perm_failure(struct lpfc_hba *phba) "2711 PCI channel permanent disable for failure\n"); /* Block all SCSI devices' I/Os on the host */ lpfc_scsi_dev_block(phba); + lpfc_sli4_prep_dev_for_reset(phba); /* stop all timers */ lpfc_stop_hba_timers(phba); @@ -15069,24 +15091,28 @@ lpfc_sli4_prep_dev_for_recover(struct lpfc_hba *phba) static void lpfc_sli4_prep_dev_for_reset(struct lpfc_hba *phba) { - lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT, - "2826 PCI channel disable preparing for reset\n"); + int offline = pci_channel_offline(phba->pcidev); + + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "2826 PCI channel disable preparing for reset offline" + " %d\n", offline); /* Block any management I/Os to the device */ lpfc_block_mgmt_io(phba, LPFC_MBX_NO_WAIT); - /* Block all SCSI devices' I/Os on the host */ - lpfc_scsi_dev_block(phba); + /* HBA_PCI_ERR was set in io_error_detect */ + lpfc_offline_prep(phba, LPFC_MBX_NO_WAIT); /* Flush all driver's outstanding I/Os as we are to reset */ lpfc_sli_flush_io_rings(phba); + lpfc_offline(phba); 
/* stop all timers */ lpfc_stop_hba_timers(phba); + lpfc_sli4_queue_destroy(phba); /* Disable interrupt and pci device */ lpfc_sli4_disable_intr(phba); - lpfc_sli4_queue_destroy(phba); pci_disable_device(phba->pcidev); } @@ -15135,6 +15161,7 @@ lpfc_io_error_detected_s4(struct pci_dev *pdev, pci_channel_state_t state) { struct Scsi_Host *shost = pci_get_drvdata(pdev); struct lpfc_hba *phba = ((struct lpfc_vport *)shost->hostdata)->phba; + bool hba_pci_err; switch (state) { case pci_channel_io_normal: @@ -15142,17 +15169,24 @@ lpfc_io_error_detected_s4(struct pci_dev *pdev, pci_channel_state_t state) lpfc_sli4_prep_dev_for_recover(phba); return PCI_ERS_RESULT_CAN_RECOVER; case pci_channel_io_frozen: - phba->hba_flag |= HBA_PCI_ERR; + hba_pci_err = test_and_set_bit(HBA_PCI_ERR, &phba->bit_flags); /* Fatal error, prepare for slot reset */ - lpfc_sli4_prep_dev_for_reset(phba); + if (!hba_pci_err) + lpfc_sli4_prep_dev_for_reset(phba); + else + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "2832 Already handling PCI error " + "state: x%x\n", state); return PCI_ERS_RESULT_NEED_RESET; case pci_channel_io_perm_failure: - phba->hba_flag |= HBA_PCI_ERR; + set_bit(HBA_PCI_ERR, &phba->bit_flags); /* Permanent failure, prepare for device down */ lpfc_sli4_prep_dev_for_perm_failure(phba); return PCI_ERS_RESULT_DISCONNECT; default: - phba->hba_flag |= HBA_PCI_ERR; + hba_pci_err = test_and_set_bit(HBA_PCI_ERR, &phba->bit_flags); + if (!hba_pci_err) + lpfc_sli4_prep_dev_for_reset(phba); /* Unknown state, prepare and request slot reset */ lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT, "2825 Unknown PCI error state: x%x\n", state); @@ -15186,17 +15220,21 @@ lpfc_io_slot_reset_s4(struct pci_dev *pdev) struct lpfc_hba *phba = ((struct lpfc_vport *)shost->hostdata)->phba; struct lpfc_sli *psli = &phba->sli; uint32_t intr_mode; + bool hba_pci_err; dev_printk(KERN_INFO, &pdev->dev, "recovering from a slot reset.\n"); if (pci_enable_device_mem(pdev)) { printk(KERN_ERR "lpfc: Cannot 
re-enable " - "PCI device after reset.\n"); + "PCI device after reset.\n"); return PCI_ERS_RESULT_DISCONNECT; } pci_restore_state(pdev); - phba->hba_flag &= ~HBA_PCI_ERR; + hba_pci_err = test_and_clear_bit(HBA_PCI_ERR, &phba->bit_flags); + if (!hba_pci_err) + dev_info(&pdev->dev, + "hba_pci_err was not set, recovering slot reset.\n"); /* * As the new kernel behavior of pci_restore_state() API call clears * device saved_state flag, need to save the restored state again. @@ -15210,6 +15248,8 @@ lpfc_io_slot_reset_s4(struct pci_dev *pdev) psli->sli_flag &= ~LPFC_SLI_ACTIVE; spin_unlock_irq(&phba->hbalock); + /* Init cpu_map array */ + lpfc_cpu_map_array_init(phba); /* Configure and enable interrupt */ intr_mode = lpfc_sli4_enable_intr(phba, phba->intr_mode); if (intr_mode == LPFC_INTR_ERROR) { @@ -15251,8 +15291,6 @@ lpfc_io_resume_s4(struct pci_dev *pdev) */ if (!(phba->sli.sli_flag & LPFC_SLI_ACTIVE)) { /* Perform device reset */ - lpfc_offline_prep(phba, LPFC_MBX_WAIT); - lpfc_offline(phba); lpfc_sli_brdrestart(phba); /* Bring the device back online */ lpfc_online(phba); diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index 9601edd838e10..df73abb59407e 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -93,6 +93,11 @@ lpfc_nvme_create_queue(struct nvme_fc_local_port *pnvme_lport, lport = (struct lpfc_nvme_lport *)pnvme_lport->private; vport = lport->vport; + + if (!vport || vport->load_flag & FC_UNLOADING || + vport->phba->hba_flag & HBA_IOQ_FLUSH) + return -ENODEV; + qhandle = kzalloc(sizeof(struct lpfc_nvme_qhandle), GFP_KERNEL); if (qhandle == NULL) return -ENOMEM; @@ -267,7 +272,8 @@ lpfc_nvme_handle_lsreq(struct lpfc_hba *phba, return -EINVAL; remoteport = lpfc_rport->remoteport; - if (!vport->localport) + if (!vport->localport || + vport->phba->hba_flag & HBA_IOQ_FLUSH) return -EINVAL; lport = vport->localport->private; @@ -559,6 +565,8 @@ __lpfc_nvme_ls_req(struct lpfc_vport *vport, struct 
lpfc_nodelist *ndlp, ndlp->nlp_DID, ntype, nstate); return -ENODEV; } + if (vport->phba->hba_flag & HBA_IOQ_FLUSH) + return -ENODEV; if (!vport->phba->sli4_hba.nvmels_wq) return -ENOMEM; @@ -662,7 +670,8 @@ lpfc_nvme_ls_req(struct nvme_fc_local_port *pnvme_lport, return -EINVAL; vport = lport->vport; - if (vport->load_flag & FC_UNLOADING) + if (vport->load_flag & FC_UNLOADING || + vport->phba->hba_flag & HBA_IOQ_FLUSH) return -ENODEV; atomic_inc(&lport->fc4NvmeLsRequests); @@ -1515,7 +1524,8 @@ lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port *pnvme_lport, phba = vport->phba; - if (unlikely(vport->load_flag & FC_UNLOADING)) { + if ((unlikely(vport->load_flag & FC_UNLOADING)) || + phba->hba_flag & HBA_IOQ_FLUSH) { lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_IOERR, "6124 Fail IO, Driver unload\n"); atomic_inc(&lport->xmt_fcp_err); @@ -2169,8 +2179,7 @@ lpfc_nvme_lport_unreg_wait(struct lpfc_vport *vport, abts_nvme = 0; for (i = 0; i < phba->cfg_hdw_queue; i++) { qp = &phba->sli4_hba.hdwq[i]; - if (!vport || !vport->localport || - !qp || !qp->io_wq) + if (!vport->localport || !qp || !qp->io_wq) return; pring = qp->io_wq->pring; @@ -2180,8 +2189,9 @@ lpfc_nvme_lport_unreg_wait(struct lpfc_vport *vport, abts_scsi += qp->abts_scsi_io_bufs; abts_nvme += qp->abts_nvme_io_bufs; } - if (!vport || !vport->localport || - vport->phba->hba_flag & HBA_PCI_ERR) + if (!vport->localport || + test_bit(HBA_PCI_ERR, &vport->phba->bit_flags) || + vport->load_flag & FC_UNLOADING) return; lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT, @@ -2541,8 +2551,7 @@ lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) * return values is ignored. The upcall is a courtesy to the * transport. 
*/ - if (vport->load_flag & FC_UNLOADING || - unlikely(vport->phba->hba_flag & HBA_PCI_ERR)) + if (vport->load_flag & FC_UNLOADING) (void)nvme_fc_set_remoteport_devloss(remoteport, 0); ret = nvme_fc_unregister_remoteport(remoteport); diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 430abebf99f15..b64c5f157ce90 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -2833,6 +2833,12 @@ __lpfc_sli_rpi_release(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) ndlp->nlp_flag &= ~NLP_UNREG_INP; } +void +lpfc_sli_rpi_release(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) +{ + __lpfc_sli_rpi_release(vport, ndlp); +} + /** * lpfc_sli_def_mbox_cmpl - Default mailbox completion handler * @phba: Pointer to HBA context object. @@ -4466,42 +4472,62 @@ lpfc_sli_handle_slow_ring_event_s4(struct lpfc_hba *phba, void lpfc_sli_abort_iocb_ring(struct lpfc_hba *phba, struct lpfc_sli_ring *pring) { - LIST_HEAD(completions); + LIST_HEAD(tx_completions); + LIST_HEAD(txcmplq_completions); struct lpfc_iocbq *iocb, *next_iocb; + int offline; if (pring->ringno == LPFC_ELS_RING) { lpfc_fabric_abort_hba(phba); } + offline = pci_channel_offline(phba->pcidev); /* Error everything on txq and txcmplq * First do the txq. 
*/ if (phba->sli_rev >= LPFC_SLI_REV4) { spin_lock_irq(&pring->ring_lock); - list_splice_init(&pring->txq, &completions); + list_splice_init(&pring->txq, &tx_completions); pring->txq_cnt = 0; - spin_unlock_irq(&pring->ring_lock); - spin_lock_irq(&phba->hbalock); - /* Next issue ABTS for everything on the txcmplq */ - list_for_each_entry_safe(iocb, next_iocb, &pring->txcmplq, list) - lpfc_sli_issue_abort_iotag(phba, pring, iocb, NULL); - spin_unlock_irq(&phba->hbalock); + if (offline) { + list_splice_init(&pring->txcmplq, + &txcmplq_completions); + } else { + /* Next issue ABTS for everything on the txcmplq */ + list_for_each_entry_safe(iocb, next_iocb, + &pring->txcmplq, list) + lpfc_sli_issue_abort_iotag(phba, pring, + iocb, NULL); + } + spin_unlock_irq(&pring->ring_lock); } else { spin_lock_irq(&phba->hbalock); - list_splice_init(&pring->txq, &completions); + list_splice_init(&pring->txq, &tx_completions); pring->txq_cnt = 0; - /* Next issue ABTS for everything on the txcmplq */ - list_for_each_entry_safe(iocb, next_iocb, &pring->txcmplq, list) - lpfc_sli_issue_abort_iotag(phba, pring, iocb, NULL); + if (offline) { + list_splice_init(&pring->txcmplq, &txcmplq_completions); + } else { + /* Next issue ABTS for everything on the txcmplq */ + list_for_each_entry_safe(iocb, next_iocb, + &pring->txcmplq, list) + lpfc_sli_issue_abort_iotag(phba, pring, + iocb, NULL); + } spin_unlock_irq(&phba->hbalock); } - /* Make sure HBA is alive */ - lpfc_issue_hb_tmo(phba); + if (offline) { + /* Cancel all the IOCBs from the completions list */ + lpfc_sli_cancel_iocbs(phba, &txcmplq_completions, + IOSTAT_LOCAL_REJECT, IOERR_SLI_ABORTED); + } else { + /* Make sure HBA is alive */ + lpfc_issue_hb_tmo(phba); + } /* Cancel all the IOCBs from the completions list */ - lpfc_sli_cancel_iocbs(phba, &completions, IOSTAT_LOCAL_REJECT, + lpfc_sli_cancel_iocbs(phba, &tx_completions, IOSTAT_LOCAL_REJECT, IOERR_SLI_ABORTED); } @@ -4554,11 +4580,6 @@ lpfc_sli_flush_io_rings(struct lpfc_hba *phba) 
struct lpfc_iocbq *piocb, *next_iocb; spin_lock_irq(&phba->hbalock); - if (phba->hba_flag & HBA_IOQ_FLUSH || - !phba->sli4_hba.hdwq) { - spin_unlock_irq(&phba->hbalock); - return; - } /* Indicate the I/O queues are flushed */ phba->hba_flag |= HBA_IOQ_FLUSH; spin_unlock_irq(&phba->hbalock); @@ -11235,6 +11256,10 @@ lpfc_sli_issue_iocb(struct lpfc_hba *phba, uint32_t ring_number, unsigned long iflags; int rc; + /* If the PCI channel is in offline state, do not post iocbs. */ + if (unlikely(pci_channel_offline(phba->pcidev))) + return IOCB_ERROR; + if (phba->sli_rev == LPFC_SLI_REV4) { eq = phba->sli4_hba.hdwq[piocb->hba_wqidx].hba_eq; diff --git a/drivers/scsi/megaraid/megaraid_sas.h b/drivers/scsi/megaraid/megaraid_sas.h index 2c9d1b7964756..ae2aef9ba8cfe 100644 --- a/drivers/scsi/megaraid/megaraid_sas.h +++ b/drivers/scsi/megaraid/megaraid_sas.h @@ -2558,6 +2558,9 @@ struct megasas_instance_template { #define MEGASAS_IS_LOGICAL(sdev) \ ((sdev->channel < MEGASAS_MAX_PD_CHANNELS) ? 0 : 1) +#define MEGASAS_IS_LUN_VALID(sdev) \ + (((sdev)->lun == 0) ? 
1 : 0) + #define MEGASAS_DEV_INDEX(scp) \ (((scp->device->channel % 2) * MEGASAS_MAX_DEV_PER_CHANNEL) + \ scp->device->id) diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index 82e1e24257bcd..ca563498dcdb8 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -2126,6 +2126,9 @@ static int megasas_slave_alloc(struct scsi_device *sdev) goto scan_target; } return -ENXIO; + } else if (!MEGASAS_IS_LUN_VALID(sdev)) { + sdev_printk(KERN_INFO, sdev, "%s: invalid LUN\n", __func__); + return -ENXIO; } scan_target: @@ -2156,6 +2159,10 @@ static void megasas_slave_destroy(struct scsi_device *sdev) instance = megasas_lookup_instance(sdev->host->host_no); if (MEGASAS_IS_LOGICAL(sdev)) { + if (!MEGASAS_IS_LUN_VALID(sdev)) { + sdev_printk(KERN_INFO, sdev, "%s: invalid LUN\n", __func__); + return; + } ld_tgt_id = MEGASAS_TARGET_ID(sdev); instance->ld_tgtid_status[ld_tgt_id] = LD_TARGET_ID_DELETED; if (megasas_dbg_lvl & LD_PD_DEBUG) diff --git a/drivers/scsi/mpi3mr/mpi3mr.h b/drivers/scsi/mpi3mr/mpi3mr.h index fc4eaf6d1e47e..d892ade421bf9 100644 --- a/drivers/scsi/mpi3mr/mpi3mr.h +++ b/drivers/scsi/mpi3mr/mpi3mr.h @@ -866,6 +866,8 @@ struct mpi3mr_ioc { * @send_ack: Event acknowledgment required or not * @process_evt: Bottomhalf processing required or not * @evt_ctx: Event context to send in Ack + * @pending_at_sml: waiting for device add/remove API to complete + * @discard: discard this event * @ref_count: kref count * @event_data: Actual MPI3 event data */ @@ -877,6 +879,8 @@ struct mpi3mr_fwevt { bool send_ack; bool process_evt; u32 evt_ctx; + bool pending_at_sml; + bool discard; struct kref ref_count; char event_data[0] __aligned(4); }; diff --git a/drivers/scsi/mpi3mr/mpi3mr_fw.c b/drivers/scsi/mpi3mr/mpi3mr_fw.c index 15bdc21ead669..e44868230197f 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_fw.c +++ b/drivers/scsi/mpi3mr/mpi3mr_fw.c @@ -1520,7 +1520,7 @@ static void 
mpi3mr_free_op_req_q_segments(struct mpi3mr_ioc *mrioc, u16 q_idx) MPI3MR_MAX_SEG_LIST_SIZE, mrioc->req_qinfo[q_idx].q_segment_list, mrioc->req_qinfo[q_idx].q_segment_list_dma); - mrioc->op_reply_qinfo[q_idx].q_segment_list = NULL; + mrioc->req_qinfo[q_idx].q_segment_list = NULL; } } else size = mrioc->req_qinfo[q_idx].segment_qd * @@ -4353,8 +4353,8 @@ int mpi3mr_soft_reset_handler(struct mpi3mr_ioc *mrioc, memset(mrioc->devrem_bitmap, 0, mrioc->devrem_bitmap_sz); memset(mrioc->removepend_bitmap, 0, mrioc->dev_handle_bitmap_sz); memset(mrioc->evtack_cmds_bitmap, 0, mrioc->evtack_cmds_bitmap_sz); - mpi3mr_cleanup_fwevt_list(mrioc); mpi3mr_flush_host_io(mrioc); + mpi3mr_cleanup_fwevt_list(mrioc); mpi3mr_invalidate_devhandles(mrioc); if (mrioc->prepare_for_reset) { mrioc->prepare_for_reset = 0; diff --git a/drivers/scsi/mpi3mr/mpi3mr_os.c b/drivers/scsi/mpi3mr/mpi3mr_os.c index 284117da9086a..f7893de35b26b 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_os.c +++ b/drivers/scsi/mpi3mr/mpi3mr_os.c @@ -285,6 +285,35 @@ static struct mpi3mr_fwevt *mpi3mr_dequeue_fwevt( return fwevt; } +/** + * mpi3mr_cancel_work - cancel firmware event + * @fwevt: fwevt object which needs to be canceled + * + * Return: Nothing. + */ +static void mpi3mr_cancel_work(struct mpi3mr_fwevt *fwevt) +{ + /* + * Wait on the fwevt to complete. If this returns 1, then + * the event was never executed. 
+ * + * If it did execute, we wait for it to finish, and the put will + * happen from mpi3mr_process_fwevt() + */ + if (cancel_work_sync(&fwevt->work)) { + /* + * Put fwevt reference count after + * dequeuing it from worker queue + */ + mpi3mr_fwevt_put(fwevt); + /* + * Put fwevt reference count to neutralize + * kref_init increment + */ + mpi3mr_fwevt_put(fwevt); + } +} + /** * mpi3mr_cleanup_fwevt_list - Cleanup firmware event list * @mrioc: Adapter instance reference @@ -302,28 +331,25 @@ void mpi3mr_cleanup_fwevt_list(struct mpi3mr_ioc *mrioc) !mrioc->fwevt_worker_thread) return; - while ((fwevt = mpi3mr_dequeue_fwevt(mrioc)) || - (fwevt = mrioc->current_event)) { + while ((fwevt = mpi3mr_dequeue_fwevt(mrioc))) + mpi3mr_cancel_work(fwevt); + + if (mrioc->current_event) { + fwevt = mrioc->current_event; /* - * Wait on the fwevt to complete. If this returns 1, then - * the event was never executed, and we need a put for the - * reference the work had on the fwevt. - * - * If it did execute, we wait for it to finish, and the put will - * happen from mpi3mr_process_fwevt() + * Don't call cancel_work_sync() API for the + * fwevt work if the controller reset is + * get called as part of processing the + * same fwevt work (or) when worker thread is + * waiting for device add/remove APIs to complete. + * Otherwise we will see deadlock. */ - if (cancel_work_sync(&fwevt->work)) { - /* - * Put fwevt reference count after - * dequeuing it from worker queue - */ - mpi3mr_fwevt_put(fwevt); - /* - * Put fwevt reference count to neutralize - * kref_init increment - */ - mpi3mr_fwevt_put(fwevt); + if (current_work() == &fwevt->work || fwevt->pending_at_sml) { + fwevt->discard = 1; + return; } + + mpi3mr_cancel_work(fwevt); } } @@ -690,6 +716,24 @@ static struct mpi3mr_tgt_dev *__mpi3mr_get_tgtdev_from_tgtpriv( return tgtdev; } +/** + * mpi3mr_print_device_event_notice - print notice related to post processing of + * device event after controller reset. 
+ * + * @mrioc: Adapter instance reference + * @device_add: true for device add event and false for device removal event + * + * Return: None. + */ +static void mpi3mr_print_device_event_notice(struct mpi3mr_ioc *mrioc, + bool device_add) +{ + ioc_notice(mrioc, "Device %s was in progress before the reset and\n", + (device_add ? "addition" : "removal")); + ioc_notice(mrioc, "completed after reset, verify whether the exposed devices\n"); + ioc_notice(mrioc, "are matched with attached devices for correctness\n"); +} + /** * mpi3mr_remove_tgtdev_from_host - Remove dev from upper layers * @mrioc: Adapter instance reference @@ -714,8 +758,17 @@ static void mpi3mr_remove_tgtdev_from_host(struct mpi3mr_ioc *mrioc, } if (tgtdev->starget) { + if (mrioc->current_event) + mrioc->current_event->pending_at_sml = 1; scsi_remove_target(&tgtdev->starget->dev); tgtdev->host_exposed = 0; + if (mrioc->current_event) { + mrioc->current_event->pending_at_sml = 0; + if (mrioc->current_event->discard) { + mpi3mr_print_device_event_notice(mrioc, false); + return; + } + } } ioc_info(mrioc, "%s :Removed handle(0x%04x), wwid(0x%016llx)\n", __func__, tgtdev->dev_handle, (unsigned long long)tgtdev->wwid); @@ -749,11 +802,20 @@ static int mpi3mr_report_tgtdev_to_host(struct mpi3mr_ioc *mrioc, } if (!tgtdev->host_exposed && !mrioc->reset_in_progress) { tgtdev->host_exposed = 1; + if (mrioc->current_event) + mrioc->current_event->pending_at_sml = 1; scsi_scan_target(&mrioc->shost->shost_gendev, 0, tgtdev->perst_id, SCAN_WILD_CARD, SCSI_SCAN_INITIAL); if (!tgtdev->starget) tgtdev->host_exposed = 0; + if (mrioc->current_event) { + mrioc->current_event->pending_at_sml = 0; + if (mrioc->current_event->discard) { + mpi3mr_print_device_event_notice(mrioc, true); + goto out; + } + } } out: if (tgtdev) @@ -1193,6 +1255,8 @@ static void mpi3mr_sastopochg_evt_bh(struct mpi3mr_ioc *mrioc, mpi3mr_sastopochg_evt_debug(mrioc, event_data); for (i = 0; i < event_data->num_entries; i++) { + if (fwevt->discard) + 
return; handle = le16_to_cpu(event_data->phy_entry[i].attached_dev_handle); if (!handle) continue; @@ -1324,6 +1388,8 @@ static void mpi3mr_pcietopochg_evt_bh(struct mpi3mr_ioc *mrioc, mpi3mr_pcietopochg_evt_debug(mrioc, event_data); for (i = 0; i < event_data->num_entries; i++) { + if (fwevt->discard) + return; handle = le16_to_cpu(event_data->port_entry[i].attached_dev_handle); if (!handle) @@ -1362,8 +1428,8 @@ static void mpi3mr_pcietopochg_evt_bh(struct mpi3mr_ioc *mrioc, static void mpi3mr_fwevt_bh(struct mpi3mr_ioc *mrioc, struct mpi3mr_fwevt *fwevt) { - mrioc->current_event = fwevt; mpi3mr_fwevt_del_from_list(mrioc, fwevt); + mrioc->current_event = fwevt; if (mrioc->stop_drv_processing) goto out; @@ -2551,6 +2617,8 @@ void mpi3mr_process_op_reply_desc(struct mpi3mr_ioc *mrioc, scmd->result = DID_OK << 16; goto out_success; } + + scsi_set_resid(scmd, scsi_bufflen(scmd) - xfer_count); if (ioc_status == MPI3_IOCSTATUS_SCSI_DATA_UNDERRUN && xfer_count == 0 && (scsi_status == MPI3_SCSI_STATUS_BUSY || scsi_status == MPI3_SCSI_STATUS_RESERVATION_CONFLICT || diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index 76229b839560a..fb5a3a348dbec 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -5736,14 +5736,13 @@ _base_release_memory_pools(struct MPT3SAS_ADAPTER *ioc) */ static int -mpt3sas_check_same_4gb_region(long reply_pool_start_address, u32 pool_sz) +mpt3sas_check_same_4gb_region(dma_addr_t start_address, u32 pool_sz) { - long reply_pool_end_address; + dma_addr_t end_address; - reply_pool_end_address = reply_pool_start_address + pool_sz; + end_address = start_address + pool_sz - 1; - if (upper_32_bits(reply_pool_start_address) == - upper_32_bits(reply_pool_end_address)) + if (upper_32_bits(start_address) == upper_32_bits(end_address)) return 1; else return 0; @@ -5804,7 +5803,7 @@ _base_allocate_pcie_sgl_pool(struct MPT3SAS_ADAPTER *ioc, u32 sz) } if 
(!mpt3sas_check_same_4gb_region( - (long)ioc->pcie_sg_lookup[i].pcie_sgl, sz)) { + ioc->pcie_sg_lookup[i].pcie_sgl_dma, sz)) { ioc_err(ioc, "PCIE SGLs are not in same 4G !! pcie sgl (0x%p) dma = (0x%llx)\n", ioc->pcie_sg_lookup[i].pcie_sgl, (unsigned long long) @@ -5859,8 +5858,8 @@ _base_allocate_chain_dma_pool(struct MPT3SAS_ADAPTER *ioc, u32 sz) GFP_KERNEL, &ctr->chain_buffer_dma); if (!ctr->chain_buffer) return -EAGAIN; - if (!mpt3sas_check_same_4gb_region((long) - ctr->chain_buffer, ioc->chain_segment_sz)) { + if (!mpt3sas_check_same_4gb_region( + ctr->chain_buffer_dma, ioc->chain_segment_sz)) { ioc_err(ioc, "Chain buffers are not in same 4G !!! Chain buff (0x%p) dma = (0x%llx)\n", ctr->chain_buffer, @@ -5896,7 +5895,7 @@ _base_allocate_sense_dma_pool(struct MPT3SAS_ADAPTER *ioc, u32 sz) GFP_KERNEL, &ioc->sense_dma); if (!ioc->sense) return -EAGAIN; - if (!mpt3sas_check_same_4gb_region((long)ioc->sense, sz)) { + if (!mpt3sas_check_same_4gb_region(ioc->sense_dma, sz)) { dinitprintk(ioc, pr_err( "Bad Sense Pool! sense (0x%p) sense_dma = (0x%llx)\n", ioc->sense, (unsigned long long) ioc->sense_dma)); @@ -5929,7 +5928,7 @@ _base_allocate_reply_pool(struct MPT3SAS_ADAPTER *ioc, u32 sz) &ioc->reply_dma); if (!ioc->reply) return -EAGAIN; - if (!mpt3sas_check_same_4gb_region((long)ioc->reply_free, sz)) { + if (!mpt3sas_check_same_4gb_region(ioc->reply_dma, sz)) { dinitprintk(ioc, pr_err( "Bad Reply Pool! Reply (0x%p) Reply dma = (0x%llx)\n", ioc->reply, (unsigned long long) ioc->reply_dma)); @@ -5964,7 +5963,7 @@ _base_allocate_reply_free_dma_pool(struct MPT3SAS_ADAPTER *ioc, u32 sz) GFP_KERNEL, &ioc->reply_free_dma); if (!ioc->reply_free) return -EAGAIN; - if (!mpt3sas_check_same_4gb_region((long)ioc->reply_free, sz)) { + if (!mpt3sas_check_same_4gb_region(ioc->reply_free_dma, sz)) { dinitprintk(ioc, pr_err("Bad Reply Free Pool! 
Reply Free (0x%p) Reply Free dma = (0x%llx)\n", ioc->reply_free, (unsigned long long) ioc->reply_free_dma)); @@ -6003,7 +6002,7 @@ _base_allocate_reply_post_free_array(struct MPT3SAS_ADAPTER *ioc, GFP_KERNEL, &ioc->reply_post_free_array_dma); if (!ioc->reply_post_free_array) return -EAGAIN; - if (!mpt3sas_check_same_4gb_region((long)ioc->reply_post_free_array, + if (!mpt3sas_check_same_4gb_region(ioc->reply_post_free_array_dma, reply_post_free_array_sz)) { dinitprintk(ioc, pr_err( "Bad Reply Free Pool! Reply Free (0x%p) Reply Free dma = (0x%llx)\n", @@ -6068,7 +6067,7 @@ base_alloc_rdpq_dma_pool(struct MPT3SAS_ADAPTER *ioc, int sz) * resources and set DMA mask to 32 and allocate. */ if (!mpt3sas_check_same_4gb_region( - (long)ioc->reply_post[i].reply_post_free, sz)) { + ioc->reply_post[i].reply_post_free_dma, sz)) { dinitprintk(ioc, ioc_err(ioc, "bad Replypost free pool(0x%p)" "reply_post_free_dma = (0x%llx)\n", diff --git a/drivers/scsi/mpt3sas/mpt3sas_config.c b/drivers/scsi/mpt3sas/mpt3sas_config.c index 0563078227de6..a8dd14c91efdb 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_config.c +++ b/drivers/scsi/mpt3sas/mpt3sas_config.c @@ -394,10 +394,13 @@ _config_request(struct MPT3SAS_ADAPTER *ioc, Mpi2ConfigRequest_t retry_count++; if (ioc->config_cmds.smid == smid) mpt3sas_base_free_smid(ioc, smid); - if ((ioc->shost_recovery) || (ioc->config_cmds.status & - MPT3_CMD_RESET) || ioc->pci_error_recovery) + if (ioc->config_cmds.status & MPT3_CMD_RESET) goto retry_config; - issue_host_reset = 1; + if (ioc->shost_recovery || ioc->pci_error_recovery) { + issue_host_reset = 0; + r = -EFAULT; + } else + issue_host_reset = 1; goto free_mem; } diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index 00792767c620d..7e476f50935b8 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -11035,6 +11035,7 @@ _scsih_expander_node_remove(struct MPT3SAS_ADAPTER *ioc, { struct _sas_port *mpt3sas_port, *next; 
unsigned long flags; + int port_id; /* remove sibling ports attached to this expander */ list_for_each_entry_safe(mpt3sas_port, next, @@ -11055,6 +11056,8 @@ _scsih_expander_node_remove(struct MPT3SAS_ADAPTER *ioc, mpt3sas_port->hba_port); } + port_id = sas_expander->port->port_id; + mpt3sas_transport_port_remove(ioc, sas_expander->sas_address, sas_expander->sas_address_parent, sas_expander->port); @@ -11062,7 +11065,7 @@ _scsih_expander_node_remove(struct MPT3SAS_ADAPTER *ioc, "expander_remove: handle(0x%04x), sas_addr(0x%016llx), port:%d\n", sas_expander->handle, (unsigned long long) sas_expander->sas_address, - sas_expander->port->port_id); + port_id); spin_lock_irqsave(&ioc->sas_node_lock, flags); list_del(&sas_expander->list); diff --git a/drivers/scsi/mvsas/mv_init.c b/drivers/scsi/mvsas/mv_init.c index dcae2d4464f90..605a8eb7344a7 100644 --- a/drivers/scsi/mvsas/mv_init.c +++ b/drivers/scsi/mvsas/mv_init.c @@ -646,6 +646,7 @@ static struct pci_device_id mvs_pci_table[] = { { PCI_VDEVICE(ARECA, PCI_DEVICE_ID_ARECA_1300), chip_1300 }, { PCI_VDEVICE(ARECA, PCI_DEVICE_ID_ARECA_1320), chip_1320 }, { PCI_VDEVICE(ADAPTEC2, 0x0450), chip_6440 }, + { PCI_VDEVICE(TTI, 0x2640), chip_6440 }, { PCI_VDEVICE(TTI, 0x2710), chip_9480 }, { PCI_VDEVICE(TTI, 0x2720), chip_9480 }, { PCI_VDEVICE(TTI, 0x2721), chip_9480 }, @@ -696,7 +697,7 @@ static struct pci_driver mvs_pci_driver = { static ssize_t driver_version_show(struct device *cdev, struct device_attribute *attr, char *buffer) { - return snprintf(buffer, PAGE_SIZE, "%s\n", DRV_VERSION); + return sysfs_emit(buffer, "%s\n", DRV_VERSION); } static DEVICE_ATTR_RO(driver_version); @@ -744,7 +745,7 @@ static ssize_t interrupt_coalescing_store(struct device *cdev, static ssize_t interrupt_coalescing_show(struct device *cdev, struct device_attribute *attr, char *buffer) { - return snprintf(buffer, PAGE_SIZE, "%d\n", interrupt_coalescing); + return sysfs_emit(buffer, "%d\n", interrupt_coalescing); } static 
DEVICE_ATTR_RW(interrupt_coalescing); diff --git a/drivers/scsi/pm8001/pm8001_hwi.c b/drivers/scsi/pm8001/pm8001_hwi.c index 9ec310b795c33..27ead825c2bb6 100644 --- a/drivers/scsi/pm8001/pm8001_hwi.c +++ b/drivers/scsi/pm8001/pm8001_hwi.c @@ -1522,7 +1522,6 @@ void pm8001_work_fn(struct work_struct *work) case IO_XFER_ERROR_BREAK: { /* This one stashes the sas_task instead */ struct sas_task *t = (struct sas_task *)pm8001_dev; - u32 tag; struct pm8001_ccb_info *ccb; struct pm8001_hba_info *pm8001_ha = pw->pm8001_ha; unsigned long flags, flags1; @@ -1544,8 +1543,8 @@ void pm8001_work_fn(struct work_struct *work) /* Search for a possible ccb that matches the task */ for (i = 0; ccb = NULL, i < PM8001_MAX_CCB; i++) { ccb = &pm8001_ha->ccb_info[i]; - tag = ccb->ccb_tag; - if ((tag != 0xFFFFFFFF) && (ccb->task == t)) + if ((ccb->ccb_tag != PM8001_INVALID_TAG) && + (ccb->task == t)) break; } if (!ccb) { @@ -1567,11 +1566,11 @@ void pm8001_work_fn(struct work_struct *work) spin_unlock_irqrestore(&t->task_state_lock, flags1); pm8001_dbg(pm8001_ha, FAIL, "task 0x%p done with event 0x%x resp 0x%x stat 0x%x but aborted by upper layer!\n", t, pw->handler, ts->resp, ts->stat); - pm8001_ccb_task_free(pm8001_ha, t, ccb, tag); + pm8001_ccb_task_free(pm8001_ha, t, ccb, ccb->ccb_tag); spin_unlock_irqrestore(&pm8001_ha->lock, flags); } else { spin_unlock_irqrestore(&t->task_state_lock, flags1); - pm8001_ccb_task_free(pm8001_ha, t, ccb, tag); + pm8001_ccb_task_free(pm8001_ha, t, ccb, ccb->ccb_tag); mb();/* in order to force CPU ordering */ spin_unlock_irqrestore(&pm8001_ha->lock, flags); t->task_done(t); @@ -1580,7 +1579,6 @@ void pm8001_work_fn(struct work_struct *work) case IO_XFER_OPEN_RETRY_TIMEOUT: { /* This one stashes the sas_task instead */ struct sas_task *t = (struct sas_task *)pm8001_dev; - u32 tag; struct pm8001_ccb_info *ccb; struct pm8001_hba_info *pm8001_ha = pw->pm8001_ha; unsigned long flags, flags1; @@ -1614,8 +1612,8 @@ void pm8001_work_fn(struct work_struct *work) 
/* Search for a possible ccb that matches the task */ for (i = 0; ccb = NULL, i < PM8001_MAX_CCB; i++) { ccb = &pm8001_ha->ccb_info[i]; - tag = ccb->ccb_tag; - if ((tag != 0xFFFFFFFF) && (ccb->task == t)) + if ((ccb->ccb_tag != PM8001_INVALID_TAG) && + (ccb->task == t)) break; } if (!ccb) { @@ -1686,19 +1684,13 @@ void pm8001_work_fn(struct work_struct *work) struct task_status_struct *ts; struct sas_task *task; int i; - u32 tag, device_id; + u32 device_id; for (i = 0; ccb = NULL, i < PM8001_MAX_CCB; i++) { ccb = &pm8001_ha->ccb_info[i]; task = ccb->task; ts = &task->task_status; - tag = ccb->ccb_tag; - /* check if tag is NULL */ - if (!tag) { - pm8001_dbg(pm8001_ha, FAIL, - "tag Null\n"); - continue; - } + if (task != NULL) { dev = task->dev; if (!dev) { @@ -1707,10 +1699,11 @@ void pm8001_work_fn(struct work_struct *work) continue; } /*complete sas task and update to top layer */ - pm8001_ccb_task_free(pm8001_ha, task, ccb, tag); + pm8001_ccb_task_free(pm8001_ha, task, ccb, + ccb->ccb_tag); ts->resp = SAS_TASK_COMPLETE; task->task_done(task); - } else if (tag != 0xFFFFFFFF) { + } else if (ccb->ccb_tag != PM8001_INVALID_TAG) { /* complete the internal commands/non-sas task */ pm8001_dev = ccb->device; if (pm8001_dev->dcompletion) { @@ -1718,7 +1711,7 @@ void pm8001_work_fn(struct work_struct *work) pm8001_dev->dcompletion = NULL; } complete(pm8001_ha->nvmd_completion); - pm8001_tag_free(pm8001_ha, tag); + pm8001_tag_free(pm8001_ha, ccb->ccb_tag); } } /* Deregister all the device ids */ @@ -1772,7 +1765,6 @@ static void pm8001_send_abort_all(struct pm8001_hba_info *pm8001_ha, } task = sas_alloc_slow_task(GFP_ATOMIC); - if (!task) { pm8001_dbg(pm8001_ha, FAIL, "cannot allocate task\n"); return; @@ -1781,13 +1773,16 @@ static void pm8001_send_abort_all(struct pm8001_hba_info *pm8001_ha, task->task_done = pm8001_task_done; res = pm8001_tag_alloc(pm8001_ha, &ccb_tag); - if (res) + if (res) { + sas_free_task(task); return; + } ccb = &pm8001_ha->ccb_info[ccb_tag]; 
ccb->device = pm8001_ha_dev; ccb->ccb_tag = ccb_tag; ccb->task = task; + ccb->n_elem = 0; circularQ = &pm8001_ha->inbnd_q_tbl[0]; @@ -1798,8 +1793,10 @@ static void pm8001_send_abort_all(struct pm8001_hba_info *pm8001_ha, ret = pm8001_mpi_build_cmd(pm8001_ha, circularQ, opc, &task_abort, sizeof(task_abort), 0); - if (ret) + if (ret) { + sas_free_task(task); pm8001_tag_free(pm8001_ha, ccb_tag); + } } @@ -1849,6 +1846,7 @@ static void pm8001_send_read_log(struct pm8001_hba_info *pm8001_ha, ccb->device = pm8001_ha_dev; ccb->ccb_tag = ccb_tag; ccb->task = task; + ccb->n_elem = 0; pm8001_ha_dev->id |= NCQ_READ_LOG_FLAG; pm8001_ha_dev->id |= NCQ_2ND_RLE_FLAG; @@ -1865,7 +1863,7 @@ static void pm8001_send_read_log(struct pm8001_hba_info *pm8001_ha, sata_cmd.tag = cpu_to_le32(ccb_tag); sata_cmd.device_id = cpu_to_le32(pm8001_ha_dev->device_id); - sata_cmd.ncqtag_atap_dir_m |= ((0x1 << 7) | (0x5 << 9)); + sata_cmd.ncqtag_atap_dir_m = cpu_to_le32((0x1 << 7) | (0x5 << 9)); memcpy(&sata_cmd.sata_fis, &fis, sizeof(struct host_to_dev_fis)); res = pm8001_mpi_build_cmd(pm8001_ha, circularQ, opc, &sata_cmd, @@ -2314,11 +2312,6 @@ mpi_sata_completion(struct pm8001_hba_info *pm8001_ha, void *piomb) param = le32_to_cpu(psataPayload->param); tag = le32_to_cpu(psataPayload->tag); - if (!tag) { - pm8001_dbg(pm8001_ha, FAIL, "tag null\n"); - return; - } - ccb = &pm8001_ha->ccb_info[tag]; t = ccb->task; pm8001_dev = ccb->device; @@ -2418,7 +2411,8 @@ mpi_sata_completion(struct pm8001_hba_info *pm8001_ha, void *piomb) len = sizeof(struct pio_setup_fis); pm8001_dbg(pm8001_ha, IO, "PIO read len = %d\n", len); - } else if (t->ata_task.use_ncq) { + } else if (t->ata_task.use_ncq && + t->data_dir != DMA_NONE) { len = sizeof(struct set_dev_bits_fis); pm8001_dbg(pm8001_ha, IO, "FPDMA len = %d\n", len); @@ -3053,7 +3047,7 @@ void pm8001_mpi_set_dev_state_resp(struct pm8001_hba_info *pm8001_ha, device_id, pds, nds, status); complete(pm8001_dev->setds_completion); ccb->task = NULL; - ccb->ccb_tag = 
0xFFFFFFFF; + ccb->ccb_tag = PM8001_INVALID_TAG; pm8001_tag_free(pm8001_ha, tag); } @@ -3071,7 +3065,7 @@ void pm8001_mpi_set_nvmd_resp(struct pm8001_hba_info *pm8001_ha, void *piomb) dlen_status); } ccb->task = NULL; - ccb->ccb_tag = 0xFFFFFFFF; + ccb->ccb_tag = PM8001_INVALID_TAG; pm8001_tag_free(pm8001_ha, tag); } @@ -3098,7 +3092,7 @@ pm8001_mpi_get_nvmd_resp(struct pm8001_hba_info *pm8001_ha, void *piomb) * freed by requesting path anywhere. */ ccb->task = NULL; - ccb->ccb_tag = 0xFFFFFFFF; + ccb->ccb_tag = PM8001_INVALID_TAG; pm8001_tag_free(pm8001_ha, tag); return; } @@ -3144,7 +3138,7 @@ pm8001_mpi_get_nvmd_resp(struct pm8001_hba_info *pm8001_ha, void *piomb) complete(pm8001_ha->nvmd_completion); pm8001_dbg(pm8001_ha, MSG, "Get nvmd data complete!\n"); ccb->task = NULL; - ccb->ccb_tag = 0xFFFFFFFF; + ccb->ccb_tag = PM8001_INVALID_TAG; pm8001_tag_free(pm8001_ha, tag); } @@ -3557,7 +3551,7 @@ int pm8001_mpi_reg_resp(struct pm8001_hba_info *pm8001_ha, void *piomb) } complete(pm8001_dev->dcompletion); ccb->task = NULL; - ccb->ccb_tag = 0xFFFFFFFF; + ccb->ccb_tag = PM8001_INVALID_TAG; pm8001_tag_free(pm8001_ha, htag); return 0; } @@ -3629,7 +3623,7 @@ int pm8001_mpi_fw_flash_update_resp(struct pm8001_hba_info *pm8001_ha, } kfree(ccb->fw_control_context); ccb->task = NULL; - ccb->ccb_tag = 0xFFFFFFFF; + ccb->ccb_tag = PM8001_INVALID_TAG; pm8001_tag_free(pm8001_ha, tag); complete(pm8001_ha->nvmd_completion); return 0; @@ -3665,10 +3659,6 @@ int pm8001_mpi_task_abort_resp(struct pm8001_hba_info *pm8001_ha, void *piomb) status = le32_to_cpu(pPayload->status); tag = le32_to_cpu(pPayload->tag); - if (!tag) { - pm8001_dbg(pm8001_ha, FAIL, " TAG NULL. 
RETURNING !!!\n"); - return -1; - } scp = le32_to_cpu(pPayload->scp); ccb = &pm8001_ha->ccb_info[tag]; @@ -3703,12 +3693,11 @@ int pm8001_mpi_task_abort_resp(struct pm8001_hba_info *pm8001_ha, void *piomb) mb(); if (pm8001_dev->id & NCQ_ABORT_ALL_FLAG) { - pm8001_tag_free(pm8001_ha, tag); sas_free_task(t); - /* clear the flag */ - pm8001_dev->id &= 0xBFFFFFFF; - } else + pm8001_dev->id &= ~NCQ_ABORT_ALL_FLAG; + } else { t->task_done(t); + } return 0; } @@ -4271,22 +4260,22 @@ static int pm8001_chip_sata_req(struct pm8001_hba_info *pm8001_ha, u32 opc = OPC_INB_SATA_HOST_OPSTART; memset(&sata_cmd, 0, sizeof(sata_cmd)); circularQ = &pm8001_ha->inbnd_q_tbl[0]; - if (task->data_dir == DMA_NONE) { + + if (task->data_dir == DMA_NONE && !task->ata_task.use_ncq) { ATAP = 0x04; /* no data*/ pm8001_dbg(pm8001_ha, IO, "no data\n"); } else if (likely(!task->ata_task.device_control_reg_update)) { - if (task->ata_task.dma_xfer) { + if (task->ata_task.use_ncq && + dev->sata_dev.class != ATA_DEV_ATAPI) { + ATAP = 0x07; /* FPDMA */ + pm8001_dbg(pm8001_ha, IO, "FPDMA\n"); + } else if (task->ata_task.dma_xfer) { ATAP = 0x06; /* DMA */ pm8001_dbg(pm8001_ha, IO, "DMA\n"); } else { ATAP = 0x05; /* PIO*/ pm8001_dbg(pm8001_ha, IO, "PIO\n"); } - if (task->ata_task.use_ncq && - dev->sata_dev.class != ATA_DEV_ATAPI) { - ATAP = 0x07; /* FPDMA */ - pm8001_dbg(pm8001_ha, IO, "FPDMA\n"); - } } if (task->ata_task.use_ncq && pm8001_get_ncq_tag(task, &hdr_tag)) { task->ata_task.fis.sector_count |= (u8) (hdr_tag << 3); @@ -4476,6 +4465,9 @@ static int pm8001_chip_reg_dev_req(struct pm8001_hba_info *pm8001_ha, SAS_ADDR_SIZE); rc = pm8001_mpi_build_cmd(pm8001_ha, circularQ, opc, &payload, sizeof(payload), 0); + if (rc) + pm8001_tag_free(pm8001_ha, tag); + return rc; } @@ -4626,7 +4618,7 @@ int pm8001_chip_ssp_tm_req(struct pm8001_hba_info *pm8001_ha, memcpy(sspTMCmd.lun, task->ssp_task.LUN, 8); sspTMCmd.tag = cpu_to_le32(ccb->ccb_tag); if (pm8001_ha->chip_id != chip_8001) - sspTMCmd.ds_ads_m = 0x08; + 
sspTMCmd.ds_ads_m = cpu_to_le32(0x08); circularQ = &pm8001_ha->inbnd_q_tbl[0]; ret = pm8001_mpi_build_cmd(pm8001_ha, circularQ, opc, &sspTMCmd, sizeof(sspTMCmd), 0); @@ -4888,6 +4880,11 @@ pm8001_chip_fw_flash_update_req(struct pm8001_hba_info *pm8001_ha, ccb->ccb_tag = tag; rc = pm8001_chip_fw_flash_update_build(pm8001_ha, &flash_update_info, tag); + if (rc) { + kfree(fw_control_context); + pm8001_tag_free(pm8001_ha, tag); + } + return rc; } @@ -4992,6 +4989,9 @@ pm8001_chip_set_dev_state_req(struct pm8001_hba_info *pm8001_ha, payload.nds = cpu_to_le32(state); rc = pm8001_mpi_build_cmd(pm8001_ha, circularQ, opc, &payload, sizeof(payload), 0); + if (rc) + pm8001_tag_free(pm8001_ha, tag); + return rc; } diff --git a/drivers/scsi/pm8001/pm8001_init.c b/drivers/scsi/pm8001/pm8001_init.c index d8a2121cb8d93..d2a2593f669d6 100644 --- a/drivers/scsi/pm8001/pm8001_init.c +++ b/drivers/scsi/pm8001/pm8001_init.c @@ -1216,10 +1216,11 @@ pm8001_init_ccb_tag(struct pm8001_hba_info *pm8001_ha, struct Scsi_Host *shost, goto err_out; } pm8001_ha->ccb_info[i].task = NULL; - pm8001_ha->ccb_info[i].ccb_tag = 0xffffffff; + pm8001_ha->ccb_info[i].ccb_tag = PM8001_INVALID_TAG; pm8001_ha->ccb_info[i].device = NULL; ++pm8001_ha->tags_num; } + return 0; err_out_noccb: diff --git a/drivers/scsi/pm8001/pm8001_sas.c b/drivers/scsi/pm8001/pm8001_sas.c index 32edda3e55c6c..b68c8400ca158 100644 --- a/drivers/scsi/pm8001/pm8001_sas.c +++ b/drivers/scsi/pm8001/pm8001_sas.c @@ -567,7 +567,7 @@ void pm8001_ccb_task_free(struct pm8001_hba_info *pm8001_ha, task->lldd_task = NULL; ccb->task = NULL; - ccb->ccb_tag = 0xFFFFFFFF; + ccb->ccb_tag = PM8001_INVALID_TAG; ccb->open_retry = 0; pm8001_tag_free(pm8001_ha, ccb_idx); } @@ -847,10 +847,10 @@ pm8001_exec_internal_task_abort(struct pm8001_hba_info *pm8001_ha, res = PM8001_CHIP_DISP->task_abort(pm8001_ha, pm8001_dev, flag, task_tag, ccb_tag); - if (res) { del_timer(&task->slow_task->timer); pm8001_dbg(pm8001_ha, FAIL, "Executing internal task 
failed\n"); + pm8001_tag_free(pm8001_ha, ccb_tag); goto ex_err; } wait_for_completion(&task->slow_task->completion); @@ -952,9 +952,11 @@ void pm8001_open_reject_retry( struct task_status_struct *ts; struct pm8001_device *pm8001_dev; unsigned long flags1; - u32 tag; struct pm8001_ccb_info *ccb = &pm8001_ha->ccb_info[i]; + if (ccb->ccb_tag == PM8001_INVALID_TAG) + continue; + pm8001_dev = ccb->device; if (!pm8001_dev || (pm8001_dev->dev_type == SAS_PHY_UNUSED)) continue; @@ -966,9 +968,6 @@ void pm8001_open_reject_retry( continue; } else if (pm8001_dev != device_to_close) continue; - tag = ccb->ccb_tag; - if (!tag || (tag == 0xFFFFFFFF)) - continue; task = ccb->task; if (!task || !task->task_done) continue; @@ -989,11 +988,11 @@ void pm8001_open_reject_retry( & SAS_TASK_STATE_ABORTED))) { spin_unlock_irqrestore(&task->task_state_lock, flags1); - pm8001_ccb_task_free(pm8001_ha, task, ccb, tag); + pm8001_ccb_task_free(pm8001_ha, task, ccb, ccb->ccb_tag); } else { spin_unlock_irqrestore(&task->task_state_lock, flags1); - pm8001_ccb_task_free(pm8001_ha, task, ccb, tag); + pm8001_ccb_task_free(pm8001_ha, task, ccb, ccb->ccb_tag); mb();/* in order to force CPU ordering */ spin_unlock_irqrestore(&pm8001_ha->lock, flags); task->task_done(task); diff --git a/drivers/scsi/pm8001/pm8001_sas.h b/drivers/scsi/pm8001/pm8001_sas.h index a17da1cebce17..1791cdf302762 100644 --- a/drivers/scsi/pm8001/pm8001_sas.h +++ b/drivers/scsi/pm8001/pm8001_sas.h @@ -738,6 +738,8 @@ void pm8001_free_dev(struct pm8001_device *pm8001_dev); /* ctl shared API */ extern const struct attribute_group *pm8001_host_groups[]; +#define PM8001_INVALID_TAG ((u32)-1) + static inline void pm8001_ccb_task_free_done(struct pm8001_hba_info *pm8001_ha, struct sas_task *task, struct pm8001_ccb_info *ccb, diff --git a/drivers/scsi/pm8001/pm80xx_hwi.c b/drivers/scsi/pm8001/pm80xx_hwi.c index 9d20f8009b89f..5853b3c0d76db 100644 --- a/drivers/scsi/pm8001/pm80xx_hwi.c +++ b/drivers/scsi/pm8001/pm80xx_hwi.c @@ -67,18 
+67,16 @@ int pm80xx_bar4_shift(struct pm8001_hba_info *pm8001_ha, u32 shift_value) } static void pm80xx_pci_mem_copy(struct pm8001_hba_info *pm8001_ha, u32 soffset, - const void *destination, + __le32 *destination, u32 dw_count, u32 bus_base_number) { u32 index, value, offset; - u32 *destination1; - destination1 = (u32 *)destination; - for (index = 0; index < dw_count; index += 4, destination1++) { + for (index = 0; index < dw_count; index += 4, destination++) { offset = (soffset + index); if (offset < (64 * 1024)) { value = pm8001_cr32(pm8001_ha, bus_base_number, offset); - *destination1 = cpu_to_le32(value); + *destination = cpu_to_le32(value); } } return; @@ -768,6 +766,10 @@ static void init_default_table_values(struct pm8001_hba_info *pm8001_ha) pm8001_ha->main_cfg_tbl.pm80xx_tbl.pcs_event_log_severity = 0x01; pm8001_ha->main_cfg_tbl.pm80xx_tbl.fatal_err_interrupt = 0x01; + /* Enable higher IQs and OQs, 32 to 63, bit 16 */ + if (pm8001_ha->max_q_num > 32) + pm8001_ha->main_cfg_tbl.pm80xx_tbl.fatal_err_interrupt |= + 1 << 16; /* Disable end to end CRC checking */ pm8001_ha->main_cfg_tbl.pm80xx_tbl.crc_core_dump = (0x1 << 16); @@ -1029,6 +1031,13 @@ static int mpi_init_check(struct pm8001_hba_info *pm8001_ha) if (0x0000 != gst_len_mpistate) return -EBUSY; + /* + * As per controller datasheet, after successful MPI + * initialization minimum 500ms delay is required before + * issuing commands. + */ + msleep(500); + return 0; } @@ -1203,9 +1212,11 @@ pm80xx_set_thermal_config(struct pm8001_hba_info *pm8001_ha) else page_code = THERMAL_PAGE_CODE_8H; - payload.cfg_pg[0] = (THERMAL_LOG_ENABLE << 9) | - (THERMAL_ENABLE << 8) | page_code; - payload.cfg_pg[1] = (LTEMPHIL << 24) | (RTEMPHIL << 8); + payload.cfg_pg[0] = + cpu_to_le32((THERMAL_LOG_ENABLE << 9) | + (THERMAL_ENABLE << 8) | page_code); + payload.cfg_pg[1] = + cpu_to_le32((LTEMPHIL << 24) | (RTEMPHIL << 8)); pm8001_dbg(pm8001_ha, DEV, "Setting up thermal config. 
cfg_pg 0 0x%x cfg_pg 1 0x%x\n", @@ -1245,43 +1256,41 @@ pm80xx_set_sas_protocol_timer_config(struct pm8001_hba_info *pm8001_ha) circularQ = &pm8001_ha->inbnd_q_tbl[0]; payload.tag = cpu_to_le32(tag); - SASConfigPage.pageCode = SAS_PROTOCOL_TIMER_CONFIG_PAGE; - SASConfigPage.MST_MSI = 3 << 15; - SASConfigPage.STP_SSP_MCT_TMO = (STP_MCT_TMO << 16) | SSP_MCT_TMO; - SASConfigPage.STP_FRM_TMO = (SAS_MAX_OPEN_TIME << 24) | - (SMP_MAX_CONN_TIMER << 16) | STP_FRM_TIMER; - SASConfigPage.STP_IDLE_TMO = STP_IDLE_TIME; - - if (SASConfigPage.STP_IDLE_TMO > 0x3FFFFFF) - SASConfigPage.STP_IDLE_TMO = 0x3FFFFFF; - - - SASConfigPage.OPNRJT_RTRY_INTVL = (SAS_MFD << 16) | - SAS_OPNRJT_RTRY_INTVL; - SASConfigPage.Data_Cmd_OPNRJT_RTRY_TMO = (SAS_DOPNRJT_RTRY_TMO << 16) - | SAS_COPNRJT_RTRY_TMO; - SASConfigPage.Data_Cmd_OPNRJT_RTRY_THR = (SAS_DOPNRJT_RTRY_THR << 16) - | SAS_COPNRJT_RTRY_THR; - SASConfigPage.MAX_AIP = SAS_MAX_AIP; + SASConfigPage.pageCode = cpu_to_le32(SAS_PROTOCOL_TIMER_CONFIG_PAGE); + SASConfigPage.MST_MSI = cpu_to_le32(3 << 15); + SASConfigPage.STP_SSP_MCT_TMO = + cpu_to_le32((STP_MCT_TMO << 16) | SSP_MCT_TMO); + SASConfigPage.STP_FRM_TMO = + cpu_to_le32((SAS_MAX_OPEN_TIME << 24) | + (SMP_MAX_CONN_TIMER << 16) | STP_FRM_TIMER); + SASConfigPage.STP_IDLE_TMO = cpu_to_le32(STP_IDLE_TIME); + + SASConfigPage.OPNRJT_RTRY_INTVL = + cpu_to_le32((SAS_MFD << 16) | SAS_OPNRJT_RTRY_INTVL); + SASConfigPage.Data_Cmd_OPNRJT_RTRY_TMO = + cpu_to_le32((SAS_DOPNRJT_RTRY_TMO << 16) | SAS_COPNRJT_RTRY_TMO); + SASConfigPage.Data_Cmd_OPNRJT_RTRY_THR = + cpu_to_le32((SAS_DOPNRJT_RTRY_THR << 16) | SAS_COPNRJT_RTRY_THR); + SASConfigPage.MAX_AIP = cpu_to_le32(SAS_MAX_AIP); pm8001_dbg(pm8001_ha, INIT, "SASConfigPage.pageCode 0x%08x\n", - SASConfigPage.pageCode); + le32_to_cpu(SASConfigPage.pageCode)); pm8001_dbg(pm8001_ha, INIT, "SASConfigPage.MST_MSI 0x%08x\n", - SASConfigPage.MST_MSI); + le32_to_cpu(SASConfigPage.MST_MSI)); pm8001_dbg(pm8001_ha, INIT, "SASConfigPage.STP_SSP_MCT_TMO 0x%08x\n", - 
SASConfigPage.STP_SSP_MCT_TMO); + le32_to_cpu(SASConfigPage.STP_SSP_MCT_TMO)); pm8001_dbg(pm8001_ha, INIT, "SASConfigPage.STP_FRM_TMO 0x%08x\n", - SASConfigPage.STP_FRM_TMO); + le32_to_cpu(SASConfigPage.STP_FRM_TMO)); pm8001_dbg(pm8001_ha, INIT, "SASConfigPage.STP_IDLE_TMO 0x%08x\n", - SASConfigPage.STP_IDLE_TMO); + le32_to_cpu(SASConfigPage.STP_IDLE_TMO)); pm8001_dbg(pm8001_ha, INIT, "SASConfigPage.OPNRJT_RTRY_INTVL 0x%08x\n", - SASConfigPage.OPNRJT_RTRY_INTVL); + le32_to_cpu(SASConfigPage.OPNRJT_RTRY_INTVL)); pm8001_dbg(pm8001_ha, INIT, "SASConfigPage.Data_Cmd_OPNRJT_RTRY_TMO 0x%08x\n", - SASConfigPage.Data_Cmd_OPNRJT_RTRY_TMO); + le32_to_cpu(SASConfigPage.Data_Cmd_OPNRJT_RTRY_TMO)); pm8001_dbg(pm8001_ha, INIT, "SASConfigPage.Data_Cmd_OPNRJT_RTRY_THR 0x%08x\n", - SASConfigPage.Data_Cmd_OPNRJT_RTRY_THR); + le32_to_cpu(SASConfigPage.Data_Cmd_OPNRJT_RTRY_THR)); pm8001_dbg(pm8001_ha, INIT, "SASConfigPage.MAX_AIP 0x%08x\n", - SASConfigPage.MAX_AIP); + le32_to_cpu(SASConfigPage.MAX_AIP)); memcpy(&payload.cfg_pg, &SASConfigPage, sizeof(SASProtocolTimerConfig_t)); @@ -1407,12 +1416,13 @@ static int pm80xx_encrypt_update(struct pm8001_hba_info *pm8001_ha) /* Currently only one key is used. New KEK index is 1. * Current KEK index is 1. Store KEK to NVRAM is 1. */ - payload.new_curidx_ksop = ((1 << 24) | (1 << 16) | (1 << 8) | - KEK_MGMT_SUBOP_KEYCARDUPDATE); + payload.new_curidx_ksop = + cpu_to_le32(((1 << 24) | (1 << 16) | (1 << 8) | + KEK_MGMT_SUBOP_KEYCARDUPDATE)); pm8001_dbg(pm8001_ha, DEV, "Saving Encryption info to flash. 
payload 0x%x\n", - payload.new_curidx_ksop); + le32_to_cpu(payload.new_curidx_ksop)); rc = pm8001_mpi_build_cmd(pm8001_ha, circularQ, opc, &payload, sizeof(payload), 0); @@ -1735,10 +1745,11 @@ static void pm80xx_chip_interrupt_enable(struct pm8001_hba_info *pm8001_ha, u8 vec) { #ifdef PM8001_USE_MSIX - u32 mask; - mask = (u32)(1 << vec); - - pm8001_cw32(pm8001_ha, 0, MSGU_ODMR_CLR, (u32)(mask & 0xFFFFFFFF)); + if (vec < 32) + pm8001_cw32(pm8001_ha, 0, MSGU_ODMR_CLR, 1U << vec); + else + pm8001_cw32(pm8001_ha, 0, MSGU_ODMR_CLR_U, + 1U << (vec - 32)); return; #endif pm80xx_chip_intx_interrupt_enable(pm8001_ha); @@ -1754,12 +1765,15 @@ static void pm80xx_chip_interrupt_disable(struct pm8001_hba_info *pm8001_ha, u8 vec) { #ifdef PM8001_USE_MSIX - u32 mask; - if (vec == 0xFF) - mask = 0xFFFFFFFF; + if (vec == 0xFF) { + /* disable all vectors 0-31, 32-63 */ + pm8001_cw32(pm8001_ha, 0, MSGU_ODMR, 0xFFFFFFFF); + pm8001_cw32(pm8001_ha, 0, MSGU_ODMR_U, 0xFFFFFFFF); + } else if (vec < 32) + pm8001_cw32(pm8001_ha, 0, MSGU_ODMR, 1U << vec); else - mask = (u32)(1 << vec); - pm8001_cw32(pm8001_ha, 0, MSGU_ODMR, (u32)(mask & 0xFFFFFFFF)); + pm8001_cw32(pm8001_ha, 0, MSGU_ODMR_U, + 1U << (vec - 32)); return; #endif pm80xx_chip_intx_interrupt_disable(pm8001_ha); @@ -1801,6 +1815,7 @@ static void pm80xx_send_abort_all(struct pm8001_hba_info *pm8001_ha, ccb->device = pm8001_ha_dev; ccb->ccb_tag = ccb_tag; ccb->task = task; + ccb->n_elem = 0; circularQ = &pm8001_ha->inbnd_q_tbl[0]; @@ -1882,7 +1897,7 @@ static void pm80xx_send_read_log(struct pm8001_hba_info *pm8001_ha, sata_cmd.tag = cpu_to_le32(ccb_tag); sata_cmd.device_id = cpu_to_le32(pm8001_ha_dev->device_id); - sata_cmd.ncqtag_atap_dir_m_dad |= ((0x1 << 7) | (0x5 << 9)); + sata_cmd.ncqtag_atap_dir_m_dad = cpu_to_le32(((0x1 << 7) | (0x5 << 9))); memcpy(&sata_cmd.sata_fis, &fis, sizeof(struct host_to_dev_fis)); res = pm8001_mpi_build_cmd(pm8001_ha, circularQ, opc, &sata_cmd, @@ -2404,11 +2419,6 @@ mpi_sata_completion(struct 
pm8001_hba_info *pm8001_ha, param = le32_to_cpu(psataPayload->param); tag = le32_to_cpu(psataPayload->tag); - if (!tag) { - pm8001_dbg(pm8001_ha, FAIL, "tag null\n"); - return; - } - ccb = &pm8001_ha->ccb_info[tag]; t = ccb->task; pm8001_dev = ccb->device; @@ -2510,7 +2520,8 @@ mpi_sata_completion(struct pm8001_hba_info *pm8001_ha, len = sizeof(struct pio_setup_fis); pm8001_dbg(pm8001_ha, IO, "PIO read len = %d\n", len); - } else if (t->ata_task.use_ncq) { + } else if (t->ata_task.use_ncq && + t->data_dir != DMA_NONE) { len = sizeof(struct set_dev_bits_fis); pm8001_dbg(pm8001_ha, IO, "FPDMA len = %d\n", len); @@ -4379,13 +4390,15 @@ static int pm80xx_chip_ssp_io_req(struct pm8001_hba_info *pm8001_ha, struct ssp_ini_io_start_req ssp_cmd; u32 tag = ccb->ccb_tag; int ret; - u64 phys_addr, start_addr, end_addr; + u64 phys_addr, end_addr; u32 end_addr_high, end_addr_low; struct inbound_queue_table *circularQ; u32 q_index, cpu_id; u32 opc = OPC_INB_SSPINIIOSTART; + memset(&ssp_cmd, 0, sizeof(ssp_cmd)); memcpy(ssp_cmd.ssp_iu.lun, task->ssp_task.LUN, 8); + /* data address domain added for spcv; set to 0 by host, * used internally by controller * 0 for SAS 1.1 and SAS 2.0 compatible TLR @@ -4396,7 +4409,7 @@ static int pm80xx_chip_ssp_io_req(struct pm8001_hba_info *pm8001_ha, ssp_cmd.device_id = cpu_to_le32(pm8001_dev->device_id); ssp_cmd.tag = cpu_to_le32(tag); if (task->ssp_task.enable_first_burst) - ssp_cmd.ssp_iu.efb_prio_attr |= 0x80; + ssp_cmd.ssp_iu.efb_prio_attr = 0x80; ssp_cmd.ssp_iu.efb_prio_attr |= (task->ssp_task.task_prio << 3); ssp_cmd.ssp_iu.efb_prio_attr |= (task->ssp_task.task_attr & 7); memcpy(ssp_cmd.ssp_iu.cdb, task->ssp_task.cmd->cmnd, @@ -4428,21 +4441,24 @@ static int pm80xx_chip_ssp_io_req(struct pm8001_hba_info *pm8001_ha, ssp_cmd.enc_esgl = cpu_to_le32(1<<31); } else if (task->num_scatter == 1) { u64 dma_addr = sg_dma_address(task->scatter); + ssp_cmd.enc_addr_low = cpu_to_le32(lower_32_bits(dma_addr)); ssp_cmd.enc_addr_high = 
cpu_to_le32(upper_32_bits(dma_addr)); ssp_cmd.enc_len = cpu_to_le32(task->total_xfer_len); ssp_cmd.enc_esgl = 0; + /* Check 4G Boundary */ - start_addr = cpu_to_le64(dma_addr); - end_addr = (start_addr + ssp_cmd.enc_len) - 1; - end_addr_low = cpu_to_le32(lower_32_bits(end_addr)); - end_addr_high = cpu_to_le32(upper_32_bits(end_addr)); - if (end_addr_high != ssp_cmd.enc_addr_high) { + end_addr = dma_addr + le32_to_cpu(ssp_cmd.enc_len) - 1; + end_addr_low = lower_32_bits(end_addr); + end_addr_high = upper_32_bits(end_addr); + + if (end_addr_high != le32_to_cpu(ssp_cmd.enc_addr_high)) { pm8001_dbg(pm8001_ha, FAIL, "The sg list address start_addr=0x%016llx data_len=0x%x end_addr_high=0x%08x end_addr_low=0x%08x has crossed 4G boundary\n", - start_addr, ssp_cmd.enc_len, + dma_addr, + le32_to_cpu(ssp_cmd.enc_len), end_addr_high, end_addr_low); pm8001_chip_make_sg(task->scatter, 1, ccb->buf_prd); @@ -4451,7 +4467,7 @@ static int pm80xx_chip_ssp_io_req(struct pm8001_hba_info *pm8001_ha, cpu_to_le32(lower_32_bits(phys_addr)); ssp_cmd.enc_addr_high = cpu_to_le32(upper_32_bits(phys_addr)); - ssp_cmd.enc_esgl = cpu_to_le32(1<<31); + ssp_cmd.enc_esgl = cpu_to_le32(1U<<31); } } else if (task->num_scatter == 0) { ssp_cmd.enc_addr_low = 0; @@ -4459,8 +4475,10 @@ static int pm80xx_chip_ssp_io_req(struct pm8001_hba_info *pm8001_ha, ssp_cmd.enc_len = cpu_to_le32(task->total_xfer_len); ssp_cmd.enc_esgl = 0; } + /* XTS mode. All other fields are 0 */ - ssp_cmd.key_cmode = 0x6 << 4; + ssp_cmd.key_cmode = cpu_to_le32(0x6 << 4); + /* set tweak values. 
Should be the start lba */ ssp_cmd.twk_val0 = cpu_to_le32((task->ssp_task.cmd->cmnd[2] << 24) | (task->ssp_task.cmd->cmnd[3] << 16) | @@ -4482,20 +4500,22 @@ static int pm80xx_chip_ssp_io_req(struct pm8001_hba_info *pm8001_ha, ssp_cmd.esgl = cpu_to_le32(1<<31); } else if (task->num_scatter == 1) { u64 dma_addr = sg_dma_address(task->scatter); + ssp_cmd.addr_low = cpu_to_le32(lower_32_bits(dma_addr)); ssp_cmd.addr_high = cpu_to_le32(upper_32_bits(dma_addr)); ssp_cmd.len = cpu_to_le32(task->total_xfer_len); ssp_cmd.esgl = 0; + /* Check 4G Boundary */ - start_addr = cpu_to_le64(dma_addr); - end_addr = (start_addr + ssp_cmd.len) - 1; - end_addr_low = cpu_to_le32(lower_32_bits(end_addr)); - end_addr_high = cpu_to_le32(upper_32_bits(end_addr)); - if (end_addr_high != ssp_cmd.addr_high) { + end_addr = dma_addr + le32_to_cpu(ssp_cmd.len) - 1; + end_addr_low = lower_32_bits(end_addr); + end_addr_high = upper_32_bits(end_addr); + if (end_addr_high != le32_to_cpu(ssp_cmd.addr_high)) { pm8001_dbg(pm8001_ha, FAIL, "The sg list address start_addr=0x%016llx data_len=0x%x end_addr_high=0x%08x end_addr_low=0x%08x has crossed 4G boundary\n", - start_addr, ssp_cmd.len, + dma_addr, + le32_to_cpu(ssp_cmd.len), end_addr_high, end_addr_low); pm8001_chip_make_sg(task->scatter, 1, ccb->buf_prd); @@ -4530,7 +4550,7 @@ static int pm80xx_chip_sata_req(struct pm8001_hba_info *pm8001_ha, u32 q_index, cpu_id; struct sata_start_req sata_cmd; u32 hdr_tag, ncg_tag = 0; - u64 phys_addr, start_addr, end_addr; + u64 phys_addr, end_addr; u32 end_addr_high, end_addr_low; u32 ATAP = 0x0; u32 dir; @@ -4542,22 +4562,21 @@ static int pm80xx_chip_sata_req(struct pm8001_hba_info *pm8001_ha, q_index = (u32) (cpu_id) % (pm8001_ha->max_q_num); circularQ = &pm8001_ha->inbnd_q_tbl[q_index]; - if (task->data_dir == DMA_NONE) { + if (task->data_dir == DMA_NONE && !task->ata_task.use_ncq) { ATAP = 0x04; /* no data*/ pm8001_dbg(pm8001_ha, IO, "no data\n"); } else if (likely(!task->ata_task.device_control_reg_update)) 
{ - if (task->ata_task.dma_xfer) { + if (task->ata_task.use_ncq && + dev->sata_dev.class != ATA_DEV_ATAPI) { + ATAP = 0x07; /* FPDMA */ + pm8001_dbg(pm8001_ha, IO, "FPDMA\n"); + } else if (task->ata_task.dma_xfer) { ATAP = 0x06; /* DMA */ pm8001_dbg(pm8001_ha, IO, "DMA\n"); } else { ATAP = 0x05; /* PIO*/ pm8001_dbg(pm8001_ha, IO, "PIO\n"); } - if (task->ata_task.use_ncq && - dev->sata_dev.class != ATA_DEV_ATAPI) { - ATAP = 0x07; /* FPDMA */ - pm8001_dbg(pm8001_ha, IO, "FPDMA\n"); - } } if (task->ata_task.use_ncq && pm8001_get_ncq_tag(task, &hdr_tag)) { task->ata_task.fis.sector_count |= (u8) (hdr_tag << 3); @@ -4591,32 +4610,38 @@ static int pm80xx_chip_sata_req(struct pm8001_hba_info *pm8001_ha, pm8001_chip_make_sg(task->scatter, ccb->n_elem, ccb->buf_prd); phys_addr = ccb->ccb_dma_handle; - sata_cmd.enc_addr_low = lower_32_bits(phys_addr); - sata_cmd.enc_addr_high = upper_32_bits(phys_addr); + sata_cmd.enc_addr_low = + cpu_to_le32(lower_32_bits(phys_addr)); + sata_cmd.enc_addr_high = + cpu_to_le32(upper_32_bits(phys_addr)); sata_cmd.enc_esgl = cpu_to_le32(1 << 31); } else if (task->num_scatter == 1) { u64 dma_addr = sg_dma_address(task->scatter); - sata_cmd.enc_addr_low = lower_32_bits(dma_addr); - sata_cmd.enc_addr_high = upper_32_bits(dma_addr); + + sata_cmd.enc_addr_low = + cpu_to_le32(lower_32_bits(dma_addr)); + sata_cmd.enc_addr_high = + cpu_to_le32(upper_32_bits(dma_addr)); sata_cmd.enc_len = cpu_to_le32(task->total_xfer_len); sata_cmd.enc_esgl = 0; + /* Check 4G Boundary */ - start_addr = cpu_to_le64(dma_addr); - end_addr = (start_addr + sata_cmd.enc_len) - 1; - end_addr_low = cpu_to_le32(lower_32_bits(end_addr)); - end_addr_high = cpu_to_le32(upper_32_bits(end_addr)); - if (end_addr_high != sata_cmd.enc_addr_high) { + end_addr = dma_addr + le32_to_cpu(sata_cmd.enc_len) - 1; + end_addr_low = lower_32_bits(end_addr); + end_addr_high = upper_32_bits(end_addr); + if (end_addr_high != le32_to_cpu(sata_cmd.enc_addr_high)) { pm8001_dbg(pm8001_ha, FAIL, "The sg 
list address start_addr=0x%016llx data_len=0x%x end_addr_high=0x%08x end_addr_low=0x%08x has crossed 4G boundary\n", - start_addr, sata_cmd.enc_len, + dma_addr, + le32_to_cpu(sata_cmd.enc_len), end_addr_high, end_addr_low); pm8001_chip_make_sg(task->scatter, 1, ccb->buf_prd); phys_addr = ccb->ccb_dma_handle; sata_cmd.enc_addr_low = - lower_32_bits(phys_addr); + cpu_to_le32(lower_32_bits(phys_addr)); sata_cmd.enc_addr_high = - upper_32_bits(phys_addr); + cpu_to_le32(upper_32_bits(phys_addr)); sata_cmd.enc_esgl = cpu_to_le32(1 << 31); } @@ -4627,7 +4652,8 @@ static int pm80xx_chip_sata_req(struct pm8001_hba_info *pm8001_ha, sata_cmd.enc_esgl = 0; } /* XTS mode. All other fields are 0 */ - sata_cmd.key_index_mode = 0x6 << 4; + sata_cmd.key_index_mode = cpu_to_le32(0x6 << 4); + /* set tweak values. Should be the start lba */ sata_cmd.twk_val0 = cpu_to_le32((sata_cmd.sata_fis.lbal_exp << 24) | @@ -4653,31 +4679,31 @@ static int pm80xx_chip_sata_req(struct pm8001_hba_info *pm8001_ha, phys_addr = ccb->ccb_dma_handle; sata_cmd.addr_low = lower_32_bits(phys_addr); sata_cmd.addr_high = upper_32_bits(phys_addr); - sata_cmd.esgl = cpu_to_le32(1 << 31); + sata_cmd.esgl = cpu_to_le32(1U << 31); } else if (task->num_scatter == 1) { u64 dma_addr = sg_dma_address(task->scatter); + sata_cmd.addr_low = lower_32_bits(dma_addr); sata_cmd.addr_high = upper_32_bits(dma_addr); sata_cmd.len = cpu_to_le32(task->total_xfer_len); sata_cmd.esgl = 0; + /* Check 4G Boundary */ - start_addr = cpu_to_le64(dma_addr); - end_addr = (start_addr + sata_cmd.len) - 1; - end_addr_low = cpu_to_le32(lower_32_bits(end_addr)); - end_addr_high = cpu_to_le32(upper_32_bits(end_addr)); + end_addr = dma_addr + le32_to_cpu(sata_cmd.len) - 1; + end_addr_low = lower_32_bits(end_addr); + end_addr_high = upper_32_bits(end_addr); if (end_addr_high != sata_cmd.addr_high) { pm8001_dbg(pm8001_ha, FAIL, "The sg list address start_addr=0x%016llx data_len=0x%xend_addr_high=0x%08x end_addr_low=0x%08x has crossed 4G 
boundary\n", - start_addr, sata_cmd.len, + dma_addr, + le32_to_cpu(sata_cmd.len), end_addr_high, end_addr_low); pm8001_chip_make_sg(task->scatter, 1, ccb->buf_prd); phys_addr = ccb->ccb_dma_handle; - sata_cmd.addr_low = - lower_32_bits(phys_addr); - sata_cmd.addr_high = - upper_32_bits(phys_addr); - sata_cmd.esgl = cpu_to_le32(1 << 31); + sata_cmd.addr_low = lower_32_bits(phys_addr); + sata_cmd.addr_high = upper_32_bits(phys_addr); + sata_cmd.esgl = cpu_to_le32(1U << 31); } } else if (task->num_scatter == 0) { sata_cmd.addr_low = 0; @@ -4685,27 +4711,28 @@ static int pm80xx_chip_sata_req(struct pm8001_hba_info *pm8001_ha, sata_cmd.len = cpu_to_le32(task->total_xfer_len); sata_cmd.esgl = 0; } + /* scsi cdb */ sata_cmd.atapi_scsi_cdb[0] = cpu_to_le32(((task->ata_task.atapi_packet[0]) | - (task->ata_task.atapi_packet[1] << 8) | - (task->ata_task.atapi_packet[2] << 16) | - (task->ata_task.atapi_packet[3] << 24))); + (task->ata_task.atapi_packet[1] << 8) | + (task->ata_task.atapi_packet[2] << 16) | + (task->ata_task.atapi_packet[3] << 24))); sata_cmd.atapi_scsi_cdb[1] = cpu_to_le32(((task->ata_task.atapi_packet[4]) | - (task->ata_task.atapi_packet[5] << 8) | - (task->ata_task.atapi_packet[6] << 16) | - (task->ata_task.atapi_packet[7] << 24))); + (task->ata_task.atapi_packet[5] << 8) | + (task->ata_task.atapi_packet[6] << 16) | + (task->ata_task.atapi_packet[7] << 24))); sata_cmd.atapi_scsi_cdb[2] = cpu_to_le32(((task->ata_task.atapi_packet[8]) | - (task->ata_task.atapi_packet[9] << 8) | - (task->ata_task.atapi_packet[10] << 16) | - (task->ata_task.atapi_packet[11] << 24))); + (task->ata_task.atapi_packet[9] << 8) | + (task->ata_task.atapi_packet[10] << 16) | + (task->ata_task.atapi_packet[11] << 24))); sata_cmd.atapi_scsi_cdb[3] = cpu_to_le32(((task->ata_task.atapi_packet[12]) | - (task->ata_task.atapi_packet[13] << 8) | - (task->ata_task.atapi_packet[14] << 16) | - (task->ata_task.atapi_packet[15] << 24))); + (task->ata_task.atapi_packet[13] << 8) | + 
(task->ata_task.atapi_packet[14] << 16) | + (task->ata_task.atapi_packet[15] << 24))); } /* Check for read log for failed drive and return */ @@ -4908,8 +4935,13 @@ static int pm80xx_chip_phy_ctl_req(struct pm8001_hba_info *pm8001_ha, payload.tag = cpu_to_le32(tag); payload.phyop_phyid = cpu_to_le32(((phy_op & 0xFF) << 8) | (phyId & 0xFF)); - return pm8001_mpi_build_cmd(pm8001_ha, circularQ, opc, &payload, - sizeof(payload), 0); + + rc = pm8001_mpi_build_cmd(pm8001_ha, circularQ, opc, &payload, + sizeof(payload), 0); + if (rc) + pm8001_tag_free(pm8001_ha, tag); + + return rc; } static u32 pm80xx_chip_is_our_interrupt(struct pm8001_hba_info *pm8001_ha) diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c index db55737000ab5..3b3e4234f37a0 100644 --- a/drivers/scsi/qla2xxx/qla_attr.c +++ b/drivers/scsi/qla2xxx/qla_attr.c @@ -555,7 +555,7 @@ qla2x00_sysfs_read_vpd(struct file *filp, struct kobject *kobj, if (!capable(CAP_SYS_ADMIN)) return -EINVAL; - if (IS_NOCACHE_VPD_TYPE(ha)) + if (!IS_NOCACHE_VPD_TYPE(ha)) goto skip; faddr = ha->flt_region_vpd << 2; @@ -745,7 +745,7 @@ qla2x00_sysfs_write_reset(struct file *filp, struct kobject *kobj, ql_log(ql_log_info, vha, 0x706f, "Issuing MPI reset.\n"); - if (IS_QLA83XX(ha) || IS_QLA27XX(ha) || IS_QLA28XX(ha)) { + if (IS_QLA83XX(ha)) { uint32_t idc_control; qla83xx_idc_lock(vha, 0); @@ -1056,9 +1056,6 @@ qla2x00_free_sysfs_attr(scsi_qla_host_t *vha, bool stop_beacon) continue; if (iter->type == 3 && !(IS_CNA_CAPABLE(ha))) continue; - if (iter->type == 0x27 && - (!IS_QLA27XX(ha) || !IS_QLA28XX(ha))) - continue; sysfs_remove_bin_file(&host->shost_gendev.kobj, iter->attr); diff --git a/drivers/scsi/qla2xxx/qla_bsg.c b/drivers/scsi/qla2xxx/qla_bsg.c index 9da8034ccad40..c2f00f076f799 100644 --- a/drivers/scsi/qla2xxx/qla_bsg.c +++ b/drivers/scsi/qla2xxx/qla_bsg.c @@ -29,7 +29,8 @@ void qla2x00_bsg_job_done(srb_t *sp, int res) "%s: sp hdl %x, result=%x bsg ptr %p\n", __func__, sp->handle, res, bsg_job); - 
sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); bsg_reply->result = res; bsg_job_done(bsg_job, bsg_reply->result, @@ -3013,7 +3014,8 @@ qla24xx_bsg_timeout(struct bsg_job *bsg_job) done: spin_unlock_irqrestore(&ha->hardware_lock, flags); - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); return 0; } diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index 9ebf4a234d9a9..aefb29d7c7aee 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -726,6 +726,11 @@ typedef struct srb { * code. */ void (*put_fn)(struct kref *kref); + + /* + * Report completion for asynchronous commands. + */ + void (*async_done)(struct srb *sp, int res); } srb_t; #define GET_CMD_SP(sp) (sp->u.scmd.cmd) @@ -2886,7 +2891,11 @@ struct ct_fdmi2_hba_attributes { #define FDMI_PORT_SPEED_8GB 0x10 #define FDMI_PORT_SPEED_16GB 0x20 #define FDMI_PORT_SPEED_32GB 0x40 -#define FDMI_PORT_SPEED_64GB 0x80 +#define FDMI_PORT_SPEED_20GB 0x80 +#define FDMI_PORT_SPEED_40GB 0x100 +#define FDMI_PORT_SPEED_128GB 0x200 +#define FDMI_PORT_SPEED_64GB 0x400 +#define FDMI_PORT_SPEED_256GB 0x800 #define FDMI_PORT_SPEED_UNKNOWN 0x8000 #define FC_CLASS_2 0x04 @@ -4262,8 +4271,10 @@ struct qla_hw_data { #define QLA_ABTS_WAIT_ENABLED(_sp) \ (QLA_NVME_IOS(_sp) && QLA_ABTS_FW_ENABLED(_sp->fcport->vha->hw)) -#define IS_PI_UNINIT_CAPABLE(ha) (IS_QLA83XX(ha) || IS_QLA27XX(ha)) -#define IS_PI_IPGUARD_CAPABLE(ha) (IS_QLA83XX(ha) || IS_QLA27XX(ha)) +#define IS_PI_UNINIT_CAPABLE(ha) (IS_QLA83XX(ha) || IS_QLA27XX(ha) || \ + IS_QLA28XX(ha)) +#define IS_PI_IPGUARD_CAPABLE(ha) (IS_QLA83XX(ha) || IS_QLA27XX(ha) || \ + IS_QLA28XX(ha)) #define IS_PI_DIFB_DIX0_CAPABLE(ha) (0) #define IS_PI_SPLIT_DET_CAPABLE_HBA(ha) (IS_QLA83XX(ha) || IS_QLA27XX(ha) || \ IS_QLA28XX(ha)) @@ -4610,6 +4621,7 @@ struct qla_hw_data { struct workqueue_struct *wq; struct work_struct heartbeat_work; struct qlfc_fw fw_buf; + unsigned long 
last_heartbeat_run_jiffies; /* FCP_CMND priority support */ struct qla_fcp_prio_cfg *fcp_prio_cfg; @@ -5427,4 +5439,8 @@ struct ql_vnd_tgt_stats_resp { #include "qla_gbl.h" #include "qla_dbg.h" #include "qla_inline.h" + +#define IS_SESSION_DELETED(_fcport) (_fcport->disc_state == DSC_DELETE_PEND || \ + _fcport->disc_state == DSC_DELETED) + #endif diff --git a/drivers/scsi/qla2xxx/qla_edif.c b/drivers/scsi/qla2xxx/qla_edif.c index 53d2b85620271..0628633c7c7e9 100644 --- a/drivers/scsi/qla2xxx/qla_edif.c +++ b/drivers/scsi/qla2xxx/qla_edif.c @@ -668,6 +668,11 @@ qla_edif_app_authok(scsi_qla_host_t *vha, struct bsg_job *bsg_job) bsg_job->request_payload.sg_cnt, &appplogiok, sizeof(struct auth_complete_cmd)); + /* silent unaligned access warning */ + portid.b.domain = appplogiok.u.d_id.b.domain; + portid.b.area = appplogiok.u.d_id.b.area; + portid.b.al_pa = appplogiok.u.d_id.b.al_pa; + switch (appplogiok.type) { case PL_TYPE_WWPN: fcport = qla2x00_find_fcport_by_wwpn(vha, @@ -678,7 +683,7 @@ qla_edif_app_authok(scsi_qla_host_t *vha, struct bsg_job *bsg_job) __func__, appplogiok.u.wwpn); break; case PL_TYPE_DID: - fcport = qla2x00_find_fcport_by_pid(vha, &appplogiok.u.d_id); + fcport = qla2x00_find_fcport_by_pid(vha, &portid); if (!fcport) ql_dbg(ql_dbg_edif, vha, 0x911d, "%s d_id lookup failed: %x\n", __func__, @@ -777,6 +782,11 @@ qla_edif_app_authfail(scsi_qla_host_t *vha, struct bsg_job *bsg_job) bsg_job->request_payload.sg_cnt, &appplogifail, sizeof(struct auth_complete_cmd)); + /* silent unaligned access warning */ + portid.b.domain = appplogifail.u.d_id.b.domain; + portid.b.area = appplogifail.u.d_id.b.area; + portid.b.al_pa = appplogifail.u.d_id.b.al_pa; + /* * TODO: edif: app has failed this plogi. Inform driver to * take any action (if any). 
@@ -788,7 +798,7 @@ qla_edif_app_authfail(scsi_qla_host_t *vha, struct bsg_job *bsg_job) SET_DID_STATUS(bsg_reply->result, DID_OK); break; case PL_TYPE_DID: - fcport = qla2x00_find_fcport_by_pid(vha, &appplogifail.u.d_id); + fcport = qla2x00_find_fcport_by_pid(vha, &portid); if (!fcport) ql_dbg(ql_dbg_edif, vha, 0x911d, "%s d_id lookup failed: %x\n", __func__, @@ -1253,6 +1263,7 @@ qla24xx_sadb_update(struct bsg_job *bsg_job) int result = 0; struct qla_sa_update_frame sa_frame; struct srb_iocb *iocb_cmd; + port_id_t portid; ql_dbg(ql_dbg_edif + ql_dbg_verbose, vha, 0x911d, "%s entered, vha: 0x%p\n", __func__, vha); @@ -1276,7 +1287,12 @@ qla24xx_sadb_update(struct bsg_job *bsg_job) goto done; } - fcport = qla2x00_find_fcport_by_pid(vha, &sa_frame.port_id); + /* silent unaligned access warning */ + portid.b.domain = sa_frame.port_id.b.domain; + portid.b.area = sa_frame.port_id.b.area; + portid.b.al_pa = sa_frame.port_id.b.al_pa; + + fcport = qla2x00_find_fcport_by_pid(vha, &portid); if (fcport) { found = 1; if (sa_frame.flags == QLA_SA_UPDATE_FLAGS_TX_KEY) @@ -2146,7 +2162,8 @@ edif_doorbell_show(struct device *dev, struct device_attribute *attr, static void qla_noop_sp_done(srb_t *sp, int res) { - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); } /* diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h index 8d8503a284790..3f8b8bbabe6de 100644 --- a/drivers/scsi/qla2xxx/qla_gbl.h +++ b/drivers/scsi/qla2xxx/qla_gbl.h @@ -316,7 +316,8 @@ extern int qla2x00_start_sp(srb_t *); extern int qla24xx_dif_start_scsi(srb_t *); extern int qla2x00_start_bidir(srb_t *, struct scsi_qla_host *, uint32_t); extern int qla2xxx_dif_start_scsi_mq(srb_t *); -extern void qla2x00_init_timer(srb_t *sp, unsigned long tmo); +extern void qla2x00_init_async_sp(srb_t *sp, unsigned long tmo, + void (*done)(struct srb *, int)); extern unsigned long qla2x00_get_async_timeout(struct scsi_qla_host *); extern void *qla2x00_alloc_iocbs(struct 
scsi_qla_host *, srb_t *); @@ -332,6 +333,7 @@ extern int qla24xx_get_one_block_sg(uint32_t, struct qla2_sgx *, uint32_t *); extern int qla24xx_configure_prot_mode(srb_t *, uint16_t *); extern int qla24xx_issue_sa_replace_iocb(scsi_qla_host_t *vha, struct qla_work_evt *e); +void qla2x00_sp_release(struct kref *kref); /* * Global Function Prototypes in qla_mbx.c source file. diff --git a/drivers/scsi/qla2xxx/qla_gs.c b/drivers/scsi/qla2xxx/qla_gs.c index 28b574e20ef32..6b67bd561810d 100644 --- a/drivers/scsi/qla2xxx/qla_gs.c +++ b/drivers/scsi/qla2xxx/qla_gs.c @@ -529,7 +529,6 @@ static void qla2x00_async_sns_sp_done(srb_t *sp, int rc) if (!e) goto err2; - del_timer(&sp->u.iocb_cmd.timer); e->u.iosb.sp = sp; qla2x00_post_work(vha, e); return; @@ -556,8 +555,8 @@ static void qla2x00_async_sns_sp_done(srb_t *sp, int rc) sp->u.iocb_cmd.u.ctarg.rsp = NULL; } - sp->free(sp); - + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); return; } @@ -592,13 +591,15 @@ static int qla_async_rftid(scsi_qla_host_t *vha, port_id_t *d_id) if (!vha->flags.online) goto done; + /* ref: INIT */ sp = qla2x00_get_sp(vha, NULL, GFP_KERNEL); if (!sp) goto done; sp->type = SRB_CT_PTHRU_CMD; sp->name = "rft_id"; - qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2); + qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2, + qla2x00_async_sns_sp_done); sp->u.iocb_cmd.u.ctarg.req = dma_alloc_coherent(&vha->hw->pdev->dev, sizeof(struct ct_sns_pkt), &sp->u.iocb_cmd.u.ctarg.req_dma, @@ -638,8 +639,6 @@ static int qla_async_rftid(scsi_qla_host_t *vha, port_id_t *d_id) sp->u.iocb_cmd.u.ctarg.req_size = RFT_ID_REQ_SIZE; sp->u.iocb_cmd.u.ctarg.rsp_size = RFT_ID_RSP_SIZE; sp->u.iocb_cmd.u.ctarg.nport_handle = NPH_SNS; - sp->u.iocb_cmd.timeout = qla2x00_async_iocb_timeout; - sp->done = qla2x00_async_sns_sp_done; ql_dbg(ql_dbg_disc, vha, 0xffff, "Async-%s - hdl=%x portid %06x.\n", @@ -653,7 +652,8 @@ static int qla_async_rftid(scsi_qla_host_t *vha, port_id_t *d_id) } return rval; 
done_free_sp: - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); done: return rval; } @@ -676,8 +676,7 @@ qla2x00_rff_id(scsi_qla_host_t *vha, u8 type) return (QLA_SUCCESS); } - return qla_async_rffid(vha, &vha->d_id, qlt_rff_id(vha), - FC4_TYPE_FCP_SCSI); + return qla_async_rffid(vha, &vha->d_id, qlt_rff_id(vha), type); } static int qla_async_rffid(scsi_qla_host_t *vha, port_id_t *d_id, @@ -688,13 +687,15 @@ static int qla_async_rffid(scsi_qla_host_t *vha, port_id_t *d_id, srb_t *sp; struct ct_sns_pkt *ct_sns; + /* ref: INIT */ sp = qla2x00_get_sp(vha, NULL, GFP_KERNEL); if (!sp) goto done; sp->type = SRB_CT_PTHRU_CMD; sp->name = "rff_id"; - qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2); + qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2, + qla2x00_async_sns_sp_done); sp->u.iocb_cmd.u.ctarg.req = dma_alloc_coherent(&vha->hw->pdev->dev, sizeof(struct ct_sns_pkt), &sp->u.iocb_cmd.u.ctarg.req_dma, @@ -727,13 +728,11 @@ static int qla_async_rffid(scsi_qla_host_t *vha, port_id_t *d_id, /* Prepare CT arguments -- port_id, FC-4 feature, FC-4 type */ ct_req->req.rff_id.port_id = port_id_to_be_id(*d_id); ct_req->req.rff_id.fc4_feature = fc4feature; - ct_req->req.rff_id.fc4_type = fc4type; /* SCSI - FCP */ + ct_req->req.rff_id.fc4_type = fc4type; /* SCSI-FCP or FC-NVMe */ sp->u.iocb_cmd.u.ctarg.req_size = RFF_ID_REQ_SIZE; sp->u.iocb_cmd.u.ctarg.rsp_size = RFF_ID_RSP_SIZE; sp->u.iocb_cmd.u.ctarg.nport_handle = NPH_SNS; - sp->u.iocb_cmd.timeout = qla2x00_async_iocb_timeout; - sp->done = qla2x00_async_sns_sp_done; ql_dbg(ql_dbg_disc, vha, 0xffff, "Async-%s - hdl=%x portid %06x feature %x type %x.\n", @@ -749,7 +748,8 @@ static int qla_async_rffid(scsi_qla_host_t *vha, port_id_t *d_id, return rval; done_free_sp: - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); done: return rval; } @@ -779,13 +779,15 @@ static int qla_async_rnnid(scsi_qla_host_t *vha, port_id_t *d_id, srb_t *sp; struct 
ct_sns_pkt *ct_sns; + /* ref: INIT */ sp = qla2x00_get_sp(vha, NULL, GFP_KERNEL); if (!sp) goto done; sp->type = SRB_CT_PTHRU_CMD; sp->name = "rnid"; - qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2); + qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2, + qla2x00_async_sns_sp_done); sp->u.iocb_cmd.u.ctarg.req = dma_alloc_coherent(&vha->hw->pdev->dev, sizeof(struct ct_sns_pkt), &sp->u.iocb_cmd.u.ctarg.req_dma, @@ -823,9 +825,6 @@ static int qla_async_rnnid(scsi_qla_host_t *vha, port_id_t *d_id, sp->u.iocb_cmd.u.ctarg.rsp_size = RNN_ID_RSP_SIZE; sp->u.iocb_cmd.u.ctarg.nport_handle = NPH_SNS; - sp->u.iocb_cmd.timeout = qla2x00_async_iocb_timeout; - sp->done = qla2x00_async_sns_sp_done; - ql_dbg(ql_dbg_disc, vha, 0xffff, "Async-%s - hdl=%x portid %06x\n", sp->name, sp->handle, d_id->b24); @@ -840,7 +839,8 @@ static int qla_async_rnnid(scsi_qla_host_t *vha, port_id_t *d_id, return rval; done_free_sp: - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); done: return rval; } @@ -886,13 +886,15 @@ static int qla_async_rsnn_nn(scsi_qla_host_t *vha) srb_t *sp; struct ct_sns_pkt *ct_sns; + /* ref: INIT */ sp = qla2x00_get_sp(vha, NULL, GFP_KERNEL); if (!sp) goto done; sp->type = SRB_CT_PTHRU_CMD; sp->name = "rsnn_nn"; - qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2); + qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2, + qla2x00_async_sns_sp_done); sp->u.iocb_cmd.u.ctarg.req = dma_alloc_coherent(&vha->hw->pdev->dev, sizeof(struct ct_sns_pkt), &sp->u.iocb_cmd.u.ctarg.req_dma, @@ -936,9 +938,6 @@ static int qla_async_rsnn_nn(scsi_qla_host_t *vha) sp->u.iocb_cmd.u.ctarg.rsp_size = RSNN_NN_RSP_SIZE; sp->u.iocb_cmd.u.ctarg.nport_handle = NPH_SNS; - sp->u.iocb_cmd.timeout = qla2x00_async_iocb_timeout; - sp->done = qla2x00_async_sns_sp_done; - ql_dbg(ql_dbg_disc, vha, 0xffff, "Async-%s - hdl=%x.\n", sp->name, sp->handle); @@ -953,7 +952,8 @@ static int qla_async_rsnn_nn(scsi_qla_host_t *vha) return rval; 
done_free_sp: - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); done: return rval; } @@ -2893,7 +2893,8 @@ static void qla24xx_async_gpsc_sp_done(srb_t *sp, int res) qla24xx_handle_gpsc_event(vha, &ea); done: - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); } int qla24xx_async_gpsc(scsi_qla_host_t *vha, fc_port_t *fcport) @@ -2905,6 +2906,7 @@ int qla24xx_async_gpsc(scsi_qla_host_t *vha, fc_port_t *fcport) if (!vha->flags.online || (fcport->flags & FCF_ASYNC_SENT)) return rval; + /* ref: INIT */ sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL); if (!sp) goto done; @@ -2913,8 +2915,8 @@ int qla24xx_async_gpsc(scsi_qla_host_t *vha, fc_port_t *fcport) sp->name = "gpsc"; sp->gen1 = fcport->rscn_gen; sp->gen2 = fcport->login_gen; - - qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2); + qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2, + qla24xx_async_gpsc_sp_done); /* CT_IU preamble */ ct_req = qla24xx_prep_ct_fm_req(fcport->ct_desc.ct_sns, GPSC_CMD, @@ -2932,9 +2934,6 @@ int qla24xx_async_gpsc(scsi_qla_host_t *vha, fc_port_t *fcport) sp->u.iocb_cmd.u.ctarg.rsp_size = GPSC_RSP_SIZE; sp->u.iocb_cmd.u.ctarg.nport_handle = vha->mgmt_svr_loop_id; - sp->u.iocb_cmd.timeout = qla2x00_async_iocb_timeout; - sp->done = qla24xx_async_gpsc_sp_done; - ql_dbg(ql_dbg_disc, vha, 0x205e, "Async-%s %8phC hdl=%x loopid=%x portid=%02x%02x%02x.\n", sp->name, fcport->port_name, sp->handle, @@ -2947,7 +2946,8 @@ int qla24xx_async_gpsc(scsi_qla_host_t *vha, fc_port_t *fcport) return rval; done_free_sp: - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); done: return rval; } @@ -2996,7 +2996,8 @@ void qla24xx_sp_unmap(scsi_qla_host_t *vha, srb_t *sp) break; } - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); } void qla24xx_handle_gpnid_event(scsi_qla_host_t *vha, struct event_arg *ea) @@ -3135,13 +3136,15 @@ static void qla2x00_async_gpnid_sp_done(srb_t 
*sp, int res) if (res) { if (res == QLA_FUNCTION_TIMEOUT) { qla24xx_post_gpnid_work(sp->vha, &ea.id); - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); return; } } else if (sp->gen1) { /* There was another RSCN for this Nport ID */ qla24xx_post_gpnid_work(sp->vha, &ea.id); - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); return; } @@ -3162,7 +3165,8 @@ static void qla2x00_async_gpnid_sp_done(srb_t *sp, int res) sp->u.iocb_cmd.u.ctarg.rsp_dma); sp->u.iocb_cmd.u.ctarg.rsp = NULL; - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); return; } @@ -3182,6 +3186,7 @@ int qla24xx_async_gpnid(scsi_qla_host_t *vha, port_id_t *id) if (!vha->flags.online) goto done; + /* ref: INIT */ sp = qla2x00_get_sp(vha, NULL, GFP_KERNEL); if (!sp) goto done; @@ -3190,14 +3195,16 @@ int qla24xx_async_gpnid(scsi_qla_host_t *vha, port_id_t *id) sp->name = "gpnid"; sp->u.iocb_cmd.u.ctarg.id = *id; sp->gen1 = 0; - qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2); + qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2, + qla2x00_async_gpnid_sp_done); spin_lock_irqsave(&vha->hw->tgt.sess_lock, flags); list_for_each_entry(tsp, &vha->gpnid_list, elem) { if (tsp->u.iocb_cmd.u.ctarg.id.b24 == id->b24) { tsp->gen1++; spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags); - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); goto done; } } @@ -3238,9 +3245,6 @@ int qla24xx_async_gpnid(scsi_qla_host_t *vha, port_id_t *id) sp->u.iocb_cmd.u.ctarg.rsp_size = GPN_ID_RSP_SIZE; sp->u.iocb_cmd.u.ctarg.nport_handle = NPH_SNS; - sp->u.iocb_cmd.timeout = qla2x00_async_iocb_timeout; - sp->done = qla2x00_async_gpnid_sp_done; - ql_dbg(ql_dbg_disc, vha, 0x2067, "Async-%s hdl=%x ID %3phC.\n", sp->name, sp->handle, &ct_req->req.port_id.port_id); @@ -3270,8 +3274,8 @@ int qla24xx_async_gpnid(scsi_qla_host_t *vha, port_id_t *id) sp->u.iocb_cmd.u.ctarg.rsp_dma); 
sp->u.iocb_cmd.u.ctarg.rsp = NULL; } - - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); done: return rval; } @@ -3326,7 +3330,8 @@ void qla24xx_async_gffid_sp_done(srb_t *sp, int res) ea.rc = res; qla24xx_handle_gffid_event(vha, &ea); - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); } /* Get FC4 Feature with Nport ID. */ @@ -3339,6 +3344,7 @@ int qla24xx_async_gffid(scsi_qla_host_t *vha, fc_port_t *fcport) if (!vha->flags.online || (fcport->flags & FCF_ASYNC_SENT)) return rval; + /* ref: INIT */ sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL); if (!sp) return rval; @@ -3348,9 +3354,8 @@ int qla24xx_async_gffid(scsi_qla_host_t *vha, fc_port_t *fcport) sp->name = "gffid"; sp->gen1 = fcport->rscn_gen; sp->gen2 = fcport->login_gen; - - sp->u.iocb_cmd.timeout = qla2x00_async_iocb_timeout; - qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2); + qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2, + qla24xx_async_gffid_sp_done); /* CT_IU preamble */ ct_req = qla2x00_prep_ct_req(fcport->ct_desc.ct_sns, GFF_ID_CMD, @@ -3368,8 +3373,6 @@ int qla24xx_async_gffid(scsi_qla_host_t *vha, fc_port_t *fcport) sp->u.iocb_cmd.u.ctarg.rsp_size = GFF_ID_RSP_SIZE; sp->u.iocb_cmd.u.ctarg.nport_handle = NPH_SNS; - sp->done = qla24xx_async_gffid_sp_done; - ql_dbg(ql_dbg_disc, vha, 0x2132, "Async-%s hdl=%x %8phC.\n", sp->name, sp->handle, fcport->port_name); @@ -3380,7 +3383,8 @@ int qla24xx_async_gffid(scsi_qla_host_t *vha, fc_port_t *fcport) return rval; done_free_sp: - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); fcport->flags &= ~FCF_ASYNC_SENT; return rval; } @@ -3767,7 +3771,6 @@ static void qla2x00_async_gpnft_gnnft_sp_done(srb_t *sp, int res) "Async done-%s res %x FC4Type %x\n", sp->name, res, sp->gen2); - del_timer(&sp->u.iocb_cmd.timer); sp->rc = res; if (res) { unsigned long flags; @@ -3892,9 +3895,8 @@ static int qla24xx_async_gnnft(scsi_qla_host_t *vha, struct srb *sp, 
sp->name = "gnnft"; sp->gen1 = vha->hw->base_qpair->chip_reset; sp->gen2 = fc4_type; - - sp->u.iocb_cmd.timeout = qla2x00_async_iocb_timeout; - qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2); + qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2, + qla2x00_async_gpnft_gnnft_sp_done); memset(sp->u.iocb_cmd.u.ctarg.rsp, 0, sp->u.iocb_cmd.u.ctarg.rsp_size); memset(sp->u.iocb_cmd.u.ctarg.req, 0, sp->u.iocb_cmd.u.ctarg.req_size); @@ -3910,8 +3912,6 @@ static int qla24xx_async_gnnft(scsi_qla_host_t *vha, struct srb *sp, sp->u.iocb_cmd.u.ctarg.req_size = GNN_FT_REQ_SIZE; sp->u.iocb_cmd.u.ctarg.nport_handle = NPH_SNS; - sp->done = qla2x00_async_gpnft_gnnft_sp_done; - ql_dbg(ql_dbg_disc, vha, 0xffff, "Async-%s hdl=%x FC4Type %x.\n", sp->name, sp->handle, ct_req->req.gpn_ft.port_type); @@ -3938,8 +3938,8 @@ static int qla24xx_async_gnnft(scsi_qla_host_t *vha, struct srb *sp, sp->u.iocb_cmd.u.ctarg.rsp_dma); sp->u.iocb_cmd.u.ctarg.rsp = NULL; } - - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); spin_lock_irqsave(&vha->work_lock, flags); vha->scan.scan_flags &= ~SF_SCANNING; @@ -3991,9 +3991,12 @@ int qla24xx_async_gpnft(scsi_qla_host_t *vha, u8 fc4_type, srb_t *sp) ql_dbg(ql_dbg_disc + ql_dbg_verbose, vha, 0xffff, "%s: Performing FCP Scan\n", __func__); - if (sp) - sp->free(sp); /* should not happen */ + if (sp) { + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); + } + /* ref: INIT */ sp = qla2x00_get_sp(vha, NULL, GFP_KERNEL); if (!sp) { spin_lock_irqsave(&vha->work_lock, flags); @@ -4038,6 +4041,7 @@ int qla24xx_async_gpnft(scsi_qla_host_t *vha, u8 fc4_type, srb_t *sp) sp->u.iocb_cmd.u.ctarg.req, sp->u.iocb_cmd.u.ctarg.req_dma); sp->u.iocb_cmd.u.ctarg.req = NULL; + /* ref: INIT */ qla2x00_rel_sp(sp); return rval; } @@ -4057,9 +4061,8 @@ int qla24xx_async_gpnft(scsi_qla_host_t *vha, u8 fc4_type, srb_t *sp) sp->name = "gpnft"; sp->gen1 = vha->hw->base_qpair->chip_reset; sp->gen2 = fc4_type; - - 
sp->u.iocb_cmd.timeout = qla2x00_async_iocb_timeout; - qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2); + qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2, + qla2x00_async_gpnft_gnnft_sp_done); rspsz = sp->u.iocb_cmd.u.ctarg.rsp_size; memset(sp->u.iocb_cmd.u.ctarg.rsp, 0, sp->u.iocb_cmd.u.ctarg.rsp_size); @@ -4074,8 +4077,6 @@ int qla24xx_async_gpnft(scsi_qla_host_t *vha, u8 fc4_type, srb_t *sp) sp->u.iocb_cmd.u.ctarg.nport_handle = NPH_SNS; - sp->done = qla2x00_async_gpnft_gnnft_sp_done; - ql_dbg(ql_dbg_disc, vha, 0xffff, "Async-%s hdl=%x FC4Type %x.\n", sp->name, sp->handle, ct_req->req.gpn_ft.port_type); @@ -4103,7 +4104,8 @@ int qla24xx_async_gpnft(scsi_qla_host_t *vha, u8 fc4_type, srb_t *sp) sp->u.iocb_cmd.u.ctarg.rsp = NULL; } - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); spin_lock_irqsave(&vha->work_lock, flags); vha->scan.scan_flags &= ~SF_SCANNING; @@ -4167,7 +4169,8 @@ static void qla2x00_async_gnnid_sp_done(srb_t *sp, int res) qla24xx_handle_gnnid_event(vha, &ea); - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); } int qla24xx_async_gnnid(scsi_qla_host_t *vha, fc_port_t *fcport) @@ -4180,6 +4183,7 @@ int qla24xx_async_gnnid(scsi_qla_host_t *vha, fc_port_t *fcport) return rval; qla2x00_set_fcport_disc_state(fcport, DSC_GNN_ID); + /* ref: INIT */ sp = qla2x00_get_sp(vha, fcport, GFP_ATOMIC); if (!sp) goto done; @@ -4189,9 +4193,8 @@ int qla24xx_async_gnnid(scsi_qla_host_t *vha, fc_port_t *fcport) sp->name = "gnnid"; sp->gen1 = fcport->rscn_gen; sp->gen2 = fcport->login_gen; - - sp->u.iocb_cmd.timeout = qla2x00_async_iocb_timeout; - qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2); + qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2, + qla2x00_async_gnnid_sp_done); /* CT_IU preamble */ ct_req = qla2x00_prep_ct_req(fcport->ct_desc.ct_sns, GNN_ID_CMD, @@ -4210,8 +4213,6 @@ int qla24xx_async_gnnid(scsi_qla_host_t *vha, fc_port_t *fcport) 
sp->u.iocb_cmd.u.ctarg.rsp_size = GNN_ID_RSP_SIZE; sp->u.iocb_cmd.u.ctarg.nport_handle = NPH_SNS; - sp->done = qla2x00_async_gnnid_sp_done; - ql_dbg(ql_dbg_disc, vha, 0xffff, "Async-%s - %8phC hdl=%x loopid=%x portid %06x.\n", sp->name, fcport->port_name, @@ -4223,7 +4224,8 @@ int qla24xx_async_gnnid(scsi_qla_host_t *vha, fc_port_t *fcport) return rval; done_free_sp: - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); fcport->flags &= ~FCF_ASYNC_SENT; done: return rval; @@ -4297,7 +4299,8 @@ static void qla2x00_async_gfpnid_sp_done(srb_t *sp, int res) qla24xx_handle_gfpnid_event(vha, &ea); - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); } int qla24xx_async_gfpnid(scsi_qla_host_t *vha, fc_port_t *fcport) @@ -4309,6 +4312,7 @@ int qla24xx_async_gfpnid(scsi_qla_host_t *vha, fc_port_t *fcport) if (!vha->flags.online || (fcport->flags & FCF_ASYNC_SENT)) return rval; + /* ref: INIT */ sp = qla2x00_get_sp(vha, fcport, GFP_ATOMIC); if (!sp) goto done; @@ -4317,9 +4321,8 @@ int qla24xx_async_gfpnid(scsi_qla_host_t *vha, fc_port_t *fcport) sp->name = "gfpnid"; sp->gen1 = fcport->rscn_gen; sp->gen2 = fcport->login_gen; - - sp->u.iocb_cmd.timeout = qla2x00_async_iocb_timeout; - qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2); + qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2, + qla2x00_async_gfpnid_sp_done); /* CT_IU preamble */ ct_req = qla2x00_prep_ct_req(fcport->ct_desc.ct_sns, GFPN_ID_CMD, @@ -4338,8 +4341,6 @@ int qla24xx_async_gfpnid(scsi_qla_host_t *vha, fc_port_t *fcport) sp->u.iocb_cmd.u.ctarg.rsp_size = GFPN_ID_RSP_SIZE; sp->u.iocb_cmd.u.ctarg.nport_handle = NPH_SNS; - sp->done = qla2x00_async_gfpnid_sp_done; - ql_dbg(ql_dbg_disc, vha, 0xffff, "Async-%s - %8phC hdl=%x loopid=%x portid %06x.\n", sp->name, fcport->port_name, @@ -4352,7 +4353,8 @@ int qla24xx_async_gfpnid(scsi_qla_host_t *vha, fc_port_t *fcport) return rval; done_free_sp: - sp->free(sp); + /* ref: INIT */ + 
kref_put(&sp->cmd_kref, qla2x00_sp_release); done: return rval; } diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index 1fe4966fc2f68..7f81525c4fb32 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -51,6 +51,9 @@ qla2x00_sp_timeout(struct timer_list *t) WARN_ON(irqs_disabled()); iocb = &sp->u.iocb_cmd; iocb->timeout(sp); + + /* ref: TMR */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); } void qla2x00_sp_free(srb_t *sp) @@ -125,8 +128,13 @@ static void qla24xx_abort_iocb_timeout(void *data) } spin_unlock_irqrestore(qpair->qp_lock_ptr, flags); - if (sp->cmd_sp) + if (sp->cmd_sp) { + /* + * This done function should take care of + * original command ref: INIT + */ sp->cmd_sp->done(sp->cmd_sp, QLA_OS_TIMER_EXPIRED); + } abt->u.abt.comp_status = cpu_to_le16(CS_TIMEOUT); sp->done(sp, QLA_OS_TIMER_EXPIRED); @@ -140,11 +148,11 @@ static void qla24xx_abort_sp_done(srb_t *sp, int res) if (orig_sp) qla_wait_nvme_release_cmd_kref(orig_sp); - del_timer(&sp->u.iocb_cmd.timer); if (sp->flags & SRB_WAKEUP_ON_COMP) complete(&abt->u.abt.comp); else - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); } int qla24xx_async_abort_cmd(srb_t *cmd_sp, bool wait) @@ -154,6 +162,7 @@ int qla24xx_async_abort_cmd(srb_t *cmd_sp, bool wait) srb_t *sp; int rval = QLA_FUNCTION_FAILED; + /* ref: INIT for ABTS command */ sp = qla2xxx_get_qpair_sp(cmd_sp->vha, cmd_sp->qpair, cmd_sp->fcport, GFP_ATOMIC); if (!sp) @@ -167,23 +176,22 @@ int qla24xx_async_abort_cmd(srb_t *cmd_sp, bool wait) if (wait) sp->flags = SRB_WAKEUP_ON_COMP; - abt_iocb->timeout = qla24xx_abort_iocb_timeout; init_completion(&abt_iocb->u.abt.comp); /* FW can send 2 x ABTS's timeout/20s */ - qla2x00_init_timer(sp, 42); + qla2x00_init_async_sp(sp, 42, qla24xx_abort_sp_done); + sp->u.iocb_cmd.timeout = qla24xx_abort_iocb_timeout; abt_iocb->u.abt.cmd_hndl = cmd_sp->handle; abt_iocb->u.abt.req_que_no = cpu_to_le16(cmd_sp->qpair->req->id); - 
sp->done = qla24xx_abort_sp_done; - ql_dbg(ql_dbg_async, vha, 0x507c, "Abort command issued - hdl=%x, type=%x\n", cmd_sp->handle, cmd_sp->type); rval = qla2x00_start_sp(sp); if (rval != QLA_SUCCESS) { - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); return rval; } @@ -191,7 +199,8 @@ int qla24xx_async_abort_cmd(srb_t *cmd_sp, bool wait) wait_for_completion(&abt_iocb->u.abt.comp); rval = abt_iocb->u.abt.comp_status == CS_COMPLETE ? QLA_SUCCESS : QLA_ERR_FROM_FW; - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); } return rval; @@ -286,10 +295,13 @@ static void qla2x00_async_login_sp_done(srb_t *sp, int res) ea.iop[0] = lio->u.logio.iop[0]; ea.iop[1] = lio->u.logio.iop[1]; ea.sp = sp; + if (res) + ea.data[0] = MBS_COMMAND_ERROR; qla24xx_handle_plogi_done_event(vha, &ea); } - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); } int @@ -308,6 +320,7 @@ qla2x00_async_login(struct scsi_qla_host *vha, fc_port_t *fcport, return rval; } + /* ref: INIT */ sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL); if (!sp) goto done; @@ -320,12 +333,10 @@ qla2x00_async_login(struct scsi_qla_host *vha, fc_port_t *fcport, sp->name = "login"; sp->gen1 = fcport->rscn_gen; sp->gen2 = fcport->login_gen; + qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2, + qla2x00_async_login_sp_done); lio = &sp->u.iocb_cmd; - lio->timeout = qla2x00_async_iocb_timeout; - qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2); - - sp->done = qla2x00_async_login_sp_done; if (N2N_TOPO(fcport->vha->hw) && fcport_is_bigger(fcport)) { lio->u.logio.flags |= SRB_LOGIN_PRLI_ONLY; } else { @@ -358,7 +369,8 @@ qla2x00_async_login(struct scsi_qla_host *vha, fc_port_t *fcport, return rval; done_free_sp: - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); fcport->flags &= ~FCF_ASYNC_SENT; done: fcport->flags &= ~FCF_ASYNC_ACTIVE; @@ -370,29 +382,26 @@ static void 
qla2x00_async_logout_sp_done(srb_t *sp, int res) sp->fcport->flags &= ~(FCF_ASYNC_SENT | FCF_ASYNC_ACTIVE); sp->fcport->login_gen++; qlt_logo_completion_handler(sp->fcport, sp->u.iocb_cmd.u.logio.data[0]); - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); } int qla2x00_async_logout(struct scsi_qla_host *vha, fc_port_t *fcport) { srb_t *sp; - struct srb_iocb *lio; int rval = QLA_FUNCTION_FAILED; fcport->flags |= FCF_ASYNC_SENT; + /* ref: INIT */ sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL); if (!sp) goto done; sp->type = SRB_LOGOUT_CMD; sp->name = "logout"; - - lio = &sp->u.iocb_cmd; - lio->timeout = qla2x00_async_iocb_timeout; - qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2); - - sp->done = qla2x00_async_logout_sp_done; + qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2, + qla2x00_async_logout_sp_done), ql_dbg(ql_dbg_disc, vha, 0x2070, "Async-logout - hdl=%x loop-id=%x portid=%02x%02x%02x %8phC explicit %d.\n", @@ -406,7 +415,8 @@ qla2x00_async_logout(struct scsi_qla_host *vha, fc_port_t *fcport) return rval; done_free_sp: - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); done: fcport->flags &= ~(FCF_ASYNC_SENT | FCF_ASYNC_ACTIVE); return rval; @@ -432,29 +442,26 @@ static void qla2x00_async_prlo_sp_done(srb_t *sp, int res) if (!test_bit(UNLOADING, &vha->dpc_flags)) qla2x00_post_async_prlo_done_work(sp->fcport->vha, sp->fcport, lio->u.logio.data); - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); } int qla2x00_async_prlo(struct scsi_qla_host *vha, fc_port_t *fcport) { srb_t *sp; - struct srb_iocb *lio; int rval; rval = QLA_FUNCTION_FAILED; + /* ref: INIT */ sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL); if (!sp) goto done; sp->type = SRB_PRLO_CMD; sp->name = "prlo"; - - lio = &sp->u.iocb_cmd; - lio->timeout = qla2x00_async_iocb_timeout; - qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2); - - sp->done = qla2x00_async_prlo_sp_done; + 
qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2, + qla2x00_async_prlo_sp_done); ql_dbg(ql_dbg_disc, vha, 0x2070, "Async-prlo - hdl=%x loop-id=%x portid=%02x%02x%02x.\n", @@ -468,7 +475,8 @@ qla2x00_async_prlo(struct scsi_qla_host *vha, fc_port_t *fcport) return rval; done_free_sp: - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); done: fcport->flags &= ~FCF_ASYNC_ACTIVE; return rval; @@ -551,10 +559,12 @@ static void qla2x00_async_adisc_sp_done(srb_t *sp, int res) ea.iop[1] = lio->u.logio.iop[1]; ea.fcport = sp->fcport; ea.sp = sp; + if (res) + ea.data[0] = MBS_COMMAND_ERROR; qla24xx_handle_adisc_event(vha, &ea); - - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); } int @@ -565,26 +575,34 @@ qla2x00_async_adisc(struct scsi_qla_host *vha, fc_port_t *fcport, struct srb_iocb *lio; int rval = QLA_FUNCTION_FAILED; + if (IS_SESSION_DELETED(fcport)) { + ql_log(ql_log_warn, vha, 0xffff, + "%s: %8phC is being delete - not sending command.\n", + __func__, fcport->port_name); + fcport->flags &= ~FCF_ASYNC_ACTIVE; + return rval; + } + if (!vha->flags.online || (fcport->flags & FCF_ASYNC_SENT)) return rval; fcport->flags |= FCF_ASYNC_SENT; + /* ref: INIT */ sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL); if (!sp) goto done; sp->type = SRB_ADISC_CMD; sp->name = "adisc"; - - lio = &sp->u.iocb_cmd; - lio->timeout = qla2x00_async_iocb_timeout; sp->gen1 = fcport->rscn_gen; sp->gen2 = fcport->login_gen; - qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2); + qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2, + qla2x00_async_adisc_sp_done); - sp->done = qla2x00_async_adisc_sp_done; - if (data[1] & QLA_LOGIO_LOGIN_RETRIED) + if (data[1] & QLA_LOGIO_LOGIN_RETRIED) { + lio = &sp->u.iocb_cmd; lio->u.logio.flags |= SRB_LOGIN_RETRIED; + } ql_dbg(ql_dbg_disc, vha, 0x206f, "Async-adisc - hdl=%x loopid=%x portid=%06x %8phC.\n", @@ -597,7 +615,8 @@ qla2x00_async_adisc(struct scsi_qla_host *vha, fc_port_t 
*fcport, return rval; done_free_sp: - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); done: fcport->flags &= ~(FCF_ASYNC_SENT | FCF_ASYNC_ACTIVE); qla2x00_post_async_adisc_work(vha, fcport, data); @@ -963,6 +982,9 @@ static void qla24xx_handle_gnl_done_event(scsi_qla_host_t *vha, set_bit(RELOGIN_NEEDED, &vha->dpc_flags); } break; + case ISP_CFG_NL: + qla24xx_fcport_handle_login(vha, fcport); + break; default: break; } @@ -1078,13 +1100,13 @@ static void qla24xx_async_gnl_sp_done(srb_t *sp, int res) } spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags); - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); } int qla24xx_async_gnl(struct scsi_qla_host *vha, fc_port_t *fcport) { srb_t *sp; - struct srb_iocb *mbx; int rval = QLA_FUNCTION_FAILED; unsigned long flags; u16 *mb; @@ -1109,6 +1131,7 @@ int qla24xx_async_gnl(struct scsi_qla_host *vha, fc_port_t *fcport) vha->gnl.sent = 1; spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags); + /* ref: INIT */ sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL); if (!sp) goto done; @@ -1117,10 +1140,8 @@ int qla24xx_async_gnl(struct scsi_qla_host *vha, fc_port_t *fcport) sp->name = "gnlist"; sp->gen1 = fcport->rscn_gen; sp->gen2 = fcport->login_gen; - - mbx = &sp->u.iocb_cmd; - mbx->timeout = qla2x00_async_iocb_timeout; - qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha)+2); + qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2, + qla24xx_async_gnl_sp_done); mb = sp->u.iocb_cmd.u.mbx.out_mb; mb[0] = MBC_PORT_NODE_NAME_LIST; @@ -1132,8 +1153,6 @@ int qla24xx_async_gnl(struct scsi_qla_host *vha, fc_port_t *fcport) mb[8] = vha->gnl.size; mb[9] = vha->vp_idx; - sp->done = qla24xx_async_gnl_sp_done; - ql_dbg(ql_dbg_disc, vha, 0x20da, "Async-%s - OUT WWPN %8phC hndl %x\n", sp->name, fcport->port_name, sp->handle); @@ -1145,7 +1164,8 @@ int qla24xx_async_gnl(struct scsi_qla_host *vha, fc_port_t *fcport) return rval; done_free_sp: - sp->free(sp); + /* ref: INIT */ + 
kref_put(&sp->cmd_kref, qla2x00_sp_release); done: fcport->flags &= ~(FCF_ASYNC_ACTIVE | FCF_ASYNC_SENT); return rval; @@ -1191,7 +1211,7 @@ static void qla24xx_async_gpdb_sp_done(srb_t *sp, int res) dma_pool_free(ha->s_dma_pool, sp->u.iocb_cmd.u.mbx.in, sp->u.iocb_cmd.u.mbx.in_dma); - sp->free(sp); + kref_put(&sp->cmd_kref, qla2x00_sp_release); } int qla24xx_post_prli_work(struct scsi_qla_host *vha, fc_port_t *fcport) @@ -1232,11 +1252,13 @@ static void qla2x00_async_prli_sp_done(srb_t *sp, int res) ea.sp = sp; if (res == QLA_OS_TIMER_EXPIRED) ea.data[0] = QLA_OS_TIMER_EXPIRED; + else if (res) + ea.data[0] = MBS_COMMAND_ERROR; qla24xx_handle_prli_done_event(vha, &ea); } - sp->free(sp); + kref_put(&sp->cmd_kref, qla2x00_sp_release); } int @@ -1269,12 +1291,10 @@ qla24xx_async_prli(struct scsi_qla_host *vha, fc_port_t *fcport) sp->type = SRB_PRLI_CMD; sp->name = "prli"; + qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2, + qla2x00_async_prli_sp_done); lio = &sp->u.iocb_cmd; - lio->timeout = qla2x00_async_iocb_timeout; - qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2); - - sp->done = qla2x00_async_prli_sp_done; lio->u.logio.flags = 0; if (NVME_TARGET(vha->hw, fcport)) @@ -1296,7 +1316,8 @@ qla24xx_async_prli(struct scsi_qla_host *vha, fc_port_t *fcport) return rval; done_free_sp: - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); fcport->flags &= ~FCF_ASYNC_SENT; return rval; } @@ -1325,14 +1346,21 @@ int qla24xx_async_gpdb(struct scsi_qla_host *vha, fc_port_t *fcport, u8 opt) struct port_database_24xx *pd; struct qla_hw_data *ha = vha->hw; - if (!vha->flags.online || (fcport->flags & FCF_ASYNC_SENT) || - fcport->loop_id == FC_NO_LOOP_ID) { + if (IS_SESSION_DELETED(fcport)) { ql_log(ql_log_warn, vha, 0xffff, - "%s: %8phC - not sending command.\n", - __func__, fcport->port_name); + "%s: %8phC is being delete - not sending command.\n", + __func__, fcport->port_name); + fcport->flags &= ~FCF_ASYNC_ACTIVE; return rval; 
} + if (!vha->flags.online || fcport->flags & FCF_ASYNC_SENT) { + ql_log(ql_log_warn, vha, 0xffff, + "%s: %8phC online %d flags %x - not sending command.\n", + __func__, fcport->port_name, vha->flags.online, fcport->flags); + goto done; + } + sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL); if (!sp) goto done; @@ -1344,10 +1372,8 @@ int qla24xx_async_gpdb(struct scsi_qla_host *vha, fc_port_t *fcport, u8 opt) sp->name = "gpdb"; sp->gen1 = fcport->rscn_gen; sp->gen2 = fcport->login_gen; - - mbx = &sp->u.iocb_cmd; - mbx->timeout = qla2x00_async_iocb_timeout; - qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2); + qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2, + qla24xx_async_gpdb_sp_done); pd = dma_pool_zalloc(ha->s_dma_pool, GFP_KERNEL, &pd_dma); if (pd == NULL) { @@ -1366,11 +1392,10 @@ int qla24xx_async_gpdb(struct scsi_qla_host *vha, fc_port_t *fcport, u8 opt) mb[9] = vha->vp_idx; mb[10] = opt; - mbx->u.mbx.in = pd; + mbx = &sp->u.iocb_cmd; + mbx->u.mbx.in = (void *)pd; mbx->u.mbx.in_dma = pd_dma; - sp->done = qla24xx_async_gpdb_sp_done; - ql_dbg(ql_dbg_disc, vha, 0x20dc, "Async-%s %8phC hndl %x opt %x\n", sp->name, fcport->port_name, sp->handle, opt); @@ -1384,7 +1409,7 @@ int qla24xx_async_gpdb(struct scsi_qla_host *vha, fc_port_t *fcport, u8 opt) if (pd) dma_pool_free(ha->s_dma_pool, pd, pd_dma); - sp->free(sp); + kref_put(&sp->cmd_kref, qla2x00_sp_release); fcport->flags &= ~FCF_ASYNC_SENT; done: fcport->flags &= ~FCF_ASYNC_ACTIVE; @@ -1556,6 +1581,11 @@ static void qla_chk_n2n_b4_login(struct scsi_qla_host *vha, fc_port_t *fcport) u8 login = 0; int rc; + ql_dbg(ql_dbg_disc, vha, 0x307b, + "%s %8phC DS %d LS %d lid %d retries=%d\n", + __func__, fcport->port_name, fcport->disc_state, + fcport->fw_login_state, fcport->loop_id, fcport->login_retry); + if (qla_tgt_mode_enabled(vha)) return; @@ -1614,7 +1644,8 @@ int qla24xx_fcport_handle_login(struct scsi_qla_host *vha, fc_port_t *fcport) fcport->login_gen, fcport->loop_id, fcport->scan_state, 
fcport->fc4_type); - if (fcport->scan_state != QLA_FCPORT_FOUND) + if (fcport->scan_state != QLA_FCPORT_FOUND || + fcport->disc_state == DSC_DELETE_PEND) return 0; if ((fcport->loop_id != FC_NO_LOOP_ID) && @@ -1635,7 +1666,7 @@ int qla24xx_fcport_handle_login(struct scsi_qla_host *vha, fc_port_t *fcport) if (vha->host->active_mode == MODE_TARGET && !N2N_TOPO(vha->hw)) return 0; - if (fcport->flags & FCF_ASYNC_SENT) { + if (fcport->flags & (FCF_ASYNC_SENT | FCF_ASYNC_ACTIVE)) { set_bit(RELOGIN_NEEDED, &vha->dpc_flags); return 0; } @@ -1970,22 +2001,21 @@ qla2x00_async_tm_cmd(fc_port_t *fcport, uint32_t flags, uint32_t lun, srb_t *sp; int rval = QLA_FUNCTION_FAILED; + /* ref: INIT */ sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL); if (!sp) goto done; - tm_iocb = &sp->u.iocb_cmd; sp->type = SRB_TM_CMD; sp->name = "tmf"; + qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha), + qla2x00_tmf_sp_done); + sp->u.iocb_cmd.timeout = qla2x00_tmf_iocb_timeout; - tm_iocb->timeout = qla2x00_tmf_iocb_timeout; + tm_iocb = &sp->u.iocb_cmd; init_completion(&tm_iocb->u.tmf.comp); - qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha)); - tm_iocb->u.tmf.flags = flags; tm_iocb->u.tmf.lun = lun; - tm_iocb->u.tmf.data = tag; - sp->done = qla2x00_tmf_sp_done; ql_dbg(ql_dbg_taskm, vha, 0x802f, "Async-tmf hdl=%x loop-id=%x portid=%02x%02x%02x.\n", @@ -2015,7 +2045,8 @@ qla2x00_async_tm_cmd(fc_port_t *fcport, uint32_t flags, uint32_t lun, } done_free_sp: - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); fcport->flags &= ~FCF_ASYNC_SENT; done: return rval; @@ -2074,13 +2105,6 @@ qla24xx_handle_prli_done_event(struct scsi_qla_host *vha, struct event_arg *ea) qla24xx_post_gpdb_work(vha, ea->fcport, 0); break; default: - if ((ea->iop[0] == LSC_SCODE_ELS_REJECT) && - (ea->iop[1] == 0x50000)) { /* reson 5=busy expl:0x0 */ - set_bit(RELOGIN_NEEDED, &vha->dpc_flags); - ea->fcport->fw_login_state = DSC_LS_PLOGI_COMP; - break; - } - sp = ea->sp; ql_dbg(ql_dbg_disc, 
vha, 0x2118, "%s %d %8phC priority %s, fc4type %x prev try %s\n", @@ -2224,12 +2248,7 @@ qla24xx_handle_plogi_done_event(struct scsi_qla_host *vha, struct event_arg *ea) ql_dbg(ql_dbg_disc, vha, 0x20eb, "%s %d %8phC cmd error %x\n", __func__, __LINE__, ea->fcport->port_name, ea->data[1]); - ea->fcport->flags &= ~FCF_ASYNC_SENT; - qla2x00_set_fcport_disc_state(ea->fcport, DSC_LOGIN_FAILED); - if (ea->data[1] & QLA_LOGIO_LOGIN_RETRIED) - set_bit(RELOGIN_NEEDED, &vha->dpc_flags); - else - qla2x00_mark_device_lost(vha, ea->fcport, 1); + qlt_schedule_sess_for_deletion(ea->fcport); break; case MBS_LOOP_ID_USED: /* data[1] = IO PARAM 1 = nport ID */ @@ -3472,6 +3491,14 @@ qla2x00_alloc_fw_dump(scsi_qla_host_t *vha) struct rsp_que *rsp = ha->rsp_q_map[0]; struct qla2xxx_fw_dump *fw_dump; + if (ha->fw_dump) { + ql_dbg(ql_dbg_init, vha, 0x00bd, + "Firmware dump already allocated.\n"); + return; + } + + ha->fw_dumped = 0; + ha->fw_dump_cap_flags = 0; dump_size = fixed_size = mem_size = eft_size = fce_size = mq_size = 0; req_q_size = rsp_q_size = 0; @@ -3482,7 +3509,7 @@ qla2x00_alloc_fw_dump(scsi_qla_host_t *vha) mem_size = (ha->fw_memory_size - 0x11000 + 1) * sizeof(uint16_t); } else if (IS_FWI2_CAPABLE(ha)) { - if (IS_QLA83XX(ha) || IS_QLA27XX(ha) || IS_QLA28XX(ha)) + if (IS_QLA83XX(ha)) fixed_size = offsetof(struct qla83xx_fw_dump, ext_mem); else if (IS_QLA81XX(ha)) fixed_size = offsetof(struct qla81xx_fw_dump, ext_mem); @@ -3494,8 +3521,7 @@ qla2x00_alloc_fw_dump(scsi_qla_host_t *vha) mem_size = (ha->fw_memory_size - 0x100000 + 1) * sizeof(uint32_t); if (ha->mqenable) { - if (!IS_QLA83XX(ha) && !IS_QLA27XX(ha) && - !IS_QLA28XX(ha)) + if (!IS_QLA83XX(ha)) mq_size = sizeof(struct qla2xxx_mq_chain); /* * Allocate maximum buffer size for all queues - Q0. 
@@ -4056,8 +4082,7 @@ qla2x00_setup_chip(scsi_qla_host_t *vha) ha->fw_major_version, ha->fw_minor_version, ha->fw_subminor_version); - if (IS_QLA83XX(ha) || IS_QLA27XX(ha) || - IS_QLA28XX(ha)) { + if (IS_QLA83XX(ha)) { ha->flags.fac_supported = 0; rval = QLA_SUCCESS; } @@ -5602,6 +5627,13 @@ qla2x00_configure_local_loop(scsi_qla_host_t *vha) memcpy(fcport->node_name, new_fcport->node_name, WWN_SIZE); fcport->scan_state = QLA_FCPORT_FOUND; + if (fcport->login_retry == 0) { + fcport->login_retry = vha->hw->login_retry_count; + ql_dbg(ql_dbg_disc, vha, 0x2135, + "Port login retry %8phN, lid 0x%04x retry cnt=%d.\n", + fcport->port_name, fcport->loop_id, + fcport->login_retry); + } found++; break; } @@ -5735,6 +5767,8 @@ qla2x00_reg_remote_port(scsi_qla_host_t *vha, fc_port_t *fcport) if (atomic_read(&fcport->state) == FCS_ONLINE) return; + qla2x00_set_fcport_state(fcport, FCS_ONLINE); + rport_ids.node_name = wwn_to_u64(fcport->node_name); rport_ids.port_name = wwn_to_u64(fcport->port_name); rport_ids.port_id = fcport->d_id.b.domain << 16 | @@ -5835,6 +5869,7 @@ qla2x00_update_fcport(scsi_qla_host_t *vha, fc_port_t *fcport) qla2x00_reg_remote_port(vha, fcport); break; case MODE_TARGET: + qla2x00_set_fcport_state(fcport, FCS_ONLINE); if (!vha->vha_tgt.qla_tgt->tgt_stop && !vha->vha_tgt.qla_tgt->tgt_stopped) qlt_fc_port_added(vha, fcport); @@ -5852,8 +5887,6 @@ qla2x00_update_fcport(scsi_qla_host_t *vha, fc_port_t *fcport) if (NVME_TARGET(vha->hw, fcport)) qla_nvme_register_remote(vha, fcport); - qla2x00_set_fcport_state(fcport, FCS_ONLINE); - if (IS_IIDMA_CAPABLE(vha->hw) && vha->hw->flags.gpsc_supported) { if (fcport->id_changed) { fcport->id_changed = 0; @@ -9390,7 +9423,7 @@ struct qla_qpair *qla2xxx_create_qpair(struct scsi_qla_host *vha, int qos, qpair->rsp->req = qpair->req; qpair->rsp->qpair = qpair; /* init qpair to this cpu. Will adjust at run time. 
*/ - qla_cpu_update(qpair, smp_processor_id()); + qla_cpu_update(qpair, raw_smp_processor_id()); if (IS_T10_PI_CAPABLE(ha) && ql2xenabledif) { if (ha->fw_attributes & BIT_4) diff --git a/drivers/scsi/qla2xxx/qla_inline.h b/drivers/scsi/qla2xxx/qla_inline.h index 5f3b7995cc8f3..db17f7f410cdd 100644 --- a/drivers/scsi/qla2xxx/qla_inline.h +++ b/drivers/scsi/qla2xxx/qla_inline.h @@ -184,6 +184,8 @@ static void qla2xxx_init_sp(srb_t *sp, scsi_qla_host_t *vha, sp->vha = vha; sp->qpair = qpair; sp->cmd_type = TYPE_SRB; + /* ref : INIT - normal flow */ + kref_init(&sp->cmd_kref); INIT_LIST_HEAD(&sp->elem); } diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c index ed604f2185bf2..e0fe9ddb4bd2c 100644 --- a/drivers/scsi/qla2xxx/qla_iocb.c +++ b/drivers/scsi/qla2xxx/qla_iocb.c @@ -2560,11 +2560,38 @@ qla24xx_tm_iocb(srb_t *sp, struct tsk_mgmt_entry *tsk) } } -void qla2x00_init_timer(srb_t *sp, unsigned long tmo) +static void +qla2x00_async_done(struct srb *sp, int res) +{ + if (del_timer(&sp->u.iocb_cmd.timer)) { + /* + * Successfully cancelled the timeout handler + * ref: TMR + */ + if (kref_put(&sp->cmd_kref, qla2x00_sp_release)) + return; + } + sp->async_done(sp, res); +} + +void +qla2x00_sp_release(struct kref *kref) +{ + struct srb *sp = container_of(kref, struct srb, cmd_kref); + + sp->free(sp); +} + +void +qla2x00_init_async_sp(srb_t *sp, unsigned long tmo, + void (*done)(struct srb *sp, int res)) { timer_setup(&sp->u.iocb_cmd.timer, qla2x00_sp_timeout, 0); - sp->u.iocb_cmd.timer.expires = jiffies + tmo * HZ; + sp->done = qla2x00_async_done; + sp->async_done = done; sp->free = qla2x00_sp_free; + sp->u.iocb_cmd.timeout = qla2x00_async_iocb_timeout; + sp->u.iocb_cmd.timer.expires = jiffies + tmo * HZ; if (IS_QLAFX00(sp->vha->hw) && sp->type == SRB_FXIOCB_DCMD) init_completion(&sp->u.iocb_cmd.u.fxiocb.fxiocb_comp); sp->start_timer = 1; @@ -2651,7 +2678,9 @@ qla24xx_els_dcmd_iocb(scsi_qla_host_t *vha, int els_opcode, return -ENOMEM; } - /* 
Alloc SRB structure */ + /* Alloc SRB structure + * ref: INIT + */ sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL); if (!sp) { kfree(fcport); @@ -2672,18 +2701,19 @@ qla24xx_els_dcmd_iocb(scsi_qla_host_t *vha, int els_opcode, sp->type = SRB_ELS_DCMD; sp->name = "ELS_DCMD"; sp->fcport = fcport; - elsio->timeout = qla2x00_els_dcmd_iocb_timeout; - qla2x00_init_timer(sp, ELS_DCMD_TIMEOUT); - init_completion(&sp->u.iocb_cmd.u.els_logo.comp); - sp->done = qla2x00_els_dcmd_sp_done; + qla2x00_init_async_sp(sp, ELS_DCMD_TIMEOUT, + qla2x00_els_dcmd_sp_done); sp->free = qla2x00_els_dcmd_sp_free; + sp->u.iocb_cmd.timeout = qla2x00_els_dcmd_iocb_timeout; + init_completion(&sp->u.iocb_cmd.u.els_logo.comp); elsio->u.els_logo.els_logo_pyld = dma_alloc_coherent(&ha->pdev->dev, DMA_POOL_SIZE, &elsio->u.els_logo.els_logo_pyld_dma, GFP_KERNEL); if (!elsio->u.els_logo.els_logo_pyld) { - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); return QLA_FUNCTION_FAILED; } @@ -2706,7 +2736,8 @@ qla24xx_els_dcmd_iocb(scsi_qla_host_t *vha, int els_opcode, rval = qla2x00_start_sp(sp); if (rval != QLA_SUCCESS) { - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); return QLA_FUNCTION_FAILED; } @@ -2717,7 +2748,8 @@ qla24xx_els_dcmd_iocb(scsi_qla_host_t *vha, int els_opcode, wait_for_completion(&elsio->u.els_logo.comp); - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); return rval; } @@ -2850,7 +2882,6 @@ static void qla2x00_els_dcmd2_sp_done(srb_t *sp, int res) sp->name, res, sp->handle, fcport->d_id.b24, fcport->port_name); fcport->flags &= ~(FCF_ASYNC_SENT|FCF_ASYNC_ACTIVE); - del_timer(&sp->u.iocb_cmd.timer); if (sp->flags & SRB_WAKEUP_ON_COMP) complete(&lio->u.els_plogi.comp); @@ -2927,6 +2958,7 @@ static void qla2x00_els_dcmd2_sp_done(srb_t *sp, int res) set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags); qla2xxx_wake_dpc(vha); + break; } fallthrough; default: @@ -2936,9 +2968,7 @@ static void 
qla2x00_els_dcmd2_sp_done(srb_t *sp, int res) fw_status[0], fw_status[1], fw_status[2]); fcport->flags &= ~FCF_ASYNC_SENT; - qla2x00_set_fcport_disc_state(fcport, - DSC_LOGIN_FAILED); - set_bit(RELOGIN_NEEDED, &vha->dpc_flags); + qlt_schedule_sess_for_deletion(fcport); break; } break; @@ -2950,8 +2980,7 @@ static void qla2x00_els_dcmd2_sp_done(srb_t *sp, int res) fw_status[0], fw_status[1], fw_status[2]); sp->fcport->flags &= ~FCF_ASYNC_SENT; - qla2x00_set_fcport_disc_state(fcport, DSC_LOGIN_FAILED); - set_bit(RELOGIN_NEEDED, &vha->dpc_flags); + qlt_schedule_sess_for_deletion(fcport); break; } @@ -2960,7 +2989,8 @@ static void qla2x00_els_dcmd2_sp_done(srb_t *sp, int res) struct srb_iocb *elsio = &sp->u.iocb_cmd; qla2x00_els_dcmd2_free(vha, &elsio->u.els_plogi); - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); return; } e->u.iosb.sp = sp; @@ -2978,7 +3008,9 @@ qla24xx_els_dcmd2_iocb(scsi_qla_host_t *vha, int els_opcode, int rval = QLA_SUCCESS; void *ptr, *resp_ptr; - /* Alloc SRB structure */ + /* Alloc SRB structure + * ref: INIT + */ sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL); if (!sp) { ql_log(ql_log_info, vha, 0x70e6, @@ -2993,17 +3025,16 @@ qla24xx_els_dcmd2_iocb(scsi_qla_host_t *vha, int els_opcode, ql_dbg(ql_dbg_io, vha, 0x3073, "%s Enter: PLOGI portid=%06x\n", __func__, fcport->d_id.b24); - sp->type = SRB_ELS_DCMD; - sp->name = "ELS_DCMD"; - sp->fcport = fcport; - - elsio->timeout = qla2x00_els_dcmd2_iocb_timeout; if (wait) sp->flags = SRB_WAKEUP_ON_COMP; - qla2x00_init_timer(sp, ELS_DCMD_TIMEOUT + 2); + sp->type = SRB_ELS_DCMD; + sp->name = "ELS_DCMD"; + sp->fcport = fcport; + qla2x00_init_async_sp(sp, ELS_DCMD_TIMEOUT + 2, + qla2x00_els_dcmd2_sp_done); + sp->u.iocb_cmd.timeout = qla2x00_els_dcmd2_iocb_timeout; - sp->done = qla2x00_els_dcmd2_sp_done; elsio->u.els_plogi.tx_size = elsio->u.els_plogi.rx_size = DMA_POOL_SIZE; ptr = elsio->u.els_plogi.els_plogi_pyld = @@ -3068,7 +3099,8 @@ qla24xx_els_dcmd2_iocb(scsi_qla_host_t 
*vha, int els_opcode, out: fcport->flags &= ~(FCF_ASYNC_SENT | FCF_ASYNC_ACTIVE); qla2x00_els_dcmd2_free(vha, &elsio->u.els_plogi); - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); done: return rval; } @@ -3879,8 +3911,15 @@ qla2x00_start_sp(srb_t *sp) break; } - if (sp->start_timer) + if (sp->start_timer) { + /* ref: TMR timer ref + * this code should be just before start_iocbs function + * This will make sure that caller function don't to do + * kref_put even on failure + */ + kref_get(&sp->cmd_kref); add_timer(&sp->u.iocb_cmd.timer); + } wmb(); qla2x00_start_iocbs(vha, qp->req); diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index aaf6504570fdd..198b782d77901 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -2498,6 +2498,7 @@ qla24xx_tm_iocb_entry(scsi_qla_host_t *vha, struct req_que *req, void *tsk) iocb->u.tmf.data = QLA_FUNCTION_FAILED; } else if ((le16_to_cpu(sts->scsi_status) & SS_RESPONSE_INFO_LEN_VALID)) { + host_to_fcp_swap(sts->data, sizeof(sts->data)); if (le32_to_cpu(sts->rsp_data_len) < 4) { ql_log(ql_log_warn, fcport->vha, 0x503b, "Async-%s error - hdl=%x not enough response(%d).\n", diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c index 10d2655ef6767..7f236db058869 100644 --- a/drivers/scsi/qla2xxx/qla_mbx.c +++ b/drivers/scsi/qla2xxx/qla_mbx.c @@ -9,6 +9,12 @@ #include <linux/delay.h> #include <linux/gfp.h> +#ifdef CONFIG_PPC +#define IS_PPCARCH true +#else +#define IS_PPCARCH false +#endif + static struct mb_cmd_name { uint16_t cmd; const char *str; @@ -728,6 +734,9 @@ qla2x00_execute_fw(scsi_qla_host_t *vha, uint32_t risc_addr) vha->min_supported_speed = nv->min_supported_speed; } + + if (IS_PPCARCH) + mcp->mb[11] |= BIT_4; } if (ha->flags.exlogins_enabled) @@ -3029,8 +3038,7 @@ qla2x00_get_resource_cnts(scsi_qla_host_t *vha) ha->orig_fw_iocb_count = mcp->mb[10]; if (ha->flags.npiv_supported) ha->max_npiv_vports = mcp->mb[11]; - if (IS_QLA81XX(ha) || 
IS_QLA83XX(ha) || IS_QLA27XX(ha) || - IS_QLA28XX(ha)) + if (IS_QLA81XX(ha) || IS_QLA83XX(ha)) ha->fw_max_fcf_count = mcp->mb[12]; } @@ -5621,7 +5629,7 @@ qla2x00_get_data_rate(scsi_qla_host_t *vha) mcp->out_mb = MBX_1|MBX_0; mcp->in_mb = MBX_2|MBX_1|MBX_0; if (IS_QLA83XX(ha) || IS_QLA27XX(ha) || IS_QLA28XX(ha)) - mcp->in_mb |= MBX_3; + mcp->in_mb |= MBX_4|MBX_3; mcp->tov = MBX_TOV_SECONDS; mcp->flags = 0; rval = qla2x00_mailbox_command(vha, mcp); @@ -6479,23 +6487,21 @@ int qla24xx_send_mb_cmd(struct scsi_qla_host *vha, mbx_cmd_t *mcp) if (!vha->hw->flags.fw_started) goto done; + /* ref: INIT */ sp = qla2x00_get_sp(vha, NULL, GFP_KERNEL); if (!sp) goto done; - sp->type = SRB_MB_IOCB; - sp->name = mb_to_str(mcp->mb[0]); - c = &sp->u.iocb_cmd; - c->timeout = qla2x00_async_iocb_timeout; init_completion(&c->u.mbx.comp); - qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2); + sp->type = SRB_MB_IOCB; + sp->name = mb_to_str(mcp->mb[0]); + qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2, + qla2x00_async_mb_sp_done); memcpy(sp->u.iocb_cmd.u.mbx.out_mb, mcp->mb, SIZEOF_IOCB_MB_REG); - sp->done = qla2x00_async_mb_sp_done; - rval = qla2x00_start_sp(sp); if (rval != QLA_SUCCESS) { ql_dbg(ql_dbg_mbx, vha, 0x1018, @@ -6527,7 +6533,8 @@ int qla24xx_send_mb_cmd(struct scsi_qla_host *vha, mbx_cmd_t *mcp) } done_free_sp: - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); done: return rval; } diff --git a/drivers/scsi/qla2xxx/qla_mid.c b/drivers/scsi/qla2xxx/qla_mid.c index 1c024055f8c50..e6b5c4ccce97b 100644 --- a/drivers/scsi/qla2xxx/qla_mid.c +++ b/drivers/scsi/qla2xxx/qla_mid.c @@ -965,6 +965,7 @@ int qla24xx_control_vp(scsi_qla_host_t *vha, int cmd) if (vp_index == 0 || vp_index >= ha->max_npiv_vports) return QLA_PARAMETER_ERROR; + /* ref: INIT */ sp = qla2x00_get_sp(base_vha, NULL, GFP_KERNEL); if (!sp) return rval; @@ -972,9 +973,8 @@ int qla24xx_control_vp(scsi_qla_host_t *vha, int cmd) sp->type = SRB_CTRL_VP; sp->name = 
"ctrl_vp"; sp->comp = ∁ - sp->done = qla_ctrlvp_sp_done; - sp->u.iocb_cmd.timeout = qla2x00_async_iocb_timeout; - qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2); + qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2, + qla_ctrlvp_sp_done); sp->u.iocb_cmd.u.ctrlvp.cmd = cmd; sp->u.iocb_cmd.u.ctrlvp.vp_index = vp_index; @@ -1008,6 +1008,7 @@ int qla24xx_control_vp(scsi_qla_host_t *vha, int cmd) break; } done: - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); return rval; } diff --git a/drivers/scsi/qla2xxx/qla_mr.c b/drivers/scsi/qla2xxx/qla_mr.c index 350b0c4346fb6..f726eb8449c5e 100644 --- a/drivers/scsi/qla2xxx/qla_mr.c +++ b/drivers/scsi/qla2xxx/qla_mr.c @@ -1787,17 +1787,18 @@ qlafx00_fx_disc(scsi_qla_host_t *vha, fc_port_t *fcport, uint16_t fx_type) struct register_host_info *preg_hsi; struct new_utsname *p_sysid = NULL; + /* ref: INIT */ sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL); if (!sp) goto done; sp->type = SRB_FXIOCB_DCMD; sp->name = "fxdisc"; + qla2x00_init_async_sp(sp, FXDISC_TIMEOUT, + qla2x00_fxdisc_sp_done); + sp->u.iocb_cmd.timeout = qla2x00_fxdisc_iocb_timeout; fdisc = &sp->u.iocb_cmd; - fdisc->timeout = qla2x00_fxdisc_iocb_timeout; - qla2x00_init_timer(sp, FXDISC_TIMEOUT); - switch (fx_type) { case FXDISC_GET_CONFIG_INFO: fdisc->u.fxiocb.flags = @@ -1898,7 +1899,6 @@ qlafx00_fx_disc(scsi_qla_host_t *vha, fc_port_t *fcport, uint16_t fx_type) } fdisc->u.fxiocb.req_func_type = cpu_to_le16(fx_type); - sp->done = qla2x00_fxdisc_sp_done; rval = qla2x00_start_sp(sp); if (rval != QLA_SUCCESS) @@ -1974,7 +1974,8 @@ qlafx00_fx_disc(scsi_qla_host_t *vha, fc_port_t *fcport, uint16_t fx_type) dma_free_coherent(&ha->pdev->dev, fdisc->u.fxiocb.req_len, fdisc->u.fxiocb.req_addr, fdisc->u.fxiocb.req_dma_handle); done_free_sp: - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); done: return rval; } diff --git a/drivers/scsi/qla2xxx/qla_nvme.c b/drivers/scsi/qla2xxx/qla_nvme.c index 
e22ec7cb65db5..4cfc2efdf7766 100644 --- a/drivers/scsi/qla2xxx/qla_nvme.c +++ b/drivers/scsi/qla2xxx/qla_nvme.c @@ -37,6 +37,11 @@ int qla_nvme_register_remote(struct scsi_qla_host *vha, struct fc_port *fcport) (fcport->nvme_flag & NVME_FLAG_REGISTERED)) return 0; + if (atomic_read(&fcport->state) == FCS_ONLINE) + return 0; + + qla2x00_set_fcport_state(fcport, FCS_ONLINE); + fcport->nvme_flag &= ~NVME_FLAG_RESETTING; memset(&req, 0, sizeof(struct nvme_fc_port_info)); @@ -170,6 +175,18 @@ static void qla_nvme_release_fcp_cmd_kref(struct kref *kref) qla2xxx_rel_qpair_sp(sp->qpair, sp); } +static void qla_nvme_ls_unmap(struct srb *sp, struct nvmefc_ls_req *fd) +{ + if (sp->flags & SRB_DMA_VALID) { + struct srb_iocb *nvme = &sp->u.iocb_cmd; + struct qla_hw_data *ha = sp->fcport->vha->hw; + + dma_unmap_single(&ha->pdev->dev, nvme->u.nvme.cmd_dma, + fd->rqstlen, DMA_TO_DEVICE); + sp->flags &= ~SRB_DMA_VALID; + } +} + static void qla_nvme_release_ls_cmd_kref(struct kref *kref) { struct srb *sp = container_of(kref, struct srb, cmd_kref); @@ -186,6 +203,8 @@ static void qla_nvme_release_ls_cmd_kref(struct kref *kref) spin_unlock_irqrestore(&priv->cmd_lock, flags); fd = priv->fd; + + qla_nvme_ls_unmap(sp, fd); fd->done(fd, priv->comp_status); out: qla2x00_rel_sp(sp); @@ -356,6 +375,8 @@ static int qla_nvme_ls_req(struct nvme_fc_local_port *lport, dma_sync_single_for_device(&ha->pdev->dev, nvme->u.nvme.cmd_dma, fd->rqstlen, DMA_TO_DEVICE); + sp->flags |= SRB_DMA_VALID; + rval = qla2x00_start_sp(sp); if (rval != QLA_SUCCESS) { ql_log(ql_log_warn, vha, 0x700e, @@ -363,6 +384,7 @@ static int qla_nvme_ls_req(struct nvme_fc_local_port *lport, wake_up(&sp->nvme_ls_waitq); sp->priv = NULL; priv->sp = NULL; + qla_nvme_ls_unmap(sp, fd); qla2x00_rel_sp(sp); return rval; } diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index abcd309172638..6dc2189badd33 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -728,7 +728,8 @@ void 
qla2x00_sp_compl(srb_t *sp, int res) struct scsi_cmnd *cmd = GET_CMD_SP(sp); struct completion *comp = sp->comp; - sp->free(sp); + /* kref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); cmd->result = res; CMD_SP(cmd) = NULL; scsi_done(cmd); @@ -819,7 +820,8 @@ void qla2xxx_qpair_sp_compl(srb_t *sp, int res) struct scsi_cmnd *cmd = GET_CMD_SP(sp); struct completion *comp = sp->comp; - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); cmd->result = res; CMD_SP(cmd) = NULL; scsi_done(cmd); @@ -919,6 +921,7 @@ qla2xxx_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd) goto qc24_target_busy; sp = scsi_cmd_priv(cmd); + /* ref: INIT */ qla2xxx_init_sp(sp, vha, vha->hw->base_qpair, fcport); sp->u.scmd.cmd = cmd; @@ -938,7 +941,8 @@ qla2xxx_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd) return 0; qc24_host_busy_free_sp: - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); qc24_target_busy: return SCSI_MLQUEUE_TARGET_BUSY; @@ -1008,6 +1012,7 @@ qla2xxx_mqueuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd, goto qc24_target_busy; sp = scsi_cmd_priv(cmd); + /* ref: INIT */ qla2xxx_init_sp(sp, vha, qpair, fcport); sp->u.scmd.cmd = cmd; @@ -1026,7 +1031,8 @@ qla2xxx_mqueuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd, return 0; qc24_host_busy_free_sp: - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); qc24_target_busy: return SCSI_MLQUEUE_TARGET_BUSY; @@ -3748,8 +3754,7 @@ qla2x00_unmap_iobases(struct qla_hw_data *ha) if (ha->mqiobase) iounmap(ha->mqiobase); - if ((IS_QLA83XX(ha) || IS_QLA27XX(ha) || IS_QLA28XX(ha)) && - ha->msixbase) + if (ha->msixbase) iounmap(ha->msixbase); } } @@ -3891,6 +3896,8 @@ qla24xx_free_purex_list(struct purex_list *list) spin_lock_irqsave(&list->lock, flags); list_for_each_entry_safe(item, next, &list->head, list) { list_del(&item->list); + if (item == &item->vha->default_item) + continue; kfree(item); } 
spin_unlock_irqrestore(&list->lock, flags); @@ -5526,6 +5533,11 @@ void qla2x00_relogin(struct scsi_qla_host *vha) memset(&ea, 0, sizeof(ea)); ea.fcport = fcport; qla24xx_handle_relogin_event(vha, &ea); + } else if (vha->hw->current_topology == + ISP_CFG_NL && + IS_QLA2XXX_MIDTYPE(vha->hw)) { + (void)qla24xx_fcport_handle_login(vha, + fcport); } else if (vha->hw->current_topology == ISP_CFG_NL) { fcport->login_retry--; @@ -7199,7 +7211,7 @@ static bool qla_do_heartbeat(struct scsi_qla_host *vha) return do_heartbeat; } -static void qla_heart_beat(struct scsi_qla_host *vha) +static void qla_heart_beat(struct scsi_qla_host *vha, u16 dpc_started) { struct qla_hw_data *ha = vha->hw; @@ -7209,8 +7221,19 @@ static void qla_heart_beat(struct scsi_qla_host *vha) if (vha->hw->flags.eeh_busy || qla2x00_chip_is_down(vha)) return; - if (qla_do_heartbeat(vha)) + /* + * dpc thread cannot run if heartbeat is running at the same time. + * We also do not want to starve heartbeat task. Therefore, do + * heartbeat task at least once every 5 seconds. 
+ */ + if (dpc_started && + time_before(jiffies, ha->last_heartbeat_run_jiffies + 5 * HZ)) + return; + + if (qla_do_heartbeat(vha)) { + ha->last_heartbeat_run_jiffies = jiffies; queue_work(ha->wq, &ha->heartbeat_work); + } } /************************************************************************** @@ -7401,6 +7424,8 @@ qla2x00_timer(struct timer_list *t) start_dpc++; } + /* borrowing w to signify dpc will run */ + w = 0; /* Schedule the DPC routine if needed */ if ((test_bit(ISP_ABORT_NEEDED, &vha->dpc_flags) || test_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags) || @@ -7433,9 +7458,10 @@ qla2x00_timer(struct timer_list *t) test_bit(RELOGIN_NEEDED, &vha->dpc_flags), test_bit(PROCESS_PUREX_IOCB, &vha->dpc_flags)); qla2xxx_wake_dpc(vha); + w = 1; } - qla_heart_beat(vha); + qla_heart_beat(vha, w); qla2x00_restart_timer(vha, WATCH_INTERVAL); } @@ -7633,7 +7659,7 @@ qla2xxx_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state) switch (state) { case pci_channel_io_normal: - ha->flags.eeh_busy = 0; + qla_pci_set_eeh_busy(vha); if (ql2xmqsupport || ql2xnvmeenable) { set_bit(QPAIR_ONLINE_CHECK_NEEDED, &vha->dpc_flags); qla2xxx_wake_dpc(vha); @@ -7674,9 +7700,16 @@ qla2xxx_pci_mmio_enabled(struct pci_dev *pdev) "mmio enabled\n"); ha->pci_error_state = QLA_PCI_MMIO_ENABLED; + if (IS_QLA82XX(ha)) return PCI_ERS_RESULT_RECOVERED; + if (qla2x00_isp_reg_stat(ha)) { + ql_log(ql_log_info, base_vha, 0x803f, + "During mmio enabled, PCI/Register disconnect still detected.\n"); + goto out; + } + spin_lock_irqsave(&ha->hardware_lock, flags); if (IS_QLA2100(ha) || IS_QLA2200(ha)){ stat = rd_reg_word(&reg->hccr); @@ -7698,6 +7731,7 @@ qla2xxx_pci_mmio_enabled(struct pci_dev *pdev) "RISC paused -- mmio_enabled, Dumping firmware.\n"); qla2xxx_dump_fw(base_vha); } +out: /* set PCI_ERS_RESULT_NEED_RESET to trigger call to qla2xxx_pci_slot_reset */ ql_dbg(ql_dbg_aer, base_vha, 0x600d, "mmio enabled returning.\n"); diff --git a/drivers/scsi/qla2xxx/qla_sup.c 
b/drivers/scsi/qla2xxx/qla_sup.c index a0aeba69513d4..c092a6b1ced4f 100644 --- a/drivers/scsi/qla2xxx/qla_sup.c +++ b/drivers/scsi/qla2xxx/qla_sup.c @@ -844,7 +844,7 @@ qla2xxx_get_flt_info(scsi_qla_host_t *vha, uint32_t flt_addr) ha->flt_region_nvram = start; break; case FLT_REG_IMG_PRI_27XX: - if (IS_QLA27XX(ha) && !IS_QLA28XX(ha)) + if (IS_QLA27XX(ha) || IS_QLA28XX(ha)) ha->flt_region_img_status_pri = start; break; case FLT_REG_IMG_SEC_27XX: @@ -1356,7 +1356,7 @@ qla24xx_write_flash_data(scsi_qla_host_t *vha, __le32 *dwptr, uint32_t faddr, flash_data_addr(ha, faddr), le32_to_cpu(*dwptr)); if (ret) { ql_dbg(ql_dbg_user, vha, 0x7006, - "Failed slopw write %x (%x)\n", faddr, *dwptr); + "Failed slow write %x (%x)\n", faddr, *dwptr); break; } } diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 8993d438e0b72..b109716d44fb7 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -620,7 +620,7 @@ static void qla2x00_async_nack_sp_done(srb_t *sp, int res) } spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags); - sp->free(sp); + kref_put(&sp->cmd_kref, qla2x00_sp_release); } int qla24xx_async_notify_ack(scsi_qla_host_t *vha, fc_port_t *fcport, @@ -656,12 +656,10 @@ int qla24xx_async_notify_ack(scsi_qla_host_t *vha, fc_port_t *fcport, sp->type = type; sp->name = "nack"; - - sp->u.iocb_cmd.timeout = qla2x00_async_iocb_timeout; - qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha)+2); + qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2, + qla2x00_async_nack_sp_done); sp->u.iocb_cmd.u.nack.ntfy = ntfy; - sp->done = qla2x00_async_nack_sp_done; ql_dbg(ql_dbg_disc, vha, 0x20f4, "Async-%s %8phC hndl %x %s\n", @@ -674,7 +672,7 @@ int qla24xx_async_notify_ack(scsi_qla_host_t *vha, fc_port_t *fcport, return rval; done_free_sp: - sp->free(sp); + kref_put(&sp->cmd_kref, qla2x00_sp_release); done: fcport->flags &= ~FCF_ASYNC_SENT; return rval; @@ -3320,6 +3318,7 @@ int qlt_xmit_response(struct 
qla_tgt_cmd *cmd, int xmit_type, "RESET-RSP online/active/old-count/new-count = %d/%d/%d/%d.\n", vha->flags.online, qla2x00_reset_active(vha), cmd->reset_count, qpair->chip_reset); + res = 0; goto out_unmap_unlock; } @@ -7221,8 +7220,7 @@ qlt_probe_one_stage1(struct scsi_qla_host *base_vha, struct qla_hw_data *ha) if (!QLA_TGT_MODE_ENABLED()) return; - if ((ql2xenablemsix == 0) || IS_QLA83XX(ha) || IS_QLA27XX(ha) || - IS_QLA28XX(ha)) { + if (ha->mqenable || IS_QLA83XX(ha) || IS_QLA27XX(ha) || IS_QLA28XX(ha)) { ISP_ATIO_Q_IN(base_vha) = &ha->mqiobase->isp25mq.atio_q_in; ISP_ATIO_Q_OUT(base_vha) = &ha->mqiobase->isp25mq.atio_q_out; } else { diff --git a/drivers/scsi/qla2xxx/qla_tmpl.c b/drivers/scsi/qla2xxx/qla_tmpl.c index 26c13a953b975..b0a74b036cf4b 100644 --- a/drivers/scsi/qla2xxx/qla_tmpl.c +++ b/drivers/scsi/qla2xxx/qla_tmpl.c @@ -435,8 +435,13 @@ qla27xx_fwdt_entry_t266(struct scsi_qla_host *vha, { ql_dbg(ql_dbg_misc, vha, 0xd20a, "%s: reset risc [%lx]\n", __func__, *len); - if (buf) - WARN_ON_ONCE(qla24xx_soft_reset(vha->hw) != QLA_SUCCESS); + if (buf) { + if (qla24xx_soft_reset(vha->hw) != QLA_SUCCESS) { + ql_dbg(ql_dbg_async, vha, 0x5001, + "%s: unable to soft reset\n", __func__); + return INVALID_ENTRY; + } + } return qla27xx_next_entry(ent); } diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 60a6ae9d1219f..a75499616f5ef 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -484,8 +484,13 @@ static void scsi_report_sense(struct scsi_device *sdev, if (sshdr->asc == 0x29) { evt_type = SDEV_EVT_POWER_ON_RESET_OCCURRED; - sdev_printk(KERN_WARNING, sdev, - "Power-on or device reset occurred\n"); + /* + * Do not print message if it is an expected side-effect + * of runtime PM. 
+ */ + if (!sdev->silence_suspend) + sdev_printk(KERN_WARNING, sdev, + "Power-on or device reset occurred\n"); } if (sshdr->asc == 0x2a && sshdr->ascq == 0x01) { diff --git a/drivers/scsi/scsi_logging.c b/drivers/scsi/scsi_logging.c index 1f8f80b2dbfcb..a9f8de5e9639a 100644 --- a/drivers/scsi/scsi_logging.c +++ b/drivers/scsi/scsi_logging.c @@ -30,7 +30,7 @@ static inline const char *scmd_name(const struct scsi_cmnd *scmd) { struct request *rq = scsi_cmd_to_rq((struct scsi_cmnd *)scmd); - if (!rq->q->disk) + if (!rq->q || !rq->q->disk) return NULL; return rq->q->disk->disk_name; } diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index f4e6c68ac99ed..2ef78083f1eff 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -223,6 +223,8 @@ static int scsi_realloc_sdev_budget_map(struct scsi_device *sdev, int ret; struct sbitmap sb_backup; + depth = min_t(unsigned int, depth, scsi_device_max_queue_depth(sdev)); + /* * realloc if new shift is calculated, which is caused by setting * up one new default queue depth after calling ->slave_configure @@ -245,6 +247,9 @@ static int scsi_realloc_sdev_budget_map(struct scsi_device *sdev, scsi_device_max_queue_depth(sdev), new_shift, GFP_KERNEL, sdev->request_queue->node, false, true); + if (!ret) + sbitmap_resize(&sdev->budget_map, depth); + if (need_free) { if (ret) sdev->budget_map = sb_backup; diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c index 60e406bcf42a9..a2524106206db 100644 --- a/drivers/scsi/scsi_transport_fc.c +++ b/drivers/scsi/scsi_transport_fc.c @@ -34,7 +34,7 @@ static int fc_bsg_hostadd(struct Scsi_Host *, struct fc_host_attrs *); static int fc_bsg_rportadd(struct Scsi_Host *, struct fc_rport *); static void fc_bsg_remove(struct request_queue *); static void fc_bsg_goose_queue(struct fc_rport *); -static void fc_li_stats_update(struct fc_fn_li_desc *li_desc, +static void fc_li_stats_update(u16 event_type, struct fc_fpin_stats *stats); static void 
fc_delivery_stats_update(u32 reason_code, struct fc_fpin_stats *stats); @@ -670,42 +670,34 @@ fc_find_rport_by_wwpn(struct Scsi_Host *shost, u64 wwpn) EXPORT_SYMBOL(fc_find_rport_by_wwpn); static void -fc_li_stats_update(struct fc_fn_li_desc *li_desc, +fc_li_stats_update(u16 event_type, struct fc_fpin_stats *stats) { - stats->li += be32_to_cpu(li_desc->event_count); - switch (be16_to_cpu(li_desc->event_type)) { + stats->li++; + switch (event_type) { case FPIN_LI_UNKNOWN: - stats->li_failure_unknown += - be32_to_cpu(li_desc->event_count); + stats->li_failure_unknown++; break; case FPIN_LI_LINK_FAILURE: - stats->li_link_failure_count += - be32_to_cpu(li_desc->event_count); + stats->li_link_failure_count++; break; case FPIN_LI_LOSS_OF_SYNC: - stats->li_loss_of_sync_count += - be32_to_cpu(li_desc->event_count); + stats->li_loss_of_sync_count++; break; case FPIN_LI_LOSS_OF_SIG: - stats->li_loss_of_signals_count += - be32_to_cpu(li_desc->event_count); + stats->li_loss_of_signals_count++; break; case FPIN_LI_PRIM_SEQ_ERR: - stats->li_prim_seq_err_count += - be32_to_cpu(li_desc->event_count); + stats->li_prim_seq_err_count++; break; case FPIN_LI_INVALID_TX_WD: - stats->li_invalid_tx_word_count += - be32_to_cpu(li_desc->event_count); + stats->li_invalid_tx_word_count++; break; case FPIN_LI_INVALID_CRC: - stats->li_invalid_crc_count += - be32_to_cpu(li_desc->event_count); + stats->li_invalid_crc_count++; break; case FPIN_LI_DEVICE_SPEC: - stats->li_device_specific += - be32_to_cpu(li_desc->event_count); + stats->li_device_specific++; break; } } @@ -767,6 +759,7 @@ fc_fpin_li_stats_update(struct Scsi_Host *shost, struct fc_tlv_desc *tlv) struct fc_rport *attach_rport = NULL; struct fc_host_attrs *fc_host = shost_to_fc_host(shost); struct fc_fn_li_desc *li_desc = (struct fc_fn_li_desc *)tlv; + u16 event_type = be16_to_cpu(li_desc->event_type); u64 wwpn; rport = fc_find_rport_by_wwpn(shost, @@ -775,7 +768,7 @@ fc_fpin_li_stats_update(struct Scsi_Host *shost, struct fc_tlv_desc 
*tlv) (rport->roles & FC_PORT_ROLE_FCP_TARGET || rport->roles & FC_PORT_ROLE_NVME_TARGET)) { attach_rport = rport; - fc_li_stats_update(li_desc, &attach_rport->fpin_stats); + fc_li_stats_update(event_type, &attach_rport->fpin_stats); } if (be32_to_cpu(li_desc->pname_count) > 0) { @@ -789,14 +782,14 @@ fc_fpin_li_stats_update(struct Scsi_Host *shost, struct fc_tlv_desc *tlv) rport->roles & FC_PORT_ROLE_NVME_TARGET)) { if (rport == attach_rport) continue; - fc_li_stats_update(li_desc, + fc_li_stats_update(event_type, &rport->fpin_stats); } } } if (fc_host->port_name == be64_to_cpu(li_desc->attached_wwpn)) - fc_li_stats_update(li_desc, &fc_host->fpin_stats); + fc_li_stats_update(event_type, &fc_host->fpin_stats); } /* diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index 554b6f7842236..c7b1b2e8bb02f 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -2221,10 +2221,10 @@ static void iscsi_stop_conn(struct iscsi_cls_conn *conn, int flag) switch (flag) { case STOP_CONN_RECOVER: - conn->state = ISCSI_CONN_FAILED; + WRITE_ONCE(conn->state, ISCSI_CONN_FAILED); break; case STOP_CONN_TERM: - conn->state = ISCSI_CONN_DOWN; + WRITE_ONCE(conn->state, ISCSI_CONN_DOWN); break; default: iscsi_cls_conn_printk(KERN_ERR, conn, "invalid stop flag %d\n", @@ -2236,6 +2236,49 @@ static void iscsi_stop_conn(struct iscsi_cls_conn *conn, int flag) ISCSI_DBG_TRANS_CONN(conn, "Stopping conn done.\n"); } +static void iscsi_ep_disconnect(struct iscsi_cls_conn *conn, bool is_active) +{ + struct iscsi_cls_session *session = iscsi_conn_to_session(conn); + struct iscsi_endpoint *ep; + + ISCSI_DBG_TRANS_CONN(conn, "disconnect ep.\n"); + WRITE_ONCE(conn->state, ISCSI_CONN_FAILED); + + if (!conn->ep || !session->transport->ep_disconnect) + return; + + ep = conn->ep; + conn->ep = NULL; + + session->transport->unbind_conn(conn, is_active); + session->transport->ep_disconnect(ep); + ISCSI_DBG_TRANS_CONN(conn, "disconnect ep 
done.\n"); +} + +static void iscsi_if_disconnect_bound_ep(struct iscsi_cls_conn *conn, + struct iscsi_endpoint *ep, + bool is_active) +{ + /* Check if this was a conn error and the kernel took ownership */ + spin_lock_irq(&conn->lock); + if (!test_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn->flags)) { + spin_unlock_irq(&conn->lock); + iscsi_ep_disconnect(conn, is_active); + } else { + spin_unlock_irq(&conn->lock); + ISCSI_DBG_TRANS_CONN(conn, "flush kernel conn cleanup.\n"); + mutex_unlock(&conn->ep_mutex); + + flush_work(&conn->cleanup_work); + /* + * Userspace is now done with the EP so we can release the ref + * iscsi_cleanup_conn_work_fn took. + */ + iscsi_put_endpoint(ep); + mutex_lock(&conn->ep_mutex); + } +} + static int iscsi_if_stop_conn(struct iscsi_transport *transport, struct iscsi_uevent *ev) { @@ -2256,12 +2299,25 @@ static int iscsi_if_stop_conn(struct iscsi_transport *transport, cancel_work_sync(&conn->cleanup_work); iscsi_stop_conn(conn, flag); } else { + /* + * For offload, when iscsid is restarted it won't know about + * existing endpoints so it can't do a ep_disconnect. We clean + * it up here for userspace. + */ + mutex_lock(&conn->ep_mutex); + if (conn->ep) + iscsi_if_disconnect_bound_ep(conn, conn->ep, true); + mutex_unlock(&conn->ep_mutex); + /* * Figure out if it was the kernel or userspace initiating this. */ + spin_lock_irq(&conn->lock); if (!test_and_set_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn->flags)) { + spin_unlock_irq(&conn->lock); iscsi_stop_conn(conn, flag); } else { + spin_unlock_irq(&conn->lock); ISCSI_DBG_TRANS_CONN(conn, "flush kernel conn cleanup.\n"); flush_work(&conn->cleanup_work); @@ -2270,31 +2326,14 @@ static int iscsi_if_stop_conn(struct iscsi_transport *transport, * Only clear for recovery to avoid extra cleanup runs during * termination. 
*/ + spin_lock_irq(&conn->lock); clear_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn->flags); + spin_unlock_irq(&conn->lock); } ISCSI_DBG_TRANS_CONN(conn, "iscsi if conn stop done.\n"); return 0; } -static void iscsi_ep_disconnect(struct iscsi_cls_conn *conn, bool is_active) -{ - struct iscsi_cls_session *session = iscsi_conn_to_session(conn); - struct iscsi_endpoint *ep; - - ISCSI_DBG_TRANS_CONN(conn, "disconnect ep.\n"); - conn->state = ISCSI_CONN_FAILED; - - if (!conn->ep || !session->transport->ep_disconnect) - return; - - ep = conn->ep; - conn->ep = NULL; - - session->transport->unbind_conn(conn, is_active); - session->transport->ep_disconnect(ep); - ISCSI_DBG_TRANS_CONN(conn, "disconnect ep done.\n"); -} - static void iscsi_cleanup_conn_work_fn(struct work_struct *work) { struct iscsi_cls_conn *conn = container_of(work, struct iscsi_cls_conn, @@ -2303,18 +2342,11 @@ static void iscsi_cleanup_conn_work_fn(struct work_struct *work) mutex_lock(&conn->ep_mutex); /* - * If we are not at least bound there is nothing for us to do. Userspace - * will do a ep_disconnect call if offload is used, but will not be - * doing a stop since there is nothing to clean up, so we have to clear - * the cleanup bit here. + * Get a ref to the ep, so we don't release its ID until after + * userspace is done referencing it in iscsi_if_disconnect_bound_ep. */ - if (conn->state != ISCSI_CONN_BOUND && conn->state != ISCSI_CONN_UP) { - ISCSI_DBG_TRANS_CONN(conn, "Got error while conn is already failed. 
Ignoring.\n"); - clear_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn->flags); - mutex_unlock(&conn->ep_mutex); - return; - } - + if (conn->ep) + get_device(&conn->ep->dev); iscsi_ep_disconnect(conn, false); if (system_state != SYSTEM_RUNNING) { @@ -2370,11 +2402,12 @@ iscsi_create_conn(struct iscsi_cls_session *session, int dd_size, uint32_t cid) conn->dd_data = &conn[1]; mutex_init(&conn->ep_mutex); + spin_lock_init(&conn->lock); INIT_LIST_HEAD(&conn->conn_list); INIT_WORK(&conn->cleanup_work, iscsi_cleanup_conn_work_fn); conn->transport = transport; conn->cid = cid; - conn->state = ISCSI_CONN_DOWN; + WRITE_ONCE(conn->state, ISCSI_CONN_DOWN); /* this is released in the dev's release function */ if (!get_device(&session->dev)) @@ -2561,9 +2594,32 @@ void iscsi_conn_error_event(struct iscsi_cls_conn *conn, enum iscsi_err error) struct iscsi_uevent *ev; struct iscsi_internal *priv; int len = nlmsg_total_size(sizeof(*ev)); + unsigned long flags; + int state; - if (!test_and_set_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn->flags)) - queue_work(iscsi_conn_cleanup_workq, &conn->cleanup_work); + spin_lock_irqsave(&conn->lock, flags); + /* + * Userspace will only do a stop call if we are at least bound. And, we + * only need to do the in kernel cleanup if in the UP state so cmds can + * be released to upper layers. If in other states just wait for + * userspace to avoid races that can leave the cleanup_work queued. 
+ */ + state = READ_ONCE(conn->state); + switch (state) { + case ISCSI_CONN_BOUND: + case ISCSI_CONN_UP: + if (!test_and_set_bit(ISCSI_CLS_CONN_BIT_CLEANUP, + &conn->flags)) { + queue_work(iscsi_conn_cleanup_workq, + &conn->cleanup_work); + } + break; + default: + ISCSI_DBG_TRANS_CONN(conn, "Got conn error in state %d\n", + state); + break; + } + spin_unlock_irqrestore(&conn->lock, flags); priv = iscsi_if_transport_lookup(conn->transport); if (!priv) @@ -2913,7 +2969,7 @@ iscsi_set_param(struct iscsi_transport *transport, struct iscsi_uevent *ev) char *data = (char*)ev + sizeof(*ev); struct iscsi_cls_conn *conn; struct iscsi_cls_session *session; - int err = 0, value = 0; + int err = 0, value = 0, state; if (ev->u.set_param.len > PAGE_SIZE) return -EINVAL; @@ -2930,8 +2986,8 @@ iscsi_set_param(struct iscsi_transport *transport, struct iscsi_uevent *ev) session->recovery_tmo = value; break; default: - if ((conn->state == ISCSI_CONN_BOUND) || - (conn->state == ISCSI_CONN_UP)) { + state = READ_ONCE(conn->state); + if (state == ISCSI_CONN_BOUND || state == ISCSI_CONN_UP) { err = transport->set_param(conn, ev->u.set_param.param, data, ev->u.set_param.len); } else { @@ -3003,16 +3059,7 @@ static int iscsi_if_ep_disconnect(struct iscsi_transport *transport, } mutex_lock(&conn->ep_mutex); - /* Check if this was a conn error and the kernel took ownership */ - if (test_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn->flags)) { - ISCSI_DBG_TRANS_CONN(conn, "flush kernel conn cleanup.\n"); - mutex_unlock(&conn->ep_mutex); - - flush_work(&conn->cleanup_work); - goto put_ep; - } - - iscsi_ep_disconnect(conn, false); + iscsi_if_disconnect_bound_ep(conn, ep, false); mutex_unlock(&conn->ep_mutex); put_ep: iscsi_put_endpoint(ep); @@ -3715,24 +3762,17 @@ static int iscsi_if_transport_conn(struct iscsi_transport *transport, return -EINVAL; mutex_lock(&conn->ep_mutex); + spin_lock_irq(&conn->lock); if (test_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn->flags)) { + spin_unlock_irq(&conn->lock); 
mutex_unlock(&conn->ep_mutex); ev->r.retcode = -ENOTCONN; return 0; } + spin_unlock_irq(&conn->lock); switch (nlh->nlmsg_type) { case ISCSI_UEVENT_BIND_CONN: - if (conn->ep) { - /* - * For offload boot support where iscsid is restarted - * during the pivot root stage, the ep will be intact - * here when the new iscsid instance starts up and - * reconnects. - */ - iscsi_ep_disconnect(conn, true); - } - session = iscsi_session_lookup(ev->u.b_conn.sid); if (!session) { err = -EINVAL; @@ -3743,7 +3783,7 @@ static int iscsi_if_transport_conn(struct iscsi_transport *transport, ev->u.b_conn.transport_eph, ev->u.b_conn.is_leading); if (!ev->r.retcode) - conn->state = ISCSI_CONN_BOUND; + WRITE_ONCE(conn->state, ISCSI_CONN_BOUND); if (ev->r.retcode || !transport->ep_connect) break; @@ -3762,7 +3802,8 @@ static int iscsi_if_transport_conn(struct iscsi_transport *transport, case ISCSI_UEVENT_START_CONN: ev->r.retcode = transport->start_conn(conn); if (!ev->r.retcode) - conn->state = ISCSI_CONN_UP; + WRITE_ONCE(conn->state, ISCSI_CONN_UP); + break; case ISCSI_UEVENT_SEND_PDU: pdu_len = nlh->nlmsg_len - sizeof(*nlh) - sizeof(*ev); @@ -4070,10 +4111,11 @@ static ssize_t show_conn_state(struct device *dev, { struct iscsi_cls_conn *conn = iscsi_dev_to_conn(dev->parent); const char *state = "unknown"; + int conn_state = READ_ONCE(conn->state); - if (conn->state >= 0 && - conn->state < ARRAY_SIZE(connection_state_names)) - state = connection_state_names[conn->state]; + if (conn_state >= 0 && + conn_state < ARRAY_SIZE(connection_state_names)) + state = connection_state_names[conn_state]; return sysfs_emit(buf, "%s\n", state); } diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 62eb9921cc947..8b5d2a4076c21 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3320,6 +3320,7 @@ static int sd_revalidate_disk(struct gendisk *disk) sd_read_block_limits(sdkp); sd_read_block_characteristics(sdkp); sd_zbc_read_zones(sdkp, buffer); + sd_read_cpr(sdkp); } sd_print_capacity(sdkp, 
old_capacity); @@ -3329,7 +3330,6 @@ static int sd_revalidate_disk(struct gendisk *disk) sd_read_app_tag_own(sdkp, buffer); sd_read_write_same(sdkp, buffer); sd_read_security(sdkp, buffer); - sd_read_cpr(sdkp); } /* @@ -3752,7 +3752,8 @@ static int sd_suspend_common(struct device *dev, bool ignore_stop_errors) return 0; if (sdkp->WCE && sdkp->media_present) { - sd_printk(KERN_NOTICE, sdkp, "Synchronizing SCSI cache\n"); + if (!sdkp->device->silence_suspend) + sd_printk(KERN_NOTICE, sdkp, "Synchronizing SCSI cache\n"); ret = sd_sync_cache(sdkp, &sshdr); if (ret) { @@ -3774,7 +3775,8 @@ static int sd_suspend_common(struct device *dev, bool ignore_stop_errors) } if (sdkp->device->manage_start_stop) { - sd_printk(KERN_NOTICE, sdkp, "Stopping disk\n"); + if (!sdkp->device->silence_suspend) + sd_printk(KERN_NOTICE, sdkp, "Stopping disk\n"); /* an error is not worth aborting a system sleep */ ret = sd_start_stop_device(sdkp, 0); if (ignore_stop_errors) diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c index f0897d587454a..f3749e5086737 100644 --- a/drivers/scsi/smartpqi/smartpqi_init.c +++ b/drivers/scsi/smartpqi/smartpqi_init.c @@ -2513,17 +2513,15 @@ static void pqi_remove_all_scsi_devices(struct pqi_ctrl_info *ctrl_info) struct pqi_scsi_dev *device; struct pqi_scsi_dev *next; - spin_lock_irqsave(&ctrl_info->scsi_device_list_lock, flags); - list_for_each_entry_safe(device, next, &ctrl_info->scsi_device_list, scsi_device_list_entry) { if (pqi_is_device_added(device)) pqi_remove_device(ctrl_info, device); + spin_lock_irqsave(&ctrl_info->scsi_device_list_lock, flags); list_del(&device->scsi_device_list_entry); pqi_free_device(device); + spin_unlock_irqrestore(&ctrl_info->scsi_device_list_lock, flags); } - - spin_unlock_irqrestore(&ctrl_info->scsi_device_list_lock, flags); } static int pqi_scan_scsi_devices(struct pqi_ctrl_info *ctrl_info) @@ -7857,6 +7855,21 @@ static int pqi_force_sis_mode(struct pqi_ctrl_info *ctrl_info) return 
pqi_revert_to_sis_mode(ctrl_info); } +static void pqi_perform_lockup_action(void) +{ + switch (pqi_lockup_action) { + case PANIC: + panic("FATAL: Smart Family Controller lockup detected"); + break; + case REBOOT: + emergency_restart(); + break; + case NONE: + default: + break; + } +} + static int pqi_ctrl_init(struct pqi_ctrl_info *ctrl_info) { int rc; @@ -7881,8 +7894,15 @@ static int pqi_ctrl_init(struct pqi_ctrl_info *ctrl_info) * commands. */ rc = sis_wait_for_ctrl_ready(ctrl_info); - if (rc) + if (rc) { + if (reset_devices) { + dev_err(&ctrl_info->pci_dev->dev, + "kdump init failed with error %d\n", rc); + pqi_lockup_action = REBOOT; + pqi_perform_lockup_action(); + } return rc; + } /* * Get the controller properties. This allows us to determine @@ -8607,21 +8627,6 @@ static int pqi_ofa_ctrl_restart(struct pqi_ctrl_info *ctrl_info, unsigned int de return pqi_ctrl_init_resume(ctrl_info); } -static void pqi_perform_lockup_action(void) -{ - switch (pqi_lockup_action) { - case PANIC: - panic("FATAL: Smart Family Controller lockup detected"); - break; - case REBOOT: - emergency_restart(); - break; - case NONE: - default: - break; - } -} - static struct pqi_raid_error_info pqi_ctrl_offline_raid_error_info = { .data_out_result = PQI_DATA_IN_OUT_HARDWARE_ERROR, .status = SAM_STAT_CHECK_CONDITION, diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c index f925b1f1f9ada..a0beb11abdc9d 100644 --- a/drivers/scsi/sr.c +++ b/drivers/scsi/sr.c @@ -578,7 +578,7 @@ static int sr_block_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, scsi_autopm_get_device(sdev); - if (ret != CDROMCLOSETRAY && ret != CDROMEJECT) { + if (cmd != CDROMCLOSETRAY && cmd != CDROMEJECT) { ret = cdrom_ioctl(&cd->cdi, bdev, mode, cmd, arg); if (ret != -ENOSYS) goto put; diff --git a/drivers/scsi/ufs/ufshcd-pci.c b/drivers/scsi/ufs/ufshcd-pci.c index f76692053ca17..e892b9feffb11 100644 --- a/drivers/scsi/ufs/ufshcd-pci.c +++ b/drivers/scsi/ufs/ufshcd-pci.c @@ -428,6 +428,12 @@ static int 
ufs_intel_adl_init(struct ufs_hba *hba) return ufs_intel_common_init(hba); } +static int ufs_intel_mtl_init(struct ufs_hba *hba) +{ + hba->caps |= UFSHCD_CAP_CRYPTO | UFSHCD_CAP_WB_EN; + return ufs_intel_common_init(hba); +} + static struct ufs_hba_variant_ops ufs_intel_cnl_hba_vops = { .name = "intel-pci", .init = ufs_intel_common_init, @@ -465,6 +471,16 @@ static struct ufs_hba_variant_ops ufs_intel_adl_hba_vops = { .device_reset = ufs_intel_device_reset, }; +static struct ufs_hba_variant_ops ufs_intel_mtl_hba_vops = { + .name = "intel-pci", + .init = ufs_intel_mtl_init, + .exit = ufs_intel_common_exit, + .hce_enable_notify = ufs_intel_hce_enable_notify, + .link_startup_notify = ufs_intel_link_startup_notify, + .resume = ufs_intel_resume, + .device_reset = ufs_intel_device_reset, +}; + #ifdef CONFIG_PM_SLEEP static int ufshcd_pci_restore(struct device *dev) { @@ -579,6 +595,7 @@ static const struct pci_device_id ufshcd_pci_tbl[] = { { PCI_VDEVICE(INTEL, 0x98FA), (kernel_ulong_t)&ufs_intel_lkf_hba_vops }, { PCI_VDEVICE(INTEL, 0x51FF), (kernel_ulong_t)&ufs_intel_adl_hba_vops }, { PCI_VDEVICE(INTEL, 0x54FF), (kernel_ulong_t)&ufs_intel_adl_hba_vops }, + { PCI_VDEVICE(INTEL, 0x7E47), (kernel_ulong_t)&ufs_intel_mtl_hba_vops }, { } /* terminate list */ }; diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 9349557b8a01b..cb285d277201c 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -585,7 +585,12 @@ static void ufshcd_print_pwr_info(struct ufs_hba *hba) "INVALID MODE", }; - dev_err(hba->dev, "%s:[RX, TX]: gear=[%d, %d], lane[%d, %d], pwr[%s, %s], rate = %d\n", + /* + * Using dev_dbg to avoid messages during runtime PM to avoid + * never-ending cycles of messages written back to storage by user space + * causing runtime resume, causing more messages and so on. 
+ */ + dev_dbg(hba->dev, "%s:[RX, TX]: gear=[%d, %d], lane[%d, %d], pwr[%s, %s], rate = %d\n", __func__, hba->pwr_info.gear_rx, hba->pwr_info.gear_tx, hba->pwr_info.lane_rx, hba->pwr_info.lane_tx, @@ -5024,6 +5029,12 @@ static int ufshcd_slave_configure(struct scsi_device *sdev) pm_runtime_get_noresume(&sdev->sdev_gendev); else if (ufshcd_is_rpm_autosuspend_allowed(hba)) sdev->rpm_autosuspend = 1; + /* + * Do not print messages during runtime PM to avoid never-ending cycles + * of messages written back to storage by user space causing runtime + * resume, causing more messages and so on. + */ + sdev->silence_suspend = 1; ufshcd_crypto_register(hba, q); @@ -7339,7 +7350,13 @@ static u32 ufshcd_find_max_sup_active_icc_level(struct ufs_hba *hba, if (!hba->vreg_info.vcc || !hba->vreg_info.vccq || !hba->vreg_info.vccq2) { - dev_err(hba->dev, + /* + * Using dev_dbg to avoid messages during runtime PM to avoid + * never-ending cycles of messages written back to storage by + * user space causing runtime resume, causing more messages and + * so on. 
+ */ + dev_dbg(hba->dev, "%s: Regulator capability was not set, actvIccLevel=%d", __func__, icc_level); goto out; diff --git a/drivers/scsi/ufs/ufshpb.c b/drivers/scsi/ufs/ufshpb.c index 2d36a0715fca6..b34feba1f53de 100644 --- a/drivers/scsi/ufs/ufshpb.c +++ b/drivers/scsi/ufs/ufshpb.c @@ -869,12 +869,6 @@ static struct ufshpb_region *ufshpb_victim_lru_info(struct ufshpb_lu *hpb) struct ufshpb_region *rgn, *victim_rgn = NULL; list_for_each_entry(rgn, &lru_info->lh_lru_rgn, list_lru_rgn) { - if (!rgn) { - dev_err(&hpb->sdev_ufs_lu->sdev_dev, - "%s: no region allocated\n", - __func__); - return NULL; - } if (ufshpb_check_srgns_issue_state(hpb, rgn)) continue; @@ -890,6 +884,11 @@ static struct ufshpb_region *ufshpb_victim_lru_info(struct ufshpb_lu *hpb) break; } + if (!victim_rgn) + dev_err(&hpb->sdev_ufs_lu->sdev_dev, + "%s: no region allocated\n", + __func__); + return victim_rgn; } diff --git a/drivers/scsi/zorro7xx.c b/drivers/scsi/zorro7xx.c index 27b9e2baab1a6..7acf9193a9e80 100644 --- a/drivers/scsi/zorro7xx.c +++ b/drivers/scsi/zorro7xx.c @@ -159,6 +159,8 @@ static void zorro7xx_remove_one(struct zorro_dev *z) scsi_remove_host(host); NCR_700_release(host); + if (host->base > 0x01000000) + iounmap(hostdata->base); kfree(hostdata); free_irq(host->irq, host); zorro_release_device(z); diff --git a/drivers/soc/mediatek/mtk-pm-domains.c b/drivers/soc/mediatek/mtk-pm-domains.c index b762bc40f56bd..afd2fd74802d2 100644 --- a/drivers/soc/mediatek/mtk-pm-domains.c +++ b/drivers/soc/mediatek/mtk-pm-domains.c @@ -443,6 +443,9 @@ generic_pm_domain *scpsys_add_one_domain(struct scpsys *scpsys, struct device_no pd->genpd.power_off = scpsys_power_off; pd->genpd.power_on = scpsys_power_on; + if (MTK_SCPD_CAPS(pd, MTK_SCPD_ACTIVE_WAKEUP)) + pd->genpd.flags |= GENPD_FLAG_ACTIVE_WAKEUP; + if (MTK_SCPD_CAPS(pd, MTK_SCPD_KEEP_DEFAULT_OFF)) pm_genpd_init(&pd->genpd, NULL, true); else diff --git a/drivers/soc/qcom/ocmem.c b/drivers/soc/qcom/ocmem.c index d2dacbbaafbd1..97fd24c178f8d 
100644 --- a/drivers/soc/qcom/ocmem.c +++ b/drivers/soc/qcom/ocmem.c @@ -206,6 +206,7 @@ struct ocmem *of_get_ocmem(struct device *dev) ocmem = platform_get_drvdata(pdev); if (!ocmem) { dev_err(dev, "Cannot get ocmem\n"); + put_device(&pdev->dev); return ERR_PTR(-ENODEV); } return ocmem; diff --git a/drivers/soc/qcom/qcom_aoss.c b/drivers/soc/qcom/qcom_aoss.c index cbe5e39fdaeb0..a59bb34e5ebaf 100644 --- a/drivers/soc/qcom/qcom_aoss.c +++ b/drivers/soc/qcom/qcom_aoss.c @@ -451,7 +451,11 @@ struct qmp *qmp_get(struct device *dev) qmp = platform_get_drvdata(pdev); - return qmp ? qmp : ERR_PTR(-EPROBE_DEFER); + if (!qmp) { + put_device(&pdev->dev); + return ERR_PTR(-EPROBE_DEFER); + } + return qmp; } EXPORT_SYMBOL(qmp_get); @@ -497,7 +501,7 @@ static int qmp_probe(struct platform_device *pdev) } irq = platform_get_irq(pdev, 0); - ret = devm_request_irq(&pdev->dev, irq, qmp_intr, IRQF_ONESHOT, + ret = devm_request_irq(&pdev->dev, irq, qmp_intr, 0, "aoss-qmp", qmp); if (ret < 0) { dev_err(&pdev->dev, "failed to request interrupt\n"); diff --git a/drivers/soc/qcom/rpmpd.c b/drivers/soc/qcom/rpmpd.c index 0a8d8d24bfb77..624b5630feb87 100644 --- a/drivers/soc/qcom/rpmpd.c +++ b/drivers/soc/qcom/rpmpd.c @@ -610,6 +610,9 @@ static int rpmpd_probe(struct platform_device *pdev) data->domains = devm_kcalloc(&pdev->dev, num, sizeof(*data->domains), GFP_KERNEL); + if (!data->domains) + return -ENOMEM; + data->num_domains = num; for (i = 0; i < num; i++) { diff --git a/drivers/soc/ti/wkup_m3_ipc.c b/drivers/soc/ti/wkup_m3_ipc.c index 72386bd393fed..2f03ced0f4113 100644 --- a/drivers/soc/ti/wkup_m3_ipc.c +++ b/drivers/soc/ti/wkup_m3_ipc.c @@ -450,9 +450,9 @@ static int wkup_m3_ipc_probe(struct platform_device *pdev) return PTR_ERR(m3_ipc->ipc_mem_base); irq = platform_get_irq(pdev, 0); - if (!irq) { + if (irq < 0) { dev_err(&pdev->dev, "no irq resource\n"); - return -ENXIO; + return irq; } ret = devm_request_irq(dev, irq, wkup_m3_txev_handler, diff --git 
a/drivers/soundwire/dmi-quirks.c b/drivers/soundwire/dmi-quirks.c index 0ca2a3e3a02e2..747983743a14b 100644 --- a/drivers/soundwire/dmi-quirks.c +++ b/drivers/soundwire/dmi-quirks.c @@ -59,7 +59,7 @@ static const struct dmi_system_id adr_remap_quirk_table[] = { { .matches = { DMI_MATCH(DMI_SYS_VENDOR, "HP"), - DMI_MATCH(DMI_PRODUCT_NAME, "HP Spectre x360 Convertible"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP Spectre x360 Conv"), }, .driver_data = (void *)intel_tgl_bios, }, diff --git a/drivers/soundwire/intel.c b/drivers/soundwire/intel.c index 122f7a29d8ca9..63101f1ba2713 100644 --- a/drivers/soundwire/intel.c +++ b/drivers/soundwire/intel.c @@ -448,8 +448,8 @@ static void intel_shim_wake(struct sdw_intel *sdw, bool wake_enable) /* Clear wake status */ wake_sts = intel_readw(shim, SDW_SHIM_WAKESTS); - wake_sts |= (SDW_SHIM_WAKEEN_ENABLE << link_id); - intel_writew(shim, SDW_SHIM_WAKESTS_STATUS, wake_sts); + wake_sts |= (SDW_SHIM_WAKESTS_STATUS << link_id); + intel_writew(shim, SDW_SHIM_WAKESTS, wake_sts); } mutex_unlock(sdw->link_res->shim_lock); } diff --git a/drivers/spi/spi-bcm-qspi.c b/drivers/spi/spi-bcm-qspi.c index 86c76211b3d3d..cad2d55dcd3d2 100644 --- a/drivers/spi/spi-bcm-qspi.c +++ b/drivers/spi/spi-bcm-qspi.c @@ -1205,7 +1205,7 @@ static int bcm_qspi_exec_mem_op(struct spi_mem *mem, addr = op->addr.val; len = op->data.nbytes; - if (bcm_qspi_bspi_ver_three(qspi) == true) { + if (has_bspi(qspi) && bcm_qspi_bspi_ver_three(qspi) == true) { /* * The address coming into this function is a raw flash offset. 
* But for BSPI <= V3, we need to convert it to a remapped BSPI @@ -1224,7 +1224,7 @@ static int bcm_qspi_exec_mem_op(struct spi_mem *mem, len < 4) mspi_read = true; - if (mspi_read) + if (!has_bspi(qspi) || mspi_read) return bcm_qspi_mspi_exec_mem_op(spi, op); ret = bcm_qspi_bspi_set_mode(qspi, op, 0); diff --git a/drivers/spi/spi-cadence-quadspi.c b/drivers/spi/spi-cadence-quadspi.c index b808c94641fa6..75f3560411386 100644 --- a/drivers/spi/spi-cadence-quadspi.c +++ b/drivers/spi/spi-cadence-quadspi.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -102,12 +103,6 @@ struct cqspi_driver_platdata { #define CQSPI_TIMEOUT_MS 500 #define CQSPI_READ_TIMEOUT_MS 10 -/* Instruction type */ -#define CQSPI_INST_TYPE_SINGLE 0 -#define CQSPI_INST_TYPE_DUAL 1 -#define CQSPI_INST_TYPE_QUAD 2 -#define CQSPI_INST_TYPE_OCTAL 3 - #define CQSPI_DUMMY_CLKS_PER_BYTE 8 #define CQSPI_DUMMY_BYTES_MAX 4 #define CQSPI_DUMMY_CLKS_MAX 31 @@ -376,10 +371,6 @@ static unsigned int cqspi_calc_dummy(const struct spi_mem_op *op, bool dtr) static int cqspi_set_protocol(struct cqspi_flash_pdata *f_pdata, const struct spi_mem_op *op) { - f_pdata->inst_width = CQSPI_INST_TYPE_SINGLE; - f_pdata->addr_width = CQSPI_INST_TYPE_SINGLE; - f_pdata->data_width = CQSPI_INST_TYPE_SINGLE; - /* * For an op to be DTR, cmd phase along with every other non-empty * phase should have dtr field set to 1. 
If an op phase has zero @@ -389,32 +380,23 @@ static int cqspi_set_protocol(struct cqspi_flash_pdata *f_pdata, (!op->addr.nbytes || op->addr.dtr) && (!op->data.nbytes || op->data.dtr); - switch (op->data.buswidth) { - case 0: - break; - case 1: - f_pdata->data_width = CQSPI_INST_TYPE_SINGLE; - break; - case 2: - f_pdata->data_width = CQSPI_INST_TYPE_DUAL; - break; - case 4: - f_pdata->data_width = CQSPI_INST_TYPE_QUAD; - break; - case 8: - f_pdata->data_width = CQSPI_INST_TYPE_OCTAL; - break; - default: - return -EINVAL; - } + f_pdata->inst_width = 0; + if (op->cmd.buswidth) + f_pdata->inst_width = ilog2(op->cmd.buswidth); + + f_pdata->addr_width = 0; + if (op->addr.buswidth) + f_pdata->addr_width = ilog2(op->addr.buswidth); + + f_pdata->data_width = 0; + if (op->data.buswidth) + f_pdata->data_width = ilog2(op->data.buswidth); /* Right now we only support 8-8-8 DTR mode. */ if (f_pdata->dtr) { switch (op->cmd.buswidth) { case 0: - break; case 8: - f_pdata->inst_width = CQSPI_INST_TYPE_OCTAL; break; default: return -EINVAL; @@ -422,9 +404,7 @@ static int cqspi_set_protocol(struct cqspi_flash_pdata *f_pdata, switch (op->addr.buswidth) { case 0: - break; case 8: - f_pdata->addr_width = CQSPI_INST_TYPE_OCTAL; break; default: return -EINVAL; @@ -432,9 +412,7 @@ static int cqspi_set_protocol(struct cqspi_flash_pdata *f_pdata, switch (op->data.buswidth) { case 0: - break; case 8: - f_pdata->data_width = CQSPI_INST_TYPE_OCTAL; break; default: return -EINVAL; diff --git a/drivers/spi/spi-fsi.c b/drivers/spi/spi-fsi.c index b6c7467f0b590..d403a7a3021d0 100644 --- a/drivers/spi/spi-fsi.c +++ b/drivers/spi/spi-fsi.c @@ -25,6 +25,7 @@ #define SPI_FSI_BASE 0x70000 #define SPI_FSI_INIT_TIMEOUT_MS 1000 +#define SPI_FSI_STATUS_TIMEOUT_MS 100 #define SPI_FSI_MAX_RX_SIZE 8 #define SPI_FSI_MAX_TX_SIZE 40 @@ -299,6 +300,7 @@ static int fsi_spi_transfer_data(struct fsi_spi *ctx, struct spi_transfer *transfer) { int rc = 0; + unsigned long end; u64 status = 0ULL; if (transfer->tx_buf) { 
@@ -315,10 +317,14 @@ static int fsi_spi_transfer_data(struct fsi_spi *ctx, if (rc) return rc; + end = jiffies + msecs_to_jiffies(SPI_FSI_STATUS_TIMEOUT_MS); do { rc = fsi_spi_status(ctx, &status, "TX"); if (rc) return rc; + + if (time_after(jiffies, end)) + return -ETIMEDOUT; } while (status & SPI_FSI_STATUS_TDR_FULL); sent += nb; @@ -329,10 +335,14 @@ static int fsi_spi_transfer_data(struct fsi_spi *ctx, u8 *rx = transfer->rx_buf; while (transfer->len > recv) { + end = jiffies + msecs_to_jiffies(SPI_FSI_STATUS_TIMEOUT_MS); do { rc = fsi_spi_status(ctx, &status, "RX"); if (rc) return rc; + + if (time_after(jiffies, end)) + return -ETIMEDOUT; } while (!(status & SPI_FSI_STATUS_RDR_FULL)); rc = fsi_spi_read_reg(ctx, SPI_FSI_DATA_RX, &in); diff --git a/drivers/spi/spi-mt65xx.c b/drivers/spi/spi-mt65xx.c index 753bd313e6fda..2ca19b01948a2 100644 --- a/drivers/spi/spi-mt65xx.c +++ b/drivers/spi/spi-mt65xx.c @@ -43,8 +43,11 @@ #define SPI_CFG1_PACKET_LOOP_OFFSET 8 #define SPI_CFG1_PACKET_LENGTH_OFFSET 16 #define SPI_CFG1_GET_TICK_DLY_OFFSET 29 +#define SPI_CFG1_GET_TICK_DLY_OFFSET_V1 30 #define SPI_CFG1_GET_TICK_DLY_MASK 0xe0000000 +#define SPI_CFG1_GET_TICK_DLY_MASK_V1 0xc0000000 + #define SPI_CFG1_CS_IDLE_MASK 0xff #define SPI_CFG1_PACKET_LOOP_MASK 0xff00 #define SPI_CFG1_PACKET_LENGTH_MASK 0x3ff0000 @@ -346,9 +349,15 @@ static int mtk_spi_prepare_message(struct spi_master *master, /* tick delay */ reg_val = readl(mdata->base + SPI_CFG1_REG); - reg_val &= ~SPI_CFG1_GET_TICK_DLY_MASK; - reg_val |= ((chip_config->tick_delay & 0x7) - << SPI_CFG1_GET_TICK_DLY_OFFSET); + if (mdata->dev_comp->enhance_timing) { + reg_val &= ~SPI_CFG1_GET_TICK_DLY_MASK; + reg_val |= ((chip_config->tick_delay & 0x7) + << SPI_CFG1_GET_TICK_DLY_OFFSET); + } else { + reg_val &= ~SPI_CFG1_GET_TICK_DLY_MASK_V1; + reg_val |= ((chip_config->tick_delay & 0x3) + << SPI_CFG1_GET_TICK_DLY_OFFSET_V1); + } writel(reg_val, mdata->base + SPI_CFG1_REG); /* set hw cs timing */ diff --git 
a/drivers/spi/spi-mxic.c b/drivers/spi/spi-mxic.c index 45889947afed8..03fce4493aa79 100644 --- a/drivers/spi/spi-mxic.c +++ b/drivers/spi/spi-mxic.c @@ -304,25 +304,21 @@ static int mxic_spi_data_xfer(struct mxic_spi *mxic, const void *txbuf, writel(data, mxic->regs + TXD(nbytes % 4)); + ret = readl_poll_timeout(mxic->regs + INT_STS, sts, + sts & INT_TX_EMPTY, 0, USEC_PER_SEC); + if (ret) + return ret; + + ret = readl_poll_timeout(mxic->regs + INT_STS, sts, + sts & INT_RX_NOT_EMPTY, 0, + USEC_PER_SEC); + if (ret) + return ret; + + data = readl(mxic->regs + RXD); if (rxbuf) { - ret = readl_poll_timeout(mxic->regs + INT_STS, sts, - sts & INT_TX_EMPTY, 0, - USEC_PER_SEC); - if (ret) - return ret; - - ret = readl_poll_timeout(mxic->regs + INT_STS, sts, - sts & INT_RX_NOT_EMPTY, 0, - USEC_PER_SEC); - if (ret) - return ret; - - data = readl(mxic->regs + RXD); data >>= (8 * (4 - nbytes)); memcpy(rxbuf + pos, &data, nbytes); - WARN_ON(readl(mxic->regs + INT_STS) & INT_RX_NOT_EMPTY); - } else { - readl(mxic->regs + RXD); } WARN_ON(readl(mxic->regs + INT_STS) & INT_RX_NOT_EMPTY); diff --git a/drivers/spi/spi-pxa2xx-pci.c b/drivers/spi/spi-pxa2xx-pci.c index 2e134eb4bd2c9..6502fda6243e0 100644 --- a/drivers/spi/spi-pxa2xx-pci.c +++ b/drivers/spi/spi-pxa2xx-pci.c @@ -76,14 +76,23 @@ static bool lpss_dma_filter(struct dma_chan *chan, void *param) return true; } +static void lpss_dma_put_device(void *dma_dev) +{ + pci_dev_put(dma_dev); +} + static int lpss_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c) { struct pci_dev *dma_dev; + int ret; c->num_chipselect = 1; c->max_clk_rate = 50000000; dma_dev = pci_get_slot(dev->bus, PCI_DEVFN(PCI_SLOT(dev->devfn), 0)); + ret = devm_add_action_or_reset(&dev->dev, lpss_dma_put_device, dma_dev); + if (ret) + return ret; if (c->tx_param) { struct dw_dma_slave *slave = c->tx_param; @@ -107,8 +116,9 @@ static int lpss_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c) static int mrfld_spi_setup(struct pci_dev *dev, struct 
pxa_spi_info *c) { - struct pci_dev *dma_dev = pci_get_slot(dev->bus, PCI_DEVFN(21, 0)); struct dw_dma_slave *tx, *rx; + struct pci_dev *dma_dev; + int ret; switch (PCI_FUNC(dev->devfn)) { case 0: @@ -133,6 +143,11 @@ static int mrfld_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c) return -ENODEV; } + dma_dev = pci_get_slot(dev->bus, PCI_DEVFN(21, 0)); + ret = devm_add_action_or_reset(&dev->dev, lpss_dma_put_device, dma_dev); + if (ret) + return ret; + tx = c->tx_param; tx->dma_dev = &dma_dev->dev; diff --git a/drivers/spi/spi-rpc-if.c b/drivers/spi/spi-rpc-if.c index fe82f3575df4f..24ec1c83f379c 100644 --- a/drivers/spi/spi-rpc-if.c +++ b/drivers/spi/spi-rpc-if.c @@ -158,14 +158,18 @@ static int rpcif_spi_probe(struct platform_device *pdev) error = rpcif_hw_init(rpc, false); if (error) - return error; + goto out_disable_rpm; error = spi_register_controller(ctlr); if (error) { dev_err(&pdev->dev, "spi_register_controller failed\n"); - rpcif_disable_rpm(rpc); + goto out_disable_rpm; } + return 0; + +out_disable_rpm: + rpcif_disable_rpm(rpc); return error; } diff --git a/drivers/spi/spi-tegra114.c b/drivers/spi/spi-tegra114.c index e9de1d958bbd2..8f345247a8c32 100644 --- a/drivers/spi/spi-tegra114.c +++ b/drivers/spi/spi-tegra114.c @@ -1352,6 +1352,10 @@ static int tegra_spi_probe(struct platform_device *pdev) tspi->phys = r->start; spi_irq = platform_get_irq(pdev, 0); + if (spi_irq < 0) { + ret = spi_irq; + goto exit_free_master; + } tspi->irq = spi_irq; tspi->clk = devm_clk_get(&pdev->dev, "spi"); diff --git a/drivers/spi/spi-tegra20-slink.c b/drivers/spi/spi-tegra20-slink.c index 2a03739a0c609..80c3787deea9d 100644 --- a/drivers/spi/spi-tegra20-slink.c +++ b/drivers/spi/spi-tegra20-slink.c @@ -1006,14 +1006,8 @@ static int tegra_slink_probe(struct platform_device *pdev) struct resource *r; int ret, spi_irq; const struct tegra_slink_chip_data *cdata = NULL; - const struct of_device_id *match; - match = of_match_device(tegra_slink_of_match, &pdev->dev); - if 
(!match) { - dev_err(&pdev->dev, "Error: No device match found\n"); - return -ENODEV; - } - cdata = match->data; + cdata = of_device_get_match_data(&pdev->dev); master = spi_alloc_master(&pdev->dev, sizeof(*tspi)); if (!master) { diff --git a/drivers/spi/spi-tegra210-quad.c b/drivers/spi/spi-tegra210-quad.c index ce1bdb4767ea3..cb00ac2fc7d8e 100644 --- a/drivers/spi/spi-tegra210-quad.c +++ b/drivers/spi/spi-tegra210-quad.c @@ -1240,6 +1240,8 @@ static int tegra_qspi_probe(struct platform_device *pdev) tqspi->phys = r->start; qspi_irq = platform_get_irq(pdev, 0); + if (qspi_irq < 0) + return qspi_irq; tqspi->irq = qspi_irq; tqspi->clk = devm_clk_get(&pdev->dev, "qspi"); diff --git a/drivers/spi/spi-zynqmp-gqspi.c b/drivers/spi/spi-zynqmp-gqspi.c index 328b6559bb19a..2b5afae8ff7fc 100644 --- a/drivers/spi/spi-zynqmp-gqspi.c +++ b/drivers/spi/spi-zynqmp-gqspi.c @@ -1172,7 +1172,10 @@ static int zynqmp_qspi_probe(struct platform_device *pdev) goto clk_dis_all; } - dma_set_mask(&pdev->dev, DMA_BIT_MASK(44)); + ret = dma_set_mask(&pdev->dev, DMA_BIT_MASK(44)); + if (ret) + goto clk_dis_all; + ctlr->bits_per_word_mask = SPI_BPW_MASK(8); ctlr->num_chipselect = GQSPI_DEFAULT_NUM_CS; ctlr->mem_ops = &zynqmp_qspi_mem_ops; diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index d96082dc3340d..bbf977a0d2c57 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -1149,11 +1149,15 @@ static int __spi_unmap_msg(struct spi_controller *ctlr, struct spi_message *msg) if (ctlr->dma_tx) tx_dev = ctlr->dma_tx->device->dev; + else if (ctlr->dma_map_dev) + tx_dev = ctlr->dma_map_dev; else tx_dev = ctlr->dev.parent; if (ctlr->dma_rx) rx_dev = ctlr->dma_rx->device->dev; + else if (ctlr->dma_map_dev) + rx_dev = ctlr->dma_map_dev; else rx_dev = ctlr->dev.parent; diff --git a/drivers/staging/iio/adc/ad7280a.c b/drivers/staging/iio/adc/ad7280a.c index fef0055b89909..20183b2ea1279 100644 --- a/drivers/staging/iio/adc/ad7280a.c +++ b/drivers/staging/iio/adc/ad7280a.c @@ -107,9 +107,9 @@ 
static unsigned int ad7280a_devaddr(unsigned int addr) { return ((addr & 0x1) << 4) | - ((addr & 0x2) << 3) | + ((addr & 0x2) << 2) | (addr & 0x4) | - ((addr & 0x8) >> 3) | + ((addr & 0x8) >> 2) | ((addr & 0x10) >> 4); } diff --git a/drivers/staging/media/atomisp/pci/atomisp_acc.c b/drivers/staging/media/atomisp/pci/atomisp_acc.c index 9a1751895ab03..28cb271663c47 100644 --- a/drivers/staging/media/atomisp/pci/atomisp_acc.c +++ b/drivers/staging/media/atomisp/pci/atomisp_acc.c @@ -439,6 +439,18 @@ int atomisp_acc_s_mapped_arg(struct atomisp_sub_device *asd, return 0; } +static void atomisp_acc_unload_some_extensions(struct atomisp_sub_device *asd, + int i, + struct atomisp_acc_fw *acc_fw) +{ + while (--i >= 0) { + if (acc_fw->flags & acc_flag_to_pipe[i].flag) { + atomisp_css_unload_acc_extension(asd, acc_fw->fw, + acc_flag_to_pipe[i].pipe_id); + } + } +} + /* * Appends the loaded acceleration binary extensions to the * current ISP mode. Must be called just before sh_css_start(). @@ -479,16 +491,20 @@ int atomisp_acc_load_extensions(struct atomisp_sub_device *asd) acc_fw->fw, acc_flag_to_pipe[i].pipe_id, acc_fw->type); - if (ret) + if (ret) { + atomisp_acc_unload_some_extensions(asd, i, acc_fw); goto error; + } ext_loaded = true; } } ret = atomisp_css_set_acc_parameters(acc_fw); - if (ret < 0) + if (ret < 0) { + atomisp_acc_unload_some_extensions(asd, i, acc_fw); goto error; + } } if (!ext_loaded) @@ -497,6 +513,7 @@ int atomisp_acc_load_extensions(struct atomisp_sub_device *asd) ret = atomisp_css_update_stream(asd); if (ret) { dev_err(isp->dev, "%s: update stream failed.\n", __func__); + atomisp_acc_unload_extensions(asd); goto error; } @@ -504,13 +521,6 @@ int atomisp_acc_load_extensions(struct atomisp_sub_device *asd) return 0; error: - while (--i >= 0) { - if (acc_fw->flags & acc_flag_to_pipe[i].flag) { - atomisp_css_unload_acc_extension(asd, acc_fw->fw, - acc_flag_to_pipe[i].pipe_id); - } - } - list_for_each_entry_continue_reverse(acc_fw, &asd->acc.fw, list) { 
if (acc_fw->type != ATOMISP_ACC_FW_LOAD_TYPE_OUTPUT && acc_fw->type != ATOMISP_ACC_FW_LOAD_TYPE_VIEWFINDER) diff --git a/drivers/staging/media/atomisp/pci/atomisp_gmin_platform.c b/drivers/staging/media/atomisp/pci/atomisp_gmin_platform.c index 1cc581074ba76..9a194fbb305b7 100644 --- a/drivers/staging/media/atomisp/pci/atomisp_gmin_platform.c +++ b/drivers/staging/media/atomisp/pci/atomisp_gmin_platform.c @@ -748,6 +748,21 @@ static int axp_regulator_set(struct device *dev, struct gmin_subdev *gs, return 0; } +/* + * Some boards contain a hw-bug where turning eldo2 back on after having turned + * it off causes the CPLM3218 ambient-light-sensor on the image-sensor's I2C bus + * to crash, hanging the bus. Do not turn eldo2 off on these systems. + */ +static const struct dmi_system_id axp_leave_eldo2_on_ids[] = { + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "TrekStor"), + DMI_MATCH(DMI_PRODUCT_NAME, "SurfTab duo W1 10.1 (VT4)"), + }, + }, + { } +}; + static int axp_v1p8_on(struct device *dev, struct gmin_subdev *gs) { int ret; @@ -782,6 +797,9 @@ static int axp_v1p8_off(struct device *dev, struct gmin_subdev *gs) if (ret) return ret; + if (dmi_check_system(axp_leave_eldo2_on_ids)) + return 0; + ret = axp_regulator_set(dev, gs, gs->eldo2_sel_reg, gs->eldo2_1p8v, ELDO_CTRL_REG, gs->eldo2_ctrl_shift, false); return ret; diff --git a/drivers/staging/media/atomisp/pci/hmm/hmm.c b/drivers/staging/media/atomisp/pci/hmm/hmm.c index 6a5ee46070898..c1cda16f2dc01 100644 --- a/drivers/staging/media/atomisp/pci/hmm/hmm.c +++ b/drivers/staging/media/atomisp/pci/hmm/hmm.c @@ -39,7 +39,7 @@ struct hmm_bo_device bo_device; struct hmm_pool dynamic_pool; struct hmm_pool reserved_pool; -static ia_css_ptr dummy_ptr; +static ia_css_ptr dummy_ptr = mmgr_EXCEPTION; static bool hmm_initialized; struct _hmm_mem_stat hmm_mem_stat; @@ -209,7 +209,7 @@ int hmm_init(void) void hmm_cleanup(void) { - if (!dummy_ptr) + if (dummy_ptr == mmgr_EXCEPTION) return; sysfs_remove_group(&atomisp_dev->kobj, 
atomisp_attribute_group); @@ -288,7 +288,8 @@ void hmm_free(ia_css_ptr virt) dev_dbg(atomisp_dev, "%s: free 0x%08x\n", __func__, virt); - WARN_ON(!virt); + if (WARN_ON(virt == mmgr_EXCEPTION)) + return; bo = hmm_bo_device_search_start(&bo_device, (unsigned int)virt); diff --git a/drivers/staging/media/hantro/hantro_h1_jpeg_enc.c b/drivers/staging/media/hantro/hantro_h1_jpeg_enc.c index 1450013d3685d..c5d32048d90ff 100644 --- a/drivers/staging/media/hantro/hantro_h1_jpeg_enc.c +++ b/drivers/staging/media/hantro/hantro_h1_jpeg_enc.c @@ -23,7 +23,7 @@ static void hantro_h1_set_src_img_ctrl(struct hantro_dev *vpu, reg = H1_REG_IN_IMG_CTRL_ROW_LEN(pix_fmt->width) | H1_REG_IN_IMG_CTRL_OVRFLR_D4(0) - | H1_REG_IN_IMG_CTRL_OVRFLB_D4(0) + | H1_REG_IN_IMG_CTRL_OVRFLB(0) | H1_REG_IN_IMG_CTRL_FMT(ctx->vpu_src_fmt->enc_fmt); vepu_write_relaxed(vpu, reg, H1_REG_IN_IMG_CTRL); } diff --git a/drivers/staging/media/hantro/hantro_h1_regs.h b/drivers/staging/media/hantro/hantro_h1_regs.h index d6e9825bb5c7b..30e7e7b920b55 100644 --- a/drivers/staging/media/hantro/hantro_h1_regs.h +++ b/drivers/staging/media/hantro/hantro_h1_regs.h @@ -47,7 +47,7 @@ #define H1_REG_IN_IMG_CTRL 0x03c #define H1_REG_IN_IMG_CTRL_ROW_LEN(x) ((x) << 12) #define H1_REG_IN_IMG_CTRL_OVRFLR_D4(x) ((x) << 10) -#define H1_REG_IN_IMG_CTRL_OVRFLB_D4(x) ((x) << 6) +#define H1_REG_IN_IMG_CTRL_OVRFLB(x) ((x) << 6) #define H1_REG_IN_IMG_CTRL_FMT(x) ((x) << 2) #define H1_REG_ENC_CTRL0 0x040 #define H1_REG_ENC_CTRL0_INIT_QP(x) ((x) << 26) diff --git a/drivers/staging/media/hantro/sunxi_vpu_hw.c b/drivers/staging/media/hantro/sunxi_vpu_hw.c index 90633406c4eb8..c0edd5856a0c8 100644 --- a/drivers/staging/media/hantro/sunxi_vpu_hw.c +++ b/drivers/staging/media/hantro/sunxi_vpu_hw.c @@ -29,10 +29,10 @@ static const struct hantro_fmt sunxi_vpu_dec_fmts[] = { .frmsize = { .min_width = 48, .max_width = 3840, - .step_width = MB_DIM, + .step_width = 32, .min_height = 48, .max_height = 2160, - .step_height = MB_DIM, + .step_height = 
32, }, }, }; diff --git a/drivers/staging/media/imx/imx7-mipi-csis.c b/drivers/staging/media/imx/imx7-mipi-csis.c index 2b73fa55c938b..9ea723bb5f209 100644 --- a/drivers/staging/media/imx/imx7-mipi-csis.c +++ b/drivers/staging/media/imx/imx7-mipi-csis.c @@ -32,7 +32,6 @@ #include #define CSIS_DRIVER_NAME "imx7-mipi-csis" -#define CSIS_SUBDEV_NAME CSIS_DRIVER_NAME #define CSIS_PAD_SINK 0 #define CSIS_PAD_SOURCE 1 @@ -311,7 +310,6 @@ struct csi_state { struct reset_control *mrst; struct regulator *mipi_phy_regulator; const struct mipi_csis_info *info; - u8 index; struct v4l2_subdev sd; struct media_pad pads[CSIS_PADS_NUM]; @@ -1303,8 +1301,8 @@ static int mipi_csis_subdev_init(struct csi_state *state) v4l2_subdev_init(sd, &mipi_csis_subdev_ops); sd->owner = THIS_MODULE; - snprintf(sd->name, sizeof(sd->name), "%s.%d", - CSIS_SUBDEV_NAME, state->index); + snprintf(sd->name, sizeof(sd->name), "csis-%s", + dev_name(state->dev)); sd->flags |= V4L2_SUBDEV_FL_HAS_DEVNODE; sd->ctrl_handler = NULL; diff --git a/drivers/staging/media/imx/imx8mq-mipi-csi2.c b/drivers/staging/media/imx/imx8mq-mipi-csi2.c index 7adbdd14daa93..3b9fa75efac6b 100644 --- a/drivers/staging/media/imx/imx8mq-mipi-csi2.c +++ b/drivers/staging/media/imx/imx8mq-mipi-csi2.c @@ -398,9 +398,6 @@ static int imx8mq_mipi_csi_s_stream(struct v4l2_subdev *sd, int enable) struct csi_state *state = mipi_sd_to_csi2_state(sd); int ret = 0; - imx8mq_mipi_csi_write(state, CSI2RX_IRQ_MASK, - CSI2RX_IRQ_MASK_ULPS_STATUS_CHANGE); - if (enable) { ret = pm_runtime_resume_and_get(state->dev); if (ret < 0) @@ -696,7 +693,7 @@ static int imx8mq_mipi_csi_async_register(struct csi_state *state) * Suspend/resume */ -static int imx8mq_mipi_csi_pm_suspend(struct device *dev, bool runtime) +static int imx8mq_mipi_csi_pm_suspend(struct device *dev) { struct v4l2_subdev *sd = dev_get_drvdata(dev); struct csi_state *state = mipi_sd_to_csi2_state(sd); @@ -708,36 +705,21 @@ static int imx8mq_mipi_csi_pm_suspend(struct device *dev, bool 
runtime) imx8mq_mipi_csi_stop_stream(state); imx8mq_mipi_csi_clk_disable(state); state->state &= ~ST_POWERED; - if (!runtime) - state->state |= ST_SUSPENDED; } mutex_unlock(&state->lock); - ret = icc_set_bw(state->icc_path, 0, 0); - if (ret) - dev_err(dev, "icc_set_bw failed with %d\n", ret); - return ret ? -EAGAIN : 0; } -static int imx8mq_mipi_csi_pm_resume(struct device *dev, bool runtime) +static int imx8mq_mipi_csi_pm_resume(struct device *dev) { struct v4l2_subdev *sd = dev_get_drvdata(dev); struct csi_state *state = mipi_sd_to_csi2_state(sd); int ret = 0; - ret = icc_set_bw(state->icc_path, 0, state->icc_path_bw); - if (ret) { - dev_err(dev, "icc_set_bw failed with %d\n", ret); - return ret; - } - mutex_lock(&state->lock); - if (!runtime && !(state->state & ST_SUSPENDED)) - goto unlock; - if (!(state->state & ST_POWERED)) { state->state |= ST_POWERED; ret = imx8mq_mipi_csi_clk_enable(state); @@ -758,22 +740,60 @@ static int imx8mq_mipi_csi_pm_resume(struct device *dev, bool runtime) static int __maybe_unused imx8mq_mipi_csi_suspend(struct device *dev) { - return imx8mq_mipi_csi_pm_suspend(dev, false); + struct v4l2_subdev *sd = dev_get_drvdata(dev); + struct csi_state *state = mipi_sd_to_csi2_state(sd); + int ret; + + ret = imx8mq_mipi_csi_pm_suspend(dev); + if (ret) + return ret; + + state->state |= ST_SUSPENDED; + + return ret; } static int __maybe_unused imx8mq_mipi_csi_resume(struct device *dev) { - return imx8mq_mipi_csi_pm_resume(dev, false); + struct v4l2_subdev *sd = dev_get_drvdata(dev); + struct csi_state *state = mipi_sd_to_csi2_state(sd); + + if (!(state->state & ST_SUSPENDED)) + return 0; + + return imx8mq_mipi_csi_pm_resume(dev); } static int __maybe_unused imx8mq_mipi_csi_runtime_suspend(struct device *dev) { - return imx8mq_mipi_csi_pm_suspend(dev, true); + struct v4l2_subdev *sd = dev_get_drvdata(dev); + struct csi_state *state = mipi_sd_to_csi2_state(sd); + int ret; + + ret = imx8mq_mipi_csi_pm_suspend(dev); + if (ret) + return ret; + + ret 
= icc_set_bw(state->icc_path, 0, 0); + if (ret) + dev_err(dev, "icc_set_bw failed with %d\n", ret); + + return ret; } static int __maybe_unused imx8mq_mipi_csi_runtime_resume(struct device *dev) { - return imx8mq_mipi_csi_pm_resume(dev, true); + struct v4l2_subdev *sd = dev_get_drvdata(dev); + struct csi_state *state = mipi_sd_to_csi2_state(sd); + int ret; + + ret = icc_set_bw(state->icc_path, 0, state->icc_path_bw); + if (ret) { + dev_err(dev, "icc_set_bw failed with %d\n", ret); + return ret; + } + + return imx8mq_mipi_csi_pm_resume(dev); } static const struct dev_pm_ops imx8mq_mipi_csi_pm_ops = { @@ -921,7 +941,7 @@ static int imx8mq_mipi_csi_probe(struct platform_device *pdev) /* Enable runtime PM. */ pm_runtime_enable(dev); if (!pm_runtime_enabled(dev)) { - ret = imx8mq_mipi_csi_pm_resume(dev, true); + ret = imx8mq_mipi_csi_runtime_resume(dev); if (ret < 0) goto icc; } @@ -934,7 +954,7 @@ static int imx8mq_mipi_csi_probe(struct platform_device *pdev) cleanup: pm_runtime_disable(&pdev->dev); - imx8mq_mipi_csi_pm_suspend(&pdev->dev, true); + imx8mq_mipi_csi_runtime_suspend(&pdev->dev); media_entity_cleanup(&state->sd.entity); v4l2_async_nf_unregister(&state->notifier); @@ -958,7 +978,7 @@ static int imx8mq_mipi_csi_remove(struct platform_device *pdev) v4l2_async_unregister_subdev(&state->sd); pm_runtime_disable(&pdev->dev); - imx8mq_mipi_csi_pm_suspend(&pdev->dev, true); + imx8mq_mipi_csi_runtime_suspend(&pdev->dev); media_entity_cleanup(&state->sd.entity); mutex_destroy(&state->lock); pm_runtime_set_suspended(&pdev->dev); diff --git a/drivers/staging/media/meson/vdec/esparser.c b/drivers/staging/media/meson/vdec/esparser.c index db7022707ff8d..86ccc8937afca 100644 --- a/drivers/staging/media/meson/vdec/esparser.c +++ b/drivers/staging/media/meson/vdec/esparser.c @@ -328,7 +328,12 @@ esparser_queue(struct amvdec_session *sess, struct vb2_v4l2_buffer *vbuf) offset = esparser_get_offset(sess); - amvdec_add_ts(sess, vb->timestamp, vbuf->timecode, offset, 
vbuf->flags); + ret = amvdec_add_ts(sess, vb->timestamp, vbuf->timecode, offset, vbuf->flags); + if (ret) { + v4l2_m2m_buf_done(vbuf, VB2_BUF_STATE_ERROR); + return ret; + } + dev_dbg(core->dev, "esparser: ts = %llu pld_size = %u offset = %08X flags = %08X\n", vb->timestamp, payload_size, offset, vbuf->flags); diff --git a/drivers/staging/media/meson/vdec/vdec_helpers.c b/drivers/staging/media/meson/vdec/vdec_helpers.c index 203d7afa085d7..7d2a756532503 100644 --- a/drivers/staging/media/meson/vdec/vdec_helpers.c +++ b/drivers/staging/media/meson/vdec/vdec_helpers.c @@ -227,13 +227,16 @@ int amvdec_set_canvases(struct amvdec_session *sess, } EXPORT_SYMBOL_GPL(amvdec_set_canvases); -void amvdec_add_ts(struct amvdec_session *sess, u64 ts, - struct v4l2_timecode tc, u32 offset, u32 vbuf_flags) +int amvdec_add_ts(struct amvdec_session *sess, u64 ts, + struct v4l2_timecode tc, u32 offset, u32 vbuf_flags) { struct amvdec_timestamp *new_ts; unsigned long flags; new_ts = kzalloc(sizeof(*new_ts), GFP_KERNEL); + if (!new_ts) + return -ENOMEM; + new_ts->ts = ts; new_ts->tc = tc; new_ts->offset = offset; @@ -242,6 +245,7 @@ void amvdec_add_ts(struct amvdec_session *sess, u64 ts, spin_lock_irqsave(&sess->ts_spinlock, flags); list_add_tail(&new_ts->list, &sess->timestamps); spin_unlock_irqrestore(&sess->ts_spinlock, flags); + return 0; } EXPORT_SYMBOL_GPL(amvdec_add_ts); diff --git a/drivers/staging/media/meson/vdec/vdec_helpers.h b/drivers/staging/media/meson/vdec/vdec_helpers.h index 88137d15aa3ad..4bf3e61d081b3 100644 --- a/drivers/staging/media/meson/vdec/vdec_helpers.h +++ b/drivers/staging/media/meson/vdec/vdec_helpers.h @@ -56,8 +56,8 @@ void amvdec_dst_buf_done_offset(struct amvdec_session *sess, * @offset: offset in the VIFIFO where the associated packet was written * @flags: the vb2_v4l2_buffer flags */ -void amvdec_add_ts(struct amvdec_session *sess, u64 ts, - struct v4l2_timecode tc, u32 offset, u32 flags); +int amvdec_add_ts(struct amvdec_session *sess, u64 ts, + 
struct v4l2_timecode tc, u32 offset, u32 flags); void amvdec_remove_ts(struct amvdec_session *sess, u64 ts); /** diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_h264.c b/drivers/staging/media/sunxi/cedrus/cedrus_h264.c index b4173a8926d69..d8fb93035470e 100644 --- a/drivers/staging/media/sunxi/cedrus/cedrus_h264.c +++ b/drivers/staging/media/sunxi/cedrus/cedrus_h264.c @@ -38,7 +38,7 @@ struct cedrus_h264_sram_ref_pic { #define CEDRUS_H264_FRAME_NUM 18 -#define CEDRUS_NEIGHBOR_INFO_BUF_SIZE (16 * SZ_1K) +#define CEDRUS_NEIGHBOR_INFO_BUF_SIZE (32 * SZ_1K) #define CEDRUS_MIN_PIC_INFO_BUF_SIZE (130 * SZ_1K) static void cedrus_h264_write_sram(struct cedrus_dev *dev, diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_h265.c b/drivers/staging/media/sunxi/cedrus/cedrus_h265.c index 8829a7bab07ec..ffade5cbd2e40 100644 --- a/drivers/staging/media/sunxi/cedrus/cedrus_h265.c +++ b/drivers/staging/media/sunxi/cedrus/cedrus_h265.c @@ -23,7 +23,7 @@ * Subsequent BSP implementations seem to double the neighbor info buffer size * for the H6 SoC, which may be related to 10 bit H265 support. 
*/ -#define CEDRUS_H265_NEIGHBOR_INFO_BUF_SIZE (397 * SZ_1K) +#define CEDRUS_H265_NEIGHBOR_INFO_BUF_SIZE (794 * SZ_1K) #define CEDRUS_H265_ENTRY_POINTS_BUF_SIZE (4 * SZ_1K) #define CEDRUS_H265_MV_COL_BUF_UNIT_CTB_SIZE 160 diff --git a/drivers/staging/media/zoran/zoran.h b/drivers/staging/media/zoran/zoran.h index b1ad2a2b914cd..50d5a7acfab6c 100644 --- a/drivers/staging/media/zoran/zoran.h +++ b/drivers/staging/media/zoran/zoran.h @@ -313,6 +313,6 @@ static inline struct zoran *to_zoran(struct v4l2_device *v4l2_dev) #endif -int zoran_queue_init(struct zoran *zr, struct vb2_queue *vq); +int zoran_queue_init(struct zoran *zr, struct vb2_queue *vq, int dir); void zoran_queue_exit(struct zoran *zr); int zr_set_buf(struct zoran *zr); diff --git a/drivers/staging/media/zoran/zoran_card.c b/drivers/staging/media/zoran/zoran_card.c index f259585b06897..11d415c0c05d2 100644 --- a/drivers/staging/media/zoran/zoran_card.c +++ b/drivers/staging/media/zoran/zoran_card.c @@ -803,6 +803,52 @@ int zoran_check_jpg_settings(struct zoran *zr, return 0; } +static int zoran_init_video_device(struct zoran *zr, struct video_device *video_dev, int dir) +{ + int err; + + /* Now add the template and register the device unit. */ + *video_dev = zoran_template; + video_dev->v4l2_dev = &zr->v4l2_dev; + video_dev->lock = &zr->lock; + video_dev->device_caps = V4L2_CAP_STREAMING | V4L2_CAP_READWRITE | dir; + + strscpy(video_dev->name, ZR_DEVNAME(zr), sizeof(video_dev->name)); + /* + * It's not a mem2mem device, but you can both capture and output from one and the same + * device. This should really be split up into two device nodes, but that's a job for + * another day. 
+ */ + video_dev->vfl_dir = VFL_DIR_M2M; + zoran_queue_init(zr, &zr->vq, V4L2_BUF_TYPE_VIDEO_CAPTURE); + + err = video_register_device(video_dev, VFL_TYPE_VIDEO, video_nr[zr->id]); + if (err < 0) + return err; + video_set_drvdata(video_dev, zr); + return 0; +} + +static void zoran_exit_video_devices(struct zoran *zr) +{ + video_unregister_device(zr->video_dev); + kfree(zr->video_dev); +} + +static int zoran_init_video_devices(struct zoran *zr) +{ + int err; + + zr->video_dev = video_device_alloc(); + if (!zr->video_dev) + return -ENOMEM; + + err = zoran_init_video_device(zr, zr->video_dev, V4L2_CAP_VIDEO_CAPTURE); + if (err) + kfree(zr->video_dev); + return err; +} + void zoran_open_init_params(struct zoran *zr) { int i; @@ -874,17 +920,11 @@ static int zr36057_init(struct zoran *zr) zoran_open_init_params(zr); /* allocate memory *before* doing anything to the hardware in case allocation fails */ - zr->video_dev = video_device_alloc(); - if (!zr->video_dev) { - err = -ENOMEM; - goto exit; - } zr->stat_com = dma_alloc_coherent(&zr->pci_dev->dev, BUZ_NUM_STAT_COM * sizeof(u32), &zr->p_sc, GFP_KERNEL); if (!zr->stat_com) { - err = -ENOMEM; - goto exit_video; + return -ENOMEM; } for (j = 0; j < BUZ_NUM_STAT_COM; j++) zr->stat_com[j] = cpu_to_le32(1); /* mark as unavailable to zr36057 */ @@ -897,26 +937,9 @@ static int zr36057_init(struct zoran *zr) goto exit_statcom; } - /* Now add the template and register the device unit. */ - *zr->video_dev = zoran_template; - zr->video_dev->v4l2_dev = &zr->v4l2_dev; - zr->video_dev->lock = &zr->lock; - zr->video_dev->device_caps = V4L2_CAP_STREAMING | V4L2_CAP_VIDEO_CAPTURE; - - strscpy(zr->video_dev->name, ZR_DEVNAME(zr), sizeof(zr->video_dev->name)); - /* - * It's not a mem2mem device, but you can both capture and output from one and the same - * device. This should really be split up into two device nodes, but that's a job for - * another day. 
- */ - zr->video_dev->vfl_dir = VFL_DIR_M2M; - - zoran_queue_init(zr, &zr->vq); - - err = video_register_device(zr->video_dev, VFL_TYPE_VIDEO, video_nr[zr->id]); - if (err < 0) + err = zoran_init_video_devices(zr); + if (err) goto exit_statcomb; - video_set_drvdata(zr->video_dev, zr); zoran_init_hardware(zr); if (!pass_through) { @@ -931,9 +954,6 @@ static int zr36057_init(struct zoran *zr) dma_free_coherent(&zr->pci_dev->dev, BUZ_NUM_STAT_COM * sizeof(u32) * 2, zr->stat_comb, zr->p_scb); exit_statcom: dma_free_coherent(&zr->pci_dev->dev, BUZ_NUM_STAT_COM * sizeof(u32), zr->stat_com, zr->p_sc); -exit_video: - kfree(zr->video_dev); -exit: return err; } @@ -965,7 +985,7 @@ static void zoran_remove(struct pci_dev *pdev) dma_free_coherent(&zr->pci_dev->dev, BUZ_NUM_STAT_COM * sizeof(u32) * 2, zr->stat_comb, zr->p_scb); pci_release_regions(pdev); pci_disable_device(zr->pci_dev); - video_unregister_device(zr->video_dev); + zoran_exit_video_devices(zr); exit_free: v4l2_ctrl_handler_free(&zr->hdl); v4l2_device_unregister(&zr->v4l2_dev); @@ -1069,8 +1089,10 @@ static int zoran_probe(struct pci_dev *pdev, const struct pci_device_id *ent) err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); if (err) - return -ENODEV; - vb2_dma_contig_set_max_seg_size(&pdev->dev, DMA_BIT_MASK(32)); + return err; + err = vb2_dma_contig_set_max_seg_size(&pdev->dev, U32_MAX); + if (err) + return err; nr = zoran_num++; if (nr >= BUZ_MAX) { diff --git a/drivers/staging/media/zoran/zoran_device.c b/drivers/staging/media/zoran/zoran_device.c index 5b12a730a2290..fb1f0465ca87f 100644 --- a/drivers/staging/media/zoran/zoran_device.c +++ b/drivers/staging/media/zoran/zoran_device.c @@ -814,7 +814,7 @@ static void zoran_reap_stat_com(struct zoran *zr) if (zr->jpg_settings.tmp_dcm == 1) i = (zr->jpg_dma_tail - zr->jpg_err_shift) & BUZ_MASK_STAT_COM; else - i = ((zr->jpg_dma_tail - zr->jpg_err_shift) & 1) * 2 + 1; + i = ((zr->jpg_dma_tail - zr->jpg_err_shift) & 1) * 2; stat_com = 
le32_to_cpu(zr->stat_com[i]); if ((stat_com & 1) == 0) { @@ -826,6 +826,11 @@ static void zoran_reap_stat_com(struct zoran *zr) size = (stat_com & GENMASK(22, 1)) >> 1; buf = zr->inuse[i]; + if (!buf) { + spin_unlock_irqrestore(&zr->queued_bufs_lock, flags); + pci_err(zr->pci_dev, "No buffer at slot %d\n", i); + return; + } buf->vbuf.vb2_buf.timestamp = ktime_get_ns(); if (zr->codec_mode == BUZ_MODE_MOTION_COMPRESS) { diff --git a/drivers/staging/media/zoran/zoran_driver.c b/drivers/staging/media/zoran/zoran_driver.c index 46382e43f1bf7..84665637ebb79 100644 --- a/drivers/staging/media/zoran/zoran_driver.c +++ b/drivers/staging/media/zoran/zoran_driver.c @@ -255,8 +255,6 @@ static int zoran_querycap(struct file *file, void *__fh, struct v4l2_capability strscpy(cap->card, ZR_DEVNAME(zr), sizeof(cap->card)); strscpy(cap->driver, "zoran", sizeof(cap->driver)); snprintf(cap->bus_info, sizeof(cap->bus_info), "PCI:%s", pci_name(zr->pci_dev)); - cap->device_caps = zr->video_dev->device_caps; - cap->capabilities = cap->device_caps | V4L2_CAP_DEVICE_CAPS; return 0; } @@ -582,6 +580,9 @@ static int zoran_s_std(struct file *file, void *__fh, v4l2_std_id std) struct zoran *zr = video_drvdata(file); int res = 0; + if (zr->norm == std) + return 0; + if (zr->running != ZORAN_MAP_MODE_NONE) return -EBUSY; @@ -739,6 +740,7 @@ static int zoran_g_parm(struct file *file, void *priv, struct v4l2_streamparm *p if (parm->type != V4L2_BUF_TYPE_VIDEO_CAPTURE) return -EINVAL; + parm->parm.capture.readbuffers = 9; return 0; } @@ -869,6 +871,10 @@ int zr_set_buf(struct zoran *zr) vbuf = &buf->vbuf; buf->vbuf.field = V4L2_FIELD_INTERLACED; + if (BUZ_MAX_HEIGHT < (zr->v4l_settings.height * 2)) + buf->vbuf.field = V4L2_FIELD_INTERLACED; + else + buf->vbuf.field = V4L2_FIELD_TOP; vb2_set_plane_payload(&buf->vbuf.vb2_buf, 0, zr->buffer_size); vb2_buffer_done(&buf->vbuf.vb2_buf, VB2_BUF_STATE_DONE); zr->inuse[0] = NULL; @@ -928,6 +934,7 @@ static int zr_vb2_start_streaming(struct vb2_queue *vq, 
unsigned int count) zr->stat_com[j] = cpu_to_le32(1); zr->inuse[j] = NULL; } + zr->vbseq = 0; if (zr->map_mode != ZORAN_MAP_MODE_RAW) { pci_info(zr->pci_dev, "START JPG\n"); @@ -1008,7 +1015,7 @@ static const struct vb2_ops zr_video_qops = { .wait_finish = vb2_ops_wait_finish, }; -int zoran_queue_init(struct zoran *zr, struct vb2_queue *vq) +int zoran_queue_init(struct zoran *zr, struct vb2_queue *vq, int dir) { int err; @@ -1016,8 +1023,9 @@ int zoran_queue_init(struct zoran *zr, struct vb2_queue *vq) INIT_LIST_HEAD(&zr->queued_bufs); vq->dev = &zr->pci_dev->dev; - vq->type = V4L2_BUF_TYPE_VIDEO_CAPTURE; - vq->io_modes = VB2_USERPTR | VB2_DMABUF | VB2_MMAP | VB2_READ | VB2_WRITE; + vq->type = dir; + + vq->io_modes = VB2_DMABUF | VB2_MMAP | VB2_READ | VB2_WRITE; vq->drv_priv = zr; vq->buf_struct_size = sizeof(struct zr_buffer); vq->ops = &zr_video_qops; diff --git a/drivers/staging/mt7621-dts/gbpc1.dts b/drivers/staging/mt7621-dts/gbpc1.dts index e38a083811e54..5ae94b1ad5998 100644 --- a/drivers/staging/mt7621-dts/gbpc1.dts +++ b/drivers/staging/mt7621-dts/gbpc1.dts @@ -12,7 +12,8 @@ memory@0 { device_type = "memory"; - reg = <0x0 0x1c000000>, <0x20000000 0x4000000>; + reg = <0x00000000 0x1c000000>, + <0x20000000 0x04000000>; }; chosen { @@ -38,24 +39,16 @@ gpio-leds { compatible = "gpio-leds"; - system { - label = "gb-pc1:green:system"; + power { + label = "green:power"; gpios = <&gpio 6 GPIO_ACTIVE_LOW>; + linux,default-trigger = "default-on"; }; - status { - label = "gb-pc1:green:status"; + system { + label = "green:system"; gpios = <&gpio 8 GPIO_ACTIVE_LOW>; - }; - - lan1 { - label = "gb-pc1:green:lan1"; - gpios = <&gpio 24 GPIO_ACTIVE_LOW>; - }; - - lan2 { - label = "gb-pc1:green:lan2"; - gpios = <&gpio 25 GPIO_ACTIVE_LOW>; + linux,default-trigger = "disk-activity"; }; }; }; @@ -95,9 +88,8 @@ partition@50000 { label = "firmware"; - reg = <0x50000 0x1FB0000>; + reg = <0x50000 0x1fb0000>; }; - }; }; @@ -106,9 +98,12 @@ }; &pinctrl { - state_default: pinctrl0 { - 
- default_gpio: gpio { - groups = "wdt", "rgmii2", "uart3"; + pinctrl-names = "default"; + pinctrl-0 = <&state_default>; + + state_default: state-default { + gpio-pinmux { + groups = "rgmii2", "uart3", "wdt"; function = "gpio"; }; }; @@ -117,12 +112,13 @@ &switch0 { ports { port@0 { + status = "okay"; label = "ethblack"; - status = "ok"; }; + port@4 { + status = "okay"; label = "ethblue"; - status = "ok"; }; }; }; diff --git a/drivers/staging/mt7621-dts/gbpc2.dts b/drivers/staging/mt7621-dts/gbpc2.dts index 6fe603c7711d7..a7fce8de61472 100644 --- a/drivers/staging/mt7621-dts/gbpc2.dts +++ b/drivers/staging/mt7621-dts/gbpc2.dts @@ -1,22 +1,122 @@ // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) /dts-v1/; -#include "gbpc1.dts" +#include "mt7621.dtsi" + +#include <dt-bindings/gpio/gpio.h> +#include <dt-bindings/input/input.h> / { compatible = "gnubee,gb-pc2", "mediatek,mt7621-soc"; model = "GB-PC2"; + + memory@0 { + device_type = "memory"; + reg = <0x00000000 0x1c000000>, + <0x20000000 0x04000000>; + }; + + chosen { + bootargs = "console=ttyS0,57600"; + }; + + palmbus: palmbus@1e000000 { + i2c@900 { + status = "okay"; + }; + }; + + gpio-keys { + compatible = "gpio-keys"; + + reset { + label = "reset"; + gpios = <&gpio 18 GPIO_ACTIVE_HIGH>; + linux,code = <KEY_RESTART>; + }; + }; +}; + +&sdhci { + status = "okay"; +}; + +&spi0 { + status = "okay"; + + m25p80@0 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "jedec,spi-nor"; + reg = <0>; + spi-max-frequency = <50000000>; + broken-flash-reset; + + partition@0 { + label = "u-boot"; + reg = <0x0 0x30000>; + read-only; + }; + + partition@30000 { + label = "u-boot-env"; + reg = <0x30000 0x10000>; + read-only; + }; + + factory: partition@40000 { + label = "factory"; + reg = <0x40000 0x10000>; + read-only; + }; + + partition@50000 { + label = "firmware"; + reg = <0x50000 0x1fb0000>; + }; + }; }; -&default_gpio { - groups = "wdt", "uart3"; - function = "gpio"; +&pcie { + status = "okay"; }; -&gmac1 { - status = "ok"; +&pinctrl { + pinctrl-names = "default"; + pinctrl-0 = 
<&state_default>; + + state_default: state-default { + gpio-pinmux { + groups = "wdt"; + function = "gpio"; + }; + }; }; -&phy_external { - status = "ok"; +&ethernet { + gmac1: mac@1 { + status = "okay"; + phy-handle = <&ethphy7>; + }; + + mdio-bus { + ethphy7: ethernet-phy@7 { + reg = <7>; + phy-mode = "rgmii-rxid"; + }; + }; +}; + +&switch0 { + ports { + port@0 { + status = "okay"; + label = "ethblack"; + }; + + port@4 { + status = "okay"; + label = "ethblue"; + }; + }; }; diff --git a/drivers/staging/mt7621-dts/mt7621.dtsi b/drivers/staging/mt7621-dts/mt7621.dtsi index 644a65d1a6a16..786cdb5fc4da1 100644 --- a/drivers/staging/mt7621-dts/mt7621.dtsi +++ b/drivers/staging/mt7621-dts/mt7621.dtsi @@ -44,9 +44,9 @@ regulator-max-microvolt = <3300000>; enable-active-high; regulator-always-on; - }; + }; - mmc_fixed_1v8_io: fixedregulator@1 { + mmc_fixed_1v8_io: fixedregulator@1 { compatible = "regulator-fixed"; regulator-name = "mmc_io"; regulator-min-microvolt = <1800000>; @@ -325,37 +325,32 @@ mediatek,ethsys = <&sysc>; + pinctrl-names = "default"; + pinctrl-0 = <&mdio_pins>, <&rgmii1_pins>, <&rgmii2_pins>; gmac0: mac@0 { compatible = "mediatek,eth-mac"; reg = <0>; phy-mode = "rgmii"; + fixed-link { speed = <1000>; full-duplex; pause; }; }; + gmac1: mac@1 { compatible = "mediatek,eth-mac"; reg = <1>; status = "off"; phy-mode = "rgmii-rxid"; - phy-handle = <&phy_external>; }; + mdio-bus { #address-cells = <1>; #size-cells = <0>; - phy_external: ethernet-phy@5 { - status = "off"; - reg = <5>; - phy-mode = "rgmii-rxid"; - - pinctrl-names = "default"; - pinctrl-0 = <&rgmii2_pins>; - }; - switch0: switch0@0 { compatible = "mediatek,mt7621"; #address-cells = <1>; @@ -373,36 +368,43 @@ #address-cells = <1>; #size-cells = <0>; reg = <0>; + port@0 { status = "off"; reg = <0>; label = "lan0"; }; + port@1 { status = "off"; reg = <1>; label = "lan1"; }; + port@2 { status = "off"; reg = <2>; label = "lan2"; }; + port@3 { status = "off"; reg = <3>; label = "lan3"; }; + port@4 { status = 
"off"; reg = <4>; label = "lan4"; }; + port@6 { reg = <6>; label = "cpu"; ethernet = <&gmac0>; phy-mode = "trgmii"; + fixed-link { speed = <1000>; full-duplex; diff --git a/drivers/staging/qlge/qlge_main.c b/drivers/staging/qlge/qlge_main.c index 9873bb2a9ee4f..113a3efd12e95 100644 --- a/drivers/staging/qlge/qlge_main.c +++ b/drivers/staging/qlge/qlge_main.c @@ -4605,14 +4605,12 @@ static int qlge_probe(struct pci_dev *pdev, err = register_netdev(ndev); if (err) { dev_err(&pdev->dev, "net device registration failed.\n"); - qlge_release_all(pdev); - pci_disable_device(pdev); - goto netdev_free; + goto cleanup_pdev; } err = qlge_health_create_reporters(qdev); if (err) - goto netdev_free; + goto unregister_netdev; /* Start up the timer to trigger EEH if * the bus goes dead @@ -4626,6 +4624,11 @@ static int qlge_probe(struct pci_dev *pdev, devlink_register(devlink); return 0; +unregister_netdev: + unregister_netdev(ndev); +cleanup_pdev: + qlge_release_all(pdev); + pci_disable_device(pdev); netdev_free: free_netdev(ndev); devlink_free: diff --git a/drivers/staging/r8188eu/core/rtw_recv.c b/drivers/staging/r8188eu/core/rtw_recv.c index 51a13262a226f..d120d61454a35 100644 --- a/drivers/staging/r8188eu/core/rtw_recv.c +++ b/drivers/staging/r8188eu/core/rtw_recv.c @@ -1853,8 +1853,7 @@ static int recv_func(struct adapter *padapter, struct recv_frame *rframe) struct recv_frame *pending_frame; int cnt = 0; - pending_frame = rtw_alloc_recvframe(&padapter->recvpriv.uc_swdec_pending_queue); - while (pending_frame) { + while ((pending_frame = rtw_alloc_recvframe(&padapter->recvpriv.uc_swdec_pending_queue))) { cnt++; recv_func_posthandle(padapter, pending_frame); } diff --git a/drivers/staging/r8188eu/hal/rtl8188e_hal_init.c b/drivers/staging/r8188eu/hal/rtl8188e_hal_init.c index b818872e0d194..31a9b7500a7b6 100644 --- a/drivers/staging/r8188eu/hal/rtl8188e_hal_init.c +++ b/drivers/staging/r8188eu/hal/rtl8188e_hal_init.c @@ -538,10 +538,10 @@ static int load_firmware(struct 
rt_firmware *pFirmware, struct device *device) } memcpy(pFirmware->szFwBuffer, fw->data, fw->size); pFirmware->ulFwLength = fw->size; - release_firmware(fw); dev_dbg(device, "!bUsedWoWLANFw, FmrmwareLen:%d+\n", pFirmware->ulFwLength); Exit: + release_firmware(fw); return rtStatus; } diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c index 3a2e4582db8e1..a3e3c9f9aa181 100644 --- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c +++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c @@ -1209,6 +1209,9 @@ int vchiq_dump_platform_instances(void *dump_context) int len; int i; + if (!state) + return -ENOTCONN; + /* * There is no list of instances, so instead scan all services, * marking those that have been dumped. diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_core.c b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_core.c index 7fe20d4b7ba28..b7295236671c1 100644 --- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_core.c +++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_core.c @@ -2306,6 +2306,9 @@ void vchiq_msg_queue_push(unsigned int handle, struct vchiq_header *header) struct vchiq_service *service = find_service_by_handle(handle); int pos; + if (!service) + return; + while (service->msg_queue_write == service->msg_queue_read + VCHIQ_MAX_SLOTS) { if (wait_for_completion_interruptible(&service->msg_queue_pop)) @@ -2326,6 +2329,9 @@ struct vchiq_header *vchiq_msg_hold(unsigned int handle) struct vchiq_header *header; int pos; + if (!service) + return NULL; + if (service->msg_queue_write == service->msg_queue_read) return NULL; diff --git a/drivers/staging/wfx/bus_sdio.c b/drivers/staging/wfx/bus_sdio.c index a670176ba06f0..0612f8a7c0857 100644 --- a/drivers/staging/wfx/bus_sdio.c +++ b/drivers/staging/wfx/bus_sdio.c @@ -207,9 +207,6 @@ static int wfx_sdio_probe(struct sdio_func *func, bus->func = func; 
sdio_set_drvdata(func, bus); - func->card->quirks |= MMC_QUIRK_LENIENT_FN0 | - MMC_QUIRK_BLKSZ_FOR_BYTE_MODE | - MMC_QUIRK_BROKEN_BYTE_MODE_512; sdio_claim_host(func); ret = sdio_enable_func(func); diff --git a/drivers/staging/wfx/main.c b/drivers/staging/wfx/main.c index 858d778cc5897..e3999e95ce851 100644 --- a/drivers/staging/wfx/main.c +++ b/drivers/staging/wfx/main.c @@ -322,7 +322,8 @@ struct wfx_dev *wfx_init_common(struct device *dev, wdev->pdata.gpio_wakeup = devm_gpiod_get_optional(dev, "wakeup", GPIOD_OUT_LOW); if (IS_ERR(wdev->pdata.gpio_wakeup)) - return NULL; + goto err; + if (wdev->pdata.gpio_wakeup) gpiod_set_consumer_name(wdev->pdata.gpio_wakeup, "wfx wakeup"); @@ -341,6 +342,10 @@ struct wfx_dev *wfx_init_common(struct device *dev, return NULL; return wdev; + +err: + ieee80211_free_hw(hw); + return NULL; } int wfx_probe(struct wfx_dev *wdev) diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 7b2a89a67cdba..06a5c40865513 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -1820,6 +1820,7 @@ static struct page *tcmu_try_get_data_page(struct tcmu_dev *udev, uint32_t dpi) mutex_lock(&udev->cmdr_lock); page = xa_load(&udev->data_pages, dpi); if (likely(page)) { + get_page(page); mutex_unlock(&udev->cmdr_lock); return page; } @@ -1876,6 +1877,7 @@ static vm_fault_t tcmu_vma_fault(struct vm_fault *vmf) /* For the vmalloc()ed cmd area pages */ addr = (void *)(unsigned long)info->mem[mi].addr + offset; page = vmalloc_to_page(addr); + get_page(page); } else { uint32_t dpi; @@ -1886,7 +1888,6 @@ static vm_fault_t tcmu_vma_fault(struct vm_fault *vmf) return VM_FAULT_SIGBUS; } - get_page(page); vmf->page = page; return 0; } diff --git a/drivers/thermal/intel/int340x_thermal/int3400_thermal.c b/drivers/thermal/intel/int340x_thermal/int3400_thermal.c index 4f478812cb514..a0b599100106b 100644 --- a/drivers/thermal/intel/int340x_thermal/int3400_thermal.c +++ 
b/drivers/thermal/intel/int340x_thermal/int3400_thermal.c @@ -53,7 +53,7 @@ struct int3400_thermal_priv { struct art *arts; int trt_count; struct trt *trts; - u8 uuid_bitmap; + u32 uuid_bitmap; int rel_misc_dev_res; int current_uuid_index; char *data_vault; @@ -468,6 +468,11 @@ static void int3400_setup_gddv(struct int3400_thermal_priv *priv) priv->data_vault = kmemdup(obj->package.elements[0].buffer.pointer, obj->package.elements[0].buffer.length, GFP_KERNEL); + if (!priv->data_vault) { + kfree(buffer.pointer); + return; + } + bin_attr_data_vault.private = priv->data_vault; bin_attr_data_vault.size = obj->package.elements[0].buffer.length; kfree(buffer.pointer); diff --git a/drivers/tty/hvc/hvc_iucv.c b/drivers/tty/hvc/hvc_iucv.c index 82a76cac94deb..32366caca6623 100644 --- a/drivers/tty/hvc/hvc_iucv.c +++ b/drivers/tty/hvc/hvc_iucv.c @@ -1417,7 +1417,9 @@ static int __init hvc_iucv_init(void) */ static int __init hvc_iucv_config(char *val) { - return kstrtoul(val, 10, &hvc_iucv_devices); + if (kstrtoul(val, 10, &hvc_iucv_devices)) + pr_warn("hvc_iucv= invalid parameter value '%s'\n", val); + return 1; } diff --git a/drivers/tty/mxser.c b/drivers/tty/mxser.c index c858aff721c41..fbb796f837532 100644 --- a/drivers/tty/mxser.c +++ b/drivers/tty/mxser.c @@ -744,6 +744,7 @@ static int mxser_activate(struct tty_port *port, struct tty_struct *tty) struct mxser_port *info = container_of(port, struct mxser_port, port); unsigned long page; unsigned long flags; + int ret; page = __get_free_page(GFP_KERNEL); if (!page) @@ -753,9 +754,9 @@ static int mxser_activate(struct tty_port *port, struct tty_struct *tty) if (!info->type) { set_bit(TTY_IO_ERROR, &tty->flags); - free_page(page); spin_unlock_irqrestore(&info->slock, flags); - return 0; + ret = 0; + goto err_free_xmit; } info->port.xmit_buf = (unsigned char *) page; @@ -775,8 +776,10 @@ static int mxser_activate(struct tty_port *port, struct tty_struct *tty) if (capable(CAP_SYS_ADMIN)) { set_bit(TTY_IO_ERROR, 
&tty->flags); return 0; - } else - return -ENODEV; + } + + ret = -ENODEV; + goto err_free_xmit; } /* @@ -821,6 +824,10 @@ static int mxser_activate(struct tty_port *port, struct tty_struct *tty) spin_unlock_irqrestore(&info->slock, flags); return 0; +err_free_xmit: + free_page(page); + info->port.xmit_buf = NULL; + return ret; } /* diff --git a/drivers/tty/serial/8250/8250_aspeed_vuart.c b/drivers/tty/serial/8250/8250_aspeed_vuart.c index 2350fb3bb5e4c..c2cecc6f47db4 100644 --- a/drivers/tty/serial/8250/8250_aspeed_vuart.c +++ b/drivers/tty/serial/8250/8250_aspeed_vuart.c @@ -487,7 +487,7 @@ static int aspeed_vuart_probe(struct platform_device *pdev) port.port.irq = irq_of_parse_and_map(np, 0); port.port.handle_irq = aspeed_vuart_handle_irq; port.port.iotype = UPIO_MEM; - port.port.type = PORT_16550A; + port.port.type = PORT_ASPEED_VUART; port.port.uartclk = clk; port.port.flags = UPF_SHARE_IRQ | UPF_BOOT_AUTOCONF | UPF_IOREMAP | UPF_FIXED_PORT | UPF_FIXED_TYPE | UPF_NO_THRE_TEST; diff --git a/drivers/tty/serial/8250/8250_dma.c b/drivers/tty/serial/8250/8250_dma.c index 890fa7ddaa7f3..b3c3f7e5851ab 100644 --- a/drivers/tty/serial/8250/8250_dma.c +++ b/drivers/tty/serial/8250/8250_dma.c @@ -64,10 +64,19 @@ int serial8250_tx_dma(struct uart_8250_port *p) struct uart_8250_dma *dma = p->dma; struct circ_buf *xmit = &p->port.state->xmit; struct dma_async_tx_descriptor *desc; + struct uart_port *up = &p->port; int ret; - if (dma->tx_running) + if (dma->tx_running) { + if (up->x_char) { + dmaengine_pause(dma->txchan); + uart_xchar_out(up, UART_TX); + dmaengine_resume(dma->txchan); + } return 0; + } else if (up->x_char) { + uart_xchar_out(up, UART_TX); + } if (uart_tx_stopped(&p->port) || uart_circ_empty(xmit)) { /* We have been called from __dma_tx_complete() */ diff --git a/drivers/tty/serial/8250/8250_lpss.c b/drivers/tty/serial/8250/8250_lpss.c index d3bafec7619da..0f5af061e0b45 100644 --- a/drivers/tty/serial/8250/8250_lpss.c +++ b/drivers/tty/serial/8250/8250_lpss.c 
@@ -117,8 +117,7 @@ static int byt_serial_setup(struct lpss8250 *lpss, struct uart_port *port) { struct dw_dma_slave *param = &lpss->dma_param; struct pci_dev *pdev = to_pci_dev(port->dev); - unsigned int dma_devfn = PCI_DEVFN(PCI_SLOT(pdev->devfn), 0); - struct pci_dev *dma_dev = pci_get_slot(pdev->bus, dma_devfn); + struct pci_dev *dma_dev; switch (pdev->device) { case PCI_DEVICE_ID_INTEL_BYT_UART1: @@ -137,6 +136,8 @@ static int byt_serial_setup(struct lpss8250 *lpss, struct uart_port *port) return -EINVAL; } + dma_dev = pci_get_slot(pdev->bus, PCI_DEVFN(PCI_SLOT(pdev->devfn), 0)); + param->dma_dev = &dma_dev->dev; param->m_master = 0; param->p_master = 1; @@ -152,6 +153,14 @@ static int byt_serial_setup(struct lpss8250 *lpss, struct uart_port *port) return 0; } +static void byt_serial_exit(struct lpss8250 *lpss) +{ + struct dw_dma_slave *param = &lpss->dma_param; + + /* Paired with pci_get_slot() in the byt_serial_setup() above */ + put_device(param->dma_dev); +} + static int ehl_serial_setup(struct lpss8250 *lpss, struct uart_port *port) { struct uart_8250_dma *dma = &lpss->data.dma; @@ -170,6 +179,13 @@ static int ehl_serial_setup(struct lpss8250 *lpss, struct uart_port *port) return 0; } +static void ehl_serial_exit(struct lpss8250 *lpss) +{ + struct uart_8250_port *up = serial8250_get_port(lpss->data.line); + + up->dma = NULL; +} + #ifdef CONFIG_SERIAL_8250_DMA static const struct dw_dma_platform_data qrk_serial_dma_pdata = { .nr_channels = 2, @@ -344,8 +360,7 @@ static int lpss8250_probe(struct pci_dev *pdev, const struct pci_device_id *id) return 0; err_exit: - if (lpss->board->exit) - lpss->board->exit(lpss); + lpss->board->exit(lpss); pci_free_irq_vectors(pdev); return ret; } @@ -356,8 +371,7 @@ static void lpss8250_remove(struct pci_dev *pdev) serial8250_unregister_port(lpss->data.line); - if (lpss->board->exit) - lpss->board->exit(lpss); + lpss->board->exit(lpss); pci_free_irq_vectors(pdev); } @@ -365,12 +379,14 @@ static const struct lpss8250_board 
byt_board = { .freq = 100000000, .base_baud = 2764800, .setup = byt_serial_setup, + .exit = byt_serial_exit, }; static const struct lpss8250_board ehl_board = { .freq = 200000000, .base_baud = 12500000, .setup = ehl_serial_setup, + .exit = ehl_serial_exit, }; static const struct lpss8250_board qrk_board = { diff --git a/drivers/tty/serial/8250/8250_mid.c b/drivers/tty/serial/8250/8250_mid.c index efa0515139f8e..e6c1791609ddf 100644 --- a/drivers/tty/serial/8250/8250_mid.c +++ b/drivers/tty/serial/8250/8250_mid.c @@ -73,6 +73,11 @@ static int pnw_setup(struct mid8250 *mid, struct uart_port *p) return 0; } +static void pnw_exit(struct mid8250 *mid) +{ + pci_dev_put(mid->dma_dev); +} + static int tng_handle_irq(struct uart_port *p) { struct mid8250 *mid = p->private_data; @@ -124,6 +129,11 @@ static int tng_setup(struct mid8250 *mid, struct uart_port *p) return 0; } +static void tng_exit(struct mid8250 *mid) +{ + pci_dev_put(mid->dma_dev); +} + static int dnv_handle_irq(struct uart_port *p) { struct mid8250 *mid = p->private_data; @@ -330,9 +340,9 @@ static int mid8250_probe(struct pci_dev *pdev, const struct pci_device_id *id) pci_set_drvdata(pdev, mid); return 0; + err: - if (mid->board->exit) - mid->board->exit(mid); + mid->board->exit(mid); return ret; } @@ -342,8 +352,7 @@ static void mid8250_remove(struct pci_dev *pdev) serial8250_unregister_port(mid->line); - if (mid->board->exit) - mid->board->exit(mid); + mid->board->exit(mid); } static const struct mid8250_board pnw_board = { @@ -351,6 +360,7 @@ static const struct mid8250_board pnw_board = { .freq = 50000000, .base_baud = 115200, .setup = pnw_setup, + .exit = pnw_exit, }; static const struct mid8250_board tng_board = { @@ -358,6 +368,7 @@ static const struct mid8250_board tng_board = { .freq = 38400000, .base_baud = 1843200, .setup = tng_setup, + .exit = tng_exit, }; static const struct mid8250_board dnv_board = { diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index 
3b12bfc1ed67b..9f116e75956e2 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -307,6 +307,14 @@ static const struct serial8250_config uart_config[] = { .rxtrig_bytes = {1, 32, 64, 112}, .flags = UART_CAP_FIFO | UART_CAP_SLEEP, }, + [PORT_ASPEED_VUART] = { + .name = "ASPEED VUART", + .fifo_size = 16, + .tx_loadsz = 16, + .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_00, + .rxtrig_bytes = {1, 4, 8, 14}, + .flags = UART_CAP_FIFO, + }, }; /* Uart divisor latch read */ @@ -1615,6 +1623,18 @@ static inline void start_tx_rs485(struct uart_port *port) struct uart_8250_port *up = up_to_u8250p(port); struct uart_8250_em485 *em485 = up->em485; + /* + * While serial8250_em485_handle_stop_tx() is a noop if + * em485->active_timer != &em485->stop_tx_timer, it might happen that + * the timer is still armed and triggers only after the current bunch of + * chars is send and em485->active_timer == &em485->stop_tx_timer again. + * So cancel the timer. There is still a theoretical race condition if + * the timer is already running and only comes around to check for + * em485->active_timer when &em485->stop_tx_timer is armed again. 
+ */ + if (em485->active_timer == &em485->stop_tx_timer) + hrtimer_try_to_cancel(&em485->stop_tx_timer); + em485->active_timer = NULL; if (em485->tx_stopped) { @@ -1799,9 +1819,7 @@ void serial8250_tx_chars(struct uart_8250_port *up) int count; if (port->x_char) { - serial_out(up, UART_TX, port->x_char); - port->icount.tx++; - port->x_char = 0; + uart_xchar_out(port, UART_TX); return; } if (uart_tx_stopped(port)) { diff --git a/drivers/tty/serial/kgdboc.c b/drivers/tty/serial/kgdboc.c index 49d0c7f2b29b8..79b7db8580e05 100644 --- a/drivers/tty/serial/kgdboc.c +++ b/drivers/tty/serial/kgdboc.c @@ -403,16 +403,16 @@ static int kgdboc_option_setup(char *opt) { if (!opt) { pr_err("config string not provided\n"); - return -EINVAL; + return 1; } if (strlen(opt) >= MAX_CONFIG_LEN) { pr_err("config string too long\n"); - return -ENOSPC; + return 1; } strcpy(config, opt); - return 0; + return 1; } __setup("kgdboc=", kgdboc_option_setup); diff --git a/drivers/tty/serial/samsung_tty.c b/drivers/tty/serial/samsung_tty.c index d002a4e48ed93..0d94a7cb275e5 100644 --- a/drivers/tty/serial/samsung_tty.c +++ b/drivers/tty/serial/samsung_tty.c @@ -921,11 +921,8 @@ static void s3c24xx_serial_tx_chars(struct s3c24xx_uart_port *ourport) return; } - if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) { - spin_unlock(&port->lock); + if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) uart_write_wakeup(port); - spin_lock(&port->lock); - } if (uart_circ_empty(xmit)) s3c24xx_serial_stop_tx(port); diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index 0db90be4c3bc3..f67540ae2a883 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -644,6 +644,20 @@ static void uart_flush_buffer(struct tty_struct *tty) tty_port_tty_wakeup(&state->port); } +/* + * This function performs low-level write of high-priority XON/XOFF + * character and accounting for it. + * + * Requires uart_port to implement .serial_out(). 
+ */ +void uart_xchar_out(struct uart_port *uport, int offset) +{ + serial_port_out(uport, offset, uport->x_char); + uport->icount.tx++; + uport->x_char = 0; +} +EXPORT_SYMBOL_GPL(uart_xchar_out); + /* * This function is used to send a high-priority XON/XOFF character to * the device diff --git a/drivers/usb/cdns3/cdnsp-debug.h b/drivers/usb/cdns3/cdnsp-debug.h index a8776df2d4e0c..f0ca865cce2a0 100644 --- a/drivers/usb/cdns3/cdnsp-debug.h +++ b/drivers/usb/cdns3/cdnsp-debug.h @@ -182,208 +182,211 @@ static inline const char *cdnsp_decode_trb(char *str, size_t size, u32 field0, int ep_id = TRB_TO_EP_INDEX(field3) - 1; int type = TRB_FIELD_TO_TYPE(field3); unsigned int ep_num; - int ret = 0; + int ret; u32 temp; ep_num = DIV_ROUND_UP(ep_id, 2); switch (type) { case TRB_LINK: - ret += snprintf(str, size, - "LINK %08x%08x intr %ld type '%s' flags %c:%c:%c:%c", - field1, field0, GET_INTR_TARGET(field2), - cdnsp_trb_type_string(type), - field3 & TRB_IOC ? 'I' : 'i', - field3 & TRB_CHAIN ? 'C' : 'c', - field3 & TRB_TC ? 'T' : 't', - field3 & TRB_CYCLE ? 'C' : 'c'); + ret = snprintf(str, size, + "LINK %08x%08x intr %ld type '%s' flags %c:%c:%c:%c", + field1, field0, GET_INTR_TARGET(field2), + cdnsp_trb_type_string(type), + field3 & TRB_IOC ? 'I' : 'i', + field3 & TRB_CHAIN ? 'C' : 'c', + field3 & TRB_TC ? 'T' : 't', + field3 & TRB_CYCLE ? 'C' : 'c'); break; case TRB_TRANSFER: case TRB_COMPLETION: case TRB_PORT_STATUS: case TRB_HC_EVENT: - ret += snprintf(str, size, - "ep%d%s(%d) type '%s' TRB %08x%08x status '%s'" - " len %ld slot %ld flags %c:%c", - ep_num, ep_id % 2 ? "out" : "in", - TRB_TO_EP_INDEX(field3), - cdnsp_trb_type_string(type), field1, field0, - cdnsp_trb_comp_code_string(GET_COMP_CODE(field2)), - EVENT_TRB_LEN(field2), TRB_TO_SLOT_ID(field3), - field3 & EVENT_DATA ? 'E' : 'e', - field3 & TRB_CYCLE ? 'C' : 'c'); + ret = snprintf(str, size, + "ep%d%s(%d) type '%s' TRB %08x%08x status '%s'" + " len %ld slot %ld flags %c:%c", + ep_num, ep_id % 2 ? 
"out" : "in", + TRB_TO_EP_INDEX(field3), + cdnsp_trb_type_string(type), field1, field0, + cdnsp_trb_comp_code_string(GET_COMP_CODE(field2)), + EVENT_TRB_LEN(field2), TRB_TO_SLOT_ID(field3), + field3 & EVENT_DATA ? 'E' : 'e', + field3 & TRB_CYCLE ? 'C' : 'c'); break; case TRB_MFINDEX_WRAP: - ret += snprintf(str, size, "%s: flags %c", - cdnsp_trb_type_string(type), - field3 & TRB_CYCLE ? 'C' : 'c'); + ret = snprintf(str, size, "%s: flags %c", + cdnsp_trb_type_string(type), + field3 & TRB_CYCLE ? 'C' : 'c'); break; case TRB_SETUP: - ret += snprintf(str, size, - "type '%s' bRequestType %02x bRequest %02x " - "wValue %02x%02x wIndex %02x%02x wLength %d " - "length %ld TD size %ld intr %ld Setup ID %ld " - "flags %c:%c:%c", - cdnsp_trb_type_string(type), - field0 & 0xff, - (field0 & 0xff00) >> 8, - (field0 & 0xff000000) >> 24, - (field0 & 0xff0000) >> 16, - (field1 & 0xff00) >> 8, - field1 & 0xff, - (field1 & 0xff000000) >> 16 | - (field1 & 0xff0000) >> 16, - TRB_LEN(field2), GET_TD_SIZE(field2), - GET_INTR_TARGET(field2), - TRB_SETUPID_TO_TYPE(field3), - field3 & TRB_IDT ? 'D' : 'd', - field3 & TRB_IOC ? 'I' : 'i', - field3 & TRB_CYCLE ? 'C' : 'c'); + ret = snprintf(str, size, + "type '%s' bRequestType %02x bRequest %02x " + "wValue %02x%02x wIndex %02x%02x wLength %d " + "length %ld TD size %ld intr %ld Setup ID %ld " + "flags %c:%c:%c", + cdnsp_trb_type_string(type), + field0 & 0xff, + (field0 & 0xff00) >> 8, + (field0 & 0xff000000) >> 24, + (field0 & 0xff0000) >> 16, + (field1 & 0xff00) >> 8, + field1 & 0xff, + (field1 & 0xff000000) >> 16 | + (field1 & 0xff0000) >> 16, + TRB_LEN(field2), GET_TD_SIZE(field2), + GET_INTR_TARGET(field2), + TRB_SETUPID_TO_TYPE(field3), + field3 & TRB_IDT ? 'D' : 'd', + field3 & TRB_IOC ? 'I' : 'i', + field3 & TRB_CYCLE ? 
'C' : 'c'); break; case TRB_DATA: - ret += snprintf(str, size, - "type '%s' Buffer %08x%08x length %ld TD size %ld " - "intr %ld flags %c:%c:%c:%c:%c:%c:%c", - cdnsp_trb_type_string(type), - field1, field0, TRB_LEN(field2), - GET_TD_SIZE(field2), - GET_INTR_TARGET(field2), - field3 & TRB_IDT ? 'D' : 'i', - field3 & TRB_IOC ? 'I' : 'i', - field3 & TRB_CHAIN ? 'C' : 'c', - field3 & TRB_NO_SNOOP ? 'S' : 's', - field3 & TRB_ISP ? 'I' : 'i', - field3 & TRB_ENT ? 'E' : 'e', - field3 & TRB_CYCLE ? 'C' : 'c'); + ret = snprintf(str, size, + "type '%s' Buffer %08x%08x length %ld TD size %ld " + "intr %ld flags %c:%c:%c:%c:%c:%c:%c", + cdnsp_trb_type_string(type), + field1, field0, TRB_LEN(field2), + GET_TD_SIZE(field2), + GET_INTR_TARGET(field2), + field3 & TRB_IDT ? 'D' : 'i', + field3 & TRB_IOC ? 'I' : 'i', + field3 & TRB_CHAIN ? 'C' : 'c', + field3 & TRB_NO_SNOOP ? 'S' : 's', + field3 & TRB_ISP ? 'I' : 'i', + field3 & TRB_ENT ? 'E' : 'e', + field3 & TRB_CYCLE ? 'C' : 'c'); break; case TRB_STATUS: - ret += snprintf(str, size, - "Buffer %08x%08x length %ld TD size %ld intr" - "%ld type '%s' flags %c:%c:%c:%c", - field1, field0, TRB_LEN(field2), - GET_TD_SIZE(field2), - GET_INTR_TARGET(field2), - cdnsp_trb_type_string(type), - field3 & TRB_IOC ? 'I' : 'i', - field3 & TRB_CHAIN ? 'C' : 'c', - field3 & TRB_ENT ? 'E' : 'e', - field3 & TRB_CYCLE ? 'C' : 'c'); + ret = snprintf(str, size, + "Buffer %08x%08x length %ld TD size %ld intr" + "%ld type '%s' flags %c:%c:%c:%c", + field1, field0, TRB_LEN(field2), + GET_TD_SIZE(field2), + GET_INTR_TARGET(field2), + cdnsp_trb_type_string(type), + field3 & TRB_IOC ? 'I' : 'i', + field3 & TRB_CHAIN ? 'C' : 'c', + field3 & TRB_ENT ? 'E' : 'e', + field3 & TRB_CYCLE ? 
'C' : 'c'); break; case TRB_NORMAL: case TRB_ISOC: case TRB_EVENT_DATA: case TRB_TR_NOOP: - ret += snprintf(str, size, - "type '%s' Buffer %08x%08x length %ld " - "TD size %ld intr %ld " - "flags %c:%c:%c:%c:%c:%c:%c:%c:%c", - cdnsp_trb_type_string(type), - field1, field0, TRB_LEN(field2), - GET_TD_SIZE(field2), - GET_INTR_TARGET(field2), - field3 & TRB_BEI ? 'B' : 'b', - field3 & TRB_IDT ? 'T' : 't', - field3 & TRB_IOC ? 'I' : 'i', - field3 & TRB_CHAIN ? 'C' : 'c', - field3 & TRB_NO_SNOOP ? 'S' : 's', - field3 & TRB_ISP ? 'I' : 'i', - field3 & TRB_ENT ? 'E' : 'e', - field3 & TRB_CYCLE ? 'C' : 'c', - !(field3 & TRB_EVENT_INVALIDATE) ? 'V' : 'v'); + ret = snprintf(str, size, + "type '%s' Buffer %08x%08x length %ld " + "TD size %ld intr %ld " + "flags %c:%c:%c:%c:%c:%c:%c:%c:%c", + cdnsp_trb_type_string(type), + field1, field0, TRB_LEN(field2), + GET_TD_SIZE(field2), + GET_INTR_TARGET(field2), + field3 & TRB_BEI ? 'B' : 'b', + field3 & TRB_IDT ? 'T' : 't', + field3 & TRB_IOC ? 'I' : 'i', + field3 & TRB_CHAIN ? 'C' : 'c', + field3 & TRB_NO_SNOOP ? 'S' : 's', + field3 & TRB_ISP ? 'I' : 'i', + field3 & TRB_ENT ? 'E' : 'e', + field3 & TRB_CYCLE ? 'C' : 'c', + !(field3 & TRB_EVENT_INVALIDATE) ? 'V' : 'v'); break; case TRB_CMD_NOOP: case TRB_ENABLE_SLOT: - ret += snprintf(str, size, "%s: flags %c", - cdnsp_trb_type_string(type), - field3 & TRB_CYCLE ? 'C' : 'c'); + ret = snprintf(str, size, "%s: flags %c", + cdnsp_trb_type_string(type), + field3 & TRB_CYCLE ? 'C' : 'c'); break; case TRB_DISABLE_SLOT: - ret += snprintf(str, size, "%s: slot %ld flags %c", - cdnsp_trb_type_string(type), - TRB_TO_SLOT_ID(field3), - field3 & TRB_CYCLE ? 'C' : 'c'); + ret = snprintf(str, size, "%s: slot %ld flags %c", + cdnsp_trb_type_string(type), + TRB_TO_SLOT_ID(field3), + field3 & TRB_CYCLE ? 
'C' : 'c'); break; case TRB_ADDR_DEV: - ret += snprintf(str, size, - "%s: ctx %08x%08x slot %ld flags %c:%c", - cdnsp_trb_type_string(type), field1, field0, - TRB_TO_SLOT_ID(field3), - field3 & TRB_BSR ? 'B' : 'b', - field3 & TRB_CYCLE ? 'C' : 'c'); + ret = snprintf(str, size, + "%s: ctx %08x%08x slot %ld flags %c:%c", + cdnsp_trb_type_string(type), field1, field0, + TRB_TO_SLOT_ID(field3), + field3 & TRB_BSR ? 'B' : 'b', + field3 & TRB_CYCLE ? 'C' : 'c'); break; case TRB_CONFIG_EP: - ret += snprintf(str, size, - "%s: ctx %08x%08x slot %ld flags %c:%c", - cdnsp_trb_type_string(type), field1, field0, - TRB_TO_SLOT_ID(field3), - field3 & TRB_DC ? 'D' : 'd', - field3 & TRB_CYCLE ? 'C' : 'c'); + ret = snprintf(str, size, + "%s: ctx %08x%08x slot %ld flags %c:%c", + cdnsp_trb_type_string(type), field1, field0, + TRB_TO_SLOT_ID(field3), + field3 & TRB_DC ? 'D' : 'd', + field3 & TRB_CYCLE ? 'C' : 'c'); break; case TRB_EVAL_CONTEXT: - ret += snprintf(str, size, - "%s: ctx %08x%08x slot %ld flags %c", - cdnsp_trb_type_string(type), field1, field0, - TRB_TO_SLOT_ID(field3), - field3 & TRB_CYCLE ? 'C' : 'c'); + ret = snprintf(str, size, + "%s: ctx %08x%08x slot %ld flags %c", + cdnsp_trb_type_string(type), field1, field0, + TRB_TO_SLOT_ID(field3), + field3 & TRB_CYCLE ? 'C' : 'c'); break; case TRB_RESET_EP: case TRB_HALT_ENDPOINT: case TRB_FLUSH_ENDPOINT: - ret += snprintf(str, size, - "%s: ep%d%s(%d) ctx %08x%08x slot %ld flags %c", - cdnsp_trb_type_string(type), - ep_num, ep_id % 2 ? "out" : "in", - TRB_TO_EP_INDEX(field3), field1, field0, - TRB_TO_SLOT_ID(field3), - field3 & TRB_CYCLE ? 'C' : 'c'); + ret = snprintf(str, size, + "%s: ep%d%s(%d) ctx %08x%08x slot %ld flags %c", + cdnsp_trb_type_string(type), + ep_num, ep_id % 2 ? "out" : "in", + TRB_TO_EP_INDEX(field3), field1, field0, + TRB_TO_SLOT_ID(field3), + field3 & TRB_CYCLE ? 
'C' : 'c'); break; case TRB_STOP_RING: - ret += snprintf(str, size, - "%s: ep%d%s(%d) slot %ld sp %d flags %c", - cdnsp_trb_type_string(type), - ep_num, ep_id % 2 ? "out" : "in", - TRB_TO_EP_INDEX(field3), - TRB_TO_SLOT_ID(field3), - TRB_TO_SUSPEND_PORT(field3), - field3 & TRB_CYCLE ? 'C' : 'c'); + ret = snprintf(str, size, + "%s: ep%d%s(%d) slot %ld sp %d flags %c", + cdnsp_trb_type_string(type), + ep_num, ep_id % 2 ? "out" : "in", + TRB_TO_EP_INDEX(field3), + TRB_TO_SLOT_ID(field3), + TRB_TO_SUSPEND_PORT(field3), + field3 & TRB_CYCLE ? 'C' : 'c'); break; case TRB_SET_DEQ: - ret += snprintf(str, size, - "%s: ep%d%s(%d) deq %08x%08x stream %ld slot %ld flags %c", - cdnsp_trb_type_string(type), - ep_num, ep_id % 2 ? "out" : "in", - TRB_TO_EP_INDEX(field3), field1, field0, - TRB_TO_STREAM_ID(field2), - TRB_TO_SLOT_ID(field3), - field3 & TRB_CYCLE ? 'C' : 'c'); + ret = snprintf(str, size, + "%s: ep%d%s(%d) deq %08x%08x stream %ld slot %ld flags %c", + cdnsp_trb_type_string(type), + ep_num, ep_id % 2 ? "out" : "in", + TRB_TO_EP_INDEX(field3), field1, field0, + TRB_TO_STREAM_ID(field2), + TRB_TO_SLOT_ID(field3), + field3 & TRB_CYCLE ? 'C' : 'c'); break; case TRB_RESET_DEV: - ret += snprintf(str, size, "%s: slot %ld flags %c", - cdnsp_trb_type_string(type), - TRB_TO_SLOT_ID(field3), - field3 & TRB_CYCLE ? 'C' : 'c'); + ret = snprintf(str, size, "%s: slot %ld flags %c", + cdnsp_trb_type_string(type), + TRB_TO_SLOT_ID(field3), + field3 & TRB_CYCLE ? 'C' : 'c'); break; case TRB_ENDPOINT_NRDY: - temp = TRB_TO_HOST_STREAM(field2); - - ret += snprintf(str, size, - "%s: ep%d%s(%d) H_SID %x%s%s D_SID %lx flags %c:%c", - cdnsp_trb_type_string(type), - ep_num, ep_id % 2 ? "out" : "in", - TRB_TO_EP_INDEX(field3), temp, - temp == STREAM_PRIME_ACK ? "(PRIME)" : "", - temp == STREAM_REJECTED ? "(REJECTED)" : "", - TRB_TO_DEV_STREAM(field0), - field3 & TRB_STAT ? 'S' : 's', - field3 & TRB_CYCLE ? 
'C' : 'c'); + temp = TRB_TO_HOST_STREAM(field2); + + ret = snprintf(str, size, + "%s: ep%d%s(%d) H_SID %x%s%s D_SID %lx flags %c:%c", + cdnsp_trb_type_string(type), + ep_num, ep_id % 2 ? "out" : "in", + TRB_TO_EP_INDEX(field3), temp, + temp == STREAM_PRIME_ACK ? "(PRIME)" : "", + temp == STREAM_REJECTED ? "(REJECTED)" : "", + TRB_TO_DEV_STREAM(field0), + field3 & TRB_STAT ? 'S' : 's', + field3 & TRB_CYCLE ? 'C' : 'c'); break; default: - ret += snprintf(str, size, - "type '%s' -> raw %08x %08x %08x %08x", - cdnsp_trb_type_string(type), - field0, field1, field2, field3); + ret = snprintf(str, size, + "type '%s' -> raw %08x %08x %08x %08x", + cdnsp_trb_type_string(type), + field0, field1, field2, field3); } + if (ret >= size) + pr_info("CDNSP: buffer overflowed.\n"); + return str; } diff --git a/drivers/usb/dwc3/dwc3-omap.c b/drivers/usb/dwc3/dwc3-omap.c index e196673f5c647..efaf0db595f46 100644 --- a/drivers/usb/dwc3/dwc3-omap.c +++ b/drivers/usb/dwc3/dwc3-omap.c @@ -242,7 +242,7 @@ static void dwc3_omap_set_mailbox(struct dwc3_omap *omap, break; case OMAP_DWC3_ID_FLOAT: - if (omap->vbus_reg) + if (omap->vbus_reg && regulator_is_enabled(omap->vbus_reg)) regulator_disable(omap->vbus_reg); val = dwc3_omap_read_utmi_ctrl(omap); val |= USBOTGSS_UTMI_OTG_CTRL_IDDIG; diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c index 06d0e88ec8af9..4d9608cc55f73 100644 --- a/drivers/usb/dwc3/dwc3-pci.c +++ b/drivers/usb/dwc3/dwc3-pci.c @@ -185,7 +185,8 @@ static const struct software_node dwc3_pci_amd_mr_swnode = { .properties = dwc3_pci_mr_properties, }; -static int dwc3_pci_quirks(struct dwc3_pci *dwc) +static int dwc3_pci_quirks(struct dwc3_pci *dwc, + const struct software_node *swnode) { struct pci_dev *pdev = dwc->pci; @@ -242,7 +243,7 @@ static int dwc3_pci_quirks(struct dwc3_pci *dwc) } } - return 0; + return device_add_software_node(&dwc->dwc3->dev, swnode); } #ifdef CONFIG_PM @@ -307,11 +308,7 @@ static int dwc3_pci_probe(struct pci_dev *pci, const 
struct pci_device_id *id) dwc->dwc3->dev.parent = dev; ACPI_COMPANION_SET(&dwc->dwc3->dev, ACPI_COMPANION(dev)); - ret = device_add_software_node(&dwc->dwc3->dev, (void *)id->driver_data); - if (ret < 0) - goto err; - - ret = dwc3_pci_quirks(dwc); + ret = dwc3_pci_quirks(dwc, (void *)id->driver_data); if (ret) goto err; diff --git a/drivers/usb/gadget/udc/tegra-xudc.c b/drivers/usb/gadget/udc/tegra-xudc.c index 43f1b0d461c1e..be76f891b9c52 100644 --- a/drivers/usb/gadget/udc/tegra-xudc.c +++ b/drivers/usb/gadget/udc/tegra-xudc.c @@ -32,9 +32,6 @@ #include /* XUSB_DEV registers */ -#define SPARAM 0x000 -#define SPARAM_ERSTMAX_MASK GENMASK(20, 16) -#define SPARAM_ERSTMAX(x) (((x) << 16) & SPARAM_ERSTMAX_MASK) #define DB 0x004 #define DB_TARGET_MASK GENMASK(15, 8) #define DB_TARGET(x) (((x) << 8) & DB_TARGET_MASK) @@ -275,8 +272,10 @@ BUILD_EP_CONTEXT_RW(deq_hi, deq_hi, 0, 0xffffffff) BUILD_EP_CONTEXT_RW(avg_trb_len, tx_info, 0, 0xffff) BUILD_EP_CONTEXT_RW(max_esit_payload, tx_info, 16, 0xffff) BUILD_EP_CONTEXT_RW(edtla, rsvd[0], 0, 0xffffff) -BUILD_EP_CONTEXT_RW(seq_num, rsvd[0], 24, 0xff) +BUILD_EP_CONTEXT_RW(rsvd, rsvd[0], 24, 0x1) BUILD_EP_CONTEXT_RW(partial_td, rsvd[0], 25, 0x1) +BUILD_EP_CONTEXT_RW(splitxstate, rsvd[0], 26, 0x1) +BUILD_EP_CONTEXT_RW(seq_num, rsvd[0], 27, 0x1f) BUILD_EP_CONTEXT_RW(cerrcnt, rsvd[1], 18, 0x3) BUILD_EP_CONTEXT_RW(data_offset, rsvd[2], 0, 0x1ffff) BUILD_EP_CONTEXT_RW(numtrbs, rsvd[2], 22, 0x1f) @@ -1557,6 +1556,9 @@ static int __tegra_xudc_ep_set_halt(struct tegra_xudc_ep *ep, bool halt) ep_reload(xudc, ep->index); ep_ctx_write_state(ep->context, EP_STATE_RUNNING); + ep_ctx_write_rsvd(ep->context, 0); + ep_ctx_write_partial_td(ep->context, 0); + ep_ctx_write_splitxstate(ep->context, 0); ep_ctx_write_seq_num(ep->context, 0); ep_reload(xudc, ep->index); @@ -2812,7 +2814,10 @@ static void tegra_xudc_reset(struct tegra_xudc *xudc) xudc->setup_seq_num = 0; xudc->queued_setup_packet = false; - ep_ctx_write_seq_num(ep0->context, 
xudc->setup_seq_num); + ep_ctx_write_rsvd(ep0->context, 0); + ep_ctx_write_partial_td(ep0->context, 0); + ep_ctx_write_splitxstate(ep0->context, 0); + ep_ctx_write_seq_num(ep0->context, 0); deq_ptr = trb_virt_to_phys(ep0, &ep0->transfer_ring[ep0->deq_ptr]); @@ -3295,11 +3300,6 @@ static void tegra_xudc_init_event_ring(struct tegra_xudc *xudc) unsigned int i; u32 val; - val = xudc_readl(xudc, SPARAM); - val &= ~(SPARAM_ERSTMAX_MASK); - val |= SPARAM_ERSTMAX(XUDC_NR_EVENT_RINGS); - xudc_writel(xudc, val, SPARAM); - for (i = 0; i < ARRAY_SIZE(xudc->event_ring); i++) { memset(xudc->event_ring[i], 0, XUDC_EVENT_RING_SIZE * sizeof(*xudc->event_ring[i])); diff --git a/drivers/usb/host/ehci-pci.c b/drivers/usb/host/ehci-pci.c index e87cf3a00fa4b..638f03b897394 100644 --- a/drivers/usb/host/ehci-pci.c +++ b/drivers/usb/host/ehci-pci.c @@ -21,6 +21,9 @@ static const char hcd_name[] = "ehci-pci"; /* defined here to avoid adding to pci_ids.h for single instance use */ #define PCI_DEVICE_ID_INTEL_CE4100_USB 0x2e70 +#define PCI_VENDOR_ID_ASPEED 0x1a03 +#define PCI_DEVICE_ID_ASPEED_EHCI 0x2603 + /*-------------------------------------------------------------------------*/ #define PCI_DEVICE_ID_INTEL_QUARK_X1000_SOC 0x0939 static inline bool is_intel_quark_x1000(struct pci_dev *pdev) @@ -222,6 +225,12 @@ static int ehci_pci_setup(struct usb_hcd *hcd) ehci->has_synopsys_hc_bug = 1; } break; + case PCI_VENDOR_ID_ASPEED: + if (pdev->device == PCI_DEVICE_ID_ASPEED_EHCI) { + ehci_info(ehci, "applying Aspeed HC workaround\n"); + ehci->is_aspeed = 1; + } + break; } /* optional debug port, normally in the first BAR */ diff --git a/drivers/usb/host/xen-hcd.c b/drivers/usb/host/xen-hcd.c index 19b8c7ed74cb1..4ed3ee328a4a6 100644 --- a/drivers/usb/host/xen-hcd.c +++ b/drivers/usb/host/xen-hcd.c @@ -51,6 +51,7 @@ struct vdevice_status { struct usb_shadow { struct xenusb_urb_request req; struct urb *urb; + bool in_flight; }; struct xenhcd_info { @@ -722,6 +723,12 @@ static void 
xenhcd_gnttab_done(struct xenhcd_info *info, unsigned int id) int nr_segs = 0; int i; + if (!shadow->in_flight) { + xenhcd_set_error(info, "Illegal request id"); + return; + } + shadow->in_flight = false; + nr_segs = shadow->req.nr_buffer_segs; if (xenusb_pipeisoc(shadow->req.pipe)) @@ -805,6 +812,7 @@ static int xenhcd_do_request(struct xenhcd_info *info, struct urb_priv *urbp) info->urb_ring.req_prod_pvt++; info->shadow[id].urb = urb; + info->shadow[id].in_flight = true; RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->urb_ring, notify); if (notify) @@ -933,10 +941,27 @@ static int xenhcd_unlink_urb(struct xenhcd_info *info, struct urb_priv *urbp) return ret; } -static int xenhcd_urb_request_done(struct xenhcd_info *info) +static void xenhcd_res_to_urb(struct xenhcd_info *info, + struct xenusb_urb_response *res, struct urb *urb) +{ + if (unlikely(!urb)) + return; + + if (res->actual_length > urb->transfer_buffer_length) + urb->actual_length = urb->transfer_buffer_length; + else if (res->actual_length < 0) + urb->actual_length = 0; + else + urb->actual_length = res->actual_length; + urb->error_count = res->error_count; + urb->start_frame = res->start_frame; + xenhcd_giveback_urb(info, urb, res->status); +} + +static int xenhcd_urb_request_done(struct xenhcd_info *info, + unsigned int *eoiflag) { struct xenusb_urb_response res; - struct urb *urb; RING_IDX i, rp; __u16 id; int more_to_do = 0; @@ -963,16 +988,12 @@ static int xenhcd_urb_request_done(struct xenhcd_info *info) xenhcd_gnttab_done(info, id); if (info->error) goto err; - urb = info->shadow[id].urb; - if (likely(urb)) { - urb->actual_length = res.actual_length; - urb->error_count = res.error_count; - urb->start_frame = res.start_frame; - xenhcd_giveback_urb(info, urb, res.status); - } + xenhcd_res_to_urb(info, &res, info->shadow[id].urb); } xenhcd_add_id_to_freelist(info, id); + + *eoiflag = 0; } info->urb_ring.rsp_cons = i; @@ -990,7 +1011,7 @@ static int xenhcd_urb_request_done(struct xenhcd_info *info) return 
0; } -static int xenhcd_conn_notify(struct xenhcd_info *info) +static int xenhcd_conn_notify(struct xenhcd_info *info, unsigned int *eoiflag) { struct xenusb_conn_response res; struct xenusb_conn_request *req; @@ -1035,6 +1056,8 @@ static int xenhcd_conn_notify(struct xenhcd_info *info) info->conn_ring.req_prod_pvt); req->id = id; info->conn_ring.req_prod_pvt++; + + *eoiflag = 0; } if (rc != info->conn_ring.req_prod_pvt) @@ -1057,14 +1080,19 @@ static int xenhcd_conn_notify(struct xenhcd_info *info) static irqreturn_t xenhcd_int(int irq, void *dev_id) { struct xenhcd_info *info = (struct xenhcd_info *)dev_id; + unsigned int eoiflag = XEN_EOI_FLAG_SPURIOUS; - if (unlikely(info->error)) + if (unlikely(info->error)) { + xen_irq_lateeoi(irq, XEN_EOI_FLAG_SPURIOUS); return IRQ_HANDLED; + } - while (xenhcd_urb_request_done(info) | xenhcd_conn_notify(info)) + while (xenhcd_urb_request_done(info, &eoiflag) | + xenhcd_conn_notify(info, &eoiflag)) /* Yield point for this unbounded loop. */ cond_resched(); + xen_irq_lateeoi(irq, eoiflag); return IRQ_HANDLED; } @@ -1141,9 +1169,9 @@ static int xenhcd_setup_rings(struct xenbus_device *dev, goto fail; } - err = bind_evtchn_to_irq(info->evtchn); + err = bind_evtchn_to_irq_lateeoi(info->evtchn); if (err <= 0) { - xenbus_dev_fatal(dev, err, "bind_evtchn_to_irq"); + xenbus_dev_fatal(dev, err, "bind_evtchn_to_irq_lateeoi"); goto fail; } @@ -1496,6 +1524,7 @@ static struct usb_hcd *xenhcd_create_hcd(struct xenbus_device *dev) for (i = 0; i < XENUSB_URB_RING_SIZE; i++) { info->shadow[i].req.id = i + 1; info->shadow[i].urb = NULL; + info->shadow[i].in_flight = false; } info->shadow[XENUSB_URB_RING_SIZE - 1].req.id = 0x0fff; diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index df3522dab31b5..1e7dc130c39a6 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -762,7 +762,7 @@ static int xhci_exit_test_mode(struct xhci_hcd *xhci) } pm_runtime_allow(xhci_to_hcd(xhci)->self.controller); 
xhci->test_mode = 0; - return xhci_reset(xhci); + return xhci_reset(xhci, XHCI_RESET_SHORT_USEC); } void xhci_set_link_state(struct xhci_hcd *xhci, struct xhci_port *port, @@ -1088,6 +1088,9 @@ static void xhci_get_usb2_port_status(struct xhci_port *port, u32 *status, if (link_state == XDEV_U2) *status |= USB_PORT_STAT_L1; if (link_state == XDEV_U0) { + if (bus_state->resume_done[portnum]) + usb_hcd_end_port_resume(&port->rhub->hcd->self, + portnum); bus_state->resume_done[portnum] = 0; clear_bit(portnum, &bus_state->resuming_ports); if (bus_state->suspended_ports & (1 << portnum)) { diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index 0e312066c5c63..b398d3fdabf61 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -2583,7 +2583,7 @@ int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags) fail: xhci_halt(xhci); - xhci_reset(xhci); + xhci_reset(xhci, XHCI_RESET_SHORT_USEC); xhci_mem_cleanup(xhci); return -ENOMEM; } diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 2d378543bc3aa..7d1ad8d654cbb 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -65,7 +65,7 @@ static bool td_on_ring(struct xhci_td *td, struct xhci_ring *ring) * handshake done). There are two failure modes: "usec" have passed (major * hardware flakeout), or the register reads as all-ones (hardware removed). */ -int xhci_handshake(void __iomem *ptr, u32 mask, u32 done, int usec) +int xhci_handshake(void __iomem *ptr, u32 mask, u32 done, u64 timeout_us) { u32 result; int ret; @@ -73,7 +73,7 @@ int xhci_handshake(void __iomem *ptr, u32 mask, u32 done, int usec) ret = readl_poll_timeout_atomic(ptr, result, (result & mask) == done || result == U32_MAX, - 1, usec); + 1, timeout_us); if (result == U32_MAX) /* card removed */ return -ENODEV; @@ -162,7 +162,7 @@ int xhci_start(struct xhci_hcd *xhci) * Transactions will be terminated immediately, and operational registers * will be set to their defaults. 
*/ -int xhci_reset(struct xhci_hcd *xhci) +int xhci_reset(struct xhci_hcd *xhci, u64 timeout_us) { u32 command; u32 state; @@ -195,8 +195,7 @@ int xhci_reset(struct xhci_hcd *xhci) if (xhci->quirks & XHCI_INTEL_HOST) udelay(1000); - ret = xhci_handshake(&xhci->op_regs->command, - CMD_RESET, 0, 10 * 1000 * 1000); + ret = xhci_handshake(&xhci->op_regs->command, CMD_RESET, 0, timeout_us); if (ret) return ret; @@ -209,8 +208,7 @@ int xhci_reset(struct xhci_hcd *xhci) * xHCI cannot write to any doorbells or operational registers other * than status until the "Controller Not Ready" flag is cleared. */ - ret = xhci_handshake(&xhci->op_regs->status, - STS_CNR, 0, 10 * 1000 * 1000); + ret = xhci_handshake(&xhci->op_regs->status, STS_CNR, 0, timeout_us); xhci->usb2_rhub.bus_state.port_c_suspend = 0; xhci->usb2_rhub.bus_state.suspended_ports = 0; @@ -731,7 +729,7 @@ static void xhci_stop(struct usb_hcd *hcd) xhci->xhc_state |= XHCI_STATE_HALTED; xhci->cmd_ring_state = CMD_RING_STATE_STOPPED; xhci_halt(xhci); - xhci_reset(xhci); + xhci_reset(xhci, XHCI_RESET_SHORT_USEC); spin_unlock_irq(&xhci->lock); xhci_cleanup_msix(xhci); @@ -784,7 +782,7 @@ void xhci_shutdown(struct usb_hcd *hcd) xhci_halt(xhci); /* Workaround for spurious wakeups at shutdown with HSW */ if (xhci->quirks & XHCI_SPURIOUS_WAKEUP) - xhci_reset(xhci); + xhci_reset(xhci, XHCI_RESET_SHORT_USEC); spin_unlock_irq(&xhci->lock); xhci_cleanup_msix(xhci); @@ -1170,7 +1168,7 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated) xhci_dbg(xhci, "Stop HCD\n"); xhci_halt(xhci); xhci_zero_64b_regs(xhci); - retval = xhci_reset(xhci); + retval = xhci_reset(xhci, XHCI_RESET_LONG_USEC); spin_unlock_irq(&xhci->lock); if (retval) return retval; @@ -5316,7 +5314,7 @@ int xhci_gen_setup(struct usb_hcd *hcd, xhci_get_quirks_t get_quirks) xhci_dbg(xhci, "Resetting HCD\n"); /* Reset the internal HC memory state and registers. 
*/ - retval = xhci_reset(xhci); + retval = xhci_reset(xhci, XHCI_RESET_LONG_USEC); if (retval) return retval; xhci_dbg(xhci, "Reset complete\n"); diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 5a75fe5631238..bc0789229527f 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -229,6 +229,9 @@ struct xhci_op_regs { #define CMD_ETE (1 << 14) /* bits 15:31 are reserved (and should be preserved on writes). */ +#define XHCI_RESET_LONG_USEC (10 * 1000 * 1000) +#define XHCI_RESET_SHORT_USEC (250 * 1000) + /* IMAN - Interrupt Management Register */ #define IMAN_IE (1 << 1) #define IMAN_IP (1 << 0) @@ -2083,11 +2086,11 @@ void xhci_free_container_ctx(struct xhci_hcd *xhci, /* xHCI host controller glue */ typedef void (*xhci_get_quirks_t)(struct device *, struct xhci_hcd *); -int xhci_handshake(void __iomem *ptr, u32 mask, u32 done, int usec); +int xhci_handshake(void __iomem *ptr, u32 mask, u32 done, u64 timeout_us); void xhci_quiesce(struct xhci_hcd *xhci); int xhci_halt(struct xhci_hcd *xhci); int xhci_start(struct xhci_hcd *xhci); -int xhci_reset(struct xhci_hcd *xhci); +int xhci_reset(struct xhci_hcd *xhci, u64 timeout_us); int xhci_run(struct usb_hcd *hcd); int xhci_gen_setup(struct usb_hcd *hcd, xhci_get_quirks_t get_quirks); void xhci_shutdown(struct usb_hcd *hcd); @@ -2467,6 +2470,8 @@ static inline const char *xhci_decode_ctrl_ctx(char *str, unsigned int bit; int ret = 0; + str[0] = '\0'; + if (drop) { ret = sprintf(str, "Drop:"); for_each_set_bit(bit, &drop, 32) @@ -2624,8 +2629,11 @@ static inline const char *xhci_decode_usbsts(char *str, u32 usbsts) { int ret = 0; + ret = sprintf(str, " 0x%08x", usbsts); + if (usbsts == ~(u32)0) - return " 0xffffffff"; + return str; + if (usbsts & STS_HALT) ret += sprintf(str + ret, " HCHalted"); if (usbsts & STS_FATAL) diff --git a/drivers/usb/serial/Kconfig b/drivers/usb/serial/Kconfig index de5c012570603..ef8d1c73c7545 100644 --- a/drivers/usb/serial/Kconfig +++ 
b/drivers/usb/serial/Kconfig @@ -66,6 +66,7 @@ config USB_SERIAL_SIMPLE - Libtransistor USB console - a number of Motorola phones - Motorola Tetra devices + - Nokia mobile phones - Novatel Wireless GPS receivers - Siemens USB/MPI adapter. - ViVOtech ViVOpay USB device. diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c index a70fd86f735ca..88b284d61681a 100644 --- a/drivers/usb/serial/pl2303.c +++ b/drivers/usb/serial/pl2303.c @@ -116,6 +116,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(ADLINK_VENDOR_ID, ADLINK_ND6530GC_PRODUCT_ID) }, { USB_DEVICE(SMART_VENDOR_ID, SMART_PRODUCT_ID) }, { USB_DEVICE(AT_VENDOR_ID, AT_VTKIT3_PRODUCT_ID) }, + { USB_DEVICE(IBM_VENDOR_ID, IBM_PRODUCT_ID) }, { } /* Terminating entry */ }; @@ -435,6 +436,7 @@ static int pl2303_detect_type(struct usb_serial *serial) case 0x105: case 0x305: case 0x405: + case 0x605: /* * Assume it's an HXN-type if the device doesn't * support the old read request value. diff --git a/drivers/usb/serial/pl2303.h b/drivers/usb/serial/pl2303.h index 6097ee8fccb25..c5406452b774e 100644 --- a/drivers/usb/serial/pl2303.h +++ b/drivers/usb/serial/pl2303.h @@ -35,6 +35,9 @@ #define ATEN_PRODUCT_UC232B 0x2022 #define ATEN_PRODUCT_ID2 0x2118 +#define IBM_VENDOR_ID 0x04b3 +#define IBM_PRODUCT_ID 0x4016 + #define IODATA_VENDOR_ID 0x04bb #define IODATA_PRODUCT_ID 0x0a03 #define IODATA_PRODUCT_ID_RSAQ5 0x0a0e diff --git a/drivers/usb/serial/usb-serial-simple.c b/drivers/usb/serial/usb-serial-simple.c index bd23a7cb1be2b..4c6747889a194 100644 --- a/drivers/usb/serial/usb-serial-simple.c +++ b/drivers/usb/serial/usb-serial-simple.c @@ -91,6 +91,11 @@ DEVICE(moto_modem, MOTO_IDS); { USB_DEVICE(0x0cad, 0x9016) } /* TPG2200 */ DEVICE(motorola_tetra, MOTOROLA_TETRA_IDS); +/* Nokia mobile phone driver */ +#define NOKIA_IDS() \ + { USB_DEVICE(0x0421, 0x069a) } /* Nokia 130 (RM-1035) */ +DEVICE(nokia, NOKIA_IDS); + /* Novatel Wireless GPS driver */ #define NOVATEL_IDS() \ { USB_DEVICE(0x09d7, 
0x0100) } /* NovAtel FlexPack GPS */ @@ -123,6 +128,7 @@ static struct usb_serial_driver * const serial_drivers[] = { &vivopay_device, &moto_modem_device, &motorola_tetra_device, + &nokia_device, &novatel_gps_device, &hp4x_device, &suunto_device, @@ -140,6 +146,7 @@ static const struct usb_device_id id_table[] = { VIVOPAY_IDS(), MOTO_IDS(), MOTOROLA_TETRA_IDS(), + NOKIA_IDS(), NOVATEL_IDS(), HP4X_IDS(), SUUNTO_IDS(), diff --git a/drivers/usb/storage/ene_ub6250.c b/drivers/usb/storage/ene_ub6250.c index 5f7d678502be4..6012603f3630e 100644 --- a/drivers/usb/storage/ene_ub6250.c +++ b/drivers/usb/storage/ene_ub6250.c @@ -237,36 +237,33 @@ static struct us_unusual_dev ene_ub6250_unusual_dev_list[] = { #define memstick_logaddr(logadr1, logadr0) ((((u16)(logadr1)) << 8) | (logadr0)) -struct SD_STATUS { - u8 Insert:1; - u8 Ready:1; - u8 MediaChange:1; - u8 IsMMC:1; - u8 HiCapacity:1; - u8 HiSpeed:1; - u8 WtP:1; - u8 Reserved:1; -}; - -struct MS_STATUS { - u8 Insert:1; - u8 Ready:1; - u8 MediaChange:1; - u8 IsMSPro:1; - u8 IsMSPHG:1; - u8 Reserved1:1; - u8 WtP:1; - u8 Reserved2:1; -}; - -struct SM_STATUS { - u8 Insert:1; - u8 Ready:1; - u8 MediaChange:1; - u8 Reserved:3; - u8 WtP:1; - u8 IsMS:1; -}; +/* SD_STATUS bits */ +#define SD_Insert BIT(0) +#define SD_Ready BIT(1) +#define SD_MediaChange BIT(2) +#define SD_IsMMC BIT(3) +#define SD_HiCapacity BIT(4) +#define SD_HiSpeed BIT(5) +#define SD_WtP BIT(6) + /* Bit 7 reserved */ + +/* MS_STATUS bits */ +#define MS_Insert BIT(0) +#define MS_Ready BIT(1) +#define MS_MediaChange BIT(2) +#define MS_IsMSPro BIT(3) +#define MS_IsMSPHG BIT(4) + /* Bit 5 reserved */ +#define MS_WtP BIT(6) + /* Bit 7 reserved */ + +/* SM_STATUS bits */ +#define SM_Insert BIT(0) +#define SM_Ready BIT(1) +#define SM_MediaChange BIT(2) + /* Bits 3-5 reserved */ +#define SM_WtP BIT(6) +#define SM_IsMS BIT(7) struct ms_bootblock_cis { u8 bCistplDEVICE[6]; /* 0 */ @@ -437,9 +434,9 @@ struct ene_ub6250_info { u8 *bbuf; /* for 6250 code */ - struct SD_STATUS 
SD_Status; - struct MS_STATUS MS_Status; - struct SM_STATUS SM_Status; + u8 SD_Status; + u8 MS_Status; + u8 SM_Status; /* ----- SD Control Data ---------------- */ /*SD_REGISTER SD_Regs; */ @@ -602,7 +599,7 @@ static int sd_scsi_test_unit_ready(struct us_data *us, struct scsi_cmnd *srb) { struct ene_ub6250_info *info = (struct ene_ub6250_info *) us->extra; - if (info->SD_Status.Insert && info->SD_Status.Ready) + if ((info->SD_Status & SD_Insert) && (info->SD_Status & SD_Ready)) return USB_STOR_TRANSPORT_GOOD; else { ene_sd_init(us); @@ -622,7 +619,7 @@ static int sd_scsi_mode_sense(struct us_data *us, struct scsi_cmnd *srb) 0x0b, 0x00, 0x80, 0x08, 0x00, 0x00, 0x71, 0xc0, 0x00, 0x00, 0x02, 0x00 }; - if (info->SD_Status.WtP) + if (info->SD_Status & SD_WtP) usb_stor_set_xfer_buf(mediaWP, 12, srb); else usb_stor_set_xfer_buf(mediaNoWP, 12, srb); @@ -641,9 +638,9 @@ static int sd_scsi_read_capacity(struct us_data *us, struct scsi_cmnd *srb) struct ene_ub6250_info *info = (struct ene_ub6250_info *) us->extra; usb_stor_dbg(us, "sd_scsi_read_capacity\n"); - if (info->SD_Status.HiCapacity) { + if (info->SD_Status & SD_HiCapacity) { bl_len = 0x200; - if (info->SD_Status.IsMMC) + if (info->SD_Status & SD_IsMMC) bl_num = info->HC_C_SIZE-1; else bl_num = (info->HC_C_SIZE + 1) * 1024 - 1; @@ -693,7 +690,7 @@ static int sd_scsi_read(struct us_data *us, struct scsi_cmnd *srb) return USB_STOR_TRANSPORT_ERROR; } - if (info->SD_Status.HiCapacity) + if (info->SD_Status & SD_HiCapacity) bnByte = bn; /* set up the command wrapper */ @@ -733,7 +730,7 @@ static int sd_scsi_write(struct us_data *us, struct scsi_cmnd *srb) return USB_STOR_TRANSPORT_ERROR; } - if (info->SD_Status.HiCapacity) + if (info->SD_Status & SD_HiCapacity) bnByte = bn; /* set up the command wrapper */ @@ -1456,7 +1453,7 @@ static int ms_scsi_test_unit_ready(struct us_data *us, struct scsi_cmnd *srb) struct ene_ub6250_info *info = (struct ene_ub6250_info *)(us->extra); /* pr_info("MS_SCSI_Test_Unit_Ready\n"); */ - if 
(info->MS_Status.Insert && info->MS_Status.Ready) { + if ((info->MS_Status & MS_Insert) && (info->MS_Status & MS_Ready)) { return USB_STOR_TRANSPORT_GOOD; } else { ene_ms_init(us); @@ -1476,7 +1473,7 @@ static int ms_scsi_mode_sense(struct us_data *us, struct scsi_cmnd *srb) 0x0b, 0x00, 0x80, 0x08, 0x00, 0x00, 0x71, 0xc0, 0x00, 0x00, 0x02, 0x00 }; - if (info->MS_Status.WtP) + if (info->MS_Status & MS_WtP) usb_stor_set_xfer_buf(mediaWP, 12, srb); else usb_stor_set_xfer_buf(mediaNoWP, 12, srb); @@ -1495,7 +1492,7 @@ static int ms_scsi_read_capacity(struct us_data *us, struct scsi_cmnd *srb) usb_stor_dbg(us, "ms_scsi_read_capacity\n"); bl_len = 0x200; - if (info->MS_Status.IsMSPro) + if (info->MS_Status & MS_IsMSPro) bl_num = info->MSP_TotalBlock - 1; else bl_num = info->MS_Lib.NumberOfLogBlock * info->MS_Lib.blockSize * 2 - 1; @@ -1650,7 +1647,7 @@ static int ms_scsi_read(struct us_data *us, struct scsi_cmnd *srb) if (bn > info->bl_num) return USB_STOR_TRANSPORT_ERROR; - if (info->MS_Status.IsMSPro) { + if (info->MS_Status & MS_IsMSPro) { result = ene_load_bincode(us, MSP_RW_PATTERN); if (result != USB_STOR_XFER_GOOD) { usb_stor_dbg(us, "Load MPS RW pattern Fail !!\n"); @@ -1751,7 +1748,7 @@ static int ms_scsi_write(struct us_data *us, struct scsi_cmnd *srb) if (bn > info->bl_num) return USB_STOR_TRANSPORT_ERROR; - if (info->MS_Status.IsMSPro) { + if (info->MS_Status & MS_IsMSPro) { result = ene_load_bincode(us, MSP_RW_PATTERN); if (result != USB_STOR_XFER_GOOD) { pr_info("Load MSP RW pattern Fail !!\n"); @@ -1859,12 +1856,12 @@ static int ene_get_card_status(struct us_data *us, u8 *buf) tmpreg = (u16) reg4b; reg4b = *(u32 *)(&buf[0x14]); - if (info->SD_Status.HiCapacity && !info->SD_Status.IsMMC) + if ((info->SD_Status & SD_HiCapacity) && !(info->SD_Status & SD_IsMMC)) info->HC_C_SIZE = (reg4b >> 8) & 0x3fffff; info->SD_C_SIZE = ((tmpreg & 0x03) << 10) | (u16)(reg4b >> 22); info->SD_C_SIZE_MULT = (u8)(reg4b >> 7) & 0x07; - if (info->SD_Status.HiCapacity && 
info->SD_Status.IsMMC) + if ((info->SD_Status & SD_HiCapacity) && (info->SD_Status & SD_IsMMC)) info->HC_C_SIZE = *(u32 *)(&buf[0x100]); if (info->SD_READ_BL_LEN > SD_BLOCK_LEN) { @@ -2076,6 +2073,7 @@ static int ene_ms_init(struct us_data *us) u16 MSP_BlockSize, MSP_UserAreaBlocks; struct ene_ub6250_info *info = (struct ene_ub6250_info *) us->extra; u8 *bbuf = info->bbuf; + unsigned int s; printk(KERN_INFO "transport --- ENE_MSInit\n"); @@ -2100,15 +2098,16 @@ static int ene_ms_init(struct us_data *us) return USB_STOR_TRANSPORT_ERROR; } /* the same part to test ENE */ - info->MS_Status = *(struct MS_STATUS *) bbuf; - - if (info->MS_Status.Insert && info->MS_Status.Ready) { - printk(KERN_INFO "Insert = %x\n", info->MS_Status.Insert); - printk(KERN_INFO "Ready = %x\n", info->MS_Status.Ready); - printk(KERN_INFO "IsMSPro = %x\n", info->MS_Status.IsMSPro); - printk(KERN_INFO "IsMSPHG = %x\n", info->MS_Status.IsMSPHG); - printk(KERN_INFO "WtP= %x\n", info->MS_Status.WtP); - if (info->MS_Status.IsMSPro) { + info->MS_Status = bbuf[0]; + + s = info->MS_Status; + if ((s & MS_Insert) && (s & MS_Ready)) { + printk(KERN_INFO "Insert = %x\n", !!(s & MS_Insert)); + printk(KERN_INFO "Ready = %x\n", !!(s & MS_Ready)); + printk(KERN_INFO "IsMSPro = %x\n", !!(s & MS_IsMSPro)); + printk(KERN_INFO "IsMSPHG = %x\n", !!(s & MS_IsMSPHG)); + printk(KERN_INFO "WtP= %x\n", !!(s & MS_WtP)); + if (s & MS_IsMSPro) { MSP_BlockSize = (bbuf[6] << 8) | bbuf[7]; MSP_UserAreaBlocks = (bbuf[10] << 8) | bbuf[11]; info->MSP_TotalBlock = MSP_BlockSize * MSP_UserAreaBlocks; @@ -2169,17 +2168,17 @@ static int ene_sd_init(struct us_data *us) return USB_STOR_TRANSPORT_ERROR; } - info->SD_Status = *(struct SD_STATUS *) bbuf; - if (info->SD_Status.Insert && info->SD_Status.Ready) { - struct SD_STATUS *s = &info->SD_Status; + info->SD_Status = bbuf[0]; + if ((info->SD_Status & SD_Insert) && (info->SD_Status & SD_Ready)) { + unsigned int s = info->SD_Status; ene_get_card_status(us, bbuf); - usb_stor_dbg(us, 
"Insert = %x\n", s->Insert); - usb_stor_dbg(us, "Ready = %x\n", s->Ready); - usb_stor_dbg(us, "IsMMC = %x\n", s->IsMMC); - usb_stor_dbg(us, "HiCapacity = %x\n", s->HiCapacity); - usb_stor_dbg(us, "HiSpeed = %x\n", s->HiSpeed); - usb_stor_dbg(us, "WtP = %x\n", s->WtP); + usb_stor_dbg(us, "Insert = %x\n", !!(s & SD_Insert)); + usb_stor_dbg(us, "Ready = %x\n", !!(s & SD_Ready)); + usb_stor_dbg(us, "IsMMC = %x\n", !!(s & SD_IsMMC)); + usb_stor_dbg(us, "HiCapacity = %x\n", !!(s & SD_HiCapacity)); + usb_stor_dbg(us, "HiSpeed = %x\n", !!(s & SD_HiSpeed)); + usb_stor_dbg(us, "WtP = %x\n", !!(s & SD_WtP)); } else { usb_stor_dbg(us, "SD Card Not Ready --- %x\n", bbuf[0]); return USB_STOR_TRANSPORT_ERROR; @@ -2201,14 +2200,14 @@ static int ene_init(struct us_data *us) misc_reg03 = bbuf[0]; if (misc_reg03 & 0x01) { - if (!info->SD_Status.Ready) { + if (!(info->SD_Status & SD_Ready)) { result = ene_sd_init(us); if (result != USB_STOR_XFER_GOOD) return USB_STOR_TRANSPORT_ERROR; } } if (misc_reg03 & 0x02) { - if (!info->MS_Status.Ready) { + if (!(info->MS_Status & MS_Ready)) { result = ene_ms_init(us); if (result != USB_STOR_XFER_GOOD) return USB_STOR_TRANSPORT_ERROR; @@ -2307,14 +2306,14 @@ static int ene_transport(struct scsi_cmnd *srb, struct us_data *us) /*US_DEBUG(usb_stor_show_command(us, srb)); */ scsi_set_resid(srb, 0); - if (unlikely(!(info->SD_Status.Ready || info->MS_Status.Ready))) + if (unlikely(!(info->SD_Status & SD_Ready) || (info->MS_Status & MS_Ready))) result = ene_init(us); if (result == USB_STOR_XFER_GOOD) { result = USB_STOR_TRANSPORT_ERROR; - if (info->SD_Status.Ready) + if (info->SD_Status & SD_Ready) result = sd_scsi_irp(us, srb); - if (info->MS_Status.Ready) + if (info->MS_Status & MS_Ready) result = ms_scsi_irp(us, srb); } return result; @@ -2378,7 +2377,6 @@ static int ene_ub6250_probe(struct usb_interface *intf, static int ene_ub6250_resume(struct usb_interface *iface) { - u8 tmp = 0; struct us_data *us = usb_get_intfdata(iface); struct 
ene_ub6250_info *info = (struct ene_ub6250_info *)(us->extra); @@ -2390,17 +2388,16 @@ static int ene_ub6250_resume(struct usb_interface *iface) mutex_unlock(&us->dev_mutex); info->Power_IsResum = true; - /*info->SD_Status.Ready = 0; */ - info->SD_Status = *(struct SD_STATUS *)&tmp; - info->MS_Status = *(struct MS_STATUS *)&tmp; - info->SM_Status = *(struct SM_STATUS *)&tmp; + /* info->SD_Status &= ~SD_Ready; */ + info->SD_Status = 0; + info->MS_Status = 0; + info->SM_Status = 0; return 0; } static int ene_ub6250_reset_resume(struct usb_interface *iface) { - u8 tmp = 0; struct us_data *us = usb_get_intfdata(iface); struct ene_ub6250_info *info = (struct ene_ub6250_info *)(us->extra); @@ -2412,10 +2409,10 @@ static int ene_ub6250_reset_resume(struct usb_interface *iface) * the device */ info->Power_IsResum = true; - /*info->SD_Status.Ready = 0; */ - info->SD_Status = *(struct SD_STATUS *)&tmp; - info->MS_Status = *(struct MS_STATUS *)&tmp; - info->SM_Status = *(struct SM_STATUS *)&tmp; + /* info->SD_Status &= ~SD_Ready; */ + info->SD_Status = 0; + info->MS_Status = 0; + info->SM_Status = 0; return 0; } diff --git a/drivers/usb/storage/realtek_cr.c b/drivers/usb/storage/realtek_cr.c index 3789698d9d3c6..0c423916d7bfa 100644 --- a/drivers/usb/storage/realtek_cr.c +++ b/drivers/usb/storage/realtek_cr.c @@ -365,7 +365,7 @@ static int rts51x_read_mem(struct us_data *us, u16 addr, u8 *data, u16 len) buf = kmalloc(len, GFP_NOIO); if (buf == NULL) - return USB_STOR_TRANSPORT_ERROR; + return -ENOMEM; usb_stor_dbg(us, "addr = 0x%x, len = %d\n", addr, len); diff --git a/drivers/usb/typec/tipd/core.c b/drivers/usb/typec/tipd/core.c index 7ffcda94d323a..16b4560216ba6 100644 --- a/drivers/usb/typec/tipd/core.c +++ b/drivers/usb/typec/tipd/core.c @@ -256,6 +256,10 @@ static int tps6598x_connect(struct tps6598x *tps, u32 status) typec_set_pwr_opmode(tps->port, mode); typec_set_pwr_role(tps->port, TPS_STATUS_TO_TYPEC_PORTROLE(status)); typec_set_vconn_role(tps->port, 
TPS_STATUS_TO_TYPEC_VCONN(status)); + if (TPS_STATUS_TO_UPSIDE_DOWN(status)) + typec_set_orientation(tps->port, TYPEC_ORIENTATION_REVERSE); + else + typec_set_orientation(tps->port, TYPEC_ORIENTATION_NORMAL); tps6598x_set_data_role(tps, TPS_STATUS_TO_TYPEC_DATAROLE(status), true); tps->partner = typec_register_partner(tps->port, &desc); @@ -278,6 +282,7 @@ static void tps6598x_disconnect(struct tps6598x *tps, u32 status) typec_set_pwr_opmode(tps->port, TYPEC_PWR_MODE_USB); typec_set_pwr_role(tps->port, TPS_STATUS_TO_TYPEC_PORTROLE(status)); typec_set_vconn_role(tps->port, TPS_STATUS_TO_TYPEC_VCONN(status)); + typec_set_orientation(tps->port, TYPEC_ORIENTATION_NONE); tps6598x_set_data_role(tps, TPS_STATUS_TO_TYPEC_DATAROLE(status), false); power_supply_changed(tps->psy); diff --git a/drivers/usb/typec/tipd/tps6598x.h b/drivers/usb/typec/tipd/tps6598x.h index 3dae84c524fb5..527857549d699 100644 --- a/drivers/usb/typec/tipd/tps6598x.h +++ b/drivers/usb/typec/tipd/tps6598x.h @@ -17,6 +17,7 @@ /* TPS_REG_STATUS bits */ #define TPS_STATUS_PLUG_PRESENT BIT(0) #define TPS_STATUS_PLUG_UPSIDE_DOWN BIT(4) +#define TPS_STATUS_TO_UPSIDE_DOWN(s) (!!((s) & TPS_STATUS_PLUG_UPSIDE_DOWN)) #define TPS_STATUS_PORTROLE BIT(5) #define TPS_STATUS_TO_TYPEC_PORTROLE(s) (!!((s) & TPS_STATUS_PORTROLE)) #define TPS_STATUS_DATAROLE BIT(6) diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index d0f91078600e9..1b5de3af1a627 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -163,6 +163,7 @@ struct mlx5_vdpa_net { u32 cur_num_vqs; struct notifier_block nb; struct vdpa_callback config_cb; + struct mlx5_vdpa_wq_ent cvq_ent; }; static void free_resources(struct mlx5_vdpa_net *ndev); @@ -1616,10 +1617,10 @@ static void mlx5_cvq_kick_handler(struct work_struct *work) ndev = to_mlx5_vdpa_ndev(mvdev); cvq = &mvdev->cvq; if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ))) - goto out; + return; if (!cvq->ready) - goto 
out; + return; while (true) { err = vringh_getdesc_iotlb(&cvq->vring, &cvq->riov, &cvq->wiov, &cvq->head, @@ -1653,9 +1654,10 @@ static void mlx5_cvq_kick_handler(struct work_struct *work) if (vringh_need_notify_iotlb(&cvq->vring)) vringh_notify(&cvq->vring); + + queue_work(mvdev->wq, &wqent->work); + break; } -out: - kfree(wqent); } static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx) @@ -1663,22 +1665,15 @@ static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx) struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); struct mlx5_vdpa_virtqueue *mvq; - struct mlx5_vdpa_wq_ent *wqent; if (!is_index_valid(mvdev, idx)) return; if (unlikely(is_ctrl_vq_idx(mvdev, idx))) { - if (!mvdev->cvq.ready) + if (!mvdev->wq || !mvdev->cvq.ready) return; - wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC); - if (!wqent) - return; - - wqent->mvdev = mvdev; - INIT_WORK(&wqent->work, mlx5_cvq_kick_handler); - queue_work(mvdev->wq, &wqent->work); + queue_work(mvdev->wq, &ndev->cvq_ent.work); return; } @@ -2668,6 +2663,8 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name, if (err) goto err_mr; + ndev->cvq_ent.mvdev = mvdev; + INIT_WORK(&ndev->cvq_ent.work, mlx5_cvq_kick_handler); mvdev->wq = create_singlethread_workqueue("mlx5_vdpa_wq"); if (!mvdev->wq) { err = -ENOMEM; @@ -2707,9 +2704,12 @@ static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device * struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev); struct mlx5_vdpa_dev *mvdev = to_mvdev(dev); struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); + struct workqueue_struct *wq; mlx5_notifier_unregister(mvdev->mdev, &ndev->nb); - destroy_workqueue(mvdev->wq); + wq = mvdev->wq; + mvdev->wq = NULL; + destroy_workqueue(wq); _vdpa_unregister_device(dev); mgtdev->ndev = NULL; } diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c index f948e6cd29939..ef54ef11af552 
100644 --- a/drivers/vfio/pci/vfio_pci_core.c +++ b/drivers/vfio/pci/vfio_pci_core.c @@ -36,6 +36,10 @@ static bool nointxmask; static bool disable_vga; static bool disable_idle_d3; +/* List of PF's that vfio_pci_core_sriov_configure() has been called on */ +static DEFINE_MUTEX(vfio_pci_sriov_pfs_mutex); +static LIST_HEAD(vfio_pci_sriov_pfs); + static inline bool vfio_vga_disabled(void) { #ifdef CONFIG_VFIO_PCI_VGA @@ -228,6 +232,19 @@ int vfio_pci_set_power_state(struct vfio_pci_core_device *vdev, pci_power_t stat if (!ret) { /* D3 might be unsupported via quirk, skip unless in D3 */ if (needs_save && pdev->current_state >= PCI_D3hot) { + /* + * The current PCI state will be saved locally in + * 'pm_save' during the D3hot transition. When the + * device state is changed to D0 again with the current + * function, then pci_store_saved_state() will restore + * the state and will free the memory pointed by + * 'pm_save'. There are few cases where the PCI power + * state can be changed to D0 without the involvement + * of the driver. For these cases, free the earlier + * allocated memory first before overwriting 'pm_save' + * to prevent the memory leak. + */ + kfree(vdev->pm_save); vdev->pm_save = pci_store_saved_state(pdev); } else if (needs_restore) { pci_load_and_free_saved_state(pdev, &vdev->pm_save); @@ -322,6 +339,17 @@ void vfio_pci_core_disable(struct vfio_pci_core_device *vdev) /* For needs_reset */ lockdep_assert_held(&vdev->vdev.dev_set->lock); + /* + * This function can be invoked while the power state is non-D0. + * This function calls __pci_reset_function_locked() which internally + * can use pci_pm_reset() for the function reset. pci_pm_reset() will + * fail if the power state is non-D0. Also, for the devices which + * have NoSoftRst-, the reset function can cause the PCI config space + * reset without restoring the original state (saved locally in + * 'vdev->pm_save'). 
+ */ + vfio_pci_set_power_state(vdev, PCI_D0); + /* Stop the device from further DMA */ pci_clear_master(pdev); @@ -410,47 +438,17 @@ void vfio_pci_core_disable(struct vfio_pci_core_device *vdev) } EXPORT_SYMBOL_GPL(vfio_pci_core_disable); -static struct vfio_pci_core_device *get_pf_vdev(struct vfio_pci_core_device *vdev) -{ - struct pci_dev *physfn = pci_physfn(vdev->pdev); - struct vfio_device *pf_dev; - - if (!vdev->pdev->is_virtfn) - return NULL; - - pf_dev = vfio_device_get_from_dev(&physfn->dev); - if (!pf_dev) - return NULL; - - if (pci_dev_driver(physfn) != pci_dev_driver(vdev->pdev)) { - vfio_device_put(pf_dev); - return NULL; - } - - return container_of(pf_dev, struct vfio_pci_core_device, vdev); -} - -static void vfio_pci_vf_token_user_add(struct vfio_pci_core_device *vdev, int val) -{ - struct vfio_pci_core_device *pf_vdev = get_pf_vdev(vdev); - - if (!pf_vdev) - return; - - mutex_lock(&pf_vdev->vf_token->lock); - pf_vdev->vf_token->users += val; - WARN_ON(pf_vdev->vf_token->users < 0); - mutex_unlock(&pf_vdev->vf_token->lock); - - vfio_device_put(&pf_vdev->vdev); -} - void vfio_pci_core_close_device(struct vfio_device *core_vdev) { struct vfio_pci_core_device *vdev = container_of(core_vdev, struct vfio_pci_core_device, vdev); - vfio_pci_vf_token_user_add(vdev, -1); + if (vdev->sriov_pf_core_dev) { + mutex_lock(&vdev->sriov_pf_core_dev->vf_token->lock); + WARN_ON(!vdev->sriov_pf_core_dev->vf_token->users); + vdev->sriov_pf_core_dev->vf_token->users--; + mutex_unlock(&vdev->sriov_pf_core_dev->vf_token->lock); + } vfio_spapr_pci_eeh_release(vdev->pdev); vfio_pci_core_disable(vdev); @@ -471,7 +469,12 @@ void vfio_pci_core_finish_enable(struct vfio_pci_core_device *vdev) { vfio_pci_probe_mmaps(vdev); vfio_spapr_pci_eeh_open(vdev->pdev); - vfio_pci_vf_token_user_add(vdev, 1); + + if (vdev->sriov_pf_core_dev) { + mutex_lock(&vdev->sriov_pf_core_dev->vf_token->lock); + vdev->sriov_pf_core_dev->vf_token->users++; + 
mutex_unlock(&vdev->sriov_pf_core_dev->vf_token->lock); + } } EXPORT_SYMBOL_GPL(vfio_pci_core_finish_enable); @@ -921,6 +924,19 @@ long vfio_pci_core_ioctl(struct vfio_device *core_vdev, unsigned int cmd, return -EINVAL; vfio_pci_zap_and_down_write_memory_lock(vdev); + + /* + * This function can be invoked while the power state is non-D0. + * If pci_try_reset_function() has been called while the power + * state is non-D0, then pci_try_reset_function() will + * internally set the power state to D0 without vfio driver + * involvement. For the devices which have NoSoftRst-, the + * reset function can cause the PCI config space reset without + * restoring the original state (saved locally in + * 'vdev->pm_save'). + */ + vfio_pci_set_power_state(vdev, PCI_D0); + ret = pci_try_reset_function(vdev->pdev); up_write(&vdev->memory_lock); @@ -1566,11 +1582,8 @@ static int vfio_pci_validate_vf_token(struct vfio_pci_core_device *vdev, * * If the VF token is provided but unused, an error is generated. 
*/ - if (!vdev->pdev->is_virtfn && !vdev->vf_token && !vf_token) - return 0; /* No VF token provided or required */ - if (vdev->pdev->is_virtfn) { - struct vfio_pci_core_device *pf_vdev = get_pf_vdev(vdev); + struct vfio_pci_core_device *pf_vdev = vdev->sriov_pf_core_dev; bool match; if (!pf_vdev) { @@ -1583,7 +1596,6 @@ static int vfio_pci_validate_vf_token(struct vfio_pci_core_device *vdev, } if (!vf_token) { - vfio_device_put(&pf_vdev->vdev); pci_info_ratelimited(vdev->pdev, "VF token required to access device\n"); return -EACCES; @@ -1593,8 +1605,6 @@ static int vfio_pci_validate_vf_token(struct vfio_pci_core_device *vdev, match = uuid_equal(uuid, &pf_vdev->vf_token->uuid); mutex_unlock(&pf_vdev->vf_token->lock); - vfio_device_put(&pf_vdev->vdev); - if (!match) { pci_info_ratelimited(vdev->pdev, "Incorrect VF token provided for device\n"); @@ -1715,8 +1725,30 @@ static int vfio_pci_bus_notifier(struct notifier_block *nb, static int vfio_pci_vf_init(struct vfio_pci_core_device *vdev) { struct pci_dev *pdev = vdev->pdev; + struct vfio_pci_core_device *cur; + struct pci_dev *physfn; int ret; + if (pdev->is_virtfn) { + /* + * If this VF was created by our vfio_pci_core_sriov_configure() + * then we can find the PF vfio_pci_core_device now, and due to + * the locking in pci_disable_sriov() it cannot change until + * this VF device driver is removed. 
+ */ + physfn = pci_physfn(vdev->pdev); + mutex_lock(&vfio_pci_sriov_pfs_mutex); + list_for_each_entry(cur, &vfio_pci_sriov_pfs, sriov_pfs_item) { + if (cur->pdev == physfn) { + vdev->sriov_pf_core_dev = cur; + break; + } + } + mutex_unlock(&vfio_pci_sriov_pfs_mutex); + return 0; + } + + /* Not a SRIOV PF */ if (!pdev->is_physfn) return 0; @@ -1788,6 +1820,7 @@ void vfio_pci_core_init_device(struct vfio_pci_core_device *vdev, INIT_LIST_HEAD(&vdev->ioeventfds_list); mutex_init(&vdev->vma_lock); INIT_LIST_HEAD(&vdev->vma_list); + INIT_LIST_HEAD(&vdev->sriov_pfs_item); init_rwsem(&vdev->memory_lock); } EXPORT_SYMBOL_GPL(vfio_pci_core_init_device); @@ -1879,7 +1912,7 @@ void vfio_pci_core_unregister_device(struct vfio_pci_core_device *vdev) { struct pci_dev *pdev = vdev->pdev; - pci_disable_sriov(pdev); + vfio_pci_core_sriov_configure(pdev, 0); vfio_unregister_group_dev(&vdev->vdev); @@ -1917,21 +1950,49 @@ static pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev, int vfio_pci_core_sriov_configure(struct pci_dev *pdev, int nr_virtfn) { + struct vfio_pci_core_device *vdev; struct vfio_device *device; int ret = 0; + device_lock_assert(&pdev->dev); + device = vfio_device_get_from_dev(&pdev->dev); if (!device) return -ENODEV; - if (nr_virtfn == 0) - pci_disable_sriov(pdev); - else + vdev = container_of(device, struct vfio_pci_core_device, vdev); + + if (nr_virtfn) { + mutex_lock(&vfio_pci_sriov_pfs_mutex); + /* + * The thread that adds the vdev to the list is the only thread + * that gets to call pci_enable_sriov() and we will only allow + * it to be called once without going through + * pci_disable_sriov() + */ + if (!list_empty(&vdev->sriov_pfs_item)) { + ret = -EINVAL; + goto out_unlock; + } + list_add_tail(&vdev->sriov_pfs_item, &vfio_pci_sriov_pfs); + mutex_unlock(&vfio_pci_sriov_pfs_mutex); ret = pci_enable_sriov(pdev, nr_virtfn); + if (ret) + goto out_del; + ret = nr_virtfn; + goto out_put; + } - vfio_device_put(device); + pci_disable_sriov(pdev); - 
return ret < 0 ? ret : nr_virtfn; +out_del: + mutex_lock(&vfio_pci_sriov_pfs_mutex); + list_del_init(&vdev->sriov_pfs_item); +out_unlock: + mutex_unlock(&vfio_pci_sriov_pfs_mutex); +out_put: + vfio_device_put(device); + return ret; } EXPORT_SYMBOL_GPL(vfio_pci_core_sriov_configure); @@ -2055,6 +2116,18 @@ static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set, } cur_mem = NULL; + /* + * The pci_reset_bus() will reset all the devices in the bus. + * The power state can be non-D0 for some of the devices in the bus. + * For these devices, the pci_reset_bus() will internally set + * the power state to D0 without vfio driver involvement. + * For the devices which have NoSoftRst-, the reset function can + * cause the PCI config space reset without restoring the original + * state (saved locally in 'vdev->pm_save'). + */ + list_for_each_entry(cur, &dev_set->device_list, vdev.dev_set_list) + vfio_pci_set_power_state(cur, PCI_D0); + ret = pci_reset_bus(pdev); err_undo: @@ -2108,6 +2181,18 @@ static bool vfio_pci_dev_set_try_reset(struct vfio_device_set *dev_set) if (!pdev) return false; + /* + * The pci_reset_bus() will reset all the devices in the bus. + * The power state can be non-D0 for some of the devices in the bus. + * For these devices, the pci_reset_bus() will internally set + * the power state to D0 without vfio driver involvement. + * For the devices which have NoSoftRst-, the reset function can + * cause the PCI config space reset without restoring the original + * state (saved locally in 'vdev->pm_save'). 
+ */ + list_for_each_entry(cur, &dev_set->device_list, vdev.dev_set_list) + vfio_pci_set_power_state(cur, PCI_D0); + ret = pci_reset_bus(pdev); if (ret) return false; diff --git a/drivers/vfio/pci/vfio_pci_rdwr.c b/drivers/vfio/pci/vfio_pci_rdwr.c index 57d3b2cbbd8e5..82ac1569deb05 100644 --- a/drivers/vfio/pci/vfio_pci_rdwr.c +++ b/drivers/vfio/pci/vfio_pci_rdwr.c @@ -288,6 +288,7 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf, return done; } +#ifdef CONFIG_VFIO_PCI_VGA ssize_t vfio_pci_vga_rw(struct vfio_pci_core_device *vdev, char __user *buf, size_t count, loff_t *ppos, bool iswrite) { @@ -355,6 +356,7 @@ ssize_t vfio_pci_vga_rw(struct vfio_pci_core_device *vdev, char __user *buf, return done; } +#endif static void vfio_pci_ioeventfd_do_write(struct vfio_pci_ioeventfd *ioeventfd, bool test_mem) diff --git a/drivers/vhost/iotlb.c b/drivers/vhost/iotlb.c index 40b098320b2a7..5829cf2d0552d 100644 --- a/drivers/vhost/iotlb.c +++ b/drivers/vhost/iotlb.c @@ -62,8 +62,12 @@ int vhost_iotlb_add_range_ctx(struct vhost_iotlb *iotlb, */ if (start == 0 && last == ULONG_MAX) { u64 mid = last / 2; + int err = vhost_iotlb_add_range_ctx(iotlb, start, mid, addr, + perm, opaque); + + if (err) + return err; - vhost_iotlb_add_range_ctx(iotlb, start, mid, addr, perm, opaque); addr += mid + 1; start = mid + 1; } diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 28ef323882fb2..792ab5f236471 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -473,6 +473,7 @@ static void vhost_tx_batch(struct vhost_net *net, goto signal_used; msghdr->msg_control = &ctl; + msghdr->msg_controllen = sizeof(ctl); err = sock->ops->sendmsg(sock, msghdr, 0); if (unlikely(err < 0)) { vq_err(&nvq->vq, "Fail to batch sending packets\n"); diff --git a/drivers/video/fbdev/atafb.c b/drivers/video/fbdev/atafb.c index e3812a8ff55a4..29e650ecfceb1 100644 --- a/drivers/video/fbdev/atafb.c +++ b/drivers/video/fbdev/atafb.c @@ -1683,9 +1683,9 @@ static int 
falcon_setcolreg(unsigned int regno, unsigned int red, ((blue & 0xfc00) >> 8)); if (regno < 16) { shifter_tt.color_reg[regno] = - (((red & 0xe000) >> 13) | ((red & 0x1000) >> 12) << 8) | - (((green & 0xe000) >> 13) | ((green & 0x1000) >> 12) << 4) | - ((blue & 0xe000) >> 13) | ((blue & 0x1000) >> 12); + ((((red & 0xe000) >> 13) | ((red & 0x1000) >> 12)) << 8) | + ((((green & 0xe000) >> 13) | ((green & 0x1000) >> 12)) << 4) | + ((blue & 0xe000) >> 13) | ((blue & 0x1000) >> 12); ((u32 *)info->pseudo_palette)[regno] = ((red & 0xf800) | ((green & 0xfc00) >> 5) | ((blue & 0xf800) >> 11)); @@ -1971,9 +1971,9 @@ static int stste_setcolreg(unsigned int regno, unsigned int red, green >>= 12; if (ATARIHW_PRESENT(EXTD_SHIFTER)) shifter_tt.color_reg[regno] = - (((red & 0xe) >> 1) | ((red & 1) << 3) << 8) | - (((green & 0xe) >> 1) | ((green & 1) << 3) << 4) | - ((blue & 0xe) >> 1) | ((blue & 1) << 3); + ((((red & 0xe) >> 1) | ((red & 1) << 3)) << 8) | + ((((green & 0xe) >> 1) | ((green & 1) << 3)) << 4) | + ((blue & 0xe) >> 1) | ((blue & 1) << 3); else shifter_tt.color_reg[regno] = ((red & 0xe) << 7) | diff --git a/drivers/video/fbdev/atmel_lcdfb.c b/drivers/video/fbdev/atmel_lcdfb.c index 355b6120dc4f0..1fc8de4ecbebf 100644 --- a/drivers/video/fbdev/atmel_lcdfb.c +++ b/drivers/video/fbdev/atmel_lcdfb.c @@ -1062,15 +1062,16 @@ static int __init atmel_lcdfb_probe(struct platform_device *pdev) INIT_LIST_HEAD(&info->modelist); - if (pdev->dev.of_node) { - ret = atmel_lcdfb_of_init(sinfo); - if (ret) - goto free_info; - } else { + if (!pdev->dev.of_node) { dev_err(dev, "cannot get default configuration\n"); goto free_info; } + ret = atmel_lcdfb_of_init(sinfo); + if (ret) + goto free_info; + + ret = -ENODEV; if (!sinfo->config) goto free_info; diff --git a/drivers/video/fbdev/cirrusfb.c b/drivers/video/fbdev/cirrusfb.c index 93802abbbc72a..3d47c347b8970 100644 --- a/drivers/video/fbdev/cirrusfb.c +++ b/drivers/video/fbdev/cirrusfb.c @@ -469,7 +469,7 @@ static int 
cirrusfb_check_mclk(struct fb_info *info, long freq) return 0; } -static int cirrusfb_check_pixclock(const struct fb_var_screeninfo *var, +static int cirrusfb_check_pixclock(struct fb_var_screeninfo *var, struct fb_info *info) { long freq; @@ -478,9 +478,7 @@ static int cirrusfb_check_pixclock(const struct fb_var_screeninfo *var, unsigned maxclockidx = var->bits_per_pixel >> 3; /* convert from ps to kHz */ - freq = PICOS2KHZ(var->pixclock); - - dev_dbg(info->device, "desired pixclock: %ld kHz\n", freq); + freq = PICOS2KHZ(var->pixclock ? : 1); maxclock = cirrusfb_board_info[cinfo->btype].maxclock[maxclockidx]; cinfo->multiplexing = 0; @@ -488,11 +486,13 @@ static int cirrusfb_check_pixclock(const struct fb_var_screeninfo *var, /* If the frequency is greater than we can support, we might be able * to use multiplexing for the video mode */ if (freq > maxclock) { - dev_err(info->device, - "Frequency greater than maxclock (%ld kHz)\n", - maxclock); - return -EINVAL; + var->pixclock = KHZ2PICOS(maxclock); + + while ((freq = PICOS2KHZ(var->pixclock)) > maxclock) + var->pixclock++; } + dev_dbg(info->device, "desired pixclock: %ld kHz\n", freq); + /* * Additional constraint: 8bpp uses DAC clock doubling to allow maximum * pixel clock diff --git a/drivers/video/fbdev/controlfb.c b/drivers/video/fbdev/controlfb.c index 509311471d515..bd59e7b11ed53 100644 --- a/drivers/video/fbdev/controlfb.c +++ b/drivers/video/fbdev/controlfb.c @@ -67,7 +67,9 @@ #define out_8(addr, val) (void)(val) #define in_le32(addr) 0 #define out_le32(addr, val) (void)(val) +#ifndef pgprot_cached_wthru #define pgprot_cached_wthru(prot) (prot) +#endif #else static void invalid_vram_cache(void __force *addr) { diff --git a/drivers/video/fbdev/core/fbcvt.c b/drivers/video/fbdev/core/fbcvt.c index 55d2bd0ce5c02..64843464c6613 100644 --- a/drivers/video/fbdev/core/fbcvt.c +++ b/drivers/video/fbdev/core/fbcvt.c @@ -214,9 +214,11 @@ static u32 fb_cvt_aspect_ratio(struct fb_cvt_data *cvt) static void 
fb_cvt_print_name(struct fb_cvt_data *cvt) { u32 pixcount, pixcount_mod; - int cnt = 255, offset = 0, read = 0; - u8 *buf = kzalloc(256, GFP_KERNEL); + int size = 256; + int off = 0; + u8 *buf; + buf = kzalloc(size, GFP_KERNEL); if (!buf) return; @@ -224,43 +226,30 @@ static void fb_cvt_print_name(struct fb_cvt_data *cvt) pixcount_mod = (cvt->xres * (cvt->yres/cvt->interlace)) % 1000000; pixcount_mod /= 1000; - read = snprintf(buf+offset, cnt, "fbcvt: %dx%d@%d: CVT Name - ", - cvt->xres, cvt->yres, cvt->refresh); - offset += read; - cnt -= read; + off += scnprintf(buf + off, size - off, "fbcvt: %dx%d@%d: CVT Name - ", + cvt->xres, cvt->yres, cvt->refresh); - if (cvt->status) - snprintf(buf+offset, cnt, "Not a CVT standard - %d.%03d Mega " - "Pixel Image\n", pixcount, pixcount_mod); - else { - if (pixcount) { - read = snprintf(buf+offset, cnt, "%d", pixcount); - cnt -= read; - offset += read; - } + if (cvt->status) { + off += scnprintf(buf + off, size - off, + "Not a CVT standard - %d.%03d Mega Pixel Image\n", + pixcount, pixcount_mod); + } else { + if (pixcount) + off += scnprintf(buf + off, size - off, "%d", pixcount); - read = snprintf(buf+offset, cnt, ".%03dM", pixcount_mod); - cnt -= read; - offset += read; + off += scnprintf(buf + off, size - off, ".%03dM", pixcount_mod); if (cvt->aspect_ratio == 0) - read = snprintf(buf+offset, cnt, "3"); + off += scnprintf(buf + off, size - off, "3"); else if (cvt->aspect_ratio == 3) - read = snprintf(buf+offset, cnt, "4"); + off += scnprintf(buf + off, size - off, "4"); else if (cvt->aspect_ratio == 1 || cvt->aspect_ratio == 4) - read = snprintf(buf+offset, cnt, "9"); + off += scnprintf(buf + off, size - off, "9"); else if (cvt->aspect_ratio == 2) - read = snprintf(buf+offset, cnt, "A"); - else - read = 0; - cnt -= read; - offset += read; - - if (cvt->flags & FB_CVT_FLAG_REDUCED_BLANK) { - read = snprintf(buf+offset, cnt, "-R"); - cnt -= read; - offset += read; - } + off += scnprintf(buf + off, size - off, "A"); + + if 
(cvt->flags & FB_CVT_FLAG_REDUCED_BLANK) + off += scnprintf(buf + off, size - off, "-R"); } printk(KERN_INFO "%s\n", buf); diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c index 13083ad8d7515..00f0f282e7a13 100644 --- a/drivers/video/fbdev/core/fbmem.c +++ b/drivers/video/fbdev/core/fbmem.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -1559,18 +1560,43 @@ static void do_remove_conflicting_framebuffers(struct apertures_struct *a, /* check all firmware fbs and kick off if the base addr overlaps */ for_each_registered_fb(i) { struct apertures_struct *gen_aper; + struct device *device; if (!(registered_fb[i]->flags & FBINFO_MISC_FIRMWARE)) continue; gen_aper = registered_fb[i]->apertures; + device = registered_fb[i]->device; if (fb_do_apertures_overlap(gen_aper, a) || (primary && gen_aper && gen_aper->count && gen_aper->ranges[0].base == VGA_FB_PHYS)) { printk(KERN_INFO "fb%d: switching to %s from %s\n", i, name, registered_fb[i]->fix.id); - do_unregister_framebuffer(registered_fb[i]); + + /* + * If we kick-out a firmware driver, we also want to remove + * the underlying platform device, such as simple-framebuffer, + * VESA, EFI, etc. A native driver will then be able to + * allocate the memory range. + * + * If it's not a platform device, at least print a warning. A + * fix would add code to remove the device from the system. + */ + if (!device) { + /* TODO: Represent each OF framebuffer as its own + * device in the device hierarchy. For now, offb + * doesn't have such a device, so unregister the + * framebuffer as before without warning. 
+ */ + do_unregister_framebuffer(registered_fb[i]); + } else if (dev_is_platform(device)) { + registered_fb[i]->forced_out = true; + platform_device_unregister(to_platform_device(device)); + } else { + pr_warn("fb%d: cannot remove device\n", i); + do_unregister_framebuffer(registered_fb[i]); + } } } } @@ -1900,9 +1926,13 @@ EXPORT_SYMBOL(register_framebuffer); void unregister_framebuffer(struct fb_info *fb_info) { - mutex_lock(&registration_lock); + bool forced_out = fb_info->forced_out; + + if (!forced_out) + mutex_lock(&registration_lock); do_unregister_framebuffer(fb_info); - mutex_unlock(&registration_lock); + if (!forced_out) + mutex_unlock(&registration_lock); } EXPORT_SYMBOL(unregister_framebuffer); diff --git a/drivers/video/fbdev/matrox/matroxfb_base.c b/drivers/video/fbdev/matrox/matroxfb_base.c index 5c82611e93d99..236521b19daf7 100644 --- a/drivers/video/fbdev/matrox/matroxfb_base.c +++ b/drivers/video/fbdev/matrox/matroxfb_base.c @@ -1377,7 +1377,7 @@ static struct video_board vbG200 = { .lowlevel = &matrox_G100 }; static struct video_board vbG200eW = { - .maxvram = 0x800000, + .maxvram = 0x100000, .maxdisplayable = 0x800000, .accelID = FB_ACCEL_MATROX_MGAG200, .lowlevel = &matrox_G100 diff --git a/drivers/video/fbdev/nvidia/nv_i2c.c b/drivers/video/fbdev/nvidia/nv_i2c.c index d7994a1732459..0b48965a6420c 100644 --- a/drivers/video/fbdev/nvidia/nv_i2c.c +++ b/drivers/video/fbdev/nvidia/nv_i2c.c @@ -86,7 +86,7 @@ static int nvidia_setup_i2c_bus(struct nvidia_i2c_chan *chan, const char *name, { int rc; - strcpy(chan->adapter.name, name); + strscpy(chan->adapter.name, name, sizeof(chan->adapter.name)); chan->adapter.owner = THIS_MODULE; chan->adapter.class = i2c_class; chan->adapter.algo_data = &chan->algo; diff --git a/drivers/video/fbdev/omap2/omapfb/displays/connector-dvi.c b/drivers/video/fbdev/omap2/omapfb/displays/connector-dvi.c index 2fa436475b406..c8ad3ef42bd31 100644 --- a/drivers/video/fbdev/omap2/omapfb/displays/connector-dvi.c +++ 
b/drivers/video/fbdev/omap2/omapfb/displays/connector-dvi.c @@ -246,6 +246,7 @@ static int dvic_probe_of(struct platform_device *pdev) adapter_node = of_parse_phandle(node, "ddc-i2c-bus", 0); if (adapter_node) { adapter = of_get_i2c_adapter_by_node(adapter_node); + of_node_put(adapter_node); if (adapter == NULL) { dev_err(&pdev->dev, "failed to parse ddc-i2c-bus\n"); omap_dss_put_device(ddata->in); diff --git a/drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c b/drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c index 4b0793abdd84b..a2c7c5cb15234 100644 --- a/drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c +++ b/drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c @@ -409,7 +409,7 @@ static ssize_t dsicm_num_errors_show(struct device *dev, if (r) return r; - return snprintf(buf, PAGE_SIZE, "%d\n", errors); + return sysfs_emit(buf, "%d\n", errors); } static ssize_t dsicm_hw_revision_show(struct device *dev, @@ -439,7 +439,7 @@ static ssize_t dsicm_hw_revision_show(struct device *dev, if (r) return r; - return snprintf(buf, PAGE_SIZE, "%02x.%02x.%02x\n", id1, id2, id3); + return sysfs_emit(buf, "%02x.%02x.%02x\n", id1, id2, id3); } static ssize_t dsicm_store_ulps(struct device *dev, @@ -487,7 +487,7 @@ static ssize_t dsicm_show_ulps(struct device *dev, t = ddata->ulps_enabled; mutex_unlock(&ddata->lock); - return snprintf(buf, PAGE_SIZE, "%u\n", t); + return sysfs_emit(buf, "%u\n", t); } static ssize_t dsicm_store_ulps_timeout(struct device *dev, @@ -532,7 +532,7 @@ static ssize_t dsicm_show_ulps_timeout(struct device *dev, t = ddata->ulps_timeout; mutex_unlock(&ddata->lock); - return snprintf(buf, PAGE_SIZE, "%u\n", t); + return sysfs_emit(buf, "%u\n", t); } static DEVICE_ATTR(num_dsi_errors, S_IRUGO, dsicm_num_errors_show, NULL); diff --git a/drivers/video/fbdev/omap2/omapfb/displays/panel-sony-acx565akm.c b/drivers/video/fbdev/omap2/omapfb/displays/panel-sony-acx565akm.c index 8d8b5ff7d43c8..3696eb09b69b4 100644 --- 
a/drivers/video/fbdev/omap2/omapfb/displays/panel-sony-acx565akm.c +++ b/drivers/video/fbdev/omap2/omapfb/displays/panel-sony-acx565akm.c @@ -476,7 +476,7 @@ static ssize_t show_cabc_available_modes(struct device *dev, int i; if (!ddata->has_cabc) - return snprintf(buf, PAGE_SIZE, "%s\n", cabc_modes[0]); + return sysfs_emit(buf, "%s\n", cabc_modes[0]); for (i = 0, len = 0; len < PAGE_SIZE && i < ARRAY_SIZE(cabc_modes); i++) diff --git a/drivers/video/fbdev/omap2/omapfb/displays/panel-tpo-td043mtea1.c b/drivers/video/fbdev/omap2/omapfb/displays/panel-tpo-td043mtea1.c index afac1d9445aa2..57b7d1f490962 100644 --- a/drivers/video/fbdev/omap2/omapfb/displays/panel-tpo-td043mtea1.c +++ b/drivers/video/fbdev/omap2/omapfb/displays/panel-tpo-td043mtea1.c @@ -169,7 +169,7 @@ static ssize_t tpo_td043_vmirror_show(struct device *dev, { struct panel_drv_data *ddata = dev_get_drvdata(dev); - return snprintf(buf, PAGE_SIZE, "%d\n", ddata->vmirror); + return sysfs_emit(buf, "%d\n", ddata->vmirror); } static ssize_t tpo_td043_vmirror_store(struct device *dev, @@ -199,7 +199,7 @@ static ssize_t tpo_td043_mode_show(struct device *dev, { struct panel_drv_data *ddata = dev_get_drvdata(dev); - return snprintf(buf, PAGE_SIZE, "%d\n", ddata->mode); + return sysfs_emit(buf, "%d\n", ddata->mode); } static ssize_t tpo_td043_mode_store(struct device *dev, diff --git a/drivers/video/fbdev/sm712fb.c b/drivers/video/fbdev/sm712fb.c index 0dbc6bf8268ac..092a1caa1208e 100644 --- a/drivers/video/fbdev/sm712fb.c +++ b/drivers/video/fbdev/sm712fb.c @@ -1047,7 +1047,7 @@ static ssize_t smtcfb_read(struct fb_info *info, char __user *buf, if (count + p > total_size) count = total_size - p; - buffer = kmalloc((count > PAGE_SIZE) ? PAGE_SIZE : count, GFP_KERNEL); + buffer = kmalloc(PAGE_SIZE, GFP_KERNEL); if (!buffer) return -ENOMEM; @@ -1059,25 +1059,14 @@ static ssize_t smtcfb_read(struct fb_info *info, char __user *buf, while (count) { c = (count > PAGE_SIZE) ? 
PAGE_SIZE : count; dst = buffer; - for (i = c >> 2; i--;) { - *dst = fb_readl(src++); - *dst = big_swap(*dst); + for (i = (c + 3) >> 2; i--;) { + u32 val; + + val = fb_readl(src); + *dst = big_swap(val); + src++; dst++; } - if (c & 3) { - u8 *dst8 = (u8 *)dst; - u8 __iomem *src8 = (u8 __iomem *)src; - - for (i = c & 3; i--;) { - if (i & 1) { - *dst8++ = fb_readb(++src8); - } else { - *dst8++ = fb_readb(--src8); - src8 += 2; - } - } - src = (u32 __iomem *)src8; - } if (copy_to_user(buf, buffer, c)) { err = -EFAULT; @@ -1130,7 +1119,7 @@ static ssize_t smtcfb_write(struct fb_info *info, const char __user *buf, count = total_size - p; } - buffer = kmalloc((count > PAGE_SIZE) ? PAGE_SIZE : count, GFP_KERNEL); + buffer = kmalloc(PAGE_SIZE, GFP_KERNEL); if (!buffer) return -ENOMEM; @@ -1148,24 +1137,11 @@ static ssize_t smtcfb_write(struct fb_info *info, const char __user *buf, break; } - for (i = c >> 2; i--;) { - fb_writel(big_swap(*src), dst++); + for (i = (c + 3) >> 2; i--;) { + fb_writel(big_swap(*src), dst); + dst++; src++; } - if (c & 3) { - u8 *src8 = (u8 *)src; - u8 __iomem *dst8 = (u8 __iomem *)dst; - - for (i = c & 3; i--;) { - if (i & 1) { - fb_writeb(*src8++, ++dst8); - } else { - fb_writeb(*src8++, --dst8); - dst8 += 2; - } - } - dst = (u32 __iomem *)dst8; - } *ppos += c; buf += c; diff --git a/drivers/video/fbdev/smscufx.c b/drivers/video/fbdev/smscufx.c index bfac3ee4a6422..28768c272b73d 100644 --- a/drivers/video/fbdev/smscufx.c +++ b/drivers/video/fbdev/smscufx.c @@ -1656,6 +1656,7 @@ static int ufx_usb_probe(struct usb_interface *interface, info->par = dev; info->pseudo_palette = dev->pseudo_palette; info->fbops = &ufx_ops; + INIT_LIST_HEAD(&info->modelist); retval = fb_alloc_cmap(&info->cmap, 256, 0); if (retval < 0) { @@ -1666,8 +1667,6 @@ static int ufx_usb_probe(struct usb_interface *interface, INIT_DELAYED_WORK(&dev->free_framebuffer_work, ufx_free_framebuffer_work); - INIT_LIST_HEAD(&info->modelist); - retval = ufx_reg_read(dev, 0x3000, &id_rev); 
check_warn_goto_error(retval, "error %d reading 0x3000 register from device", retval); dev_dbg(dev->gdev, "ID_REV register value 0x%08x", id_rev); diff --git a/drivers/video/fbdev/udlfb.c b/drivers/video/fbdev/udlfb.c index b9cdd02c10009..90f48b71fd8f7 100644 --- a/drivers/video/fbdev/udlfb.c +++ b/drivers/video/fbdev/udlfb.c @@ -1426,7 +1426,7 @@ static ssize_t metrics_bytes_rendered_show(struct device *fbdev, struct device_attribute *a, char *buf) { struct fb_info *fb_info = dev_get_drvdata(fbdev); struct dlfb_data *dlfb = fb_info->par; - return snprintf(buf, PAGE_SIZE, "%u\n", + return sysfs_emit(buf, "%u\n", atomic_read(&dlfb->bytes_rendered)); } @@ -1434,7 +1434,7 @@ static ssize_t metrics_bytes_identical_show(struct device *fbdev, struct device_attribute *a, char *buf) { struct fb_info *fb_info = dev_get_drvdata(fbdev); struct dlfb_data *dlfb = fb_info->par; - return snprintf(buf, PAGE_SIZE, "%u\n", + return sysfs_emit(buf, "%u\n", atomic_read(&dlfb->bytes_identical)); } @@ -1442,7 +1442,7 @@ static ssize_t metrics_bytes_sent_show(struct device *fbdev, struct device_attribute *a, char *buf) { struct fb_info *fb_info = dev_get_drvdata(fbdev); struct dlfb_data *dlfb = fb_info->par; - return snprintf(buf, PAGE_SIZE, "%u\n", + return sysfs_emit(buf, "%u\n", atomic_read(&dlfb->bytes_sent)); } @@ -1450,7 +1450,7 @@ static ssize_t metrics_cpu_kcycles_used_show(struct device *fbdev, struct device_attribute *a, char *buf) { struct fb_info *fb_info = dev_get_drvdata(fbdev); struct dlfb_data *dlfb = fb_info->par; - return snprintf(buf, PAGE_SIZE, "%u\n", + return sysfs_emit(buf, "%u\n", atomic_read(&dlfb->cpu_kcycles_used)); } diff --git a/drivers/video/fbdev/w100fb.c b/drivers/video/fbdev/w100fb.c index d96ab28f8ce4a..4e641a780726e 100644 --- a/drivers/video/fbdev/w100fb.c +++ b/drivers/video/fbdev/w100fb.c @@ -770,12 +770,18 @@ static int w100fb_probe(struct platform_device *pdev) fb_dealloc_cmap(&info->cmap); kfree(info->pseudo_palette); } - if (remapped_fbuf != 
NULL) + if (remapped_fbuf != NULL) { iounmap(remapped_fbuf); - if (remapped_regs != NULL) + remapped_fbuf = NULL; + } + if (remapped_regs != NULL) { iounmap(remapped_regs); - if (remapped_base != NULL) + remapped_regs = NULL; + } + if (remapped_base != NULL) { iounmap(remapped_base); + remapped_base = NULL; + } if (info) framebuffer_release(info); return err; @@ -795,8 +801,11 @@ static int w100fb_remove(struct platform_device *pdev) fb_dealloc_cmap(&info->cmap); iounmap(remapped_base); + remapped_base = NULL; iounmap(remapped_regs); + remapped_regs = NULL; iounmap(remapped_fbuf); + remapped_fbuf = NULL; framebuffer_release(info); diff --git a/drivers/virt/acrn/hsm.c b/drivers/virt/acrn/hsm.c index 5419794fccf1e..423ea888d79af 100644 --- a/drivers/virt/acrn/hsm.c +++ b/drivers/virt/acrn/hsm.c @@ -136,8 +136,10 @@ static long acrn_dev_ioctl(struct file *filp, unsigned int cmd, if (IS_ERR(vm_param)) return PTR_ERR(vm_param); - if ((vm_param->reserved0 | vm_param->reserved1) != 0) + if ((vm_param->reserved0 | vm_param->reserved1) != 0) { + kfree(vm_param); return -EINVAL; + } vm = acrn_vm_create(vm, vm_param); if (!vm) { @@ -182,21 +184,29 @@ static long acrn_dev_ioctl(struct file *filp, unsigned int cmd, return PTR_ERR(cpu_regs); for (i = 0; i < ARRAY_SIZE(cpu_regs->reserved); i++) - if (cpu_regs->reserved[i]) + if (cpu_regs->reserved[i]) { + kfree(cpu_regs); return -EINVAL; + } for (i = 0; i < ARRAY_SIZE(cpu_regs->vcpu_regs.reserved_32); i++) - if (cpu_regs->vcpu_regs.reserved_32[i]) + if (cpu_regs->vcpu_regs.reserved_32[i]) { + kfree(cpu_regs); return -EINVAL; + } for (i = 0; i < ARRAY_SIZE(cpu_regs->vcpu_regs.reserved_64); i++) - if (cpu_regs->vcpu_regs.reserved_64[i]) + if (cpu_regs->vcpu_regs.reserved_64[i]) { + kfree(cpu_regs); return -EINVAL; + } for (i = 0; i < ARRAY_SIZE(cpu_regs->vcpu_regs.gdt.reserved); i++) if (cpu_regs->vcpu_regs.gdt.reserved[i] | - cpu_regs->vcpu_regs.idt.reserved[i]) + cpu_regs->vcpu_regs.idt.reserved[i]) { + kfree(cpu_regs); return 
-EINVAL; + } ret = hcall_set_vcpu_regs(vm->vmid, virt_to_phys(cpu_regs)); if (ret < 0) diff --git a/drivers/virt/acrn/mm.c b/drivers/virt/acrn/mm.c index c4f2e15c8a2ba..3b1b1e7a844b4 100644 --- a/drivers/virt/acrn/mm.c +++ b/drivers/virt/acrn/mm.c @@ -162,10 +162,34 @@ int acrn_vm_ram_map(struct acrn_vm *vm, struct acrn_vm_memmap *memmap) void *remap_vaddr; int ret, pinned; u64 user_vm_pa; + unsigned long pfn; + struct vm_area_struct *vma; if (!vm || !memmap) return -EINVAL; + mmap_read_lock(current->mm); + vma = vma_lookup(current->mm, memmap->vma_base); + if (vma && ((vma->vm_flags & VM_PFNMAP) != 0)) { + if ((memmap->vma_base + memmap->len) > vma->vm_end) { + mmap_read_unlock(current->mm); + return -EINVAL; + } + + ret = follow_pfn(vma, memmap->vma_base, &pfn); + mmap_read_unlock(current->mm); + if (ret < 0) { + dev_dbg(acrn_dev.this_device, + "Failed to lookup PFN at VMA:%pK.\n", (void *)memmap->vma_base); + return ret; + } + + return acrn_mm_region_add(vm, memmap->user_vm_pa, + PFN_PHYS(pfn), memmap->len, + ACRN_MEM_TYPE_WB, memmap->attr); + } + mmap_read_unlock(current->mm); + /* Get the page number of the map region */ nr_pages = memmap->len >> PAGE_SHIFT; pages = vzalloc(nr_pages * sizeof(struct page *)); diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index 22f15f444f757..75c8d560bbd36 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -526,8 +526,9 @@ int virtio_device_restore(struct virtio_device *dev) goto err; } - /* Finally, tell the device we're all set */ - virtio_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK); + /* If restore didn't do it, mark device DRIVER_OK ourselves. 
*/ + if (!(dev->config->get_status(dev) & VIRTIO_CONFIG_S_DRIVER_OK)) + virtio_device_ready(dev); virtio_config_enable(dev); diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index fdbde1db5ec59..d724f676608ba 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -24,46 +24,17 @@ MODULE_PARM_DESC(force_legacy, "Force legacy mode for transitional virtio 1 devices"); #endif -/* disable irq handlers */ -void vp_disable_cbs(struct virtio_device *vdev) +/* wait for pending irq handlers */ +void vp_synchronize_vectors(struct virtio_device *vdev) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); int i; - if (vp_dev->intx_enabled) { - /* - * The below synchronize() guarantees that any - * interrupt for this line arriving after - * synchronize_irq() has completed is guaranteed to see - * intx_soft_enabled == false. - */ - WRITE_ONCE(vp_dev->intx_soft_enabled, false); + if (vp_dev->intx_enabled) synchronize_irq(vp_dev->pci_dev->irq); - } - - for (i = 0; i < vp_dev->msix_vectors; ++i) - disable_irq(pci_irq_vector(vp_dev->pci_dev, i)); -} - -/* enable irq handlers */ -void vp_enable_cbs(struct virtio_device *vdev) -{ - struct virtio_pci_device *vp_dev = to_vp_device(vdev); - int i; - - if (vp_dev->intx_enabled) { - disable_irq(vp_dev->pci_dev->irq); - /* - * The above disable_irq() provides TSO ordering and - * as such promotes the below store to store-release. 
- */ - WRITE_ONCE(vp_dev->intx_soft_enabled, true); - enable_irq(vp_dev->pci_dev->irq); - return; - } for (i = 0; i < vp_dev->msix_vectors; ++i) - enable_irq(pci_irq_vector(vp_dev->pci_dev, i)); + synchronize_irq(pci_irq_vector(vp_dev->pci_dev, i)); } /* the notify function used when creating a virt queue */ @@ -113,9 +84,6 @@ static irqreturn_t vp_interrupt(int irq, void *opaque) struct virtio_pci_device *vp_dev = opaque; u8 isr; - if (!READ_ONCE(vp_dev->intx_soft_enabled)) - return IRQ_NONE; - /* reading the ISR has the effect of also clearing it so it's very * important to save off the value. */ isr = ioread8(vp_dev->isr); @@ -173,8 +141,7 @@ static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors, snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names, "%s-config", name); err = request_irq(pci_irq_vector(vp_dev->pci_dev, v), - vp_config_changed, IRQF_NO_AUTOEN, - vp_dev->msix_names[v], + vp_config_changed, 0, vp_dev->msix_names[v], vp_dev); if (err) goto error; @@ -193,8 +160,7 @@ static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors, snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names, "%s-virtqueues", name); err = request_irq(pci_irq_vector(vp_dev->pci_dev, v), - vp_vring_interrupt, IRQF_NO_AUTOEN, - vp_dev->msix_names[v], + vp_vring_interrupt, 0, vp_dev->msix_names[v], vp_dev); if (err) goto error; @@ -371,7 +337,7 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, "%s-%s", dev_name(&vp_dev->vdev.dev), names[i]); err = request_irq(pci_irq_vector(vp_dev->pci_dev, msix_vec), - vring_interrupt, IRQF_NO_AUTOEN, + vring_interrupt, 0, vp_dev->msix_names[msix_vec], vqs[i]); if (err) diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h index 23f6c5c678d5e..eb17a29fc7ef1 100644 --- a/drivers/virtio/virtio_pci_common.h +++ b/drivers/virtio/virtio_pci_common.h @@ -63,7 +63,6 @@ struct virtio_pci_device { /* MSI-X support */ int msix_enabled; int intx_enabled; - 
bool intx_soft_enabled; cpumask_var_t *msix_affinity_masks; /* Name strings for interrupts. This size should be enough, * and I'm too lazy to allocate each name separately. */ @@ -102,10 +101,8 @@ static struct virtio_pci_device *to_vp_device(struct virtio_device *vdev) return container_of(vdev, struct virtio_pci_device, vdev); } -/* disable irq handlers */ -void vp_disable_cbs(struct virtio_device *vdev); -/* enable irq handlers */ -void vp_enable_cbs(struct virtio_device *vdev); +/* wait for pending irq handlers */ +void vp_synchronize_vectors(struct virtio_device *vdev); /* the notify function used when creating a virt queue */ bool vp_notify(struct virtqueue *vq); /* the config->del_vqs() implementation */ diff --git a/drivers/virtio/virtio_pci_legacy.c b/drivers/virtio/virtio_pci_legacy.c index 34141b9abe278..6f4e34ce96b81 100644 --- a/drivers/virtio/virtio_pci_legacy.c +++ b/drivers/virtio/virtio_pci_legacy.c @@ -98,8 +98,8 @@ static void vp_reset(struct virtio_device *vdev) /* Flush out the status write, and flush in device writes, * including MSi-X interrupts, if any. */ vp_legacy_get_status(&vp_dev->ldev); - /* Disable VQ/configuration callbacks. */ - vp_disable_cbs(vdev); + /* Flush pending VQ/configuration callbacks. */ + vp_synchronize_vectors(vdev); } static u16 vp_config_vector(struct virtio_pci_device *vp_dev, u16 vector) @@ -185,7 +185,6 @@ static void del_vq(struct virtio_pci_vq_info *info) } static const struct virtio_config_ops virtio_pci_config_ops = { - .enable_cbs = vp_enable_cbs, .get = vp_get, .set = vp_set, .get_status = vp_get_status, diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c index 5455bc041fb69..30654d3a0b41e 100644 --- a/drivers/virtio/virtio_pci_modern.c +++ b/drivers/virtio/virtio_pci_modern.c @@ -172,8 +172,8 @@ static void vp_reset(struct virtio_device *vdev) */ while (vp_modern_get_status(mdev)) msleep(1); - /* Disable VQ/configuration callbacks. 
*/ - vp_disable_cbs(vdev); + /* Flush pending VQ/configuration callbacks. */ + vp_synchronize_vectors(vdev); } static u16 vp_config_vector(struct virtio_pci_device *vp_dev, u16 vector) @@ -380,7 +380,6 @@ static bool vp_get_shm_region(struct virtio_device *vdev, } static const struct virtio_config_ops virtio_pci_config_nodev_ops = { - .enable_cbs = vp_enable_cbs, .get = NULL, .set = NULL, .generation = vp_generation, @@ -398,7 +397,6 @@ static const struct virtio_config_ops virtio_pci_config_nodev_ops = { }; static const struct virtio_config_ops virtio_pci_config_ops = { - .enable_cbs = vp_enable_cbs, .get = vp_get, .set = vp_set, .generation = vp_generation, diff --git a/drivers/w1/slaves/w1_therm.c b/drivers/w1/slaves/w1_therm.c index 565578002d79e..c7b8a8e787e23 100644 --- a/drivers/w1/slaves/w1_therm.c +++ b/drivers/w1/slaves/w1_therm.c @@ -2089,16 +2089,20 @@ static ssize_t w1_seq_show(struct device *device, if (sl->reg_num.id == reg_num->id) seq = i; + if (w1_reset_bus(sl->master)) + goto error; + + /* Put the device into chain DONE state */ + w1_write_8(sl->master, W1_MATCH_ROM); + w1_write_block(sl->master, (u8 *)&rn, 8); w1_write_8(sl->master, W1_42_CHAIN); w1_write_8(sl->master, W1_42_CHAIN_DONE); w1_write_8(sl->master, W1_42_CHAIN_DONE_INV); - w1_read_block(sl->master, &ack, sizeof(ack)); /* check for acknowledgment */ ack = w1_read_8(sl->master); if (ack != W1_42_SUCCESS_CONFIRM_BYTE) goto error; - } /* Exit from CHAIN state */ diff --git a/drivers/watchdog/rti_wdt.c b/drivers/watchdog/rti_wdt.c index 117bc2a8eb0a4..db843f8258602 100644 --- a/drivers/watchdog/rti_wdt.c +++ b/drivers/watchdog/rti_wdt.c @@ -228,6 +228,7 @@ static int rti_wdt_probe(struct platform_device *pdev) ret = pm_runtime_get_sync(dev); if (ret) { pm_runtime_put_noidle(dev); + pm_runtime_disable(&pdev->dev); return dev_err_probe(dev, ret, "runtime pm failed\n"); } diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index d61543fbd6528..15b3fa6390818 100644 --- a/fs/binfmt_elf.c +++ 
b/fs/binfmt_elf.c @@ -170,8 +170,8 @@ static int padzero(unsigned long elf_bss) static int create_elf_tables(struct linux_binprm *bprm, const struct elfhdr *exec, - unsigned long load_addr, unsigned long interp_load_addr, - unsigned long e_entry) + unsigned long interp_load_addr, + unsigned long e_entry, unsigned long phdr_addr) { struct mm_struct *mm = current->mm; unsigned long p = bprm->p; @@ -257,7 +257,7 @@ create_elf_tables(struct linux_binprm *bprm, const struct elfhdr *exec, NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP); NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE); NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC); - NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff); + NEW_AUX_ENT(AT_PHDR, phdr_addr); NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr)); NEW_AUX_ENT(AT_PHNUM, exec->e_phnum); NEW_AUX_ENT(AT_BASE, interp_load_addr); @@ -823,7 +823,7 @@ static int parse_elf_properties(struct file *f, const struct elf_phdr *phdr, static int load_elf_binary(struct linux_binprm *bprm) { struct file *interpreter = NULL; /* to shut gcc up */ - unsigned long load_addr = 0, load_bias = 0; + unsigned long load_addr, load_bias = 0, phdr_addr = 0; int load_addr_set = 0; unsigned long error; struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL; @@ -1116,11 +1116,11 @@ static int load_elf_binary(struct linux_binprm *bprm) * independently randomized mmap region (0 load_bias * without MAP_FIXED nor MAP_FIXED_NOREPLACE). 
*/ - alignment = maximum_alignment(elf_phdata, elf_ex->e_phnum); - if (interpreter || alignment > ELF_MIN_ALIGN) { + if (interpreter) { load_bias = ELF_ET_DYN_BASE; if (current->flags & PF_RANDOMIZE) load_bias += arch_mmap_rnd(); + alignment = maximum_alignment(elf_phdata, elf_ex->e_phnum); if (alignment) load_bias &= ~(alignment - 1); elf_flags |= MAP_FIXED_NOREPLACE; @@ -1180,6 +1180,17 @@ static int load_elf_binary(struct linux_binprm *bprm) reloc_func_desc = load_bias; } } + + /* + * Figure out which segment in the file contains the Program + * Header table, and map to the associated memory address. + */ + if (elf_ppnt->p_offset <= elf_ex->e_phoff && + elf_ex->e_phoff < elf_ppnt->p_offset + elf_ppnt->p_filesz) { + phdr_addr = elf_ex->e_phoff - elf_ppnt->p_offset + + elf_ppnt->p_vaddr; + } + k = elf_ppnt->p_vaddr; if ((elf_ppnt->p_flags & PF_X) && k < start_code) start_code = k; @@ -1215,6 +1226,7 @@ static int load_elf_binary(struct linux_binprm *bprm) } e_entry = elf_ex->e_entry + load_bias; + phdr_addr += load_bias; elf_bss += load_bias; elf_brk += load_bias; start_code += load_bias; @@ -1278,8 +1290,8 @@ static int load_elf_binary(struct linux_binprm *bprm) goto out; #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */ - retval = create_elf_tables(bprm, elf_ex, - load_addr, interp_load_addr, e_entry); + retval = create_elf_tables(bprm, elf_ex, interp_load_addr, + e_entry, phdr_addr); if (retval < 0) goto out; @@ -1630,17 +1642,16 @@ static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata, * long file_ofs * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL... 
*/ -static int fill_files_note(struct memelfnote *note) +static int fill_files_note(struct memelfnote *note, struct coredump_params *cprm) { - struct mm_struct *mm = current->mm; - struct vm_area_struct *vma; unsigned count, size, names_ofs, remaining, n; user_long_t *data; user_long_t *start_end_ofs; char *name_base, *name_curpos; + int i; /* *Estimated* file count and total data size needed */ - count = mm->map_count; + count = cprm->vma_count; if (count > UINT_MAX / 64) return -EINVAL; size = count * 64; @@ -1662,11 +1673,12 @@ static int fill_files_note(struct memelfnote *note) name_base = name_curpos = ((char *)data) + names_ofs; remaining = size - names_ofs; count = 0; - for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) { + for (i = 0; i < cprm->vma_count; i++) { + struct core_vma_metadata *m = &cprm->vma_meta[i]; struct file *file; const char *filename; - file = vma->vm_file; + file = m->file; if (!file) continue; filename = file_path(file, name_curpos, remaining); @@ -1686,9 +1698,9 @@ static int fill_files_note(struct memelfnote *note) memmove(name_curpos, filename, n); name_curpos += n; - *start_end_ofs++ = vma->vm_start; - *start_end_ofs++ = vma->vm_end; - *start_end_ofs++ = vma->vm_pgoff; + *start_end_ofs++ = m->start; + *start_end_ofs++ = m->end; + *start_end_ofs++ = m->pgoff; count++; } @@ -1699,7 +1711,7 @@ static int fill_files_note(struct memelfnote *note) * Count usually is less than mm->map_count, * we need to move filenames down. 
*/ - n = mm->map_count - count; + n = cprm->vma_count - count; if (n != 0) { unsigned shift_bytes = n * 3 * sizeof(data[0]); memmove(name_base - shift_bytes, name_base, @@ -1811,7 +1823,7 @@ static int fill_thread_core_info(struct elf_thread_core_info *t, static int fill_note_info(struct elfhdr *elf, int phdrs, struct elf_note_info *info, - const kernel_siginfo_t *siginfo, struct pt_regs *regs) + struct coredump_params *cprm) { struct task_struct *dump_task = current; const struct user_regset_view *view = task_user_regset_view(dump_task); @@ -1883,7 +1895,7 @@ static int fill_note_info(struct elfhdr *elf, int phdrs, * Now fill in each thread's information. */ for (t = info->thread; t != NULL; t = t->next) - if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size)) + if (!fill_thread_core_info(t, view, cprm->siginfo->si_signo, &info->size)) return 0; /* @@ -1892,13 +1904,13 @@ static int fill_note_info(struct elfhdr *elf, int phdrs, fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm); info->size += notesize(&info->psinfo); - fill_siginfo_note(&info->signote, &info->csigdata, siginfo); + fill_siginfo_note(&info->signote, &info->csigdata, cprm->siginfo); info->size += notesize(&info->signote); fill_auxv_note(&info->auxv, current->mm); info->size += notesize(&info->auxv); - if (fill_files_note(&info->files) == 0) + if (fill_files_note(&info->files, cprm) == 0) info->size += notesize(&info->files); return 1; @@ -2040,7 +2052,7 @@ static int elf_note_info_init(struct elf_note_info *info) static int fill_note_info(struct elfhdr *elf, int phdrs, struct elf_note_info *info, - const kernel_siginfo_t *siginfo, struct pt_regs *regs) + struct coredump_params *cprm) { struct core_thread *ct; struct elf_thread_status *ets; @@ -2061,13 +2073,13 @@ static int fill_note_info(struct elfhdr *elf, int phdrs, list_for_each_entry(ets, &info->thread_list, list) { int sz; - sz = elf_dump_thread_status(siginfo->si_signo, ets); + sz = 
elf_dump_thread_status(cprm->siginfo->si_signo, ets); info->thread_status_size += sz; } /* now collect the dump for the current */ memset(info->prstatus, 0, sizeof(*info->prstatus)); - fill_prstatus(&info->prstatus->common, current, siginfo->si_signo); - elf_core_copy_regs(&info->prstatus->pr_reg, regs); + fill_prstatus(&info->prstatus->common, current, cprm->siginfo->si_signo); + elf_core_copy_regs(&info->prstatus->pr_reg, cprm->regs); /* Set up header */ fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS); @@ -2083,18 +2095,18 @@ static int fill_note_info(struct elfhdr *elf, int phdrs, fill_note(info->notes + 1, "CORE", NT_PRPSINFO, sizeof(*info->psinfo), info->psinfo); - fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo); + fill_siginfo_note(info->notes + 2, &info->csigdata, cprm->siginfo); fill_auxv_note(info->notes + 3, current->mm); info->numnote = 4; - if (fill_files_note(info->notes + info->numnote) == 0) { + if (fill_files_note(info->notes + info->numnote, cprm) == 0) { info->notes_files = info->notes + info->numnote; info->numnote++; } /* Try to dump the FPU. 
*/ - info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs, - info->fpu); + info->prstatus->pr_fpvalid = + elf_core_copy_task_fpregs(current, cprm->regs, info->fpu); if (info->prstatus->pr_fpvalid) fill_note(info->notes + info->numnote++, "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu); @@ -2180,8 +2192,7 @@ static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum, static int elf_core_dump(struct coredump_params *cprm) { int has_dumped = 0; - int vma_count, segs, i; - size_t vma_data_size; + int segs, i; struct elfhdr elf; loff_t offset = 0, dataoff; struct elf_note_info info = { }; @@ -2189,16 +2200,12 @@ static int elf_core_dump(struct coredump_params *cprm) struct elf_shdr *shdr4extnum = NULL; Elf_Half e_phnum; elf_addr_t e_shoff; - struct core_vma_metadata *vma_meta; - - if (dump_vma_snapshot(cprm, &vma_count, &vma_meta, &vma_data_size)) - return 0; /* * The number of segs are recored into ELF header as 16bit value. * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here. */ - segs = vma_count + elf_core_extra_phdrs(); + segs = cprm->vma_count + elf_core_extra_phdrs(); /* for notes section */ segs++; @@ -2212,7 +2219,7 @@ static int elf_core_dump(struct coredump_params *cprm) * Collect all the non-memory information about the process for the * notes. This also sets up the file header. 
*/ - if (!fill_note_info(&elf, e_phnum, &info, cprm->siginfo, cprm->regs)) + if (!fill_note_info(&elf, e_phnum, &info, cprm)) goto end_coredump; has_dumped = 1; @@ -2237,7 +2244,7 @@ static int elf_core_dump(struct coredump_params *cprm) dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE); - offset += vma_data_size; + offset += cprm->vma_data_size; offset += elf_core_extra_data_size(); e_shoff = offset; @@ -2257,8 +2264,8 @@ static int elf_core_dump(struct coredump_params *cprm) goto end_coredump; /* Write program headers for segments dump */ - for (i = 0; i < vma_count; i++) { - struct core_vma_metadata *meta = vma_meta + i; + for (i = 0; i < cprm->vma_count; i++) { + struct core_vma_metadata *meta = cprm->vma_meta + i; struct elf_phdr phdr; phdr.p_type = PT_LOAD; @@ -2295,8 +2302,8 @@ static int elf_core_dump(struct coredump_params *cprm) /* Align to page */ dump_skip_to(cprm, dataoff); - for (i = 0; i < vma_count; i++) { - struct core_vma_metadata *meta = vma_meta + i; + for (i = 0; i < cprm->vma_count; i++) { + struct core_vma_metadata *meta = cprm->vma_meta + i; if (!dump_user_range(cprm, meta->start, meta->dump_size)) goto end_coredump; @@ -2313,7 +2320,6 @@ static int elf_core_dump(struct coredump_params *cprm) end_coredump: free_note_info(&info); kfree(shdr4extnum); - kvfree(vma_meta); kfree(phdr4note); return has_dumped; } diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index c6f588dc4a9db..1a25536b01201 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -1465,7 +1465,7 @@ static bool elf_fdpic_dump_segments(struct coredump_params *cprm, static int elf_fdpic_core_dump(struct coredump_params *cprm) { int has_dumped = 0; - int vma_count, segs; + int segs; int i; struct elfhdr *elf = NULL; loff_t offset = 0, dataoff; @@ -1480,8 +1480,6 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) elf_addr_t e_shoff; struct core_thread *ct; struct elf_thread_status *tmp; - struct core_vma_metadata *vma_meta = NULL; - size_t 
vma_data_size; /* alloc memory for large data structures: too large to be on stack */ elf = kmalloc(sizeof(*elf), GFP_KERNEL); @@ -1491,9 +1489,6 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) if (!psinfo) goto end_coredump; - if (dump_vma_snapshot(cprm, &vma_count, &vma_meta, &vma_data_size)) - goto end_coredump; - for (ct = current->signal->core_state->dumper.next; ct; ct = ct->next) { tmp = elf_dump_thread_status(cprm->siginfo->si_signo, @@ -1513,7 +1508,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) tmp->next = thread_list; thread_list = tmp; - segs = vma_count + elf_core_extra_phdrs(); + segs = cprm->vma_count + elf_core_extra_phdrs(); /* for notes section */ segs++; @@ -1558,7 +1553,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) /* Page-align dumped data */ dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE); - offset += vma_data_size; + offset += cprm->vma_data_size; offset += elf_core_extra_data_size(); e_shoff = offset; @@ -1578,8 +1573,8 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) goto end_coredump; /* write program headers for segments dump */ - for (i = 0; i < vma_count; i++) { - struct core_vma_metadata *meta = vma_meta + i; + for (i = 0; i < cprm->vma_count; i++) { + struct core_vma_metadata *meta = cprm->vma_meta + i; struct elf_phdr phdr; size_t sz; @@ -1628,7 +1623,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) dump_skip_to(cprm, dataoff); - if (!elf_fdpic_dump_segments(cprm, vma_meta, vma_count)) + if (!elf_fdpic_dump_segments(cprm, cprm->vma_meta, cprm->vma_count)) goto end_coredump; if (!elf_core_write_extra_data(cprm)) @@ -1652,7 +1647,6 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) thread_list = thread_list->next; kfree(tmp); } - kvfree(vma_meta); kfree(phdr4note); kfree(elf); kfree(psinfo); diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 8202ad6aa1317..18e5ad5decdeb 100644 --- a/fs/btrfs/block-group.c +++ 
b/fs/btrfs/block-group.c @@ -1522,8 +1522,12 @@ void btrfs_reclaim_bgs_work(struct work_struct *work) if (!test_bit(BTRFS_FS_OPEN, &fs_info->flags)) return; - if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE)) + sb_start_write(fs_info->sb); + + if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE)) { + sb_end_write(fs_info->sb); return; + } /* * Long running balances can keep us blocked here for eternity, so @@ -1531,6 +1535,7 @@ void btrfs_reclaim_bgs_work(struct work_struct *work) */ if (!mutex_trylock(&fs_info->reclaim_bgs_lock)) { btrfs_exclop_finish(fs_info); + sb_end_write(fs_info->sb); return; } @@ -1605,6 +1610,7 @@ void btrfs_reclaim_bgs_work(struct work_struct *work) spin_unlock(&fs_info->unused_bgs_lock); mutex_unlock(&fs_info->reclaim_bgs_lock); btrfs_exclop_finish(fs_info); + sb_end_write(fs_info->sb); } void btrfs_reclaim_bgs(struct btrfs_fs_info *fs_info) @@ -2473,12 +2479,6 @@ struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *tran return ERR_PTR(ret); } - /* - * New block group is likely to be used soon. Try to activate it now. - * Failure is OK for now. 
- */ - btrfs_zone_activate(cache); - ret = exclude_super_stripes(cache); if (ret) { /* We may have excluded something, so call this just in case */ @@ -2916,7 +2916,6 @@ int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans) struct btrfs_path *path = NULL; LIST_HEAD(dirty); struct list_head *io = &cur_trans->io_bgs; - int num_started = 0; int loops = 0; spin_lock(&cur_trans->dirty_bgs_lock); @@ -2982,7 +2981,6 @@ int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans) cache->io_ctl.inode = NULL; ret = btrfs_write_out_cache(trans, cache, path); if (ret == 0 && cache->io_ctl.inode) { - num_started++; should_put = 0; /* @@ -3083,7 +3081,6 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans) int should_put; struct btrfs_path *path; struct list_head *io = &cur_trans->io_bgs; - int num_started = 0; path = btrfs_alloc_path(); if (!path) @@ -3141,7 +3138,6 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans) cache->io_ctl.inode = NULL; ret = btrfs_write_out_cache(trans, cache, path); if (ret == 0 && cache->io_ctl.inode) { - num_started++; should_put = 0; list_add_tail(&cache->io_list, io); } else { @@ -3425,7 +3421,7 @@ int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, u64 type) return btrfs_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE); } -static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags) +static struct btrfs_block_group *do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags) { struct btrfs_block_group *bg; int ret; @@ -3512,7 +3508,11 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags) out: btrfs_trans_release_chunk_metadata(trans); - return ret; + if (ret) + return ERR_PTR(ret); + + btrfs_get_block_group(bg); + return bg; } /* @@ -3627,10 +3627,17 @@ int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags, { struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_space_info *space_info; + struct btrfs_block_group *ret_bg; bool 
wait_for_alloc = false; bool should_alloc = false; + bool from_extent_allocation = false; int ret = 0; + if (force == CHUNK_ALLOC_FORCE_FOR_EXTENT) { + from_extent_allocation = true; + force = CHUNK_ALLOC_FORCE; + } + /* Don't re-enter if we're already allocating a chunk */ if (trans->allocating_chunk) return -ENOSPC; @@ -3720,9 +3727,22 @@ int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags, force_metadata_allocation(fs_info); } - ret = do_chunk_alloc(trans, flags); + ret_bg = do_chunk_alloc(trans, flags); trans->allocating_chunk = false; + if (IS_ERR(ret_bg)) { + ret = PTR_ERR(ret_bg); + } else if (from_extent_allocation) { + /* + * New block group is likely to be used soon. Try to activate + * it now. Failure is OK for now. + */ + btrfs_zone_activate(ret_bg); + } + + if (!ret) + btrfs_put_block_group(ret_bg); + spin_lock(&space_info->lock); if (ret < 0) { if (ret == -ENOSPC) diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h index 5878b7ce3b78e..faa7f1d6782a0 100644 --- a/fs/btrfs/block-group.h +++ b/fs/btrfs/block-group.h @@ -35,11 +35,15 @@ enum btrfs_discard_state { * the FS with empty chunks * * CHUNK_ALLOC_FORCE means it must try to allocate one + * + * CHUNK_ALLOC_FORCE_FOR_EXTENT like CHUNK_ALLOC_FORCE but called from + * find_free_extent() that also activates the zone */ enum btrfs_chunk_alloc_enum { CHUNK_ALLOC_NO_FORCE, CHUNK_ALLOC_LIMITED, CHUNK_ALLOC_FORCE, + CHUNK_ALLOC_FORCE_FOR_EXTENT, }; struct btrfs_caching_control { diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 71e5b2e9a1ba8..93f704ba877e2 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -534,6 +534,9 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start, cb->orig_bio = NULL; cb->nr_pages = nr_pages; + if (blkcg_css) + kthread_associate_blkcg(blkcg_css); + while (cur_disk_bytenr < disk_start + compressed_len) { u64 offset = cur_disk_bytenr - disk_start; unsigned int index = offset >> PAGE_SHIFT; @@
-552,6 +555,8 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start, bio = NULL; goto finish_cb; } + if (blkcg_css) + bio->bi_opf |= REQ_CGROUP_PUNT; } /* * We should never reach next_stripe_start start as we will @@ -609,6 +614,9 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start, return 0; finish_cb: + if (blkcg_css) + kthread_associate_blkcg(NULL); + if (bio) { bio->bi_status = ret; bio_endio(bio); @@ -808,7 +816,7 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, u64 em_len; u64 em_start; struct extent_map *em; - blk_status_t ret = BLK_STS_RESOURCE; + blk_status_t ret; int faili = 0; u8 *sums; @@ -821,14 +829,18 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, read_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, file_offset, fs_info->sectorsize); read_unlock(&em_tree->lock); - if (!em) - return BLK_STS_IOERR; + if (!em) { + ret = BLK_STS_IOERR; + goto out; + } ASSERT(em->compress_type != BTRFS_COMPRESS_NONE); compressed_len = em->block_len; cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS); - if (!cb) + if (!cb) { + ret = BLK_STS_RESOURCE; goto out; + } refcount_set(&cb->pending_sectors, compressed_len >> fs_info->sectorsize_bits); cb->errors = 0; @@ -851,8 +863,10 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, nr_pages = DIV_ROUND_UP(compressed_len, PAGE_SIZE); cb->compressed_pages = kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS); - if (!cb->compressed_pages) + if (!cb->compressed_pages) { + ret = BLK_STS_RESOURCE; goto fail1; + } for (pg_index = 0; pg_index < nr_pages; pg_index++) { cb->compressed_pages[pg_index] = alloc_page(GFP_NOFS); @@ -938,7 +952,7 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, comp_bio = NULL; } } - return 0; + return BLK_STS_OK; fail2: while (faili >= 0) { @@ -951,6 +965,8 @@ blk_status_t 
btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, kfree(cb); out: free_extent_map(em); + bio->bi_status = ret; + bio_endio(bio); return ret; finish_cb: if (comp_bio) { diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 48590a3807621..b43f80c3bffd9 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -441,17 +441,31 @@ static int csum_one_extent_buffer(struct extent_buffer *eb) else ret = btrfs_check_leaf_full(eb); - if (ret < 0) { - btrfs_print_tree(eb, 0); + if (ret < 0) + goto error; + + /* + * Also check the generation, the eb reached here must be newer than + * last committed. Or something seriously wrong happened. + */ + if (unlikely(btrfs_header_generation(eb) <= fs_info->last_trans_committed)) { + ret = -EUCLEAN; btrfs_err(fs_info, - "block=%llu write time tree block corruption detected", - eb->start); - WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG)); - return ret; + "block=%llu bad generation, have %llu expect > %llu", + eb->start, btrfs_header_generation(eb), + fs_info->last_trans_committed); + goto error; } write_extent_buffer(eb, result, 0, fs_info->csum_size); return 0; + +error: + btrfs_print_tree(eb, 0); + btrfs_err(fs_info, "block=%llu write time tree block corruption detected", + eb->start); + WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG)); + return ret; } /* Checksum all dirty extent buffers in one bio_vec */ @@ -1812,9 +1826,10 @@ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info, ret = btrfs_insert_fs_root(fs_info, root); if (ret) { - btrfs_put_root(root); - if (ret == -EEXIST) + if (ret == -EEXIST) { + btrfs_put_root(root); goto again; + } goto fail; } return root; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 96427b1ecac3e..e5b832d77df96 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4087,7 +4087,7 @@ static int find_free_extent_update_loop(struct btrfs_fs_info *fs_info, } ret = btrfs_chunk_alloc(trans, ffe_ctl->flags, - CHUNK_ALLOC_FORCE); + 
CHUNK_ALLOC_FORCE_FOR_EXTENT); /* Do not bail out on ENOSPC since we can do more. */ if (ret == -ENOSPC) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 4c91060d103ae..e93526d86a922 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2639,7 +2639,6 @@ int btrfs_repair_one_sector(struct inode *inode, const int icsum = bio_offset >> fs_info->sectorsize_bits; struct bio *repair_bio; struct btrfs_bio *repair_bbio; - blk_status_t status; btrfs_debug(fs_info, "repair read error: read error at %llu", start); @@ -2678,13 +2677,13 @@ int btrfs_repair_one_sector(struct inode *inode, "repair read error: submitting new read to mirror %d", failrec->this_mirror); - status = submit_bio_hook(inode, repair_bio, failrec->this_mirror, - failrec->bio_flags); - if (status) { - free_io_failure(failure_tree, tree, failrec); - bio_put(repair_bio); - } - return blk_status_to_errno(status); + /* + * At this point we have a bio, so any errors from submit_bio_hook() + * will be handled by the endio on the repair_bio, so we can't return an + * error here. 
+ */ + submit_bio_hook(inode, repair_bio, failrec->this_mirror, failrec->bio_flags); + return BLK_STS_OK; } static void end_page_read(struct page *page, bool uptodate, u64 start, u32 len) @@ -3563,7 +3562,6 @@ int btrfs_do_readpage(struct page *page, struct extent_map **em_cached, u64 cur_end; struct extent_map *em; int ret = 0; - int nr = 0; size_t pg_offset = 0; size_t iosize; size_t blocksize = inode->i_sb->s_blocksize; @@ -3721,9 +3719,7 @@ int btrfs_do_readpage(struct page *page, struct extent_map **em_cached, end_bio_extent_readpage, 0, this_bio_flag, force_bio_submit); - if (!ret) { - nr++; - } else { + if (ret) { unlock_extent(tree, cur, cur + iosize - 1); end_page_read(page, false, cur, iosize); goto out; @@ -4780,11 +4776,12 @@ static int submit_eb_page(struct page *page, struct writeback_control *wbc, return ret; } if (cache) { - /* Impiles write in zoned mode */ - btrfs_put_block_group(cache); - /* Mark the last eb in a block group */ + /* + * Implies write in zoned mode. Mark the last eb in a block group. 
+ */ if (cache->seq_zone && eb->start + eb->len == cache->zone_capacity) set_bit(EXTENT_BUFFER_ZONE_FINISH, &eb->bflags); + btrfs_put_block_group(cache); } ret = write_one_eb(eb, wbc, epd); free_extent_buffer(eb); diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 0399cf8e3c32c..151e9da5da2dc 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -118,7 +118,7 @@ struct btrfs_bio_ctrl { */ struct extent_changeset { /* How many bytes are set/cleared in this operation */ - unsigned int bytes_changed; + u64 bytes_changed; /* Changed ranges */ struct ulist range_changed; diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 90c5c38836ab3..77c8f298f52e2 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -305,7 +305,7 @@ static int search_csum_tree(struct btrfs_fs_info *fs_info, read_extent_buffer(path->nodes[0], dst, (unsigned long)item, ret * csum_size); out: - if (ret == -ENOENT) + if (ret == -ENOENT || ret == -EFBIG) ret = 0; return ret; } @@ -368,6 +368,7 @@ blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u8 *dst { struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; + struct btrfs_bio *bbio = NULL; struct btrfs_path *path; const u32 sectorsize = fs_info->sectorsize; const u32 csum_size = fs_info->csum_size; @@ -377,6 +378,7 @@ blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u8 *dst u8 *csum; const unsigned int nblocks = orig_len >> fs_info->sectorsize_bits; int count = 0; + blk_status_t ret = BLK_STS_OK; if ((BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) || test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state)) @@ -400,7 +402,7 @@ blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u8 *dst return BLK_STS_RESOURCE; if (!dst) { - struct btrfs_bio *bbio = btrfs_bio(bio); + bbio = btrfs_bio(bio); if (nblocks * csum_size > BTRFS_BIO_INLINE_CSUM_SIZE) { bbio->csum = kmalloc_array(nblocks, 
csum_size, GFP_NOFS); @@ -456,21 +458,27 @@ blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u8 *dst count = search_csum_tree(fs_info, path, cur_disk_bytenr, search_len, csum_dst); - if (count <= 0) { - /* - * Either we hit a critical error or we didn't find - * the csum. - * Either way, we put zero into the csums dst, and skip - * to the next sector. - */ + if (count < 0) { + ret = errno_to_blk_status(count); + if (bbio) + btrfs_bio_free_csum(bbio); + break; + } + + /* + * We didn't find a csum for this range. We need to make sure + * we complain loudly about this, because we are not NODATASUM. + * + * However for the DATA_RELOC inode we could potentially be + * relocating data extents for a NODATASUM inode, so the inode + * itself won't be marked with NODATASUM, but the extent we're + * copying is in fact NODATASUM. If we don't find a csum we + * assume this is the case. + */ + if (count == 0) { memset(csum_dst, 0, csum_size); count = 1; - /* - * For data reloc inode, we need to mark the range - * NODATASUM so that balance won't report false csum - * error. 
- */ if (BTRFS_I(inode)->root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID) { u64 file_offset; @@ -491,7 +499,7 @@ blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u8 *dst } btrfs_free_path(path); - return BLK_STS_OK; + return ret; } int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index a0179cc62913b..28ddd9cf20692 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -2918,8 +2918,9 @@ int btrfs_replace_file_extents(struct btrfs_inode *inode, return ret; } -static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) +static int btrfs_punch_hole(struct file *file, loff_t offset, loff_t len) { + struct inode *inode = file_inode(file); struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct btrfs_root *root = BTRFS_I(inode)->root; struct extent_state *cached_state = NULL; @@ -2951,6 +2952,10 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) goto out_only_mutex; } + ret = file_modified(file); + if (ret) + goto out_only_mutex; + lockstart = round_up(offset, btrfs_inode_sectorsize(BTRFS_I(inode))); lockend = round_down(offset + len, btrfs_inode_sectorsize(BTRFS_I(inode))) - 1; @@ -3391,7 +3396,7 @@ static long btrfs_fallocate(struct file *file, int mode, return -EOPNOTSUPP; if (mode & FALLOC_FL_PUNCH_HOLE) - return btrfs_punch_hole(inode, offset, len); + return btrfs_punch_hole(file, offset, len); /* * Only trigger disk allocation, don't trigger qgroup reserve @@ -3413,6 +3418,10 @@ static long btrfs_fallocate(struct file *file, int mode, goto out; } + ret = file_modified(file); + if (ret) + goto out; + /* * TODO: Move these two operations after we have checked * accurate reserved space, or fallocate can still fail but diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 5bbea5ec31fc5..9547088a93066 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1130,7 +1130,6 @@ static noinline int cow_file_range(struct 
btrfs_inode *inode, int ret = 0; if (btrfs_is_free_space_inode(inode)) { - WARN_ON_ONCE(1); ret = -EINVAL; goto out_unlock; } @@ -2538,10 +2537,15 @@ blk_status_t btrfs_submit_data_bio(struct inode *inode, struct bio *bio, goto out; if (bio_flags & EXTENT_BIO_COMPRESSED) { + /* + * btrfs_submit_compressed_read will handle completing + * the bio if there were any errors, so just return + * here. + */ ret = btrfs_submit_compressed_read(inode, bio, mirror_num, bio_flags); - goto out; + goto out_no_endio; } else { /* * Lookup bio sums does extra checks around whether we @@ -2575,6 +2579,7 @@ blk_status_t btrfs_submit_data_bio(struct inode *inode, struct bio *bio, bio->bi_status = ret; bio_endio(bio); } +out_no_endio: return ret; } @@ -4460,6 +4465,13 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry) dest->root_key.objectid); return -EPERM; } + if (atomic_read(&dest->nr_swapfiles)) { + spin_unlock(&dest->root_item_lock); + btrfs_warn(fs_info, + "attempt to delete subvolume %llu with active swapfile", + root->root_key.objectid); + return -EPERM; + } root_flags = btrfs_root_flags(&dest->root_item); btrfs_set_root_flags(&dest->root_item, root_flags | BTRFS_ROOT_SUBVOL_DEAD); @@ -7410,6 +7422,7 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map, u64 block_start, orig_start, orig_block_len, ram_bytes; bool can_nocow = false; bool space_reserved = false; + u64 prev_len; int ret = 0; /* @@ -7437,6 +7450,7 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map, can_nocow = true; } + prev_len = len; if (can_nocow) { struct extent_map *em2; @@ -7466,8 +7480,6 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map, goto out; } } else { - const u64 prev_len = len; - /* Our caller expects us to free the input extent map. 
*/ free_extent_map(em); *map = NULL; @@ -7498,7 +7510,7 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map, * We have created our ordered extent, so we can now release our reservation * for an outstanding extent. */ - btrfs_delalloc_release_extents(BTRFS_I(inode), len); + btrfs_delalloc_release_extents(BTRFS_I(inode), prev_len); /* * Need to update the i_size under the extent lock so buffered @@ -10418,8 +10430,23 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file, * set. We use this counter to prevent snapshots. We must increment it * before walking the extents because we don't want a concurrent * snapshot to run after we've already checked the extents. + * + * It is possible that subvolume is marked for deletion but still not + * removed yet. To prevent this race, we check the root status before + * activating the swapfile. */ + spin_lock(&root->root_item_lock); + if (btrfs_root_dead(root)) { + spin_unlock(&root->root_item_lock); + + btrfs_exclop_finish(fs_info); + btrfs_warn(fs_info, + "cannot activate swapfile because subvolume %llu is being deleted", + root->root_key.objectid); + return -EPERM; + } atomic_inc(&root->nr_swapfiles); + spin_unlock(&root->root_item_lock); isize = ALIGN_DOWN(inode->i_size, fs_info->sectorsize); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 8d47ec5fc4f44..8fe9d55d68622 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1215,7 +1215,7 @@ static u32 get_extent_max_capacity(const struct extent_map *em) } static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em, - bool locked) + u32 extent_thresh, u64 newer_than, bool locked) { struct extent_map *next; bool ret = false; @@ -1225,11 +1225,12 @@ static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em, return false; /* - * We want to check if the next extent can be merged with the current - * one, which can be an extent created in a past generation, so we pass - * a minimum 
generation of 0 to defrag_lookup_extent(). + * Here we need to pass @newer_than when checking the next extent, or + * we will hit a case where we mark the current extent for defrag, but the next + * one will not be a target. + * This will just cause extra IO without really reducing the fragments. */ - next = defrag_lookup_extent(inode, em->start + em->len, 0, locked); + next = defrag_lookup_extent(inode, em->start + em->len, newer_than, locked); /* No more em or hole */ if (!next || next->block_start >= EXTENT_MAP_LAST_BYTE) goto out; @@ -1241,6 +1242,13 @@ static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em, */ if (next->len >= get_extent_max_capacity(em)) goto out; + /* Skip older extent */ + if (next->generation < newer_than) + goto out; + /* Also check extent size */ + if (next->len >= extent_thresh) + goto out; + ret = true; out: free_extent_map(next); @@ -1446,7 +1454,7 @@ static int defrag_collect_targets(struct btrfs_inode *inode, goto next; next_mergeable = defrag_check_next_extent(&inode->vfs_inode, em, - locked); + extent_thresh, newer_than, locked); if (!next_mergeable) { struct defrag_target_range *last; diff --git a/fs/btrfs/reflink.c b/fs/btrfs/reflink.c index a3930da4eb3fb..e437238cc603e 100644 --- a/fs/btrfs/reflink.c +++ b/fs/btrfs/reflink.c @@ -505,8 +505,11 @@ static int btrfs_clone(struct inode *src, struct inode *inode, */ ASSERT(key.offset == 0); ASSERT(datal <= fs_info->sectorsize); - if (key.offset != 0 || datal > fs_info->sectorsize) - return -EUCLEAN; + if (WARN_ON(key.offset != 0) || + WARN_ON(datal > fs_info->sectorsize)) { + ret = -EUCLEAN; + goto out; + } ret = clone_copy_inline_extent(inode, path, &new_key, drop_start, datal, size, diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c index 294242c194d80..62382ae1eb02a 100644 --- a/fs/btrfs/space-info.c +++ b/fs/btrfs/space-info.c @@ -1061,7 +1061,6 @@ static void btrfs_preempt_reclaim_metadata_space(struct work_struct *work) trans_rsv->reserved; if
(block_rsv_size < space_info->bytes_may_use) delalloc_size = space_info->bytes_may_use - block_rsv_size; - spin_unlock(&space_info->lock); /* * We don't want to include the global_rsv in our calculation, @@ -1092,6 +1091,8 @@ static void btrfs_preempt_reclaim_metadata_space(struct work_struct *work) flush = FLUSH_DELAYED_REFS_NR; } + spin_unlock(&space_info->lock); + /* * We don't want to reclaim everything, just a portion, so scale * down the to_reclaim by 1/4. If it takes us down to 0, diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index b07d382d53a86..b0dfcc7a4225c 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -534,15 +534,48 @@ btrfs_get_bdev_and_sb(const char *device_path, fmode_t flags, void *holder, return ret; } -static bool device_path_matched(const char *path, struct btrfs_device *device) +/* + * Check if the device in the path matches the device in the given struct device. + * + * Returns: + * true If it is the same device. + * false If it is not the same device or on error. + */ +static bool device_matched(const struct btrfs_device *device, const char *path) { - int found; + char *device_name; + dev_t dev_old; + dev_t dev_new; + int ret; + + /* + * If we are looking for a device with the matching dev_t, then skip + * device without a name (a missing device). 
+ */ + if (!device->name) + return false; + + device_name = kzalloc(BTRFS_PATH_NAME_MAX, GFP_KERNEL); + if (!device_name) + return false; rcu_read_lock(); - found = strcmp(rcu_str_deref(device->name), path); + scnprintf(device_name, BTRFS_PATH_NAME_MAX, "%s", rcu_str_deref(device->name)); rcu_read_unlock(); - return found == 0; + ret = lookup_bdev(device_name, &dev_old); + kfree(device_name); + if (ret) + return false; + + ret = lookup_bdev(path, &dev_new); + if (ret) + return false; + + if (dev_old == dev_new) + return true; + + return false; } /* @@ -575,9 +608,7 @@ static int btrfs_free_stale_devices(const char *path, &fs_devices->devices, dev_list) { if (skip_device && skip_device == device) continue; - if (path && !device->name) - continue; - if (path && !device_path_matched(path, device)) + if (path && !device_matched(device, path)) continue; if (fs_devices->opened) { /* for an already deleted device return 0 */ @@ -1914,23 +1945,18 @@ static void update_dev_time(const char *device_path) path_put(&path); } -static int btrfs_rm_dev_item(struct btrfs_device *device) +static int btrfs_rm_dev_item(struct btrfs_trans_handle *trans, + struct btrfs_device *device) { struct btrfs_root *root = device->fs_info->chunk_root; int ret; struct btrfs_path *path; struct btrfs_key key; - struct btrfs_trans_handle *trans; path = btrfs_alloc_path(); if (!path) return -ENOMEM; - trans = btrfs_start_transaction(root, 0); - if (IS_ERR(trans)) { - btrfs_free_path(path); - return PTR_ERR(trans); - } key.objectid = BTRFS_DEV_ITEMS_OBJECTID; key.type = BTRFS_DEV_ITEM_KEY; key.offset = device->devid; @@ -1941,21 +1967,12 @@ static int btrfs_rm_dev_item(struct btrfs_device *device) if (ret) { if (ret > 0) ret = -ENOENT; - btrfs_abort_transaction(trans, ret); - btrfs_end_transaction(trans); goto out; } ret = btrfs_del_item(trans, root, path); - if (ret) { - btrfs_abort_transaction(trans, ret); - btrfs_end_transaction(trans); - } - out: btrfs_free_path(path); - if (!ret) - ret = 
btrfs_commit_transaction(trans); return ret; } @@ -2096,6 +2113,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, struct btrfs_dev_lookup_args *args, struct block_device **bdev, fmode_t *mode) { + struct btrfs_trans_handle *trans; struct btrfs_device *device; struct btrfs_fs_devices *cur_devices; struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; @@ -2111,7 +2129,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, ret = btrfs_check_raid_min_devices(fs_info, num_devices - 1); if (ret) - goto out; + return ret; device = btrfs_find_device(fs_info->fs_devices, args); if (!device) { @@ -2119,27 +2137,22 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, ret = BTRFS_ERROR_DEV_MISSING_NOT_FOUND; else ret = -ENOENT; - goto out; + return ret; } if (btrfs_pinned_by_swapfile(fs_info, device)) { btrfs_warn_in_rcu(fs_info, "cannot remove device %s (devid %llu) due to active swapfile", rcu_str_deref(device->name), device->devid); - ret = -ETXTBSY; - goto out; + return -ETXTBSY; } - if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) { - ret = BTRFS_ERROR_DEV_TGT_REPLACE; - goto out; - } + if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) + return BTRFS_ERROR_DEV_TGT_REPLACE; if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) && - fs_info->fs_devices->rw_devices == 1) { - ret = BTRFS_ERROR_DEV_ONLY_WRITABLE; - goto out; - } + fs_info->fs_devices->rw_devices == 1) + return BTRFS_ERROR_DEV_ONLY_WRITABLE; if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) { mutex_lock(&fs_info->chunk_mutex); @@ -2152,14 +2165,22 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, if (ret) goto error_undo; - /* - * TODO: the superblock still includes this device in its num_devices - * counter although write_all_supers() is not locked out. This - * could give a filesystem state which requires a degraded mount. 
- */ - ret = btrfs_rm_dev_item(device); - if (ret) + trans = btrfs_start_transaction(fs_info->chunk_root, 0); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); goto error_undo; + } + + ret = btrfs_rm_dev_item(trans, device); + if (ret) { + /* Any error in dev item removal is critical */ + btrfs_crit(fs_info, + "failed to remove device item for devid %llu: %d", + device->devid, ret); + btrfs_abort_transaction(trans, ret); + btrfs_end_transaction(trans); + return ret; + } clear_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state); btrfs_scrub_cancel_dev(device); @@ -2242,7 +2263,8 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, free_fs_devices(cur_devices); } -out: + ret = btrfs_commit_transaction(trans); + return ret; error_undo: @@ -2253,7 +2275,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, device->fs_devices->rw_devices++; mutex_unlock(&fs_info->chunk_mutex); } - goto out; + return ret; } void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_device *srcdev) @@ -4445,10 +4467,12 @@ static int balance_kthread(void *data) struct btrfs_fs_info *fs_info = data; int ret = 0; + sb_start_write(fs_info->sb); mutex_lock(&fs_info->balance_mutex); if (fs_info->balance_ctl) ret = btrfs_balance(fs_info, fs_info->balance_ctl, NULL); mutex_unlock(&fs_info->balance_mutex); + sb_end_write(fs_info->sb); return ret; } @@ -8299,10 +8323,12 @@ static int relocating_repair_kthread(void *data) target = cache->start; btrfs_put_block_group(cache); + sb_start_write(fs_info->sb); if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE)) { btrfs_info(fs_info, "zoned: skip relocating block group %llu to repair: EBUSY", target); + sb_end_write(fs_info->sb); return -EBUSY; } @@ -8330,6 +8356,7 @@ static int relocating_repair_kthread(void *data) btrfs_put_block_group(cache); mutex_unlock(&fs_info->reclaim_bgs_lock); btrfs_exclop_finish(fs_info); + sb_end_write(fs_info->sb); return ret; } diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index f559d517c7c44..f03705d2f8a8c 100644 --- 
a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -1927,18 +1927,19 @@ int btrfs_zone_finish(struct btrfs_block_group *block_group) bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, u64 flags) { + struct btrfs_fs_info *fs_info = fs_devices->fs_info; struct btrfs_device *device; bool ret = false; - if (!btrfs_is_zoned(fs_devices->fs_info)) + if (!btrfs_is_zoned(fs_info)) return true; /* Non-single profiles are not supported yet */ ASSERT((flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0); /* Check if there is a device with active zones left */ - mutex_lock(&fs_devices->device_list_mutex); - list_for_each_entry(device, &fs_devices->devices, dev_list) { + mutex_lock(&fs_info->chunk_mutex); + list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { struct btrfs_zoned_device_info *zinfo = device->zone_info; if (!device->bdev) @@ -1950,7 +1951,7 @@ bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, u64 flags) break; } } - mutex_unlock(&fs_devices->device_list_mutex); + mutex_unlock(&fs_info->chunk_mutex); return ret; } diff --git a/fs/buffer.c b/fs/buffer.c index 8e112b6bd3719..c76a8ef60a758 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1235,16 +1235,18 @@ static void bh_lru_install(struct buffer_head *bh) int i; check_irqs_on(); + bh_lru_lock(); + /* * the refcount of buffer_head in bh_lru prevents dropping the * attached page(i.e., try_to_free_buffers) so it could cause * failing page migration. * Skip putting upcoming bh into bh_lru until migration is done. 
*/ - if (lru_cache_disabled()) + if (lru_cache_disabled()) { + bh_lru_unlock(); return; - - bh_lru_lock(); + } b = this_cpu_ptr(&bh_lrus); for (i = 0; i < BH_LRU_SIZE; i++) { diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index f256c8aff7bb5..ca9f3e4ec4b3f 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -57,6 +57,16 @@ static void __cachefiles_unmark_inode_in_use(struct cachefiles_object *object, trace_cachefiles_mark_inactive(object, inode); } +static void cachefiles_do_unmark_inode_in_use(struct cachefiles_object *object, + struct dentry *dentry) +{ + struct inode *inode = d_backing_inode(dentry); + + inode_lock(inode); + __cachefiles_unmark_inode_in_use(object, dentry); + inode_unlock(inode); +} + /* * Unmark a backing inode and tell cachefilesd that there's something that can * be culled. @@ -68,9 +78,7 @@ void cachefiles_unmark_inode_in_use(struct cachefiles_object *object, struct inode *inode = file_inode(file); if (inode) { - inode_lock(inode); - __cachefiles_unmark_inode_in_use(object, file->f_path.dentry); - inode_unlock(inode); + cachefiles_do_unmark_inode_in_use(object, file->f_path.dentry); if (!test_bit(CACHEFILES_OBJECT_USING_TMPFILE, &object->flags)) { atomic_long_add(inode->i_blocks, &cache->b_released); @@ -484,7 +492,7 @@ struct file *cachefiles_create_tmpfile(struct cachefiles_object *object) object, d_backing_inode(path.dentry), ret, cachefiles_trace_trunc_error); file = ERR_PTR(ret); - goto out_dput; + goto out_unuse; } } @@ -494,15 +502,20 @@ struct file *cachefiles_create_tmpfile(struct cachefiles_object *object) trace_cachefiles_vfs_error(object, d_backing_inode(path.dentry), PTR_ERR(file), cachefiles_trace_open_error); - goto out_dput; + goto out_unuse; } if (unlikely(!file->f_op->read_iter) || unlikely(!file->f_op->write_iter)) { fput(file); pr_notice("Cache does not support read_iter and write_iter\n"); file = ERR_PTR(-EINVAL); + goto out_unuse; } + goto out_dput; + +out_unuse: + 
cachefiles_do_unmark_inode_in_use(object, path.dentry); out_dput: dput(path.dentry); out: @@ -590,14 +603,16 @@ static bool cachefiles_open_file(struct cachefiles_object *object, check_failed: fscache_cookie_lookup_negative(object->cookie); cachefiles_unmark_inode_in_use(object, file); - if (ret == -ESTALE) { - fput(file); - dput(dentry); + fput(file); + dput(dentry); + if (ret == -ESTALE) return cachefiles_create_file(object); - } + return false; + error_fput: fput(file); error: + cachefiles_do_unmark_inode_in_use(object, dentry); dput(dentry); return false; } diff --git a/fs/cachefiles/xattr.c b/fs/cachefiles/xattr.c index 35465109d9c4e..00b087c14995a 100644 --- a/fs/cachefiles/xattr.c +++ b/fs/cachefiles/xattr.c @@ -203,7 +203,7 @@ bool cachefiles_set_volume_xattr(struct cachefiles_volume *volume) if (!buf) return false; buf->reserved = cpu_to_be32(0); - memcpy(buf->data, p, len); + memcpy(buf->data, p, volume->vcookie->coherency_len); ret = cachefiles_inject_write_error(); if (ret == 0) diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 133dbd9338e73..d91fa53e12b33 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -478,8 +478,11 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) 2 : (fpos_off(rde->offset) + 1); err = note_last_dentry(dfi, rde->name, rde->name_len, next_offset); - if (err) + if (err) { + ceph_mdsc_put_request(dfi->last_readdir); + dfi->last_readdir = NULL; return err; + } } else if (req->r_reply_info.dir_end) { dfi->next_offset = 2; /* keep last name */ @@ -520,6 +523,12 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) if (!dir_emit(ctx, rde->name, rde->name_len, ceph_present_ino(inode->i_sb, le64_to_cpu(rde->inode.in->ino)), le32_to_cpu(rde->inode.in->mode) >> 12)) { + /* + * NOTE: There is no need to put 'dfi->last_readdir' here, + * because when dir_emit stops us it most likely + * doesn't have enough memory, etc. So the next readdir + * will continue.
+ */ dout("filldir stopping us...\n"); return 0; } diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index ef4a980a7bf37..c092dce0485c7 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -87,13 +87,13 @@ struct inode *ceph_get_snapdir(struct inode *parent) if (!S_ISDIR(parent->i_mode)) { pr_warn_once("bad snapdir parent type (mode=0%o)\n", parent->i_mode); - return ERR_PTR(-ENOTDIR); + goto err; } if (!(inode->i_state & I_NEW) && !S_ISDIR(inode->i_mode)) { pr_warn_once("bad snapdir inode type (mode=0%o)\n", inode->i_mode); - return ERR_PTR(-ENOTDIR); + goto err; } inode->i_mode = parent->i_mode; @@ -113,6 +113,12 @@ struct inode *ceph_get_snapdir(struct inode *parent) } return inode; +err: + if ((inode->i_state & I_NEW)) + discard_new_inode(inode); + else + iput(inode); + return ERR_PTR(-ENOTDIR); } const struct inode_operations ceph_file_iops = { diff --git a/fs/cifs/cifs_swn.c b/fs/cifs/cifs_swn.c index cdce1609c5c26..180c234c2f46c 100644 --- a/fs/cifs/cifs_swn.c +++ b/fs/cifs/cifs_swn.c @@ -396,11 +396,11 @@ static int cifs_swn_resource_state_changed(struct cifs_swn_reg *swnreg, const ch switch (state) { case CIFS_SWN_RESOURCE_STATE_UNAVAILABLE: cifs_dbg(FYI, "%s: resource name '%s' become unavailable\n", __func__, name); - cifs_mark_tcp_ses_conns_for_reconnect(swnreg->tcon->ses->server, true); + cifs_signal_cifsd_for_reconnect(swnreg->tcon->ses->server, true); break; case CIFS_SWN_RESOURCE_STATE_AVAILABLE: cifs_dbg(FYI, "%s: resource name '%s' become available\n", __func__, name); - cifs_mark_tcp_ses_conns_for_reconnect(swnreg->tcon->ses->server, true); + cifs_signal_cifsd_for_reconnect(swnreg->tcon->ses->server, true); break; case CIFS_SWN_RESOURCE_STATE_UNKNOWN: cifs_dbg(FYI, "%s: resource name '%s' changed to unknown state\n", __func__, name); @@ -498,7 +498,7 @@ static int cifs_swn_reconnect(struct cifs_tcon *tcon, struct sockaddr_storage *a goto unlock; } - cifs_mark_tcp_ses_conns_for_reconnect(tcon->ses->server, false); + 
cifs_signal_cifsd_for_reconnect(tcon->ses->server, false); unlock: mutex_unlock(&tcon->ses->server->srv_mutex); diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 082c214786867..792fdcfdc6add 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -210,6 +210,9 @@ cifs_read_super(struct super_block *sb) if (rc) goto out_no_root; /* tune readahead according to rsize if readahead size not set on mount */ + if (cifs_sb->ctx->rsize == 0) + cifs_sb->ctx->rsize = + tcon->ses->server->ops->negotiate_rsize(tcon, cifs_sb->ctx); if (cifs_sb->ctx->rasize) sb->s_bdi->ra_pages = cifs_sb->ctx->rasize / PAGE_SIZE; else @@ -254,26 +257,33 @@ static void cifs_kill_sb(struct super_block *sb) struct cifs_sb_info *cifs_sb = CIFS_SB(sb); struct cifs_tcon *tcon; struct cached_fid *cfid; + struct rb_root *root = &cifs_sb->tlink_tree; + struct rb_node *node; + struct tcon_link *tlink; /* * We ned to release all dentries for the cached directories * before we kill the sb. */ if (cifs_sb->root) { + for (node = rb_first(root); node; node = rb_next(node)) { + tlink = rb_entry(node, struct tcon_link, tl_rbnode); + tcon = tlink_tcon(tlink); + if (IS_ERR(tcon)) + continue; + cfid = &tcon->crfid; + mutex_lock(&cfid->fid_mutex); + if (cfid->dentry) { + dput(cfid->dentry); + cfid->dentry = NULL; + } + mutex_unlock(&cfid->fid_mutex); + } + + /* finally release root dentry */ dput(cifs_sb->root); cifs_sb->root = NULL; } - tcon = cifs_sb_master_tcon(cifs_sb); - if (tcon) { - cfid = &tcon->crfid; - mutex_lock(&cfid->fid_mutex); - if (cfid->dentry) { - - dput(cfid->dentry); - cfid->dentry = NULL; - } - mutex_unlock(&cfid->fid_mutex); - } kill_anon_super(sb); cifs_umount(cifs_sb); diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index d3701295402d2..0df3b24a0bf4c 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -132,6 +132,9 @@ extern int SendReceiveBlockingLock(const unsigned int xid, struct smb_hdr *out_buf, int *bytes_returned); void +cifs_signal_cifsd_for_reconnect(struct 
TCP_Server_Info *server, + bool all_channels); +void cifs_mark_tcp_ses_conns_for_reconnect(struct TCP_Server_Info *server, bool mark_smb_session); extern int cifs_reconnect(struct TCP_Server_Info *server, diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index d3020abfe404a..c3be6a541c8fc 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -162,11 +162,51 @@ static void cifs_resolve_server(struct work_struct *work) mutex_unlock(&server->srv_mutex); } +/* + * Update the tcpStatus for the server. + * This is used to signal the cifsd thread to call cifs_reconnect + * ONLY cifsd thread should call cifs_reconnect. For any other + * thread, use this function + * + * @server: the tcp ses for which reconnect is needed + * @all_channels: if this needs to be done for all channels + */ +void +cifs_signal_cifsd_for_reconnect(struct TCP_Server_Info *server, + bool all_channels) +{ + struct TCP_Server_Info *pserver; + struct cifs_ses *ses; + int i; + + /* If server is a channel, select the primary channel */ + pserver = CIFS_SERVER_IS_CHAN(server) ? server->primary_server : server; + + spin_lock(&cifs_tcp_ses_lock); + if (!all_channels) { + pserver->tcpStatus = CifsNeedReconnect; + spin_unlock(&cifs_tcp_ses_lock); + return; + } + + list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) { + spin_lock(&ses->chan_lock); + for (i = 0; i < ses->chan_count; i++) + ses->chans[i].server->tcpStatus = CifsNeedReconnect; + spin_unlock(&ses->chan_lock); + } + spin_unlock(&cifs_tcp_ses_lock); +} + /* * Mark all sessions and tcons for reconnect. + * IMPORTANT: make sure that this gets called only from + * cifsd thread. For any other thread, use + * cifs_signal_cifsd_for_reconnect * + * @server: the tcp ses for which reconnect is needed * @server needs to be previously set to CifsNeedReconnect. 
- * + * @mark_smb_session: whether even sessions need to be marked */ void cifs_mark_tcp_ses_conns_for_reconnect(struct TCP_Server_Info *server, @@ -413,9 +453,7 @@ static int reconnect_target_unlocked(struct TCP_Server_Info *server, struct dfs_ return rc; } -static int -reconnect_dfs_server(struct TCP_Server_Info *server, - bool mark_smb_session) +static int reconnect_dfs_server(struct TCP_Server_Info *server) { int rc = 0; const char *refpath = server->current_fullpath + 1; @@ -439,7 +477,12 @@ reconnect_dfs_server(struct TCP_Server_Info *server, if (!cifs_tcp_ses_needs_reconnect(server, num_targets)) return 0; - cifs_mark_tcp_ses_conns_for_reconnect(server, mark_smb_session); + /* + * Unconditionally mark all sessions & tcons for reconnect as we might be connecting to a + * different server or share during failover. It could be improved by adding some logic to + * only do that in case it connects to a different server or share, though. + */ + cifs_mark_tcp_ses_conns_for_reconnect(server, true); cifs_abort_connection(server); @@ -497,7 +540,7 @@ int cifs_reconnect(struct TCP_Server_Info *server, bool mark_smb_session) } spin_unlock(&cifs_tcp_ses_lock); - return reconnect_dfs_server(server, mark_smb_session); + return reconnect_dfs_server(server); } #else int cifs_reconnect(struct TCP_Server_Info *server, bool mark_smb_session) @@ -3473,6 +3516,9 @@ static int connect_dfs_target(struct mount_ctx *mnt_ctx, const char *full_path, struct cifs_sb_info *cifs_sb = mnt_ctx->cifs_sb; char *oldmnt = cifs_sb->ctx->mount_options; + cifs_dbg(FYI, "%s: full_path=%s ref_path=%s target=%s\n", __func__, full_path, ref_path, + dfs_cache_get_tgt_name(tit)); + rc = dfs_cache_get_tgt_referral(ref_path, tit, &ref); if (rc) goto out; @@ -3571,13 +3617,18 @@ static int __follow_dfs_link(struct mount_ctx *mnt_ctx) if (rc) goto out; - /* Try all dfs link targets */ + /* Try all dfs link targets. 
If an I/O fails from currently connected DFS target with an + * error other than STATUS_PATH_NOT_COVERED (-EREMOTE), then retry it from other targets as + * specified in MS-DFSC "3.1.5.2 I/O Operation to Target Fails with an Error Other Than + * STATUS_PATH_NOT_COVERED." + */ for (rc = -ENOENT, tit = dfs_cache_get_tgt_iterator(&tl); tit; tit = dfs_cache_get_next_tgt(&tl, tit)) { rc = connect_dfs_target(mnt_ctx, full_path, mnt_ctx->leaf_fullpath + 1, tit); if (!rc) { rc = is_path_remote(mnt_ctx); - break; + if (!rc || rc == -EREMOTE) + break; } } @@ -3651,7 +3702,7 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx) goto error; rc = is_path_remote(&mnt_ctx); - if (rc == -EREMOTE) + if (rc) rc = follow_dfs_link(&mnt_ctx); if (rc) goto error; @@ -4417,7 +4468,7 @@ static int tree_connect_dfs_target(const unsigned int xid, struct cifs_tcon *tco */ if (rc && server->current_fullpath != server->origin_fullpath) { server->current_fullpath = server->origin_fullpath; - cifs_reconnect(tcon->ses->server, true); + cifs_signal_cifsd_for_reconnect(server, true); } dfs_cache_free_tgts(tl); diff --git a/fs/cifs/dfs_cache.c b/fs/cifs/dfs_cache.c index 831f42458bf6d..30e040da4f096 100644 --- a/fs/cifs/dfs_cache.c +++ b/fs/cifs/dfs_cache.c @@ -1355,7 +1355,7 @@ static void mark_for_reconnect_if_needed(struct cifs_tcon *tcon, struct dfs_cach } cifs_dbg(FYI, "%s: no cached or matched targets. 
mark dfs share for reconnect.\n", __func__); - cifs_mark_tcp_ses_conns_for_reconnect(tcon->ses->server, true); + cifs_signal_cifsd_for_reconnect(tcon->ses->server, true); } /* Refresh dfs referral of tcon and mark it for reconnect if needed */ diff --git a/fs/cifs/file.c b/fs/cifs/file.c index e7af802dcfa60..a2723f7cb5e9d 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -3740,6 +3740,11 @@ cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file, break; } + if (cifs_sb->ctx->rsize == 0) + cifs_sb->ctx->rsize = + server->ops->negotiate_rsize(tlink_tcon(open_file->tlink), + cifs_sb->ctx); + rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize, &rsize, credits); if (rc) @@ -4474,6 +4479,11 @@ static void cifs_readahead(struct readahead_control *ractl) } } + if (cifs_sb->ctx->rsize == 0) + cifs_sb->ctx->rsize = + server->ops->negotiate_rsize(tlink_tcon(open_file->tlink), + cifs_sb->ctx); + rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize, &rsize, credits); if (rc) diff --git a/fs/cifs/link.c b/fs/cifs/link.c index 852e54ee82c28..bbdf3281559c8 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c @@ -85,6 +85,9 @@ parse_mf_symlink(const u8 *buf, unsigned int buf_len, unsigned int *_link_len, if (rc != 1) return -EINVAL; + if (link_len > CIFS_MF_SYMLINK_LINK_MAXLEN) + return -EINVAL; + rc = symlink_hash(link_len, link_str, md5_hash); if (rc) { cifs_dbg(FYI, "%s: MD5 hash failure: %d\n", __func__, rc); diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c index ebe236b9d9f56..235aa1b395ebc 100644 --- a/fs/cifs/netmisc.c +++ b/fs/cifs/netmisc.c @@ -896,7 +896,7 @@ map_and_check_smb_error(struct mid_q_entry *mid, bool logErr) if (class == ERRSRV && code == ERRbaduid) { cifs_dbg(FYI, "Server returned 0x%x, reconnecting session...\n", code); - cifs_reconnect(mid->server, false); + cifs_signal_cifsd_for_reconnect(mid->server, false); } } diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index b2fb7bd119366..c71c9a44bef4b 100644 --- 
a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -228,7 +228,7 @@ cifs_get_next_mid(struct TCP_Server_Info *server) spin_unlock(&GlobalMid_Lock); if (reconnect) { - cifs_mark_tcp_ses_conns_for_reconnect(server, false); + cifs_signal_cifsd_for_reconnect(server, false); } return mid; diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index af5d0830bc8a8..5d120cd8bc78f 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -25,6 +25,7 @@ #include "smb2glob.h" #include "cifs_ioctl.h" #include "smbdirect.h" +#include "fscache.h" #include "fs_context.h" /* Change credits for different ops and return the total number of credits */ @@ -1642,6 +1643,7 @@ smb2_ioctl_query_info(const unsigned int xid, unsigned int size[2]; void *data[2]; int create_options = is_dir ? CREATE_NOT_FILE : CREATE_NOT_DIR; + void (*free_req1_func)(struct smb_rqst *r); vars = kzalloc(sizeof(*vars), GFP_ATOMIC); if (vars == NULL) @@ -1651,27 +1653,29 @@ smb2_ioctl_query_info(const unsigned int xid, resp_buftype[0] = resp_buftype[1] = resp_buftype[2] = CIFS_NO_BUFFER; - if (copy_from_user(&qi, arg, sizeof(struct smb_query_info))) - goto e_fault; - + if (copy_from_user(&qi, arg, sizeof(struct smb_query_info))) { + rc = -EFAULT; + goto free_vars; + } if (qi.output_buffer_length > 1024) { - kfree(vars); - return -EINVAL; + rc = -EINVAL; + goto free_vars; } if (!ses || !server) { - kfree(vars); - return -EIO; + rc = -EIO; + goto free_vars; } if (smb3_encryption_required(tcon)) flags |= CIFS_TRANSFORM_REQ; - buffer = memdup_user(arg + sizeof(struct smb_query_info), - qi.output_buffer_length); - if (IS_ERR(buffer)) { - kfree(vars); - return PTR_ERR(buffer); + if (qi.output_buffer_length) { + buffer = memdup_user(arg + sizeof(struct smb_query_info), qi.output_buffer_length); + if (IS_ERR(buffer)) { + rc = PTR_ERR(buffer); + goto free_vars; + } } /* Open */ @@ -1709,45 +1713,45 @@ smb2_ioctl_query_info(const unsigned int xid, rc = SMB2_open_init(tcon, server, &rqst[0], &oplock, &oparms, path); if (rc) - goto 
iqinf_exit; + goto free_output_buffer; smb2_set_next_command(tcon, &rqst[0]); /* Query */ if (qi.flags & PASSTHRU_FSCTL) { /* Can eventually relax perm check since server enforces too */ - if (!capable(CAP_SYS_ADMIN)) + if (!capable(CAP_SYS_ADMIN)) { rc = -EPERM; - else { - rqst[1].rq_iov = &vars->io_iov[0]; - rqst[1].rq_nvec = SMB2_IOCTL_IOV_SIZE; - - rc = SMB2_ioctl_init(tcon, server, - &rqst[1], - COMPOUND_FID, COMPOUND_FID, - qi.info_type, true, buffer, - qi.output_buffer_length, - CIFSMaxBufSize - - MAX_SMB2_CREATE_RESPONSE_SIZE - - MAX_SMB2_CLOSE_RESPONSE_SIZE); + goto free_open_req; } + rqst[1].rq_iov = &vars->io_iov[0]; + rqst[1].rq_nvec = SMB2_IOCTL_IOV_SIZE; + + rc = SMB2_ioctl_init(tcon, server, &rqst[1], COMPOUND_FID, COMPOUND_FID, + qi.info_type, true, buffer, qi.output_buffer_length, + CIFSMaxBufSize - MAX_SMB2_CREATE_RESPONSE_SIZE - + MAX_SMB2_CLOSE_RESPONSE_SIZE); + free_req1_func = SMB2_ioctl_free; } else if (qi.flags == PASSTHRU_SET_INFO) { /* Can eventually relax perm check since server enforces too */ - if (!capable(CAP_SYS_ADMIN)) + if (!capable(CAP_SYS_ADMIN)) { rc = -EPERM; - else { - rqst[1].rq_iov = &vars->si_iov[0]; - rqst[1].rq_nvec = 1; - - size[0] = 8; - data[0] = buffer; - - rc = SMB2_set_info_init(tcon, server, - &rqst[1], - COMPOUND_FID, COMPOUND_FID, - current->tgid, - FILE_END_OF_FILE_INFORMATION, - SMB2_O_INFO_FILE, 0, data, size); + goto free_open_req; + } + if (qi.output_buffer_length < 8) { + rc = -EINVAL; + goto free_open_req; } + rqst[1].rq_iov = &vars->si_iov[0]; + rqst[1].rq_nvec = 1; + + /* MS-FSCC 2.4.13 FileEndOfFileInformation */ + size[0] = 8; + data[0] = buffer; + + rc = SMB2_set_info_init(tcon, server, &rqst[1], COMPOUND_FID, COMPOUND_FID, + current->tgid, FILE_END_OF_FILE_INFORMATION, + SMB2_O_INFO_FILE, 0, data, size); + free_req1_func = SMB2_set_info_free; } else if (qi.flags == PASSTHRU_QUERY_INFO) { rqst[1].rq_iov = &vars->qi_iov[0]; rqst[1].rq_nvec = 1; @@ -1758,6 +1762,7 @@ smb2_ioctl_query_info(const unsigned 
int xid, qi.info_type, qi.additional_information, qi.input_buffer_length, qi.output_buffer_length, buffer); + free_req1_func = SMB2_query_info_free; } else { /* unknown flags */ cifs_tcon_dbg(VFS, "Invalid passthru query flags: 0x%x\n", qi.flags); @@ -1765,7 +1770,7 @@ smb2_ioctl_query_info(const unsigned int xid, } if (rc) - goto iqinf_exit; + goto free_open_req; smb2_set_next_command(tcon, &rqst[1]); smb2_set_related(&rqst[1]); @@ -1776,14 +1781,14 @@ smb2_ioctl_query_info(const unsigned int xid, rc = SMB2_close_init(tcon, server, &rqst[2], COMPOUND_FID, COMPOUND_FID, false); if (rc) - goto iqinf_exit; + goto free_req_1; smb2_set_related(&rqst[2]); rc = compound_send_recv(xid, ses, server, flags, 3, rqst, resp_buftype, rsp_iov); if (rc) - goto iqinf_exit; + goto out; /* No need to bump num_remote_opens since handle immediately closed */ if (qi.flags & PASSTHRU_FSCTL) { @@ -1793,18 +1798,22 @@ smb2_ioctl_query_info(const unsigned int xid, qi.input_buffer_length = le32_to_cpu(io_rsp->OutputCount); if (qi.input_buffer_length > 0 && le32_to_cpu(io_rsp->OutputOffset) + qi.input_buffer_length - > rsp_iov[1].iov_len) - goto e_fault; + > rsp_iov[1].iov_len) { + rc = -EFAULT; + goto out; + } if (copy_to_user(&pqi->input_buffer_length, &qi.input_buffer_length, - sizeof(qi.input_buffer_length))) - goto e_fault; + sizeof(qi.input_buffer_length))) { + rc = -EFAULT; + goto out; + } if (copy_to_user((void __user *)pqi + sizeof(struct smb_query_info), (const void *)io_rsp + le32_to_cpu(io_rsp->OutputOffset), qi.input_buffer_length)) - goto e_fault; + rc = -EFAULT; } else { pqi = (struct smb_query_info __user *)arg; qi_rsp = (struct smb2_query_info_rsp *)rsp_iov[1].iov_base; @@ -1812,28 +1821,30 @@ smb2_ioctl_query_info(const unsigned int xid, qi.input_buffer_length = le32_to_cpu(qi_rsp->OutputBufferLength); if (copy_to_user(&pqi->input_buffer_length, &qi.input_buffer_length, - sizeof(qi.input_buffer_length))) - goto e_fault; + sizeof(qi.input_buffer_length))) { + rc = -EFAULT; + 
goto out; + } if (copy_to_user(pqi + 1, qi_rsp->Buffer, qi.input_buffer_length)) - goto e_fault; + rc = -EFAULT; } - iqinf_exit: - cifs_small_buf_release(rqst[0].rq_iov[0].iov_base); - cifs_small_buf_release(rqst[1].rq_iov[0].iov_base); - cifs_small_buf_release(rqst[2].rq_iov[0].iov_base); +out: free_rsp_buf(resp_buftype[0], rsp_iov[0].iov_base); free_rsp_buf(resp_buftype[1], rsp_iov[1].iov_base); free_rsp_buf(resp_buftype[2], rsp_iov[2].iov_base); - kfree(vars); + SMB2_close_free(&rqst[2]); +free_req_1: + free_req1_func(&rqst[1]); +free_open_req: + SMB2_open_free(&rqst[0]); +free_output_buffer: kfree(buffer); +free_vars: + kfree(vars); return rc; - -e_fault: - rc = -EFAULT; - goto iqinf_exit; } static ssize_t @@ -3887,29 +3898,38 @@ static long smb3_collapse_range(struct file *file, struct cifs_tcon *tcon, { int rc; unsigned int xid; + struct inode *inode; struct cifsFileInfo *cfile = file->private_data; + struct cifsInodeInfo *cifsi; __le64 eof; xid = get_xid(); - if (off >= i_size_read(file->f_inode) || - off + len >= i_size_read(file->f_inode)) { + inode = d_inode(cfile->dentry); + cifsi = CIFS_I(inode); + + if (off >= i_size_read(inode) || + off + len >= i_size_read(inode)) { rc = -EINVAL; goto out; } rc = smb2_copychunk_range(xid, cfile, cfile, off + len, - i_size_read(file->f_inode) - off - len, off); + i_size_read(inode) - off - len, off); if (rc < 0) goto out; - eof = cpu_to_le64(i_size_read(file->f_inode) - len); + eof = cpu_to_le64(i_size_read(inode) - len); rc = SMB2_set_eof(xid, tcon, cfile->fid.persistent_fid, cfile->fid.volatile_fid, cfile->pid, &eof); if (rc < 0) goto out; rc = 0; + + cifsi->server_eof = i_size_read(inode) - len; + truncate_setsize(inode, cifsi->server_eof); + fscache_resize_cookie(cifs_inode_cookie(inode), cifsi->server_eof); out: free_xid(xid); return rc; diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 7e7909b1ae118..f82d6fcb5c646 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -3858,8 +3858,10 @@ void 
smb2_reconnect_server(struct work_struct *work) tcon = kzalloc(sizeof(struct cifs_tcon), GFP_KERNEL); if (!tcon) { resched = true; - list_del_init(&ses->rlist); - cifs_put_smb_ses(ses); + list_for_each_entry_safe(ses, ses2, &tmp_ses_list, rlist) { + list_del_init(&ses->rlist); + cifs_put_smb_ses(ses); + } goto done; } diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index a4c3e027cca25..eeb1a699bd6f2 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -430,7 +430,7 @@ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst, * be taken as the remainder of this one. We need to kill the * socket so the server throws away the partial SMB */ - cifs_mark_tcp_ses_conns_for_reconnect(server, false); + cifs_signal_cifsd_for_reconnect(server, false); trace_smb3_partial_send_reconnect(server->CurrentMid, server->conn_id, server->hostname); } diff --git a/fs/coredump.c b/fs/coredump.c index 1c060c0a2d72f..7ed7d601e5e00 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include @@ -53,6 +54,9 @@ #include +static bool dump_vma_snapshot(struct coredump_params *cprm); +static void free_vma_snapshot(struct coredump_params *cprm); + static int core_uses_pid; static unsigned int core_pipe_limit; static char core_pattern[CORENAME_MAX_SIZE] = "core"; @@ -531,6 +535,7 @@ void do_coredump(const kernel_siginfo_t *siginfo) * by any locks. 
*/ .mm_flags = mm->flags, + .vma_meta = NULL, }; audit_core_dumps(siginfo->si_signo); @@ -745,6 +750,9 @@ void do_coredump(const kernel_siginfo_t *siginfo) pr_info("Core dump to |%s disabled\n", cn.corename); goto close_fail; } + if (!dump_vma_snapshot(&cprm)) + goto close_fail; + file_start_write(cprm.file); core_dumped = binfmt->core_dump(&cprm); /* @@ -758,6 +766,7 @@ void do_coredump(const kernel_siginfo_t *siginfo) dump_emit(&cprm, "", 1); } file_end_write(cprm.file); + free_vma_snapshot(&cprm); } if (ispipe && core_pipe_limit) wait_for_dump_helpers(cprm.file); @@ -980,6 +989,8 @@ static bool always_dump_vma(struct vm_area_struct *vma) return false; } +#define DUMP_SIZE_MAYBE_ELFHDR_PLACEHOLDER 1 + /* * Decide how much of @vma's contents should be included in a core dump. */ @@ -1039,9 +1050,20 @@ static unsigned long vma_dump_size(struct vm_area_struct *vma, * dump the first page to aid in determining what was mapped here. */ if (FILTER(ELF_HEADERS) && - vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ) && - (READ_ONCE(file_inode(vma->vm_file)->i_mode) & 0111) != 0) - return PAGE_SIZE; + vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) { + if ((READ_ONCE(file_inode(vma->vm_file)->i_mode) & 0111) != 0) + return PAGE_SIZE; + + /* + * ELF libraries aren't always executable. + * We'll want to check whether the mapping starts with the ELF + * magic, but not now - we're holding the mmap lock, + * so copy_from_user() doesn't work here. + * Use a placeholder instead, and fix it up later in + * dump_vma_snapshot(). 
+ */ + return DUMP_SIZE_MAYBE_ELFHDR_PLACEHOLDER; + } #undef FILTER @@ -1078,18 +1100,29 @@ static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma, return gate_vma; } +static void free_vma_snapshot(struct coredump_params *cprm) +{ + if (cprm->vma_meta) { + int i; + for (i = 0; i < cprm->vma_count; i++) { + struct file *file = cprm->vma_meta[i].file; + if (file) + fput(file); + } + kvfree(cprm->vma_meta); + cprm->vma_meta = NULL; + } +} + /* * Under the mmap_lock, take a snapshot of relevant information about the task's * VMAs. */ -int dump_vma_snapshot(struct coredump_params *cprm, int *vma_count, - struct core_vma_metadata **vma_meta, - size_t *vma_data_size_ptr) +static bool dump_vma_snapshot(struct coredump_params *cprm) { struct vm_area_struct *vma, *gate_vma; struct mm_struct *mm = current->mm; int i; - size_t vma_data_size = 0; /* * Once the stack expansion code is fixed to not change VMA bounds @@ -1097,36 +1130,51 @@ int dump_vma_snapshot(struct coredump_params *cprm, int *vma_count, * mmap_lock in read mode. */ if (mmap_write_lock_killable(mm)) - return -EINTR; + return false; + cprm->vma_data_size = 0; gate_vma = get_gate_vma(mm); - *vma_count = mm->map_count + (gate_vma ? 1 : 0); + cprm->vma_count = mm->map_count + (gate_vma ? 
1 : 0); - *vma_meta = kvmalloc_array(*vma_count, sizeof(**vma_meta), GFP_KERNEL); - if (!*vma_meta) { + cprm->vma_meta = kvmalloc_array(cprm->vma_count, sizeof(*cprm->vma_meta), GFP_KERNEL); + if (!cprm->vma_meta) { mmap_write_unlock(mm); - return -ENOMEM; + return false; } for (i = 0, vma = first_vma(current, gate_vma); vma != NULL; vma = next_vma(vma, gate_vma), i++) { - struct core_vma_metadata *m = (*vma_meta) + i; + struct core_vma_metadata *m = cprm->vma_meta + i; m->start = vma->vm_start; m->end = vma->vm_end; m->flags = vma->vm_flags; m->dump_size = vma_dump_size(vma, cprm->mm_flags); + m->pgoff = vma->vm_pgoff; - vma_data_size += m->dump_size; + m->file = vma->vm_file; + if (m->file) + get_file(m->file); } mmap_write_unlock(mm); - if (WARN_ON(i != *vma_count)) { - kvfree(*vma_meta); - return -EFAULT; + for (i = 0; i < cprm->vma_count; i++) { + struct core_vma_metadata *m = cprm->vma_meta + i; + + if (m->dump_size == DUMP_SIZE_MAYBE_ELFHDR_PLACEHOLDER) { + char elfmag[SELFMAG]; + + if (copy_from_user(elfmag, (void __user *)m->start, SELFMAG) || + memcmp(elfmag, ELFMAG, SELFMAG) != 0) { + m->dump_size = 0; + } else { + m->dump_size = PAGE_SIZE; + } + } + + cprm->vma_data_size += m->dump_size; } - *vma_data_size_ptr = vma_data_size; - return 0; + return true; } diff --git a/fs/erofs/sysfs.c b/fs/erofs/sysfs.c index dac252bc92281..f3babf1e66083 100644 --- a/fs/erofs/sysfs.c +++ b/fs/erofs/sysfs.c @@ -221,9 +221,11 @@ void erofs_unregister_sysfs(struct super_block *sb) { struct erofs_sb_info *sbi = EROFS_SB(sb); - kobject_del(&sbi->s_kobj); - kobject_put(&sbi->s_kobj); - wait_for_completion(&sbi->s_kobj_unregister); + if (sbi->s_kobj.state_in_sysfs) { + kobject_del(&sbi->s_kobj); + kobject_put(&sbi->s_kobj); + wait_for_completion(&sbi->s_kobj_unregister); + } } int __init erofs_init_sysfs(void) diff --git a/fs/exec.c b/fs/exec.c index 79f2c9483302d..6027e2a939a74 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -495,8 +495,14 @@ static int bprm_stack_limits(struct 
linux_binprm *bprm) * the stack. They aren't stored until much later when we can't * signal to the parent that the child has run out of stack space. * Instead, calculate it here so it's possible to fail gracefully. + * + * In the case of argc = 0, make sure there is space for adding a + * empty string (which will bump argc to 1), to ensure confused + * userspace programs don't start processing from argv[1], thinking + * argc can never be 0, to keep them from walking envp by accident. + * See do_execveat_common(). */ - ptr_size = (bprm->argc + bprm->envc) * sizeof(void *); + ptr_size = (max(bprm->argc, 1) + bprm->envc) * sizeof(void *); if (limit <= ptr_size) return -E2BIG; limit -= ptr_size; @@ -1006,6 +1012,7 @@ static int exec_mmap(struct mm_struct *mm) active_mm = tsk->active_mm; tsk->active_mm = mm; tsk->mm = mm; + lru_gen_add_mm(mm); /* * This prevents preemption while active_mm is being loaded and * it and mm are being updated, which could cause problems for @@ -1018,6 +1025,7 @@ static int exec_mmap(struct mm_struct *mm) activate_mm(active_mm, mm); if (IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM)) local_irq_enable(); + lru_gen_use_mm(mm); tsk->mm->vmacache_seqnum = 0; vmacache_flush(tsk); task_unlock(tsk); @@ -1897,6 +1905,9 @@ static int do_execveat_common(int fd, struct filename *filename, } retval = count(argv, MAX_ARG_STRINGS); + if (retval == 0) + pr_warn_once("process '%s' launched '%s' with NULL argv: empty string added\n", + current->comm, bprm->filename); if (retval < 0) goto out_free; bprm->argc = retval; @@ -1923,6 +1934,19 @@ static int do_execveat_common(int fd, struct filename *filename, if (retval < 0) goto out_free; + /* + * When argv is empty, add an empty string ("") as argv[0] to + * ensure confused userspace programs that start processing + * from argv[1] won't end up walking envp. See also + * bprm_stack_limits(). 
+ */ + if (bprm->argc == 0) { + retval = copy_string_kernel("", bprm); + if (retval < 0) + goto out_free; + bprm->argc = 1; + } + retval = bprm_execve(bprm, fd, filename, flags); out_free: free_bprm(bprm); @@ -1951,6 +1975,8 @@ int kernel_execve(const char *kernel_filename, } retval = count_strings_kernel(argv); + if (WARN_ON_ONCE(retval == 0)) + retval = -EINVAL; if (retval < 0) goto out_free; bprm->argc = retval; diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 94f1fbd7d3ac2..6d4f5ef747660 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -753,8 +753,12 @@ static loff_t ext2_max_size(int bits) res += 1LL << (bits-2); res += 1LL << (2*(bits-2)); res += 1LL << (3*(bits-2)); + /* Compute how many metadata blocks are needed */ + meta_blocks = 1; + meta_blocks += 1 + ppb; + meta_blocks += 1 + ppb + ppb * ppb; /* Does block tree limit file size? */ - if (res < upper_limit) + if (res + meta_blocks <= upper_limit) goto check_lfs; res = upper_limit; diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index e429418036050..9c076262770d9 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -1783,19 +1783,20 @@ bool empty_inline_dir(struct inode *dir, int *has_inline_data) void *inline_pos; unsigned int offset; struct ext4_dir_entry_2 *de; - bool ret = true; + bool ret = false; err = ext4_get_inode_loc(dir, &iloc); if (err) { EXT4_ERROR_INODE_ERR(dir, -err, "error %d getting inode %lu block", err, dir->i_ino); - return true; + return false; } down_read(&EXT4_I(dir)->xattr_sem); if (!ext4_has_inline_data(dir)) { *has_inline_data = 0; + ret = true; goto out; } @@ -1804,7 +1805,6 @@ bool empty_inline_dir(struct inode *dir, int *has_inline_data) ext4_warning(dir->i_sb, "bad inline directory (dir #%lu) - no `..'", dir->i_ino); - ret = true; goto out; } @@ -1823,16 +1823,15 @@ bool empty_inline_dir(struct inode *dir, int *has_inline_data) dir->i_ino, le32_to_cpu(de->inode), le16_to_cpu(de->rec_len), de->name_len, inline_size); - ret = true; goto out; } if 
(le32_to_cpu(de->inode)) { - ret = false; goto out; } offset += ext4_rec_len_from_disk(de->rec_len, inline_size); } + ret = true; out: up_read(&EXT4_I(dir)->xattr_sem); brelse(iloc.bh); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 01c9e4f743ba9..531a94f48637c 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1993,6 +1993,15 @@ static int ext4_writepage(struct page *page, else len = PAGE_SIZE; + /* Should never happen but for bugs in other kernel subsystems */ + if (!page_has_buffers(page)) { + ext4_warning_inode(inode, + "page %lu does not have buffers attached", page->index); + ClearPageDirty(page); + unlock_page(page); + return 0; + } + page_bufs = page_buffers(page); /* * We cannot do block allocation or other extent handling in this @@ -2594,6 +2603,22 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd) wait_on_page_writeback(page); BUG_ON(PageWriteback(page)); + /* + * Should never happen but for buggy code in + * other subsystems that call + * set_page_dirty() without properly warning + * the file system first. See [1] for more + * information. 
+ * + * [1] https://lore.kernel.org/linux-mm/20180103100430.GE4911@quack2.suse.cz + */ + if (!page_has_buffers(page)) { + ext4_warning_inode(mpd->inode, "page %lu does not have buffers attached", page->index); + ClearPageDirty(page); + unlock_page(page); + continue; + } + if (mpd->map.m_len == 0) mpd->first_page = page->index; mpd->next_page = page->index + 1; diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 67ac95c4cd9b8..1f37eb0176ccc 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -1000,7 +1000,7 @@ static inline int should_optimize_scan(struct ext4_allocation_context *ac) return 0; if (ac->ac_criteria >= 2) return 0; - if (ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)) + if (!ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)) return 0; return 1; } @@ -3899,69 +3899,95 @@ void ext4_mb_mark_bb(struct super_block *sb, ext4_fsblk_t block, struct ext4_sb_info *sbi = EXT4_SB(sb); ext4_group_t group; ext4_grpblk_t blkoff; - int i, clen, err; + int i, err; int already; + unsigned int clen, clen_changed, thisgrp_len; - clen = EXT4_B2C(sbi, len); + while (len > 0) { + ext4_get_group_no_and_offset(sb, block, &group, &blkoff); - ext4_get_group_no_and_offset(sb, block, &group, &blkoff); - bitmap_bh = ext4_read_block_bitmap(sb, group); - if (IS_ERR(bitmap_bh)) { - err = PTR_ERR(bitmap_bh); - bitmap_bh = NULL; - goto out_err; - } + /* + * Check to see if we are freeing blocks across a group + * boundary. + * In case of flex_bg, this can happen that (block, len) may + * span across more than one group. In that case we need to + * get the corresponding group metadata to work with. + * For this we have goto again loop. 
+ */ + thisgrp_len = min_t(unsigned int, (unsigned int)len, + EXT4_BLOCKS_PER_GROUP(sb) - EXT4_C2B(sbi, blkoff)); + clen = EXT4_NUM_B2C(sbi, thisgrp_len); - err = -EIO; - gdp = ext4_get_group_desc(sb, group, &gdp_bh); - if (!gdp) - goto out_err; + bitmap_bh = ext4_read_block_bitmap(sb, group); + if (IS_ERR(bitmap_bh)) { + err = PTR_ERR(bitmap_bh); + bitmap_bh = NULL; + break; + } - ext4_lock_group(sb, group); - already = 0; - for (i = 0; i < clen; i++) - if (!mb_test_bit(blkoff + i, bitmap_bh->b_data) == !state) - already++; + err = -EIO; + gdp = ext4_get_group_desc(sb, group, &gdp_bh); + if (!gdp) + break; - if (state) - ext4_set_bits(bitmap_bh->b_data, blkoff, clen); - else - mb_test_and_clear_bits(bitmap_bh->b_data, blkoff, clen); - if (ext4_has_group_desc_csum(sb) && - (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) { - gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); - ext4_free_group_clusters_set(sb, gdp, - ext4_free_clusters_after_init(sb, - group, gdp)); - } - if (state) - clen = ext4_free_group_clusters(sb, gdp) - clen + already; - else - clen = ext4_free_group_clusters(sb, gdp) + clen - already; + ext4_lock_group(sb, group); + already = 0; + for (i = 0; i < clen; i++) + if (!mb_test_bit(blkoff + i, bitmap_bh->b_data) == + !state) + already++; + + clen_changed = clen - already; + if (state) + ext4_set_bits(bitmap_bh->b_data, blkoff, clen); + else + mb_test_and_clear_bits(bitmap_bh->b_data, blkoff, clen); + if (ext4_has_group_desc_csum(sb) && + (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) { + gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); + ext4_free_group_clusters_set(sb, gdp, + ext4_free_clusters_after_init(sb, group, gdp)); + } + if (state) + clen = ext4_free_group_clusters(sb, gdp) - clen_changed; + else + clen = ext4_free_group_clusters(sb, gdp) + clen_changed; - ext4_free_group_clusters_set(sb, gdp, clen); - ext4_block_bitmap_csum_set(sb, group, gdp, bitmap_bh); - ext4_group_desc_csum_set(sb, group, gdp); + 
ext4_free_group_clusters_set(sb, gdp, clen); + ext4_block_bitmap_csum_set(sb, group, gdp, bitmap_bh); + ext4_group_desc_csum_set(sb, group, gdp); - ext4_unlock_group(sb, group); + ext4_unlock_group(sb, group); - if (sbi->s_log_groups_per_flex) { - ext4_group_t flex_group = ext4_flex_group(sbi, group); + if (sbi->s_log_groups_per_flex) { + ext4_group_t flex_group = ext4_flex_group(sbi, group); + struct flex_groups *fg = sbi_array_rcu_deref(sbi, + s_flex_groups, flex_group); - atomic64_sub(len, - &sbi_array_rcu_deref(sbi, s_flex_groups, - flex_group)->free_clusters); + if (state) + atomic64_sub(clen_changed, &fg->free_clusters); + else + atomic64_add(clen_changed, &fg->free_clusters); + + } + + err = ext4_handle_dirty_metadata(NULL, NULL, bitmap_bh); + if (err) + break; + sync_dirty_buffer(bitmap_bh); + err = ext4_handle_dirty_metadata(NULL, NULL, gdp_bh); + sync_dirty_buffer(gdp_bh); + if (err) + break; + + block += thisgrp_len; + len -= thisgrp_len; + brelse(bitmap_bh); + BUG_ON(len < 0); } - err = ext4_handle_dirty_metadata(NULL, NULL, bitmap_bh); if (err) - goto out_err; - sync_dirty_buffer(bitmap_bh); - err = ext4_handle_dirty_metadata(NULL, NULL, gdp_bh); - sync_dirty_buffer(gdp_bh); - -out_err: - brelse(bitmap_bh); + brelse(bitmap_bh); } /* diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 8cf0a924a49bf..39e223f7bf64d 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2997,14 +2997,14 @@ bool ext4_empty_dir(struct inode *inode) if (inode->i_size < ext4_dir_rec_len(1, NULL) + ext4_dir_rec_len(2, NULL)) { EXT4_ERROR_INODE(inode, "invalid size"); - return true; + return false; } /* The first directory block must not be a hole, * so treat it as DIRENT_HTREE */ bh = ext4_read_dirblock(inode, 0, DIRENT_HTREE); if (IS_ERR(bh)) - return true; + return false; de = (struct ext4_dir_entry_2 *) bh->b_data; if (ext4_check_dir_entry(inode, NULL, de, bh, bh->b_data, bh->b_size, @@ -3012,7 +3012,7 @@ bool ext4_empty_dir(struct inode *inode) le32_to_cpu(de->inode) != 
inode->i_ino || strcmp(".", de->name)) { ext4_warning_inode(inode, "directory missing '.'"); brelse(bh); - return true; + return false; } offset = ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize); de = ext4_next_entry(de, sb->s_blocksize); @@ -3021,7 +3021,7 @@ bool ext4_empty_dir(struct inode *inode) le32_to_cpu(de->inode) == 0 || strcmp("..", de->name)) { ext4_warning_inode(inode, "directory missing '..'"); brelse(bh); - return true; + return false; } offset += ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize); while (offset < inode->i_size) { @@ -3035,7 +3035,7 @@ bool ext4_empty_dir(struct inode *inode) continue; } if (IS_ERR(bh)) - return true; + return false; } de = (struct ext4_dir_entry_2 *) (bh->b_data + (offset & (sb->s_blocksize - 1))); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index c5021ca0a28ad..bed29f96ccc7e 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2021,12 +2021,12 @@ static int ext4_set_test_dummy_encryption(struct super_block *sb, char *arg) #define EXT4_SPEC_s_commit_interval (1 << 16) #define EXT4_SPEC_s_fc_debug_max_replay (1 << 17) #define EXT4_SPEC_s_sb_block (1 << 18) +#define EXT4_SPEC_mb_optimize_scan (1 << 19) struct ext4_fs_context { char *s_qf_names[EXT4_MAXQUOTAS]; char *test_dummy_enc_arg; int s_jquota_fmt; /* Format of quota to use */ - int mb_optimize_scan; #ifdef CONFIG_EXT4_DEBUG int s_fc_debug_max_replay; #endif @@ -2045,8 +2045,8 @@ struct ext4_fs_context { unsigned int mask_s_mount_opt; unsigned int vals_s_mount_opt2; unsigned int mask_s_mount_opt2; - unsigned int vals_s_mount_flags; - unsigned int mask_s_mount_flags; + unsigned long vals_s_mount_flags; + unsigned long mask_s_mount_flags; unsigned int opt_flags; /* MOPT flags */ unsigned int spec; u32 s_max_batch_time; @@ -2149,23 +2149,36 @@ static inline void ctx_set_##name(struct ext4_fs_context *ctx, \ { \ ctx->mask_s_##name |= flag; \ ctx->vals_s_##name |= flag; \ -} \ +} + +#define EXT4_CLEAR_CTX(name) \ static inline void ctx_clear_##name(struct 
ext4_fs_context *ctx, \ unsigned long flag) \ { \ ctx->mask_s_##name |= flag; \ ctx->vals_s_##name &= ~flag; \ -} \ +} + +#define EXT4_TEST_CTX(name) \ static inline unsigned long \ ctx_test_##name(struct ext4_fs_context *ctx, unsigned long flag) \ { \ return (ctx->vals_s_##name & flag); \ -} \ +} -EXT4_SET_CTX(flags); +EXT4_SET_CTX(flags); /* set only */ EXT4_SET_CTX(mount_opt); +EXT4_CLEAR_CTX(mount_opt); +EXT4_TEST_CTX(mount_opt); EXT4_SET_CTX(mount_opt2); -EXT4_SET_CTX(mount_flags); +EXT4_CLEAR_CTX(mount_opt2); +EXT4_TEST_CTX(mount_opt2); + +static inline void ctx_set_mount_flag(struct ext4_fs_context *ctx, int bit) +{ + set_bit(bit, &ctx->mask_s_mount_flags); + set_bit(bit, &ctx->vals_s_mount_flags); +} static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param) { @@ -2235,7 +2248,7 @@ static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param) param->key); return 0; case Opt_abort: - ctx_set_mount_flags(ctx, EXT4_MF_FS_ABORTED); + ctx_set_mount_flag(ctx, EXT4_MF_FS_ABORTED); return 0; case Opt_i_version: ext4_msg(NULL, KERN_WARNING, deprecated_msg, param->key, "5.20"); @@ -2451,12 +2464,17 @@ static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param) ctx_clear_mount_opt(ctx, m->mount_opt); return 0; case Opt_mb_optimize_scan: - if (result.int_32 != 0 && result.int_32 != 1) { + if (result.int_32 == 1) { + ctx_set_mount_opt2(ctx, EXT4_MOUNT2_MB_OPTIMIZE_SCAN); + ctx->spec |= EXT4_SPEC_mb_optimize_scan; + } else if (result.int_32 == 0) { + ctx_clear_mount_opt2(ctx, EXT4_MOUNT2_MB_OPTIMIZE_SCAN); + ctx->spec |= EXT4_SPEC_mb_optimize_scan; + } else { ext4_msg(NULL, KERN_WARNING, "mb_optimize_scan should be set to 0 or 1."); return -EINVAL; } - ctx->mb_optimize_scan = result.int_32; return 0; } @@ -4369,7 +4387,6 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) /* Set defaults for the variables that will be set during parsing */ ctx->journal_ioprio = DEFAULT_JOURNAL_IOPRIO; - 
ctx->mb_optimize_scan = DEFAULT_MB_OPTIMIZE_SCAN; sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS; sbi->s_sectors_written_start = @@ -5320,12 +5337,12 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) * turned off by passing "mb_optimize_scan=0". This can also be * turned on forcefully by passing "mb_optimize_scan=1". */ - if (ctx->mb_optimize_scan == 1) - set_opt2(sb, MB_OPTIMIZE_SCAN); - else if (ctx->mb_optimize_scan == 0) - clear_opt2(sb, MB_OPTIMIZE_SCAN); - else if (sbi->s_groups_count >= MB_DEFAULT_LINEAR_SCAN_THRESHOLD) - set_opt2(sb, MB_OPTIMIZE_SCAN); + if (!(ctx->spec & EXT4_SPEC_mb_optimize_scan)) { + if (sbi->s_groups_count >= MB_DEFAULT_LINEAR_SCAN_THRESHOLD) + set_opt2(sb, MB_OPTIMIZE_SCAN); + else + clear_opt2(sb, MB_OPTIMIZE_SCAN); + } err = ext4_mb_init(sb); if (err) { diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 982f0170639fc..bf3ba85cf325b 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -864,6 +864,7 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, struct page *cp_page_1 = NULL, *cp_page_2 = NULL; struct f2fs_checkpoint *cp_block = NULL; unsigned long long cur_version = 0, pre_version = 0; + unsigned int cp_blocks; int err; err = get_checkpoint_version(sbi, cp_addr, &cp_block, @@ -871,15 +872,16 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, if (err) return NULL; - if (le32_to_cpu(cp_block->cp_pack_total_block_count) > - sbi->blocks_per_seg) { + cp_blocks = le32_to_cpu(cp_block->cp_pack_total_block_count); + + if (cp_blocks > sbi->blocks_per_seg || cp_blocks <= F2FS_CP_PACKS) { f2fs_warn(sbi, "invalid cp_pack_total_block_count:%u", le32_to_cpu(cp_block->cp_pack_total_block_count)); goto invalid_cp; } pre_version = *version; - cp_addr += le32_to_cpu(cp_block->cp_pack_total_block_count) - 1; + cp_addr += cp_blocks - 1; err = get_checkpoint_version(sbi, cp_addr, &cp_block, &cp_page_2, version); if (err) diff --git 
a/fs/f2fs/compress.c b/fs/f2fs/compress.c index d0c3aeba59454..3b162506b269a 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -314,10 +314,9 @@ static int lz4_decompress_pages(struct decompress_io_ctx *dic) } if (ret != PAGE_SIZE << dic->log_cluster_size) { - printk_ratelimited("%sF2FS-fs (%s): lz4 invalid rlen:%zu, " + printk_ratelimited("%sF2FS-fs (%s): lz4 invalid ret:%d, " "expected:%lu\n", KERN_ERR, - F2FS_I_SB(dic->inode)->sb->s_id, - dic->rlen, + F2FS_I_SB(dic->inode)->sb->s_id, ret, PAGE_SIZE << dic->log_cluster_size); return -EIO; } diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 8c417864c66ae..bdfa8bed10b2c 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -3163,8 +3163,12 @@ static int __f2fs_write_data_pages(struct address_space *mapping, /* to avoid spliting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */ if (wbc->sync_mode == WB_SYNC_ALL) atomic_inc(&sbi->wb_sync_req[DATA]); - else if (atomic_read(&sbi->wb_sync_req[DATA])) + else if (atomic_read(&sbi->wb_sync_req[DATA])) { + /* to avoid potential deadlock */ + if (current->plug) + blk_finish_plug(current->plug); goto skip_write; + } if (__should_serialize_io(inode, wbc)) { mutex_lock(&sbi->writepages); @@ -3353,7 +3357,7 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, *fsdata = NULL; - if (len == PAGE_SIZE) + if (len == PAGE_SIZE && !(f2fs_is_atomic_file(inode))) goto repeat; ret = f2fs_prepare_compress_overwrite(inode, pagep, diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 8c50518475a99..b449c7a372a4b 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -21,7 +21,7 @@ #include "gc.h" static LIST_HEAD(f2fs_stat_list); -static DEFINE_MUTEX(f2fs_stat_mutex); +static DEFINE_RAW_SPINLOCK(f2fs_stat_lock); #ifdef CONFIG_DEBUG_FS static struct dentry *f2fs_debugfs_root; #endif @@ -338,14 +338,16 @@ static char *s_flag[] = { [SBI_QUOTA_SKIP_FLUSH] = " quota_skip_flush", [SBI_QUOTA_NEED_REPAIR] = " quota_need_repair", [SBI_IS_RESIZEFS] = " resizefs", + 
[SBI_IS_FREEZING] = " freezefs", }; static int stat_show(struct seq_file *s, void *v) { struct f2fs_stat_info *si; int i = 0, j = 0; + unsigned long flags; - mutex_lock(&f2fs_stat_mutex); + raw_spin_lock_irqsave(&f2fs_stat_lock, flags); list_for_each_entry(si, &f2fs_stat_list, stat_list) { update_general_status(si->sbi); @@ -573,7 +575,7 @@ static int stat_show(struct seq_file *s, void *v) seq_printf(s, " - paged : %llu KB\n", si->page_mem >> 10); } - mutex_unlock(&f2fs_stat_mutex); + raw_spin_unlock_irqrestore(&f2fs_stat_lock, flags); return 0; } @@ -584,6 +586,7 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi) { struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); struct f2fs_stat_info *si; + unsigned long flags; int i; si = f2fs_kzalloc(sbi, sizeof(struct f2fs_stat_info), GFP_KERNEL); @@ -619,9 +622,9 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi) atomic_set(&sbi->max_aw_cnt, 0); atomic_set(&sbi->max_vw_cnt, 0); - mutex_lock(&f2fs_stat_mutex); + raw_spin_lock_irqsave(&f2fs_stat_lock, flags); list_add_tail(&si->stat_list, &f2fs_stat_list); - mutex_unlock(&f2fs_stat_mutex); + raw_spin_unlock_irqrestore(&f2fs_stat_lock, flags); return 0; } @@ -629,10 +632,11 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi) void f2fs_destroy_stats(struct f2fs_sb_info *sbi) { struct f2fs_stat_info *si = F2FS_STAT(sbi); + unsigned long flags; - mutex_lock(&f2fs_stat_mutex); + raw_spin_lock_irqsave(&f2fs_stat_lock, flags); list_del(&si->stat_list); - mutex_unlock(&f2fs_stat_mutex); + raw_spin_unlock_irqrestore(&f2fs_stat_lock, flags); kfree(si); } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 68b44015514f5..2514597f5b26b 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1267,6 +1267,7 @@ enum { SBI_QUOTA_SKIP_FLUSH, /* skip flushing quota in current CP */ SBI_QUOTA_NEED_REPAIR, /* quota file may be corrupted */ SBI_IS_RESIZEFS, /* resizefs is in process */ + SBI_IS_FREEZING, /* freezefs is in process */ }; enum { diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 
3c98ef6af97d1..b110c3a7db6ae 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2008,7 +2008,10 @@ static int f2fs_ioc_start_atomic_write(struct file *filp) inode_lock(inode); - f2fs_disable_compressed_file(inode); + if (!f2fs_disable_compressed_file(inode)) { + ret = -EINVAL; + goto out; + } if (f2fs_is_atomic_file(inode)) { if (is_inode_flag_set(inode, FI_ATOMIC_REVOKE_REQUEST)) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index ee308a8de4327..e020804f7b075 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1038,8 +1038,10 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, set_sbi_flag(sbi, SBI_NEED_FSCK); } - if (f2fs_check_nid_range(sbi, dni->ino)) + if (f2fs_check_nid_range(sbi, dni->ino)) { + f2fs_put_page(node_page, 1); return false; + } *nofs = ofs_of_node(node_page); source_blkaddr = data_blkaddr(NULL, node_page, ofs_in_node); diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 0ec8e32a00b47..71f232dcf3c20 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -778,7 +778,8 @@ void f2fs_evict_inode(struct inode *inode) f2fs_remove_ino_entry(sbi, inode->i_ino, UPDATE_INO); f2fs_remove_ino_entry(sbi, inode->i_ino, FLUSH_INO); - sb_start_intwrite(inode->i_sb); + if (!is_sbi_flag_set(sbi, SBI_IS_FREEZING)) + sb_start_intwrite(inode->i_sb); set_inode_flag(inode, FI_NO_ALLOC); i_size_write(inode, 0); retry: @@ -809,7 +810,8 @@ void f2fs_evict_inode(struct inode *inode) if (dquot_initialize_needed(inode)) set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); } - sb_end_intwrite(inode->i_sb); + if (!is_sbi_flag_set(sbi, SBI_IS_FREEZING)) + sb_end_intwrite(inode->i_sb); no_delete: dquot_drop(inode); @@ -885,6 +887,7 @@ void f2fs_handle_failed_inode(struct inode *inode) err = f2fs_get_node_info(sbi, inode->i_ino, &ni, false); if (err) { set_sbi_flag(sbi, SBI_NEED_FSCK); + set_inode_flag(inode, FI_FREE_NID); f2fs_warn(sbi, "May loss orphan inode, run fsck to fix."); goto out; } diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 50b2874e758c9..4ff7dfb542502 
100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -2111,8 +2111,12 @@ static int f2fs_write_node_pages(struct address_space *mapping, if (wbc->sync_mode == WB_SYNC_ALL) atomic_inc(&sbi->wb_sync_req[NODE]); - else if (atomic_read(&sbi->wb_sync_req[NODE])) + else if (atomic_read(&sbi->wb_sync_req[NODE])) { + /* to avoid potential deadlock */ + if (current->plug) + blk_finish_plug(current->plug); goto skip_write; + } trace_f2fs_writepages(mapping->host, wbc, NODE); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 1dabc8244083d..416d802ebbea6 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -4789,6 +4789,13 @@ static int sanity_check_curseg(struct f2fs_sb_info *sbi) sanity_check_seg_type(sbi, curseg->seg_type); + if (curseg->alloc_type != LFS && curseg->alloc_type != SSR) { + f2fs_err(sbi, + "Current segment has invalid alloc_type:%d", + curseg->alloc_type); + return -EFSCORRUPTED; + } + if (f2fs_test_bit(blkofs, se->cur_valid_map)) goto out; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index baefd398ec1a3..c4f8510fac930 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1662,11 +1662,15 @@ static int f2fs_freeze(struct super_block *sb) /* ensure no checkpoint required */ if (!llist_empty(&F2FS_SB(sb)->cprc_info.issue_list)) return -EINVAL; + + /* to avoid deadlock on f2fs_evict_inode->SB_FREEZE_FS */ + set_sbi_flag(F2FS_SB(sb), SBI_IS_FREEZING); return 0; } static int f2fs_unfreeze(struct super_block *sb) { + clear_sbi_flag(F2FS_SB(sb), SBI_IS_FREEZING); return 0; } @@ -2688,7 +2692,7 @@ int f2fs_quota_sync(struct super_block *sb, int type) struct f2fs_sb_info *sbi = F2FS_SB(sb); struct quota_info *dqopt = sb_dqopt(sb); int cnt; - int ret; + int ret = 0; /* * Now when everything is written we can discard the pagecache so @@ -2699,8 +2703,8 @@ int f2fs_quota_sync(struct super_block *sb, int type) if (type != -1 && cnt != type) continue; - if (!sb_has_quota_active(sb, type)) - return 0; + if (!sb_has_quota_active(sb, cnt)) + continue; 
inode_lock(dqopt->files[cnt]); diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 8ac5066712454..bdb1b5c05be2e 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -481,7 +481,7 @@ static ssize_t __sbi_store(struct f2fs_attr *a, } else if (t == GC_IDLE_AT) { if (!sbi->am.atgc_enabled) return -EINVAL; - sbi->gc_mode = GC_AT; + sbi->gc_mode = GC_IDLE_AT; } else { sbi->gc_mode = GC_NORMAL; } diff --git a/fs/file.c b/fs/file.c index 97d212a9b8144..ee93173467025 100644 --- a/fs/file.c +++ b/fs/file.c @@ -87,6 +87,21 @@ static void copy_fdtable(struct fdtable *nfdt, struct fdtable *ofdt) copy_fd_bitmaps(nfdt, ofdt, ofdt->max_fds); } +/* + * Note how the fdtable bitmap allocations very much have to be a multiple of + * BITS_PER_LONG. This is not only because we walk those things in chunks of + * 'unsigned long' in some places, but simply because that is how the Linux + * kernel bitmaps are defined to work: they are not "bits in an array of bytes", + * they are very much "bits in an array of unsigned long". + * + * The ALIGN(nr, BITS_PER_LONG) here is for clarity: since we just multiplied + * by that "1024/sizeof(ptr)" before, we already know there are sufficient + * clear low bits. Clang seems to realize that, gcc ends up being confused. + * + * On a 128-bit machine, the ALIGN() would actually matter. In the meantime, + * let's consider it documentation (and maybe a test-case for gcc to improve + * its code generation ;) + */ static struct fdtable * alloc_fdtable(unsigned int nr) { struct fdtable *fdt; @@ -102,6 +117,7 @@ static struct fdtable * alloc_fdtable(unsigned int nr) nr /= (1024 / sizeof(struct file *)); nr = roundup_pow_of_two(nr + 1); nr *= (1024 / sizeof(struct file *)); + nr = ALIGN(nr, BITS_PER_LONG); /* * Note that this can drive nr *below* what we had passed if sysctl_nr_open * had been set lower between the check in expand_files() and here. 
Deal @@ -269,6 +285,19 @@ static unsigned int count_open_files(struct fdtable *fdt) return i; } +/* + * Note that a sane fdtable size always has to be a multiple of + * BITS_PER_LONG, since we have bitmaps that are sized by this. + * + * 'max_fds' will normally already be properly aligned, but it + * turns out that in the close_range() -> __close_range() -> + * unshare_fd() -> dup_fd() -> sane_fdtable_size() we can end + * up having a 'max_fds' value that isn't already aligned. + * + * Rather than make close_range() have to worry about this, + * just make that BITS_PER_LONG alignment be part of a sane + * fdtable size. Becuase that's really what it is. + */ static unsigned int sane_fdtable_size(struct fdtable *fdt, unsigned int max_fds) { unsigned int count; @@ -276,7 +305,7 @@ static unsigned int sane_fdtable_size(struct fdtable *fdt, unsigned int max_fds) count = count_open_files(fdt); if (max_fds < NR_OPEN_DEFAULT) max_fds = NR_OPEN_DEFAULT; - return min(count, max_fds); + return ALIGN(min(count, max_fds), BITS_PER_LONG); } /* diff --git a/fs/file_table.c b/fs/file_table.c index 7d2e692b66a94..ada8fe814db97 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -412,6 +412,7 @@ void __fput_sync(struct file *file) } EXPORT_SYMBOL(fput); +EXPORT_SYMBOL(__fput_sync); void __init files_init(void) { diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 592730fd6e424..e7c0aa6d61ced 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -785,7 +785,8 @@ static int fuse_check_page(struct page *page) 1 << PG_active | 1 << PG_workingset | 1 << PG_reclaim | - 1 << PG_waiters))) { + 1 << PG_waiters | + LRU_GEN_MASK | LRU_REFS_MASK))) { dump_page(page, "fuse: trying to steal weird page"); return 1; } diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index d67108489148e..fbdb7a30470a3 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -2146,7 +2146,7 @@ int gfs2_setattr_size(struct inode *inode, u64 newsize) ret = do_shrink(inode, newsize); out: - gfs2_rs_delete(ip, NULL); + 
gfs2_rs_delete(ip); gfs2_qa_put(ip); return ret; } diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 8c39a8571b1fa..b53ad18e5ccbf 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -706,7 +706,7 @@ static int gfs2_release(struct inode *inode, struct file *file) if (file->f_mode & FMODE_WRITE) { if (gfs2_rs_active(&ip->i_res)) - gfs2_rs_delete(ip, &inode->i_writecount); + gfs2_rs_delete(ip); gfs2_qa_put(ip); } return 0; @@ -1083,6 +1083,7 @@ static ssize_t gfs2_file_buffered_write(struct kiocb *iocb, gfs2_holder_uninit(gh); if (statfs_gh) kfree(statfs_gh); + from->count = orig_count - read; return read ? read : ret; } diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 89905f4f29bb6..66a123306aecb 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -793,7 +793,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, if (free_vfs_inode) /* else evict will do the put for us */ gfs2_glock_put(ip->i_gl); } - gfs2_rs_delete(ip, NULL); + gfs2_rs_deltree(&ip->i_res); gfs2_qa_put(ip); fail_free_acls: posix_acl_release(default_acl); diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 0fb3c01bc5577..3b34bb24d0af4 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -680,13 +680,14 @@ void gfs2_rs_deltree(struct gfs2_blkreserv *rs) /** * gfs2_rs_delete - delete a multi-block reservation * @ip: The inode for this reservation - * @wcount: The inode's write count, or NULL * */ -void gfs2_rs_delete(struct gfs2_inode *ip, atomic_t *wcount) +void gfs2_rs_delete(struct gfs2_inode *ip) { + struct inode *inode = &ip->i_inode; + down_write(&ip->i_rw_mutex); - if ((wcount == NULL) || (atomic_read(wcount) <= 1)) + if (atomic_read(&inode->i_writecount) <= 1) gfs2_rs_deltree(&ip->i_res); up_write(&ip->i_rw_mutex); } @@ -1415,7 +1416,8 @@ int gfs2_fitrim(struct file *filp, void __user *argp) start = r.start >> bs_shift; end = start + (r.len >> bs_shift); - minlen = max_t(u64, r.minlen, + minlen = max_t(u64, r.minlen, sdp->sd_sb.sb_bsize); + minlen = max_t(u64, minlen, 
q->limits.discard_granularity) >> bs_shift; if (end <= start || minlen > sdp->sd_max_rg_data) diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index 3e2ca1fb43056..46dd94e9e085c 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h @@ -45,7 +45,7 @@ extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n, bool dinode, u64 *generation); extern void gfs2_rs_deltree(struct gfs2_blkreserv *rs); -extern void gfs2_rs_delete(struct gfs2_inode *ip, atomic_t *wcount); +extern void gfs2_rs_delete(struct gfs2_inode *ip); extern void __gfs2_free_blocks(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd, u64 bstart, u32 blen, int meta); extern void gfs2_free_meta(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd, diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 64c67090f5036..143a47359d1b8 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1396,7 +1396,7 @@ static void gfs2_evict_inode(struct inode *inode) truncate_inode_pages_final(&inode->i_data); if (ip->i_qadata) gfs2_assert_warn(sdp, ip->i_qadata->qa_ref == 0); - gfs2_rs_delete(ip, NULL); + gfs2_rs_deltree(&ip->i_res); gfs2_ordered_del_inode(ip); clear_inode(inode); gfs2_dir_hash_inval(ip); diff --git a/fs/io_uring.c b/fs/io_uring.c index 4715980e90150..619c67fd456dd 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -112,8 +112,7 @@ IOSQE_IO_DRAIN | IOSQE_CQE_SKIP_SUCCESS) #define IO_REQ_CLEAN_FLAGS (REQ_F_BUFFER_SELECTED | REQ_F_NEED_CLEANUP | \ - REQ_F_POLLED | REQ_F_INFLIGHT | REQ_F_CREDS | \ - REQ_F_ASYNC_DATA) + REQ_F_POLLED | REQ_F_CREDS | REQ_F_ASYNC_DATA) #define IO_TCTX_REFS_CACHE_NR (1U << 10) @@ -469,7 +468,6 @@ struct io_uring_task { const struct io_ring_ctx *last; struct io_wq *io_wq; struct percpu_counter inflight; - atomic_t inflight_tracked; atomic_t in_idle; spinlock_t task_lock; @@ -560,7 +558,8 @@ struct io_rw { /* NOTE: kiocb has the file as the first member, so don't do it here */ struct kiocb kiocb; u64 addr; - u64 len; + u32 len; + u32 flags; }; struct io_connect { @@ -621,10 +620,10 @@ 
struct io_epoll { struct io_splice { struct file *file_out; - struct file *file_in; loff_t off_out; loff_t off_in; u64 len; + int splice_fd_in; unsigned int flags; }; @@ -865,7 +864,11 @@ struct io_kiocb { u64 user_data; u32 result; - u32 cflags; + /* fd initially, then cflags for completion */ + union { + u32 cflags; + int fd; + }; struct io_ring_ctx *ctx; struct task_struct *task; @@ -1127,8 +1130,11 @@ static int __io_register_rsrc_update(struct io_ring_ctx *ctx, unsigned type, struct io_uring_rsrc_update2 *up, unsigned nr_args); static void io_clean_op(struct io_kiocb *req); -static struct file *io_file_get(struct io_ring_ctx *ctx, - struct io_kiocb *req, int fd, bool fixed); +static inline struct file *io_file_get_fixed(struct io_kiocb *req, int fd, + unsigned issue_flags); +static inline struct file *io_file_get_normal(struct io_kiocb *req, int fd); +static void io_drop_inflight_file(struct io_kiocb *req); +static bool io_assign_file(struct io_kiocb *req, unsigned int issue_flags); static void __io_queue_sqe(struct io_kiocb *req); static void io_rsrc_put_work(struct work_struct *work); @@ -1257,13 +1263,20 @@ static void io_rsrc_refs_refill(struct io_ring_ctx *ctx) } static inline void io_req_set_rsrc_node(struct io_kiocb *req, - struct io_ring_ctx *ctx) + struct io_ring_ctx *ctx, + unsigned int issue_flags) { if (!req->fixed_rsrc_refs) { req->fixed_rsrc_refs = &ctx->rsrc_node->refs; - ctx->rsrc_cached_refs--; - if (unlikely(ctx->rsrc_cached_refs < 0)) - io_rsrc_refs_refill(ctx); + + if (!(issue_flags & IO_URING_F_UNLOCKED)) { + lockdep_assert_held(&ctx->uring_lock); + ctx->rsrc_cached_refs--; + if (unlikely(ctx->rsrc_cached_refs < 0)) + io_rsrc_refs_refill(ctx); + } else { + percpu_ref_get(req->fixed_rsrc_refs); + } } } @@ -1303,29 +1316,9 @@ static bool io_match_task(struct io_kiocb *head, struct task_struct *task, bool cancel_all) __must_hold(&req->ctx->timeout_lock) { - struct io_kiocb *req; - if (task && head->task != task) return false; - if 
(cancel_all) - return true; - - io_for_each_link(req, head) { - if (req->flags & REQ_F_INFLIGHT) - return true; - } - return false; -} - -static bool io_match_linked(struct io_kiocb *head) -{ - struct io_kiocb *req; - - io_for_each_link(req, head) { - if (req->flags & REQ_F_INFLIGHT) - return true; - } - return false; + return cancel_all; } /* @@ -1335,24 +1328,9 @@ static bool io_match_linked(struct io_kiocb *head) static bool io_match_task_safe(struct io_kiocb *head, struct task_struct *task, bool cancel_all) { - bool matched; - if (task && head->task != task) return false; - if (cancel_all) - return true; - - if (head->flags & REQ_F_LINK_TIMEOUT) { - struct io_ring_ctx *ctx = head->ctx; - - /* protect against races with linked timeouts */ - spin_lock_irq(&ctx->timeout_lock); - matched = io_match_linked(head); - spin_unlock_irq(&ctx->timeout_lock); - } else { - matched = io_match_linked(head); - } - return matched; + return cancel_all; } static inline bool req_has_async_data(struct io_kiocb *req) @@ -1500,14 +1478,6 @@ static inline bool io_req_ffs_set(struct io_kiocb *req) return req->flags & REQ_F_FIXED_FILE; } -static inline void io_req_track_inflight(struct io_kiocb *req) -{ - if (!(req->flags & REQ_F_INFLIGHT)) { - req->flags |= REQ_F_INFLIGHT; - atomic_inc(¤t->io_uring->inflight_tracked); - } -} - static struct io_kiocb *__io_prep_linked_timeout(struct io_kiocb *req) { if (WARN_ON_ONCE(!req->link)) @@ -1551,14 +1521,6 @@ static void io_prep_async_work(struct io_kiocb *req) if (def->unbound_nonreg_file) req->work.flags |= IO_WQ_WORK_UNBOUND; } - - switch (req->opcode) { - case IORING_OP_SPLICE: - case IORING_OP_TEE: - if (!S_ISREG(file_inode(req->splice.file_in)->i_mode)) - req->work.flags |= IO_WQ_WORK_UNBOUND; - break; - } } static void io_prep_async_link(struct io_kiocb *req) @@ -1652,12 +1614,11 @@ static __cold void io_flush_timeouts(struct io_ring_ctx *ctx) __must_hold(&ctx->completion_lock) { u32 seq = ctx->cached_cq_tail - 
atomic_read(&ctx->cq_timeouts); + struct io_kiocb *req, *tmp; spin_lock_irq(&ctx->timeout_lock); - while (!list_empty(&ctx->timeout_list)) { + list_for_each_entry_safe(req, tmp, &ctx->timeout_list, timeout.list) { u32 events_needed, events_got; - struct io_kiocb *req = list_first_entry(&ctx->timeout_list, - struct io_kiocb, timeout.list); if (io_is_timeout_noseq(req)) break; @@ -1674,7 +1635,6 @@ static __cold void io_flush_timeouts(struct io_ring_ctx *ctx) if (events_got < events_needed) break; - list_del_init(&req->timeout.list); io_kill_timeout(req, 0); } ctx->cq_last_tm_flush = seq; @@ -2381,6 +2341,8 @@ static void io_req_task_work_add(struct io_kiocb *req, bool priority) WARN_ON_ONCE(!tctx); + io_drop_inflight_file(req); + spin_lock_irqsave(&tctx->task_lock, flags); if (priority) wq_list_add_tail(&req->io_task_work.node, &tctx->prior_task_list); @@ -2813,8 +2775,12 @@ static bool io_rw_should_reissue(struct io_kiocb *req) static bool __io_complete_rw_common(struct io_kiocb *req, long res) { - if (req->rw.kiocb.ki_flags & IOCB_WRITE) + if (req->rw.kiocb.ki_flags & IOCB_WRITE) { kiocb_end_write(req); + fsnotify_modify(req->file); + } else { + fsnotify_access(req->file); + } if (unlikely(res != req->result)) { if ((res == -EAGAIN || res == -EOPNOTSUPP) && io_rw_should_reissue(req)) { @@ -2990,50 +2956,11 @@ static inline bool io_file_supports_nowait(struct io_kiocb *req) static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe) { - struct io_ring_ctx *ctx = req->ctx; struct kiocb *kiocb = &req->rw.kiocb; - struct file *file = req->file; unsigned ioprio; int ret; - if (!io_req_ffs_set(req)) - req->flags |= io_file_get_flags(file) << REQ_F_SUPPORT_NOWAIT_BIT; - kiocb->ki_pos = READ_ONCE(sqe->off); - if (kiocb->ki_pos == -1) { - if (!(file->f_mode & FMODE_STREAM)) { - req->flags |= REQ_F_CUR_POS; - kiocb->ki_pos = file->f_pos; - } else { - kiocb->ki_pos = 0; - } - } - kiocb->ki_flags = iocb_flags(file); - ret = kiocb_set_rw_flags(kiocb, 
READ_ONCE(sqe->rw_flags)); - if (unlikely(ret)) - return ret; - - /* - * If the file is marked O_NONBLOCK, still allow retry for it if it - * supports async. Otherwise it's impossible to use O_NONBLOCK files - * reliably. If not, or it IOCB_NOWAIT is set, don't retry. - */ - if ((kiocb->ki_flags & IOCB_NOWAIT) || - ((file->f_flags & O_NONBLOCK) && !io_file_supports_nowait(req))) - req->flags |= REQ_F_NOWAIT; - - if (ctx->flags & IORING_SETUP_IOPOLL) { - if (!(kiocb->ki_flags & IOCB_DIRECT) || !file->f_op->iopoll) - return -EOPNOTSUPP; - - kiocb->ki_flags |= IOCB_HIPRI | IOCB_ALLOC_CACHE; - kiocb->ki_complete = io_complete_rw_iopoll; - req->iopoll_completed = 0; - } else { - if (kiocb->ki_flags & IOCB_HIPRI) - return -EINVAL; - kiocb->ki_complete = io_complete_rw; - } ioprio = READ_ONCE(sqe->ioprio); if (ioprio) { @@ -3049,6 +2976,7 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe) req->imu = NULL; req->rw.addr = READ_ONCE(sqe->addr); req->rw.len = READ_ONCE(sqe->len); + req->rw.flags = READ_ONCE(sqe->rw_flags); req->buf_index = READ_ONCE(sqe->buf_index); return 0; } @@ -3165,7 +3093,8 @@ static int __io_import_fixed(struct io_kiocb *req, int rw, struct iov_iter *iter return 0; } -static int io_import_fixed(struct io_kiocb *req, int rw, struct iov_iter *iter) +static int io_import_fixed(struct io_kiocb *req, int rw, struct iov_iter *iter, + unsigned int issue_flags) { struct io_mapped_ubuf *imu = req->imu; u16 index, buf_index = req->buf_index; @@ -3175,7 +3104,7 @@ static int io_import_fixed(struct io_kiocb *req, int rw, struct iov_iter *iter) if (unlikely(buf_index >= ctx->nr_user_bufs)) return -EFAULT; - io_req_set_rsrc_node(req, ctx); + io_req_set_rsrc_node(req, ctx, issue_flags); index = array_index_nospec(buf_index, ctx->nr_user_bufs); imu = READ_ONCE(ctx->user_bufs[index]); req->imu = imu; @@ -3331,7 +3260,7 @@ static struct iovec *__io_import_iovec(int rw, struct io_kiocb *req, ssize_t ret; if (opcode == IORING_OP_READ_FIXED || 
opcode == IORING_OP_WRITE_FIXED) { - ret = io_import_fixed(req, rw, iter); + ret = io_import_fixed(req, rw, iter, issue_flags); if (ret) return ERR_PTR(ret); return NULL; @@ -3436,13 +3365,15 @@ static ssize_t loop_rw_iter(int rw, struct io_kiocb *req, struct iov_iter *iter) ret = nr; break; } + ret += nr; if (!iov_iter_is_bvec(iter)) { iov_iter_advance(iter, nr); } else { - req->rw.len -= nr; req->rw.addr += nr; + req->rw.len -= nr; + if (!req->rw.len) + break; } - ret += nr; if (nr != iovec.iov_len) break; } @@ -3527,13 +3458,6 @@ static inline int io_rw_prep_async(struct io_kiocb *req, int rw) return 0; } -static int io_read_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) -{ - if (unlikely(!(req->file->f_mode & FMODE_READ))) - return -EBADF; - return io_prep_rw(req, sqe); -} - /* * This is our waitqueue callback handler, registered through __folio_lock_async() * when we initially tried to do the IO with the iocb armed our waitqueue. @@ -3621,6 +3545,58 @@ static bool need_read_all(struct io_kiocb *req) S_ISBLK(file_inode(req->file)->i_mode); } +static int io_rw_init_file(struct io_kiocb *req, fmode_t mode) +{ + struct kiocb *kiocb = &req->rw.kiocb; + struct io_ring_ctx *ctx = req->ctx; + struct file *file = req->file; + int ret; + + if (unlikely(!file || !(file->f_mode & mode))) + return -EBADF; + + if (!io_req_ffs_set(req)) + req->flags |= io_file_get_flags(file) << REQ_F_SUPPORT_NOWAIT_BIT; + + if (kiocb->ki_pos == -1) { + if (!(file->f_mode & FMODE_STREAM)) { + req->flags |= REQ_F_CUR_POS; + kiocb->ki_pos = file->f_pos; + } else { + kiocb->ki_pos = 0; + } + } + + kiocb->ki_flags = iocb_flags(file); + ret = kiocb_set_rw_flags(kiocb, req->rw.flags); + if (unlikely(ret)) + return ret; + + /* + * If the file is marked O_NONBLOCK, still allow retry for it if it + * supports async. Otherwise it's impossible to use O_NONBLOCK files + * reliably. If not, or it IOCB_NOWAIT is set, don't retry. 
+ */ + if ((kiocb->ki_flags & IOCB_NOWAIT) || + ((file->f_flags & O_NONBLOCK) && !io_file_supports_nowait(req))) + req->flags |= REQ_F_NOWAIT; + + if (ctx->flags & IORING_SETUP_IOPOLL) { + if (!(kiocb->ki_flags & IOCB_DIRECT) || !file->f_op->iopoll) + return -EOPNOTSUPP; + + kiocb->ki_flags |= IOCB_HIPRI | IOCB_ALLOC_CACHE; + kiocb->ki_complete = io_complete_rw_iopoll; + req->iopoll_completed = 0; + } else { + if (kiocb->ki_flags & IOCB_HIPRI) + return -EINVAL; + kiocb->ki_complete = io_complete_rw; + } + + return 0; +} + static int io_read(struct io_kiocb *req, unsigned int issue_flags) { struct io_rw_state __s, *s = &__s; @@ -3645,6 +3621,9 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags) iov_iter_restore(&s->iter, &s->iter_state); iovec = NULL; } + ret = io_rw_init_file(req, FMODE_READ); + if (unlikely(ret)) + return ret; req->result = iov_iter_count(&s->iter); if (force_nonblock) { @@ -3743,14 +3722,6 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags) return 0; } -static int io_write_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) -{ - if (unlikely(!(req->file->f_mode & FMODE_WRITE))) - return -EBADF; - req->rw.kiocb.ki_hint = ki_hint_validate(file_write_hint(req->file)); - return io_prep_rw(req, sqe); -} - static int io_write(struct io_kiocb *req, unsigned int issue_flags) { struct io_rw_state __s, *s = &__s; @@ -3770,6 +3741,9 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags) iov_iter_restore(&s->iter, &s->iter_state); iovec = NULL; } + ret = io_rw_init_file(req, FMODE_WRITE); + if (unlikely(ret)) + return ret; req->result = iov_iter_count(&s->iter); if (force_nonblock) { @@ -4138,18 +4112,11 @@ static int __io_splice_prep(struct io_kiocb *req, if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) return -EINVAL; - sp->file_in = NULL; sp->len = READ_ONCE(sqe->len); sp->flags = READ_ONCE(sqe->splice_flags); - if (unlikely(sp->flags & ~valid_flags)) return -EINVAL; - - sp->file_in = 
io_file_get(req->ctx, req, READ_ONCE(sqe->splice_fd_in), - (sp->flags & SPLICE_F_FD_IN_FIXED)); - if (!sp->file_in) - return -EBADF; - req->flags |= REQ_F_NEED_CLEANUP; + sp->splice_fd_in = READ_ONCE(sqe->splice_fd_in); return 0; } @@ -4164,20 +4131,29 @@ static int io_tee_prep(struct io_kiocb *req, static int io_tee(struct io_kiocb *req, unsigned int issue_flags) { struct io_splice *sp = &req->splice; - struct file *in = sp->file_in; struct file *out = sp->file_out; unsigned int flags = sp->flags & ~SPLICE_F_FD_IN_FIXED; + struct file *in; long ret = 0; if (issue_flags & IO_URING_F_NONBLOCK) return -EAGAIN; + + if (sp->flags & SPLICE_F_FD_IN_FIXED) + in = io_file_get_fixed(req, sp->splice_fd_in, issue_flags); + else + in = io_file_get_normal(req, sp->splice_fd_in); + if (!in) { + ret = -EBADF; + goto done; + } + if (sp->len) ret = do_tee(in, out, sp->len, flags); if (!(sp->flags & SPLICE_F_FD_IN_FIXED)) io_put_file(in); - req->flags &= ~REQ_F_NEED_CLEANUP; - +done: if (ret != sp->len) req_set_fail(req); io_req_complete(req, ret); @@ -4196,15 +4172,24 @@ static int io_splice_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) static int io_splice(struct io_kiocb *req, unsigned int issue_flags) { struct io_splice *sp = &req->splice; - struct file *in = sp->file_in; struct file *out = sp->file_out; unsigned int flags = sp->flags & ~SPLICE_F_FD_IN_FIXED; loff_t *poff_in, *poff_out; + struct file *in; long ret = 0; if (issue_flags & IO_URING_F_NONBLOCK) return -EAGAIN; + if (sp->flags & SPLICE_F_FD_IN_FIXED) + in = io_file_get_fixed(req, sp->splice_fd_in, issue_flags); + else + in = io_file_get_normal(req, sp->splice_fd_in); + if (!in) { + ret = -EBADF; + goto done; + } + poff_in = (sp->off_in == -1) ? NULL : &sp->off_in; poff_out = (sp->off_out == -1) ? 
NULL : &sp->off_out; @@ -4213,8 +4198,7 @@ static int io_splice(struct io_kiocb *req, unsigned int issue_flags) if (!(sp->flags & SPLICE_F_FD_IN_FIXED)) io_put_file(in); - req->flags &= ~REQ_F_NEED_CLEANUP; - +done: if (ret != sp->len) req_set_fail(req); io_req_complete(req, ret); @@ -4239,9 +4223,6 @@ static int io_fsync_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_ring_ctx *ctx = req->ctx; - if (!req->file) - return -EBADF; - if (unlikely(ctx->flags & IORING_SETUP_IOPOLL)) return -EINVAL; if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index || @@ -4301,6 +4282,8 @@ static int io_fallocate(struct io_kiocb *req, unsigned int issue_flags) req->sync.len); if (ret < 0) req_set_fail(req); + else + fsnotify_modify(req->file); io_req_complete(req, ret); return 0; } @@ -5258,8 +5241,7 @@ static int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) accept->nofile = rlimit(RLIMIT_NOFILE); accept->file_slot = READ_ONCE(sqe->file_index); - if (accept->file_slot && ((req->open.how.flags & O_CLOEXEC) || - (accept->flags & SOCK_CLOEXEC))) + if (accept->file_slot && (accept->flags & SOCK_CLOEXEC)) return -EINVAL; if (accept->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) return -EINVAL; @@ -5407,7 +5389,7 @@ struct io_poll_table { }; #define IO_POLL_CANCEL_FLAG BIT(31) -#define IO_POLL_REF_MASK ((1u << 20)-1) +#define IO_POLL_REF_MASK GENMASK(30, 0) /* * If refs part of ->poll_refs (see IO_POLL_REF_MASK) is 0, it's free. We can @@ -5507,7 +5489,7 @@ static void io_poll_remove_entries(struct io_kiocb *req) * either spurious wakeup or multishot CQE is served. 0 when it's done with * the request, then the mask is stored in req->result. 
*/ -static int io_poll_check_events(struct io_kiocb *req) +static int io_poll_check_events(struct io_kiocb *req, bool locked) { struct io_ring_ctx *ctx = req->ctx; struct io_poll_iocb *poll = io_poll_get_single(req); @@ -5528,7 +5510,10 @@ static int io_poll_check_events(struct io_kiocb *req) if (!req->result) { struct poll_table_struct pt = { ._key = poll->events }; + unsigned flags = locked ? 0 : IO_URING_F_UNLOCKED; + if (unlikely(!io_assign_file(req, flags))) + return -EBADF; req->result = vfs_poll(req->file, &pt) & poll->events; } @@ -5563,7 +5548,7 @@ static void io_poll_task_func(struct io_kiocb *req, bool *locked) struct io_ring_ctx *ctx = req->ctx; int ret; - ret = io_poll_check_events(req); + ret = io_poll_check_events(req, *locked); if (ret > 0) return; @@ -5588,7 +5573,7 @@ static void io_apoll_task_func(struct io_kiocb *req, bool *locked) struct io_ring_ctx *ctx = req->ctx; int ret; - ret = io_poll_check_events(req); + ret = io_poll_check_events(req, *locked); if (ret > 0) return; @@ -5863,6 +5848,7 @@ static __cold bool io_poll_remove_all(struct io_ring_ctx *ctx, list = &ctx->cancel_hash[i]; hlist_for_each_entry_safe(req, tmp, list, hash_node) { if (io_match_task_safe(req, tsk, cancel_all)) { + hlist_del_init(&req->hash_node); io_poll_cancel_req(req); found = true; } @@ -6273,6 +6259,7 @@ static int io_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe, if (data->ts.tv_sec < 0 || data->ts.tv_nsec < 0) return -EINVAL; + INIT_LIST_HEAD(&req->timeout.list); data->mode = io_translate_timeout_mode(flags); hrtimer_init(&data->timer, io_timeout_get_clock(data), data->mode); @@ -6479,6 +6466,7 @@ static int io_files_update(struct io_kiocb *req, unsigned int issue_flags) up.nr = 0; up.tags = 0; up.resv = 0; + up.resv2 = 0; io_ring_submit_lock(ctx, needs_lock); ret = __io_register_rsrc_update(ctx, IORING_RSRC_FILE, @@ -6499,11 +6487,10 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) case IORING_OP_READV: case 
IORING_OP_READ_FIXED: case IORING_OP_READ: - return io_read_prep(req, sqe); case IORING_OP_WRITEV: case IORING_OP_WRITE_FIXED: case IORING_OP_WRITE: - return io_write_prep(req, sqe); + return io_prep_rw(req, sqe); case IORING_OP_POLL_ADD: return io_poll_add_prep(req, sqe); case IORING_OP_POLL_REMOVE: @@ -6681,11 +6668,6 @@ static void io_clean_op(struct io_kiocb *req) kfree(io->free_iov); break; } - case IORING_OP_SPLICE: - case IORING_OP_TEE: - if (!(req->splice.flags & SPLICE_F_FD_IN_FIXED)) - io_put_file(req->splice.file_in); - break; case IORING_OP_OPENAT: case IORING_OP_OPENAT2: if (req->open.filename) @@ -6716,11 +6698,6 @@ static void io_clean_op(struct io_kiocb *req) kfree(req->apoll); req->apoll = NULL; } - if (req->flags & REQ_F_INFLIGHT) { - struct io_uring_task *tctx = req->task->io_uring; - - atomic_dec(&tctx->inflight_tracked); - } if (req->flags & REQ_F_CREDS) put_cred(req->creds); if (req->flags & REQ_F_ASYNC_DATA) { @@ -6730,11 +6707,31 @@ static void io_clean_op(struct io_kiocb *req) req->flags &= ~IO_REQ_CLEAN_FLAGS; } +static bool io_assign_file(struct io_kiocb *req, unsigned int issue_flags) +{ + if (req->file || !io_op_defs[req->opcode].needs_file) + return true; + + if (req->flags & REQ_F_FIXED_FILE) + req->file = io_file_get_fixed(req, req->fd, issue_flags); + else + req->file = io_file_get_normal(req, req->fd); + if (req->file) + return true; + + req_set_fail(req); + req->result = -EBADF; + return false; +} + static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags) { const struct cred *creds = NULL; int ret; + if (unlikely(!io_assign_file(req, issue_flags))) + return -EBADF; + if (unlikely((req->flags & REQ_F_CREDS) && req->creds != current_cred())) creds = override_creds(req->creds); @@ -6881,10 +6878,11 @@ static struct io_wq_work *io_wq_free_work(struct io_wq_work *work) static void io_wq_submit_work(struct io_wq_work *work) { struct io_kiocb *req = container_of(work, struct io_kiocb, work); + const struct io_op_def *def = 
&io_op_defs[req->opcode]; unsigned int issue_flags = IO_URING_F_UNLOCKED; bool needs_poll = false; struct io_kiocb *timeout; - int ret = 0; + int ret = 0, err = -ECANCELED; /* one will be dropped by ->io_free_work() after returning to io-wq */ if (!(req->flags & REQ_F_REFCOUNT)) @@ -6896,14 +6894,20 @@ static void io_wq_submit_work(struct io_wq_work *work) if (timeout) io_queue_linked_timeout(timeout); + /* either cancelled or io-wq is dying, so don't touch tctx->iowq */ if (work->flags & IO_WQ_WORK_CANCEL) { - io_req_task_queue_fail(req, -ECANCELED); +fail: + io_req_task_queue_fail(req, err); return; } + if (!io_assign_file(req, issue_flags)) { + err = -EBADF; + work->flags |= IO_WQ_WORK_CANCEL; + goto fail; + } if (req->flags & REQ_F_FORCE_ASYNC) { - const struct io_op_def *def = &io_op_defs[req->opcode]; bool opcode_poll = def->pollin || def->pollout; if (opcode_poll && file_can_poll(req->file)) { @@ -6960,46 +6964,56 @@ static void io_fixed_file_set(struct io_fixed_file *file_slot, struct file *file file_slot->file_ptr = file_ptr; } -static inline struct file *io_file_get_fixed(struct io_ring_ctx *ctx, - struct io_kiocb *req, int fd) +static inline struct file *io_file_get_fixed(struct io_kiocb *req, int fd, + unsigned int issue_flags) { - struct file *file; + struct io_ring_ctx *ctx = req->ctx; + struct file *file = NULL; unsigned long file_ptr; + if (issue_flags & IO_URING_F_UNLOCKED) + mutex_lock(&ctx->uring_lock); + if (unlikely((unsigned int)fd >= ctx->nr_user_files)) - return NULL; + goto out; fd = array_index_nospec(fd, ctx->nr_user_files); file_ptr = io_fixed_file_slot(&ctx->file_table, fd)->file_ptr; file = (struct file *) (file_ptr & FFS_MASK); file_ptr &= ~FFS_MASK; /* mask in overlapping REQ_F and FFS bits */ req->flags |= (file_ptr << REQ_F_SUPPORT_NOWAIT_BIT); - io_req_set_rsrc_node(req, ctx); + io_req_set_rsrc_node(req, ctx, 0); +out: + if (issue_flags & IO_URING_F_UNLOCKED) + mutex_unlock(&ctx->uring_lock); return file; } -static struct file 
*io_file_get_normal(struct io_ring_ctx *ctx, - struct io_kiocb *req, int fd) +/* + * Drop the file for requeue operations. Only used if req->file is the + * io_uring descriptor itself. + */ +static void io_drop_inflight_file(struct io_kiocb *req) +{ + if (unlikely(req->flags & REQ_F_INFLIGHT)) { + fput(req->file); + req->file = NULL; + req->flags &= ~REQ_F_INFLIGHT; + } +} + +static struct file *io_file_get_normal(struct io_kiocb *req, int fd) { struct file *file = fget(fd); - trace_io_uring_file_get(ctx, fd); + trace_io_uring_file_get(req->ctx, fd); /* we don't allow fixed io_uring files */ - if (file && unlikely(file->f_op == &io_uring_fops)) - io_req_track_inflight(req); + if (file && file->f_op == &io_uring_fops) + req->flags |= REQ_F_INFLIGHT; return file; } -static inline struct file *io_file_get(struct io_ring_ctx *ctx, - struct io_kiocb *req, int fd, bool fixed) -{ - if (fixed) - return io_file_get_fixed(ctx, req, fd); - else - return io_file_get_normal(ctx, req, fd); -} - static void io_req_task_link_timeout(struct io_kiocb *req, bool *locked) { struct io_kiocb *prev = req->timeout.prev; @@ -7237,6 +7251,8 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, if (io_op_defs[opcode].needs_file) { struct io_submit_state *state = &ctx->submit_state; + req->fd = READ_ONCE(sqe->fd); + /* * Plug now if we have more than 2 IO left after this, and the * target is potentially a read/write to block based storage. 
@@ -7246,11 +7262,6 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, state->need_plug = false; blk_start_plug_nr_ios(&state->plug, state->submit_nr); } - - req->file = io_file_get(ctx, req, READ_ONCE(sqe->fd), - (sqe_flags & IOSQE_FIXED_FILE)); - if (unlikely(!req->file)) - return -EBADF; } personality = READ_ONCE(sqe->personality); @@ -8229,10 +8240,15 @@ static int __io_sqe_files_scm(struct io_ring_ctx *ctx, int nr, int offset) refcount_add(skb->truesize, &sk->sk_wmem_alloc); skb_queue_head(&sk->sk_receive_queue, skb); - for (i = 0; i < nr_files; i++) - fput(fpl->fp[i]); + for (i = 0; i < nr; i++) { + struct file *file = io_file_from_index(ctx, i + offset); + + if (file) + fput(file); + } } else { kfree_skb(skb); + free_uid(fpl->user); kfree(fpl); } @@ -8520,13 +8536,15 @@ static int io_sqe_file_register(struct io_ring_ctx *ctx, struct file *file, static int io_queue_rsrc_removal(struct io_rsrc_data *data, unsigned idx, struct io_rsrc_node *node, void *rsrc) { + u64 *tag_slot = io_get_tag_slot(data, idx); struct io_rsrc_put *prsrc; prsrc = kzalloc(sizeof(*prsrc), GFP_KERNEL); if (!prsrc) return -ENOMEM; - prsrc->tag = *io_get_tag_slot(data, idx); + prsrc->tag = *tag_slot; + *tag_slot = 0; prsrc->rsrc = rsrc; list_add(&prsrc->list, &node->rsrc_list); return 0; @@ -8595,7 +8613,7 @@ static int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags) bool needs_lock = issue_flags & IO_URING_F_UNLOCKED; struct io_fixed_file *file_slot; struct file *file; - int ret, i; + int ret; io_ring_submit_lock(ctx, needs_lock); ret = -ENXIO; @@ -8608,8 +8626,8 @@ static int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags) if (ret) goto out; - i = array_index_nospec(offset, ctx->nr_user_files); - file_slot = io_fixed_file_slot(&ctx->file_table, i); + offset = array_index_nospec(offset, ctx->nr_user_files); + file_slot = io_fixed_file_slot(&ctx->file_table, offset); ret = -EBADF; if (!file_slot->file_ptr) goto out; @@ -8665,8 +8683,7 @@ 
static int __io_sqe_files_update(struct io_ring_ctx *ctx, if (file_slot->file_ptr) { file = (struct file *)(file_slot->file_ptr & FFS_MASK); - err = io_queue_rsrc_removal(data, up->offset + done, - ctx->rsrc_node, file); + err = io_queue_rsrc_removal(data, i, ctx->rsrc_node, file); if (err) break; file_slot->file_ptr = 0; @@ -8691,7 +8708,7 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx, err = -EBADF; break; } - *io_get_tag_slot(data, up->offset + done) = tag; + *io_get_tag_slot(data, i) = tag; io_fixed_file_set(file_slot, file); err = io_sqe_file_register(ctx, file, i); if (err) { @@ -8766,7 +8783,6 @@ static __cold int io_uring_alloc_task_context(struct task_struct *task, xa_init(&tctx->xa); init_waitqueue_head(&tctx->wait); atomic_set(&tctx->in_idle, 0); - atomic_set(&tctx->inflight_tracked, 0); task->io_uring = tctx; spin_lock_init(&tctx->task_lock); INIT_WQ_LIST(&tctx->task_list); @@ -9340,7 +9356,7 @@ static int __io_sqe_buffers_update(struct io_ring_ctx *ctx, i = array_index_nospec(offset, ctx->nr_user_bufs); if (ctx->user_bufs[i] != ctx->dummy_ubuf) { - err = io_queue_rsrc_removal(ctx->buf_data, offset, + err = io_queue_rsrc_removal(ctx->buf_data, i, ctx->rsrc_node, ctx->user_bufs[i]); if (unlikely(err)) { io_buffer_unmap(ctx, &imu); @@ -9904,7 +9920,7 @@ static __cold void io_uring_clean_tctx(struct io_uring_task *tctx) static s64 tctx_inflight(struct io_uring_task *tctx, bool tracked) { if (tracked) - return atomic_read(&tctx->inflight_tracked); + return 0; return percpu_counter_sum(&tctx->inflight); } @@ -10095,6 +10111,8 @@ static int io_get_ext_arg(unsigned flags, const void __user *argp, size_t *argsz return -EINVAL; if (copy_from_user(&arg, argp, sizeof(arg))) return -EFAULT; + if (arg.pad) + return -EINVAL; *sig = u64_to_user_ptr(arg.sigmask); *argsz = arg.sigmask_sz; *ts = u64_to_user_ptr(arg.ts); @@ -10559,7 +10577,8 @@ static __cold int io_uring_create(unsigned entries, struct io_uring_params *p, IORING_FEAT_CUR_PERSONALITY | 
IORING_FEAT_FAST_POLL | IORING_FEAT_POLL_32BITS | IORING_FEAT_SQPOLL_NONFIXED | IORING_FEAT_EXT_ARG | IORING_FEAT_NATIVE_WORKERS | - IORING_FEAT_RSRC_TAGS | IORING_FEAT_CQE_SKIP; + IORING_FEAT_RSRC_TAGS | IORING_FEAT_CQE_SKIP | + IORING_FEAT_LINKED_FILE; if (copy_to_user(params, p, sizeof(*p))) { ret = -EFAULT; @@ -10770,8 +10789,6 @@ static int __io_register_rsrc_update(struct io_ring_ctx *ctx, unsigned type, __u32 tmp; int err; - if (up->resv) - return -EINVAL; if (check_add_overflow(up->offset, nr_args, &tmp)) return -EOVERFLOW; err = io_rsrc_node_switch_start(ctx); @@ -10797,6 +10814,8 @@ static int io_register_files_update(struct io_ring_ctx *ctx, void __user *arg, memset(&up, 0, sizeof(up)); if (copy_from_user(&up, arg, sizeof(struct io_uring_rsrc_update))) return -EFAULT; + if (up.resv || up.resv2) + return -EINVAL; return __io_register_rsrc_update(ctx, IORING_RSRC_FILE, &up, nr_args); } @@ -10809,7 +10828,7 @@ static int io_register_rsrc_update(struct io_ring_ctx *ctx, void __user *arg, return -EINVAL; if (copy_from_user(&up, arg, sizeof(up))) return -EFAULT; - if (!up.nr || up.resv) + if (!up.nr || up.resv || up.resv2) return -EINVAL; return __io_register_rsrc_update(ctx, type, &up, up.nr); } @@ -10857,7 +10876,15 @@ static __cold int io_register_iowq_aff(struct io_ring_ctx *ctx, if (len > cpumask_size()) len = cpumask_size(); - if (copy_from_user(new_mask, arg, len)) { + if (in_compat_syscall()) { + ret = compat_get_bitmap(cpumask_bits(new_mask), + (const compat_ulong_t __user *)arg, + len * 8 /* CHAR_BIT */); + } else { + ret = copy_from_user(new_mask, arg, len); + } + + if (ret) { free_cpumask_var(new_mask); return -EFAULT; } diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 6c51a75d0be61..d020a2e81a24c 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -480,7 +480,8 @@ EXPORT_SYMBOL_GPL(iomap_releasepage); void iomap_invalidate_folio(struct folio *folio, size_t offset, size_t len) { - 
trace_iomap_invalidatepage(folio->mapping->host, offset, len); + trace_iomap_invalidatepage(folio->mapping->host, + folio_pos(folio) + offset, len); /* * If we're invalidating the entire folio, clear the dirty state diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 8e2f8275a2535..259e00046a8bd 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -842,27 +842,38 @@ EXPORT_SYMBOL(jbd2_journal_restart); */ void jbd2_journal_wait_updates(journal_t *journal) { - transaction_t *commit_transaction = journal->j_running_transaction; + DEFINE_WAIT(wait); - if (!commit_transaction) - return; + while (1) { + /* + * Note that the running transaction can get freed under us if + * this transaction is getting committed in + * jbd2_journal_commit_transaction() -> + * jbd2_journal_free_transaction(). This can only happen when we + * release j_state_lock -> schedule() -> acquire j_state_lock. + * Hence we must re-read j_running_transaction every time + * (after each j_state_lock release/acquire cycle), else it may + * lead to use-after-free of old freed transaction. 
+ */ + transaction_t *transaction = journal->j_running_transaction; - spin_lock(&commit_transaction->t_handle_lock); - while (atomic_read(&commit_transaction->t_updates)) { - DEFINE_WAIT(wait); + if (!transaction) + break; + spin_lock(&transaction->t_handle_lock); prepare_to_wait(&journal->j_wait_updates, &wait, - TASK_UNINTERRUPTIBLE); - if (atomic_read(&commit_transaction->t_updates)) { - spin_unlock(&commit_transaction->t_handle_lock); - write_unlock(&journal->j_state_lock); - schedule(); - write_lock(&journal->j_state_lock); - spin_lock(&commit_transaction->t_handle_lock); + TASK_UNINTERRUPTIBLE); + if (!atomic_read(&transaction->t_updates)) { + spin_unlock(&transaction->t_handle_lock); + finish_wait(&journal->j_wait_updates, &wait); + break; } + spin_unlock(&transaction->t_handle_lock); + write_unlock(&journal->j_state_lock); + schedule(); finish_wait(&journal->j_wait_updates, &wait); + write_lock(&journal->j_state_lock); } - spin_unlock(&commit_transaction->t_handle_lock); } /** @@ -877,8 +888,6 @@ void jbd2_journal_wait_updates(journal_t *journal) */ void jbd2_journal_lock_updates(journal_t *journal) { - DEFINE_WAIT(wait); - jbd2_might_wait_for_commit(journal); write_lock(&journal->j_state_lock); diff --git a/fs/jffs2/build.c b/fs/jffs2/build.c index b288c8ae1236b..837cd55fd4c5e 100644 --- a/fs/jffs2/build.c +++ b/fs/jffs2/build.c @@ -415,13 +415,15 @@ int jffs2_do_mount_fs(struct jffs2_sb_info *c) jffs2_free_ino_caches(c); jffs2_free_raw_node_refs(c); ret = -EIO; - goto out_free; + goto out_sum_exit; } jffs2_calc_trigger_levels(c); return 0; + out_sum_exit: + jffs2_sum_exit(c); out_free: kvfree(c->blocks); diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c index 2ac410477c4f4..71f03a5d36ed2 100644 --- a/fs/jffs2/fs.c +++ b/fs/jffs2/fs.c @@ -603,8 +603,8 @@ int jffs2_do_fill_super(struct super_block *sb, struct fs_context *fc) jffs2_free_ino_caches(c); jffs2_free_raw_node_refs(c); kvfree(c->blocks); - out_inohash: jffs2_clear_xattr_subsystem(c); + out_inohash: 
kfree(c->inocache_list); out_wbuf: jffs2_flash_cleanup(c); diff --git a/fs/jffs2/scan.c b/fs/jffs2/scan.c index b676056826beb..29671e33a1714 100644 --- a/fs/jffs2/scan.c +++ b/fs/jffs2/scan.c @@ -136,7 +136,7 @@ int jffs2_scan_medium(struct jffs2_sb_info *c) if (!s) { JFFS2_WARNING("Can't allocate memory for summary\n"); ret = -ENOMEM; - goto out; + goto out_buf; } } @@ -275,13 +275,15 @@ int jffs2_scan_medium(struct jffs2_sb_info *c) } ret = 0; out: + jffs2_sum_reset_collected(s); + kfree(s); + out_buf: if (buf_size) kfree(flashbuf); #ifndef __ECOS else mtd_unpoint(c->mtd, 0, c->mtd->size); #endif - kfree(s); return ret; } diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index 57ab424c05ff0..072821b50ab91 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c @@ -146,12 +146,13 @@ void jfs_evict_inode(struct inode *inode) dquot_initialize(inode); if (JFS_IP(inode)->fileset == FILESYSTEM_I) { + struct inode *ipimap = JFS_SBI(inode->i_sb)->ipimap; truncate_inode_pages_final(&inode->i_data); if (test_cflag(COMMIT_Freewmap, inode)) jfs_free_zero_link(inode); - if (JFS_SBI(inode->i_sb)->ipimap) + if (ipimap && JFS_IP(ipimap)->i_imap) diFree(inode); /* diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c index 91f4ec93dab1f..d8502f4989d9d 100644 --- a/fs/jfs/jfs_dmap.c +++ b/fs/jfs/jfs_dmap.c @@ -148,6 +148,7 @@ static const s8 budtab[256] = { * 0 - success * -ENOMEM - insufficient memory * -EIO - i/o error + * -EINVAL - wrong bmap data */ int dbMount(struct inode *ipbmap) { @@ -179,6 +180,12 @@ int dbMount(struct inode *ipbmap) bmp->db_nfree = le64_to_cpu(dbmp_le->dn_nfree); bmp->db_l2nbperpage = le32_to_cpu(dbmp_le->dn_l2nbperpage); bmp->db_numag = le32_to_cpu(dbmp_le->dn_numag); + if (!bmp->db_numag) { + release_metapage(mp); + kfree(bmp); + return -EINVAL; + } + bmp->db_maxlevel = le32_to_cpu(dbmp_le->dn_maxlevel); bmp->db_maxag = le32_to_cpu(dbmp_le->dn_maxag); bmp->db_agpref = le32_to_cpu(dbmp_le->dn_agpref); diff --git a/fs/minix/inode.c b/fs/minix/inode.c index 
a71f1cf894b9f..d4bd94234ef73 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -447,7 +447,8 @@ static const struct address_space_operations minix_aops = { .writepage = minix_writepage, .write_begin = minix_write_begin, .write_end = generic_write_end, - .bmap = minix_bmap + .bmap = minix_bmap, + .direct_IO = noop_direct_IO }; static const struct inode_operations minix_symlink_inode_operations = { diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index c343666d9a428..6464dde03705c 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -358,12 +358,11 @@ __be32 nfs4_callback_devicenotify(void *argp, void *resp, struct cb_process_state *cps) { struct cb_devicenotifyargs *args = argp; + const struct pnfs_layoutdriver_type *ld = NULL; uint32_t i; __be32 res = 0; - struct nfs_client *clp = cps->clp; - struct nfs_server *server = NULL; - if (!clp) { + if (!cps->clp) { res = cpu_to_be32(NFS4ERR_OP_NOT_IN_SESSION); goto out; } @@ -371,23 +370,15 @@ __be32 nfs4_callback_devicenotify(void *argp, void *resp, for (i = 0; i < args->ndevs; i++) { struct cb_devicenotifyitem *dev = &args->devs[i]; - if (!server || - server->pnfs_curr_ld->id != dev->cbd_layout_type) { - rcu_read_lock(); - list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) - if (server->pnfs_curr_ld && - server->pnfs_curr_ld->id == dev->cbd_layout_type) { - rcu_read_unlock(); - goto found; - } - rcu_read_unlock(); - continue; + if (!ld || ld->id != dev->cbd_layout_type) { + pnfs_put_layoutdriver(ld); + ld = pnfs_find_layoutdriver(dev->cbd_layout_type); + if (!ld) + continue; } - - found: - nfs4_delete_deviceid(server->pnfs_curr_ld, clp, &dev->cbd_dev_id); + nfs4_delete_deviceid(ld, cps->clp, &dev->cbd_dev_id); } - + pnfs_put_layoutdriver(ld); out: kfree(args->devs); return res; diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index f90de8043b0f9..8dcb08e1a885d 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -271,10 +271,6 @@ __be32 
decode_devicenotify_args(struct svc_rqst *rqstp, n = ntohl(*p++); if (n == 0) goto out; - if (n > ULONG_MAX / sizeof(*args->devs)) { - status = htonl(NFS4ERR_BADXDR); - goto out; - } args->devs = kmalloc_array(n, sizeof(*args->devs), GFP_KERNEL); if (!args->devs) { diff --git a/fs/nfs/client.c b/fs/nfs/client.c index d1f34229e11ab..e828504cc396c 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -857,7 +857,8 @@ static int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, str } if (clp->rpc_ops->discover_trunking != NULL && - (server->caps & NFS_CAP_FS_LOCATIONS)) { + (server->caps & NFS_CAP_FS_LOCATIONS && + (server->flags & NFS_MOUNT_TRUNK_DISCOVERY))) { error = clp->rpc_ops->discover_trunking(server, mntfh); if (error < 0) return error; diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 75cb1cbe4cdea..911bdb35eb085 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1853,16 +1853,6 @@ const struct dentry_operations nfs4_dentry_operations = { }; EXPORT_SYMBOL_GPL(nfs4_dentry_operations); -static fmode_t flags_to_mode(int flags) -{ - fmode_t res = (__force fmode_t)flags & FMODE_EXEC; - if ((flags & O_ACCMODE) != O_WRONLY) - res |= FMODE_READ; - if ((flags & O_ACCMODE) != O_RDONLY) - res |= FMODE_WRITE; - return res; -} - static struct nfs_open_context *create_nfs_open_context(struct dentry *dentry, int open_flags, struct file *filp) { return alloc_nfs_open_context(dentry, flags_to_mode(open_flags), filp); diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index eabfdab543c8c..11c566d8769f6 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -173,8 +173,8 @@ ssize_t nfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) VM_BUG_ON(iov_iter_count(iter) != PAGE_SIZE); if (iov_iter_rw(iter) == READ) - return nfs_file_direct_read(iocb, iter); - return nfs_file_direct_write(iocb, iter); + return nfs_file_direct_read(iocb, iter, true); + return nfs_file_direct_write(iocb, iter, true); } static void nfs_direct_release_pages(struct page **pages, unsigned int 
npages) @@ -425,6 +425,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, * nfs_file_direct_read - file direct read operation for NFS files * @iocb: target I/O control block * @iter: vector of user buffers into which to read data + * @swap: flag indicating this is swap IO, not O_DIRECT IO * * We use this function for direct reads instead of calling * generic_file_aio_read() in order to avoid gfar's check to see if @@ -440,7 +441,8 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, * client must read the updated atime from the server back into its * cache. */ -ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter) +ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter, + bool swap) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; @@ -482,12 +484,14 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter) if (iter_is_iovec(iter)) dreq->flags = NFS_ODIRECT_SHOULD_DIRTY; - nfs_start_io_direct(inode); + if (!swap) + nfs_start_io_direct(inode); NFS_I(inode)->read_io += count; requested = nfs_direct_read_schedule_iovec(dreq, iter, iocb->ki_pos); - nfs_end_io_direct(inode); + if (!swap) + nfs_end_io_direct(inode); if (requested > 0) { result = nfs_direct_wait(dreq); @@ -790,7 +794,7 @@ static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = { */ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, struct iov_iter *iter, - loff_t pos) + loff_t pos, int ioflags) { struct nfs_pageio_descriptor desc; struct inode *inode = dreq->inode; @@ -798,7 +802,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, size_t requested_bytes = 0; size_t wsize = max_t(size_t, NFS_SERVER(inode)->wsize, PAGE_SIZE); - nfs_pageio_init_write(&desc, inode, FLUSH_COND_STABLE, false, + nfs_pageio_init_write(&desc, inode, ioflags, false, &nfs_direct_write_completion_ops); desc.pg_dreq = 
dreq; get_dreq(dreq); @@ -876,6 +880,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, * nfs_file_direct_write - file direct write operation for NFS files * @iocb: target I/O control block * @iter: vector of user buffers from which to write data + * @swap: flag indicating this is swap IO, not O_DIRECT IO * * We use this function for direct writes instead of calling * generic_file_aio_write() in order to avoid taking the inode @@ -892,7 +897,8 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, * Note that O_APPEND is not supported for NFS direct writes, as there * is no atomic O_APPEND write facility in the NFS protocol. */ -ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter) +ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter, + bool swap) { ssize_t result, requested; size_t count; @@ -906,7 +912,11 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter) dfprintk(FILE, "NFS: direct write(%pD2, %zd@%Ld)\n", file, iov_iter_count(iter), (long long) iocb->ki_pos); - result = generic_write_checks(iocb, iter); + if (swap) + /* bypass generic checks */ + result = iov_iter_count(iter); + else + result = generic_write_checks(iocb, iter); if (result <= 0) return result; count = result; @@ -937,16 +947,22 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter) dreq->iocb = iocb; pnfs_init_ds_commit_info_ops(&dreq->ds_cinfo, inode); - nfs_start_io_direct(inode); + if (swap) { + requested = nfs_direct_write_schedule_iovec(dreq, iter, pos, + FLUSH_STABLE); + } else { + nfs_start_io_direct(inode); - requested = nfs_direct_write_schedule_iovec(dreq, iter, pos); + requested = nfs_direct_write_schedule_iovec(dreq, iter, pos, + FLUSH_COND_STABLE); - if (mapping->nrpages) { - invalidate_inode_pages2_range(mapping, - pos >> PAGE_SHIFT, end); - } + if (mapping->nrpages) { + invalidate_inode_pages2_range(mapping, + pos >> PAGE_SHIFT, end); + } - 
nfs_end_io_direct(inode); + nfs_end_io_direct(inode); + } if (requested > 0) { result = nfs_direct_wait(dreq); diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 76d76acbc5943..d8583f57ff99f 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -162,7 +162,7 @@ nfs_file_read(struct kiocb *iocb, struct iov_iter *to) ssize_t result; if (iocb->ki_flags & IOCB_DIRECT) - return nfs_file_direct_read(iocb, to); + return nfs_file_direct_read(iocb, to, false); dprintk("NFS: read(%pD2, %zu@%lu)\n", iocb->ki_filp, @@ -619,7 +619,7 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from) return result; if (iocb->ki_flags & IOCB_DIRECT) - return nfs_file_direct_write(iocb, from); + return nfs_file_direct_write(iocb, from, false); dprintk("NFS: write(%pD2, %zu@%Ld)\n", file, iov_iter_count(from), (long long) iocb->ki_pos); diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c index ea17fa1f31ecb..e2d59bb5e6bbe 100644 --- a/fs/nfs/fs_context.c +++ b/fs/nfs/fs_context.c @@ -80,6 +80,7 @@ enum nfs_param { Opt_source, Opt_tcp, Opt_timeo, + Opt_trunkdiscovery, Opt_udp, Opt_v, Opt_vers, @@ -180,6 +181,7 @@ static const struct fs_parameter_spec nfs_fs_parameters[] = { fsparam_string("source", Opt_source), fsparam_flag ("tcp", Opt_tcp), fsparam_u32 ("timeo", Opt_timeo), + fsparam_flag_no("trunkdiscovery", Opt_trunkdiscovery), fsparam_flag ("udp", Opt_udp), fsparam_flag ("v2", Opt_v), fsparam_flag ("v3", Opt_v), @@ -529,6 +531,12 @@ static int nfs_fs_context_parse_param(struct fs_context *fc, else ctx->flags &= ~NFS_MOUNT_NOCTO; break; + case Opt_trunkdiscovery: + if (result.negated) + ctx->flags &= ~NFS_MOUNT_TRUNK_DISCOVERY; + else + ctx->flags |= NFS_MOUNT_TRUNK_DISCOVERY; + break; case Opt_ac: if (result.negated) ctx->flags |= NFS_MOUNT_NOAC; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index d96baa4450e39..e4fb939a2904b 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1180,7 +1180,6 @@ int nfs_open(struct inode *inode, struct file *filp) nfs_fscache_open_file(inode, 
filp); return 0; } -EXPORT_SYMBOL_GPL(nfs_open); /* * This function is called whenever some part of NFS notices that diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 2de7c56a1fbed..465e39ff018d4 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -42,6 +42,16 @@ static inline bool nfs_lookup_is_soft_revalidate(const struct dentry *dentry) return true; } +static inline fmode_t flags_to_mode(int flags) +{ + fmode_t res = (__force fmode_t)flags & FMODE_EXEC; + if ((flags & O_ACCMODE) != O_WRONLY) + res |= FMODE_READ; + if ((flags & O_ACCMODE) != O_RDONLY) + res |= FMODE_WRITE; + return res; +} + /* * Note: RFC 1813 doesn't limit the number of auth flavors that * a server can return, so make something up. @@ -573,6 +583,13 @@ nfs_write_match_verf(const struct nfs_writeverf *verf, !nfs_write_verifier_cmp(&req->wb_verf, &verf->verifier); } +static inline gfp_t nfs_io_gfp_mask(void) +{ + if (current->flags & PF_WQ_WORKER) + return GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN; + return GFP_KERNEL; +} + /* unlink.c */ extern struct rpc_task * nfs_async_rename(struct inode *old_dir, struct inode *new_dir, diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index 7fba7711e6b3a..3d5ba43f44bb6 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -949,7 +949,7 @@ int nfs2_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, error = decode_filename_inline(xdr, &entry->name, &entry->len); if (unlikely(error)) - return error; + return -EAGAIN; /* * The type (size and byte order) of nfscookie isn't defined in diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 9274c9c5efea6..7ab60ad98776f 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -1967,7 +1967,6 @@ int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, bool plus) { struct user_namespace *userns = rpc_userns(entry->server->client); - struct nfs_entry old = *entry; __be32 *p; int error; u64 new_cookie; @@ -1987,15 +1986,15 @@ int nfs3_decode_dirent(struct xdr_stream *xdr, struct 
nfs_entry *entry, error = decode_fileid3(xdr, &entry->ino); if (unlikely(error)) - return error; + return -EAGAIN; error = decode_inline_filename3(xdr, &entry->name, &entry->len); if (unlikely(error)) - return error; + return -EAGAIN; error = decode_cookie3(xdr, &new_cookie); if (unlikely(error)) - return error; + return -EAGAIN; entry->d_type = DT_UNKNOWN; @@ -2003,7 +2002,7 @@ int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, entry->fattr->valid = 0; error = decode_post_op_attr(xdr, entry->fattr, userns); if (unlikely(error)) - return error; + return -EAGAIN; if (entry->fattr->valid & NFS_ATTR_FATTR_V3) entry->d_type = nfs_umode_to_dtype(entry->fattr->mode); @@ -2018,11 +2017,8 @@ int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, return -EAGAIN; if (*p != xdr_zero) { error = decode_nfs_fh3(xdr, entry->fh); - if (unlikely(error)) { - if (error == -E2BIG) - goto out_truncated; - return error; - } + if (unlikely(error)) + return -EAGAIN; } else zero_nfs_fh3(entry->fh); } @@ -2031,11 +2027,6 @@ int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, entry->cookie = new_cookie; return 0; - -out_truncated: - dprintk("NFS: directory entry contains invalid file handle\n"); - *entry = old; - return -EAGAIN; } /* @@ -2228,6 +2219,7 @@ static int decode_fsinfo3resok(struct xdr_stream *xdr, /* ignore properties */ result->lease_time = 0; result->change_attr_type = NFS4_CHANGE_TYPE_IS_UNDEFINED; + result->xattr_support = 0; return 0; } diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 32129446beca6..ca878d021faba 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -591,8 +591,10 @@ static int _nfs42_proc_copy_notify(struct file *src, struct file *dst, ctx = get_nfs_open_context(nfs_file_open_context(src)); l_ctx = nfs_get_lock_context(ctx); - if (IS_ERR(l_ctx)) - return PTR_ERR(l_ctx); + if (IS_ERR(l_ctx)) { + status = PTR_ERR(l_ctx); + goto out; + } status = 
nfs4_set_rw_stateid(&args->cna_src_stateid, ctx, l_ctx, FMODE_READ); @@ -600,7 +602,7 @@ static int _nfs42_proc_copy_notify(struct file *src, struct file *dst, if (status) { if (status == -EAGAIN) status = -NFS4ERR_BAD_STATEID; - return status; + goto out; } status = nfs4_call_sync(src_server->client, src_server, &msg, @@ -609,6 +611,7 @@ static int _nfs42_proc_copy_notify(struct file *src, struct file *dst, if (status == -ENOTSUPP) src_server->caps &= ~NFS_CAP_COPY_NOTIFY; +out: put_nfs_open_context(nfs_file_open_context(src)); return status; } diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index e79ae4cbc395e..e34af48fb4f41 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -32,6 +32,7 @@ nfs4_file_open(struct inode *inode, struct file *filp) struct dentry *parent = NULL; struct inode *dir; unsigned openflags = filp->f_flags; + fmode_t f_mode; struct iattr attr; int err; @@ -50,8 +51,9 @@ nfs4_file_open(struct inode *inode, struct file *filp) if (err) return err; + f_mode = filp->f_mode; if ((openflags & O_ACCMODE) == 3) - return nfs_open(inode, filp); + f_mode |= flags_to_mode(openflags); /* We can't create new files here */ openflags &= ~(O_CREAT|O_EXCL); @@ -59,7 +61,7 @@ nfs4_file_open(struct inode *inode, struct file *filp) parent = dget_parent(dentry); dir = d_inode(parent); - ctx = alloc_nfs_open_context(file_dentry(filp), filp->f_mode, filp); + ctx = alloc_nfs_open_context(file_dentry(filp), f_mode, filp); err = PTR_ERR(ctx); if (IS_ERR(ctx)) goto out; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 0e0db6c276196..c36fa0d0d438b 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -8333,6 +8333,7 @@ nfs4_bind_one_conn_to_session_done(struct rpc_task *task, void *calldata) case -NFS4ERR_DEADSESSION: nfs4_schedule_session_recovery(clp->cl_session, task->tk_status); + return; } if (args->dir == NFS4_CDFC4_FORE_OR_BOTH && res->dir != NFS4_CDFS4_BOTH) { diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 
f5a62c0d999b4..0f4818627ef0c 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -49,6 +49,7 @@ #include #include #include +#include #include @@ -2560,9 +2561,17 @@ static void nfs4_layoutreturn_any_run(struct nfs_client *clp) static void nfs4_state_manager(struct nfs_client *clp) { + unsigned int memflags; int status = 0; const char *section = "", *section_sep = ""; + /* + * State recovery can deadlock if the direct reclaim code tries + * start NFS writeback. So ensure memory allocations are all + * GFP_NOFS. + */ + memflags = memalloc_nofs_save(); + /* Ensure exclusive access to NFSv4 state */ do { trace_nfs4_state_mgr(clp); @@ -2657,6 +2666,7 @@ static void nfs4_state_manager(struct nfs_client *clp) clear_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state); } + memalloc_nofs_restore(memflags); nfs4_end_drain_session(clp); nfs4_clear_state_manager_bit(clp); @@ -2674,6 +2684,7 @@ static void nfs4_state_manager(struct nfs_client *clp) return; if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0) return; + memflags = memalloc_nofs_save(); } while (refcount_read(&clp->cl_count) > 1 && !signalled()); goto out_drain; @@ -2686,6 +2697,7 @@ static void nfs4_state_manager(struct nfs_client *clp) clp->cl_hostname, -status); ssleep(1); out_drain: + memalloc_nofs_restore(memflags); nfs4_end_drain_session(clp); nfs4_clear_state_manager_bit(clp); } diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index ad7f83dc9a2df..9157dd19b8b4f 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -90,10 +90,10 @@ void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos) } } -static inline struct nfs_page * -nfs_page_alloc(void) +static inline struct nfs_page *nfs_page_alloc(void) { - struct nfs_page *p = kmem_cache_zalloc(nfs_page_cachep, GFP_KERNEL); + struct nfs_page *p = + kmem_cache_zalloc(nfs_page_cachep, nfs_io_gfp_mask()); if (p) INIT_LIST_HEAD(&p->wb_list); return p; @@ -892,7 +892,7 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor 
*desc, struct nfs_commit_info cinfo; struct nfs_page_array *pg_array = &hdr->page_array; unsigned int pagecount, pageused; - gfp_t gfp_flags = GFP_KERNEL; + gfp_t gfp_flags = nfs_io_gfp_mask(); pagecount = nfs_page_array_len(mirror->pg_base, mirror->pg_count); pg_array->npages = pagecount; @@ -979,7 +979,7 @@ nfs_pageio_alloc_mirrors(struct nfs_pageio_descriptor *desc, desc->pg_mirrors_dynamic = NULL; if (mirror_count == 1) return desc->pg_mirrors_static; - ret = kmalloc_array(mirror_count, sizeof(*ret), GFP_KERNEL); + ret = kmalloc_array(mirror_count, sizeof(*ret), nfs_io_gfp_mask()); if (ret != NULL) { for (i = 0; i < mirror_count; i++) nfs_pageio_mirror_init(&ret[i], desc->pg_bsize); @@ -1218,6 +1218,7 @@ static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc) do { list_splice_init(&mirror->pg_list, &head); + mirror->pg_recoalesce = 0; while (!list_empty(&head)) { struct nfs_page *req; diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 7c9090a28e5c3..7ddd003ab8b1a 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -92,6 +92,17 @@ find_pnfs_driver(u32 id) return local; } +const struct pnfs_layoutdriver_type *pnfs_find_layoutdriver(u32 id) +{ + return find_pnfs_driver(id); +} + +void pnfs_put_layoutdriver(const struct pnfs_layoutdriver_type *ld) +{ + if (ld) + module_put(ld->owner); +} + void unset_pnfs_layoutdriver(struct nfs_server *nfss) { diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index f4d7548d67b24..07f11489e4e9f 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -234,6 +234,8 @@ struct pnfs_devicelist { extern int pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *); extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *); +extern const struct pnfs_layoutdriver_type *pnfs_find_layoutdriver(u32 id); +extern void pnfs_put_layoutdriver(const struct pnfs_layoutdriver_type *ld); /* nfs4proc.c */ extern size_t max_response_pages(struct nfs_server *server); diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index 
316f68f96e573..657c242a18ff1 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -419,7 +419,7 @@ static struct nfs_commit_data * pnfs_bucket_fetch_commitdata(struct pnfs_commit_bucket *bucket, struct nfs_commit_info *cinfo) { - struct nfs_commit_data *data = nfs_commitdata_alloc(false); + struct nfs_commit_data *data = nfs_commitdata_alloc(); if (!data) return NULL; @@ -515,7 +515,11 @@ pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages, unsigned int nreq = 0; if (!list_empty(mds_pages)) { - data = nfs_commitdata_alloc(true); + data = nfs_commitdata_alloc(); + if (!data) { + nfs_retry_commit(mds_pages, NULL, cinfo, -1); + return -ENOMEM; + } data->ds_commit_index = -1; list_splice_init(mds_pages, &data->pages); list_add_tail(&data->list, &list); diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 73dcaa99fa9ba..e3570c656b0f9 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -92,6 +92,7 @@ nfs_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, info->maxfilesize = 0x7FFFFFFF; info->lease_time = 0; info->change_attr_type = NFS4_CHANGE_TYPE_IS_UNDEFINED; + info->xattr_support = 0; return 0; } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 987a187bd39aa..9388503030992 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -70,27 +70,17 @@ static mempool_t *nfs_wdata_mempool; static struct kmem_cache *nfs_cdata_cachep; static mempool_t *nfs_commit_mempool; -struct nfs_commit_data *nfs_commitdata_alloc(bool never_fail) +struct nfs_commit_data *nfs_commitdata_alloc(void) { struct nfs_commit_data *p; - if (never_fail) - p = mempool_alloc(nfs_commit_mempool, GFP_NOIO); - else { - /* It is OK to do some reclaim, not no safe to wait - * for anything to be returned to the pool. - * mempool_alloc() cannot handle that particular combination, - * so we need two separate attempts. 
- */ + p = kmem_cache_zalloc(nfs_cdata_cachep, nfs_io_gfp_mask()); + if (!p) { p = mempool_alloc(nfs_commit_mempool, GFP_NOWAIT); - if (!p) - p = kmem_cache_alloc(nfs_cdata_cachep, GFP_NOIO | - __GFP_NOWARN | __GFP_NORETRY); if (!p) return NULL; + memset(p, 0, sizeof(*p)); } - - memset(p, 0, sizeof(*p)); INIT_LIST_HEAD(&p->pages); return p; } @@ -104,9 +94,15 @@ EXPORT_SYMBOL_GPL(nfs_commit_free); static struct nfs_pgio_header *nfs_writehdr_alloc(void) { - struct nfs_pgio_header *p = mempool_alloc(nfs_wdata_mempool, GFP_KERNEL); + struct nfs_pgio_header *p; - memset(p, 0, sizeof(*p)); + p = kmem_cache_zalloc(nfs_wdata_cachep, nfs_io_gfp_mask()); + if (!p) { + p = mempool_alloc(nfs_wdata_mempool, GFP_NOWAIT); + if (!p) + return NULL; + memset(p, 0, sizeof(*p)); + } p->rw_mode = FMODE_WRITE; return p; } @@ -316,7 +312,10 @@ static void nfs_mapping_set_error(struct page *page, int error) struct address_space *mapping = page_file_mapping(page); SetPageError(page); - mapping_set_error(mapping, error); + filemap_set_wb_err(mapping, error); + if (mapping->host) + errseq_set(&mapping->host->i_sb->s_wb_err, + error == -ENOSPC ? 
-ENOSPC : -EIO); nfs_set_pageerror(mapping); } @@ -1409,6 +1408,8 @@ static void nfs_initiate_write(struct nfs_pgio_header *hdr, { int priority = flush_task_priority(how); + if (IS_SWAPFILE(hdr->inode)) + task_setup_data->flags |= RPC_TASK_SWAPPER; task_setup_data->priority = priority; rpc_ops->write_setup(hdr, msg, &task_setup_data->rpc_client); trace_nfs_initiate_write(hdr); @@ -1821,7 +1822,11 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how, if (list_empty(head)) return 0; - data = nfs_commitdata_alloc(true); + data = nfs_commitdata_alloc(); + if (!data) { + nfs_retry_commit(head, NULL, cinfo, -1); + return -ENOMEM; + } /* Set up the argument struct */ nfs_init_commit(data, head, NULL, cinfo); diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 8bc807c5fea4c..496f7b3f75237 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -235,6 +235,13 @@ nfsd_file_check_write_error(struct nfsd_file *nf) return filemap_check_wb_err(file->f_mapping, READ_ONCE(file->f_wb_err)); } +static void +nfsd_file_flush(struct nfsd_file *nf) +{ + if (nf->nf_file && vfs_fsync(nf->nf_file, 1) != 0) + nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id)); +} + static void nfsd_file_do_unhash(struct nfsd_file *nf) { @@ -302,11 +309,14 @@ nfsd_file_put(struct nfsd_file *nf) return; } - filemap_flush(nf->nf_file->f_mapping); is_hashed = test_bit(NFSD_FILE_HASHED, &nf->nf_flags) != 0; - nfsd_file_put_noref(nf); - if (is_hashed) + if (!is_hashed) { + nfsd_file_flush(nf); + nfsd_file_put_noref(nf); + } else { + nfsd_file_put_noref(nf); nfsd_file_schedule_laundrette(); + } if (atomic_long_read(&nfsd_filecache_count) >= NFSD_FILE_LRU_LIMIT) nfsd_file_gc(); } @@ -327,6 +337,7 @@ nfsd_file_dispose_list(struct list_head *dispose) while(!list_empty(dispose)) { nf = list_first_entry(dispose, struct nfsd_file, nf_lru); list_del(&nf->nf_lru); + nfsd_file_flush(nf); nfsd_file_put_noref(nf); } } @@ -340,6 +351,7 @@ nfsd_file_dispose_list_sync(struct 
list_head *dispose) while(!list_empty(dispose)) { nf = list_first_entry(dispose, struct nfsd_file, nf_lru); list_del(&nf->nf_lru); + nfsd_file_flush(nf); if (!refcount_dec_and_test(&nf->nf_ref)) continue; if (nfsd_file_free(nf)) @@ -632,7 +644,7 @@ nfsd_file_cache_init(void) if (!nfsd_filecache_wq) goto out; - nfsd_file_hashtbl = kcalloc(NFSD_FILE_HASH_SIZE, + nfsd_file_hashtbl = kvcalloc(NFSD_FILE_HASH_SIZE, sizeof(*nfsd_file_hashtbl), GFP_KERNEL); if (!nfsd_file_hashtbl) { pr_err("nfsd: unable to allocate nfsd_file_hashtbl\n"); @@ -700,7 +712,7 @@ nfsd_file_cache_init(void) nfsd_file_slab = NULL; kmem_cache_destroy(nfsd_file_mark_slab); nfsd_file_mark_slab = NULL; - kfree(nfsd_file_hashtbl); + kvfree(nfsd_file_hashtbl); nfsd_file_hashtbl = NULL; destroy_workqueue(nfsd_filecache_wq); nfsd_filecache_wq = NULL; @@ -811,7 +823,7 @@ nfsd_file_cache_shutdown(void) fsnotify_wait_marks_destroyed(); kmem_cache_destroy(nfsd_file_mark_slab); nfsd_file_mark_slab = NULL; - kfree(nfsd_file_hashtbl); + kvfree(nfsd_file_hashtbl); nfsd_file_hashtbl = NULL; destroy_workqueue(nfsd_filecache_wq); nfsd_filecache_wq = NULL; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 32063733443d4..f3b71fd1d1341 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4711,6 +4711,14 @@ nfsd_break_deleg_cb(struct file_lock *fl) return ret; } +/** + * nfsd_breaker_owns_lease - Check if lease conflict was resolved + * @fl: Lock state to check + * + * Return values: + * %true: Lease conflict was resolved + * %false: Lease conflict was not resolved. 
+ */ static bool nfsd_breaker_owns_lease(struct file_lock *fl) { struct nfs4_delegation *dl = fl->fl_owner; @@ -4718,11 +4726,11 @@ static bool nfsd_breaker_owns_lease(struct file_lock *fl) struct nfs4_client *clp; if (!i_am_nfsd()) - return NULL; + return false; rqst = kthread_data(current); /* Note rq_prog == NFS_ACL_PROGRAM is also possible: */ if (rqst->rq_prog != NFS_PROGRAM || rqst->rq_vers < 4) - return NULL; + return false; clp = *(rqst->rq_lease_breaker); return dl->dl_stid.sc_client == clp; } diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index 18b8eb43a19bc..fcdab8a8a41f4 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -230,7 +230,7 @@ nfsd_proc_write(struct svc_rqst *rqstp) unsigned long cnt = argp->len; unsigned int nvecs; - dprintk("nfsd: WRITE %s %d bytes at %d\n", + dprintk("nfsd: WRITE %s %u bytes at %d\n", SVCFH_fmt(&argp->fh), argp->len, argp->offset); diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h index 528fb299430e6..852f71580bd06 100644 --- a/fs/nfsd/xdr.h +++ b/fs/nfsd/xdr.h @@ -32,7 +32,7 @@ struct nfsd_readargs { struct nfsd_writeargs { svc_fh fh; __u32 offset; - int len; + __u32 len; struct xdr_buf payload; }; diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index 4474adb393ca8..517b71c73aa96 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c @@ -1881,6 +1881,10 @@ int ntfs_read_inode_mount(struct inode *vi) } /* Now allocate memory for the attribute list. 
*/ ni->attr_list_size = (u32)ntfs_attr_size(a); + if (!ni->attr_list_size) { + ntfs_error(sb, "Attr_list_size is zero"); + goto put_err_out; + } ni->attr_list = ntfs_malloc_nofs(ni->attr_list_size); if (!ni->attr_list) { ntfs_error(sb, "Not enough memory to allocate buffer " diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c index f033de733adb3..effe92c7d6937 100644 --- a/fs/ocfs2/quota_global.c +++ b/fs/ocfs2/quota_global.c @@ -337,7 +337,6 @@ void ocfs2_unlock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex) /* Read information header from global quota file */ int ocfs2_global_read_info(struct super_block *sb, int type) { - struct inode *gqinode = NULL; unsigned int ino[OCFS2_MAXQUOTAS] = { USER_QUOTA_SYSTEM_INODE, GROUP_QUOTA_SYSTEM_INODE }; struct ocfs2_global_disk_dqinfo dinfo; @@ -346,29 +345,31 @@ int ocfs2_global_read_info(struct super_block *sb, int type) u64 pcount; int status; + oinfo->dqi_gi.dqi_sb = sb; + oinfo->dqi_gi.dqi_type = type; + ocfs2_qinfo_lock_res_init(&oinfo->dqi_gqlock, oinfo); + oinfo->dqi_gi.dqi_entry_size = sizeof(struct ocfs2_global_disk_dqblk); + oinfo->dqi_gi.dqi_ops = &ocfs2_global_ops; + oinfo->dqi_gqi_bh = NULL; + oinfo->dqi_gqi_count = 0; + /* Read global header */ - gqinode = ocfs2_get_system_file_inode(OCFS2_SB(sb), ino[type], + oinfo->dqi_gqinode = ocfs2_get_system_file_inode(OCFS2_SB(sb), ino[type], OCFS2_INVALID_SLOT); - if (!gqinode) { + if (!oinfo->dqi_gqinode) { mlog(ML_ERROR, "failed to get global quota inode (type=%d)\n", type); status = -EINVAL; goto out_err; } - oinfo->dqi_gi.dqi_sb = sb; - oinfo->dqi_gi.dqi_type = type; - oinfo->dqi_gi.dqi_entry_size = sizeof(struct ocfs2_global_disk_dqblk); - oinfo->dqi_gi.dqi_ops = &ocfs2_global_ops; - oinfo->dqi_gqi_bh = NULL; - oinfo->dqi_gqi_count = 0; - oinfo->dqi_gqinode = gqinode; + status = ocfs2_lock_global_qf(oinfo, 0); if (status < 0) { mlog_errno(status); goto out_err; } - status = ocfs2_extent_map_get_blocks(gqinode, 0, &oinfo->dqi_giblk, + status = 
ocfs2_extent_map_get_blocks(oinfo->dqi_gqinode, 0, &oinfo->dqi_giblk, &pcount, NULL); if (status < 0) goto out_unlock; diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c index 0e4b16d4c037f..b1a8b046f4c22 100644 --- a/fs/ocfs2/quota_local.c +++ b/fs/ocfs2/quota_local.c @@ -702,8 +702,6 @@ static int ocfs2_local_read_info(struct super_block *sb, int type) info->dqi_priv = oinfo; oinfo->dqi_type = type; INIT_LIST_HEAD(&oinfo->dqi_chunk); - oinfo->dqi_gqinode = NULL; - ocfs2_qinfo_lock_res_init(&oinfo->dqi_gqlock, oinfo); oinfo->dqi_rec = NULL; oinfo->dqi_lqi_bh = NULL; oinfo->dqi_libh = NULL; diff --git a/fs/proc/bootconfig.c b/fs/proc/bootconfig.c index 6d8d4bf208377..2e244ada1f970 100644 --- a/fs/proc/bootconfig.c +++ b/fs/proc/bootconfig.c @@ -32,6 +32,8 @@ static int __init copy_xbc_key_value_list(char *dst, size_t size) int ret = 0; key = kzalloc(XBC_KEYLEN_MAX, GFP_KERNEL); + if (!key) + return -ENOMEM; xbc_for_each_key_value(leaf, val) { ret = xbc_node_compose_key(leaf, key, XBC_KEYLEN_MAX); diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index f243cb5e6a4fb..e26162f102ffe 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -143,21 +143,22 @@ static void pstore_timer_kick(void) mod_timer(&pstore_timer, jiffies + msecs_to_jiffies(pstore_update_ms)); } -/* - * Should pstore_dump() wait for a concurrent pstore_dump()? If - * not, the current pstore_dump() will report a failure to dump - * and return. - */ -static bool pstore_cannot_wait(enum kmsg_dump_reason reason) +static bool pstore_cannot_block_path(enum kmsg_dump_reason reason) { - /* In NMI path, pstore shouldn't block regardless of reason. */ + /* + * In case of NMI path, pstore shouldn't be blocked + * regardless of reason. + */ if (in_nmi()) return true; switch (reason) { /* In panic case, other cpus are stopped by smp_send_stop(). */ case KMSG_DUMP_PANIC: - /* Emergency restart shouldn't be blocked. 
*/ + /* + * Emergency restart shouldn't be blocked by spinning on + * pstore_info::buf_lock. + */ case KMSG_DUMP_EMERG: return true; default: @@ -389,21 +390,19 @@ static void pstore_dump(struct kmsg_dumper *dumper, unsigned long total = 0; const char *why; unsigned int part = 1; + unsigned long flags = 0; int ret; why = kmsg_dump_reason_str(reason); - if (down_trylock(&psinfo->buf_lock)) { - /* Failed to acquire lock: give up if we cannot wait. */ - if (pstore_cannot_wait(reason)) { - pr_err("dump skipped in %s path: may corrupt error record\n", - in_nmi() ? "NMI" : why); - return; - } - if (down_interruptible(&psinfo->buf_lock)) { - pr_err("could not grab semaphore?!\n"); + if (pstore_cannot_block_path(reason)) { + if (!spin_trylock_irqsave(&psinfo->buf_lock, flags)) { + pr_err("dump skipped in %s path because of concurrent dump\n", + in_nmi() ? "NMI" : why); return; } + } else { + spin_lock_irqsave(&psinfo->buf_lock, flags); } kmsg_dump_rewind(&iter); @@ -467,8 +466,7 @@ static void pstore_dump(struct kmsg_dumper *dumper, total += record.size; part++; } - - up(&psinfo->buf_lock); + spin_unlock_irqrestore(&psinfo->buf_lock, flags); } static struct kmsg_dumper pstore_dumper = { @@ -594,7 +592,7 @@ int pstore_register(struct pstore_info *psi) psi->write_user = pstore_write_user_compat; psinfo = psi; mutex_init(&psinfo->read_mutex); - sema_init(&psinfo->buf_lock, 1); + spin_lock_init(&psinfo->buf_lock); if (psi->flags & PSTORE_FLAGS_DMESG) allocate_buf_for_compression(); diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index dbe72f664abf3..86151889548e3 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -349,20 +349,97 @@ static int ubifs_create(struct user_namespace *mnt_userns, struct inode *dir, return err; } -static int do_tmpfile(struct inode *dir, struct dentry *dentry, - umode_t mode, struct inode **whiteout) +static struct inode *create_whiteout(struct inode *dir, struct dentry *dentry) +{ + int err; + umode_t mode = S_IFCHR | WHITEOUT_MODE; + struct inode 
*inode; + struct ubifs_info *c = dir->i_sb->s_fs_info; + struct fscrypt_name nm; + + /* + * Create an inode('nlink = 1') for whiteout without updating journal, + * let ubifs_jnl_rename() store it on flash to complete rename whiteout + * atomically. + */ + + dbg_gen("dent '%pd', mode %#hx in dir ino %lu", + dentry, mode, dir->i_ino); + + err = fscrypt_setup_filename(dir, &dentry->d_name, 0, &nm); + if (err) + return ERR_PTR(err); + + inode = ubifs_new_inode(c, dir, mode); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + goto out_free; + } + + init_special_inode(inode, inode->i_mode, WHITEOUT_DEV); + ubifs_assert(c, inode->i_op == &ubifs_file_inode_operations); + + err = ubifs_init_security(dir, inode, &dentry->d_name); + if (err) + goto out_inode; + + /* The dir size is updated by do_rename. */ + insert_inode_hash(inode); + + return inode; + +out_inode: + make_bad_inode(inode); + iput(inode); +out_free: + fscrypt_free_filename(&nm); + ubifs_err(c, "cannot create whiteout file, error %d", err); + return ERR_PTR(err); +} + +/** + * lock_2_inodes - a wrapper for locking two UBIFS inodes. + * @inode1: first inode + * @inode2: second inode + * + * We do not implement any tricks to guarantee strict lock ordering, because + * VFS has already done it for us on the @i_mutex. So this is just a simple + * wrapper function. + */ +static void lock_2_inodes(struct inode *inode1, struct inode *inode2) +{ + mutex_lock_nested(&ubifs_inode(inode1)->ui_mutex, WB_MUTEX_1); + mutex_lock_nested(&ubifs_inode(inode2)->ui_mutex, WB_MUTEX_2); +} + +/** + * unlock_2_inodes - a wrapper for unlocking two UBIFS inodes. 
+ * @inode1: first inode + * @inode2: second inode + */ +static void unlock_2_inodes(struct inode *inode1, struct inode *inode2) +{ + mutex_unlock(&ubifs_inode(inode2)->ui_mutex); + mutex_unlock(&ubifs_inode(inode1)->ui_mutex); +} + +static int ubifs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir, + struct dentry *dentry, umode_t mode) { struct inode *inode; struct ubifs_info *c = dir->i_sb->s_fs_info; - struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1}; + struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1, + .dirtied_ino = 1}; struct ubifs_budget_req ino_req = { .dirtied_ino = 1 }; - struct ubifs_inode *ui, *dir_ui = ubifs_inode(dir); + struct ubifs_inode *ui; int err, instantiated = 0; struct fscrypt_name nm; /* - * Budget request settings: new dirty inode, new direntry, - * budget for dirtied inode will be released via writeback. + * Budget request settings: new inode, new direntry, changing the + * parent directory inode. + * Allocate budget separately for new dirtied inode, the budget will + * be released via writeback. 
*/ dbg_gen("dent '%pd', mode %#hx in dir ino %lu", @@ -392,42 +469,30 @@ static int do_tmpfile(struct inode *dir, struct dentry *dentry, } ui = ubifs_inode(inode); - if (whiteout) { - init_special_inode(inode, inode->i_mode, WHITEOUT_DEV); - ubifs_assert(c, inode->i_op == &ubifs_file_inode_operations); - } - err = ubifs_init_security(dir, inode, &dentry->d_name); if (err) goto out_inode; mutex_lock(&ui->ui_mutex); insert_inode_hash(inode); - - if (whiteout) { - mark_inode_dirty(inode); - drop_nlink(inode); - *whiteout = inode; - } else { - d_tmpfile(dentry, inode); - } + d_tmpfile(dentry, inode); ubifs_assert(c, ui->dirty); instantiated = 1; mutex_unlock(&ui->ui_mutex); - mutex_lock(&dir_ui->ui_mutex); + lock_2_inodes(dir, inode); err = ubifs_jnl_update(c, dir, &nm, inode, 1, 0); if (err) goto out_cancel; - mutex_unlock(&dir_ui->ui_mutex); + unlock_2_inodes(dir, inode); ubifs_release_budget(c, &req); return 0; out_cancel: - mutex_unlock(&dir_ui->ui_mutex); + unlock_2_inodes(dir, inode); out_inode: make_bad_inode(inode); if (!instantiated) @@ -441,12 +506,6 @@ static int do_tmpfile(struct inode *dir, struct dentry *dentry, return err; } -static int ubifs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir, - struct dentry *dentry, umode_t mode) -{ - return do_tmpfile(dir, dentry, mode, NULL); -} - /** * vfs_dent_type - get VFS directory entry type. * @type: UBIFS directory entry type @@ -660,32 +719,6 @@ static int ubifs_dir_release(struct inode *dir, struct file *file) return 0; } -/** - * lock_2_inodes - a wrapper for locking two UBIFS inodes. - * @inode1: first inode - * @inode2: second inode - * - * We do not implement any tricks to guarantee strict lock ordering, because - * VFS has already done it for us on the @i_mutex. So this is just a simple - * wrapper function. 
- */ -static void lock_2_inodes(struct inode *inode1, struct inode *inode2) -{ - mutex_lock_nested(&ubifs_inode(inode1)->ui_mutex, WB_MUTEX_1); - mutex_lock_nested(&ubifs_inode(inode2)->ui_mutex, WB_MUTEX_2); -} - -/** - * unlock_2_inodes - a wrapper for unlocking two UBIFS inodes. - * @inode1: first inode - * @inode2: second inode - */ -static void unlock_2_inodes(struct inode *inode1, struct inode *inode2) -{ - mutex_unlock(&ubifs_inode(inode2)->ui_mutex); - mutex_unlock(&ubifs_inode(inode1)->ui_mutex); -} - static int ubifs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) { @@ -949,7 +982,8 @@ static int ubifs_mkdir(struct user_namespace *mnt_userns, struct inode *dir, struct ubifs_inode *dir_ui = ubifs_inode(dir); struct ubifs_info *c = dir->i_sb->s_fs_info; int err, sz_change; - struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1 }; + struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1, + .dirtied_ino = 1}; struct fscrypt_name nm; /* @@ -1264,17 +1298,19 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry, .dirtied_ino = 3 }; struct ubifs_budget_req ino_req = { .dirtied_ino = 1, .dirtied_ino_d = ALIGN(old_inode_ui->data_len, 8) }; + struct ubifs_budget_req wht_req; struct timespec64 time; unsigned int saved_nlink; struct fscrypt_name old_nm, new_nm; /* - * Budget request settings: deletion direntry, new direntry, removing - * the old inode, and changing old and new parent directory inodes. + * Budget request settings: + * req: deletion direntry, new direntry, removing the old inode, + * and changing old and new parent directory inodes. + * + * wht_req: new whiteout inode for RENAME_WHITEOUT. * - * However, this operation also marks the target inode as dirty and - * does not write it, so we allocate budget for the target inode - * separately. + * ino_req: marks the target inode as dirty and does not write it. 
*/ dbg_gen("dent '%pd' ino %lu in dir ino %lu to dent '%pd' in dir ino %lu flags 0x%x", @@ -1331,20 +1367,44 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry, goto out_release; } - err = do_tmpfile(old_dir, old_dentry, S_IFCHR | WHITEOUT_MODE, &whiteout); - if (err) { + /* + * The whiteout inode without dentry is pinned in memory, + * umount won't happen during rename process because we + * got parent dentry. + */ + whiteout = create_whiteout(old_dir, old_dentry); + if (IS_ERR(whiteout)) { + err = PTR_ERR(whiteout); kfree(dev); goto out_release; } - spin_lock(&whiteout->i_lock); - whiteout->i_state |= I_LINKABLE; - spin_unlock(&whiteout->i_lock); - whiteout_ui = ubifs_inode(whiteout); whiteout_ui->data = dev; whiteout_ui->data_len = ubifs_encode_dev(dev, MKDEV(0, 0)); ubifs_assert(c, !whiteout_ui->dirty); + + memset(&wht_req, 0, sizeof(struct ubifs_budget_req)); + wht_req.new_ino = 1; + wht_req.new_ino_d = ALIGN(whiteout_ui->data_len, 8); + /* + * To avoid deadlock between space budget (holds ui_mutex and + * waits wb work) and writeback work(waits ui_mutex), do space + * budget before ubifs inodes locked. + */ + err = ubifs_budget_space(c, &wht_req); + if (err) { + /* + * Whiteout inode can not be written on flash by + * ubifs_jnl_write_inode(), because it's neither + * dirty nor zero-nlink. + */ + iput(whiteout); + goto out_release; + } + + /* Add the old_dentry size to the old_dir size. 
*/ + old_sz -= CALC_DENT_SIZE(fname_len(&old_nm)); } lock_4_inodes(old_dir, new_dir, new_inode, whiteout); @@ -1416,29 +1476,11 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry, sync = IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir); if (unlink && IS_SYNC(new_inode)) sync = 1; - } - - if (whiteout) { - struct ubifs_budget_req wht_req = { .dirtied_ino = 1, - .dirtied_ino_d = \ - ALIGN(ubifs_inode(whiteout)->data_len, 8) }; - - err = ubifs_budget_space(c, &wht_req); - if (err) { - kfree(whiteout_ui->data); - whiteout_ui->data_len = 0; - iput(whiteout); - goto out_release; - } - - inc_nlink(whiteout); - mark_inode_dirty(whiteout); - - spin_lock(&whiteout->i_lock); - whiteout->i_state &= ~I_LINKABLE; - spin_unlock(&whiteout->i_lock); - - iput(whiteout); + /* + * S_SYNC flag of whiteout inherits from the old_dir, and we + * have already checked the old dir inode. So there is no need + * to check whiteout. + */ } err = ubifs_jnl_rename(c, old_dir, old_inode, &old_nm, new_dir, @@ -1449,6 +1491,11 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry, unlock_4_inodes(old_dir, new_dir, new_inode, whiteout); ubifs_release_budget(c, &req); + if (whiteout) { + ubifs_release_budget(c, &wht_req); + iput(whiteout); + } + mutex_lock(&old_inode_ui->ui_mutex); release = old_inode_ui->dirty; mark_inode_dirty_sync(old_inode); @@ -1457,11 +1504,16 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry, if (release) ubifs_release_budget(c, &ino_req); if (IS_SYNC(old_inode)) - err = old_inode->i_sb->s_op->write_inode(old_inode, NULL); + /* + * Rename finished here. Although old inode cannot be updated + * on flash, old ctime is not a big problem, don't return err + * code to userspace. 
+ */ + old_inode->i_sb->s_op->write_inode(old_inode, NULL); fscrypt_free_filename(&old_nm); fscrypt_free_filename(&new_nm); - return err; + return 0; out_cancel: if (unlink) { @@ -1482,11 +1534,11 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry, inc_nlink(old_dir); } } + unlock_4_inodes(old_dir, new_dir, new_inode, whiteout); if (whiteout) { - drop_nlink(whiteout); + ubifs_release_budget(c, &wht_req); iput(whiteout); } - unlock_4_inodes(old_dir, new_dir, new_inode, whiteout); out_release: ubifs_release_budget(c, &ino_req); ubifs_release_budget(c, &req); diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 5cfa28cd00cdc..6b45a037a0471 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -570,7 +570,7 @@ static int ubifs_write_end(struct file *file, struct address_space *mapping, } if (!PagePrivate(page)) { - SetPagePrivate(page); + attach_page_private(page, (void *)1); atomic_long_inc(&c->dirty_pg_cnt); __set_page_dirty_nobuffers(page); } @@ -947,7 +947,7 @@ static int do_writepage(struct page *page, int len) release_existing_page_budget(c); atomic_long_dec(&c->dirty_pg_cnt); - ClearPagePrivate(page); + detach_page_private(page); ClearPageChecked(page); kunmap(page); @@ -1304,7 +1304,7 @@ static void ubifs_invalidatepage(struct page *page, unsigned int offset, release_existing_page_budget(c); atomic_long_dec(&c->dirty_pg_cnt); - ClearPagePrivate(page); + detach_page_private(page); ClearPageChecked(page); } @@ -1471,8 +1471,8 @@ static int ubifs_migrate_page(struct address_space *mapping, return rc; if (PagePrivate(page)) { - ClearPagePrivate(page); - SetPagePrivate(newpage); + detach_page_private(page); + attach_page_private(newpage, (void *)1); } if (mode != MIGRATE_SYNC_NO_COPY) @@ -1496,7 +1496,7 @@ static int ubifs_releasepage(struct page *page, gfp_t unused_gfp_flags) return 0; ubifs_assert(c, PagePrivate(page)); ubifs_assert(c, 0); - ClearPagePrivate(page); + detach_page_private(page); ClearPageChecked(page); return 1; } @@ -1567,7 
+1567,7 @@ static vm_fault_t ubifs_vm_page_mkwrite(struct vm_fault *vmf) else { if (!PageChecked(page)) ubifs_convert_page_budget(c); - SetPagePrivate(page); + attach_page_private(page, (void *)1); atomic_long_inc(&c->dirty_pg_cnt); __set_page_dirty_nobuffers(page); } diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c index 789a7813f3fa2..1607a3c76681a 100644 --- a/fs/ubifs/io.c +++ b/fs/ubifs/io.c @@ -854,16 +854,42 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) */ n = aligned_len >> c->max_write_shift; if (n) { - n <<= c->max_write_shift; + int m = n - 1; + dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum, wbuf->offs); - err = ubifs_leb_write(c, wbuf->lnum, buf + written, - wbuf->offs, n); + + if (m) { + /* '(n-1)<max_write_shift < len' is always true. */ + m <<= c->max_write_shift; + err = ubifs_leb_write(c, wbuf->lnum, buf + written, + wbuf->offs, m); + if (err) + goto out; + wbuf->offs += m; + aligned_len -= m; + len -= m; + written += m; + } + + /* + * The non-written len of buf may be less than 'n' because + * parameter 'len' is not 8 bytes aligned, so here we read + * min(len, n) bytes from buf. 
+ */ + n = 1 << c->max_write_shift; + memcpy(wbuf->buf, buf + written, min(len, n)); + if (n > len) { + ubifs_assert(c, n - len < 8); + ubifs_pad(c, wbuf->buf + len, n - len); + } + + err = ubifs_leb_write(c, wbuf->lnum, wbuf->buf, wbuf->offs, n); if (err) goto out; wbuf->offs += n; aligned_len -= n; - len -= n; + len -= min(len, n); written += n; } diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c index c6a8634877803..71bcebe45f9c5 100644 --- a/fs/ubifs/ioctl.c +++ b/fs/ubifs/ioctl.c @@ -108,7 +108,7 @@ static int setflags(struct inode *inode, int flags) struct ubifs_inode *ui = ubifs_inode(inode); struct ubifs_info *c = inode->i_sb->s_fs_info; struct ubifs_budget_req req = { .dirtied_ino = 1, - .dirtied_ino_d = ui->data_len }; + .dirtied_ino_d = ALIGN(ui->data_len, 8) }; err = ubifs_budget_space(c, &req); if (err) diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c index 8ea680dba61e3..75dab0ae3939d 100644 --- a/fs/ubifs/journal.c +++ b/fs/ubifs/journal.c @@ -1207,9 +1207,9 @@ int ubifs_jnl_xrename(struct ubifs_info *c, const struct inode *fst_dir, * @sync: non-zero if the write-buffer has to be synchronized * * This function implements the re-name operation which may involve writing up - * to 4 inodes and 2 directory entries. It marks the written inodes as clean - * and returns zero on success. In case of failure, a negative error code is - * returned. + * to 4 inodes(new inode, whiteout inode, old and new parent directory inodes) + * and 2 directory entries. It marks the written inodes as clean and returns + * zero on success. In case of failure, a negative error code is returned. 
*/ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, const struct inode *old_inode, @@ -1222,14 +1222,15 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, void *p; union ubifs_key key; struct ubifs_dent_node *dent, *dent2; - int err, dlen1, dlen2, ilen, lnum, offs, len, orphan_added = 0; + int err, dlen1, dlen2, ilen, wlen, lnum, offs, len, orphan_added = 0; int aligned_dlen1, aligned_dlen2, plen = UBIFS_INO_NODE_SZ; int last_reference = !!(new_inode && new_inode->i_nlink == 0); int move = (old_dir != new_dir); - struct ubifs_inode *new_ui; + struct ubifs_inode *new_ui, *whiteout_ui; u8 hash_old_dir[UBIFS_HASH_ARR_SZ]; u8 hash_new_dir[UBIFS_HASH_ARR_SZ]; u8 hash_new_inode[UBIFS_HASH_ARR_SZ]; + u8 hash_whiteout_inode[UBIFS_HASH_ARR_SZ]; u8 hash_dent1[UBIFS_HASH_ARR_SZ]; u8 hash_dent2[UBIFS_HASH_ARR_SZ]; @@ -1249,9 +1250,20 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, } else ilen = 0; + if (whiteout) { + whiteout_ui = ubifs_inode(whiteout); + ubifs_assert(c, mutex_is_locked(&whiteout_ui->ui_mutex)); + ubifs_assert(c, whiteout->i_nlink == 1); + ubifs_assert(c, !whiteout_ui->dirty); + wlen = UBIFS_INO_NODE_SZ; + wlen += whiteout_ui->data_len; + } else + wlen = 0; + aligned_dlen1 = ALIGN(dlen1, 8); aligned_dlen2 = ALIGN(dlen2, 8); - len = aligned_dlen1 + aligned_dlen2 + ALIGN(ilen, 8) + ALIGN(plen, 8); + len = aligned_dlen1 + aligned_dlen2 + ALIGN(ilen, 8) + + ALIGN(wlen, 8) + ALIGN(plen, 8); if (move) len += plen; @@ -1313,6 +1325,15 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, p += ALIGN(ilen, 8); } + if (whiteout) { + pack_inode(c, p, whiteout, 0); + err = ubifs_node_calc_hash(c, p, hash_whiteout_inode); + if (err) + goto out_release; + + p += ALIGN(wlen, 8); + } + if (!move) { pack_inode(c, p, old_dir, 1); err = ubifs_node_calc_hash(c, p, hash_old_dir); @@ -1352,6 +1373,9 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, if (new_inode) 
ubifs_wbuf_add_ino_nolock(&c->jheads[BASEHD].wbuf, new_inode->i_ino); + if (whiteout) + ubifs_wbuf_add_ino_nolock(&c->jheads[BASEHD].wbuf, + whiteout->i_ino); } release_head(c, BASEHD); @@ -1368,8 +1392,6 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, err = ubifs_tnc_add_nm(c, &key, lnum, offs, dlen2, hash_dent2, old_nm); if (err) goto out_ro; - - ubifs_delete_orphan(c, whiteout->i_ino); } else { err = ubifs_add_dirt(c, lnum, dlen2); if (err) @@ -1390,6 +1412,15 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, offs += ALIGN(ilen, 8); } + if (whiteout) { + ino_key_init(c, &key, whiteout->i_ino); + err = ubifs_tnc_add(c, &key, lnum, offs, wlen, + hash_whiteout_inode); + if (err) + goto out_ro; + offs += ALIGN(wlen, 8); + } + ino_key_init(c, &key, old_dir->i_ino); err = ubifs_tnc_add(c, &key, lnum, offs, plen, hash_old_dir); if (err) @@ -1410,6 +1441,11 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, new_ui->synced_i_size = new_ui->ui_size; spin_unlock(&new_ui->ui_lock); } + /* + * No need to mark whiteout inode clean. + * Whiteout doesn't have non-zero size, no need to update + * synced_i_size for whiteout_ui. 
+ */ mark_inode_clean(c, ubifs_inode(old_dir)); if (move) mark_inode_clean(c, ubifs_inode(new_dir)); diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h index c08758b6b3642..c05d2ce9b6cd8 100644 --- a/include/asm-generic/mshyperv.h +++ b/include/asm-generic/mshyperv.h @@ -269,6 +269,7 @@ bool hv_isolation_type_snp(void); u64 hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_size); void hyperv_cleanup(void); bool hv_query_ext_cap(u64 cap_query); +void hv_setup_dma_ops(struct device *dev, bool coherent); void *hv_map_memory(void *addr, unsigned long size); void hv_unmap_memory(void *addr); #else /* CONFIG_HYPERV */ diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index 2c68a545ffa7d..71942a1c642d4 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -565,10 +565,14 @@ static inline void tlb_flush_p4d_range(struct mmu_gather *tlb, #define tlb_remove_huge_tlb_entry(h, tlb, ptep, address) \ do { \ unsigned long _sz = huge_page_size(h); \ - if (_sz == PMD_SIZE) \ - tlb_flush_pmd_range(tlb, address, _sz); \ - else if (_sz == PUD_SIZE) \ + if (_sz >= P4D_SIZE) \ + tlb_flush_p4d_range(tlb, address, _sz); \ + else if (_sz >= PUD_SIZE) \ tlb_flush_pud_range(tlb, address, _sz); \ + else if (_sz >= PMD_SIZE) \ + tlb_flush_pmd_range(tlb, address, _sz); \ + else \ + tlb_flush_pte_range(tlb, address, _sz); \ __tlb_remove_tlb_entry(tlb, ptep, address); \ } while (0) diff --git a/include/crypto/drbg.h b/include/crypto/drbg.h index af5ad51d3eef8..b12ae9bdebf43 100644 --- a/include/crypto/drbg.h +++ b/include/crypto/drbg.h @@ -283,4 +283,11 @@ enum drbg_prefixes { DRBG_PREFIX3 }; +extern int drbg_alloc_state(struct drbg_state *drbg); +extern void drbg_dealloc_state(struct drbg_state *drbg); +extern void drbg_convert_tfm_core(const char *cra_driver_name, int *coreref, + bool *pr); +extern const struct drbg_core drbg_cores[]; +extern unsigned short drbg_sec_strength(drbg_flag_t flags); + #endif /* _DRBG_H 
*/ diff --git a/crypto/jitterentropy.h b/include/crypto/internal/jitterentropy.h similarity index 100% rename from crypto/jitterentropy.h rename to include/crypto/internal/jitterentropy.h diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h index b501d0badaea2..5f88e484515a1 100644 --- a/include/drm/drm_connector.h +++ b/include/drm/drm_connector.h @@ -592,10 +592,16 @@ struct drm_display_info { bool rgb_quant_range_selectable; /** - * @edid_hdmi_dc_modes: Mask of supported hdmi deep color modes. Even - * more stuff redundant with @bus_formats. + * @edid_hdmi_rgb444_dc_modes: Mask of supported hdmi deep color modes + * in RGB 4:4:4. Even more stuff redundant with @bus_formats. */ - u8 edid_hdmi_dc_modes; + u8 edid_hdmi_rgb444_dc_modes; + + /** + * @edid_hdmi_ycbcr444_dc_modes: Mask of supported hdmi deep color + * modes in YCbCr 4:4:4. Even more stuff redundant with @bus_formats. + */ + u8 edid_hdmi_ycbcr444_dc_modes; /** * @cea_rev: CEA revision of the HDMI sink. diff --git a/include/drm/drm_dp_helper.h b/include/drm/drm_dp_helper.h index 30359e434c3f3..fdf3cf6ccc021 100644 --- a/include/drm/drm_dp_helper.h +++ b/include/drm/drm_dp_helper.h @@ -456,7 +456,7 @@ struct drm_panel; #define DP_FEC_CAPABILITY_1 0x091 /* 2.0 */ /* DP-HDMI2.1 PCON DSC ENCODER SUPPORT */ -#define DP_PCON_DSC_ENCODER_CAP_SIZE 0xC /* 0x9E - 0x92 */ +#define DP_PCON_DSC_ENCODER_CAP_SIZE 0xD /* 0x92 through 0x9E */ #define DP_PCON_DSC_ENCODER 0x092 # define DP_PCON_DSC_ENCODER_SUPPORTED (1 << 0) # define DP_PCON_DSC_PPS_ENC_OVERRIDE (1 << 1) @@ -1528,8 +1528,6 @@ u8 drm_dp_get_adjust_request_pre_emphasis(const u8 link_status[DP_LINK_STATUS_SI int lane); u8 drm_dp_get_adjust_tx_ffe_preset(const u8 link_status[DP_LINK_STATUS_SIZE], int lane); -u8 drm_dp_get_adjust_request_post_cursor(const u8 link_status[DP_LINK_STATUS_SIZE], - unsigned int lane); #define DP_BRANCH_OUI_HEADER_SIZE 0xc #define DP_RECEIVER_CAP_SIZE 0xf diff --git a/include/drm/drm_modeset_lock.h 
b/include/drm/drm_modeset_lock.h index b84693fbd2b50..ec4f543c3d950 100644 --- a/include/drm/drm_modeset_lock.h +++ b/include/drm/drm_modeset_lock.h @@ -34,6 +34,7 @@ struct drm_modeset_lock; * struct drm_modeset_acquire_ctx - locking context (see ww_acquire_ctx) * @ww_ctx: base acquire ctx * @contended: used internally for -EDEADLK handling + * @stack_depot: used internally for contention debugging * @locked: list of held locks * @trylock_only: trylock mode used in atomic contexts/panic notifiers * @interruptible: whether interruptible locking should be used. diff --git a/include/linux/atomic/atomic-arch-fallback.h b/include/linux/atomic/atomic-arch-fallback.h index a3dba31df01e9..6db58d1808665 100644 --- a/include/linux/atomic/atomic-arch-fallback.h +++ b/include/linux/atomic/atomic-arch-fallback.h @@ -151,7 +151,16 @@ static __always_inline int arch_atomic_read_acquire(const atomic_t *v) { - return smp_load_acquire(&(v)->counter); + int ret; + + if (__native_word(atomic_t)) { + ret = smp_load_acquire(&(v)->counter); + } else { + ret = arch_atomic_read(v); + __atomic_acquire_fence(); + } + + return ret; } #define arch_atomic_read_acquire arch_atomic_read_acquire #endif @@ -160,7 +169,12 @@ arch_atomic_read_acquire(const atomic_t *v) static __always_inline void arch_atomic_set_release(atomic_t *v, int i) { - smp_store_release(&(v)->counter, i); + if (__native_word(atomic_t)) { + smp_store_release(&(v)->counter, i); + } else { + __atomic_release_fence(); + arch_atomic_set(v, i); + } } #define arch_atomic_set_release arch_atomic_set_release #endif @@ -1258,7 +1272,16 @@ arch_atomic_dec_if_positive(atomic_t *v) static __always_inline s64 arch_atomic64_read_acquire(const atomic64_t *v) { - return smp_load_acquire(&(v)->counter); + s64 ret; + + if (__native_word(atomic64_t)) { + ret = smp_load_acquire(&(v)->counter); + } else { + ret = arch_atomic64_read(v); + __atomic_acquire_fence(); + } + + return ret; } #define arch_atomic64_read_acquire arch_atomic64_read_acquire 
#endif @@ -1267,7 +1290,12 @@ arch_atomic64_read_acquire(const atomic64_t *v) static __always_inline void arch_atomic64_set_release(atomic64_t *v, s64 i) { - smp_store_release(&(v)->counter, i); + if (__native_word(atomic64_t)) { + smp_store_release(&(v)->counter, i); + } else { + __atomic_release_fence(); + arch_atomic64_set(v, i); + } } #define arch_atomic64_set_release arch_atomic64_set_release #endif @@ -2358,4 +2386,4 @@ arch_atomic64_dec_if_positive(atomic64_t *v) #endif #endif /* _LINUX_ATOMIC_FALLBACK_H */ -// cca554917d7ea73d5e3e7397dd70c484cad9b2c4 +// 8e2cc06bc0d2c0967d2f8424762bd48555ee40ae diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 049cf9421d831..f821b72433613 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -87,6 +87,9 @@ struct coredump_params { loff_t written; loff_t pos; loff_t to_skip; + int vma_count; + size_t vma_data_size; + struct core_vma_metadata *vma_meta; }; /* diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index b4de2010fba55..bc5c04d711bbc 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -24,6 +24,7 @@ #include #include #include +#include /* percpu_counter batch for blkg_[rw]stats, per-cpu drift doesn't matter */ #define BLKG_STAT_CPU_BATCH (INT_MAX / 2) @@ -604,6 +605,21 @@ static inline void blkcg_clear_delay(struct blkcg_gq *blkg) atomic_dec(&blkg->blkcg->css.cgroup->congestion_count); } +/** + * blk_cgroup_mergeable - Determine whether to allow or disallow merges + * @rq: request to merge into + * @bio: bio to merge + * + * @bio and @rq should belong to the same cgroup and their issue_as_root should + * match. The latter is necessary as we don't want to throttle e.g. a metadata + * update because it happens to be next to a regular IO. 
+ */ +static inline bool blk_cgroup_mergeable(struct request *rq, struct bio *bio) +{ + return rq->bio->bi_blkg == bio->bi_blkg && + bio_issue_as_root_blkg(rq->bio) == bio_issue_as_root_blkg(bio); +} + void blk_cgroup_bio_start(struct bio *bio); void blkcg_add_delay(struct blkcg_gq *blkg, u64 now, u64 delta); void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay); @@ -659,6 +675,7 @@ static inline void blkg_put(struct blkcg_gq *blkg) { } static inline bool blkcg_punt_bio_submit(struct bio *bio) { return false; } static inline void blkcg_bio_issue_init(struct bio *bio) { } static inline void blk_cgroup_bio_start(struct bio *bio) { } +static inline bool blk_cgroup_mergeable(struct request *rq, struct bio *bio) { return true; } #define blk_queue_for_each_rl(rl, q) \ for ((rl) = &(q)->root_rl; (rl); (rl) = NULL) diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index fe065c394fff6..86c0f85df8bb4 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -317,7 +317,8 @@ enum { BIO_TRACE_COMPLETION, /* bio_endio() should trace the final completion * of this bio. 
*/ BIO_CGROUP_ACCT, /* has been accounted to a cgroup */ - BIO_TRACKED, /* set if bio goes through the rq_qos path */ + BIO_QOS_THROTTLED, /* bio went through rq_qos throttle path */ + BIO_QOS_MERGED, /* bio went through rq_qos merge path */ BIO_REMAPPED, BIO_ZONE_WRITE_LOCKED, /* Owns a zoned device zone write lock */ BIO_PERCPU_CACHE, /* can participate in per-cpu alloc cache */ diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 75c151413fda8..b145025f3eaca 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -432,6 +432,18 @@ static inline void cgroup_put(struct cgroup *cgrp) css_put(&cgrp->self); } +extern struct mutex cgroup_mutex; + +static inline void cgroup_lock(void) +{ + mutex_lock(&cgroup_mutex); +} + +static inline void cgroup_unlock(void) +{ + mutex_unlock(&cgroup_mutex); +} + /** * task_css_set_check - obtain a task's css_set with extra access conditions * @task: the task to obtain css_set for @@ -446,7 +458,6 @@ static inline void cgroup_put(struct cgroup *cgrp) * as locks used during the cgroup_subsys::attach() methods. 
*/ #ifdef CONFIG_PROVE_RCU -extern struct mutex cgroup_mutex; extern spinlock_t css_set_lock; #define task_css_set_check(task, __c) \ rcu_dereference_check((task)->cgroups, \ @@ -707,6 +718,8 @@ struct cgroup; static inline u64 cgroup_id(const struct cgroup *cgrp) { return 1; } static inline void css_get(struct cgroup_subsys_state *css) {} static inline void css_put(struct cgroup_subsys_state *css) {} +static inline void cgroup_lock(void) {} +static inline void cgroup_unlock(void) {} static inline int cgroup_attach_task_all(struct task_struct *from, struct task_struct *t) { return 0; } static inline int cgroupstats_build(struct cgroupstats *stats, diff --git a/include/linux/coredump.h b/include/linux/coredump.h index 248a68c668b45..aa12ec94fae28 100644 --- a/include/linux/coredump.h +++ b/include/linux/coredump.h @@ -12,6 +12,8 @@ struct core_vma_metadata { unsigned long start, end; unsigned long flags; unsigned long dump_size; + unsigned long pgoff; + struct file *file; }; /* @@ -25,9 +27,6 @@ extern int dump_emit(struct coredump_params *cprm, const void *addr, int nr); extern int dump_align(struct coredump_params *cprm, int align); int dump_user_range(struct coredump_params *cprm, unsigned long start, unsigned long len); -int dump_vma_snapshot(struct coredump_params *cprm, int *vma_count, - struct core_vma_metadata **vma_meta, - size_t *vma_data_size_ptr); extern void do_coredump(const kernel_siginfo_t *siginfo); #else static inline void do_coredump(const kernel_siginfo_t *siginfo) {} diff --git a/include/linux/fb.h b/include/linux/fb.h index 02f362c661c80..3d7306c9a7065 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -502,6 +502,7 @@ struct fb_info { } *apertures; bool skip_vt_switch; /* no VT switch on suspend/resume required */ + bool forced_out; /* set when being removed by another driver */ }; static inline struct apertures_struct *alloc_apertures(unsigned int max_num) { diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 
b0728c8ad90ce..f8996b46f430e 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -218,6 +218,15 @@ struct gpio_irq_chip { */ bool per_parent_data; + /** + * @initialized: + * + * Flag to track GPIO chip irq member's initialization. + * This flag will make sure GPIO chip irq members are not used + * before they are initialized. + */ + bool initialized; + /** * @init_hw: optional routine to initialize hardware before * an IRQ chip will be added. This is quite useful when diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 9367f1cb2e3c4..f40754caaefa4 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -579,7 +579,16 @@ enum NR_SOFTIRQS }; -#define SOFTIRQ_STOP_IDLE_MASK (~(1 << RCU_SOFTIRQ)) +/* + * The following vectors can be safely ignored after ksoftirqd is parked: + * + * _ RCU: + * 1) rcutree_migrate_callbacks() migrates the queue. + * 2) rcu_report_dead() reports the final quiescent states. + * + * _ IRQ_POLL: irq_poll_cpu_dead() migrates the queue + */ +#define SOFTIRQ_HOTPLUG_SAFE_MASK (BIT(RCU_SOFTIRQ) | BIT(IRQ_POLL_SOFTIRQ)) /* map softirq index to softirq name. update 'softirq_to_name' in * kernel/softirq.c when adding a new softirq. 
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index a59d25f193857..b8641dc0ee661 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -51,7 +51,7 @@ struct ipv6_devconf { __s32 use_optimistic; #endif #ifdef CONFIG_IPV6_MROUTE - __s32 mc_forwarding; + atomic_t mc_forwarding; #endif __s32 disable_ipv6; __s32 drop_unicast_in_l2_multicast; diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index 93d270ca0c567..fbec0c019e3b1 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -160,6 +160,7 @@ static inline void generic_handle_irq_desc(struct irq_desc *desc) int handle_irq_desc(struct irq_desc *desc); int generic_handle_irq(unsigned int irq); +int generic_dispatch_irq(unsigned int irq); #ifdef CONFIG_IRQ_DOMAIN /* diff --git a/include/linux/kfence.h b/include/linux/kfence.h index f49e64222628a..726857a4b6805 100644 --- a/include/linux/kfence.h +++ b/include/linux/kfence.h @@ -204,6 +204,22 @@ static __always_inline __must_check bool kfence_free(void *addr) */ bool __must_check kfence_handle_page_fault(unsigned long addr, bool is_write, struct pt_regs *regs); +#ifdef CONFIG_PRINTK +struct kmem_obj_info; +/** + * __kfence_obj_info() - fill kmem_obj_info struct + * @kpp: kmem_obj_info to be filled + * @object: the object + * + * Return: + * * false - not a KFENCE object + * * true - a KFENCE object, filled @kpp + * + * Copies information to @kpp for KFENCE objects. 
+ */ +bool __kfence_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab); +#endif + #else /* CONFIG_KFENCE */ static inline bool is_kfence_address(const void *addr) { return false; } @@ -221,6 +237,14 @@ static inline bool __must_check kfence_handle_page_fault(unsigned long addr, boo return false; } +#ifdef CONFIG_PRINTK +struct kmem_obj_info; +static inline bool __kfence_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab) +{ + return false; +} +#endif + #endif #endif /* _LINUX_KFENCE_H */ diff --git a/include/linux/ksm.h b/include/linux/ksm.h index a38a5bca1ba58..f85a184bf6203 100644 --- a/include/linux/ksm.h +++ b/include/linux/ksm.h @@ -19,6 +19,10 @@ struct stable_node; struct mem_cgroup; #ifdef CONFIG_KSM +int ksm_madvise_merge(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long *vm_flags); +int ksm_madvise_unmerge(struct vm_area_struct *vma, unsigned long start, + unsigned long end, unsigned long *vm_flags); int ksm_madvise(struct vm_area_struct *vma, unsigned long start, unsigned long end, int advice, unsigned long *vm_flags); int __ksm_enter(struct mm_struct *mm); diff --git a/include/linux/lrng.h b/include/linux/lrng.h new file mode 100644 index 0000000000000..3e8f93b53c849 --- /dev/null +++ b/include/linux/lrng.h @@ -0,0 +1,81 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ +/* + * Copyright (C) 2018 - 2021, Stephan Mueller + */ + +#ifndef _LRNG_H +#define _LRNG_H + +#include +#include +#include + +/* + * struct lrng_crypto_cb - cryptographic callback functions + * @lrng_drng_name Name of DRNG + * @lrng_hash_name Name of Hash used for reading entropy pool + * @lrng_drng_alloc: Allocate DRNG -- the provided integer should be + * used for sanity checks. 
+ * return: allocated data structure or PTR_ERR on + * error + * @lrng_drng_dealloc: Deallocate DRNG + * @lrng_drng_seed_helper: Seed the DRNG with data of arbitrary length + * drng: is pointer to data structure allocated + * with lrng_drng_alloc + * return: >= 0 on success, < 0 on error + * @lrng_drng_generate_helper: Generate random numbers from the DRNG with + * arbitrary length + * @lrng_hash_alloc: Allocate the hash for reading the entropy pool + * return: allocated data structure (NULL is + * success too) or ERR_PTR on error + * @lrng_hash_dealloc: Deallocate Hash + * @lrng_hash_digestsize: Return the digestsize for the used hash to read + * out entropy pool + * hash: is pointer to data structure allocated + * with lrng_hash_alloc + * return: size of digest of hash in bytes + * @lrng_hash_init: Initialize hash + * hash: is pointer to data structure allocated + * with lrng_hash_alloc + * return: 0 on success, < 0 on error + * @lrng_hash_update: Update hash operation + * hash: is pointer to data structure allocated + * with lrng_hash_alloc + * return: 0 on success, < 0 on error + * @lrng_hash_final Final hash operation + * hash: is pointer to data structure allocated + * with lrng_hash_alloc + * return: 0 on success, < 0 on error + * @lrng_hash_desc_zero Zeroization of hash state buffer + * + * Assumptions: + * + * 1. Hash operation will not sleep + * 2. The hash' volatile state information is provided with *shash by caller. 
+ */ +struct lrng_crypto_cb { + const char *(*lrng_drng_name)(void); + const char *(*lrng_hash_name)(void); + void *(*lrng_drng_alloc)(u32 sec_strength); + void (*lrng_drng_dealloc)(void *drng); + int (*lrng_drng_seed_helper)(void *drng, const u8 *inbuf, u32 inbuflen); + int (*lrng_drng_generate_helper)(void *drng, u8 *outbuf, u32 outbuflen); + void *(*lrng_hash_alloc)(void); + void (*lrng_hash_dealloc)(void *hash); + u32 (*lrng_hash_digestsize)(void *hash); + int (*lrng_hash_init)(struct shash_desc *shash, void *hash); + int (*lrng_hash_update)(struct shash_desc *shash, const u8 *inbuf, + u32 inbuflen); + int (*lrng_hash_final)(struct shash_desc *shash, u8 *digest); + void (*lrng_hash_desc_zero)(struct shash_desc *shash); +}; + +/* Register cryptographic backend */ +#ifdef CONFIG_LRNG_DRNG_SWITCH +int lrng_set_drng_cb(const struct lrng_crypto_cb *cb); +#else /* CONFIG_LRNG_DRNG_SWITCH */ +static inline int +lrng_set_drng_cb(const struct lrng_crypto_cb *cb) { return -EOPNOTSUPP; } +#endif /* CONFIG_LRNG_DRNG_SWITCH */ + +#endif /* _LRNG_H */ diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 819ec92dc2a82..db924fe379c9c 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -332,6 +332,8 @@ LSM_HOOK(int, 0, sctp_bind_connect, struct sock *sk, int optname, struct sockaddr *address, int addrlen) LSM_HOOK(void, LSM_RET_VOID, sctp_sk_clone, struct sctp_association *asoc, struct sock *sk, struct sock *newsk) +LSM_HOOK(int, 0, sctp_assoc_established, struct sctp_association *asoc, + struct sk_buff *skb) #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_INFINIBAND diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 3bf5c658bc448..419b5febc3ca5 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1046,6 +1046,11 @@ * @asoc pointer to current sctp association structure. * @sk pointer to current sock structure. * @newsk pointer to new sock structure. 
+ * @sctp_assoc_established: + * Passes the @asoc and @chunk->skb of the association COOKIE_ACK packet + * to the security module. + * @asoc pointer to sctp association structure. + * @skb pointer to skbuff of association packet. * * Security hooks for Infiniband * diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 0abbd685703b9..b8e5718665b89 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -343,6 +343,11 @@ struct mem_cgroup { struct deferred_split deferred_split_queue; #endif +#ifdef CONFIG_LRU_GEN + /* per-memcg mm_struct list */ + struct lru_gen_mm_list mm_list; +#endif + struct mem_cgroup_per_node *nodeinfo[]; }; @@ -437,6 +442,7 @@ static inline struct obj_cgroup *__folio_objcg(struct folio *folio) * - LRU isolation * - lock_page_memcg() * - exclusive reference + * - mem_cgroup_trylock_pages() * * For a kmem folio a caller should hold an rcu read lock to protect memcg * associated with a kmem folio from being released. @@ -498,6 +504,7 @@ static inline struct mem_cgroup *folio_memcg_rcu(struct folio *folio) * - LRU isolation * - lock_page_memcg() * - exclusive reference + * - mem_cgroup_trylock_pages() * * For a kmem page a caller should hold an rcu read lock to protect memcg * associated with a kmem page from being released. 
@@ -935,6 +942,23 @@ void unlock_page_memcg(struct page *page); void __mod_memcg_state(struct mem_cgroup *memcg, int idx, int val); +/* try to stabilize folio_memcg() for all the pages in a memcg */ +static inline bool mem_cgroup_trylock_pages(struct mem_cgroup *memcg) +{ + rcu_read_lock(); + + if (mem_cgroup_disabled() || !atomic_read(&memcg->moving_account)) + return true; + + rcu_read_unlock(); + return false; +} + +static inline void mem_cgroup_unlock_pages(void) +{ + rcu_read_unlock(); +} + /* idx can be of type enum memcg_stat_item or node_stat_item */ static inline void mod_memcg_state(struct mem_cgroup *memcg, int idx, int val) @@ -1372,6 +1396,18 @@ static inline void folio_memcg_unlock(struct folio *folio) { } +static inline bool mem_cgroup_trylock_pages(struct mem_cgroup *memcg) +{ + /* to match folio_memcg_rcu() */ + rcu_read_lock(); + return true; +} + +static inline void mem_cgroup_unlock_pages(void) +{ + rcu_read_unlock(); +} + static inline void mem_cgroup_handle_over_high(void) { } diff --git a/include/linux/migrate.h b/include/linux/migrate.h index db96e10eb8da2..90e75d5a54d66 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -48,7 +48,15 @@ int folio_migrate_mapping(struct address_space *mapping, struct folio *newfolio, struct folio *folio, int extra_count); extern bool numa_demotion_enabled; +extern void migrate_on_reclaim_init(void); +#ifdef CONFIG_HOTPLUG_CPU +extern void set_migration_target_nodes(void); #else +static inline void set_migration_target_nodes(void) {} +#endif +#else + +static inline void set_migration_target_nodes(void) {} static inline void putback_movable_pages(struct list_head *l) {} static inline int migrate_pages(struct list_head *l, new_page_t new, diff --git a/include/linux/mm.h b/include/linux/mm.h index 5744a3fc47169..1f3695e959429 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -227,6 +227,7 @@ int overcommit_policy_handler(struct ctl_table *, int, void *, size_t *, #define
PAGE_ALIGNED(addr) IS_ALIGNED((unsigned long)(addr), PAGE_SIZE) #define lru_to_page(head) (list_entry((head)->prev, struct page, lru)) +#define lru_to_folio(head) (list_entry((head)->prev, struct folio, lru)) void setup_initial_init_mm(void *start_code, void *end_code, void *end_data, void *brk); @@ -1032,6 +1033,8 @@ vm_fault_t finish_mkwrite_fault(struct vm_fault *vmf); #define ZONES_PGOFF (NODES_PGOFF - ZONES_WIDTH) #define LAST_CPUPID_PGOFF (ZONES_PGOFF - LAST_CPUPID_WIDTH) #define KASAN_TAG_PGOFF (LAST_CPUPID_PGOFF - KASAN_TAG_WIDTH) +#define LRU_GEN_PGOFF (KASAN_TAG_PGOFF - LRU_GEN_WIDTH) +#define LRU_REFS_PGOFF (LRU_GEN_PGOFF - LRU_REFS_WIDTH) /* * Define the bit shifts to access each section. For non-existent @@ -1585,6 +1588,11 @@ static inline unsigned long folio_pfn(struct folio *folio) return page_to_pfn(&folio->page); } +static inline struct folio *pfn_folio(unsigned long pfn) +{ + return page_folio(pfn_to_page(pfn)); +} + /* MIGRATE_CMA and ZONE_MOVABLE do not allow pin pages */ #ifdef CONFIG_MIGRATION static inline bool is_pinnable_page(struct page *page) diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index cf90b1fa2c60c..1c8d617e73a9c 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -32,15 +32,25 @@ static inline int page_is_file_lru(struct page *page) return folio_is_file_lru(page_folio(page)); } -static __always_inline void update_lru_size(struct lruvec *lruvec, +static __always_inline void __update_lru_size(struct lruvec *lruvec, enum lru_list lru, enum zone_type zid, long nr_pages) { struct pglist_data *pgdat = lruvec_pgdat(lruvec); + lockdep_assert_held(&lruvec->lru_lock); + WARN_ON_ONCE(nr_pages != (int)nr_pages); + __mod_lruvec_state(lruvec, NR_LRU_BASE + lru, nr_pages); __mod_zone_page_state(&pgdat->node_zones[zid], NR_ZONE_LRU_BASE + lru, nr_pages); +} + +static __always_inline void update_lru_size(struct lruvec *lruvec, + enum lru_list lru, enum zone_type zid, + int nr_pages) +{ + 
__update_lru_size(lruvec, lru, zid, nr_pages); #ifdef CONFIG_MEMCG mem_cgroup_update_lru_size(lruvec, lru, zid, nr_pages); #endif @@ -92,11 +102,210 @@ static __always_inline enum lru_list folio_lru_list(struct folio *folio) return lru; } +#ifdef CONFIG_LRU_GEN + +static inline bool lru_gen_enabled(void) +{ +#ifdef CONFIG_LRU_GEN_ENABLED + DECLARE_STATIC_KEY_TRUE(lru_gen_caps[NR_LRU_GEN_CAPS]); + + return static_branch_likely(&lru_gen_caps[LRU_GEN_CORE]); +#else + DECLARE_STATIC_KEY_FALSE(lru_gen_caps[NR_LRU_GEN_CAPS]); + + return static_branch_unlikely(&lru_gen_caps[LRU_GEN_CORE]); +#endif +} + +static inline bool lru_gen_in_fault(void) +{ + return current->in_lru_fault; +} + +static inline int lru_gen_from_seq(unsigned long seq) +{ + return seq % MAX_NR_GENS; +} + +static inline int lru_hist_from_seq(unsigned long seq) +{ + return seq % NR_HIST_GENS; +} + +static inline int lru_tier_from_refs(int refs) +{ + VM_BUG_ON(refs > BIT(LRU_REFS_WIDTH)); + + /* see the comment on MAX_NR_TIERS */ + return order_base_2(refs + 1); +} + +static inline bool lru_gen_is_active(struct lruvec *lruvec, int gen) +{ + unsigned long max_seq = lruvec->lrugen.max_seq; + + VM_BUG_ON(gen >= MAX_NR_GENS); + + /* see the comment on MIN_NR_GENS */ + return gen == lru_gen_from_seq(max_seq) || gen == lru_gen_from_seq(max_seq - 1); +} + +static inline void lru_gen_update_size(struct lruvec *lruvec, struct folio *folio, + int old_gen, int new_gen) +{ + int type = folio_is_file_lru(folio); + int zone = folio_zonenum(folio); + int delta = folio_nr_pages(folio); + enum lru_list lru = type * LRU_INACTIVE_FILE; + struct lru_gen_struct *lrugen = &lruvec->lrugen; + + VM_BUG_ON(old_gen != -1 && old_gen >= MAX_NR_GENS); + VM_BUG_ON(new_gen != -1 && new_gen >= MAX_NR_GENS); + VM_BUG_ON(old_gen == -1 && new_gen == -1); + + if (old_gen >= 0) + WRITE_ONCE(lrugen->nr_pages[old_gen][type][zone], + lrugen->nr_pages[old_gen][type][zone] - delta); + if (new_gen >= 0) + 
WRITE_ONCE(lrugen->nr_pages[new_gen][type][zone], + lrugen->nr_pages[new_gen][type][zone] + delta); + + /* addition */ + if (old_gen < 0) { + if (lru_gen_is_active(lruvec, new_gen)) + lru += LRU_ACTIVE; + __update_lru_size(lruvec, lru, zone, delta); + return; + } + + /* deletion */ + if (new_gen < 0) { + if (lru_gen_is_active(lruvec, old_gen)) + lru += LRU_ACTIVE; + __update_lru_size(lruvec, lru, zone, -delta); + return; + } + + /* promotion */ + if (!lru_gen_is_active(lruvec, old_gen) && lru_gen_is_active(lruvec, new_gen)) { + __update_lru_size(lruvec, lru, zone, -delta); + __update_lru_size(lruvec, lru + LRU_ACTIVE, zone, delta); + } + + /* demotion requires isolation, e.g., lru_deactivate_fn() */ + VM_BUG_ON(lru_gen_is_active(lruvec, old_gen) && !lru_gen_is_active(lruvec, new_gen)); +} + +static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio, bool reclaiming) +{ + int gen; + unsigned long old_flags, new_flags; + int type = folio_is_file_lru(folio); + int zone = folio_zonenum(folio); + struct lru_gen_struct *lrugen = &lruvec->lrugen; + + if (folio_test_unevictable(folio) || !lrugen->enabled) + return false; + /* + * There are three common cases for this page: + * 1. If it's hot, e.g., freshly faulted in or previously hot and + * migrated, add it to the youngest generation. + * 2. If it's cold but can't be evicted immediately, i.e., an anon page + * not in swapcache or a dirty page pending writeback, add it to the + * second oldest generation. + * 3. Everything else (clean, cold) is added to the oldest generation. 
+ */ + if (folio_test_active(folio)) + gen = lru_gen_from_seq(lrugen->max_seq); + else if ((type == LRU_GEN_ANON && !folio_test_swapcache(folio)) || + (folio_test_reclaim(folio) && + (folio_test_dirty(folio) || folio_test_writeback(folio)))) + gen = lru_gen_from_seq(lrugen->min_seq[type] + 1); + else + gen = lru_gen_from_seq(lrugen->min_seq[type]); + + do { + new_flags = old_flags = READ_ONCE(folio->flags); + VM_BUG_ON_FOLIO(new_flags & LRU_GEN_MASK, folio); + + /* see the comment on MIN_NR_GENS */ + new_flags &= ~(LRU_GEN_MASK | BIT(PG_active)); + new_flags |= (gen + 1UL) << LRU_GEN_PGOFF; + } while (cmpxchg(&folio->flags, old_flags, new_flags) != old_flags); + + lru_gen_update_size(lruvec, folio, -1, gen); + /* for folio_rotate_reclaimable() */ + if (reclaiming) + list_add_tail(&folio->lru, &lrugen->lists[gen][type][zone]); + else + list_add(&folio->lru, &lrugen->lists[gen][type][zone]); + + return true; +} + +static inline bool lru_gen_del_folio(struct lruvec *lruvec, struct folio *folio, bool reclaiming) +{ + int gen; + unsigned long old_flags, new_flags; + + do { + new_flags = old_flags = READ_ONCE(folio->flags); + if (!(new_flags & LRU_GEN_MASK)) + return false; + + VM_BUG_ON_FOLIO(folio_test_active(folio), folio); + VM_BUG_ON_FOLIO(folio_test_unevictable(folio), folio); + + gen = ((new_flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1; + + new_flags &= ~LRU_GEN_MASK; + if ((new_flags & LRU_REFS_FLAGS) != LRU_REFS_FLAGS) + new_flags &= ~(LRU_REFS_MASK | LRU_REFS_FLAGS); + /* for shrink_page_list() */ + if (reclaiming) + new_flags &= ~(BIT(PG_referenced) | BIT(PG_reclaim)); + else if (lru_gen_is_active(lruvec, gen)) + new_flags |= BIT(PG_active); + } while (cmpxchg(&folio->flags, old_flags, new_flags) != old_flags); + + lru_gen_update_size(lruvec, folio, gen, -1); + list_del(&folio->lru); + + return true; +} + +#else + +static inline bool lru_gen_enabled(void) +{ + return false; +} + +static inline bool lru_gen_in_fault(void) +{ + return false; +} + +static inline 
bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio, bool reclaiming) +{ + return false; +} + +static inline bool lru_gen_del_folio(struct lruvec *lruvec, struct folio *folio, bool reclaiming) +{ + return false; +} + +#endif /* CONFIG_LRU_GEN */ + static __always_inline void lruvec_add_folio(struct lruvec *lruvec, struct folio *folio) { enum lru_list lru = folio_lru_list(folio); + if (lru_gen_add_folio(lruvec, folio, false)) + return; + update_lru_size(lruvec, lru, folio_zonenum(folio), folio_nr_pages(folio)); list_add(&folio->lru, &lruvec->lists[lru]); @@ -113,6 +322,9 @@ void lruvec_add_folio_tail(struct lruvec *lruvec, struct folio *folio) { enum lru_list lru = folio_lru_list(folio); + if (lru_gen_add_folio(lruvec, folio, true)) + return; + update_lru_size(lruvec, lru, folio_zonenum(folio), folio_nr_pages(folio)); list_add_tail(&folio->lru, &lruvec->lists[lru]); @@ -127,6 +339,9 @@ static __always_inline void add_page_to_lru_list_tail(struct page *page, static __always_inline void lruvec_del_folio(struct lruvec *lruvec, struct folio *folio) { + if (lru_gen_del_folio(lruvec, folio, false)) + return; + list_del(&folio->lru); update_lru_size(lruvec, folio_lru_list(folio), folio_zonenum(folio), -folio_nr_pages(folio)); diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 0f549870da6a0..cbc7fa381ac6f 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -3,6 +3,7 @@ #define _LINUX_MM_TYPES_H #include +#include #include #include @@ -17,6 +18,8 @@ #include #include #include +#include +#include #include @@ -637,6 +640,22 @@ struct mm_struct { #ifdef CONFIG_IOMMU_SUPPORT u32 pasid; #endif +#ifdef CONFIG_LRU_GEN + struct { + /* this mm_struct is on lru_gen_mm_list */ + struct list_head list; +#ifdef CONFIG_MEMCG + /* points to the memcg of "owner" above */ + struct mem_cgroup *memcg; +#endif + /* + * Set when switching to this mm_struct, as a hint of + * whether it has been used since the last time per-node + * page table 
walkers cleared the corresponding bits. + */ + nodemask_t nodes; + } lru_gen; +#endif /* CONFIG_LRU_GEN */ } __randomize_layout; /* @@ -663,6 +682,65 @@ static inline cpumask_t *mm_cpumask(struct mm_struct *mm) return (struct cpumask *)&mm->cpu_bitmap; } +#ifdef CONFIG_LRU_GEN + +struct lru_gen_mm_list { + /* mm_struct list for page table walkers */ + struct list_head fifo; + /* protects the list above */ + spinlock_t lock; +}; + +void lru_gen_add_mm(struct mm_struct *mm); +void lru_gen_del_mm(struct mm_struct *mm); +#ifdef CONFIG_MEMCG +void lru_gen_migrate_mm(struct mm_struct *mm); +#endif + +static inline void lru_gen_init_mm(struct mm_struct *mm) +{ + INIT_LIST_HEAD(&mm->lru_gen.list); +#ifdef CONFIG_MEMCG + mm->lru_gen.memcg = NULL; +#endif + nodes_clear(mm->lru_gen.nodes); +} + +static inline void lru_gen_use_mm(struct mm_struct *mm) +{ + /* unlikely but not a bug when racing with lru_gen_migrate_mm() */ + VM_WARN_ON(list_empty(&mm->lru_gen.list)); + + if (!(current->flags & PF_KTHREAD) && !nodes_full(mm->lru_gen.nodes)) + nodes_setall(mm->lru_gen.nodes); +} + +#else /* !CONFIG_LRU_GEN */ + +static inline void lru_gen_add_mm(struct mm_struct *mm) +{ +} + +static inline void lru_gen_del_mm(struct mm_struct *mm) +{ +} + +#ifdef CONFIG_MEMCG +static inline void lru_gen_migrate_mm(struct mm_struct *mm) +{ +} +#endif + +static inline void lru_gen_init_mm(struct mm_struct *mm) +{ +} + +static inline void lru_gen_use_mm(struct mm_struct *mm) +{ +} + +#endif /* CONFIG_LRU_GEN */ + struct mmu_gather; extern void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm); extern void tlb_gather_mmu_fullmm(struct mmu_gather *tlb, struct mm_struct *mm); diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index aed44e9b5d899..a068cdca4c81b 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -303,6 +303,207 @@ enum lruvec_flags { */ }; +#endif /* !__GENERATING_BOUNDS_H */ + +/* + * Evictable pages are divided into multiple generations. 
The youngest and the + * oldest generation numbers, max_seq and min_seq, are monotonically increasing. + * They form a sliding window of a variable size [MIN_NR_GENS, MAX_NR_GENS]. An + * offset within MAX_NR_GENS, gen, indexes the LRU list of the corresponding + * generation. The gen counter in folio->flags stores gen+1 while a page is on + * one of lrugen->lists[]. Otherwise it stores 0. + * + * A page is added to the youngest generation on faulting. The aging needs to + * check the accessed bit at least twice before handing this page over to the + * eviction. The first check takes care of the accessed bit set on the initial + * fault; the second check makes sure this page hasn't been used since then. + * This process, AKA second chance, requires a minimum of two generations, + * hence MIN_NR_GENS. And to maintain ABI compatibility with the active/inactive + * LRU, these two generations are considered active; the rest of generations, if + * they exist, are considered inactive. See lru_gen_is_active(). PG_active is + * always cleared while a page is on one of lrugen->lists[] so that the aging + * need not worry about it. And it's set again when a page considered active + * is isolated for non-reclaiming purposes, e.g., migration. See + * lru_gen_add_folio() and lru_gen_del_folio(). + * + * MAX_NR_GENS is set to 4 so that the multi-gen LRU has twice the categories + * of the active/inactive LRU. + * + */ +#define MIN_NR_GENS 2U +#define MAX_NR_GENS 4U + +/* + * Each generation is divided into multiple tiers. Tiers represent different + * ranges of numbers of accesses through file descriptors. A page accessed N + * times through file descriptors is in tier order_base_2(N). A page in the + * first tier (N=0,1) is marked by PG_referenced unless it was faulted in + * through page tables or read ahead. A page in any other tier (N>1) is marked + * by PG_referenced and PG_workingset. Two additional bits in folio->flags are + * required to support four tiers.
+ * + * In contrast to moving across generations which requires the LRU lock, moving + * across tiers only requires operations on folio->flags and therefore has a + * negligible cost in the buffered access path. In the eviction path, + * comparisons of refaulted/(evicted+protected) from the first tier and the + * rest infer whether pages accessed multiple times through file descriptors + * are statistically hot and thus worth protecting. + * + * MAX_NR_TIERS is set to 4 so that the multi-gen LRU has twice the + * categories of the active/inactive LRU when tracking accesses through file + * descriptors. + */ +#define MAX_NR_TIERS 4U +#define LRU_REFS_FLAGS (BIT(PG_referenced) | BIT(PG_workingset)) + +#ifndef __GENERATING_BOUNDS_H + +struct lruvec; +struct page_vma_mapped_walk; + +#define LRU_GEN_MASK ((BIT(LRU_GEN_WIDTH) - 1) << LRU_GEN_PGOFF) +#define LRU_REFS_MASK ((BIT(LRU_REFS_WIDTH) - 1) << LRU_REFS_PGOFF) + +#ifdef CONFIG_LRU_GEN + +enum { + LRU_GEN_ANON, + LRU_GEN_FILE, +}; + +enum { + LRU_GEN_CORE, + LRU_GEN_MM_WALK, + LRU_GEN_NONLEAF_YOUNG, + NR_LRU_GEN_CAPS +}; + +#define MIN_LRU_BATCH BITS_PER_LONG +#define MAX_LRU_BATCH (MIN_LRU_BATCH * 128) + +/* whether to keep historical stats from evicted generations */ +#ifdef CONFIG_LRU_GEN_STATS +#define NR_HIST_GENS MAX_NR_GENS +#else +#define NR_HIST_GENS 1U +#endif + +/* + * The youngest generation number is stored in max_seq for both anon and file + * types as they are aged on an equal footing. The oldest generation numbers are + * stored in min_seq[] separately for anon and file types as clean file pages + * can be evicted regardless of swap constraints. + * + * Normally anon and file min_seq are in sync. But if swapping is constrained, + * e.g., out of swap space, file min_seq is allowed to advance and leave anon + * min_seq behind.
+ */ +struct lru_gen_struct { + /* the aging increments the youngest generation number */ + unsigned long max_seq; + /* the eviction increments the oldest generation numbers */ + unsigned long min_seq[ANON_AND_FILE]; + /* the birth time of each generation in jiffies */ + unsigned long timestamps[MAX_NR_GENS]; + /* the multi-gen LRU lists */ + struct list_head lists[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES]; + /* the sizes of the above lists */ + unsigned long nr_pages[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES]; + /* the exponential moving average of refaulted */ + unsigned long avg_refaulted[ANON_AND_FILE][MAX_NR_TIERS]; + /* the exponential moving average of evicted+protected */ + unsigned long avg_total[ANON_AND_FILE][MAX_NR_TIERS]; + /* the first tier doesn't need protection, hence the minus one */ + unsigned long protected[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS - 1]; + /* can be modified without holding the LRU lock */ + atomic_long_t evicted[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS]; + atomic_long_t refaulted[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS]; + /* whether the multi-gen LRU is enabled */ + bool enabled; +}; + +enum { + MM_PTE_TOTAL, /* total leaf entries */ + MM_PTE_OLD, /* old leaf entries */ + MM_PTE_YOUNG, /* young leaf entries */ + MM_PMD_TOTAL, /* total non-leaf entries */ + MM_PMD_FOUND, /* non-leaf entries found in Bloom filters */ + MM_PMD_ADDED, /* non-leaf entries added to Bloom filters */ + NR_MM_STATS +}; + +/* mnemonic codes for the mm stats above */ +#define MM_STAT_CODES "toydfa" + +/* double-buffering Bloom filters */ +#define NR_BLOOM_FILTERS 2 + +struct lru_gen_mm_state { + /* set to max_seq after each iteration */ + unsigned long seq; + /* where the current iteration starts (inclusive) */ + struct list_head *head; + /* where the last iteration ends (exclusive) */ + struct list_head *tail; + /* to wait for the last page table walker to finish */ + struct wait_queue_head wait; + /* Bloom filters flip after each iteration */ + 
unsigned long *filters[NR_BLOOM_FILTERS]; + /* the mm stats for debugging */ + unsigned long stats[NR_HIST_GENS][NR_MM_STATS]; + /* the number of concurrent page table walkers */ + int nr_walkers; +}; + +struct lru_gen_mm_walk { + /* the lruvec under reclaim */ + struct lruvec *lruvec; + /* unstable max_seq from lru_gen_struct */ + unsigned long max_seq; + /* the next address within an mm to scan */ + unsigned long next_addr; + /* to batch page table entries */ + unsigned long bitmap[BITS_TO_LONGS(MIN_LRU_BATCH)]; + /* to batch promoted pages */ + int nr_pages[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES]; + /* to batch the mm stats */ + int mm_stats[NR_MM_STATS]; + /* total batched items */ + int batched; + bool can_swap; + bool full_scan; +}; + +void lru_gen_init_lruvec(struct lruvec *lruvec); +void lru_gen_look_around(struct page_vma_mapped_walk *pvmw); + +#ifdef CONFIG_MEMCG +void lru_gen_init_memcg(struct mem_cgroup *memcg); +void lru_gen_exit_memcg(struct mem_cgroup *memcg); +#endif + +#else /* !CONFIG_LRU_GEN */ + +static inline void lru_gen_init_lruvec(struct lruvec *lruvec) +{ +} + +static inline void lru_gen_look_around(struct page_vma_mapped_walk *pvmw) +{ +} + +#ifdef CONFIG_MEMCG +static inline void lru_gen_init_memcg(struct mem_cgroup *memcg) +{ +} + +static inline void lru_gen_exit_memcg(struct mem_cgroup *memcg) +{ +} +#endif + +#endif /* CONFIG_LRU_GEN */ + struct lruvec { struct list_head lists[NR_LRU_LISTS]; /* per lruvec lru_lock for memcg */ @@ -320,6 +521,12 @@ struct lruvec { unsigned long refaults[ANON_AND_FILE]; /* Various lruvec state flags (enum lruvec_flags) */ unsigned long flags; +#ifdef CONFIG_LRU_GEN + /* evictable pages divided into generations */ + struct lru_gen_struct lrugen; + /* to concurrently iterate lru_gen_mm_list */ + struct lru_gen_mm_state mm_state; +#endif #ifdef CONFIG_MEMCG struct pglist_data *pgdat; #endif @@ -911,6 +1118,10 @@ typedef struct pglist_data { unsigned long flags; +#ifdef CONFIG_LRU_GEN + /* kswapd mm walk
data */ + struct lru_gen_mm_walk mm_walk; +#endif ZONE_PADDING(_pad2_) /* Per-node vmstats */ @@ -1389,13 +1600,16 @@ static inline unsigned long *section_to_usemap(struct mem_section *ms) static inline struct mem_section *__nr_to_section(unsigned long nr) { + unsigned long root = SECTION_NR_TO_ROOT(nr); + + if (unlikely(root >= NR_SECTION_ROOTS)) + return NULL; + #ifdef CONFIG_SPARSEMEM_EXTREME - if (!mem_section) + if (!mem_section || !mem_section[root]) return NULL; #endif - if (!mem_section[SECTION_NR_TO_ROOT(nr)]) - return NULL; - return &mem_section[SECTION_NR_TO_ROOT(nr)][nr & SECTION_ROOT_MASK]; + return &mem_section[root][nr & SECTION_ROOT_MASK]; } extern size_t mem_section_usage_size(void); diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 5b88cd51fadb5..dcf90144d70b7 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -1240,6 +1240,7 @@ struct nand_secure_region { * @lock: Lock protecting the suspended field. Also used to serialize accesses * to the NAND device * @suspended: Set to 1 when the device is suspended, 0 when it's not + * @resume_wq: wait queue to sleep if rawnand is in suspended state. * @cur_cs: Currently selected target. -1 means no target selected, otherwise we * should always have cur_cs >= 0 && cur_cs < nanddev_ntargets(). 
* NAND Controller drivers should not modify this value, but they're @@ -1294,6 +1295,7 @@ struct nand_chip { /* Internals */ struct mutex lock; unsigned int suspended : 1; + wait_queue_head_t resume_wq; int cur_cs; int read_retries; struct nand_secure_region *secure_regions; diff --git a/include/linux/netfilter_netdev.h b/include/linux/netfilter_netdev.h index e6487a6911360..8676316547cc4 100644 --- a/include/linux/netfilter_netdev.h +++ b/include/linux/netfilter_netdev.h @@ -99,7 +99,7 @@ static inline struct sk_buff *nf_hook_egress(struct sk_buff *skb, int *rc, return skb; nf_hook_state_init(&state, NF_NETDEV_EGRESS, - NFPROTO_NETDEV, dev, NULL, NULL, + NFPROTO_NETDEV, NULL, dev, NULL, dev_net(dev), NULL); /* nf assumes rcu_read_lock, not just read_lock_bh */ diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 68f81d8d36def..c9d3dc79d5876 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -513,10 +513,10 @@ static inline const struct cred *nfs_file_cred(struct file *file) * linux/fs/nfs/direct.c */ extern ssize_t nfs_direct_IO(struct kiocb *, struct iov_iter *); -extern ssize_t nfs_file_direct_read(struct kiocb *iocb, - struct iov_iter *iter); -extern ssize_t nfs_file_direct_write(struct kiocb *iocb, - struct iov_iter *iter); +ssize_t nfs_file_direct_read(struct kiocb *iocb, + struct iov_iter *iter, bool swap); +ssize_t nfs_file_direct_write(struct kiocb *iocb, + struct iov_iter *iter, bool swap); /* * linux/fs/nfs/dir.c @@ -585,7 +585,7 @@ extern int nfs_wb_all(struct inode *inode); extern int nfs_wb_page(struct inode *inode, struct page *page); extern int nfs_wb_page_cancel(struct inode *inode, struct page* page); extern int nfs_commit_inode(struct inode *, int); -extern struct nfs_commit_data *nfs_commitdata_alloc(bool never_fail); +extern struct nfs_commit_data *nfs_commitdata_alloc(void); extern void nfs_commit_free(struct nfs_commit_data *data); bool nfs_commit_end(struct nfs_mds_commit_info *cinfo); diff --git 
a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index ca0959e51e817..b0e3fd550122c 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -151,6 +151,7 @@ struct nfs_server { #define NFS_MOUNT_SOFTREVAL 0x800000 #define NFS_MOUNT_WRITE_EAGER 0x01000000 #define NFS_MOUNT_WRITE_WAIT 0x02000000 +#define NFS_MOUNT_TRUNK_DISCOVERY 0x04000000 unsigned int fattr_valid; /* Valid attributes */ unsigned int caps; /* server capabilities */ diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h index 567c3ddba2c42..90840c459abcc 100644 --- a/include/linux/nodemask.h +++ b/include/linux/nodemask.h @@ -486,6 +486,7 @@ static inline int num_node_state(enum node_states state) #define first_online_node 0 #define first_memory_node 0 #define next_online_node(nid) (MAX_NUMNODES) +#define next_memory_node(nid) (MAX_NUMNODES) #define nr_node_ids 1U #define nr_online_nodes 1U diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 855dd9b3e84be..a662435c9b6f1 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -337,6 +337,7 @@ enum { NVME_CTRL_ONCS_TIMESTAMP = 1 << 6, NVME_CTRL_VWC_PRESENT = 1 << 0, NVME_CTRL_OACS_SEC_SUPP = 1 << 0, + NVME_CTRL_OACS_NS_MNGT_SUPP = 1 << 3, NVME_CTRL_OACS_DIRECTIVES = 1 << 5, NVME_CTRL_OACS_DBBUF_SUPP = 1 << 8, NVME_CTRL_LPA_CMD_EFFECTS_LOG = 1 << 1, diff --git a/include/linux/page-flags-layout.h b/include/linux/page-flags-layout.h index ef1e3e736e148..c1946cdb845fe 100644 --- a/include/linux/page-flags-layout.h +++ b/include/linux/page-flags-layout.h @@ -55,7 +55,8 @@ #define SECTIONS_WIDTH 0 #endif -#if ZONES_WIDTH + SECTIONS_WIDTH + NODES_SHIFT <= BITS_PER_LONG - NR_PAGEFLAGS +#if ZONES_WIDTH + LRU_GEN_WIDTH + LRU_REFS_WIDTH + SECTIONS_WIDTH + NODES_SHIFT \ + <= BITS_PER_LONG - NR_PAGEFLAGS #define NODES_WIDTH NODES_SHIFT #elif defined(CONFIG_SPARSEMEM_VMEMMAP) #error "Vmemmap: No space for nodes field in page flags" @@ -89,8 +90,8 @@ #define LAST_CPUPID_SHIFT 0 #endif -#if ZONES_WIDTH + 
SECTIONS_WIDTH + NODES_WIDTH + KASAN_TAG_WIDTH + LAST_CPUPID_SHIFT \ - <= BITS_PER_LONG - NR_PAGEFLAGS +#if ZONES_WIDTH + LRU_GEN_WIDTH + LRU_REFS_WIDTH + SECTIONS_WIDTH + NODES_WIDTH + \ + KASAN_TAG_WIDTH + LAST_CPUPID_SHIFT <= BITS_PER_LONG - NR_PAGEFLAGS #define LAST_CPUPID_WIDTH LAST_CPUPID_SHIFT #else #define LAST_CPUPID_WIDTH 0 @@ -100,8 +101,8 @@ #define LAST_CPUPID_NOT_IN_PAGE_FLAGS #endif -#if ZONES_WIDTH + SECTIONS_WIDTH + NODES_WIDTH + KASAN_TAG_WIDTH + LAST_CPUPID_WIDTH \ - > BITS_PER_LONG - NR_PAGEFLAGS +#if ZONES_WIDTH + LRU_GEN_WIDTH + LRU_REFS_WIDTH + SECTIONS_WIDTH + NODES_WIDTH + \ + KASAN_TAG_WIDTH + LAST_CPUPID_WIDTH > BITS_PER_LONG - NR_PAGEFLAGS #error "Not enough bits in page flags" #endif diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 1c3b6e5c8bfd3..a95518ca98eb7 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -935,7 +935,7 @@ __PAGEFLAG(Isolated, isolated, PF_ANY); 1UL << PG_private | 1UL << PG_private_2 | \ 1UL << PG_writeback | 1UL << PG_reserved | \ 1UL << PG_slab | 1UL << PG_active | \ - 1UL << PG_unevictable | __PG_MLOCKED) + 1UL << PG_unevictable | __PG_MLOCKED | LRU_GEN_MASK) /* * Flags checked when a page is prepped for return by the page allocator. @@ -946,7 +946,7 @@ __PAGEFLAG(Isolated, isolated, PF_ANY); * alloc-free cycle to prevent from reusing the page. 
*/ #define PAGE_FLAGS_CHECK_AT_PREP \ - (PAGEFLAGS_MASK & ~__PG_HWPOISON) + ((PAGEFLAGS_MASK & ~__PG_HWPOISON) | LRU_GEN_MASK | LRU_REFS_MASK) #define PAGE_FLAGS_PRIVATE \ (1UL << PG_private | 1UL << PG_private_2) diff --git a/include/linux/pci.h b/include/linux/pci.h index 8253a5413d7c4..678fecdf6b812 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -668,6 +668,7 @@ struct pci_bus { struct bin_attribute *legacy_io; /* Legacy I/O for this bus */ struct bin_attribute *legacy_mem; /* Legacy mem */ unsigned int is_added:1; + unsigned int unsafe_warn:1; /* warned about RW1C config write */ }; #define to_pci_bus(n) container_of(n, struct pci_bus, dev) diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index f4f4077b97aab..743e7fc4afda3 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -212,7 +212,7 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, #endif #ifndef __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG -#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) @@ -233,7 +233,7 @@ static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma, BUILD_BUG(); return 0; } -#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ +#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG */ #endif #ifndef __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH @@ -259,6 +259,19 @@ static inline int pmdp_clear_flush_young(struct vm_area_struct *vma, #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif +#ifndef arch_has_hw_pte_young +/* + * Return whether the accessed bit is supported on the local CPU. + * + * This stub assumes accessing through an old PTE triggers a page fault. + * Architectures that automatically set the access bit should overwrite it. 
+ */ +static inline bool arch_has_hw_pte_young(void) +{ + return false; +} +#endif + #ifndef __HAVE_ARCH_PTEP_CLEAR static inline void ptep_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) diff --git a/include/linux/pstore.h b/include/linux/pstore.h index eb93a54cff31f..e97a8188f0fd8 100644 --- a/include/linux/pstore.h +++ b/include/linux/pstore.h @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include @@ -87,7 +87,7 @@ struct pstore_record { * @owner: module which is responsible for this backend driver * @name: name of the backend driver * - * @buf_lock: semaphore to serialize access to @buf + * @buf_lock: spinlock to serialize access to @buf * @buf: preallocated crash dump buffer * @bufsize: size of @buf available for crash dump bytes (must match * smallest number of bytes available for writing to a @@ -178,7 +178,7 @@ struct pstore_info { struct module *owner; const char *name; - struct semaphore buf_lock; + spinlock_t buf_lock; char *buf; size_t bufsize; diff --git a/include/linux/randomize_kstack.h b/include/linux/randomize_kstack.h index bebc911161b6f..d373f1bcbf7ca 100644 --- a/include/linux/randomize_kstack.h +++ b/include/linux/randomize_kstack.h @@ -16,8 +16,20 @@ DECLARE_PER_CPU(u32, kstack_offset); * alignment. Also, since this use is being explicitly masked to a max of * 10 bits, stack-clash style attacks are unlikely. For more details see * "VLAs" in Documentation/process/deprecated.rst + * + * The normal __builtin_alloca() is initialized with INIT_STACK_ALL (currently + * only with Clang and not GCC). Initializing the unused area on each syscall + * entry is expensive, and generating an implicit call to memset() may also be + * problematic (such as in noinstr functions). Therefore, if the compiler + * supports it (which it should if it initializes allocas), always use the + * "uninitialized" variant of the builtin. 
*/ -void *__builtin_alloca(size_t size); +#if __has_builtin(__builtin_alloca_uninitialized) +#define __kstack_alloca __builtin_alloca_uninitialized +#else +#define __kstack_alloca __builtin_alloca +#endif + /* * Use, at most, 10 bits of entropy. We explicitly cap this to keep the * "VLA" from being unbounded (see above). 10 bits leaves enough room for @@ -36,7 +48,7 @@ void *__builtin_alloca(size_t size); if (static_branch_maybe(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT, \ &randomize_kstack_offset)) { \ u32 offset = raw_cpu_read(kstack_offset); \ - u8 *ptr = __builtin_alloca(KSTACK_OFFSET_MAX(offset)); \ + u8 *ptr = __kstack_alloca(KSTACK_OFFSET_MAX(offset)); \ /* Keep allocation even after "ptr" loses scope. */ \ asm volatile("" :: "r"(ptr) : "memory"); \ } \ diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 858f4d429946d..5fed476f977f6 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -64,9 +64,8 @@ static inline void rcu_softirq_qs(void) rcu_tasks_qs(current, (preempt)); \ } while (0) -static inline int rcu_needs_cpu(u64 basemono, u64 *nextevt) +static inline int rcu_needs_cpu(void) { - *nextevt = KTIME_MAX; return 0; } diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 53209d6694001..6cc91291d0782 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -19,7 +19,7 @@ void rcu_softirq_qs(void); void rcu_note_context_switch(bool preempt); -int rcu_needs_cpu(u64 basem, u64 *nextevt); +int rcu_needs_cpu(void); void rcu_cpu_stall_reset(void); /* diff --git a/include/linux/ref_tracker.h b/include/linux/ref_tracker.h index 60f3453be23e6..a443abda937d8 100644 --- a/include/linux/ref_tracker.h +++ b/include/linux/ref_tracker.h @@ -13,6 +13,7 @@ struct ref_tracker_dir { spinlock_t lock; unsigned int quarantine_avail; refcount_t untracked; + bool dead; struct list_head list; /* List of active trackers */ struct list_head quarantine; /* List of dead trackers */ #endif @@ -26,6 +27,7 @@ static inline void 
ref_tracker_dir_init(struct ref_tracker_dir *dir, INIT_LIST_HEAD(&dir->quarantine); spin_lock_init(&dir->lock); dir->quarantine_avail = quarantine_count; + dir->dead = false; refcount_set(&dir->untracked, 1); stack_depot_init(); } diff --git a/include/linux/sched.h b/include/linux/sched.h index 75ba8aa60248b..be23cc9e6edde 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -914,6 +914,10 @@ struct task_struct { #ifdef CONFIG_MEMCG unsigned in_user_fault:1; #endif +#ifdef CONFIG_LRU_GEN + /* whether the LRU algorithm may apply to this access */ + unsigned in_lru_fault:1; +#endif #ifdef CONFIG_COMPAT_BRK unsigned brk_randomized:1; #endif @@ -1630,6 +1634,14 @@ static inline unsigned int task_state_index(struct task_struct *tsk) if (tsk_state == TASK_IDLE) state = TASK_REPORT_IDLE; + /* + * We're lying here, but rather than expose a completely new task state + * to userspace, we can make this appear as if the task has gone through + * a regular rt_mutex_lock() call. + */ + if (tsk_state == TASK_RTLOCK_WAIT) + state = TASK_UNINTERRUPTIBLE; + return fls(state); } diff --git a/include/linux/security.h b/include/linux/security.h index 6d72772182c82..25b3ef71f495e 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1422,6 +1422,8 @@ int security_sctp_bind_connect(struct sock *sk, int optname, struct sockaddr *address, int addrlen); void security_sctp_sk_clone(struct sctp_association *asoc, struct sock *sk, struct sock *newsk); +int security_sctp_assoc_established(struct sctp_association *asoc, + struct sk_buff *skb); #else /* CONFIG_SECURITY_NETWORK */ static inline int security_unix_stream_connect(struct sock *sock, @@ -1641,6 +1643,12 @@ static inline void security_sctp_sk_clone(struct sctp_association *asoc, struct sock *newsk) { } + +static inline int security_sctp_assoc_established(struct sctp_association *asoc, + struct sk_buff *skb) +{ + return 0; +} #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_INFINIBAND diff --git 
a/include/linux/serial_core.h b/include/linux/serial_core.h index c58cc142d23f4..8c32935e1059d 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -458,6 +458,8 @@ extern void uart_handle_cts_change(struct uart_port *uport, extern void uart_insert_char(struct uart_port *port, unsigned int status, unsigned int overrun, unsigned int ch, unsigned int flag); +void uart_xchar_out(struct uart_port *uport, int offset); + #ifdef CONFIG_MAGIC_SYSRQ_SERIAL #define SYSRQ_TIMEOUT (HZ * 5) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 8a636e678902d..42f885f0ce8af 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1475,6 +1475,11 @@ static inline unsigned int skb_end_offset(const struct sk_buff *skb) { return skb->end; } + +static inline void skb_set_end_offset(struct sk_buff *skb, unsigned int offset) +{ + skb->end = offset; +} #else static inline unsigned char *skb_end_pointer(const struct sk_buff *skb) { @@ -1485,6 +1490,11 @@ static inline unsigned int skb_end_offset(const struct sk_buff *skb) { return skb->end - skb->head; } + +static inline void skb_set_end_offset(struct sk_buff *skb, unsigned int offset) +{ + skb->end = skb->head + offset; +} #endif /* Internal */ @@ -1724,19 +1734,19 @@ static inline int skb_unclone(struct sk_buff *skb, gfp_t pri) return 0; } -/* This variant of skb_unclone() makes sure skb->truesize is not changed */ +/* This variant of skb_unclone() makes sure skb->truesize + * and skb_end_offset() are not changed, whenever a new skb->head is needed. + * + * Indeed there is no guarantee that ksize(kmalloc(X)) == ksize(kmalloc(X)) + * when various debugging features are in place. 
+ */ +int __skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri); static inline int skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri) { might_sleep_if(gfpflags_allow_blocking(pri)); - if (skb_cloned(skb)) { - unsigned int save = skb->truesize; - int res; - - res = pskb_expand_head(skb, 0, 0, pri); - skb->truesize = save; - return res; - } + if (skb_cloned(skb)) + return __skb_unclone_keeptruesize(skb, pri); return 0; } diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 18a717fe62eb0..7f32dd59e7513 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -310,21 +310,16 @@ static inline void sock_drop(struct sock *sk, struct sk_buff *skb) kfree_skb(skb); } -static inline void drop_sk_msg(struct sk_psock *psock, struct sk_msg *msg) -{ - if (msg->skb) - sock_drop(psock->sk, msg->skb); - kfree(msg); -} - static inline void sk_psock_queue_msg(struct sk_psock *psock, struct sk_msg *msg) { spin_lock_bh(&psock->ingress_lock); if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) list_add_tail(&msg->list, &psock->ingress_msg); - else - drop_sk_msg(psock, msg); + else { + sk_msg_free(psock->sk, msg); + kfree(msg); + } spin_unlock_bh(&psock->ingress_lock); } diff --git a/include/linux/soc/ti/ti_sci_protocol.h b/include/linux/soc/ti/ti_sci_protocol.h index 0aad7009b50e6..bd0d11af76c5e 100644 --- a/include/linux/soc/ti/ti_sci_protocol.h +++ b/include/linux/soc/ti/ti_sci_protocol.h @@ -645,7 +645,7 @@ devm_ti_sci_get_of_resource(const struct ti_sci_handle *handle, static inline struct ti_sci_resource * devm_ti_sci_get_resource(const struct ti_sci_handle *handle, struct device *dev, - u32 dev_id, u32 sub_type); + u32 dev_id, u32 sub_type) { return ERR_PTR(-EINVAL); } diff --git a/include/linux/static_call.h b/include/linux/static_call.h index 3e56a9751c062..3c50b0fdda163 100644 --- a/include/linux/static_call.h +++ b/include/linux/static_call.h @@ -196,6 +196,14 @@ extern long __static_call_return0(void); }; \ 
ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) +#define DEFINE_STATIC_CALL_RET0(name, _func) \ + DECLARE_STATIC_CALL(name, _func); \ + struct static_call_key STATIC_CALL_KEY(name) = { \ + .func = __static_call_return0, \ + .type = 1, \ + }; \ + ARCH_DEFINE_STATIC_CALL_RET0_TRAMP(name) + #define static_call_cond(name) (void)__static_call(name) #define EXPORT_STATIC_CALL(name) \ @@ -231,6 +239,12 @@ static inline int static_call_init(void) { return 0; } }; \ ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) +#define DEFINE_STATIC_CALL_RET0(name, _func) \ + DECLARE_STATIC_CALL(name, _func); \ + struct static_call_key STATIC_CALL_KEY(name) = { \ + .func = __static_call_return0, \ + }; \ + ARCH_DEFINE_STATIC_CALL_RET0_TRAMP(name) #define static_call_cond(name) (void)__static_call(name) @@ -248,10 +262,7 @@ static inline int static_call_text_reserved(void *start, void *end) return 0; } -static inline long __static_call_return0(void) -{ - return 0; -} +extern long __static_call_return0(void); #define EXPORT_STATIC_CALL(name) \ EXPORT_SYMBOL(STATIC_CALL_KEY(name)); \ @@ -287,6 +298,9 @@ static inline long __static_call_return0(void) .func = NULL, \ } +#define DEFINE_STATIC_CALL_RET0(name, _func) \ + __DEFINE_STATIC_CALL(name, _func, __static_call_return0) + static inline void __static_call_nop(void) { } /* @@ -330,7 +344,4 @@ static inline int static_call_text_reserved(void *start, void *end) #define DEFINE_STATIC_CALL(name, _func) \ __DEFINE_STATIC_CALL(name, _func, _func) -#define DEFINE_STATIC_CALL_RET0(name, _func) \ - __DEFINE_STATIC_CALL(name, _func, __static_call_return0) - #endif /* _LINUX_STATIC_CALL_H */ diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index f35c22b3355ff..66b49afb9e693 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -412,6 +412,7 @@ struct svc_deferred_req { size_t addrlen; struct sockaddr_storage daddr; /* where reply must come from */ size_t daddrlen; + void *xprt_ctxt; struct cache_deferred_req handle; size_t 
xprt_hlen; int argslen; diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index b519609af1d02..4417f667c757e 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -731,6 +731,8 @@ xdr_stream_decode_uint32_array(struct xdr_stream *xdr, if (unlikely(xdr_stream_decode_u32(xdr, &len) < 0)) return -EBADMSG; + if (len > SIZE_MAX / sizeof(*p)) + return -EBADMSG; p = xdr_inline_decode(xdr, len * sizeof(*p)); if (unlikely(!p)) return -EBADMSG; diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 955ea4d7af0b2..eef5e87c03b43 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -139,6 +139,9 @@ struct rpc_xprt_ops { void (*rpcbind)(struct rpc_task *task); void (*set_port)(struct rpc_xprt *xprt, unsigned short port); void (*connect)(struct rpc_xprt *xprt, struct rpc_task *task); + int (*get_srcaddr)(struct rpc_xprt *xprt, char *buf, + size_t buflen); + unsigned short (*get_srcport)(struct rpc_xprt *xprt); int (*buf_alloc)(struct rpc_task *task); void (*buf_free)(struct rpc_task *task); void (*prepare_request)(struct rpc_rqst *req); diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h index 8c2a712cb2420..fed813ffe7db1 100644 --- a/include/linux/sunrpc/xprtsock.h +++ b/include/linux/sunrpc/xprtsock.h @@ -10,7 +10,6 @@ int init_socket_xprt(void); void cleanup_socket_xprt(void); -unsigned short get_srcport(struct rpc_xprt *); #define RPC_MIN_RESVPORT (1U) #define RPC_MAX_RESVPORT (65535U) @@ -89,5 +88,6 @@ struct sock_xprt { #define XPRT_SOCK_WAKE_WRITE (5) #define XPRT_SOCK_WAKE_PENDING (6) #define XPRT_SOCK_WAKE_DISCONNECT (7) +#define XPRT_SOCK_CONNECT_SENT (8) #endif /* _LINUX_SUNRPC_XPRTSOCK_H */ diff --git a/include/linux/swap.h b/include/linux/swap.h index 1d38d9475c4d0..04d84ac6d1ac4 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -137,6 +137,10 @@ union swap_header { */ struct reclaim_state { unsigned long reclaimed_slab; +#ifdef 
CONFIG_LRU_GEN + /* per-thread mm walk data */ + struct lru_gen_mm_walk *mm_walk; +#endif }; #ifdef __KERNEL__ @@ -372,6 +376,7 @@ extern void lru_add_drain(void); extern void lru_add_drain_cpu(int cpu); extern void lru_add_drain_cpu_zone(struct zone *zone); extern void lru_add_drain_all(void); +extern void folio_activate(struct folio *folio); extern void deactivate_file_page(struct page *page); extern void deactivate_page(struct page *page); extern void mark_page_lazyfree(struct page *page); diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 819c0cb00b6d3..0cd9c82d6c22d 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -921,6 +921,7 @@ asmlinkage long sys_madvise(unsigned long start, size_t len, int behavior); asmlinkage long sys_process_madvise(int pidfd, const struct iovec __user *vec, size_t vlen, int behavior, unsigned int flags); asmlinkage long sys_process_mrelease(int pidfd, unsigned int flags); +asmlinkage long sys_pmadv_ksm(int pidfd, int behavior, unsigned int flags); asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size, unsigned long prot, unsigned long pgoff, unsigned long flags); diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 78b91bb92f0d5..618568b01a80d 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -225,7 +225,8 @@ struct tcp_sock { u8 compressed_ack; u8 dup_ack_counter:2, tlp_retrans:1, /* TLP is a retransmission */ - unused:5; + fast_ack_mode:2, /* which fast ack mode ? 
*/ + unused:3; u32 chrono_start; /* Start time in jiffies of a TCP chrono */ u32 chrono_stat[3]; /* Time in jiffies for chrono_stat stats */ u8 chrono_type:2, /* current chronograph type */ diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 33a4240e6a6f1..82213f9c4c17f 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -139,6 +139,8 @@ static inline void set_rlimit_ucount_max(struct user_namespace *ns, #ifdef CONFIG_USER_NS +extern int unprivileged_userns_clone; + static inline struct user_namespace *get_user_ns(struct user_namespace *ns) { if (ns) @@ -172,6 +174,8 @@ extern bool current_in_userns(const struct user_namespace *target_ns); struct ns_common *ns_get_owner(struct ns_common *ns); #else +#define unprivileged_userns_clone 0 + static inline struct user_namespace *get_user_ns(struct user_namespace *ns) { return &init_user_ns; diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h index ef9a44b6cf5d5..6e5db4edc3359 100644 --- a/include/linux/vfio_pci_core.h +++ b/include/linux/vfio_pci_core.h @@ -133,6 +133,8 @@ struct vfio_pci_core_device { struct mutex ioeventfds_lock; struct list_head ioeventfds_list; struct vfio_pci_vf_token *vf_token; + struct list_head sriov_pfs_item; + struct vfio_pci_core_device *sriov_pf_core_dev; struct notifier_block nb; struct mutex vma_lock; struct list_head vma_list; @@ -159,8 +161,17 @@ extern ssize_t vfio_pci_config_rw(struct vfio_pci_core_device *vdev, extern ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf, size_t count, loff_t *ppos, bool iswrite); +#ifdef CONFIG_VFIO_PCI_VGA extern ssize_t vfio_pci_vga_rw(struct vfio_pci_core_device *vdev, char __user *buf, size_t count, loff_t *ppos, bool iswrite); +#else +static inline ssize_t vfio_pci_vga_rw(struct vfio_pci_core_device *vdev, + char __user *buf, size_t count, + loff_t *ppos, bool iswrite) +{ + return -EINVAL; +} +#endif extern long vfio_pci_ioeventfd(struct 
vfio_pci_core_device *vdev, loff_t offset, uint64_t data, int count, int fd); diff --git a/include/linux/xarray.h b/include/linux/xarray.h index d6d5da6ed7354..66e28bc1a023f 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -9,6 +9,7 @@ * See Documentation/core-api/xarray.rst for how to use the XArray. */ +#include <linux/bitmap.h> #include <linux/bug.h> #include <linux/compiler.h> #include <linux/gfp.h> diff --git a/include/net/arp.h b/include/net/arp.h index 031374ac2f222..d7ef4ec71dfeb 100644 --- a/include/net/arp.h +++ b/include/net/arp.h @@ -65,6 +65,7 @@ void arp_send(int type, int ptype, __be32 dest_ip, const unsigned char *src_hw, const unsigned char *th); int arp_mc_map(__be32 addr, u8 *haddr, struct net_device *dev, int dir); void arp_ifdown(struct net_device *dev); +int arp_invalidate(struct net_device *dev, __be32 ip, bool force); struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip, struct net_device *dev, __be32 src_ip, diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index a647e5fabdbd6..2aa5e95808a5a 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -204,19 +204,21 @@ void bt_err_ratelimited(const char *fmt, ...); #define BT_DBG(fmt, ...) pr_debug(fmt "\n", ##__VA_ARGS__) #endif +#define bt_dev_name(hdev) ((hdev) ? (hdev)->name : "null") + #define bt_dev_info(hdev, fmt, ...) \ - BT_INFO("%s: " fmt, (hdev)->name, ##__VA_ARGS__) + BT_INFO("%s: " fmt, bt_dev_name(hdev), ##__VA_ARGS__) #define bt_dev_warn(hdev, fmt, ...) \ - BT_WARN("%s: " fmt, (hdev)->name, ##__VA_ARGS__) + BT_WARN("%s: " fmt, bt_dev_name(hdev), ##__VA_ARGS__) #define bt_dev_err(hdev, fmt, ...) \ - BT_ERR("%s: " fmt, (hdev)->name, ##__VA_ARGS__) + BT_ERR("%s: " fmt, bt_dev_name(hdev), ##__VA_ARGS__) #define bt_dev_dbg(hdev, fmt, ...) \ - BT_DBG("%s: " fmt, (hdev)->name, ##__VA_ARGS__) + BT_DBG("%s: " fmt, bt_dev_name(hdev), ##__VA_ARGS__) #define bt_dev_warn_ratelimited(hdev, fmt, ...) 
\ - bt_warn_ratelimited("%s: " fmt, (hdev)->name, ##__VA_ARGS__) + bt_warn_ratelimited("%s: " fmt, bt_dev_name(hdev), ##__VA_ARGS__) #define bt_dev_err_ratelimited(hdev, fmt, ...) \ - bt_err_ratelimited("%s: " fmt, (hdev)->name, ##__VA_ARGS__) + bt_err_ratelimited("%s: " fmt, bt_dev_name(hdev), ##__VA_ARGS__) /* Connection and socket states */ enum { diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 35c073d44ec5a..5cb095b09a940 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -255,6 +255,16 @@ enum { * during the hdev->setup vendor callback. */ HCI_QUIRK_BROKEN_READ_TRANSMIT_POWER, + + /* When this quirk is set, HCI_OP_SET_EVENT_FLT requests with + * HCI_FLT_CLEAR_ALL are ignored and event filtering is + * completely avoided. A subset of the CSR controller + * clones struggle with this and instantly lock up. + * + * Note that devices using this must (separately) disable + * runtime suspend, because event filtering takes place there. 
+ */ + HCI_QUIRK_BROKEN_FILTER_CLEAR_ALL, }; /* HCI device flags */ diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index e336e9c1dda4f..36d727f94ac29 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -294,6 +294,9 @@ struct adv_monitor { #define HCI_MAX_SHORT_NAME_LENGTH 10 +#define HCI_CONN_HANDLE_UNSET 0xffff +#define HCI_CONN_HANDLE_MAX 0x0eff + /* Min encryption key size to match with SMP */ #define HCI_MIN_ENC_KEY_SIZE 7 diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index aa33e1092e2c4..9f65f1bfbd246 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -59,6 +59,8 @@ struct flow_dissector_key_vlan { __be16 vlan_tci; }; __be16 vlan_tpid; + __be16 vlan_eth_type; + u16 padding; }; struct flow_dissector_mpls_lse { diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 4ad47d9f9d271..7623bb1f89c06 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -134,7 +134,8 @@ struct inet_connection_sock { u32 icsk_probes_tstamp; u32 icsk_user_timeout; - u64 icsk_ca_priv[104 / sizeof(u64)]; +/* XXX inflated by temporary internal debugging info */ + u64 icsk_ca_priv[216 / sizeof(u64)]; #define ICSK_CA_PRIV_SIZE sizeof_field(struct inet_connection_sock, icsk_ca_priv) }; diff --git a/include/net/mctp.h b/include/net/mctp.h index 7e35ec79b909c..204ae3aebc0da 100644 --- a/include/net/mctp.h +++ b/include/net/mctp.h @@ -36,8 +36,6 @@ struct mctp_hdr { #define MCTP_HDR_TAG_SHIFT 0 #define MCTP_HDR_TAG_MASK GENMASK(2, 0) -#define MCTP_HEADER_MAXLEN 4 - #define MCTP_INITIAL_DEFAULT_NET 1 static inline bool mctp_address_ok(mctp_eid_t eid) diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 5b61c462e534b..374cc7b260fcd 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -513,4 +513,10 @@ static inline void fnhe_genid_bump(struct net *net) 
atomic_inc(&net->fnhe_genid); } +#ifdef CONFIG_NET +void net_ns_init(void); +#else +static inline void net_ns_init(void) {} +#endif + #endif /* __NET_NET_NAMESPACE_H */ diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h index 37f0fbefb060f..9939c366f720d 100644 --- a/include/net/netfilter/nf_conntrack_helper.h +++ b/include/net/netfilter/nf_conntrack_helper.h @@ -177,4 +177,5 @@ void nf_nat_helper_unregister(struct nf_conntrack_nat_helper *nat); int nf_nat_helper_try_module_get(const char *name, u16 l3num, u8 protonum); void nf_nat_helper_put(struct nf_conntrack_helper *helper); +void nf_ct_set_auto_assign_helper_warned(struct net *net); #endif /*_NF_CONNTRACK_HELPER_H*/ diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index bd59e950f4d67..64daafd1fc41c 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -10,6 +10,8 @@ #include #include #include +#include +#include struct nf_flowtable; struct nf_flow_rule; @@ -317,4 +319,20 @@ int nf_flow_rule_route_ipv6(struct net *net, const struct flow_offload *flow, int nf_flow_table_offload_init(void); void nf_flow_table_offload_exit(void); +static inline __be16 nf_flow_pppoe_proto(const struct sk_buff *skb) +{ + __be16 proto; + + proto = *((__be16 *)(skb_mac_header(skb) + ETH_HLEN + + sizeof(struct pppoe_hdr))); + switch (proto) { + case htons(PPP_IP): + return htons(ETH_P_IP); + case htons(PPP_IPV6): + return htons(ETH_P_IPV6); + } + + return 0; +} + #endif /* _NF_FLOW_TABLE_H */ diff --git a/include/net/tcp.h b/include/net/tcp.h index b9fc978fb2cad..e72896801fdf3 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -370,6 +370,7 @@ static inline void tcp_dec_quickack_mode(struct sock *sk, #define TCP_ECN_QUEUE_CWR 2 #define TCP_ECN_DEMAND_CWR 4 #define TCP_ECN_SEEN 8 +#define TCP_ECN_ECT_PERMANENT 16 enum tcp_tw_status { TCP_TW_SUCCESS = 0, @@ -808,6 +809,11 @@ static inline u32 
tcp_stamp_us_delta(u64 t1, u64 t0) return max_t(s64, t1 - t0, 0); } +static inline u32 tcp_stamp32_us_delta(u32 t1, u32 t0) +{ + return max_t(s32, t1 - t0, 0); +} + static inline u32 tcp_skb_timestamp(const struct sk_buff *skb) { return tcp_ns_to_ts(skb->skb_mstamp_ns); @@ -883,9 +889,14 @@ struct tcp_skb_cb { /* pkts S/ACKed so far upon tx of skb, incl retrans: */ __u32 delivered; /* start of send pipeline phase */ - u64 first_tx_mstamp; + u32 first_tx_mstamp; /* when we reached the "delivered" count */ - u64 delivered_mstamp; + u32 delivered_mstamp; +#define TCPCB_IN_FLIGHT_BITS 20 +#define TCPCB_IN_FLIGHT_MAX ((1U << TCPCB_IN_FLIGHT_BITS) - 1) + u32 in_flight:20, /* packets in flight at transmit */ + unused2:12; + u32 lost; /* packets lost so far upon tx of skb */ } tx; /* only used for outgoing skbs */ union { struct inet_skb_parm h4; @@ -1011,7 +1022,11 @@ enum tcp_ca_ack_event_flags { #define TCP_CONG_NON_RESTRICTED 0x1 /* Requires ECN/ECT set on all packets */ #define TCP_CONG_NEEDS_ECN 0x2 -#define TCP_CONG_MASK (TCP_CONG_NON_RESTRICTED | TCP_CONG_NEEDS_ECN) +/* Wants notification of CE events (CA_EVENT_ECN_IS_CE, CA_EVENT_ECN_NO_CE). 
*/ +#define TCP_CONG_WANTS_CE_EVENTS 0x4 +#define TCP_CONG_MASK (TCP_CONG_NON_RESTRICTED | \ + TCP_CONG_NEEDS_ECN | \ + TCP_CONG_WANTS_CE_EVENTS) union tcp_cc_info; @@ -1031,8 +1046,11 @@ struct ack_sample { */ struct rate_sample { u64 prior_mstamp; /* starting timestamp for interval */ + u32 prior_lost; /* tp->lost at "prior_mstamp" */ u32 prior_delivered; /* tp->delivered at "prior_mstamp" */ u32 prior_delivered_ce;/* tp->delivered_ce at "prior_mstamp" */ + u32 tx_in_flight; /* packets in flight at starting timestamp */ + s32 lost; /* number of packets lost over interval */ s32 delivered; /* number of packets delivered over interval */ s32 delivered_ce; /* number of packets delivered w/ CE marks*/ long interval_us; /* time for tp->delivered to incr "delivered" */ @@ -1045,6 +1063,7 @@ struct rate_sample { bool is_app_limited; /* is sample from packet with bubble in pipe? */ bool is_retrans; /* is sample from retransmission? */ bool is_ack_delayed; /* is this (likely) a delayed ACK? */ + bool is_ece; /* did this ACK have ECN marked? */ }; struct tcp_congestion_ops { @@ -1068,8 +1087,11 @@ struct tcp_congestion_ops { /* hook for packet ack accounting (optional) */ void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample); - /* override sysctl_tcp_min_tso_segs */ - u32 (*min_tso_segs)(struct sock *sk); + /* pick target number of segments per TSO/GSO skb (optional): */ + u32 (*tso_segs)(struct sock *sk, unsigned int mss_now); + + /* react to a specific lost skb (optional) */ + void (*skb_marked_lost)(struct sock *sk, const struct sk_buff *skb); /* call when packets are delivered to update cwnd and pacing rate, * after all the ca_state processing. 
(optional) @@ -1132,6 +1154,14 @@ static inline char *tcp_ca_get_name_by_key(u32 key, char *buffer) } #endif +static inline bool tcp_ca_wants_ce_events(const struct sock *sk) +{ + const struct inet_connection_sock *icsk = inet_csk(sk); + + return icsk->icsk_ca_ops->flags & (TCP_CONG_NEEDS_ECN | + TCP_CONG_WANTS_CE_EVENTS); +} + static inline bool tcp_ca_needs_ecn(const struct sock *sk) { const struct inet_connection_sock *icsk = inet_csk(sk); @@ -1157,6 +1187,7 @@ static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event) } /* From tcp_rate.c */ +void tcp_set_tx_in_flight(struct sock *sk, struct sk_buff *skb); void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb); void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb, struct rate_sample *rs); diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index 647c53b261051..57e3e239a1fce 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -206,6 +206,7 @@ struct scsi_device { unsigned rpm_autosuspend:1; /* Enable runtime autosuspend at device * creation time */ unsigned ignore_media_change:1; /* Ignore MEDIA CHANGE on resume */ + unsigned silence_suspend:1; /* Do not print runtime PM related messages */ unsigned int queue_stopped; /* request queue is quiesced */ bool offline_already; /* Device offline message logged */ diff --git a/include/scsi/scsi_transport_iscsi.h b/include/scsi/scsi_transport_iscsi.h index c5d7810fd7926..037c77fb5dc55 100644 --- a/include/scsi/scsi_transport_iscsi.h +++ b/include/scsi/scsi_transport_iscsi.h @@ -211,6 +211,8 @@ struct iscsi_cls_conn { struct mutex ep_mutex; struct iscsi_endpoint *ep; + /* Used when accessing flags and queueing work. 
*/ + spinlock_t lock; unsigned long flags; struct work_struct cleanup_work; diff --git a/include/sound/core.h b/include/sound/core.h index b7e9b58d3c788..6d4cc49584c63 100644 --- a/include/sound/core.h +++ b/include/sound/core.h @@ -284,6 +284,7 @@ int snd_card_disconnect(struct snd_card *card); void snd_card_disconnect_sync(struct snd_card *card); int snd_card_free(struct snd_card *card); int snd_card_free_when_closed(struct snd_card *card); +int snd_card_free_on_error(struct device *dev, int ret); void snd_card_set_id(struct snd_card *card, const char *id); int snd_card_register(struct snd_card *card); int snd_card_info_init(void); diff --git a/include/sound/intel-nhlt.h b/include/sound/intel-nhlt.h index 089a760d36eb7..6fb2d5e378fdd 100644 --- a/include/sound/intel-nhlt.h +++ b/include/sound/intel-nhlt.h @@ -18,6 +18,13 @@ enum nhlt_link_type { NHLT_LINK_INVALID }; +enum nhlt_device_type { + NHLT_DEVICE_BT = 0, + NHLT_DEVICE_DMIC = 1, + NHLT_DEVICE_I2S = 4, + NHLT_DEVICE_INVALID +}; + #if IS_ENABLED(CONFIG_ACPI) && IS_ENABLED(CONFIG_SND_INTEL_NHLT) struct wav_fmt { @@ -41,13 +48,6 @@ struct wav_fmt_ext { u8 sub_fmt[16]; } __packed; -enum nhlt_device_type { - NHLT_DEVICE_BT = 0, - NHLT_DEVICE_DMIC = 1, - NHLT_DEVICE_I2S = 4, - NHLT_DEVICE_INVALID -}; - struct nhlt_specific_cfg { u32 size; u8 caps[]; @@ -133,6 +133,9 @@ void intel_nhlt_free(struct nhlt_acpi_table *addr); int intel_nhlt_get_dmic_geo(struct device *dev, struct nhlt_acpi_table *nhlt); bool intel_nhlt_has_endpoint_type(struct nhlt_acpi_table *nhlt, u8 link_type); + +int intel_nhlt_ssp_endpoint_mask(struct nhlt_acpi_table *nhlt, u8 device_type); + struct nhlt_specific_cfg * intel_nhlt_get_endpoint_blob(struct device *dev, struct nhlt_acpi_table *nhlt, u32 bus_id, u8 link_type, u8 vbps, u8 bps, @@ -163,6 +166,11 @@ static inline bool intel_nhlt_has_endpoint_type(struct nhlt_acpi_table *nhlt, return false; } +static inline int intel_nhlt_ssp_endpoint_mask(struct nhlt_acpi_table *nhlt, u8 device_type) +{ 
+ return 0; +} + static inline struct nhlt_specific_cfg * intel_nhlt_get_endpoint_blob(struct device *dev, struct nhlt_acpi_table *nhlt, u32 bus_id, u8 link_type, u8 vbps, u8 bps, diff --git a/include/sound/memalloc.h b/include/sound/memalloc.h index 653dfffb3ac84..8d79cebf95f32 100644 --- a/include/sound/memalloc.h +++ b/include/sound/memalloc.h @@ -51,6 +51,11 @@ struct snd_dma_device { #define SNDRV_DMA_TYPE_DEV_SG SNDRV_DMA_TYPE_DEV /* no SG-buf support */ #define SNDRV_DMA_TYPE_DEV_WC_SG SNDRV_DMA_TYPE_DEV_WC #endif +/* fallback types, don't use those directly */ +#ifdef CONFIG_SND_DMA_SGBUF +#define SNDRV_DMA_TYPE_DEV_SG_FALLBACK 10 +#define SNDRV_DMA_TYPE_DEV_WC_SG_FALLBACK 11 +#endif /* * info for buffer allocation diff --git a/include/sound/pcm.h b/include/sound/pcm.h index 36da42cd07748..6b99310b5b889 100644 --- a/include/sound/pcm.h +++ b/include/sound/pcm.h @@ -401,6 +401,8 @@ struct snd_pcm_runtime { wait_queue_head_t tsleep; /* transfer sleep */ struct fasync_struct *fasync; bool stop_operating; /* sync_stop will be called */ + struct mutex buffer_mutex; /* protect for buffer changes */ + atomic_t buffer_accessing; /* >0: in r/w operation, <0: blocked */ /* -- private section -- */ void *private_data; diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 19e957b7f9410..1a0b7030f72a3 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -95,6 +95,17 @@ TRACE_DEFINE_ENUM(ES_REFERENCED_B); { FALLOC_FL_COLLAPSE_RANGE, "COLLAPSE_RANGE"}, \ { FALLOC_FL_ZERO_RANGE, "ZERO_RANGE"}) +TRACE_DEFINE_ENUM(EXT4_FC_REASON_XATTR); +TRACE_DEFINE_ENUM(EXT4_FC_REASON_CROSS_RENAME); +TRACE_DEFINE_ENUM(EXT4_FC_REASON_JOURNAL_FLAG_CHANGE); +TRACE_DEFINE_ENUM(EXT4_FC_REASON_NOMEM); +TRACE_DEFINE_ENUM(EXT4_FC_REASON_SWAP_BOOT); +TRACE_DEFINE_ENUM(EXT4_FC_REASON_RESIZE); +TRACE_DEFINE_ENUM(EXT4_FC_REASON_RENAME_DIR); +TRACE_DEFINE_ENUM(EXT4_FC_REASON_FALLOC_RANGE); +TRACE_DEFINE_ENUM(EXT4_FC_REASON_INODE_JOURNAL_DATA); 
+TRACE_DEFINE_ENUM(EXT4_FC_REASON_MAX); + #define show_fc_reason(reason) \ __print_symbolic(reason, \ { EXT4_FC_REASON_XATTR, "XATTR"}, \ @@ -2723,41 +2734,50 @@ TRACE_EVENT(ext4_fc_commit_stop, #define FC_REASON_NAME_STAT(reason) \ show_fc_reason(reason), \ - __entry->sbi->s_fc_stats.fc_ineligible_reason_count[reason] + __entry->fc_ineligible_rc[reason] TRACE_EVENT(ext4_fc_stats, - TP_PROTO(struct super_block *sb), - - TP_ARGS(sb), + TP_PROTO(struct super_block *sb), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(struct ext4_sb_info *, sbi) - __field(int, count) - ), + TP_ARGS(sb), - TP_fast_assign( - __entry->dev = sb->s_dev; - __entry->sbi = EXT4_SB(sb); - ), + TP_STRUCT__entry( + __field(dev_t, dev) + __array(unsigned int, fc_ineligible_rc, EXT4_FC_REASON_MAX) + __field(unsigned long, fc_commits) + __field(unsigned long, fc_ineligible_commits) + __field(unsigned long, fc_numblks) + ), - TP_printk("dev %d:%d fc ineligible reasons:\n" - "%s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d; " - "num_commits:%ld, ineligible: %ld, numblks: %ld", - MAJOR(__entry->dev), MINOR(__entry->dev), - FC_REASON_NAME_STAT(EXT4_FC_REASON_XATTR), - FC_REASON_NAME_STAT(EXT4_FC_REASON_CROSS_RENAME), - FC_REASON_NAME_STAT(EXT4_FC_REASON_JOURNAL_FLAG_CHANGE), - FC_REASON_NAME_STAT(EXT4_FC_REASON_NOMEM), - FC_REASON_NAME_STAT(EXT4_FC_REASON_SWAP_BOOT), - FC_REASON_NAME_STAT(EXT4_FC_REASON_RESIZE), - FC_REASON_NAME_STAT(EXT4_FC_REASON_RENAME_DIR), - FC_REASON_NAME_STAT(EXT4_FC_REASON_FALLOC_RANGE), - FC_REASON_NAME_STAT(EXT4_FC_REASON_INODE_JOURNAL_DATA), - __entry->sbi->s_fc_stats.fc_num_commits, - __entry->sbi->s_fc_stats.fc_ineligible_commits, - __entry->sbi->s_fc_stats.fc_numblks) + TP_fast_assign( + int i; + __entry->dev = sb->s_dev; + for (i = 0; i < EXT4_FC_REASON_MAX; i++) { + __entry->fc_ineligible_rc[i] = + EXT4_SB(sb)->s_fc_stats.fc_ineligible_reason_count[i]; + } + __entry->fc_commits = EXT4_SB(sb)->s_fc_stats.fc_num_commits; + __entry->fc_ineligible_commits 
= + EXT4_SB(sb)->s_fc_stats.fc_ineligible_commits; + __entry->fc_numblks = EXT4_SB(sb)->s_fc_stats.fc_numblks; + ), + + TP_printk("dev %d,%d fc ineligible reasons:\n" + "%s:%u, %s:%u, %s:%u, %s:%u, %s:%u, %s:%u, %s:%u, %s:%u, %s:%u " + "num_commits:%lu, ineligible: %lu, numblks: %lu", + MAJOR(__entry->dev), MINOR(__entry->dev), + FC_REASON_NAME_STAT(EXT4_FC_REASON_XATTR), + FC_REASON_NAME_STAT(EXT4_FC_REASON_CROSS_RENAME), + FC_REASON_NAME_STAT(EXT4_FC_REASON_JOURNAL_FLAG_CHANGE), + FC_REASON_NAME_STAT(EXT4_FC_REASON_NOMEM), + FC_REASON_NAME_STAT(EXT4_FC_REASON_SWAP_BOOT), + FC_REASON_NAME_STAT(EXT4_FC_REASON_RESIZE), + FC_REASON_NAME_STAT(EXT4_FC_REASON_RENAME_DIR), + FC_REASON_NAME_STAT(EXT4_FC_REASON_FALLOC_RANGE), + FC_REASON_NAME_STAT(EXT4_FC_REASON_INODE_JOURNAL_DATA), + __entry->fc_commits, __entry->fc_ineligible_commits, + __entry->fc_numblks) ); #define DEFINE_TRACE_DENTRY_EVENT(__type) \ diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index e70c90116edae..4a3ab0ed6e062 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -83,12 +83,15 @@ enum rxrpc_call_trace { rxrpc_call_error, rxrpc_call_got, rxrpc_call_got_kernel, + rxrpc_call_got_timer, rxrpc_call_got_userid, rxrpc_call_new_client, rxrpc_call_new_service, rxrpc_call_put, rxrpc_call_put_kernel, rxrpc_call_put_noqueue, + rxrpc_call_put_notimer, + rxrpc_call_put_timer, rxrpc_call_put_userid, rxrpc_call_queued, rxrpc_call_queued_ref, @@ -278,12 +281,15 @@ enum rxrpc_tx_point { EM(rxrpc_call_error, "*E*") \ EM(rxrpc_call_got, "GOT") \ EM(rxrpc_call_got_kernel, "Gke") \ + EM(rxrpc_call_got_timer, "GTM") \ EM(rxrpc_call_got_userid, "Gus") \ EM(rxrpc_call_new_client, "NWc") \ EM(rxrpc_call_new_service, "NWs") \ EM(rxrpc_call_put, "PUT") \ EM(rxrpc_call_put_kernel, "Pke") \ - EM(rxrpc_call_put_noqueue, "PNQ") \ + EM(rxrpc_call_put_noqueue, "PnQ") \ + EM(rxrpc_call_put_notimer, "PnT") \ + EM(rxrpc_call_put_timer, "PTM") \ EM(rxrpc_call_put_userid, "Pus") \ 
EM(rxrpc_call_queued, "QUE") \ EM(rxrpc_call_queued_ref, "QUR") \ diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 29982d60b68ab..06fe47fb3686a 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -1005,7 +1005,6 @@ DEFINE_RPC_XPRT_LIFETIME_EVENT(connect); DEFINE_RPC_XPRT_LIFETIME_EVENT(disconnect_auto); DEFINE_RPC_XPRT_LIFETIME_EVENT(disconnect_done); DEFINE_RPC_XPRT_LIFETIME_EVENT(disconnect_force); -DEFINE_RPC_XPRT_LIFETIME_EVENT(disconnect_cleanup); DEFINE_RPC_XPRT_LIFETIME_EVENT(destroy); DECLARE_EVENT_CLASS(rpc_xprt_event, @@ -1957,17 +1956,18 @@ DECLARE_EVENT_CLASS(svc_deferred_event, TP_STRUCT__entry( __field(const void *, dr) __field(u32, xid) - __string(addr, dr->xprt->xpt_remotebuf) + __array(__u8, addr, INET6_ADDRSTRLEN + 10) ), TP_fast_assign( __entry->dr = dr; __entry->xid = be32_to_cpu(*(__be32 *)(dr->args + (dr->xprt_hlen>>2))); - __assign_str(addr, dr->xprt->xpt_remotebuf); + snprintf(__entry->addr, sizeof(__entry->addr) - 1, + "%pISpc", (struct sockaddr *)&dr->addr); ), - TP_printk("addr=%s dr=%p xid=0x%08x", __get_str(addr), __entry->dr, + TP_printk("addr=%s dr=%p xid=0x%08x", __entry->addr, __entry->dr, __entry->xid) ); diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index 1c48b0ae3ba30..c780129ab7423 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -886,8 +886,11 @@ __SYSCALL(__NR_futex_waitv, sys_futex_waitv) #define __NR_set_mempolicy_home_node 450 __SYSCALL(__NR_set_mempolicy_home_node, sys_set_mempolicy_home_node) +#define __NR_pmadv_ksm 451 +__SYSCALL(__NR_pmadv_ksm, sys_pmadv_ksm) + #undef __NR_syscalls -#define __NR_syscalls 451 +#define __NR_syscalls 452 /* * 32 bit systems traditionally used different diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index b0383d371b9af..4b46021eafa99 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2286,8 +2286,8 @@ union 
bpf_attr { * Return * The return value depends on the result of the test, and can be: * - * * 0, if current task belongs to the cgroup2. - * * 1, if current task does not belong to the cgroup2. + * * 1, if current task belongs to the cgroup2. + * * 0, if current task does not belong to the cgroup2. * * A negative error code, if an error occurred. * * long bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags) @@ -2975,8 +2975,8 @@ union bpf_attr { * * # sysctl kernel.perf_event_max_stack= * Return - * A non-negative value equal to or less than *size* on success, - * or a negative error in case of failure. + * The non-negative copied *buf* length equal to or less than + * *size* on success, or a negative error in case of failure. * * long bpf_skb_load_bytes_relative(const void *skb, u32 offset, void *to, u32 len, u32 start_header) * Description @@ -4279,8 +4279,8 @@ union bpf_attr { * * # sysctl kernel.perf_event_max_stack= * Return - * A non-negative value equal to or less than *size* on success, - * or a negative error in case of failure. + * The non-negative copied *buf* length equal to or less than + * *size* on success, or a negative error in case of failure. 
* * long bpf_load_hdr_opt(struct bpf_sock_ops *skops, void *searchby_res, u32 len, u64 flags) * Description @@ -5500,7 +5500,8 @@ struct bpf_sock { __u32 src_ip4; __u32 src_ip6[4]; __u32 src_port; /* host byte order */ - __u32 dst_port; /* network byte order */ + __be16 dst_port; /* network byte order */ + __u16 :16; /* zero padding */ __u32 dst_ip4; __u32 dst_ip6[4]; __u32 state; @@ -6378,7 +6379,8 @@ struct bpf_sk_lookup { __u32 protocol; /* IP protocol (IPPROTO_TCP, IPPROTO_UDP) */ __u32 remote_ip4; /* Network byte order */ __u32 remote_ip6[4]; /* Network byte order */ - __u32 remote_port; /* Network byte order */ + __be16 remote_port; /* Network byte order */ + __u16 :16; /* Zero padding */ __u32 local_ip4; /* Network byte order */ __u32 local_ip6[4]; /* Network byte order */ __u32 local_port; /* Host byte order */ diff --git a/include/uapi/linux/can/isotp.h b/include/uapi/linux/can/isotp.h index c55935b64ccc8..590f8aea2b6d2 100644 --- a/include/uapi/linux/can/isotp.h +++ b/include/uapi/linux/can/isotp.h @@ -137,20 +137,16 @@ struct can_isotp_ll_options { #define CAN_ISOTP_WAIT_TX_DONE 0x400 /* wait for tx completion */ #define CAN_ISOTP_SF_BROADCAST 0x800 /* 1-to-N functional addressing */ -/* default values */ +/* protocol machine default values */ #define CAN_ISOTP_DEFAULT_FLAGS 0 #define CAN_ISOTP_DEFAULT_EXT_ADDRESS 0x00 #define CAN_ISOTP_DEFAULT_PAD_CONTENT 0xCC /* prevent bit-stuffing */ -#define CAN_ISOTP_DEFAULT_FRAME_TXTIME 0 +#define CAN_ISOTP_DEFAULT_FRAME_TXTIME 50000 /* 50 micro seconds */ #define CAN_ISOTP_DEFAULT_RECV_BS 0 #define CAN_ISOTP_DEFAULT_RECV_STMIN 0x00 #define CAN_ISOTP_DEFAULT_RECV_WFTMAX 0 -#define CAN_ISOTP_DEFAULT_LL_MTU CAN_MTU -#define CAN_ISOTP_DEFAULT_LL_TX_DL CAN_MAX_DLEN -#define CAN_ISOTP_DEFAULT_LL_TX_FLAGS 0 - /* * Remark on CAN_ISOTP_DEFAULT_RECV_* values: * @@ -162,4 +158,24 @@ struct can_isotp_ll_options { * consistency and copied directly into the flow control (FC) frame. 
*/ +/* link layer default values => make use of Classical CAN frames */ + +#define CAN_ISOTP_DEFAULT_LL_MTU CAN_MTU +#define CAN_ISOTP_DEFAULT_LL_TX_DL CAN_MAX_DLEN +#define CAN_ISOTP_DEFAULT_LL_TX_FLAGS 0 + +/* + * The CAN_ISOTP_DEFAULT_FRAME_TXTIME has become a non-zero value as + * it only makes sense for isotp implementation tests to run without + * a N_As value. As user space applications usually do not set the + * frame_txtime element of struct can_isotp_options the new in-kernel + * default is very likely overwritten with zero when the sockopt() + * CAN_ISOTP_OPTS is invoked. + * To make sure that a N_As value of zero is only set intentionally, the + * value '0' is now interpreted as 'do not change the current value'. + * When a frame_txtime of zero is required for testing purposes this + * CAN_ISOTP_FRAME_TXTIME_ZERO u32 value has to be set in frame_txtime. + */ +#define CAN_ISOTP_FRAME_TXTIME_ZERO 0xFFFFFFFF + #endif /* !_UAPI_CAN_ISOTP_H */ diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h index 20ee93f0f8761..96d52dd9c48ac 100644 --- a/include/uapi/linux/inet_diag.h +++ b/include/uapi/linux/inet_diag.h @@ -231,9 +231,42 @@ struct tcp_bbr_info { __u32 bbr_cwnd_gain; /* cwnd gain shifted left 8 bits */ }; +/* Phase as reported in netlink/ss stats. 
*/ +enum tcp_bbr2_phase { + BBR2_PHASE_INVALID = 0, + BBR2_PHASE_STARTUP = 1, + BBR2_PHASE_DRAIN = 2, + BBR2_PHASE_PROBE_RTT = 3, + BBR2_PHASE_PROBE_BW_UP = 4, + BBR2_PHASE_PROBE_BW_DOWN = 5, + BBR2_PHASE_PROBE_BW_CRUISE = 6, + BBR2_PHASE_PROBE_BW_REFILL = 7 +}; + +struct tcp_bbr2_info { + /* u64 bw: bandwidth (app throughput) estimate in Byte per sec: */ + __u32 bbr_bw_lsb; /* lower 32 bits of bw */ + __u32 bbr_bw_msb; /* upper 32 bits of bw */ + __u32 bbr_min_rtt; /* min-filtered RTT in uSec */ + __u32 bbr_pacing_gain; /* pacing gain shifted left 8 bits */ + __u32 bbr_cwnd_gain; /* cwnd gain shifted left 8 bits */ + __u32 bbr_bw_hi_lsb; /* lower 32 bits of bw_hi */ + __u32 bbr_bw_hi_msb; /* upper 32 bits of bw_hi */ + __u32 bbr_bw_lo_lsb; /* lower 32 bits of bw_lo */ + __u32 bbr_bw_lo_msb; /* upper 32 bits of bw_lo */ + __u8 bbr_mode; /* current bbr_mode in state machine */ + __u8 bbr_phase; /* current state machine phase */ + __u8 unused1; /* alignment padding; not used yet */ + __u8 bbr_version; /* MUST be at this offset in struct */ + __u32 bbr_inflight_lo; /* lower/short-term data volume bound */ + __u32 bbr_inflight_hi; /* higher/long-term data volume bound */ + __u32 bbr_extra_acked; /* max excess packets ACKed in epoch */ +}; + union tcp_cc_info { struct tcpvegas_info vegas; struct tcp_dctcp_info dctcp; struct tcp_bbr_info bbr; + struct tcp_bbr2_info bbr2; }; #endif /* _UAPI_INET_DIAG_H_ */ diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 787f491f0d2ae..1e45368ad33fd 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -293,6 +293,7 @@ struct io_uring_params { #define IORING_FEAT_NATIVE_WORKERS (1U << 9) #define IORING_FEAT_RSRC_TAGS (1U << 10) #define IORING_FEAT_CQE_SKIP (1U << 11) +#define IORING_FEAT_LINKED_FILE (1U << 12) /* * io_uring_register(2) opcodes and arguments diff --git a/include/uapi/linux/loop.h b/include/uapi/linux/loop.h index 24a1c45bd1ae2..98e60801195e2 100644 --- 
a/include/uapi/linux/loop.h +++ b/include/uapi/linux/loop.h @@ -45,7 +45,7 @@ struct loop_info { unsigned long lo_inode; /* ioctl r/o */ __kernel_old_dev_t lo_rdevice; /* ioctl r/o */ int lo_offset; - int lo_encrypt_type; + int lo_encrypt_type; /* obsolete, ignored */ int lo_encrypt_key_size; /* ioctl w/o */ int lo_flags; char lo_name[LO_NAME_SIZE]; @@ -61,7 +61,7 @@ struct loop_info64 { __u64 lo_offset; __u64 lo_sizelimit;/* bytes, 0 == max available */ __u32 lo_number; /* ioctl r/o */ - __u32 lo_encrypt_type; + __u32 lo_encrypt_type; /* obsolete, ignored */ __u32 lo_encrypt_key_size; /* ioctl w/o */ __u32 lo_flags; __u8 lo_file_name[LO_NAME_SIZE]; diff --git a/include/uapi/linux/omap3isp.h b/include/uapi/linux/omap3isp.h index 87b55755f4ffe..d9db7ad438908 100644 --- a/include/uapi/linux/omap3isp.h +++ b/include/uapi/linux/omap3isp.h @@ -162,6 +162,7 @@ struct omap3isp_h3a_aewb_config { * struct omap3isp_stat_data - Statistic data sent to or received from user * @ts: Timestamp of returned framestats. * @buf: Pointer to pass to user. + * @buf_size: Size of buffer. * @frame_number: Frame number of requested stats. * @cur_frame: Current frame number being processed. * @config_counter: Number of the configuration associated with the data. 
@@ -176,10 +177,12 @@ struct omap3isp_stat_data { struct timeval ts; #endif void __user *buf; - __u32 buf_size; - __u16 frame_number; - __u16 cur_frame; - __u16 config_counter; + __struct_group(/* no tag */, frame, /* no attrs */, + __u32 buf_size; + __u16 frame_number; + __u16 cur_frame; + __u16 config_counter; + ); }; #ifdef __KERNEL__ @@ -189,10 +192,12 @@ struct omap3isp_stat_data_time32 { __s32 tv_usec; } ts; __u32 buf; - __u32 buf_size; - __u16 frame_number; - __u16 cur_frame; - __u16 config_counter; + __struct_group(/* no tag */, frame, /* no attrs */, + __u32 buf_size; + __u16 frame_number; + __u16 cur_frame; + __u16 config_counter; + ); }; #endif diff --git a/include/uapi/linux/rfkill.h b/include/uapi/linux/rfkill.h index 9b77cfc42efa3..283c5a7b3f2c8 100644 --- a/include/uapi/linux/rfkill.h +++ b/include/uapi/linux/rfkill.h @@ -159,8 +159,16 @@ struct rfkill_event_ext { * old behaviour for all userspace, unless it explicitly opts in to the * rules outlined here by using the new &struct rfkill_event_ext. * - * Userspace using &struct rfkill_event_ext must adhere to the following - * rules + * Additionally, some other userspace (bluez, g-s-d) was reading with a + * large size but as streaming reads rather than message-based, or with + * too strict checks for the returned size. So eventually, we completely + * reverted this, and extended messages need to be opted in to by using + * an ioctl: + * + * ioctl(fd, RFKILL_IOCTL_MAX_SIZE, sizeof(struct rfkill_event_ext)); + * + * Userspace using &struct rfkill_event_ext and the ioctl must adhere to + * the following rules: * * 1. 
accept short writes, optionally using them to detect that it's * running on an older kernel; @@ -175,6 +183,8 @@ struct rfkill_event_ext { #define RFKILL_IOC_MAGIC 'R' #define RFKILL_IOC_NOINPUT 1 #define RFKILL_IOCTL_NOINPUT _IO(RFKILL_IOC_MAGIC, RFKILL_IOC_NOINPUT) +#define RFKILL_IOC_MAX_SIZE 2 +#define RFKILL_IOCTL_MAX_SIZE _IOW(RFKILL_IOC_MAGIC, RFKILL_IOC_MAX_SIZE, __u32) /* and that's all userspace gets */ diff --git a/include/uapi/linux/rseq.h b/include/uapi/linux/rseq.h index 9a402fdb60e97..77ee207623a9b 100644 --- a/include/uapi/linux/rseq.h +++ b/include/uapi/linux/rseq.h @@ -105,23 +105,11 @@ struct rseq { * Read and set by the kernel. Set by user-space with single-copy * atomicity semantics. This field should only be updated by the * thread which registered this data structure. Aligned on 64-bit. + * + * 32-bit architectures should update the low order bits of the + * rseq_cs field, leaving the high order bits initialized to 0. */ - union { - __u64 ptr64; -#ifdef __LP64__ - __u64 ptr; -#else - struct { -#if (defined(__BYTE_ORDER) && (__BYTE_ORDER == __BIG_ENDIAN)) || defined(__BIG_ENDIAN) - __u32 padding; /* Initialized to zero. */ - __u32 ptr32; -#else /* LITTLE */ - __u32 ptr32; - __u32 padding; /* Initialized to zero. */ -#endif /* ENDIAN */ - } ptr; -#endif - } rseq_cs; + __u64 rseq_cs; /* * Restartable sequences flags field. 
diff --git a/include/uapi/linux/serial_core.h b/include/uapi/linux/serial_core.h index c4042dcfdc0c3..8885e69178bd7 100644 --- a/include/uapi/linux/serial_core.h +++ b/include/uapi/linux/serial_core.h @@ -68,6 +68,9 @@ /* NVIDIA Tegra Combined UART */ #define PORT_TEGRA_TCU 41 +/* ASPEED AST2x00 virtual UART */ +#define PORT_ASPEED_VUART 42 + /* Intel EG20 */ #define PORT_PCH_8LINE 44 #define PORT_PCH_2LINE 45 diff --git a/include/uapi/linux/stddef.h b/include/uapi/linux/stddef.h index 3021ea25a2849..7837ba4fe7289 100644 --- a/include/uapi/linux/stddef.h +++ b/include/uapi/linux/stddef.h @@ -1,4 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_LINUX_STDDEF_H +#define _UAPI_LINUX_STDDEF_H + #include #ifndef __always_inline @@ -41,3 +44,4 @@ struct { } __empty_ ## NAME; \ TYPE NAME[]; \ } +#endif diff --git a/init/Kconfig b/init/Kconfig index e9119bf54b1f3..2fce0d599a91e 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1231,6 +1231,22 @@ config USER_NS If unsure, say N. +config USER_NS_UNPRIVILEGED + bool "Allow unprivileged users to create namespaces" + default y + depends on USER_NS + help + When disabled, unprivileged users will not be able to create + new namespaces. Allowing users to create their own namespaces + has been part of several recent local privilege escalation + exploits, so if you need user namespaces but are + paranoid^Wsecurity-conscious you want to disable this. + + This setting can be overridden at runtime via the + kernel.unprivileged_userns_clone sysctl. + + If unsure, say Y. + config PID_NS bool "PID Namespaces" default y @@ -1370,7 +1386,6 @@ config CC_OPTIMIZE_FOR_PERFORMANCE config CC_OPTIMIZE_FOR_PERFORMANCE_O3 bool "Optimize more for performance (-O3)" - depends on ARC help Choosing this option will pass "-O3" to your compiler to optimize the kernel yet more for performance. 
diff --git a/init/main.c b/init/main.c index 65fa2e41a9c09..9a5097b2251a5 100644 --- a/init/main.c +++ b/init/main.c @@ -99,6 +99,7 @@ #include #include #include +#include #include #include @@ -1116,6 +1117,7 @@ asmlinkage __visible void __init __no_sanitize_address start_kernel(void) key_init(); security_init(); dbg_late_init(); + net_ns_init(); vfs_caches_init(); pagecache_init(); signals_init(); @@ -1190,7 +1192,7 @@ static int __init initcall_blacklist(char *str) } } while (str_entry); - return 0; + return 1; } static bool __init_or_module initcall_blacklisted(initcall_t fn) @@ -1452,7 +1454,9 @@ static noinline void __init kernel_init_freeable(void); bool rodata_enabled __ro_after_init = true; static int __init set_debug_rodata(char *str) { - return strtobool(str, &rodata_enabled); + if (strtobool(str, &rodata_enabled)) + pr_warn("Invalid option string for rodata: '%s'\n", str); + return 1; } __setup("rodata=", set_debug_rodata); #endif diff --git a/kernel/Makefile b/kernel/Makefile index 56f4ee97f3284..a18d169732d2e 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -113,7 +113,8 @@ obj-$(CONFIG_CPU_PM) += cpu_pm.o obj-$(CONFIG_BPF) += bpf/ obj-$(CONFIG_KCSAN) += kcsan/ obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o -obj-$(CONFIG_HAVE_STATIC_CALL_INLINE) += static_call.o +obj-$(CONFIG_HAVE_STATIC_CALL) += static_call.o +obj-$(CONFIG_HAVE_STATIC_CALL_INLINE) += static_call_inline.o obj-$(CONFIG_CFI_CLANG) += cfi.o obj-$(CONFIG_PERF_EVENTS) += events/ diff --git a/kernel/audit.h b/kernel/audit.h index c4498090a5bd6..58b66543b4d57 100644 --- a/kernel/audit.h +++ b/kernel/audit.h @@ -201,6 +201,10 @@ struct audit_context { struct { char *name; } module; + struct { + struct audit_ntp_data ntp_data; + struct timespec64 tk_injoffset; + } time; }; int fds[2]; struct audit_proctitle proctitle; diff --git a/kernel/auditsc.c b/kernel/auditsc.c index a83928cbdcb7c..ea2ee1181921e 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1340,6 +1340,53 @@ static void 
audit_log_fcaps(struct audit_buffer *ab, struct audit_names *name) from_kuid(&init_user_ns, name->fcap.rootid)); } +static void audit_log_time(struct audit_context *context, struct audit_buffer **ab) +{ + const struct audit_ntp_data *ntp = &context->time.ntp_data; + const struct timespec64 *tk = &context->time.tk_injoffset; + static const char * const ntp_name[] = { + "offset", + "freq", + "status", + "tai", + "tick", + "adjust", + }; + int type; + + if (context->type == AUDIT_TIME_ADJNTPVAL) { + for (type = 0; type < AUDIT_NTP_NVALS; type++) { + if (ntp->vals[type].newval != ntp->vals[type].oldval) { + if (!*ab) { + *ab = audit_log_start(context, + GFP_KERNEL, + AUDIT_TIME_ADJNTPVAL); + if (!*ab) + return; + } + audit_log_format(*ab, "op=%s old=%lli new=%lli", + ntp_name[type], + ntp->vals[type].oldval, + ntp->vals[type].newval); + audit_log_end(*ab); + *ab = NULL; + } + } + } + if (tk->tv_sec != 0 || tk->tv_nsec != 0) { + if (!*ab) { + *ab = audit_log_start(context, GFP_KERNEL, + AUDIT_TIME_INJOFFSET); + if (!*ab) + return; + } + audit_log_format(*ab, "sec=%lli nsec=%li", + (long long)tk->tv_sec, tk->tv_nsec); + audit_log_end(*ab); + *ab = NULL; + } +} + static void show_special(struct audit_context *context, int *call_panic) { struct audit_buffer *ab; @@ -1454,6 +1501,11 @@ static void show_special(struct audit_context *context, int *call_panic) audit_log_format(ab, "(null)"); break; + case AUDIT_TIME_ADJNTPVAL: + case AUDIT_TIME_INJOFFSET: + /* this call deviates from the rest, eating the buffer */ + audit_log_time(context, &ab); + break; } audit_log_end(ab); } @@ -2849,31 +2901,26 @@ void __audit_fanotify(unsigned int response) void __audit_tk_injoffset(struct timespec64 offset) { - audit_log(audit_context(), GFP_KERNEL, AUDIT_TIME_INJOFFSET, - "sec=%lli nsec=%li", - (long long)offset.tv_sec, offset.tv_nsec); -} - -static void audit_log_ntp_val(const struct audit_ntp_data *ad, - const char *op, enum audit_ntp_type type) -{ - const struct audit_ntp_val *val = 
&ad->vals[type]; - - if (val->newval == val->oldval) - return; + struct audit_context *context = audit_context(); - audit_log(audit_context(), GFP_KERNEL, AUDIT_TIME_ADJNTPVAL, - "op=%s old=%lli new=%lli", op, val->oldval, val->newval); + /* only set type if not already set by NTP */ + if (!context->type) + context->type = AUDIT_TIME_INJOFFSET; + memcpy(&context->time.tk_injoffset, &offset, sizeof(offset)); } void __audit_ntp_log(const struct audit_ntp_data *ad) { - audit_log_ntp_val(ad, "offset", AUDIT_NTP_OFFSET); - audit_log_ntp_val(ad, "freq", AUDIT_NTP_FREQ); - audit_log_ntp_val(ad, "status", AUDIT_NTP_STATUS); - audit_log_ntp_val(ad, "tai", AUDIT_NTP_TAI); - audit_log_ntp_val(ad, "tick", AUDIT_NTP_TICK); - audit_log_ntp_val(ad, "adjust", AUDIT_NTP_ADJUST); + struct audit_context *context = audit_context(); + int type; + + for (type = 0; type < AUDIT_NTP_NVALS; type++) + if (ad->vals[type].newval != ad->vals[type].oldval) { + /* unconditionally set type, overwriting TK */ + context->type = AUDIT_TIME_ADJNTPVAL; + memcpy(&context->time.ntp_data, ad, sizeof(*ad)); + break; + } } void __audit_log_nfcfg(const char *name, u8 af, unsigned int nentries, diff --git a/kernel/bounds.c b/kernel/bounds.c index 9795d75b09b23..10dd9e6b03e55 100644 --- a/kernel/bounds.c +++ b/kernel/bounds.c @@ -22,6 +22,13 @@ int main(void) DEFINE(NR_CPUS_BITS, ilog2(CONFIG_NR_CPUS)); #endif DEFINE(SPINLOCK_SIZE, sizeof(spinlock_t)); +#ifdef CONFIG_LRU_GEN + DEFINE(LRU_GEN_WIDTH, order_base_2(MAX_NR_GENS + 1)); + DEFINE(LRU_REFS_WIDTH, MAX_NR_TIERS - 2); +#else + DEFINE(LRU_GEN_WIDTH, 0); + DEFINE(LRU_REFS_WIDTH, 0); +#endif /* End of constants */ return 0; diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 3e23b3fa79ff6..ac89e65d1692e 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -403,6 +403,9 @@ static struct btf_type btf_void; static int btf_resolve(struct btf_verifier_env *env, const struct btf_type *t, u32 type_id); +static int btf_func_check(struct btf_verifier_env *env, 
+ const struct btf_type *t); + static bool btf_type_is_modifier(const struct btf_type *t) { /* Some of them is not strictly a C modifier @@ -579,6 +582,7 @@ static bool btf_type_needs_resolve(const struct btf_type *t) btf_type_is_struct(t) || btf_type_is_array(t) || btf_type_is_var(t) || + btf_type_is_func(t) || btf_type_is_decl_tag(t) || btf_type_is_datasec(t); } @@ -3533,9 +3537,24 @@ static s32 btf_func_check_meta(struct btf_verifier_env *env, return 0; } +static int btf_func_resolve(struct btf_verifier_env *env, + const struct resolve_vertex *v) +{ + const struct btf_type *t = v->t; + u32 next_type_id = t->type; + int err; + + err = btf_func_check(env, t); + if (err) + return err; + + env_stack_pop_resolved(env, next_type_id, 0); + return 0; +} + static struct btf_kind_operations func_ops = { .check_meta = btf_func_check_meta, - .resolve = btf_df_resolve, + .resolve = btf_func_resolve, .check_member = btf_df_check_member, .check_kflag_member = btf_df_check_kflag_member, .log_details = btf_ref_type_log, @@ -4156,7 +4175,7 @@ static bool btf_resolve_valid(struct btf_verifier_env *env, return !btf_resolved_type_id(btf, type_id) && !btf_resolved_type_size(btf, type_id); - if (btf_type_is_decl_tag(t)) + if (btf_type_is_decl_tag(t) || btf_type_is_func(t)) return btf_resolved_type_id(btf, type_id) && !btf_resolved_type_size(btf, type_id); @@ -4246,12 +4265,6 @@ static int btf_check_all_types(struct btf_verifier_env *env) if (err) return err; } - - if (btf_type_is_func(t)) { - err = btf_func_check(env, t); - if (err) - return err; - } } return 0; @@ -6201,12 +6214,17 @@ bool btf_id_set_contains(const struct btf_id_set *set, u32 id) return bsearch(&id, set->ids, set->cnt, sizeof(u32), btf_id_cmp_func) != NULL; } +enum { + BTF_MODULE_F_LIVE = (1 << 0), +}; + #ifdef CONFIG_DEBUG_INFO_BTF_MODULES struct btf_module { struct list_head list; struct module *module; struct btf *btf; struct bin_attribute *sysfs_attr; + int flags; }; static LIST_HEAD(btf_modules); @@ -6234,7 
+6252,8 @@ static int btf_module_notify(struct notifier_block *nb, unsigned long op, int err = 0; if (mod->btf_data_size == 0 || - (op != MODULE_STATE_COMING && op != MODULE_STATE_GOING)) + (op != MODULE_STATE_COMING && op != MODULE_STATE_LIVE && + op != MODULE_STATE_GOING)) goto out; switch (op) { @@ -6292,6 +6311,17 @@ static int btf_module_notify(struct notifier_block *nb, unsigned long op, btf_mod->sysfs_attr = attr; } + break; + case MODULE_STATE_LIVE: + mutex_lock(&btf_module_mutex); + list_for_each_entry_safe(btf_mod, tmp, &btf_modules, list) { + if (btf_mod->module != module) + continue; + + btf_mod->flags |= BTF_MODULE_F_LIVE; + break; + } + mutex_unlock(&btf_module_mutex); break; case MODULE_STATE_GOING: mutex_lock(&btf_module_mutex); @@ -6339,7 +6369,12 @@ struct module *btf_try_get_module(const struct btf *btf) if (btf_mod->btf != btf) continue; - if (try_module_get(btf_mod->module)) + /* We must only consider module whose __init routine has + * finished, hence we must check for BTF_MODULE_F_LIVE flag, + * which is set from the notifier callback for + * MODULE_STATE_LIVE. 
+ */ + if ((btf_mod->flags & BTF_MODULE_F_LIVE) && try_module_get(btf_mod->module)) res = btf_mod->module; break; diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index 22c8ae94e4c1c..2823dcefae10e 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -166,7 +166,7 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs, } static struct perf_callchain_entry * -get_callchain_entry_for_task(struct task_struct *task, u32 init_nr) +get_callchain_entry_for_task(struct task_struct *task, u32 max_depth) { #ifdef CONFIG_STACKTRACE struct perf_callchain_entry *entry; @@ -177,9 +177,8 @@ get_callchain_entry_for_task(struct task_struct *task, u32 init_nr) if (!entry) return NULL; - entry->nr = init_nr + - stack_trace_save_tsk(task, (unsigned long *)(entry->ip + init_nr), - sysctl_perf_event_max_stack - init_nr, 0); + entry->nr = stack_trace_save_tsk(task, (unsigned long *)entry->ip, + max_depth, 0); /* stack_trace_save_tsk() works on unsigned long array, while * perf_callchain_entry uses u64 array. 
For 32-bit systems, it is @@ -191,7 +190,7 @@ get_callchain_entry_for_task(struct task_struct *task, u32 init_nr) int i; /* copy data from the end to avoid using extra buffer */ - for (i = entry->nr - 1; i >= (int)init_nr; i--) + for (i = entry->nr - 1; i >= 0; i--) to[i] = (u64)(from[i]); } @@ -208,27 +207,19 @@ static long __bpf_get_stackid(struct bpf_map *map, { struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map); struct stack_map_bucket *bucket, *new_bucket, *old_bucket; - u32 max_depth = map->value_size / stack_map_data_size(map); - /* stack_map_alloc() checks that max_depth <= sysctl_perf_event_max_stack */ - u32 init_nr = sysctl_perf_event_max_stack - max_depth; u32 skip = flags & BPF_F_SKIP_FIELD_MASK; u32 hash, id, trace_nr, trace_len; bool user = flags & BPF_F_USER_STACK; u64 *ips; bool hash_matches; - /* get_perf_callchain() guarantees that trace->nr >= init_nr - * and trace-nr <= sysctl_perf_event_max_stack, so trace_nr <= max_depth - */ - trace_nr = trace->nr - init_nr; - - if (trace_nr <= skip) + if (trace->nr <= skip) /* skipping more than usable stack trace */ return -EFAULT; - trace_nr -= skip; + trace_nr = trace->nr - skip; trace_len = trace_nr * sizeof(u64); - ips = trace->ip + skip + init_nr; + ips = trace->ip + skip; hash = jhash2((u32 *)ips, trace_len / sizeof(u32), 0); id = hash & (smap->n_buckets - 1); bucket = READ_ONCE(smap->buckets[id]); @@ -285,8 +276,7 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map, u64, flags) { u32 max_depth = map->value_size / stack_map_data_size(map); - /* stack_map_alloc() checks that max_depth <= sysctl_perf_event_max_stack */ - u32 init_nr = sysctl_perf_event_max_stack - max_depth; + u32 skip = flags & BPF_F_SKIP_FIELD_MASK; bool user = flags & BPF_F_USER_STACK; struct perf_callchain_entry *trace; bool kernel = !user; @@ -295,8 +285,12 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map, BPF_F_FAST_STACK_CMP | BPF_F_REUSE_STACKID))) 
return -EINVAL; - trace = get_perf_callchain(regs, init_nr, kernel, user, - sysctl_perf_event_max_stack, false, false); + max_depth += skip; + if (max_depth > sysctl_perf_event_max_stack) + max_depth = sysctl_perf_event_max_stack; + + trace = get_perf_callchain(regs, 0, kernel, user, max_depth, + false, false); if (unlikely(!trace)) /* couldn't fetch the stack trace */ @@ -387,7 +381,7 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task, struct perf_callchain_entry *trace_in, void *buf, u32 size, u64 flags) { - u32 init_nr, trace_nr, copy_len, elem_size, num_elem; + u32 trace_nr, copy_len, elem_size, num_elem, max_depth; bool user_build_id = flags & BPF_F_USER_BUILD_ID; u32 skip = flags & BPF_F_SKIP_FIELD_MASK; bool user = flags & BPF_F_USER_STACK; @@ -412,30 +406,28 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task, goto err_fault; num_elem = size / elem_size; - if (sysctl_perf_event_max_stack < num_elem) - init_nr = 0; - else - init_nr = sysctl_perf_event_max_stack - num_elem; + max_depth = num_elem + skip; + if (sysctl_perf_event_max_stack < max_depth) + max_depth = sysctl_perf_event_max_stack; if (trace_in) trace = trace_in; else if (kernel && task) - trace = get_callchain_entry_for_task(task, init_nr); + trace = get_callchain_entry_for_task(task, max_depth); else - trace = get_perf_callchain(regs, init_nr, kernel, user, - sysctl_perf_event_max_stack, + trace = get_perf_callchain(regs, 0, kernel, user, max_depth, false, false); if (unlikely(!trace)) goto err_fault; - trace_nr = trace->nr - init_nr; - if (trace_nr < skip) + if (trace->nr < skip) goto err_fault; - trace_nr -= skip; + trace_nr = trace->nr - skip; trace_nr = (trace_nr <= num_elem) ? 
trace_nr : num_elem; copy_len = trace_nr * elem_size; - ips = trace->ip + skip + init_nr; + + ips = trace->ip + skip; if (user && user_build_id) stack_map_get_build_id_offset(buf, ips, trace_nr, user); else diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h index 6e36e854b5124..929ed3bf1a7cf 100644 --- a/kernel/cgroup/cgroup-internal.h +++ b/kernel/cgroup/cgroup-internal.h @@ -165,7 +165,6 @@ struct cgroup_mgctx { #define DEFINE_CGROUP_MGCTX(name) \ struct cgroup_mgctx name = CGROUP_MGCTX_INIT(name) -extern struct mutex cgroup_mutex; extern spinlock_t css_set_lock; extern struct cgroup_subsys *cgroup_subsys[]; extern struct list_head cgroup_roots; diff --git a/kernel/cpu.c b/kernel/cpu.c index 407a2568f35eb..5601216eb51bd 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -70,7 +70,6 @@ struct cpuhp_cpu_state { bool rollback; bool single; bool bringup; - int cpu; struct hlist_node *node; struct hlist_node *last; enum cpuhp_state cb_state; @@ -474,7 +473,7 @@ static inline bool cpu_smt_allowed(unsigned int cpu) { return true; } #endif static inline enum cpuhp_state -cpuhp_set_state(struct cpuhp_cpu_state *st, enum cpuhp_state target) +cpuhp_set_state(int cpu, struct cpuhp_cpu_state *st, enum cpuhp_state target) { enum cpuhp_state prev_state = st->state; bool bringup = st->state < target; @@ -485,14 +484,15 @@ cpuhp_set_state(struct cpuhp_cpu_state *st, enum cpuhp_state target) st->target = target; st->single = false; st->bringup = bringup; - if (cpu_dying(st->cpu) != !bringup) - set_cpu_dying(st->cpu, !bringup); + if (cpu_dying(cpu) != !bringup) + set_cpu_dying(cpu, !bringup); return prev_state; } static inline void -cpuhp_reset_state(struct cpuhp_cpu_state *st, enum cpuhp_state prev_state) +cpuhp_reset_state(int cpu, struct cpuhp_cpu_state *st, + enum cpuhp_state prev_state) { bool bringup = !st->bringup; @@ -519,8 +519,8 @@ cpuhp_reset_state(struct cpuhp_cpu_state *st, enum cpuhp_state prev_state) } st->bringup = bringup; - if 
(cpu_dying(st->cpu) != !bringup) - set_cpu_dying(st->cpu, !bringup); + if (cpu_dying(cpu) != !bringup) + set_cpu_dying(cpu, !bringup); } /* Regular hotplug invocation of the AP hotplug thread */ @@ -540,15 +540,16 @@ static void __cpuhp_kick_ap(struct cpuhp_cpu_state *st) wait_for_ap_thread(st, st->bringup); } -static int cpuhp_kick_ap(struct cpuhp_cpu_state *st, enum cpuhp_state target) +static int cpuhp_kick_ap(int cpu, struct cpuhp_cpu_state *st, + enum cpuhp_state target) { enum cpuhp_state prev_state; int ret; - prev_state = cpuhp_set_state(st, target); + prev_state = cpuhp_set_state(cpu, st, target); __cpuhp_kick_ap(st); if ((ret = st->result)) { - cpuhp_reset_state(st, prev_state); + cpuhp_reset_state(cpu, st, prev_state); __cpuhp_kick_ap(st); } @@ -580,7 +581,7 @@ static int bringup_wait_for_ap(unsigned int cpu) if (st->target <= CPUHP_AP_ONLINE_IDLE) return 0; - return cpuhp_kick_ap(st, st->target); + return cpuhp_kick_ap(cpu, st, st->target); } static int bringup_cpu(unsigned int cpu) @@ -703,7 +704,7 @@ static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st, ret, cpu, cpuhp_get_step(st->state)->name, st->state); - cpuhp_reset_state(st, prev_state); + cpuhp_reset_state(cpu, st, prev_state); if (can_rollback_cpu(st)) WARN_ON(cpuhp_invoke_callback_range(false, cpu, st, prev_state)); @@ -720,7 +721,6 @@ static void cpuhp_create(unsigned int cpu) init_completion(&st->done_up); init_completion(&st->done_down); - st->cpu = cpu; } static int cpuhp_should_run(unsigned int cpu) @@ -874,7 +874,7 @@ static int cpuhp_kick_ap_work(unsigned int cpu) cpuhp_lock_release(true); trace_cpuhp_enter(cpu, st->target, prev_state, cpuhp_kick_ap_work); - ret = cpuhp_kick_ap(st, st->target); + ret = cpuhp_kick_ap(cpu, st, st->target); trace_cpuhp_exit(cpu, st->state, prev_state, ret); return ret; @@ -1106,7 +1106,7 @@ static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st, ret, cpu, cpuhp_get_step(st->state)->name, st->state); - 
cpuhp_reset_state(st, prev_state); + cpuhp_reset_state(cpu, st, prev_state); if (st->state < prev_state) WARN_ON(cpuhp_invoke_callback_range(true, cpu, st, @@ -1133,7 +1133,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen, cpuhp_tasks_frozen = tasks_frozen; - prev_state = cpuhp_set_state(st, target); + prev_state = cpuhp_set_state(cpu, st, target); /* * If the current CPU state is in the range of the AP hotplug thread, * then we need to kick the thread. @@ -1164,7 +1164,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen, ret = cpuhp_down_callbacks(cpu, st, target); if (ret && st->state < prev_state) { if (st->state == CPUHP_TEARDOWN_CPU) { - cpuhp_reset_state(st, prev_state); + cpuhp_reset_state(cpu, st, prev_state); __cpuhp_kick_ap(st); } else { WARN(1, "DEAD callback error for CPU%d", cpu); @@ -1351,7 +1351,7 @@ static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target) cpuhp_tasks_frozen = tasks_frozen; - cpuhp_set_state(st, target); + cpuhp_set_state(cpu, st, target); /* * If the current CPU state is in the range of the AP hotplug thread, * then we need to kick the thread once more. 
diff --git a/kernel/debug/kdb/kdb_support.c b/kernel/debug/kdb/kdb_support.c index df2bface866ef..85cb51c4a17e6 100644 --- a/kernel/debug/kdb/kdb_support.c +++ b/kernel/debug/kdb/kdb_support.c @@ -291,7 +291,7 @@ int kdb_getarea_size(void *res, unsigned long addr, size_t size) */ int kdb_putarea_size(unsigned long addr, void *res, size_t size) { - int ret = copy_from_kernel_nofault((char *)addr, (char *)res, size); + int ret = copy_to_kernel_nofault((char *)addr, (char *)res, size); if (ret) { if (!KDB_STATE(SUPPRESS)) { kdb_func_printf("Bad address 0x%lx\n", addr); diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c index 7a14ca29c3778..f8ff598596b85 100644 --- a/kernel/dma/debug.c +++ b/kernel/dma/debug.c @@ -927,7 +927,7 @@ static __init int dma_debug_cmdline(char *str) global_disable = true; } - return 0; + return 1; } static __init int dma_debug_entries_cmdline(char *str) @@ -936,7 +936,7 @@ static __init int dma_debug_entries_cmdline(char *str) return -EINVAL; if (!get_option(&str, &nr_prealloc_entries)) nr_prealloc_entries = PREALLOC_DMA_DEBUG_ENTRIES; - return 0; + return 1; } __setup("dma_debug=", dma_debug_cmdline); diff --git a/kernel/dma/direct.h b/kernel/dma/direct.h index 4632b0f4f72eb..8a6cd53dbe8ce 100644 --- a/kernel/dma/direct.h +++ b/kernel/dma/direct.h @@ -114,6 +114,7 @@ static inline void dma_direct_unmap_page(struct device *dev, dma_addr_t addr, dma_direct_sync_single_for_cpu(dev, addr, size, dir); if (unlikely(is_swiotlb_buffer(dev, phys))) - swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs); + swiotlb_tbl_unmap_single(dev, phys, size, dir, + attrs | DMA_ATTR_SKIP_CPU_SYNC); } #endif /* _KERNEL_DMA_DIRECT_H */ diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 6db1c475ec827..6c350555e5a1c 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -701,13 +701,10 @@ void swiotlb_tbl_unmap_single(struct device *dev, phys_addr_t tlb_addr, void swiotlb_sync_single_for_device(struct device *dev, phys_addr_t tlb_addr, size_t 
size, enum dma_data_direction dir) { - /* - * Unconditional bounce is necessary to avoid corruption on - * sync_*_for_cpu or dma_ummap_* when the device didn't overwrite - * the whole lengt of the bounce buffer. - */ - swiotlb_bounce(dev, tlb_addr, size, DMA_TO_DEVICE); - BUG_ON(!valid_dma_direction(dir)); + if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL) + swiotlb_bounce(dev, tlb_addr, size, DMA_TO_DEVICE); + else + BUG_ON(dir != DMA_FROM_DEVICE); } void swiotlb_sync_single_for_cpu(struct device *dev, phys_addr_t tlb_addr, diff --git a/kernel/events/core.c b/kernel/events/core.c index 6859229497b15..0ee9ffceb9764 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -10574,8 +10574,11 @@ perf_event_parse_addr_filter(struct perf_event *event, char *fstr, } /* ready to consume more filters */ + kfree(filename); + filename = NULL; state = IF_STATE_ACTION; filter = NULL; + kernel = 0; } } @@ -11637,6 +11640,9 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, event->state = PERF_EVENT_STATE_INACTIVE; + if (parent_event) + event->event_caps = parent_event->event_caps; + if (event->attr.sigtrap) atomic_set(&event->event_limit, 1); diff --git a/kernel/exit.c b/kernel/exit.c index b00a25bb4ab93..54d2ce4b93d1e 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -463,6 +463,7 @@ void mm_update_next_owner(struct mm_struct *mm) goto retry; } WRITE_ONCE(mm->owner, c); + lru_gen_migrate_mm(mm); task_unlock(c); put_task_struct(c); } diff --git a/kernel/fork.c b/kernel/fork.c index f1e89007f2288..463b2d2bf5d66 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -98,6 +98,10 @@ #include #include +#ifdef CONFIG_USER_NS +#include +#endif + #include #include #include @@ -1079,6 +1083,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, goto fail_nocontext; mm->user_ns = get_user_ns(user_ns); + lru_gen_init_mm(mm); return mm; fail_nocontext: @@ -1121,6 +1126,7 @@ static inline void __mmput(struct mm_struct *mm) } if (mm->binfmt) 
module_put(mm->binfmt->module); + lru_gen_del_mm(mm); mmdrop(mm); } @@ -1922,6 +1928,10 @@ static __latent_entropy struct task_struct *copy_process( if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS)) return ERR_PTR(-EINVAL); + if ((clone_flags & CLONE_NEWUSER) && !unprivileged_userns_clone) + if (!capable(CAP_SYS_ADMIN)) + return ERR_PTR(-EPERM); + /* * Thread groups must share signals as well, and detached threads * can only be started up within the thread group. @@ -2586,6 +2596,13 @@ pid_t kernel_clone(struct kernel_clone_args *args) get_task_struct(p); } + if (IS_ENABLED(CONFIG_LRU_GEN) && !(clone_flags & CLONE_VM)) { + /* lock the task to synchronize with memcg migration */ + task_lock(p); + lru_gen_add_mm(p->mm); + task_unlock(p); + } + wake_up_new_task(p); /* forking complete and child started to run, tell ptracer */ @@ -3036,6 +3053,12 @@ int ksys_unshare(unsigned long unshare_flags) if (unshare_flags & CLONE_NEWNS) unshare_flags |= CLONE_FS; + if ((unshare_flags & CLONE_NEWUSER) && !unprivileged_userns_clone) { + err = -EPERM; + if (!capable(CAP_SYS_ADMIN)) + goto bad_unshare_out; + } + err = check_unshare_flags(unshare_flags); if (err) goto bad_unshare_out; diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c index f7ff8919dc9bb..fdf170404650f 100644 --- a/kernel/irq/affinity.c +++ b/kernel/irq/affinity.c @@ -269,8 +269,9 @@ static int __irq_build_affinity_masks(unsigned int startvec, */ if (numvecs <= nodes) { for_each_node_mask(n, nodemsk) { - cpumask_or(&masks[curvec].mask, &masks[curvec].mask, - node_to_cpumask[n]); + /* Ensure that only CPUs which are in both masks are set */ + cpumask_and(nmsk, cpu_mask, node_to_cpumask[n]); + cpumask_or(&masks[curvec].mask, &masks[curvec].mask, nmsk); if (++curvec == last_affv) curvec = firstvec; } diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 2267e6527db3c..3e129f2e19d15 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -681,6 +681,27 @@ int 
generic_handle_domain_irq(struct irq_domain *domain, unsigned int hwirq) } EXPORT_SYMBOL_GPL(generic_handle_domain_irq); +/** + * generic_dispatch_irq - Dispatch an interrupt from an interrupt handler + * @irq: The irq number to handle + * + * A wrapper around generic_handle_irq() which ensures that interrupts are + * disabled when the primary handler of the dispatched irq is invoked. + * This is useful for interrupt handlers with dispatching to be safe for + * the forced threaded case. + */ +int generic_dispatch_irq(unsigned int irq) +{ + unsigned long flags; + int ret; + + local_irq_save(flags); + ret = generic_handle_irq(irq); + local_irq_restore(flags); + return ret; +} +EXPORT_SYMBOL_GPL(generic_dispatch_irq); + /** * generic_handle_domain_nmi - Invoke the handler for a HW nmi belonging * to a domain. diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c index 585494ec464f9..bc475e62279d2 100644 --- a/kernel/livepatch/core.c +++ b/kernel/livepatch/core.c @@ -190,7 +190,7 @@ static int klp_find_object_symbol(const char *objname, const char *name, return -EINVAL; } -static int klp_resolve_symbols(Elf64_Shdr *sechdrs, const char *strtab, +static int klp_resolve_symbols(Elf_Shdr *sechdrs, const char *strtab, unsigned int symndx, Elf_Shdr *relasec, const char *sec_objname) { @@ -218,7 +218,7 @@ static int klp_resolve_symbols(Elf64_Shdr *sechdrs, const char *strtab, relas = (Elf_Rela *) relasec->sh_addr; /* For each rela in this klp relocation section */ for (i = 0; i < relasec->sh_size / sizeof(Elf_Rela); i++) { - sym = (Elf64_Sym *)sechdrs[symndx].sh_addr + ELF_R_SYM(relas[i].r_info); + sym = (Elf_Sym *)sechdrs[symndx].sh_addr + ELF_R_SYM(relas[i].r_info); if (sym->st_shndx != SHN_LIVEPATCH) { pr_err("symbol %s is not marked as a livepatch symbol\n", strtab + sym->st_name); diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index f8a0212189cad..4675a686f942f 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -183,11 
+183,9 @@ static DECLARE_BITMAP(list_entries_in_use, MAX_LOCKDEP_ENTRIES); static struct hlist_head lock_keys_hash[KEYHASH_SIZE]; unsigned long nr_lock_classes; unsigned long nr_zapped_classes; -#ifndef CONFIG_DEBUG_LOCKDEP -static -#endif +unsigned long max_lock_class_idx; struct lock_class lock_classes[MAX_LOCKDEP_KEYS]; -static DECLARE_BITMAP(lock_classes_in_use, MAX_LOCKDEP_KEYS); +DECLARE_BITMAP(lock_classes_in_use, MAX_LOCKDEP_KEYS); static inline struct lock_class *hlock_class(struct held_lock *hlock) { @@ -338,7 +336,7 @@ static inline void lock_release_holdtime(struct held_lock *hlock) * elements. These elements are linked together by the lock_entry member in * struct lock_class. */ -LIST_HEAD(all_lock_classes); +static LIST_HEAD(all_lock_classes); static LIST_HEAD(free_lock_classes); /** @@ -1252,6 +1250,7 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force) struct lockdep_subclass_key *key; struct hlist_head *hash_head; struct lock_class *class; + int idx; DEBUG_LOCKS_WARN_ON(!irqs_disabled()); @@ -1317,6 +1316,9 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force) * of classes. */ list_move_tail(&class->lock_entry, &all_lock_classes); + idx = class - lock_classes; + if (idx > max_lock_class_idx) + max_lock_class_idx = idx; if (verbose(class)) { graph_unlock(); @@ -6000,6 +6002,8 @@ static void zap_class(struct pending_free *pf, struct lock_class *class) WRITE_ONCE(class->name, NULL); nr_lock_classes--; __clear_bit(class - lock_classes, lock_classes_in_use); + if (class - lock_classes == max_lock_class_idx) + max_lock_class_idx--; } else { WARN_ONCE(true, "%s() failed for class %s\n", __func__, class->name); @@ -6290,7 +6294,13 @@ void lockdep_reset_lock(struct lockdep_map *lock) lockdep_reset_lock_reg(lock); } -/* Unregister a dynamically allocated key. */ +/* + * Unregister a dynamically allocated key. 
+ * + * Unlike lockdep_register_key(), a search is always done to find a matching + * key irrespective of debug_locks to avoid potential invalid access to freed + * memory in lock_class entry. + */ void lockdep_unregister_key(struct lock_class_key *key) { struct hlist_head *hash_head = keyhashentry(key); @@ -6305,10 +6315,8 @@ void lockdep_unregister_key(struct lock_class_key *key) return; raw_local_irq_save(flags); - if (!graph_lock()) - goto out_irq; + lockdep_lock(); - pf = get_pending_free(); hlist_for_each_entry_rcu(k, hash_head, hash_entry) { if (k == key) { hlist_del_rcu(&k->hash_entry); @@ -6316,11 +6324,13 @@ void lockdep_unregister_key(struct lock_class_key *key) break; } } - WARN_ON_ONCE(!found); - __lockdep_free_key_range(pf, key, 1); - call_rcu_zapped(pf); - graph_unlock(); -out_irq: + WARN_ON_ONCE(!found && debug_locks); + if (found) { + pf = get_pending_free(); + __lockdep_free_key_range(pf, key, 1); + call_rcu_zapped(pf); + } + lockdep_unlock(); raw_local_irq_restore(flags); /* Wait until is_dynamic_key() has finished accessing k->hash_entry. 
*/ diff --git a/kernel/locking/lockdep_internals.h b/kernel/locking/lockdep_internals.h index ecb8662e7a4ed..bbe9000260d02 100644 --- a/kernel/locking/lockdep_internals.h +++ b/kernel/locking/lockdep_internals.h @@ -121,7 +121,6 @@ static const unsigned long LOCKF_USED_IN_IRQ_READ = #define MAX_LOCKDEP_CHAIN_HLOCKS (MAX_LOCKDEP_CHAINS*5) -extern struct list_head all_lock_classes; extern struct lock_chain lock_chains[]; #define LOCK_USAGE_CHARS (2*XXX_LOCK_USAGE_STATES + 1) @@ -151,6 +150,10 @@ extern unsigned int nr_large_chain_blocks; extern unsigned int max_lockdep_depth; extern unsigned int max_bfs_queue_depth; +extern unsigned long max_lock_class_idx; + +extern struct lock_class lock_classes[MAX_LOCKDEP_KEYS]; +extern unsigned long lock_classes_in_use[]; #ifdef CONFIG_PROVE_LOCKING extern unsigned long lockdep_count_forward_deps(struct lock_class *); @@ -205,7 +208,6 @@ struct lockdep_stats { }; DECLARE_PER_CPU(struct lockdep_stats, lockdep_stats); -extern struct lock_class lock_classes[MAX_LOCKDEP_KEYS]; #define __debug_atomic_inc(ptr) \ this_cpu_inc(lockdep_stats.ptr); diff --git a/kernel/locking/lockdep_proc.c b/kernel/locking/lockdep_proc.c index b8d9a050c337a..15fdc7fa5c688 100644 --- a/kernel/locking/lockdep_proc.c +++ b/kernel/locking/lockdep_proc.c @@ -24,14 +24,33 @@ #include "lockdep_internals.h" +/* + * Since iteration of lock_classes is done without holding the lockdep lock, + * it is not safe to iterate all_lock_classes list directly as the iteration + * may branch off to free_lock_classes or the zapped list. Iteration is done + * directly on the lock_classes array by checking the lock_classes_in_use + * bitmap and max_lock_class_idx. 
+ */ +#define iterate_lock_classes(idx, class) \ + for (idx = 0, class = lock_classes; idx <= max_lock_class_idx; \ + idx++, class++) + static void *l_next(struct seq_file *m, void *v, loff_t *pos) { - return seq_list_next(v, &all_lock_classes, pos); + struct lock_class *class = v; + + ++class; + *pos = class - lock_classes; + return (*pos > max_lock_class_idx) ? NULL : class; } static void *l_start(struct seq_file *m, loff_t *pos) { - return seq_list_start_head(&all_lock_classes, *pos); + unsigned long idx = *pos; + + if (idx > max_lock_class_idx) + return NULL; + return lock_classes + idx; } static void l_stop(struct seq_file *m, void *v) @@ -57,14 +76,16 @@ static void print_name(struct seq_file *m, struct lock_class *class) static int l_show(struct seq_file *m, void *v) { - struct lock_class *class = list_entry(v, struct lock_class, lock_entry); + struct lock_class *class = v; struct lock_list *entry; char usage[LOCK_USAGE_CHARS]; + int idx = class - lock_classes; - if (v == &all_lock_classes) { + if (v == lock_classes) seq_printf(m, "all lock classes:\n"); + + if (!test_bit(idx, lock_classes_in_use)) return 0; - } seq_printf(m, "%p", class->key); #ifdef CONFIG_DEBUG_LOCKDEP @@ -220,8 +241,11 @@ static int lockdep_stats_show(struct seq_file *m, void *v) #ifdef CONFIG_PROVE_LOCKING struct lock_class *class; + unsigned long idx; - list_for_each_entry(class, &all_lock_classes, lock_entry) { + iterate_lock_classes(idx, class) { + if (!test_bit(idx, lock_classes_in_use)) + continue; if (class->usage_mask == 0) nr_unused++; @@ -254,6 +278,7 @@ static int lockdep_stats_show(struct seq_file *m, void *v) sum_forward_deps += lockdep_count_forward_deps(class); } + #ifdef CONFIG_DEBUG_LOCKDEP DEBUG_LOCKS_WARN_ON(debug_atomic_read(nr_unused_locks) != nr_unused); #endif @@ -345,6 +370,8 @@ static int lockdep_stats_show(struct seq_file *m, void *v) seq_printf(m, " max bfs queue depth: %11u\n", max_bfs_queue_depth); #endif + seq_printf(m, " max lock class index: %11lu\n", + 
max_lock_class_idx); lockdep_stats_debug_show(m); seq_printf(m, " debug_locks: %11u\n", debug_locks); @@ -622,12 +649,16 @@ static int lock_stat_open(struct inode *inode, struct file *file) if (!res) { struct lock_stat_data *iter = data->stats; struct seq_file *m = file->private_data; + unsigned long idx; - list_for_each_entry(class, &all_lock_classes, lock_entry) { + iterate_lock_classes(idx, class) { + if (!test_bit(idx, lock_classes_in_use)) + continue; iter->class = class; iter->stats = lock_stats(class); iter++; } + data->iter_end = iter; sort(data->stats, data->iter_end - data->stats, @@ -645,6 +676,7 @@ static ssize_t lock_stat_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { struct lock_class *class; + unsigned long idx; char c; if (count) { @@ -654,8 +686,11 @@ static ssize_t lock_stat_write(struct file *file, const char __user *buf, if (c != '0') return count; - list_for_each_entry(class, &all_lock_classes, lock_entry) + iterate_lock_classes(idx, class) { + if (!test_bit(idx, lock_classes_in_use)) + continue; clear_lock_stats(class); + } } return count; } diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index e6af502c2fd77..08780a466fdf7 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -1328,7 +1328,7 @@ static int __init resumedelay_setup(char *str) int rc = kstrtouint(str, 0, &resume_delay); if (rc) - return rc; + pr_warn("resumedelay: bad option string '%s'\n", str); return 1; } diff --git a/kernel/power/suspend_test.c b/kernel/power/suspend_test.c index d20526c5be15b..b663a97f5867a 100644 --- a/kernel/power/suspend_test.c +++ b/kernel/power/suspend_test.c @@ -157,22 +157,22 @@ static int __init setup_test_suspend(char *value) value++; suspend_type = strsep(&value, ","); if (!suspend_type) - return 0; + return 1; repeat = strsep(&value, ","); if (repeat) { if (kstrtou32(repeat, 0, &test_repeat_count_max)) - return 0; + return 1; } for (i = PM_SUSPEND_MIN; i < PM_SUSPEND_MAX; i++) if 
(!strcmp(pm_labels[i], suspend_type)) { test_state_label = pm_labels[i]; - return 0; + return 1; } printk(warn_bad_state, suspend_type); - return 0; + return 1; } __setup("test_suspend", setup_test_suspend); diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 82abfaf3c2aad..833e407545b82 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -146,8 +146,10 @@ static int __control_devkmsg(char *str) static int __init control_devkmsg(char *str) { - if (__control_devkmsg(str) < 0) + if (__control_devkmsg(str) < 0) { + pr_warn("printk.devkmsg: bad option string '%s'\n", str); return 1; + } /* * Set sysctl string accordingly: @@ -166,7 +168,7 @@ static int __init control_devkmsg(char *str) */ devkmsg_log |= DEVKMSG_LOG_MASK_LOCK; - return 0; + return 1; } __setup("printk.devkmsg=", control_devkmsg); diff --git a/kernel/ptrace.c b/kernel/ptrace.c index eea265082e975..ccc4b465775b8 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -371,6 +371,26 @@ bool ptrace_may_access(struct task_struct *task, unsigned int mode) return !err; } +static int check_ptrace_options(unsigned long data) +{ + if (data & ~(unsigned long)PTRACE_O_MASK) + return -EINVAL; + + if (unlikely(data & PTRACE_O_SUSPEND_SECCOMP)) { + if (!IS_ENABLED(CONFIG_CHECKPOINT_RESTORE) || + !IS_ENABLED(CONFIG_SECCOMP)) + return -EINVAL; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (seccomp_mode(&current->seccomp) != SECCOMP_MODE_DISABLED || + current->ptrace & PT_SUSPEND_SECCOMP) + return -EPERM; + } + return 0; +} + static int ptrace_attach(struct task_struct *task, long request, unsigned long addr, unsigned long flags) @@ -382,8 +402,16 @@ static int ptrace_attach(struct task_struct *task, long request, if (seize) { if (addr != 0) goto out; + /* + * This duplicates the check in check_ptrace_options() because + * ptrace_attach() and ptrace_setoptions() have historically + * used different error codes for unknown ptrace options. 
+ */ if (flags & ~(unsigned long)PTRACE_O_MASK) goto out; + retval = check_ptrace_options(flags); + if (retval) + return retval; flags = PT_PTRACED | PT_SEIZED | (flags << PT_OPT_FLAG_SHIFT); } else { flags = PT_PTRACED; @@ -654,22 +682,11 @@ int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long ds static int ptrace_setoptions(struct task_struct *child, unsigned long data) { unsigned flags; + int ret; - if (data & ~(unsigned long)PTRACE_O_MASK) - return -EINVAL; - - if (unlikely(data & PTRACE_O_SUSPEND_SECCOMP)) { - if (!IS_ENABLED(CONFIG_CHECKPOINT_RESTORE) || - !IS_ENABLED(CONFIG_SECCOMP)) - return -EINVAL; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - if (seccomp_mode(&current->seccomp) != SECCOMP_MODE_DISABLED || - current->ptrace & PT_SUSPEND_SECCOMP) - return -EPERM; - } + ret = check_ptrace_options(data); + if (ret) + return ret; /* Avoid intermediate state when all opts are cleared */ flags = child->ptrace; diff --git a/kernel/rcu/rcu_segcblist.h b/kernel/rcu/rcu_segcblist.h index e373fbe44da5e..431cee212467d 100644 --- a/kernel/rcu/rcu_segcblist.h +++ b/kernel/rcu/rcu_segcblist.h @@ -56,13 +56,13 @@ static inline long rcu_segcblist_n_cbs(struct rcu_segcblist *rsclp) static inline void rcu_segcblist_set_flags(struct rcu_segcblist *rsclp, int flags) { - rsclp->flags |= flags; + WRITE_ONCE(rsclp->flags, rsclp->flags | flags); } static inline void rcu_segcblist_clear_flags(struct rcu_segcblist *rsclp, int flags) { - rsclp->flags &= ~flags; + WRITE_ONCE(rsclp->flags, rsclp->flags & ~flags); } static inline bool rcu_segcblist_test_flags(struct rcu_segcblist *rsclp, diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index a4c25a6283b0b..5dae0da879ae2 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -91,7 +91,7 @@ static struct rcu_state rcu_state = { .abbr = RCU_ABBR, .exp_mutex = __MUTEX_INITIALIZER(rcu_state.exp_mutex), .exp_wake_mutex = __MUTEX_INITIALIZER(rcu_state.exp_wake_mutex), - .ofl_lock = 
__RAW_SPIN_LOCK_UNLOCKED(rcu_state.ofl_lock), + .ofl_lock = __ARCH_SPIN_LOCK_UNLOCKED, }; /* Dump rcu_node combining tree at boot to verify correct setup. */ @@ -1086,9 +1086,8 @@ void rcu_irq_enter_irqson(void) * Just check whether or not this CPU has non-offloaded RCU callbacks * queued. */ -int rcu_needs_cpu(u64 basemono, u64 *nextevt) +int rcu_needs_cpu(void) { - *nextevt = KTIME_MAX; return !rcu_segcblist_empty(&this_cpu_ptr(&rcu_data)->cblist) && !rcu_rdp_is_offloaded(this_cpu_ptr(&rcu_data)); } @@ -1175,7 +1174,15 @@ bool rcu_lockdep_current_cpu_online(void) preempt_disable_notrace(); rdp = this_cpu_ptr(&rcu_data); rnp = rdp->mynode; - if (rdp->grpmask & rcu_rnp_online_cpus(rnp) || READ_ONCE(rnp->ofl_seq) & 0x1) + /* + * Strictly, we care here about the case where the current CPU is + * in rcu_cpu_starting() and thus has an excuse for rdp->grpmask + * not being up to date. So arch_spin_is_locked() might have a + * false positive if it's held by some *other* CPU, but that's + * OK because that just means a false *negative* on the warning. + */ + if (rdp->grpmask & rcu_rnp_online_cpus(rnp) || + arch_spin_is_locked(&rcu_state.ofl_lock)) ret = true; preempt_enable_notrace(); return ret; @@ -1739,7 +1746,6 @@ static void rcu_strict_gp_boundary(void *unused) */ static noinline_for_stack bool rcu_gp_init(void) { - unsigned long firstseq; unsigned long flags; unsigned long oldmask; unsigned long mask; @@ -1782,22 +1788,17 @@ static noinline_for_stack bool rcu_gp_init(void) * of RCU's Requirements documentation. */ WRITE_ONCE(rcu_state.gp_state, RCU_GP_ONOFF); + /* Exclude CPU hotplug operations. */ rcu_for_each_leaf_node(rnp) { - // Wait for CPU-hotplug operations that might have - // started before this grace period did. - smp_mb(); // Pair with barriers used when updating ->ofl_seq to odd values. 
- firstseq = READ_ONCE(rnp->ofl_seq); - if (firstseq & 0x1) - while (firstseq == READ_ONCE(rnp->ofl_seq)) - schedule_timeout_idle(1); // Can't wake unless RCU is watching. - smp_mb(); // Pair with barriers used when updating ->ofl_seq to even values. - raw_spin_lock(&rcu_state.ofl_lock); - raw_spin_lock_irq_rcu_node(rnp); + local_irq_save(flags); + arch_spin_lock(&rcu_state.ofl_lock); + raw_spin_lock_rcu_node(rnp); if (rnp->qsmaskinit == rnp->qsmaskinitnext && !rnp->wait_blkd_tasks) { /* Nothing to do on this leaf rcu_node structure. */ - raw_spin_unlock_irq_rcu_node(rnp); - raw_spin_unlock(&rcu_state.ofl_lock); + raw_spin_unlock_rcu_node(rnp); + arch_spin_unlock(&rcu_state.ofl_lock); + local_irq_restore(flags); continue; } @@ -1832,8 +1833,9 @@ static noinline_for_stack bool rcu_gp_init(void) rcu_cleanup_dead_rnp(rnp); } - raw_spin_unlock_irq_rcu_node(rnp); - raw_spin_unlock(&rcu_state.ofl_lock); + raw_spin_unlock_rcu_node(rnp); + arch_spin_unlock(&rcu_state.ofl_lock); + local_irq_restore(flags); } rcu_gp_slow(gp_preinit_delay); /* Races with CPU hotplug. */ @@ -4287,11 +4289,10 @@ void rcu_cpu_starting(unsigned int cpu) rnp = rdp->mynode; mask = rdp->grpmask; - WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1); - WARN_ON_ONCE(!(rnp->ofl_seq & 0x1)); + local_irq_save(flags); + arch_spin_lock(&rcu_state.ofl_lock); rcu_dynticks_eqs_online(); - smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier(). - raw_spin_lock_irqsave_rcu_node(rnp, flags); + raw_spin_lock_rcu_node(rnp); WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext | mask); newcpu = !(rnp->expmaskinitnext & mask); rnp->expmaskinitnext |= mask; @@ -4304,15 +4305,18 @@ void rcu_cpu_starting(unsigned int cpu) /* An incoming CPU should never be blocking a grace period. */ if (WARN_ON_ONCE(rnp->qsmask & mask)) { /* RCU waiting on incoming CPU? 
*/ + /* rcu_report_qs_rnp() *really* wants some flags to restore */ + unsigned long flags2; + + local_irq_save(flags2); rcu_disable_urgency_upon_qs(rdp); /* Report QS -after- changing ->qsmaskinitnext! */ - rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags); + rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags2); } else { - raw_spin_unlock_irqrestore_rcu_node(rnp, flags); + raw_spin_unlock_rcu_node(rnp); } - smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier(). - WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1); - WARN_ON_ONCE(rnp->ofl_seq & 0x1); + arch_spin_unlock(&rcu_state.ofl_lock); + local_irq_restore(flags); smp_mb(); /* Ensure RCU read-side usage follows above initialization. */ } @@ -4326,7 +4330,7 @@ void rcu_cpu_starting(unsigned int cpu) */ void rcu_report_dead(unsigned int cpu) { - unsigned long flags; + unsigned long flags, seq_flags; unsigned long mask; struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); struct rcu_node *rnp = rdp->mynode; /* Outgoing CPU's rdp & rnp. */ @@ -4340,10 +4344,8 @@ void rcu_report_dead(unsigned int cpu) /* Remove outgoing CPU from mask in the leaf rcu_node structure. */ mask = rdp->grpmask; - WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1); - WARN_ON_ONCE(!(rnp->ofl_seq & 0x1)); - smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier(). - raw_spin_lock(&rcu_state.ofl_lock); + local_irq_save(seq_flags); + arch_spin_lock(&rcu_state.ofl_lock); raw_spin_lock_irqsave_rcu_node(rnp, flags); /* Enforce GP memory-order guarantee. */ rdp->rcu_ofl_gp_seq = READ_ONCE(rcu_state.gp_seq); rdp->rcu_ofl_gp_flags = READ_ONCE(rcu_state.gp_flags); @@ -4354,10 +4356,8 @@ void rcu_report_dead(unsigned int cpu) } WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext & ~mask); raw_spin_unlock_irqrestore_rcu_node(rnp, flags); - raw_spin_unlock(&rcu_state.ofl_lock); - smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier(). 
- WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1); - WARN_ON_ONCE(rnp->ofl_seq & 0x1); + arch_spin_unlock(&rcu_state.ofl_lock); + local_irq_restore(seq_flags); rdp->cpu_started = false; } diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 486fc901bd085..4b4bcef8a9743 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -56,8 +56,6 @@ struct rcu_node { /* Initialized from ->qsmaskinitnext at the */ /* beginning of each grace period. */ unsigned long qsmaskinitnext; - unsigned long ofl_seq; /* CPU-hotplug operation sequence count. */ - /* Online CPUs for next grace period. */ unsigned long expmask; /* CPUs or groups that need to check in */ /* to allow the current expedited GP */ /* to complete. */ @@ -355,7 +353,7 @@ struct rcu_state { const char *name; /* Name of structure. */ char abbr; /* Abbreviated name. */ - raw_spinlock_t ofl_lock ____cacheline_internodealigned_in_smp; + arch_spinlock_t ofl_lock ____cacheline_internodealigned_in_smp; /* Synchronize offline with */ /* GP pre-initialization. */ }; diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index c5b45c2f68a15..5678bee7aefee 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -556,16 +556,16 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags) raw_spin_unlock_irqrestore_rcu_node(rnp, flags); } - /* Unboost if we were boosted. */ - if (IS_ENABLED(CONFIG_RCU_BOOST) && drop_boost_mutex) - rt_mutex_futex_unlock(&rnp->boost_mtx.rtmutex); - /* * If this was the last task on the expedited lists, * then we need to report up the rcu_node hierarchy. */ if (!empty_exp && empty_exp_now) rcu_report_exp_rnp(rnp, true); + + /* Unboost if we were boosted. 
*/ + if (IS_ENABLED(CONFIG_RCU_BOOST) && drop_boost_mutex) + rt_mutex_futex_unlock(&rnp->boost_mtx.rtmutex); } else { local_irq_restore(flags); } diff --git a/kernel/resource.c b/kernel/resource.c index 9c08d6e9eef27..34eaee179689a 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -56,14 +56,6 @@ struct resource_constraint { static DEFINE_RWLOCK(resource_lock); -/* - * For memory hotplug, there is no way to free resource entries allocated - * by boot mem after the system is up. So for reusing the resource entry - * we need to remember the resource. - */ -static struct resource *bootmem_resource_free; -static DEFINE_SPINLOCK(bootmem_resource_lock); - static struct resource *next_resource(struct resource *p) { if (p->child) @@ -160,36 +152,19 @@ __initcall(ioresources_init); static void free_resource(struct resource *res) { - if (!res) - return; - - if (!PageSlab(virt_to_head_page(res))) { - spin_lock(&bootmem_resource_lock); - res->sibling = bootmem_resource_free; - bootmem_resource_free = res; - spin_unlock(&bootmem_resource_lock); - } else { + /** + * If the resource was allocated using memblock early during boot + * we'll leak it here: we can only return full pages back to the + * buddy and trying to be smart and reusing them eventually in + * alloc_resource() overcomplicates resource handling. 
+ */ + if (res && PageSlab(virt_to_head_page(res))) kfree(res); - } } static struct resource *alloc_resource(gfp_t flags) { - struct resource *res = NULL; - - spin_lock(&bootmem_resource_lock); - if (bootmem_resource_free) { - res = bootmem_resource_free; - bootmem_resource_free = res->sibling; - } - spin_unlock(&bootmem_resource_lock); - - if (res) - memset(res, 0, sizeof(struct resource)); - else - res = kzalloc(sizeof(struct resource), flags); - - return res; + return kzalloc(sizeof(struct resource), flags); } /* Return the conflict entry if you can't request it */ diff --git a/kernel/rseq.c b/kernel/rseq.c index 6d45ac3dae7fb..97ac20b4f7387 100644 --- a/kernel/rseq.c +++ b/kernel/rseq.c @@ -128,10 +128,10 @@ static int rseq_get_rseq_cs(struct task_struct *t, struct rseq_cs *rseq_cs) int ret; #ifdef CONFIG_64BIT - if (get_user(ptr, &t->rseq->rseq_cs.ptr64)) + if (get_user(ptr, &t->rseq->rseq_cs)) return -EFAULT; #else - if (copy_from_user(&ptr, &t->rseq->rseq_cs.ptr64, sizeof(ptr))) + if (copy_from_user(&ptr, &t->rseq->rseq_cs, sizeof(ptr))) return -EFAULT; #endif if (!ptr) { @@ -217,9 +217,9 @@ static int clear_rseq_cs(struct task_struct *t) * Set rseq_cs to NULL. 
*/ #ifdef CONFIG_64BIT - return put_user(0UL, &t->rseq->rseq_cs.ptr64); + return put_user(0UL, &t->rseq->rseq_cs); #else - if (clear_user(&t->rseq->rseq_cs.ptr64, sizeof(t->rseq->rseq_cs.ptr64))) + if (clear_user(&t->rseq->rseq_cs, sizeof(t->rseq->rseq_cs))) return -EFAULT; return 0; #endif diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 9745613d531ce..9ad73574630ac 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -36,6 +36,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_rt_tp); EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_dl_tp); EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_irq_tp); EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_se_tp); +EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_thermal_tp); EXPORT_TRACEPOINT_SYMBOL_GPL(sched_cpu_capacity_tp); EXPORT_TRACEPOINT_SYMBOL_GPL(sched_overutilized_tp); EXPORT_TRACEPOINT_SYMBOL_GPL(sched_util_est_cfs_tp); @@ -4979,6 +4980,7 @@ context_switch(struct rq *rq, struct task_struct *prev, * finish_task_switch()'s mmdrop(). */ switch_mm_irqs_off(prev->active_mm, next->mm, next); + lru_gen_use_mm(next->mm); if (!prev->mm) { // from kernel /* will mmdrop() in finish_task_switch(). 
*/ @@ -5674,6 +5676,8 @@ static inline struct task_struct *pick_task(struct rq *rq) extern void task_vruntime_update(struct rq *rq, struct task_struct *p, bool in_fi); +static void queue_core_balance(struct rq *rq); + static struct task_struct * pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) { @@ -5723,7 +5727,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) } rq->core_pick = NULL; - return next; + goto out; } put_prev_task_balance(rq, prev, rf); @@ -5773,7 +5777,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) */ WARN_ON_ONCE(fi_before); task_vruntime_update(rq, next, false); - goto done; + goto out_set_next; } } @@ -5892,8 +5896,12 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) resched_curr(rq_i); } -done: +out_set_next: set_next_task(rq, next); +out: + if (rq->core->core_forceidle_count && next == rq->idle) + queue_core_balance(rq); + return next; } @@ -5922,7 +5930,7 @@ static bool try_steal_cookie(int this, int that) if (p == src->core_pick || p == src->curr) goto next; - if (!cpumask_test_cpu(this, &p->cpus_mask)) + if (!is_cpu_allowed(p, this)) goto next; if (p->core_occupation > dst->idle->core_occupation) @@ -5988,7 +5996,7 @@ static void sched_core_balance(struct rq *rq) static DEFINE_PER_CPU(struct callback_head, core_balance_head); -void queue_core_balance(struct rq *rq) +static void queue_core_balance(struct rq *rq) { if (!sched_core_enabled(rq)) return; diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c index 3d06c5e4220d4..307800586ac81 100644 --- a/kernel/sched/cpuacct.c +++ b/kernel/sched/cpuacct.c @@ -334,12 +334,13 @@ static struct cftype files[] = { */ void cpuacct_charge(struct task_struct *tsk, u64 cputime) { + unsigned int cpu = task_cpu(tsk); struct cpuacct *ca; rcu_read_lock(); for (ca = task_ca(tsk); ca; ca = parent_ca(ca)) - __this_cpu_add(*ca->cpuusage, cputime); + *per_cpu_ptr(ca->cpuusage, cpu) += 
cputime; rcu_read_unlock(); } diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 26778884d9ab1..6d65ab6e484e2 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -289,6 +289,7 @@ static void sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time) * into the same scale so we can compare. */ boost = (sg_cpu->iowait_boost * sg_cpu->max) >> SCHED_CAPACITY_SHIFT; + boost = uclamp_rq_util_with(cpu_rq(sg_cpu->cpu), boost, NULL); if (sg_cpu->util < boost) sg_cpu->util = boost; } @@ -348,8 +349,11 @@ static void sugov_update_single_freq(struct update_util_data *hook, u64 time, /* * Do not reduce the frequency if the CPU has not been idle * recently, as the reduction is likely to be premature then. + * + * Except when the rq is capped by uclamp_max. */ - if (sugov_cpu_is_busy(sg_cpu) && next_f < sg_policy->next_freq) { + if (!uclamp_rq_is_capped(cpu_rq(sg_cpu->cpu)) && + sugov_cpu_is_busy(sg_cpu) && next_f < sg_policy->next_freq) { next_f = sg_policy->next_freq; /* Restore cached freq as next_freq has changed */ @@ -395,8 +399,11 @@ static void sugov_update_single_perf(struct update_util_data *hook, u64 time, /* * Do not reduce the target performance level if the CPU has not been * idle recently, as the reduction is likely to be premature then. + * + * Except when the rq is capped by uclamp_max. 
*/ - if (sugov_cpu_is_busy(sg_cpu) && sg_cpu->util < prev_util) + if (!uclamp_rq_is_capped(cpu_rq(sg_cpu->cpu)) && + sugov_cpu_is_busy(sg_cpu) && sg_cpu->util < prev_util) sg_cpu->util = prev_util; cpufreq_driver_adjust_perf(sg_cpu->cpu, map_util_perf(sg_cpu->bw_dl), diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index d2c072b0ef01f..62f0cf8422775 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -2240,12 +2240,6 @@ static int push_dl_task(struct rq *rq) return 0; retry: - if (is_migration_disabled(next_task)) - return 0; - - if (WARN_ON(next_task == rq->curr)) - return 0; - /* * If next_task preempts rq->curr, and rq->curr * can move away, it makes sense to just reschedule @@ -2258,6 +2252,12 @@ static int push_dl_task(struct rq *rq) return 0; } + if (is_migration_disabled(next_task)) + return 0; + + if (WARN_ON(next_task == rq->curr)) + return 0; + /* We might release rq lock */ get_task_struct(next_task); diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index aa29211de1bf8..102d6f70e84d3 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -931,25 +931,15 @@ void print_numa_stats(struct seq_file *m, int node, unsigned long tsf, static void sched_show_numa(struct task_struct *p, struct seq_file *m) { #ifdef CONFIG_NUMA_BALANCING - struct mempolicy *pol; - if (p->mm) P(mm->numa_scan_seq); - task_lock(p); - pol = p->mempolicy; - if (pol && !(pol->flags & MPOL_F_MORON)) - pol = NULL; - mpol_get(pol); - task_unlock(p); - P(numa_pages_migrated); P(numa_preferred_nid); P(total_numa_faults); SEQ_printf(m, "current_node=%d, numa_group_id=%d\n", task_node(p), task_numa_group_id(p)); show_numa_stats(p, m); - mpol_put(pol); #endif } diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 5146163bfabb9..cddcf2f4f5251 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -9040,9 +9040,10 @@ static bool update_pick_idlest(struct sched_group *idlest, * This is an approximation as the number of running tasks may 
not be * related to the number of busy CPUs due to sched_setaffinity. */ -static inline bool allow_numa_imbalance(int dst_running, int dst_weight) +static inline bool +allow_numa_imbalance(unsigned int running, unsigned int weight) { - return (dst_running < (dst_weight >> 2)); + return (running < (weight >> 2)); } /* @@ -9176,12 +9177,13 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu) return idlest; #endif /* - * Otherwise, keep the task on this node to stay close - * its wakeup source and improve locality. If there is - * a real need of migration, periodic load balance will - * take care of it. + * Otherwise, keep the task close to the wakeup source + * and improve locality if the number of running tasks + * would remain below threshold where an imbalance is + * allowed. If there is a real need of migration, + * periodic load balance will take care of it. */ - if (allow_numa_imbalance(local_sgs.sum_nr_running, sd->span_weight)) + if (allow_numa_imbalance(local_sgs.sum_nr_running + 1, local_sgs.group_weight)) return NULL; } @@ -9387,7 +9389,7 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s /* Consider allowing a small imbalance between NUMA groups */ if (env->sd->flags & SD_NUMA) { env->imbalance = adjust_numa_imbalance(env->imbalance, - busiest->sum_nr_running, busiest->group_weight); + local->sum_nr_running + 1, local->group_weight); } return; diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index d17b0a5ce6ac3..314c36fc9c42f 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -437,7 +437,6 @@ static void set_next_task_idle(struct rq *rq, struct task_struct *next, bool fir { update_idle_core(rq); schedstat_inc(rq->sched_goidle); - queue_core_balance(rq); } #ifdef CONFIG_SMP diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 7b4f4fbbb4048..14f273c295183 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -2026,6 +2026,16 @@ static int push_rt_task(struct rq *rq, bool 
pull) return 0; retry: + /* + * It's possible that the next_task slipped in of + * higher priority than current. If that's the case + * just reschedule current. + */ + if (unlikely(next_task->prio < rq->curr->prio)) { + resched_curr(rq); + return 0; + } + if (is_migration_disabled(next_task)) { struct task_struct *push_task = NULL; int cpu; @@ -2033,6 +2043,18 @@ static int push_rt_task(struct rq *rq, bool pull) if (!pull || rq->push_busy) return 0; + /* + * Invoking find_lowest_rq() on anything but an RT task doesn't + * make sense. Per the above priority check, curr has to + * be of higher priority than next_task, so no need to + * reschedule when bailing out. + * + * Note that the stoppers are masqueraded as SCHED_FIFO + * (cf. sched_set_stop_task()), so we can't rely on rt_task(). + */ + if (rq->curr->sched_class != &rt_sched_class) + return 0; + cpu = find_lowest_rq(rq->curr); if (cpu == -1 || cpu == rq->cpu) return 0; @@ -2057,16 +2079,6 @@ static int push_rt_task(struct rq *rq, bool pull) if (WARN_ON(next_task == rq->curr)) return 0; - /* - * It's possible that the next_task slipped in of - * higher priority than current. If that's the case - * just reschedule current. 
- */ - if (unlikely(next_task->prio < rq->curr->prio)) { - resched_curr(rq); - return 0; - } - /* We might release rq lock */ get_task_struct(next_task); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index de53be9057390..e8a5549488dd1 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1247,8 +1247,6 @@ static inline bool sched_group_cookie_match(struct rq *rq, return false; } -extern void queue_core_balance(struct rq *rq); - static inline bool sched_core_enqueued(struct task_struct *p) { return !RB_EMPTY_NODE(&p->core_node); @@ -1282,10 +1280,6 @@ static inline raw_spinlock_t *__rq_lockp(struct rq *rq) return &rq->__lock; } -static inline void queue_core_balance(struct rq *rq) -{ -} - static inline bool sched_cpu_cookie_match(struct rq *rq, struct task_struct *p) { return true; @@ -2841,88 +2835,6 @@ static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {} #endif /* CONFIG_CPU_FREQ */ -#ifdef CONFIG_UCLAMP_TASK -unsigned long uclamp_eff_value(struct task_struct *p, enum uclamp_id clamp_id); - -/** - * uclamp_rq_util_with - clamp @util with @rq and @p effective uclamp values. - * @rq: The rq to clamp against. Must not be NULL. - * @util: The util value to clamp. - * @p: The task to clamp against. Can be NULL if you want to clamp - * against @rq only. - * - * Clamps the passed @util to the max(@rq, @p) effective uclamp values. - * - * If sched_uclamp_used static key is disabled, then just return the util - * without any clamping since uclamp aggregation at the rq level in the fast - * path is disabled, rendering this operation a NOP. - * - * Use uclamp_eff_value() if you don't care about uclamp values at rq level. It - * will return the correct effective uclamp value of the task even if the - * static key is disabled. 
- */ -static __always_inline -unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util, - struct task_struct *p) -{ - unsigned long min_util = 0; - unsigned long max_util = 0; - - if (!static_branch_likely(&sched_uclamp_used)) - return util; - - if (p) { - min_util = uclamp_eff_value(p, UCLAMP_MIN); - max_util = uclamp_eff_value(p, UCLAMP_MAX); - - /* - * Ignore last runnable task's max clamp, as this task will - * reset it. Similarly, no need to read the rq's min clamp. - */ - if (rq->uclamp_flags & UCLAMP_FLAG_IDLE) - goto out; - } - - min_util = max_t(unsigned long, min_util, READ_ONCE(rq->uclamp[UCLAMP_MIN].value)); - max_util = max_t(unsigned long, max_util, READ_ONCE(rq->uclamp[UCLAMP_MAX].value)); -out: - /* - * Since CPU's {min,max}_util clamps are MAX aggregated considering - * RUNNABLE tasks with _different_ clamps, we can end up with an - * inversion. Fix it now when the clamps are applied. - */ - if (unlikely(min_util >= max_util)) - return min_util; - - return clamp(util, min_util, max_util); -} - -/* - * When uclamp is compiled in, the aggregation at rq level is 'turned off' - * by default in the fast path and only gets turned on once userspace performs - * an operation that requires it. - * - * Returns true if userspace opted-in to use uclamp and aggregation at rq level - * hence is active. 
- */ -static inline bool uclamp_is_used(void) -{ - return static_branch_likely(&sched_uclamp_used); -} -#else /* CONFIG_UCLAMP_TASK */ -static inline -unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util, - struct task_struct *p) -{ - return util; -} - -static inline bool uclamp_is_used(void) -{ - return false; -} -#endif /* CONFIG_UCLAMP_TASK */ - #ifdef arch_scale_freq_capacity # ifndef arch_scale_freq_invariant # define arch_scale_freq_invariant() true @@ -3020,6 +2932,105 @@ static inline unsigned long cpu_util_rt(struct rq *rq) } #endif +#ifdef CONFIG_UCLAMP_TASK +unsigned long uclamp_eff_value(struct task_struct *p, enum uclamp_id clamp_id); + +/** + * uclamp_rq_util_with - clamp @util with @rq and @p effective uclamp values. + * @rq: The rq to clamp against. Must not be NULL. + * @util: The util value to clamp. + * @p: The task to clamp against. Can be NULL if you want to clamp + * against @rq only. + * + * Clamps the passed @util to the max(@rq, @p) effective uclamp values. + * + * If sched_uclamp_used static key is disabled, then just return the util + * without any clamping since uclamp aggregation at the rq level in the fast + * path is disabled, rendering this operation a NOP. + * + * Use uclamp_eff_value() if you don't care about uclamp values at rq level. It + * will return the correct effective uclamp value of the task even if the + * static key is disabled. + */ +static __always_inline +unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util, + struct task_struct *p) +{ + unsigned long min_util = 0; + unsigned long max_util = 0; + + if (!static_branch_likely(&sched_uclamp_used)) + return util; + + if (p) { + min_util = uclamp_eff_value(p, UCLAMP_MIN); + max_util = uclamp_eff_value(p, UCLAMP_MAX); + + /* + * Ignore last runnable task's max clamp, as this task will + * reset it. Similarly, no need to read the rq's min clamp. 
+ */ + if (rq->uclamp_flags & UCLAMP_FLAG_IDLE) + goto out; + } + + min_util = max_t(unsigned long, min_util, READ_ONCE(rq->uclamp[UCLAMP_MIN].value)); + max_util = max_t(unsigned long, max_util, READ_ONCE(rq->uclamp[UCLAMP_MAX].value)); +out: + /* + * Since CPU's {min,max}_util clamps are MAX aggregated considering + * RUNNABLE tasks with _different_ clamps, we can end up with an + * inversion. Fix it now when the clamps are applied. + */ + if (unlikely(min_util >= max_util)) + return min_util; + + return clamp(util, min_util, max_util); +} + +/* Is the rq being capped/throttled by uclamp_max? */ +static inline bool uclamp_rq_is_capped(struct rq *rq) +{ + unsigned long rq_util; + unsigned long max_util; + + if (!static_branch_likely(&sched_uclamp_used)) + return false; + + rq_util = cpu_util_cfs(cpu_of(rq)) + cpu_util_rt(rq); + max_util = READ_ONCE(rq->uclamp[UCLAMP_MAX].value); + + return max_util != SCHED_CAPACITY_SCALE && rq_util >= max_util; +} + +/* + * When uclamp is compiled in, the aggregation at rq level is 'turned off' + * by default in the fast path and only gets turned on once userspace performs + * an operation that requires it. + * + * Returns true if userspace opted-in to use uclamp and aggregation at rq level + * hence is active. 
+ */ +static inline bool uclamp_is_used(void) +{ + return static_branch_likely(&sched_uclamp_used); +} +#else /* CONFIG_UCLAMP_TASK */ +static inline +unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util, + struct task_struct *p) +{ + return util; +} + +static inline bool uclamp_rq_is_capped(struct rq *rq) { return false; } + +static inline bool uclamp_is_used(void) +{ + return false; +} +#endif /* CONFIG_UCLAMP_TASK */ + #ifdef CONFIG_HAVE_SCHED_AVG_IRQ static inline unsigned long cpu_util_irq(struct rq *rq) { diff --git a/kernel/smp.c b/kernel/smp.c index 01a7c1706a58b..65a630f62363c 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -579,7 +579,7 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline) /* There shouldn't be any pending callbacks on an offline CPU. */ if (unlikely(warn_cpu_offline && !cpu_online(smp_processor_id()) && - !warned && !llist_empty(head))) { + !warned && entry != NULL)) { warned = true; WARN(1, "IPI on offline CPU %d\n", smp_processor_id()); diff --git a/kernel/static_call.c b/kernel/static_call.c index 43ba0b1e0edbb..e9c3e69f38379 100644 --- a/kernel/static_call.c +++ b/kernel/static_call.c @@ -1,548 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 -#include #include -#include -#include -#include -#include -#include -#include -#include -#include - -extern struct static_call_site __start_static_call_sites[], - __stop_static_call_sites[]; -extern struct static_call_tramp_key __start_static_call_tramp_key[], - __stop_static_call_tramp_key[]; - -static bool static_call_initialized; - -/* mutex to protect key modules/sites */ -static DEFINE_MUTEX(static_call_mutex); - -static void static_call_lock(void) -{ - mutex_lock(&static_call_mutex); -} - -static void static_call_unlock(void) -{ - mutex_unlock(&static_call_mutex); -} - -static inline void *static_call_addr(struct static_call_site *site) -{ - return (void *)((long)site->addr + (long)&site->addr); -} - -static inline unsigned long __static_call_key(const struct 
static_call_site *site) -{ - return (long)site->key + (long)&site->key; -} - -static inline struct static_call_key *static_call_key(const struct static_call_site *site) -{ - return (void *)(__static_call_key(site) & ~STATIC_CALL_SITE_FLAGS); -} - -/* These assume the key is word-aligned. */ -static inline bool static_call_is_init(struct static_call_site *site) -{ - return __static_call_key(site) & STATIC_CALL_SITE_INIT; -} - -static inline bool static_call_is_tail(struct static_call_site *site) -{ - return __static_call_key(site) & STATIC_CALL_SITE_TAIL; -} - -static inline void static_call_set_init(struct static_call_site *site) -{ - site->key = (__static_call_key(site) | STATIC_CALL_SITE_INIT) - - (long)&site->key; -} - -static int static_call_site_cmp(const void *_a, const void *_b) -{ - const struct static_call_site *a = _a; - const struct static_call_site *b = _b; - const struct static_call_key *key_a = static_call_key(a); - const struct static_call_key *key_b = static_call_key(b); - - if (key_a < key_b) - return -1; - - if (key_a > key_b) - return 1; - - return 0; -} - -static void static_call_site_swap(void *_a, void *_b, int size) -{ - long delta = (unsigned long)_a - (unsigned long)_b; - struct static_call_site *a = _a; - struct static_call_site *b = _b; - struct static_call_site tmp = *a; - - a->addr = b->addr - delta; - a->key = b->key - delta; - - b->addr = tmp.addr + delta; - b->key = tmp.key + delta; -} - -static inline void static_call_sort_entries(struct static_call_site *start, - struct static_call_site *stop) -{ - sort(start, stop - start, sizeof(struct static_call_site), - static_call_site_cmp, static_call_site_swap); -} - -static inline bool static_call_key_has_mods(struct static_call_key *key) -{ - return !(key->type & 1); -} - -static inline struct static_call_mod *static_call_key_next(struct static_call_key *key) -{ - if (!static_call_key_has_mods(key)) - return NULL; - - return key->mods; -} - -static inline struct static_call_site 
*static_call_key_sites(struct static_call_key *key) -{ - if (static_call_key_has_mods(key)) - return NULL; - - return (struct static_call_site *)(key->type & ~1); -} - -void __static_call_update(struct static_call_key *key, void *tramp, void *func) -{ - struct static_call_site *site, *stop; - struct static_call_mod *site_mod, first; - - cpus_read_lock(); - static_call_lock(); - - if (key->func == func) - goto done; - - key->func = func; - - arch_static_call_transform(NULL, tramp, func, false); - - /* - * If uninitialized, we'll not update the callsites, but they still - * point to the trampoline and we just patched that. - */ - if (WARN_ON_ONCE(!static_call_initialized)) - goto done; - - first = (struct static_call_mod){ - .next = static_call_key_next(key), - .mod = NULL, - .sites = static_call_key_sites(key), - }; - - for (site_mod = &first; site_mod; site_mod = site_mod->next) { - bool init = system_state < SYSTEM_RUNNING; - struct module *mod = site_mod->mod; - - if (!site_mod->sites) { - /* - * This can happen if the static call key is defined in - * a module which doesn't use it. - * - * It also happens in the has_mods case, where the - * 'first' entry has no sites associated with it. - */ - continue; - } - - stop = __stop_static_call_sites; - - if (mod) { -#ifdef CONFIG_MODULES - stop = mod->static_call_sites + - mod->num_static_call_sites; - init = mod->state == MODULE_STATE_COMING; -#endif - } - - for (site = site_mod->sites; - site < stop && static_call_key(site) == key; site++) { - void *site_addr = static_call_addr(site); - - if (!init && static_call_is_init(site)) - continue; - - if (!kernel_text_address((unsigned long)site_addr)) { - /* - * This skips patching built-in __exit, which - * is part of init_section_contains() but is - * not part of kernel_text_address(). - * - * Skipping built-in __exit is fine since it - * will never be executed. 
- */ - WARN_ONCE(!static_call_is_init(site), - "can't patch static call site at %pS", - site_addr); - continue; - } - - arch_static_call_transform(site_addr, NULL, func, - static_call_is_tail(site)); - } - } - -done: - static_call_unlock(); - cpus_read_unlock(); -} -EXPORT_SYMBOL_GPL(__static_call_update); - -static int __static_call_init(struct module *mod, - struct static_call_site *start, - struct static_call_site *stop) -{ - struct static_call_site *site; - struct static_call_key *key, *prev_key = NULL; - struct static_call_mod *site_mod; - - if (start == stop) - return 0; - - static_call_sort_entries(start, stop); - - for (site = start; site < stop; site++) { - void *site_addr = static_call_addr(site); - - if ((mod && within_module_init((unsigned long)site_addr, mod)) || - (!mod && init_section_contains(site_addr, 1))) - static_call_set_init(site); - - key = static_call_key(site); - if (key != prev_key) { - prev_key = key; - - /* - * For vmlinux (!mod) avoid the allocation by storing - * the sites pointer in the key itself. Also see - * __static_call_update()'s @first. - * - * This allows architectures (eg. x86) to call - * static_call_init() before memory allocation works. - */ - if (!mod) { - key->sites = site; - key->type |= 1; - goto do_transform; - } - - site_mod = kzalloc(sizeof(*site_mod), GFP_KERNEL); - if (!site_mod) - return -ENOMEM; - - /* - * When the key has a direct sites pointer, extract - * that into an explicit struct static_call_mod, so we - * can have a list of modules. 
- */ - if (static_call_key_sites(key)) { - site_mod->mod = NULL; - site_mod->next = NULL; - site_mod->sites = static_call_key_sites(key); - - key->mods = site_mod; - - site_mod = kzalloc(sizeof(*site_mod), GFP_KERNEL); - if (!site_mod) - return -ENOMEM; - } - - site_mod->mod = mod; - site_mod->sites = site; - site_mod->next = static_call_key_next(key); - key->mods = site_mod; - } - -do_transform: - arch_static_call_transform(site_addr, NULL, key->func, - static_call_is_tail(site)); - } - - return 0; -} - -static int addr_conflict(struct static_call_site *site, void *start, void *end) -{ - unsigned long addr = (unsigned long)static_call_addr(site); - - if (addr <= (unsigned long)end && - addr + CALL_INSN_SIZE > (unsigned long)start) - return 1; - - return 0; -} - -static int __static_call_text_reserved(struct static_call_site *iter_start, - struct static_call_site *iter_stop, - void *start, void *end, bool init) -{ - struct static_call_site *iter = iter_start; - - while (iter < iter_stop) { - if (init || !static_call_is_init(iter)) { - if (addr_conflict(iter, start, end)) - return 1; - } - iter++; - } - - return 0; -} - -#ifdef CONFIG_MODULES - -static int __static_call_mod_text_reserved(void *start, void *end) -{ - struct module *mod; - int ret; - - preempt_disable(); - mod = __module_text_address((unsigned long)start); - WARN_ON_ONCE(__module_text_address((unsigned long)end) != mod); - if (!try_module_get(mod)) - mod = NULL; - preempt_enable(); - - if (!mod) - return 0; - - ret = __static_call_text_reserved(mod->static_call_sites, - mod->static_call_sites + mod->num_static_call_sites, - start, end, mod->state == MODULE_STATE_COMING); - - module_put(mod); - - return ret; -} - -static unsigned long tramp_key_lookup(unsigned long addr) -{ - struct static_call_tramp_key *start = __start_static_call_tramp_key; - struct static_call_tramp_key *stop = __stop_static_call_tramp_key; - struct static_call_tramp_key *tramp_key; - - for (tramp_key = start; tramp_key != stop; 
tramp_key++) { - unsigned long tramp; - - tramp = (long)tramp_key->tramp + (long)&tramp_key->tramp; - if (tramp == addr) - return (long)tramp_key->key + (long)&tramp_key->key; - } - - return 0; -} - -static int static_call_add_module(struct module *mod) -{ - struct static_call_site *start = mod->static_call_sites; - struct static_call_site *stop = start + mod->num_static_call_sites; - struct static_call_site *site; - - for (site = start; site != stop; site++) { - unsigned long s_key = __static_call_key(site); - unsigned long addr = s_key & ~STATIC_CALL_SITE_FLAGS; - unsigned long key; - - /* - * Is the key is exported, 'addr' points to the key, which - * means modules are allowed to call static_call_update() on - * it. - * - * Otherwise, the key isn't exported, and 'addr' points to the - * trampoline so we need to lookup the key. - * - * We go through this dance to prevent crazy modules from - * abusing sensitive static calls. - */ - if (!kernel_text_address(addr)) - continue; - - key = tramp_key_lookup(addr); - if (!key) { - pr_warn("Failed to fixup __raw_static_call() usage at: %ps\n", - static_call_addr(site)); - return -EINVAL; - } - - key |= s_key & STATIC_CALL_SITE_FLAGS; - site->key = key - (long)&site->key; - } - - return __static_call_init(mod, start, stop); -} - -static void static_call_del_module(struct module *mod) -{ - struct static_call_site *start = mod->static_call_sites; - struct static_call_site *stop = mod->static_call_sites + - mod->num_static_call_sites; - struct static_call_key *key, *prev_key = NULL; - struct static_call_mod *site_mod, **prev; - struct static_call_site *site; - - for (site = start; site < stop; site++) { - key = static_call_key(site); - if (key == prev_key) - continue; - - prev_key = key; - - for (prev = &key->mods, site_mod = key->mods; - site_mod && site_mod->mod != mod; - prev = &site_mod->next, site_mod = site_mod->next) - ; - - if (!site_mod) - continue; - - *prev = site_mod->next; - kfree(site_mod); - } -} - -static int 
static_call_module_notify(struct notifier_block *nb, - unsigned long val, void *data) -{ - struct module *mod = data; - int ret = 0; - - cpus_read_lock(); - static_call_lock(); - - switch (val) { - case MODULE_STATE_COMING: - ret = static_call_add_module(mod); - if (ret) { - WARN(1, "Failed to allocate memory for static calls"); - static_call_del_module(mod); - } - break; - case MODULE_STATE_GOING: - static_call_del_module(mod); - break; - } - - static_call_unlock(); - cpus_read_unlock(); - - return notifier_from_errno(ret); -} - -static struct notifier_block static_call_module_nb = { - .notifier_call = static_call_module_notify, -}; - -#else - -static inline int __static_call_mod_text_reserved(void *start, void *end) -{ - return 0; -} - -#endif /* CONFIG_MODULES */ - -int static_call_text_reserved(void *start, void *end) -{ - bool init = system_state < SYSTEM_RUNNING; - int ret = __static_call_text_reserved(__start_static_call_sites, - __stop_static_call_sites, start, end, init); - - if (ret) - return ret; - - return __static_call_mod_text_reserved(start, end); -} - -int __init static_call_init(void) -{ - int ret; - - if (static_call_initialized) - return 0; - - cpus_read_lock(); - static_call_lock(); - ret = __static_call_init(NULL, __start_static_call_sites, - __stop_static_call_sites); - static_call_unlock(); - cpus_read_unlock(); - - if (ret) { - pr_err("Failed to allocate memory for static_call!\n"); - BUG(); - } - - static_call_initialized = true; - -#ifdef CONFIG_MODULES - register_module_notifier(&static_call_module_nb); -#endif - return 0; -} -early_initcall(static_call_init); long __static_call_return0(void) { return 0; } - -#ifdef CONFIG_STATIC_CALL_SELFTEST - -static int func_a(int x) -{ - return x+1; -} - -static int func_b(int x) -{ - return x+2; -} - -DEFINE_STATIC_CALL(sc_selftest, func_a); - -static struct static_call_data { - int (*func)(int); - int val; - int expect; -} static_call_data [] __initdata = { - { NULL, 2, 3 }, - { func_b, 2, 4 }, - { 
func_a, 2, 3 } -}; - -static int __init test_static_call_init(void) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(static_call_data); i++ ) { - struct static_call_data *scd = &static_call_data[i]; - - if (scd->func) - static_call_update(sc_selftest, scd->func); - - WARN_ON(static_call(sc_selftest)(scd->val) != scd->expect); - } - - return 0; -} -early_initcall(test_static_call_init); - -#endif /* CONFIG_STATIC_CALL_SELFTEST */ +EXPORT_SYMBOL_GPL(__static_call_return0); diff --git a/kernel/static_call_inline.c b/kernel/static_call_inline.c new file mode 100644 index 0000000000000..dc5665b628140 --- /dev/null +++ b/kernel/static_call_inline.c @@ -0,0 +1,543 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +extern struct static_call_site __start_static_call_sites[], + __stop_static_call_sites[]; +extern struct static_call_tramp_key __start_static_call_tramp_key[], + __stop_static_call_tramp_key[]; + +static bool static_call_initialized; + +/* mutex to protect key modules/sites */ +static DEFINE_MUTEX(static_call_mutex); + +static void static_call_lock(void) +{ + mutex_lock(&static_call_mutex); +} + +static void static_call_unlock(void) +{ + mutex_unlock(&static_call_mutex); +} + +static inline void *static_call_addr(struct static_call_site *site) +{ + return (void *)((long)site->addr + (long)&site->addr); +} + +static inline unsigned long __static_call_key(const struct static_call_site *site) +{ + return (long)site->key + (long)&site->key; +} + +static inline struct static_call_key *static_call_key(const struct static_call_site *site) +{ + return (void *)(__static_call_key(site) & ~STATIC_CALL_SITE_FLAGS); +} + +/* These assume the key is word-aligned. 
*/ +static inline bool static_call_is_init(struct static_call_site *site) +{ + return __static_call_key(site) & STATIC_CALL_SITE_INIT; +} + +static inline bool static_call_is_tail(struct static_call_site *site) +{ + return __static_call_key(site) & STATIC_CALL_SITE_TAIL; +} + +static inline void static_call_set_init(struct static_call_site *site) +{ + site->key = (__static_call_key(site) | STATIC_CALL_SITE_INIT) - + (long)&site->key; +} + +static int static_call_site_cmp(const void *_a, const void *_b) +{ + const struct static_call_site *a = _a; + const struct static_call_site *b = _b; + const struct static_call_key *key_a = static_call_key(a); + const struct static_call_key *key_b = static_call_key(b); + + if (key_a < key_b) + return -1; + + if (key_a > key_b) + return 1; + + return 0; +} + +static void static_call_site_swap(void *_a, void *_b, int size) +{ + long delta = (unsigned long)_a - (unsigned long)_b; + struct static_call_site *a = _a; + struct static_call_site *b = _b; + struct static_call_site tmp = *a; + + a->addr = b->addr - delta; + a->key = b->key - delta; + + b->addr = tmp.addr + delta; + b->key = tmp.key + delta; +} + +static inline void static_call_sort_entries(struct static_call_site *start, + struct static_call_site *stop) +{ + sort(start, stop - start, sizeof(struct static_call_site), + static_call_site_cmp, static_call_site_swap); +} + +static inline bool static_call_key_has_mods(struct static_call_key *key) +{ + return !(key->type & 1); +} + +static inline struct static_call_mod *static_call_key_next(struct static_call_key *key) +{ + if (!static_call_key_has_mods(key)) + return NULL; + + return key->mods; +} + +static inline struct static_call_site *static_call_key_sites(struct static_call_key *key) +{ + if (static_call_key_has_mods(key)) + return NULL; + + return (struct static_call_site *)(key->type & ~1); +} + +void __static_call_update(struct static_call_key *key, void *tramp, void *func) +{ + struct static_call_site *site, *stop; + 
struct static_call_mod *site_mod, first; + + cpus_read_lock(); + static_call_lock(); + + if (key->func == func) + goto done; + + key->func = func; + + arch_static_call_transform(NULL, tramp, func, false); + + /* + * If uninitialized, we'll not update the callsites, but they still + * point to the trampoline and we just patched that. + */ + if (WARN_ON_ONCE(!static_call_initialized)) + goto done; + + first = (struct static_call_mod){ + .next = static_call_key_next(key), + .mod = NULL, + .sites = static_call_key_sites(key), + }; + + for (site_mod = &first; site_mod; site_mod = site_mod->next) { + bool init = system_state < SYSTEM_RUNNING; + struct module *mod = site_mod->mod; + + if (!site_mod->sites) { + /* + * This can happen if the static call key is defined in + * a module which doesn't use it. + * + * It also happens in the has_mods case, where the + * 'first' entry has no sites associated with it. + */ + continue; + } + + stop = __stop_static_call_sites; + + if (mod) { +#ifdef CONFIG_MODULES + stop = mod->static_call_sites + + mod->num_static_call_sites; + init = mod->state == MODULE_STATE_COMING; +#endif + } + + for (site = site_mod->sites; + site < stop && static_call_key(site) == key; site++) { + void *site_addr = static_call_addr(site); + + if (!init && static_call_is_init(site)) + continue; + + if (!kernel_text_address((unsigned long)site_addr)) { + /* + * This skips patching built-in __exit, which + * is part of init_section_contains() but is + * not part of kernel_text_address(). + * + * Skipping built-in __exit is fine since it + * will never be executed. 
+ */ + WARN_ONCE(!static_call_is_init(site), + "can't patch static call site at %pS", + site_addr); + continue; + } + + arch_static_call_transform(site_addr, NULL, func, + static_call_is_tail(site)); + } + } + +done: + static_call_unlock(); + cpus_read_unlock(); +} +EXPORT_SYMBOL_GPL(__static_call_update); + +static int __static_call_init(struct module *mod, + struct static_call_site *start, + struct static_call_site *stop) +{ + struct static_call_site *site; + struct static_call_key *key, *prev_key = NULL; + struct static_call_mod *site_mod; + + if (start == stop) + return 0; + + static_call_sort_entries(start, stop); + + for (site = start; site < stop; site++) { + void *site_addr = static_call_addr(site); + + if ((mod && within_module_init((unsigned long)site_addr, mod)) || + (!mod && init_section_contains(site_addr, 1))) + static_call_set_init(site); + + key = static_call_key(site); + if (key != prev_key) { + prev_key = key; + + /* + * For vmlinux (!mod) avoid the allocation by storing + * the sites pointer in the key itself. Also see + * __static_call_update()'s @first. + * + * This allows architectures (eg. x86) to call + * static_call_init() before memory allocation works. + */ + if (!mod) { + key->sites = site; + key->type |= 1; + goto do_transform; + } + + site_mod = kzalloc(sizeof(*site_mod), GFP_KERNEL); + if (!site_mod) + return -ENOMEM; + + /* + * When the key has a direct sites pointer, extract + * that into an explicit struct static_call_mod, so we + * can have a list of modules. 
+ */ + if (static_call_key_sites(key)) { + site_mod->mod = NULL; + site_mod->next = NULL; + site_mod->sites = static_call_key_sites(key); + + key->mods = site_mod; + + site_mod = kzalloc(sizeof(*site_mod), GFP_KERNEL); + if (!site_mod) + return -ENOMEM; + } + + site_mod->mod = mod; + site_mod->sites = site; + site_mod->next = static_call_key_next(key); + key->mods = site_mod; + } + +do_transform: + arch_static_call_transform(site_addr, NULL, key->func, + static_call_is_tail(site)); + } + + return 0; +} + +static int addr_conflict(struct static_call_site *site, void *start, void *end) +{ + unsigned long addr = (unsigned long)static_call_addr(site); + + if (addr <= (unsigned long)end && + addr + CALL_INSN_SIZE > (unsigned long)start) + return 1; + + return 0; +} + +static int __static_call_text_reserved(struct static_call_site *iter_start, + struct static_call_site *iter_stop, + void *start, void *end, bool init) +{ + struct static_call_site *iter = iter_start; + + while (iter < iter_stop) { + if (init || !static_call_is_init(iter)) { + if (addr_conflict(iter, start, end)) + return 1; + } + iter++; + } + + return 0; +} + +#ifdef CONFIG_MODULES + +static int __static_call_mod_text_reserved(void *start, void *end) +{ + struct module *mod; + int ret; + + preempt_disable(); + mod = __module_text_address((unsigned long)start); + WARN_ON_ONCE(__module_text_address((unsigned long)end) != mod); + if (!try_module_get(mod)) + mod = NULL; + preempt_enable(); + + if (!mod) + return 0; + + ret = __static_call_text_reserved(mod->static_call_sites, + mod->static_call_sites + mod->num_static_call_sites, + start, end, mod->state == MODULE_STATE_COMING); + + module_put(mod); + + return ret; +} + +static unsigned long tramp_key_lookup(unsigned long addr) +{ + struct static_call_tramp_key *start = __start_static_call_tramp_key; + struct static_call_tramp_key *stop = __stop_static_call_tramp_key; + struct static_call_tramp_key *tramp_key; + + for (tramp_key = start; tramp_key != stop; 
tramp_key++) { + unsigned long tramp; + + tramp = (long)tramp_key->tramp + (long)&tramp_key->tramp; + if (tramp == addr) + return (long)tramp_key->key + (long)&tramp_key->key; + } + + return 0; +} + +static int static_call_add_module(struct module *mod) +{ + struct static_call_site *start = mod->static_call_sites; + struct static_call_site *stop = start + mod->num_static_call_sites; + struct static_call_site *site; + + for (site = start; site != stop; site++) { + unsigned long s_key = __static_call_key(site); + unsigned long addr = s_key & ~STATIC_CALL_SITE_FLAGS; + unsigned long key; + + /* + * Is the key is exported, 'addr' points to the key, which + * means modules are allowed to call static_call_update() on + * it. + * + * Otherwise, the key isn't exported, and 'addr' points to the + * trampoline so we need to lookup the key. + * + * We go through this dance to prevent crazy modules from + * abusing sensitive static calls. + */ + if (!kernel_text_address(addr)) + continue; + + key = tramp_key_lookup(addr); + if (!key) { + pr_warn("Failed to fixup __raw_static_call() usage at: %ps\n", + static_call_addr(site)); + return -EINVAL; + } + + key |= s_key & STATIC_CALL_SITE_FLAGS; + site->key = key - (long)&site->key; + } + + return __static_call_init(mod, start, stop); +} + +static void static_call_del_module(struct module *mod) +{ + struct static_call_site *start = mod->static_call_sites; + struct static_call_site *stop = mod->static_call_sites + + mod->num_static_call_sites; + struct static_call_key *key, *prev_key = NULL; + struct static_call_mod *site_mod, **prev; + struct static_call_site *site; + + for (site = start; site < stop; site++) { + key = static_call_key(site); + if (key == prev_key) + continue; + + prev_key = key; + + for (prev = &key->mods, site_mod = key->mods; + site_mod && site_mod->mod != mod; + prev = &site_mod->next, site_mod = site_mod->next) + ; + + if (!site_mod) + continue; + + *prev = site_mod->next; + kfree(site_mod); + } +} + +static int 
static_call_module_notify(struct notifier_block *nb, + unsigned long val, void *data) +{ + struct module *mod = data; + int ret = 0; + + cpus_read_lock(); + static_call_lock(); + + switch (val) { + case MODULE_STATE_COMING: + ret = static_call_add_module(mod); + if (ret) { + WARN(1, "Failed to allocate memory for static calls"); + static_call_del_module(mod); + } + break; + case MODULE_STATE_GOING: + static_call_del_module(mod); + break; + } + + static_call_unlock(); + cpus_read_unlock(); + + return notifier_from_errno(ret); +} + +static struct notifier_block static_call_module_nb = { + .notifier_call = static_call_module_notify, +}; + +#else + +static inline int __static_call_mod_text_reserved(void *start, void *end) +{ + return 0; +} + +#endif /* CONFIG_MODULES */ + +int static_call_text_reserved(void *start, void *end) +{ + bool init = system_state < SYSTEM_RUNNING; + int ret = __static_call_text_reserved(__start_static_call_sites, + __stop_static_call_sites, start, end, init); + + if (ret) + return ret; + + return __static_call_mod_text_reserved(start, end); +} + +int __init static_call_init(void) +{ + int ret; + + if (static_call_initialized) + return 0; + + cpus_read_lock(); + static_call_lock(); + ret = __static_call_init(NULL, __start_static_call_sites, + __stop_static_call_sites); + static_call_unlock(); + cpus_read_unlock(); + + if (ret) { + pr_err("Failed to allocate memory for static_call!\n"); + BUG(); + } + + static_call_initialized = true; + +#ifdef CONFIG_MODULES + register_module_notifier(&static_call_module_nb); +#endif + return 0; +} +early_initcall(static_call_init); + +#ifdef CONFIG_STATIC_CALL_SELFTEST + +static int func_a(int x) +{ + return x+1; +} + +static int func_b(int x) +{ + return x+2; +} + +DEFINE_STATIC_CALL(sc_selftest, func_a); + +static struct static_call_data { + int (*func)(int); + int val; + int expect; +} static_call_data [] __initdata = { + { NULL, 2, 3 }, + { func_b, 2, 4 }, + { func_a, 2, 3 } +}; + +static int __init 
test_static_call_init(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(static_call_data); i++ ) { + struct static_call_data *scd = &static_call_data[i]; + + if (scd->func) + static_call_update(sc_selftest, scd->func); + + WARN_ON(static_call(sc_selftest)(scd->val) != scd->expect); + } + + return 0; +} +early_initcall(test_static_call_init); + +#endif /* CONFIG_STATIC_CALL_SELFTEST */ diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index a492f159624fa..dc765f3eff16f 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -291,6 +291,7 @@ COND_SYSCALL(mincore); COND_SYSCALL(madvise); COND_SYSCALL(process_madvise); COND_SYSCALL(process_mrelease); +COND_SYSCALL(pmadv_ksm); COND_SYSCALL(remap_file_pages); COND_SYSCALL(mbind); COND_SYSCALL(get_mempolicy); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 730ab56d9e92e..6cf4e5c36ec92 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -91,6 +91,9 @@ #if defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_LOCK_STAT) #include #endif +#ifdef CONFIG_USER_NS +#include +#endif #if defined(CONFIG_SYSCTL) @@ -1814,6 +1817,15 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, +#ifdef CONFIG_USER_NS + { + .procname = "unprivileged_userns_clone", + .data = &unprivileged_userns_clone, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, +#endif #ifdef CONFIG_PROC_SYSCTL { .procname = "tainted", diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 17a283ce2b20f..3506f6ed790c0 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -169,6 +169,8 @@ static ktime_t tick_init_jiffy_update(void) return period; } +#define MAX_STALLED_JIFFIES 5 + static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now) { int cpu = smp_processor_id(); @@ -186,7 +188,7 @@ static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now) */ if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)) { #ifdef CONFIG_NO_HZ_FULL - 
WARN_ON(tick_nohz_full_running); + WARN_ON_ONCE(tick_nohz_full_running); #endif tick_do_timer_cpu = cpu; } @@ -196,6 +198,21 @@ static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now) if (tick_do_timer_cpu == cpu) tick_do_update_jiffies64(now); + /* + * If jiffies update stalled for too long (timekeeper in stop_machine() + * or VMEXIT'ed for several msecs), force an update. + */ + if (ts->last_tick_jiffies != jiffies) { + ts->stalled_jiffies = 0; + ts->last_tick_jiffies = READ_ONCE(jiffies); + } else { + if (++ts->stalled_jiffies == MAX_STALLED_JIFFIES) { + tick_do_update_jiffies64(now); + ts->stalled_jiffies = 0; + ts->last_tick_jiffies = READ_ONCE(jiffies); + } + } + if (ts->inidle) ts->got_idle_tick = 1; } @@ -768,7 +785,7 @@ static inline bool local_timer_softirq_pending(void) static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu) { - u64 basemono, next_tick, next_tmr, next_rcu, delta, expires; + u64 basemono, next_tick, delta, expires; unsigned long basejiff; unsigned int seq; @@ -791,7 +808,7 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu) * minimal delta which brings us back to this place * immediately. Lather, rinse and repeat... */ - if (rcu_needs_cpu(basemono, &next_rcu) || arch_needs_cpu() || + if (rcu_needs_cpu() || arch_needs_cpu() || irq_work_needs_cpu() || local_timer_softirq_pending()) { next_tick = basemono + TICK_NSEC; } else { @@ -802,10 +819,8 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu) * disabled this also looks at the next expiring * hrtimer. */ - next_tmr = get_next_timer_interrupt(basejiff, basemono); - ts->next_timer = next_tmr; - /* Take the next rcu event into account */ - next_tick = next_rcu < next_tmr ? 
next_rcu : next_tmr; + next_tick = get_next_timer_interrupt(basejiff, basemono); + ts->next_timer = next_tick; } /* @@ -984,6 +999,45 @@ static void tick_nohz_full_update_tick(struct tick_sched *ts) __tick_nohz_full_update_tick(ts, ktime_get()); } +/* + * A pending softirq outside an IRQ (or softirq disabled section) context + * should be waiting for ksoftirqd to handle it. Therefore we shouldn't + * reach here due to the need_resched() early check in can_stop_idle_tick(). + * + * However if we are between CPUHP_AP_SMPBOOT_THREADS and CPU_TEARDOWN_CPU on the + * cpu_down() process, softirqs can still be raised while ksoftirqd is parked, + * triggering the below since wakep_softirqd() is ignored. + * + */ +static bool report_idle_softirq(void) +{ + static int ratelimit; + unsigned int pending = local_softirq_pending(); + + if (likely(!pending)) + return false; + + /* Some softirqs claim to be safe against hotplug and ksoftirqd parking */ + if (!cpu_active(smp_processor_id())) { + pending &= ~SOFTIRQ_HOTPLUG_SAFE_MASK; + if (!pending) + return false; + } + + if (ratelimit >= 10) + return false; + + /* On RT, softirqs handling may be waiting on some lock */ + if (local_bh_blocked()) + return false; + + pr_warn("NOHZ tick-stop error: local softirq work is pending, handler #%02x!!!\n", + pending); + ratelimit++; + + return true; +} + static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) { /* @@ -1010,17 +1064,8 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) if (need_resched()) return false; - if (unlikely(local_softirq_pending())) { - static int ratelimit; - - if (ratelimit < 10 && !local_bh_blocked() && - (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) { - pr_warn("NOHZ tick-stop error: Non-RCU local softirq work is pending, handler #%02x!!!\n", - (unsigned int) local_softirq_pending()); - ratelimit++; - } + if (unlikely(report_idle_softirq())) return false; - } if (tick_nohz_full_enabled()) { /* diff --git a/kernel/time/tick-sched.h
b/kernel/time/tick-sched.h index d952ae3934236..504649513399b 100644 --- a/kernel/time/tick-sched.h +++ b/kernel/time/tick-sched.h @@ -49,6 +49,8 @@ enum tick_nohz_mode { * @timer_expires_base: Base time clock monotonic for @timer_expires * @next_timer: Expiry time of next expiring timer for debugging purpose only * @tick_dep_mask: Tick dependency mask - is set, if someone needs the tick + * @last_tick_jiffies: Value of jiffies seen on last tick + * @stalled_jiffies: Number of stalled jiffies detected across ticks */ struct tick_sched { struct hrtimer sched_timer; @@ -77,6 +79,8 @@ struct tick_sched { u64 next_timer; ktime_t idle_expires; atomic_t tick_dep_mask; + unsigned long last_tick_jiffies; + unsigned int stalled_jiffies; }; extern struct tick_sched *tick_get_tick_sched(int cpu); diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 85f1021ad4595..9dd2a39cb3b00 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1722,11 +1722,14 @@ static inline void __run_timers(struct timer_base *base) time_after_eq(jiffies, base->next_expiry)) { levels = collect_expired_timers(base, heads); /* - * The only possible reason for not finding any expired - * timer at this clk is that all matching timers have been - * dequeued. + * The two possible reasons for not finding any expired + * timer at this clk are that all matching timers have been + * dequeued or no timer has been queued since + * base::next_expiry was set to base::clk + + * NEXT_TIMER_MAX_DELTA. 
*/ - WARN_ON_ONCE(!levels && !base->next_expiry_recalc); + WARN_ON_ONCE(!levels && !base->next_expiry_recalc + && base->timers_pending); base->clk++; base->next_expiry = __next_timer_interrupt(base); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index eb44418574f9c..96265a717ca4e 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3663,12 +3663,17 @@ static char *trace_iter_expand_format(struct trace_iterator *iter) } /* Returns true if the string is safe to dereference from an event */ -static bool trace_safe_str(struct trace_iterator *iter, const char *str) +static bool trace_safe_str(struct trace_iterator *iter, const char *str, + bool star, int len) { unsigned long addr = (unsigned long)str; struct trace_event *trace_event; struct trace_event_call *event; + /* Ignore strings with no length */ + if (star && !len) + return true; + /* OK if part of the event data */ if ((addr >= (unsigned long)iter->ent) && (addr < (unsigned long)iter->ent + iter->ent_size)) @@ -3854,7 +3859,7 @@ void trace_check_vprintf(struct trace_iterator *iter, const char *fmt, * instead. See samples/trace_events/trace-events-sample.h * for reference. 
*/ - if (WARN_ONCE(!trace_safe_str(iter, str), + if (WARN_ONCE(!trace_safe_str(iter, str, star, len), "fmt: '%s' current_buffer: '%s'", fmt, show_buffer(&iter->seq))) { int ret; diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 3147614c1812a..25b5d0f9f3758 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -40,6 +40,14 @@ static LIST_HEAD(ftrace_generic_fields); static LIST_HEAD(ftrace_common_fields); static bool eventdir_initialized; +static LIST_HEAD(module_strings); + +struct module_string { + struct list_head next; + struct module *module; + char *str; +}; + #define GFP_TRACE (GFP_KERNEL | __GFP_ZERO) static struct kmem_cache *field_cachep; @@ -2633,6 +2641,76 @@ static void update_event_printk(struct trace_event_call *call, } } +static void add_str_to_module(struct module *module, char *str) +{ + struct module_string *modstr; + + modstr = kmalloc(sizeof(*modstr), GFP_KERNEL); + + /* + * If we failed to allocate memory here, then we'll just + * let the str memory leak when the module is removed. + * If this fails to allocate, there's worse problems than + * a leaked string on module removal. 
+ */ + if (WARN_ON_ONCE(!modstr)) + return; + + modstr->module = module; + modstr->str = str; + + list_add(&modstr->next, &module_strings); +} + +static void update_event_fields(struct trace_event_call *call, + struct trace_eval_map *map) +{ + struct ftrace_event_field *field; + struct list_head *head; + char *ptr; + char *str; + int len = strlen(map->eval_string); + + /* Dynamic events should never have field maps */ + if (WARN_ON_ONCE(call->flags & TRACE_EVENT_FL_DYNAMIC)) + return; + + head = trace_get_fields(call); + list_for_each_entry(field, head, link) { + ptr = strchr(field->type, '['); + if (!ptr) + continue; + ptr++; + + if (!isalpha(*ptr) && *ptr != '_') + continue; + + if (strncmp(map->eval_string, ptr, len) != 0) + continue; + + str = kstrdup(field->type, GFP_KERNEL); + if (WARN_ON_ONCE(!str)) + return; + ptr = str + (ptr - field->type); + ptr = eval_replace(ptr, map, len); + /* enum/sizeof string smaller than value */ + if (WARN_ON_ONCE(!ptr)) { + kfree(str); + continue; + } + + /* + * If the event is part of a module, then we need to free the string + * when the module is removed. Otherwise, it will stay allocated + * until a reboot. 
+ */ + if (call->module) + add_str_to_module(call->module, str); + + field->type = str; + } +} + void trace_event_eval_update(struct trace_eval_map **map, int len) { struct trace_event_call *call, *p; @@ -2668,6 +2746,7 @@ void trace_event_eval_update(struct trace_eval_map **map, int len) first = false; } update_event_printk(call, map[i]); + update_event_fields(call, map[i]); } } } @@ -2853,6 +2932,7 @@ static void trace_module_add_events(struct module *mod) static void trace_module_remove_events(struct module *mod) { struct trace_event_call *call, *p; + struct module_string *modstr, *m; down_write(&trace_event_sem); list_for_each_entry_safe(call, p, &ftrace_events, list) { @@ -2861,6 +2941,14 @@ static void trace_module_remove_events(struct module *mod) if (call->module == mod) __trace_remove_event_call(call); } + /* Check for any strings allocated for this module */ + list_for_each_entry_safe(modstr, m, &module_strings, next) { + if (modstr->module != mod) + continue; + list_del(&modstr->next); + kfree(modstr->str); + kfree(modstr); + } up_write(&trace_event_sem); /* diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 5481ba44a8d68..423ab2563ad75 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -21,6 +21,13 @@ #include #include +/* sysctl */ +#ifdef CONFIG_USER_NS_UNPRIVILEGED +int unprivileged_userns_clone = 1; +#else +int unprivileged_userns_clone; +#endif + static struct kmem_cache *user_ns_cachep __read_mostly; static DEFINE_MUTEX(userns_state_mutex); diff --git a/kernel/watch_queue.c b/kernel/watch_queue.c index 00703444a2194..230038d4f9081 100644 --- a/kernel/watch_queue.c +++ b/kernel/watch_queue.c @@ -271,7 +271,7 @@ long watch_queue_set_size(struct pipe_inode_info *pipe, unsigned int nr_notes) return 0; error_p: - for (i = 0; i < nr_pages; i++) + while (--i >= 0) __free_page(pages[i]); kfree(pages); error: @@ -370,6 +370,7 @@ static void __put_watch_queue(struct kref *kref) for (i = 0; i < wqueue->nr_pages; i++)
__free_page(wqueue->notes[i]); + kfree(wqueue->notes); bitmap_free(wqueue->notes_bitmap); wfilter = rcu_access_pointer(wqueue->filter); @@ -395,6 +396,7 @@ static void free_watch(struct rcu_head *rcu) put_watch_queue(rcu_access_pointer(watch->queue)); atomic_dec(&watch->cred->user->nr_watches); put_cred(watch->cred); + kfree(watch); } static void __put_watch(struct kref *kref) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 14b89aa37c5c9..440fd666c16d1 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -416,7 +416,8 @@ config SECTION_MISMATCH_WARN_ONLY If unsure, say Y. config DEBUG_FORCE_FUNCTION_ALIGN_64B - bool "Force all function address 64B aligned" if EXPERT + bool "Force all function address 64B aligned" + depends on EXPERT && (X86_64 || ARM64 || PPC32 || PPC64 || ARC) help There are cases that a commit from one domain changes the function address alignment of other domains, and cause magic performance diff --git a/lib/kunit/try-catch.c b/lib/kunit/try-catch.c index be38a2c5ecc2b..42825941f19f2 100644 --- a/lib/kunit/try-catch.c +++ b/lib/kunit/try-catch.c @@ -52,7 +52,7 @@ static unsigned long kunit_test_timeout(void) * If tests timeout due to exceeding sysctl_hung_task_timeout_secs, * the task will be killed and an oops generated. 
*/ - return 300 * MSEC_PER_SEC; /* 5 min */ + return 300 * msecs_to_jiffies(MSEC_PER_SEC); /* 5 min */ } void kunit_try_catch_run(struct kunit_try_catch *try_catch, void *context) diff --git a/lib/logic_iomem.c b/lib/logic_iomem.c index 8c3365f26e51d..b247d412ddef7 100644 --- a/lib/logic_iomem.c +++ b/lib/logic_iomem.c @@ -68,7 +68,7 @@ int logic_iomem_add_region(struct resource *resource, } EXPORT_SYMBOL(logic_iomem_add_region); -#ifndef CONFIG_LOGIC_IOMEM_FALLBACK +#ifndef CONFIG_INDIRECT_IOMEM_FALLBACK static void __iomem *real_ioremap(phys_addr_t offset, size_t size) { WARN(1, "invalid ioremap(0x%llx, 0x%zx)\n", @@ -81,7 +81,7 @@ static void real_iounmap(volatile void __iomem *addr) WARN(1, "invalid iounmap for addr 0x%llx\n", (unsigned long long)(uintptr_t __force)addr); } -#endif /* CONFIG_LOGIC_IOMEM_FALLBACK */ +#endif /* CONFIG_INDIRECT_IOMEM_FALLBACK */ void __iomem *ioremap(phys_addr_t offset, size_t size) { @@ -168,7 +168,7 @@ void iounmap(volatile void __iomem *addr) } EXPORT_SYMBOL(iounmap); -#ifndef CONFIG_LOGIC_IOMEM_FALLBACK +#ifndef CONFIG_INDIRECT_IOMEM_FALLBACK #define MAKE_FALLBACK(op, sz) \ static u##sz real_raw_read ## op(const volatile void __iomem *addr) \ { \ @@ -213,7 +213,7 @@ static void real_memcpy_toio(volatile void __iomem *addr, const void *buffer, WARN(1, "Invalid memcpy_toio at address 0x%llx\n", (unsigned long long)(uintptr_t __force)addr); } -#endif /* CONFIG_LOGIC_IOMEM_FALLBACK */ +#endif /* CONFIG_INDIRECT_IOMEM_FALLBACK */ #define MAKE_OP(op, sz) \ u##sz __raw_read ## op(const volatile void __iomem *addr) \ diff --git a/lib/lz4/lz4_decompress.c b/lib/lz4/lz4_decompress.c index 926f4823d5eac..fd1728d94babb 100644 --- a/lib/lz4/lz4_decompress.c +++ b/lib/lz4/lz4_decompress.c @@ -271,8 +271,12 @@ static FORCE_INLINE int LZ4_decompress_generic( ip += length; op += length; - /* Necessarily EOF, due to parsing restrictions */ - if (!partialDecoding || (cpy == oend)) + /* Necessarily EOF when !partialDecoding. 
+ * When partialDecoding, it is EOF if we've either + * filled the output buffer or + * can't proceed with reading an offset for following match. + */ + if (!partialDecoding || (cpy == oend) || (ip >= (iend - 2))) break; } else { /* may overwrite up to WILDCOPYLENGTH beyond cpy */ diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile index a4c7cd74cff58..4fb7700a741bd 100644 --- a/lib/raid6/test/Makefile +++ b/lib/raid6/test/Makefile @@ -4,6 +4,8 @@ # from userspace. # +pound := \# + CC = gcc OPTFLAGS = -O2 # Adjust as desired CFLAGS = -I.. -I ../../../include -g $(OPTFLAGS) @@ -42,7 +44,7 @@ else ifeq ($(HAS_NEON),yes) OBJS += neon.o neon1.o neon2.o neon4.o neon8.o recov_neon.o recov_neon_inner.o CFLAGS += -DCONFIG_KERNEL_MODE_NEON=1 else - HAS_ALTIVEC := $(shell printf '\#include \nvector int a;\n' |\ + HAS_ALTIVEC := $(shell printf '$(pound)include \nvector int a;\n' |\ gcc -c -x c - >/dev/null && rm ./-.o && echo yes) ifeq ($(HAS_ALTIVEC),yes) CFLAGS += -I../../../arch/powerpc/include diff --git a/lib/raid6/test/test.c b/lib/raid6/test/test.c index a3cf071941ab4..841a55242abaa 100644 --- a/lib/raid6/test/test.c +++ b/lib/raid6/test/test.c @@ -19,7 +19,6 @@ #define NDISKS 16 /* Including P and Q */ const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(PAGE_SIZE))); -struct raid6_calls raid6_call; char *dataptrs[NDISKS]; char data[NDISKS][PAGE_SIZE] __attribute__((aligned(PAGE_SIZE))); diff --git a/lib/ref_tracker.c b/lib/ref_tracker.c index a6789c0c626b0..32ff6bd497f8e 100644 --- a/lib/ref_tracker.c +++ b/lib/ref_tracker.c @@ -20,6 +20,7 @@ void ref_tracker_dir_exit(struct ref_tracker_dir *dir) unsigned long flags; bool leak = false; + dir->dead = true; spin_lock_irqsave(&dir->lock, flags); list_for_each_entry_safe(tracker, n, &dir->quarantine, head) { list_del(&tracker->head); @@ -72,6 +73,8 @@ int ref_tracker_alloc(struct ref_tracker_dir *dir, gfp_t gfp_mask = gfp; unsigned long flags; + WARN_ON_ONCE(dir->dead); + if (gfp & 
__GFP_DIRECT_RECLAIM) gfp_mask |= __GFP_NOFAIL; *trackerp = tracker = kzalloc(sizeof(*tracker), gfp_mask); @@ -100,6 +103,8 @@ int ref_tracker_free(struct ref_tracker_dir *dir, unsigned int nr_entries; unsigned long flags; + WARN_ON_ONCE(dir->dead); + if (!tracker) { refcount_dec(&dir->untracked); return -EEXIST; diff --git a/lib/test_kmod.c b/lib/test_kmod.c index ce15893914131..cb800b1d0d99c 100644 --- a/lib/test_kmod.c +++ b/lib/test_kmod.c @@ -1149,6 +1149,7 @@ static struct kmod_test_device *register_test_dev_kmod(void) if (ret) { pr_err("could not register misc device: %d\n", ret); free_test_dev_kmod(test_dev); + test_dev = NULL; goto out; } diff --git a/lib/test_lockup.c b/lib/test_lockup.c index 906b598740a7b..c3fd87d6c2dd0 100644 --- a/lib/test_lockup.c +++ b/lib/test_lockup.c @@ -417,9 +417,14 @@ static bool test_kernel_ptr(unsigned long addr, int size) return false; /* should be at least readable kernel address */ - if (access_ok(ptr, 1) || - access_ok(ptr + size - 1, 1) || - get_kernel_nofault(buf, ptr) || + if (!IS_ENABLED(CONFIG_ALTERNATE_USER_ADDRESS_SPACE) && + (access_ok((void __user *)ptr, 1) || + access_ok((void __user *)ptr + size - 1, 1))) { + pr_err("user space ptr invalid in kernel: %#lx\n", addr); + return true; + } + + if (get_kernel_nofault(buf, ptr) || get_kernel_nofault(buf, ptr + size - 1)) { pr_err("invalid kernel ptr: %#lx\n", addr); return true; diff --git a/lib/test_xarray.c b/lib/test_xarray.c index 8b1c318189ce8..e77d4856442c3 100644 --- a/lib/test_xarray.c +++ b/lib/test_xarray.c @@ -1463,6 +1463,25 @@ static noinline void check_create_range_4(struct xarray *xa, XA_BUG_ON(xa, !xa_empty(xa)); } +static noinline void check_create_range_5(struct xarray *xa, + unsigned long index, unsigned int order) +{ + XA_STATE_ORDER(xas, xa, index, order); + unsigned int i; + + xa_store_order(xa, index, order, xa_mk_index(index), GFP_KERNEL); + + for (i = 0; i < order + 10; i++) { + do { + xas_lock(&xas); + xas_create_range(&xas); + 
xas_unlock(&xas); + } while (xas_nomem(&xas, GFP_KERNEL)); + } + + xa_destroy(xa); +} + static noinline void check_create_range(struct xarray *xa) { unsigned int order; @@ -1490,6 +1509,9 @@ static noinline void check_create_range(struct xarray *xa) check_create_range_4(xa, (3U << order) + 1, order); check_create_range_4(xa, (3U << order) - 1, order); check_create_range_4(xa, (1U << 24) + 1, order); + + check_create_range_5(xa, 0, order); + check_create_range_5(xa, (1U << order), order); } check_create_range_3(); diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 3b8129dd374cd..fbf261bbea950 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -49,10 +49,15 @@ #include /* for PAGE_SIZE */ #include /* cpu_to_le16 */ +#include #include #include "kstrtox.h" +/* Disable pointer hashing if requested */ +bool no_hash_pointers __ro_after_init; +EXPORT_SYMBOL_GPL(no_hash_pointers); + static noinline unsigned long long simple_strntoull(const char *startp, size_t max_chars, char **endp, unsigned int base) { const char *cp; @@ -848,6 +853,19 @@ static char *ptr_to_id(char *buf, char *end, const void *ptr, return pointer_string(buf, end, (const void *)hashval, spec); } +static char *default_pointer(char *buf, char *end, const void *ptr, + struct printf_spec spec) +{ + /* + * default is to _not_ leak addresses, so hash before printing, + * unless no_hash_pointers is specified on the command line. + */ + if (unlikely(no_hash_pointers)) + return pointer_string(buf, end, ptr, spec); + + return ptr_to_id(buf, end, ptr, spec); +} + int kptr_restrict __read_mostly; static noinline_for_stack @@ -857,7 +875,7 @@ char *restricted_pointer(char *buf, char *end, const void *ptr, switch (kptr_restrict) { case 0: /* Handle as %p, hash and do _not_ leak addresses. 
*/ - return ptr_to_id(buf, end, ptr, spec); + return default_pointer(buf, end, ptr, spec); case 1: { const struct cred *cred; @@ -1761,7 +1779,7 @@ char *fourcc_string(char *buf, char *end, const u32 *fourcc, char output[sizeof("0123 little-endian (0x01234567)")]; char *p = output; unsigned int i; - u32 val; + u32 orig, val; if (fmt[1] != 'c' || fmt[2] != 'c') return error_string(buf, end, "(%p4?)", spec); @@ -1769,21 +1787,22 @@ char *fourcc_string(char *buf, char *end, const u32 *fourcc, if (check_pointer(&buf, end, fourcc, spec)) return buf; - val = *fourcc & ~BIT(31); + orig = get_unaligned(fourcc); + val = orig & ~BIT(31); - for (i = 0; i < sizeof(*fourcc); i++) { + for (i = 0; i < sizeof(u32); i++) { unsigned char c = val >> (i * 8); /* Print non-control ASCII characters as-is, dot otherwise */ *p++ = isascii(c) && isprint(c) ? c : '.'; } - strcpy(p, *fourcc & BIT(31) ? " big-endian" : " little-endian"); + strcpy(p, orig & BIT(31) ? " big-endian" : " little-endian"); p += strlen(p); *p++ = ' '; *p++ = '('; - p = special_hex_number(p, output + sizeof(output) - 2, *fourcc, sizeof(u32)); + p = special_hex_number(p, output + sizeof(output) - 2, orig, sizeof(u32)); *p++ = ')'; *p = '\0'; @@ -2223,10 +2242,6 @@ char *fwnode_string(char *buf, char *end, struct fwnode_handle *fwnode, return widen_string(buf, buf - buf_start, end, spec); } -/* Disable pointer hashing if requested */ -bool no_hash_pointers __ro_after_init; -EXPORT_SYMBOL_GPL(no_hash_pointers); - int __init no_hash_pointers_enable(char *str) { if (no_hash_pointers) @@ -2455,7 +2470,7 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, case 'e': /* %pe with a non-ERR_PTR gets treated as plain %p */ if (!IS_ERR(ptr)) - break; + return default_pointer(buf, end, ptr, spec); return err_ptr(buf, end, ptr, spec); case 'u': case 'k': @@ -2465,16 +2480,9 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, default: return error_string(buf, end, "(einval)", spec); } + default: + 
return default_pointer(buf, end, ptr, spec); } - - /* - * default is to _not_ leak addresses, so hash before printing, - * unless no_hash_pointers is specified on the command line. - */ - if (unlikely(no_hash_pointers)) - return pointer_string(buf, end, ptr, spec); - else - return ptr_to_id(buf, end, ptr, spec); } /* diff --git a/lib/xarray.c b/lib/xarray.c index 6f47f6375808a..88ca87435e3da 100644 --- a/lib/xarray.c +++ b/lib/xarray.c @@ -722,6 +722,8 @@ void xas_create_range(struct xa_state *xas) for (;;) { struct xa_node *node = xas->xa_node; + if (node->shift >= shift) + break; xas->xa_node = xa_parent_locked(xas->xa, node); xas->xa_offset = node->offset - 1; if (node->offset != 0) @@ -1079,6 +1081,7 @@ void xas_split(struct xa_state *xas, void *entry, unsigned int order) xa_mk_node(child)); if (xa_is_value(curr)) values--; + xas_update(xas, child); } else { unsigned int canon = offset - xas->xa_sibs; @@ -1093,6 +1096,7 @@ void xas_split(struct xa_state *xas, void *entry, unsigned int order) } while (offset-- > xas->xa_offset); node->nr_values += values; + xas_update(xas, node); } EXPORT_SYMBOL_GPL(xas_split); #endif diff --git a/mm/Kconfig b/mm/Kconfig index 3326ee3903f33..7fd84e0384dc6 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -892,6 +892,32 @@ config ANON_VMA_NAME area from being merged with adjacent virtual memory areas due to the difference in their name. +# the multi-gen LRU { +config LRU_GEN + bool "Multi-Gen LRU" + depends on MMU + # the following options can use up the spare bits in page flags + depends on !MAXSMP && (64BIT || !SPARSEMEM || SPARSEMEM_VMEMMAP) + help + A high performance LRU implementation for memory overcommit. See + Documentation/admin-guide/mm/multigen_lru.rst for details. + +config LRU_GEN_ENABLED + bool "Enable by default" + depends on LRU_GEN + help + This option enables the multi-gen LRU by default. 
+ +config LRU_GEN_STATS + bool "Full stats for debugging" + depends on LRU_GEN + help + Do not enable this option unless you plan to look at historical stats + from evicted generations for debugging purpose. + + This option has a per-memcg and per-node memory overhead. +# } + source "mm/damon/Kconfig" endmenu diff --git a/mm/highmem.c b/mm/highmem.c index 762679050c9a0..916b66e0776c2 100644 --- a/mm/highmem.c +++ b/mm/highmem.c @@ -624,7 +624,7 @@ void __kmap_local_sched_out(void) /* With debug all even slots are unmapped and act as guard */ if (IS_ENABLED(CONFIG_DEBUG_KMAP_LOCAL) && !(i & 0x01)) { - WARN_ON_ONCE(!pte_none(pteval)); + WARN_ON_ONCE(pte_val(pteval) != 0); continue; } if (WARN_ON_ONCE(pte_none(pteval))) @@ -661,7 +661,7 @@ void __kmap_local_sched_in(void) /* With debug all even slots are unmapped and act as guard */ if (IS_ENABLED(CONFIG_DEBUG_KMAP_LOCAL) && !(i & 0x01)) { - WARN_ON_ONCE(!pte_none(pteval)); + WARN_ON_ONCE(pte_val(pteval) != 0); continue; } if (WARN_ON_ONCE(pte_none(pteval))) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 406a3c28c0266..3df389fd307fb 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2364,7 +2364,8 @@ static void __split_huge_page_tail(struct page *head, int tail, #ifdef CONFIG_64BIT (1L << PG_arch_2) | #endif - (1L << PG_dirty))); + (1L << PG_dirty) | + LRU_GEN_MASK | LRU_REFS_MASK)); /* ->mapping in first tail page is compound_mapcount */ VM_BUG_ON_PAGE(tail > 2 && page_tail->mapping != TAIL_MAPPING, diff --git a/mm/hugetlb.c b/mm/hugetlb.c index f294db835f4bc..a1da8757cc9cc 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3469,7 +3469,6 @@ static int demote_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed) { int nr_nodes, node; struct page *page; - int rc = 0; lockdep_assert_held(&hugetlb_lock); @@ -3480,15 +3479,19 @@ static int demote_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed) } for_each_node_mask_to_free(h, nr_nodes, node, nodes_allowed) { - if 
(!list_empty(&h->hugepage_freelists[node])) { - page = list_entry(h->hugepage_freelists[node].next, - struct page, lru); - rc = demote_free_huge_page(h, page); - break; + list_for_each_entry(page, &h->hugepage_freelists[node], lru) { + if (PageHWPoison(page)) + continue; + + return demote_free_huge_page(h, page); } } - return rc; + /* + * Only way to get here is if all pages on free lists are poisoned. + * Return -EBUSY so that caller will not retry. + */ + return -EBUSY; } #define HSTATE_ATTR_RO(_name) \ diff --git a/mm/kfence/core.c b/mm/kfence/core.c index 13128fa130625..af82c6f7d7239 100644 --- a/mm/kfence/core.c +++ b/mm/kfence/core.c @@ -222,27 +222,6 @@ static bool kfence_unprotect(unsigned long addr) return !KFENCE_WARN_ON(!kfence_protect_page(ALIGN_DOWN(addr, PAGE_SIZE), false)); } -static inline struct kfence_metadata *addr_to_metadata(unsigned long addr) -{ - long index; - - /* The checks do not affect performance; only called from slow-paths. */ - - if (!is_kfence_address((void *)addr)) - return NULL; - - /* - * May be an invalid index if called with an address at the edge of - * __kfence_pool, in which case we would report an "invalid access" - * error. - */ - index = (addr - (unsigned long)__kfence_pool) / (PAGE_SIZE * 2) - 1; - if (index < 0 || index >= CONFIG_KFENCE_NUM_OBJECTS) - return NULL; - - return &kfence_metadata[index]; -} - static inline unsigned long metadata_to_pageaddr(const struct kfence_metadata *meta) { unsigned long offset = (meta - kfence_metadata + 1) * PAGE_SIZE * 2; @@ -555,6 +534,8 @@ static bool __init kfence_init_pool(void) * enters __slab_free() slow-path. 
*/ for (i = 0; i < KFENCE_POOL_SIZE / PAGE_SIZE; i++) { + struct slab *slab = page_slab(&pages[i]); + if (!i || (i % 2)) continue; @@ -562,7 +543,11 @@ static bool __init kfence_init_pool(void) if (WARN_ON(compound_head(&pages[i]) != &pages[i])) goto err; - __SetPageSlab(&pages[i]); + __folio_set_slab(slab_folio(slab)); +#ifdef CONFIG_MEMCG + slab->memcg_data = (unsigned long)&kfence_metadata[i / 2 - 1].objcg | + MEMCG_DATA_OBJCGS; +#endif } /* @@ -938,6 +923,9 @@ void __kfence_free(void *addr) { struct kfence_metadata *meta = addr_to_metadata((unsigned long)addr); +#ifdef CONFIG_MEMCG + KFENCE_WARN_ON(meta->objcg); +#endif /* * If the objects of the cache are SLAB_TYPESAFE_BY_RCU, defer freeing * the object, as the object page may be recycled for other-typed diff --git a/mm/kfence/kfence.h b/mm/kfence/kfence.h index 2a2d5de9d3791..600f2e2431d6d 100644 --- a/mm/kfence/kfence.h +++ b/mm/kfence/kfence.h @@ -89,10 +89,34 @@ struct kfence_metadata { struct kfence_track free_track; /* For updating alloc_covered on frees. */ u32 alloc_stack_hash; +#ifdef CONFIG_MEMCG + struct obj_cgroup *objcg; +#endif }; extern struct kfence_metadata kfence_metadata[CONFIG_KFENCE_NUM_OBJECTS]; +static inline struct kfence_metadata *addr_to_metadata(unsigned long addr) +{ + long index; + + /* The checks do not affect performance; only called from slow-paths. */ + + if (!is_kfence_address((void *)addr)) + return NULL; + + /* + * May be an invalid index if called with an address at the edge of + * __kfence_pool, in which case we would report an "invalid access" + * error. + */ + index = (addr - (unsigned long)__kfence_pool) / (PAGE_SIZE * 2) - 1; + if (index < 0 || index >= CONFIG_KFENCE_NUM_OBJECTS) + return NULL; + + return &kfence_metadata[index]; +} + /* KFENCE error types for report generation. */ enum kfence_error_type { KFENCE_ERROR_OOB, /* Detected a out-of-bounds access. 
*/ diff --git a/mm/kfence/report.c b/mm/kfence/report.c index f93a7b2a338be..f5a6d8ba3e21f 100644 --- a/mm/kfence/report.c +++ b/mm/kfence/report.c @@ -273,3 +273,50 @@ void kfence_report_error(unsigned long address, bool is_write, struct pt_regs *r /* We encountered a memory safety error, taint the kernel! */ add_taint(TAINT_BAD_PAGE, LOCKDEP_STILL_OK); } + +#ifdef CONFIG_PRINTK +static void kfence_to_kp_stack(const struct kfence_track *track, void **kp_stack) +{ + int i, j; + + i = get_stack_skipnr(track->stack_entries, track->num_stack_entries, NULL); + for (j = 0; i < track->num_stack_entries && j < KS_ADDRS_COUNT; ++i, ++j) + kp_stack[j] = (void *)track->stack_entries[i]; + if (j < KS_ADDRS_COUNT) + kp_stack[j] = NULL; +} + +bool __kfence_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab) +{ + struct kfence_metadata *meta = addr_to_metadata((unsigned long)object); + unsigned long flags; + + if (!meta) + return false; + + /* + * If state is UNUSED at least show the pointer requested; the rest + * would be garbage data. + */ + kpp->kp_ptr = object; + + /* Requesting info on a never-used object is almost certainly a bug. */ + if (WARN_ON(meta->state == KFENCE_OBJECT_UNUSED)) + return true; + + raw_spin_lock_irqsave(&meta->lock, flags); + + kpp->kp_slab = slab; + kpp->kp_slab_cache = meta->cache; + kpp->kp_objp = (void *)meta->addr; + kfence_to_kp_stack(&meta->alloc_track, kpp->kp_stack); + if (meta->state == KFENCE_OBJECT_FREED) + kfence_to_kp_stack(&meta->free_track, kpp->kp_free_stack); + /* get_stack_skipnr() ensures the first entry is outside allocator.
*/ + kpp->kp_ret = kpp->kp_stack[0]; + + raw_spin_unlock_irqrestore(&meta->lock, flags); + + return true; +} +#endif diff --git a/mm/kmemleak.c b/mm/kmemleak.c index 7580baa76af1c..a182f5ddaf68b 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -796,6 +796,8 @@ static void add_scan_area(unsigned long ptr, size_t size, gfp_t gfp) unsigned long flags; struct kmemleak_object *object; struct kmemleak_scan_area *area = NULL; + unsigned long untagged_ptr; + unsigned long untagged_objp; object = find_and_get_object(ptr, 1); if (!object) { @@ -804,6 +806,9 @@ static void add_scan_area(unsigned long ptr, size_t size, gfp_t gfp) return; } + untagged_ptr = (unsigned long)kasan_reset_tag((void *)ptr); + untagged_objp = (unsigned long)kasan_reset_tag((void *)object->pointer); + if (scan_area_cache) area = kmem_cache_alloc(scan_area_cache, gfp_kmemleak_mask(gfp)); @@ -815,8 +820,8 @@ static void add_scan_area(unsigned long ptr, size_t size, gfp_t gfp) goto out_unlock; } if (size == SIZE_MAX) { - size = object->pointer + object->size - ptr; - } else if (ptr + size > object->pointer + object->size) { + size = untagged_objp + object->size - untagged_ptr; + } else if (untagged_ptr + size > untagged_objp + object->size) { kmemleak_warn("Scan area larger than object 0x%08lx\n", ptr); dump_object_info(object); kmem_cache_free(scan_area_cache, area); @@ -1127,7 +1132,7 @@ EXPORT_SYMBOL(kmemleak_no_scan); void __ref kmemleak_alloc_phys(phys_addr_t phys, size_t size, int min_count, gfp_t gfp) { - if (!IS_ENABLED(CONFIG_HIGHMEM) || PHYS_PFN(phys) < max_low_pfn) + if (PHYS_PFN(phys) >= min_low_pfn && PHYS_PFN(phys) < max_low_pfn) kmemleak_alloc(__va(phys), size, min_count, gfp); } EXPORT_SYMBOL(kmemleak_alloc_phys); @@ -1141,7 +1146,7 @@ EXPORT_SYMBOL(kmemleak_alloc_phys); */ void __ref kmemleak_free_part_phys(phys_addr_t phys, size_t size) { - if (!IS_ENABLED(CONFIG_HIGHMEM) || PHYS_PFN(phys) < max_low_pfn) + if (PHYS_PFN(phys) >= min_low_pfn && PHYS_PFN(phys) < max_low_pfn) 
kmemleak_free_part(__va(phys), size); } EXPORT_SYMBOL(kmemleak_free_part_phys); @@ -1153,7 +1158,7 @@ EXPORT_SYMBOL(kmemleak_free_part_phys); */ void __ref kmemleak_not_leak_phys(phys_addr_t phys) { - if (!IS_ENABLED(CONFIG_HIGHMEM) || PHYS_PFN(phys) < max_low_pfn) + if (PHYS_PFN(phys) >= min_low_pfn && PHYS_PFN(phys) < max_low_pfn) kmemleak_not_leak(__va(phys)); } EXPORT_SYMBOL(kmemleak_not_leak_phys); @@ -1165,7 +1170,7 @@ EXPORT_SYMBOL(kmemleak_not_leak_phys); */ void __ref kmemleak_ignore_phys(phys_addr_t phys) { - if (!IS_ENABLED(CONFIG_HIGHMEM) || PHYS_PFN(phys) < max_low_pfn) + if (PHYS_PFN(phys) >= min_low_pfn && PHYS_PFN(phys) < max_low_pfn) kmemleak_ignore(__va(phys)); } EXPORT_SYMBOL(kmemleak_ignore_phys); diff --git a/mm/ksm.c b/mm/ksm.c index c20bd4d9a0d9e..cc8d9ddb304af 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -2432,54 +2432,78 @@ static int ksm_scan_thread(void *nothing) return 0; } -int ksm_madvise(struct vm_area_struct *vma, unsigned long start, - unsigned long end, int advice, unsigned long *vm_flags) +int ksm_madvise_merge(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long *vm_flags) { - struct mm_struct *mm = vma->vm_mm; int err; - switch (advice) { - case MADV_MERGEABLE: - /* - * Be somewhat over-protective for now! - */ - if (*vm_flags & (VM_MERGEABLE | VM_SHARED | VM_MAYSHARE | - VM_PFNMAP | VM_IO | VM_DONTEXPAND | - VM_HUGETLB | VM_MIXEDMAP)) - return 0; /* just ignore the advice */ + /* + * Be somewhat over-protective for now! 
+ */ + if (*vm_flags & (VM_MERGEABLE | VM_SHARED | VM_MAYSHARE | + VM_PFNMAP | VM_IO | VM_DONTEXPAND | + VM_HUGETLB | VM_MIXEDMAP)) + return 0; /* just ignore the advice */ - if (vma_is_dax(vma)) - return 0; + if (vma_is_dax(vma)) + return 0; #ifdef VM_SAO if (*vm_flags & VM_SAO) return 0; #endif #ifdef VM_SPARC_ADI - if (*vm_flags & VM_SPARC_ADI) - return 0; + if (*vm_flags & VM_SPARC_ADI) + return 0; #endif - if (!test_bit(MMF_VM_MERGEABLE, &mm->flags)) { - err = __ksm_enter(mm); - if (err) - return err; - } + if (!test_bit(MMF_VM_MERGEABLE, &mm->flags)) { + err = __ksm_enter(mm); + if (err) + return err; + } - *vm_flags |= VM_MERGEABLE; - break; + *vm_flags |= VM_MERGEABLE; - case MADV_UNMERGEABLE: - if (!(*vm_flags & VM_MERGEABLE)) - return 0; /* just ignore the advice */ + return 0; +} - if (vma->anon_vma) { - err = unmerge_ksm_pages(vma, start, end); - if (err) - return err; - } +int ksm_madvise_unmerge(struct vm_area_struct *vma, unsigned long start, + unsigned long end, unsigned long *vm_flags) +{ + int err; + + if (!(*vm_flags & VM_MERGEABLE)) + return 0; /* just ignore the advice */ + + if (vma->anon_vma) { + err = unmerge_ksm_pages(vma, start, end); + if (err) + return err; + } - *vm_flags &= ~VM_MERGEABLE; + *vm_flags &= ~VM_MERGEABLE; + + return 0; +} + +int ksm_madvise(struct vm_area_struct *vma, unsigned long start, + unsigned long end, int advice, unsigned long *vm_flags) +{ + struct mm_struct *mm = vma->vm_mm; + int err; + + switch (advice) { + case MADV_MERGEABLE: + err = ksm_madvise_merge(mm, vma, vm_flags); + if (err) + return err; + break; + + case MADV_UNMERGEABLE: + err = ksm_madvise_unmerge(vma, start, end, vm_flags); + if (err) + return err; break; } diff --git a/mm/madvise.c b/mm/madvise.c index 38d0f515d5486..8593fc3a06f3c 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -1433,8 +1433,7 @@ SYSCALL_DEFINE5(process_madvise, int, pidfd, const struct iovec __user *, vec, iov_iter_advance(&iter, iovec.iov_len); } - if (ret == 0) - ret = 
total_len - iov_iter_count(&iter); + ret = (total_len - iov_iter_count(&iter)) ? : ret; release_mm: mmput(mm); @@ -1445,3 +1444,123 @@ SYSCALL_DEFINE5(process_madvise, int, pidfd, const struct iovec __user *, vec, out: return ret; } + +/* + * pmadv_ksm - apply KSM merge/unmerge advice to every VMA of another process. + * @pidfd: pidfd identifying the target process + * @behaviour: MADV_MERGEABLE or MADV_UNMERGEABLE, anything else is -EINVAL + * @flags: must be 0 + * + * Returns 0 on success (including a target without any VMA) or a negative + * errno; always -ENOSYS when CONFIG_KSM is not set. + */ +SYSCALL_DEFINE3(pmadv_ksm, int, pidfd, int, behaviour, unsigned int, flags) +{ +#ifdef CONFIG_KSM + ssize_t ret = 0; + struct pid *pid; + struct task_struct *task; + struct mm_struct *mm; + unsigned int f_flags; + struct vm_area_struct *vma; + + if (flags != 0) { + ret = -EINVAL; + goto out; + } + + switch (behaviour) { + case MADV_MERGEABLE: + case MADV_UNMERGEABLE: + break; + default: + ret = -EINVAL; + goto out; + break; + } + + pid = pidfd_get_pid(pidfd, &f_flags); + if (IS_ERR(pid)) { + ret = PTR_ERR(pid); + goto out; + } + + task = get_pid_task(pid, PIDTYPE_PID); + if (!task) { + ret = -ESRCH; + goto put_pid; + } + + /* Require PTRACE_MODE_READ to avoid leaking ASLR metadata. */ + mm = mm_access(task, PTRACE_MODE_READ_FSCREDS); + if (IS_ERR_OR_NULL(mm)) { + ret = IS_ERR(mm) ? PTR_ERR(mm) : -ESRCH; + goto release_task; + } + + /* Require CAP_SYS_NICE for influencing process performance. 
*/ + if (!capable(CAP_SYS_NICE)) { + ret = -EPERM; + goto release_mm; + } + + if (mmap_write_lock_killable(mm)) { + ret = -EINTR; + goto release_mm; + } + + for (vma = mm->mmap; vma; vma = vma->vm_next) { + switch (behaviour) { + case MADV_MERGEABLE: + ret = ksm_madvise_merge(vma->vm_mm, vma, &vma->vm_flags); + break; + case MADV_UNMERGEABLE: + ret = ksm_madvise_unmerge(vma, vma->vm_start, vma->vm_end, &vma->vm_flags); + break; + default: + /* unreachable: behaviour was validated above */ + break; + } + if (ret) + break; + } + + mmap_write_unlock(mm); + +release_mm: + mmput(mm); +release_task: + put_task_struct(task); +put_pid: + put_pid(pid); +out: + return ret; +#else /* CONFIG_KSM */ + return -ENOSYS; +#endif /* CONFIG_KSM */ +} + +#ifdef CONFIG_KSM +static ssize_t ksm_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + return sprintf(buf, "%u\n", __NR_pmadv_ksm); +} +static struct kobj_attribute pmadv_ksm_attr = __ATTR_RO(ksm); + +static struct attribute *pmadv_sysfs_attrs[] = { + &pmadv_ksm_attr.attr, + NULL, +}; + +static const struct attribute_group pmadv_sysfs_attr_group = { + .attrs = pmadv_sysfs_attrs, + .name = "pmadv", +}; + +static int __init pmadv_sysfs_init(void) +{ + return sysfs_create_group(kernel_kobj, &pmadv_sysfs_attr_group); +} +subsys_initcall(pmadv_sysfs_init); +#endif /* CONFIG_KSM */ diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 36e9f38c919d0..1feb89ac0170c 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2744,6 +2744,7 @@ static void commit_charge(struct folio *folio, struct mem_cgroup *memcg) * - LRU isolation * - lock_page_memcg() * - exclusive reference + * - mem_cgroup_trylock_pages() */ folio->memcg_data = (unsigned long)memcg; } @@ -5121,6 +5122,7 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg) static void mem_cgroup_free(struct mem_cgroup *memcg) { + lru_gen_exit_memcg(memcg); memcg_wb_domain_exit(memcg); __mem_cgroup_free(memcg); } @@ -5180,6 +5182,7 @@ static struct mem_cgroup 
*mem_cgroup_alloc(void) 
memcg->deferred_split_queue.split_queue_len = 0; #endif idr_replace(&mem_cgroup_idr, memcg, memcg->id.id); + lru_gen_init_memcg(memcg); return memcg; fail: mem_cgroup_id_remove(memcg); @@ -6152,6 +6155,29 @@ static void mem_cgroup_move_task(void) } #endif +#ifdef CONFIG_LRU_GEN +static void mem_cgroup_attach(struct cgroup_taskset *tset) +{ + struct cgroup_subsys_state *css; + struct task_struct *task = NULL; + + cgroup_taskset_for_each_leader(task, css, tset) + break; + + if (!task) + return; + + task_lock(task); + if (task->mm && task->mm->owner == task) + lru_gen_migrate_mm(task->mm); + task_unlock(task); +} +#else +static void mem_cgroup_attach(struct cgroup_taskset *tset) +{ +} +#endif /* CONFIG_LRU_GEN */ + static int seq_puts_memcg_tunable(struct seq_file *m, unsigned long value) { if (value == PAGE_COUNTER_MAX) @@ -6497,6 +6523,7 @@ struct cgroup_subsys memory_cgrp_subsys = { .css_reset = mem_cgroup_css_reset, .css_rstat_flush = mem_cgroup_css_rstat_flush, .can_attach = mem_cgroup_can_attach, + .attach = mem_cgroup_attach, .cancel_attach = mem_cgroup_cancel_attach, .post_attach = mem_cgroup_move_task, .dfl_cftypes = memory_files, @@ -7053,7 +7080,7 @@ static int __init cgroup_memory(char *s) if (!strcmp(token, "nokmem")) cgroup_memory_nokmem = true; } - return 0; + return 1; } __setup("cgroup.memory=", cgroup_memory); diff --git a/mm/memory.c b/mm/memory.c index c125c4969913a..3cb140550cf41 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -122,18 +122,6 @@ int randomize_va_space __read_mostly = 2; #endif -#ifndef arch_faults_on_old_pte -static inline bool arch_faults_on_old_pte(void) -{ - /* - * Those arches which don't have hw access flag feature need to - * implement their own helper. By default, "true" means pagefault - * will be hit on old pte. 
- */ - return true; -} -#endif - #ifndef arch_wants_old_prefaulted_pte static inline bool arch_wants_old_prefaulted_pte(void) { @@ -1313,6 +1301,17 @@ struct zap_details { struct folio *single_folio; /* Locked folio to be unmapped */ }; +/* Whether we should zap all COWed (private) pages too */ +static inline bool should_zap_cows(struct zap_details *details) +{ + /* By default, zap all pages */ + if (!details) + return true; + + /* Or, we zap COWed pages only if the caller wants to */ + return !details->zap_mapping; +} + /* * We set details->zap_mapping when we want to unmap shared but keep private * pages. Return true if skip zapping this page, false otherwise. @@ -1320,11 +1319,15 @@ struct zap_details { static inline bool zap_skip_check_mapping(struct zap_details *details, struct page *page) { - if (!details || !page) + /* If we can make a decision without *page.. */ + if (should_zap_cows(details)) + return false; + + /* E.g. the caller passes NULL for the case of a zero page */ + if (!page) return false; - return details->zap_mapping && - (details->zap_mapping != page_rmapping(page)); + return details->zap_mapping != page_rmapping(page); } static unsigned long zap_pte_range(struct mmu_gather *tlb, @@ -1405,17 +1408,24 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb, continue; } - /* If details->check_mapping, we leave swap entries. 
*/ - if (unlikely(details)) - continue; - - if (!non_swap_entry(entry)) + if (!non_swap_entry(entry)) { + /* Genuine swap entry, hence a private anon page */ + if (!should_zap_cows(details)) + continue; rss[MM_SWAPENTS]--; - else if (is_migration_entry(entry)) { + } else if (is_migration_entry(entry)) { struct page *page; page = pfn_swap_entry_to_page(entry); + if (zap_skip_check_mapping(details, page)) + continue; rss[mm_counter(page)]--; + } else if (is_hwpoison_entry(entry)) { + if (!should_zap_cows(details)) + continue; + } else { + /* We should have covered all the swap entry types */ + WARN_ON_ONCE(1); } if (unlikely(!free_swap_and_cache(entry))) print_bad_pte(vma, addr, ptent, NULL); @@ -2778,7 +2788,7 @@ static inline bool cow_user_page(struct page *dst, struct page *src, * On architectures with software "accessed" bits, we would * take a double page fault, so mark it accessed here. */ - if (arch_faults_on_old_pte() && !pte_young(vmf->orig_pte)) { + if (!arch_has_hw_pte_young() && !pte_young(vmf->orig_pte)) { pte_t entry; vmf->pte = pte_offset_map_lock(mm, vmf->pmd, addr, &vmf->ptl); @@ -3871,11 +3881,20 @@ static vm_fault_t __do_fault(struct vm_fault *vmf) return ret; if (unlikely(PageHWPoison(vmf->page))) { - if (ret & VM_FAULT_LOCKED) - unlock_page(vmf->page); - put_page(vmf->page); + struct page *page = vmf->page; + vm_fault_t poisonret = VM_FAULT_HWPOISON; + if (ret & VM_FAULT_LOCKED) { + if (page_mapped(page)) + unmap_mapping_pages(page_mapping(page), + page->index, 1, false); + /* Retry if a clean page was removed from the cache. 
*/ + if (invalidate_inode_page(page)) + poisonret = VM_FAULT_NOPAGE; + unlock_page(page); + } + put_page(page); vmf->page = NULL; - return VM_FAULT_HWPOISON; + return poisonret; } if (unlikely(!(ret & VM_FAULT_LOCKED))) @@ -4766,6 +4785,27 @@ static inline void mm_account_fault(struct pt_regs *regs, perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address); } +#ifdef CONFIG_LRU_GEN +static void lru_gen_enter_fault(struct vm_area_struct *vma) +{ + /* the LRU algorithm doesn't apply to sequential or random reads */ + current->in_lru_fault = !(vma->vm_flags & (VM_SEQ_READ | VM_RAND_READ)); +} + +static void lru_gen_exit_fault(void) +{ + current->in_lru_fault = false; +} +#else +static void lru_gen_enter_fault(struct vm_area_struct *vma) +{ +} + +static void lru_gen_exit_fault(void) +{ +} +#endif /* CONFIG_LRU_GEN */ + /* * By the time we get here, we already hold the mm semaphore * @@ -4797,11 +4837,15 @@ vm_fault_t handle_mm_fault(struct vm_area_struct *vma, unsigned long address, if (flags & FAULT_FLAG_USER) mem_cgroup_enter_user_fault(); + lru_gen_enter_fault(vma); + if (unlikely(is_vm_hugetlb_page(vma))) ret = hugetlb_fault(vma->vm_mm, vma, address, flags); else ret = __handle_mm_fault(vma, address, flags); + lru_gen_exit_fault(); + if (flags & FAULT_FLAG_USER) { mem_cgroup_exit_user_fault(); /* diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 69284d3b5e53f..f468bd911c3b9 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -786,7 +786,6 @@ static int vma_replace_policy(struct vm_area_struct *vma, static int mbind_range(struct mm_struct *mm, unsigned long start, unsigned long end, struct mempolicy *new_pol) { - struct vm_area_struct *next; struct vm_area_struct *prev; struct vm_area_struct *vma; int err = 0; @@ -801,8 +800,7 @@ static int mbind_range(struct mm_struct *mm, unsigned long start, if (start > vma->vm_start) prev = vma; - for (; vma && vma->vm_start < end; prev = vma, vma = next) { - next = vma->vm_next; + for (; vma && vma->vm_start < end; prev = 
vma, vma = vma->vm_next) { vmstart = max(start, vma->vm_start); vmend = min(end, vma->vm_end); @@ -817,10 +815,6 @@ static int mbind_range(struct mm_struct *mm, unsigned long start, anon_vma_name(vma)); if (prev) { vma = prev; - next = vma->vm_next; - if (mpol_equal(vma_policy(vma), new_pol)) - continue; - /* vma_merge() joined vma && vma->next, case 8 */ goto replace; } if (vma->vm_start != vmstart) { @@ -2742,6 +2736,7 @@ static int shared_policy_replace(struct shared_policy *sp, unsigned long start, mpol_new = kmem_cache_alloc(policy_cache, GFP_KERNEL); if (!mpol_new) goto err_out; + atomic_set(&mpol_new->refcnt, 1); goto restart; } diff --git a/mm/migrate.c b/mm/migrate.c index c7da064b4781b..086a366374678 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -3190,7 +3190,7 @@ static void __set_migration_target_nodes(void) /* * For callers that do not hold get_online_mems() already. */ -static void set_migration_target_nodes(void) +void set_migration_target_nodes(void) { get_online_mems(); __set_migration_target_nodes(); @@ -3254,51 +3254,24 @@ static int __meminit migrate_on_reclaim_callback(struct notifier_block *self, return notifier_from_errno(0); } -/* - * React to hotplug events that might affect the migration targets - * like events that online or offline NUMA nodes. - * - * The ordering is also currently dependent on which nodes have - * CPUs. That means we need CPU on/offline notification too. 
- */ -static int migration_online_cpu(unsigned int cpu) -{ - set_migration_target_nodes(); - return 0; -} - -static int migration_offline_cpu(unsigned int cpu) +void __init migrate_on_reclaim_init(void) { - set_migration_target_nodes(); - return 0; -} - -static int __init migrate_on_reclaim_init(void) -{ - int ret; - node_demotion = kmalloc_array(nr_node_ids, sizeof(struct demotion_nodes), GFP_KERNEL); WARN_ON(!node_demotion); - ret = cpuhp_setup_state_nocalls(CPUHP_MM_DEMOTION_DEAD, "mm/demotion:offline", - NULL, migration_offline_cpu); + hotplug_memory_notifier(migrate_on_reclaim_callback, 100); /* - * In the unlikely case that this fails, the automatic - * migration targets may become suboptimal for nodes - * where N_CPU changes. With such a small impact in a - * rare case, do not bother trying to do anything special. + * At this point, all NUMA nodes with memory/CPUs have their state + * properly set, so we can build the demotion order now. + * Hold the cpu_hotplug lock just in case, as we could possibly have + * CPU hotplug events during boot. 
*/ - WARN_ON(ret < 0); - ret = cpuhp_setup_state(CPUHP_AP_MM_DEMOTION_ONLINE, "mm/demotion:online", - migration_online_cpu, NULL); - WARN_ON(ret < 0); - - hotplug_memory_notifier(migrate_on_reclaim_callback, 100); - return 0; + cpus_read_lock(); + set_migration_target_nodes(); + cpus_read_unlock(); } -late_initcall(migrate_on_reclaim_init); #endif /* CONFIG_HOTPLUG_CPU */ bool numa_demotion_enabled = false; diff --git a/mm/mlock.c b/mm/mlock.c index 25934e7db3e10..37f969ec68fa4 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -827,13 +827,12 @@ int user_shm_lock(size_t size, struct ucounts *ucounts) locked = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; lock_limit = rlimit(RLIMIT_MEMLOCK); - if (lock_limit == RLIM_INFINITY) - allowed = 1; - lock_limit >>= PAGE_SHIFT; + if (lock_limit != RLIM_INFINITY) + lock_limit >>= PAGE_SHIFT; spin_lock(&shmlock_user_lock); memlock = inc_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_MEMLOCK, locked); - if (!allowed && (memlock == LONG_MAX || memlock > lock_limit) && !capable(CAP_IPC_LOCK)) { + if ((memlock == LONG_MAX || memlock > lock_limit) && !capable(CAP_IPC_LOCK)) { dec_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_MEMLOCK, locked); goto out; } diff --git a/mm/mm_init.c b/mm/mm_init.c index 9ddaf0e1b0ab9..0d7b2bd2454a1 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -65,14 +65,16 @@ void __init mminit_verify_pageflags_layout(void) shift = 8 * sizeof(unsigned long); width = shift - SECTIONS_WIDTH - NODES_WIDTH - ZONES_WIDTH - - LAST_CPUPID_SHIFT - KASAN_TAG_WIDTH; + - LAST_CPUPID_SHIFT - KASAN_TAG_WIDTH - LRU_GEN_WIDTH - LRU_REFS_WIDTH; mminit_dprintk(MMINIT_TRACE, "pageflags_layout_widths", - "Section %d Node %d Zone %d Lastcpupid %d Kasantag %d Flags %d\n", + "Section %d Node %d Zone %d Lastcpupid %d Kasantag %d Gen %d Tier %d Flags %d\n", SECTIONS_WIDTH, NODES_WIDTH, ZONES_WIDTH, LAST_CPUPID_WIDTH, KASAN_TAG_WIDTH, + LRU_GEN_WIDTH, + LRU_REFS_WIDTH, NR_PAGEFLAGS); mminit_dprintk(MMINIT_TRACE, "pageflags_layout_shifts", "Section %d Node %d Zone %d 
Lastcpupid %d Kasantag %d\n", diff --git a/mm/mmap.c b/mm/mmap.c index f61a15474dd6d..18875c216f8db 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2557,7 +2557,7 @@ static int __init cmdline_parse_stack_guard_gap(char *p) if (!*endptr) stack_guard_gap = val << PAGE_SHIFT; - return 0; + return 1; } __setup("stack_guard_gap=", cmdline_parse_stack_guard_gap); diff --git a/mm/mmzone.c b/mm/mmzone.c index eb89d6e018e29..2ec0d77934244 100644 --- a/mm/mmzone.c +++ b/mm/mmzone.c @@ -81,6 +81,8 @@ void lruvec_init(struct lruvec *lruvec) for_each_lru(lru) INIT_LIST_HEAD(&lruvec->lists[lru]); + + lru_gen_init_lruvec(lruvec); } #if defined(CONFIG_NUMA_BALANCING) && !defined(LAST_CPUPID_NOT_IN_PAGE_FLAGS) diff --git a/mm/mremap.c b/mm/mremap.c index 002eec83e91e5..0e175aef536e1 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -486,6 +486,9 @@ unsigned long move_page_tables(struct vm_area_struct *vma, pmd_t *old_pmd, *new_pmd; pud_t *old_pud, *new_pud; + if (!len) + return 0; + old_end = old_addr + len; flush_cache_range(vma, old_addr, old_end); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 3589febc6d319..e6f211dcf82e7 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -6112,7 +6112,7 @@ static int build_zonerefs_node(pg_data_t *pgdat, struct zoneref *zonerefs) do { zone_type--; zone = pgdat->node_zones + zone_type; - if (managed_zone(zone)) { + if (populated_zone(zone)) { zoneref_set_zone(zone, &zonerefs[nr_zones++]); check_highest_zone(zone_type); } @@ -7972,10 +7972,17 @@ static void __init find_zone_movable_pfns_for_nodes(void) out2: /* Align start of ZONE_MOVABLE on all nids to MAX_ORDER_NR_PAGES */ - for (nid = 0; nid < MAX_NUMNODES; nid++) + for (nid = 0; nid < MAX_NUMNODES; nid++) { + unsigned long start_pfn, end_pfn; + zone_movable_pfn[nid] = roundup(zone_movable_pfn[nid], MAX_ORDER_NR_PAGES); + get_pfn_range_for_nid(nid, &start_pfn, &end_pfn); + if (zone_movable_pfn[nid] >= end_pfn) + zone_movable_pfn[nid] = 0; + } + out: /* restore the node_state */ 
node_states[N_MEMORY] = saved_node_state; diff --git a/mm/page_io.c b/mm/page_io.c index 0bf8e40f4e573..d3eea0a3f1afa 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -51,54 +51,6 @@ void end_swap_bio_write(struct bio *bio) bio_put(bio); } -static void swap_slot_free_notify(struct page *page) -{ - struct swap_info_struct *sis; - struct gendisk *disk; - swp_entry_t entry; - - /* - * There is no guarantee that the page is in swap cache - the software - * suspend code (at least) uses end_swap_bio_read() against a non- - * swapcache page. So we must check PG_swapcache before proceeding with - * this optimization. - */ - if (unlikely(!PageSwapCache(page))) - return; - - sis = page_swap_info(page); - if (data_race(!(sis->flags & SWP_BLKDEV))) - return; - - /* - * The swap subsystem performs lazy swap slot freeing, - * expecting that the page will be swapped out again. - * So we can avoid an unnecessary write if the page - * isn't redirtied. - * This is good for real swap storage because we can - * reduce unnecessary I/O and enhance wear-leveling - * if an SSD is used as the as swap device. - * But if in-memory swap device (eg zram) is used, - * this causes a duplicated copy between uncompressed - * data in VM-owned memory and compressed data in - * zram-owned memory. So let's free zram-owned memory - * and make the VM-owned decompressed page *dirty*, - * so the page should be swapped out somewhere again if - * we again wish to reclaim it. 
- */ - disk = sis->bdev->bd_disk; - entry.val = page_private(page); - if (disk->fops->swap_slot_free_notify && __swap_count(entry) == 1) { - unsigned long offset; - - offset = swp_offset(entry); - - SetPageDirty(page); - disk->fops->swap_slot_free_notify(sis->bdev, - offset); - } -} - static void end_swap_bio_read(struct bio *bio) { struct page *page = bio_first_page_all(bio); @@ -114,7 +66,6 @@ static void end_swap_bio_read(struct bio *bio) } SetPageUptodate(page); - swap_slot_free_notify(page); out: unlock_page(page); WRITE_ONCE(bio->bi_private, NULL); @@ -392,11 +343,6 @@ int swap_readpage(struct page *page, bool synchronous) if (sis->flags & SWP_SYNCHRONOUS_IO) { ret = bdev_read_page(sis->bdev, swap_page_sector(page), page); if (!ret) { - if (trylock_page(page)) { - swap_slot_free_notify(page); - unlock_page(page); - } - count_vm_event(PSWPIN); goto out; } diff --git a/mm/rmap.c b/mm/rmap.c index 6a1e8c7f62136..32c3a83b7b5ab 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -73,6 +73,7 @@ #include #include #include +#include #include @@ -819,6 +820,12 @@ static bool page_referenced_one(struct page *page, struct vm_area_struct *vma, } if (pvmw.pte) { + if (lru_gen_enabled() && pte_young(*pvmw.pte) && + !(vma->vm_flags & (VM_SEQ_READ | VM_RAND_READ))) { + lru_gen_look_around(&pvmw); + referenced++; + } + if (ptep_clear_flush_young_notify(vma, address, pvmw.pte)) { /* @@ -1599,7 +1606,30 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, /* MADV_FREE page check */ if (!PageSwapBacked(page)) { - if (!PageDirty(page)) { + int ref_count, map_count; + + /* + * Synchronize with gup_pte_range(): + * - clear PTE; barrier; read refcount + * - inc refcount; barrier; read PTE + */ + smp_mb(); + + ref_count = page_ref_count(page); + map_count = page_mapcount(page); + + /* + * Order reads for page refcount and dirty flag + * (see comments in __remove_mapping()). 
+ */ + smp_rmb(); + + /* + * The only page refs must be one from isolation + * plus the rmap(s) (dropped by discard:). + */ + if (ref_count == 1 + map_count && + !PageDirty(page)) { /* Invalidate as we cleared the pte */ mmu_notifier_invalidate_range(mm, address, address + PAGE_SIZE); diff --git a/mm/secretmem.c b/mm/secretmem.c index 22b310adb53d9..5a62ef3bcfcff 100644 --- a/mm/secretmem.c +++ b/mm/secretmem.c @@ -158,6 +158,22 @@ const struct address_space_operations secretmem_aops = { .isolate_page = secretmem_isolate_page, }; +static int secretmem_setattr(struct user_namespace *mnt_userns, + struct dentry *dentry, struct iattr *iattr) +{ + struct inode *inode = d_inode(dentry); + unsigned int ia_valid = iattr->ia_valid; + + if ((ia_valid & ATTR_SIZE) && inode->i_size) + return -EINVAL; + + return simple_setattr(mnt_userns, dentry, iattr); +} + +static const struct inode_operations secretmem_iops = { + .setattr = secretmem_setattr, +}; + static struct vfsmount *secretmem_mnt; static struct file *secretmem_file_create(unsigned long flags) @@ -177,6 +193,7 @@ static struct file *secretmem_file_create(unsigned long flags) mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER); mapping_set_unevictable(inode->i_mapping); + inode->i_op = &secretmem_iops; inode->i_mapping->a_ops = &secretmem_aops; /* pretend we are a normal file with zero size */ diff --git a/mm/slab.c b/mm/slab.c index ddf5737c63d90..f4b3eebc1e2ca 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -3421,6 +3421,7 @@ static __always_inline void __cache_free(struct kmem_cache *cachep, void *objp, if (is_kfence_address(objp)) { kmemleak_free_recursive(objp, cachep->flags); + memcg_slab_free_hook(cachep, &objp, 1); __kfence_free(objp); return; } @@ -3649,7 +3650,7 @@ EXPORT_SYMBOL(__kmalloc_node_track_caller); #endif /* CONFIG_NUMA */ #ifdef CONFIG_PRINTK -void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab) +void __kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab) { 
struct kmem_cache *cachep; unsigned int objnr; diff --git a/mm/slab.h b/mm/slab.h index c7f2abc2b154c..506dab2a6735e 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -851,7 +851,7 @@ struct kmem_obj_info { void *kp_stack[KS_ADDRS_COUNT]; void *kp_free_stack[KS_ADDRS_COUNT]; }; -void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab); +void __kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab); #endif #ifdef CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR diff --git a/mm/slab_common.c b/mm/slab_common.c index 23f2ab0713b77..a9a7d79daa102 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -555,6 +555,13 @@ bool kmem_valid_obj(void *object) } EXPORT_SYMBOL_GPL(kmem_valid_obj); +static void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab) +{ + if (__kfence_obj_info(kpp, object, slab)) + return; + __kmem_obj_info(kpp, object, slab); +} + /** * kmem_dump_obj - Print available slab provenance information * @object: slab object for which to find provenance information. 
@@ -590,6 +597,8 @@ void kmem_dump_obj(void *object) pr_cont(" slab%s %s", cp, kp.kp_slab_cache->name); else pr_cont(" slab%s", cp); + if (is_kfence_address(object)) + pr_cont(" (kfence)"); if (kp.kp_objp) pr_cont(" start %px", kp.kp_objp); if (kp.kp_data_offset) diff --git a/mm/slob.c b/mm/slob.c index 60c5842215f1b..fd9c643facbc4 100644 --- a/mm/slob.c +++ b/mm/slob.c @@ -463,7 +463,7 @@ static void slob_free(void *block, int size) } #ifdef CONFIG_PRINTK -void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab) +void __kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab) { kpp->kp_ptr = object; kpp->kp_slab = slab; diff --git a/mm/slub.c b/mm/slub.c index 261474092e43e..e3277e33ea6e4 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -4322,7 +4322,7 @@ int __kmem_cache_shutdown(struct kmem_cache *s) } #ifdef CONFIG_PRINTK -void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab) +void __kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab) { void *base; int __maybe_unused i; diff --git a/mm/swap.c b/mm/swap.c index bcf3ac288b56d..e65e7520bebf9 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -344,7 +344,7 @@ static bool need_activate_page_drain(int cpu) return pagevec_count(&per_cpu(lru_pvecs.activate_page, cpu)) != 0; } -static void folio_activate(struct folio *folio) +void folio_activate(struct folio *folio) { if (folio_test_lru(folio) && !folio_test_active(folio) && !folio_test_unevictable(folio)) { @@ -364,7 +364,7 @@ static inline void activate_page_drain(int cpu) { } -static void folio_activate(struct folio *folio) +void folio_activate(struct folio *folio) { struct lruvec *lruvec; @@ -407,6 +407,43 @@ static void __lru_cache_activate_folio(struct folio *folio) local_unlock(&lru_pvecs.lock); } +#ifdef CONFIG_LRU_GEN +static void folio_inc_refs(struct folio *folio) +{ + unsigned long refs; + unsigned long old_flags, new_flags; + + if (folio_test_unevictable(folio)) + return; + + /* see the 
comment on MAX_NR_TIERS */ + do { + new_flags = old_flags = READ_ONCE(folio->flags); + + if (!(new_flags & BIT(PG_referenced))) { + new_flags |= BIT(PG_referenced); + continue; + } + + if (!(new_flags & BIT(PG_workingset))) { + new_flags |= BIT(PG_workingset); + continue; + } + + refs = new_flags & LRU_REFS_MASK; + refs = min(refs + BIT(LRU_REFS_PGOFF), LRU_REFS_MASK); + + new_flags &= ~LRU_REFS_MASK; + new_flags |= refs; + } while (new_flags != old_flags && + cmpxchg(&folio->flags, old_flags, new_flags) != old_flags); +} +#else +static void folio_inc_refs(struct folio *folio) +{ +} +#endif /* CONFIG_LRU_GEN */ + /* * Mark a page as having seen activity. * @@ -419,6 +456,11 @@ static void __lru_cache_activate_folio(struct folio *folio) */ void folio_mark_accessed(struct folio *folio) { + if (lru_gen_enabled()) { + folio_inc_refs(folio); + return; + } + if (!folio_test_referenced(folio)) { folio_set_referenced(folio); } else if (folio_test_unevictable(folio)) { @@ -462,6 +504,11 @@ void folio_add_lru(struct folio *folio) VM_BUG_ON_FOLIO(folio_test_active(folio) && folio_test_unevictable(folio), folio); VM_BUG_ON_FOLIO(folio_test_lru(folio), folio); + /* see the comment in lru_gen_add_folio() */ + if (lru_gen_enabled() && !folio_test_unevictable(folio) && + lru_gen_in_fault() && !(current->flags & PF_MEMALLOC)) + folio_set_active(folio); + folio_get(folio); local_lock(&lru_pvecs.lock); pvec = this_cpu_ptr(&lru_pvecs.lru_add); @@ -563,7 +610,7 @@ static void lru_deactivate_file_fn(struct page *page, struct lruvec *lruvec) static void lru_deactivate_fn(struct page *page, struct lruvec *lruvec) { - if (PageActive(page) && !PageUnevictable(page)) { + if (!PageUnevictable(page) && (PageActive(page) || lru_gen_enabled())) { int nr_pages = thp_nr_pages(page); del_page_from_lru_list(page, lruvec); @@ -677,7 +724,7 @@ void deactivate_file_page(struct page *page) */ void deactivate_page(struct page *page) { - if (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) { + 
if (PageLRU(page) && !PageUnevictable(page) && (PageActive(page) || lru_gen_enabled())) { struct pagevec *pvec; local_lock(&lru_pvecs.lock); diff --git a/mm/usercopy.c b/mm/usercopy.c index d0d268135d96d..21fd84ee7fcd4 100644 --- a/mm/usercopy.c +++ b/mm/usercopy.c @@ -295,7 +295,10 @@ static bool enable_checks __initdata = true; static int __init parse_hardened_usercopy(char *str) { - return strtobool(str, &enable_checks); + if (strtobool(str, &enable_checks)) + pr_warn("Invalid option string for hardened_usercopy: '%s'\n", + str); + return 1; } __setup("hardened_usercopy=", parse_hardened_usercopy); diff --git a/mm/vmscan.c b/mm/vmscan.c index 59b14e0d696c9..8f8f9ac2cd2c0 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -50,6 +50,10 @@ #include #include #include +#include +#include +#include +#include #include #include @@ -125,6 +129,13 @@ struct scan_control { /* Always discard instead of demoting to lower tier memory */ unsigned int no_demotion:1; +#ifdef CONFIG_LRU_GEN + /* help make better choices when multiple memcgs are available */ + unsigned int memcgs_need_aging:1; + unsigned int memcgs_need_swapping:1; + unsigned int memcgs_avoid_swapping:1; +#endif + /* Allocation order */ s8 order; @@ -1287,9 +1298,11 @@ static int __remove_mapping(struct address_space *mapping, struct page *page, if (PageSwapCache(page)) { swp_entry_t swap = { .val = page_private(page) }; - mem_cgroup_swapout(page, swap); + + /* get a shadow entry before mem_cgroup_swapout() clears folio_memcg() */ if (reclaimed && !mapping_exiting(mapping)) shadow = workingset_eviction(page, target_memcg); + mem_cgroup_swapout(page, swap); __delete_from_swap_cache(page, swap, shadow); xa_unlock_irq(&mapping->i_pages); put_swap_page(page, swap); @@ -1556,6 +1569,11 @@ static unsigned int shrink_page_list(struct list_head *page_list, if (!sc->may_unmap && page_mapped(page)) goto keep_locked; + /* folio_update_gen() tried to promote this page? 
*/ + if (lru_gen_enabled() && !ignore_references && + page_mapped(page) && PageReferenced(page)) + goto keep_locked; + may_enter_fs = (sc->gfp_mask & __GFP_FS) || (PageSwapCache(page) && (sc->gfp_mask & __GFP_IO)); @@ -2718,6 +2736,112 @@ enum scan_balance { SCAN_FILE, }; +static void prepare_scan_count(pg_data_t *pgdat, struct scan_control *sc) +{ + unsigned long file; + struct lruvec *target_lruvec; + + if (lru_gen_enabled()) + return; + + target_lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, pgdat); + + /* + * Flush the memory cgroup stats, so that we read accurate per-memcg + * lruvec stats for heuristics. + */ + mem_cgroup_flush_stats(); + + /* + * Determine the scan balance between anon and file LRUs. + */ + spin_lock_irq(&target_lruvec->lru_lock); + sc->anon_cost = target_lruvec->anon_cost; + sc->file_cost = target_lruvec->file_cost; + spin_unlock_irq(&target_lruvec->lru_lock); + + /* + * Target desirable inactive:active list ratios for the anon + * and file LRU lists. + */ + if (!sc->force_deactivate) { + unsigned long refaults; + + refaults = lruvec_page_state(target_lruvec, + WORKINGSET_ACTIVATE_ANON); + if (refaults != target_lruvec->refaults[0] || + inactive_is_low(target_lruvec, LRU_INACTIVE_ANON)) + sc->may_deactivate |= DEACTIVATE_ANON; + else + sc->may_deactivate &= ~DEACTIVATE_ANON; + + /* + * When refaults are being observed, it means a new + * workingset is being established. Deactivate to get + * rid of any stale active pages quickly. + */ + refaults = lruvec_page_state(target_lruvec, + WORKINGSET_ACTIVATE_FILE); + if (refaults != target_lruvec->refaults[1] || + inactive_is_low(target_lruvec, LRU_INACTIVE_FILE)) + sc->may_deactivate |= DEACTIVATE_FILE; + else + sc->may_deactivate &= ~DEACTIVATE_FILE; + } else + sc->may_deactivate = DEACTIVATE_ANON | DEACTIVATE_FILE; + + /* + * If we have plenty of inactive file pages that aren't + * thrashing, try to reclaim those first before touching + * anonymous pages. 
+ */ + file = lruvec_page_state(target_lruvec, NR_INACTIVE_FILE); + if (file >> sc->priority && !(sc->may_deactivate & DEACTIVATE_FILE)) + sc->cache_trim_mode = 1; + else + sc->cache_trim_mode = 0; + + /* + * Prevent the reclaimer from falling into the cache trap: as + * cache pages start out inactive, every cache fault will tip + * the scan balance towards the file LRU. And as the file LRU + * shrinks, so does the window for rotation from references. + * This means we have a runaway feedback loop where a tiny + * thrashing file LRU becomes infinitely more attractive than + * anon pages. Try to detect this based on file LRU size. + */ + if (!cgroup_reclaim(sc)) { + unsigned long total_high_wmark = 0; + unsigned long free, anon; + int z; + + free = sum_zone_node_page_state(pgdat->node_id, NR_FREE_PAGES); + file = node_page_state(pgdat, NR_ACTIVE_FILE) + + node_page_state(pgdat, NR_INACTIVE_FILE); + + for (z = 0; z < MAX_NR_ZONES; z++) { + struct zone *zone = &pgdat->node_zones[z]; + + if (!managed_zone(zone)) + continue; + + total_high_wmark += high_wmark_pages(zone); + } + + /* + * Consider anon: if that's low too, this isn't a + * runaway file reclaim problem, but rather just + * extreme pressure. Reclaim as per usual then. + */ + anon = node_page_state(pgdat, NR_INACTIVE_ANON); + + sc->file_is_tiny = + file + free <= total_high_wmark && + !(sc->may_deactivate & DEACTIVATE_ANON) && + anon >> sc->priority; + } +} + /* * Determine how aggressively the anon and file LRU lists should be * scanned. 
The relative value of each set of LRU lists is determined @@ -2939,155 +3063,2750 @@ static bool can_age_anon_pages(struct pglist_data *pgdat, return can_demote(pgdat->node_id, sc); } -static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) -{ - unsigned long nr[NR_LRU_LISTS]; - unsigned long targets[NR_LRU_LISTS]; - unsigned long nr_to_scan; - enum lru_list lru; - unsigned long nr_reclaimed = 0; - unsigned long nr_to_reclaim = sc->nr_to_reclaim; - struct blk_plug plug; - bool scan_adjusted; +#ifdef CONFIG_LRU_GEN - get_scan_count(lruvec, sc, nr); +#ifdef CONFIG_LRU_GEN_ENABLED +DEFINE_STATIC_KEY_ARRAY_TRUE(lru_gen_caps, NR_LRU_GEN_CAPS); +#else +DEFINE_STATIC_KEY_ARRAY_FALSE(lru_gen_caps, NR_LRU_GEN_CAPS); +#endif - /* Record the original scan target for proportional adjustments later */ - memcpy(targets, nr, sizeof(nr)); +/****************************************************************************** + * shorthand helpers + ******************************************************************************/ - /* - * Global reclaiming within direct reclaim at DEF_PRIORITY is a normal - * event that can occur when there is little memory pressure e.g. - * multiple streaming readers/writers. Hence, we do not abort scanning - * when the requested number of pages are reclaimed when scanning at - * DEF_PRIORITY on the assumption that the fact we are direct - * reclaiming implies that kswapd is not keeping up and it is best to - * do a batch of work at once. For memcg reclaim one check is made to - * abort proportional reclaim if either the file or anon lru has already - * dropped to zero at the first pass. 
- */ - scan_adjusted = (!cgroup_reclaim(sc) && !current_is_kswapd() && - sc->priority == DEF_PRIORITY); +#define DEFINE_MAX_SEQ(lruvec) \ + unsigned long max_seq = READ_ONCE((lruvec)->lrugen.max_seq) - blk_start_plug(&plug); - while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] || - nr[LRU_INACTIVE_FILE]) { - unsigned long nr_anon, nr_file, percentage; - unsigned long nr_scanned; +#define DEFINE_MIN_SEQ(lruvec) \ + unsigned long min_seq[ANON_AND_FILE] = { \ + READ_ONCE((lruvec)->lrugen.min_seq[LRU_GEN_ANON]), \ + READ_ONCE((lruvec)->lrugen.min_seq[LRU_GEN_FILE]), \ + } - for_each_evictable_lru(lru) { - if (nr[lru]) { - nr_to_scan = min(nr[lru], SWAP_CLUSTER_MAX); - nr[lru] -= nr_to_scan; +#define for_each_gen_type_zone(gen, type, zone) \ + for ((gen) = 0; (gen) < MAX_NR_GENS; (gen)++) \ + for ((type) = 0; (type) < ANON_AND_FILE; (type)++) \ + for ((zone) = 0; (zone) < MAX_NR_ZONES; (zone)++) - nr_reclaimed += shrink_list(lru, nr_to_scan, - lruvec, sc); - } - } +static int folio_lru_gen(struct folio *folio) +{ + unsigned long flags = READ_ONCE(folio->flags); - cond_resched(); + return ((flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1; +} - if (nr_reclaimed < nr_to_reclaim || scan_adjusted) - continue; +static int folio_lru_tier(struct folio *folio) +{ + int refs; + unsigned long flags = READ_ONCE(folio->flags); - /* - * For kswapd and memcg, reclaim at least the number of pages - * requested. Ensure that the anon and file LRUs are scanned - * proportionally what was requested by get_scan_count(). We - * stop reclaiming one LRU and reduce the amount scanning - * proportional to the original scan target. - */ - nr_file = nr[LRU_INACTIVE_FILE] + nr[LRU_ACTIVE_FILE]; - nr_anon = nr[LRU_INACTIVE_ANON] + nr[LRU_ACTIVE_ANON]; + refs = (flags & LRU_REFS_FLAGS) == LRU_REFS_FLAGS ? + ((flags & LRU_REFS_MASK) >> LRU_REFS_PGOFF) + 1 : 0; - /* - * It's just vindictive to attack the larger once the smaller - * has gone to zero. 
And given the way we stop scanning the - * smaller below, this makes sure that we only make one nudge - * towards proportionality once we've got nr_to_reclaim. - */ - if (!nr_file || !nr_anon) - break; + return lru_tier_from_refs(refs); +} - if (nr_file > nr_anon) { - unsigned long scan_target = targets[LRU_INACTIVE_ANON] + - targets[LRU_ACTIVE_ANON] + 1; - lru = LRU_BASE; - percentage = nr_anon * 100 / scan_target; - } else { - unsigned long scan_target = targets[LRU_INACTIVE_FILE] + - targets[LRU_ACTIVE_FILE] + 1; - lru = LRU_FILE; - percentage = nr_file * 100 / scan_target; - } +static bool get_cap(int cap) +{ +#ifdef CONFIG_LRU_GEN_ENABLED + return static_branch_likely(&lru_gen_caps[cap]); +#else + return static_branch_unlikely(&lru_gen_caps[cap]); +#endif +} - /* Stop scanning the smaller of the LRU */ - nr[lru] = 0; - nr[lru + LRU_ACTIVE] = 0; +static struct lruvec *get_lruvec(struct mem_cgroup *memcg, int nid) +{ + struct pglist_data *pgdat = NODE_DATA(nid); - /* - * Recalculate the other LRU scan count based on its original - * scan target and the percentage scanning already complete - */ - lru = (lru == LRU_FILE) ? LRU_BASE : LRU_FILE; - nr_scanned = targets[lru] - nr[lru]; - nr[lru] = targets[lru] * (100 - percentage) / 100; - nr[lru] -= min(nr[lru], nr_scanned); +#ifdef CONFIG_MEMCG + if (memcg) { + struct lruvec *lruvec = &memcg->nodeinfo[nid]->lruvec; - lru += LRU_ACTIVE; - nr_scanned = targets[lru] - nr[lru]; - nr[lru] = targets[lru] * (100 - percentage) / 100; - nr[lru] -= min(nr[lru], nr_scanned); + /* for hotadd_new_pgdat() */ + if (!lruvec->pgdat) + lruvec->pgdat = pgdat; - scan_adjusted = true; + return lruvec; } - blk_finish_plug(&plug); - sc->nr_reclaimed += nr_reclaimed; - - /* - * Even if we did not try to evict anon pages at all, we want to - * rebalance the anon lru active/inactive ratio. 
- */ - if (can_age_anon_pages(lruvec_pgdat(lruvec), sc) && - inactive_is_low(lruvec, LRU_INACTIVE_ANON)) - shrink_active_list(SWAP_CLUSTER_MAX, lruvec, - sc, LRU_ACTIVE_ANON); +#endif + return pgdat ? &pgdat->__lruvec : NULL; } -/* Use reclaim/compaction for costly allocs or under memory pressure */ -static bool in_reclaim_compaction(struct scan_control *sc) +static int get_swappiness(struct lruvec *lruvec, struct scan_control *sc) { - if (IS_ENABLED(CONFIG_COMPACTION) && sc->order && - (sc->order > PAGE_ALLOC_COSTLY_ORDER || - sc->priority < DEF_PRIORITY - 2)) - return true; + struct mem_cgroup *memcg = lruvec_memcg(lruvec); + struct pglist_data *pgdat = lruvec_pgdat(lruvec); - return false; + if (!can_demote(pgdat->node_id, sc) && + mem_cgroup_get_nr_swap_pages(memcg) < MIN_LRU_BATCH) + return 0; + + return mem_cgroup_swappiness(memcg); } -/* - * Reclaim/compaction is used for high-order allocation requests. It reclaims - * order-0 pages before compacting the zone. should_continue_reclaim() returns - * true if more pages should be reclaimed such that when the page allocator - * calls try_to_compact_pages() that it will have enough free pages to succeed. - * It will give up earlier than that if there is difficulty reclaiming pages. 
- */ -static inline bool should_continue_reclaim(struct pglist_data *pgdat, - unsigned long nr_reclaimed, - struct scan_control *sc) +static int get_nr_gens(struct lruvec *lruvec, int type) { - unsigned long pages_for_compaction; - unsigned long inactive_lru_pages; - int z; + return lruvec->lrugen.max_seq - lruvec->lrugen.min_seq[type] + 1; +} - /* If not in reclaim/compaction mode, stop */ - if (!in_reclaim_compaction(sc)) - return false; +static bool __maybe_unused seq_is_valid(struct lruvec *lruvec) +{ + /* see the comment on lru_gen_struct */ + return get_nr_gens(lruvec, LRU_GEN_FILE) >= MIN_NR_GENS && + get_nr_gens(lruvec, LRU_GEN_FILE) <= get_nr_gens(lruvec, LRU_GEN_ANON) && + get_nr_gens(lruvec, LRU_GEN_ANON) <= MAX_NR_GENS; +} - /* - * Stop if we failed to reclaim any pages from the last SWAP_CLUSTER_MAX - * number of pages that were scanned. This will return to the caller - * with the risk reclaim/compaction and the resulting allocation attempt +/****************************************************************************** + * mm_struct list + ******************************************************************************/ + +static struct lru_gen_mm_list *get_mm_list(struct mem_cgroup *memcg) +{ + static struct lru_gen_mm_list mm_list = { + .fifo = LIST_HEAD_INIT(mm_list.fifo), + .lock = __SPIN_LOCK_UNLOCKED(mm_list.lock), + }; + +#ifdef CONFIG_MEMCG + if (memcg) + return &memcg->mm_list; +#endif + return &mm_list; +} + +void lru_gen_add_mm(struct mm_struct *mm) +{ + int nid; + struct mem_cgroup *memcg = get_mem_cgroup_from_mm(mm); + struct lru_gen_mm_list *mm_list = get_mm_list(memcg); + + VM_BUG_ON_MM(!list_empty(&mm->lru_gen.list), mm); +#ifdef CONFIG_MEMCG + VM_BUG_ON_MM(mm->lru_gen.memcg, mm); + mm->lru_gen.memcg = memcg; +#endif + spin_lock(&mm_list->lock); + + for_each_node_state(nid, N_MEMORY) { + struct lruvec *lruvec = get_lruvec(memcg, nid); + + if (!lruvec) + continue; + + if (lruvec->mm_state.tail == &mm_list->fifo) + lruvec->mm_state.tail = 
&mm->lru_gen.list; + } + + list_add_tail(&mm->lru_gen.list, &mm_list->fifo); + + spin_unlock(&mm_list->lock); +} + +void lru_gen_del_mm(struct mm_struct *mm) +{ + int nid; + struct lru_gen_mm_list *mm_list; + struct mem_cgroup *memcg = NULL; + + if (list_empty(&mm->lru_gen.list)) + return; + +#ifdef CONFIG_MEMCG + memcg = mm->lru_gen.memcg; +#endif + mm_list = get_mm_list(memcg); + + spin_lock(&mm_list->lock); + + for_each_node(nid) { + struct lruvec *lruvec = get_lruvec(memcg, nid); + + if (!lruvec) + continue; + + if (lruvec->mm_state.tail == &mm->lru_gen.list) + lruvec->mm_state.tail = lruvec->mm_state.tail->next; + + if (lruvec->mm_state.head != &mm->lru_gen.list) + continue; + + lruvec->mm_state.head = lruvec->mm_state.head->next; + if (lruvec->mm_state.head == &mm_list->fifo) + WRITE_ONCE(lruvec->mm_state.seq, lruvec->mm_state.seq + 1); + } + + list_del_init(&mm->lru_gen.list); + + spin_unlock(&mm_list->lock); + +#ifdef CONFIG_MEMCG + mem_cgroup_put(mm->lru_gen.memcg); + mm->lru_gen.memcg = NULL; +#endif +} + +#ifdef CONFIG_MEMCG +void lru_gen_migrate_mm(struct mm_struct *mm) +{ + struct mem_cgroup *memcg; + + lockdep_assert_held(&mm->owner->alloc_lock); + + /* for mm_update_next_owner() */ + if (mem_cgroup_disabled()) + return; + + rcu_read_lock(); + memcg = mem_cgroup_from_task(mm->owner); + rcu_read_unlock(); + if (memcg == mm->lru_gen.memcg) + return; + + VM_BUG_ON_MM(!mm->lru_gen.memcg, mm); + VM_BUG_ON_MM(list_empty(&mm->lru_gen.list), mm); + + lru_gen_del_mm(mm); + lru_gen_add_mm(mm); +} +#endif + +/* + * Bloom filters with m=1<<15, k=2 and the false positive rates of ~1/5 when + * n=10,000 and ~1/2 when n=20,000, where, conventionally, m is the number of + * bits in a bitmap, k is the number of hash functions and n is the number of + * inserted items. + * + * Page table walkers use one of the two filters to reduce their search space. 
+ * To get rid of non-leaf entries that no longer have enough leaf entries, the + * aging uses the double-buffering technique to flip to the other filter each + * time it produces a new generation. For non-leaf entries that have enough + * leaf entries, the aging carries them over to the next generation in + * walk_pmd_range(); the eviction also reports them when walking the rmap + * in lru_gen_look_around(). + * + * For future optimizations: + * 1. It's not necessary to keep both filters all the time. The spare one can be + * freed after the RCU grace period and reallocated if needed again. + * 2. And when reallocating, it's worth scaling its size according to the number + * of inserted entries in the other filter, to reduce the memory overhead on + * small systems and false positives on large systems. + * 3. Jenkins' hash function is an alternative to Knuth's. + */ +#define BLOOM_FILTER_SHIFT 15 + +static inline int filter_gen_from_seq(unsigned long seq) +{ + return seq % NR_BLOOM_FILTERS; +} + +static void get_item_key(void *item, int *key) +{ + u32 hash = hash_ptr(item, BLOOM_FILTER_SHIFT * 2); + + BUILD_BUG_ON(BLOOM_FILTER_SHIFT * 2 > BITS_PER_TYPE(u32)); + + key[0] = hash & (BIT(BLOOM_FILTER_SHIFT) - 1); + key[1] = hash >> BLOOM_FILTER_SHIFT; +} + +static void reset_bloom_filter(struct lruvec *lruvec, unsigned long seq) +{ + unsigned long *filter; + int gen = filter_gen_from_seq(seq); + + lockdep_assert_held(&get_mm_list(lruvec_memcg(lruvec))->lock); + + filter = lruvec->mm_state.filters[gen]; + if (filter) { + bitmap_clear(filter, 0, BIT(BLOOM_FILTER_SHIFT)); + return; + } + + filter = bitmap_zalloc(BIT(BLOOM_FILTER_SHIFT), GFP_ATOMIC); + WRITE_ONCE(lruvec->mm_state.filters[gen], filter); +} + +static void update_bloom_filter(struct lruvec *lruvec, unsigned long seq, void *item) +{ + int key[2]; + unsigned long *filter; + int gen = filter_gen_from_seq(seq); + + filter = READ_ONCE(lruvec->mm_state.filters[gen]); + if (!filter) + return; + + get_item_key(item,
key); + + if (!test_bit(key[0], filter)) + set_bit(key[0], filter); + if (!test_bit(key[1], filter)) + set_bit(key[1], filter); +} + +static bool test_bloom_filter(struct lruvec *lruvec, unsigned long seq, void *item) +{ + int key[2]; + unsigned long *filter; + int gen = filter_gen_from_seq(seq); + + filter = READ_ONCE(lruvec->mm_state.filters[gen]); + if (!filter) + return true; + + get_item_key(item, key); + + return test_bit(key[0], filter) && test_bit(key[1], filter); +} + +static void reset_mm_stats(struct lruvec *lruvec, struct lru_gen_mm_walk *walk, bool last) +{ + int i; + int hist; + + lockdep_assert_held(&get_mm_list(lruvec_memcg(lruvec))->lock); + + if (walk) { + hist = lru_hist_from_seq(walk->max_seq); + + for (i = 0; i < NR_MM_STATS; i++) { + WRITE_ONCE(lruvec->mm_state.stats[hist][i], + lruvec->mm_state.stats[hist][i] + walk->mm_stats[i]); + walk->mm_stats[i] = 0; + } + } + + if (NR_HIST_GENS > 1 && last) { + hist = lru_hist_from_seq(lruvec->mm_state.seq + 1); + + for (i = 0; i < NR_MM_STATS; i++) + WRITE_ONCE(lruvec->mm_state.stats[hist][i], 0); + } +} + +static bool should_skip_mm(struct mm_struct *mm, struct lru_gen_mm_walk *walk) +{ + int type; + unsigned long size = 0; + struct pglist_data *pgdat = lruvec_pgdat(walk->lruvec); + + if (!walk->full_scan && cpumask_empty(mm_cpumask(mm)) && + !node_isset(pgdat->node_id, mm->lru_gen.nodes)) + return true; + + node_clear(pgdat->node_id, mm->lru_gen.nodes); + + for (type = !walk->can_swap; type < ANON_AND_FILE; type++) { + size += type ? 
get_mm_counter(mm, MM_FILEPAGES) : + get_mm_counter(mm, MM_ANONPAGES) + + get_mm_counter(mm, MM_SHMEMPAGES); + } + + if (size < MIN_LRU_BATCH) + return true; + + if (mm_is_oom_victim(mm)) + return true; + + return !mmget_not_zero(mm); +} + +static bool iterate_mm_list(struct lruvec *lruvec, struct lru_gen_mm_walk *walk, + struct mm_struct **iter) +{ + bool first = false; + bool last = true; + struct mm_struct *mm = NULL; + struct mem_cgroup *memcg = lruvec_memcg(lruvec); + struct lru_gen_mm_list *mm_list = get_mm_list(memcg); + struct lru_gen_mm_state *mm_state = &lruvec->mm_state; + + /* + * There are four interesting cases for this page table walker: + * 1. It tries to start a new iteration of mm_list with a stale max_seq; + * there is nothing to be done. + * 2. It's the first of the current generation, and it needs to reset + * the Bloom filter for the next generation. + * 3. It reaches the end of mm_list, and it needs to increment + * mm_state->seq; the iteration is done. + * 4. It's the last of the current generation, and it needs to reset the + * mm stats counters for the next generation. 
+ */ + if (*iter) + mmput_async(*iter); + else if (walk->max_seq <= READ_ONCE(mm_state->seq)) + return false; + + spin_lock(&mm_list->lock); + + VM_BUG_ON(mm_state->seq + 1 < walk->max_seq); + VM_BUG_ON(*iter && mm_state->seq > walk->max_seq); + VM_BUG_ON(*iter && !mm_state->nr_walkers); + + if (walk->max_seq <= mm_state->seq) { + if (!*iter) + last = false; + goto done; + } + + if (!mm_state->nr_walkers) { + VM_BUG_ON(mm_state->head && mm_state->head != &mm_list->fifo); + + mm_state->head = mm_list->fifo.next; + first = true; + } + + while (!mm && mm_state->head != &mm_list->fifo) { + mm = list_entry(mm_state->head, struct mm_struct, lru_gen.list); + + mm_state->head = mm_state->head->next; + + /* full scan for those added after the last iteration */ + if (!mm_state->tail || mm_state->tail == &mm->lru_gen.list) { + mm_state->tail = mm_state->head; + walk->full_scan = true; + } + + if (should_skip_mm(mm, walk)) + mm = NULL; + } + + if (mm_state->head == &mm_list->fifo) + WRITE_ONCE(mm_state->seq, mm_state->seq + 1); +done: + if (*iter && !mm) + mm_state->nr_walkers--; + if (!*iter && mm) + mm_state->nr_walkers++; + + if (mm_state->nr_walkers) + last = false; + + if (mm && first) + reset_bloom_filter(lruvec, walk->max_seq + 1); + + if (*iter || last) + reset_mm_stats(lruvec, walk, last); + + spin_unlock(&mm_list->lock); + + *iter = mm; + + return last; +} + +static bool iterate_mm_list_nowalk(struct lruvec *lruvec, unsigned long max_seq) +{ + bool success = false; + struct mem_cgroup *memcg = lruvec_memcg(lruvec); + struct lru_gen_mm_list *mm_list = get_mm_list(memcg); + struct lru_gen_mm_state *mm_state = &lruvec->mm_state; + + if (max_seq <= READ_ONCE(mm_state->seq)) + return false; + + spin_lock(&mm_list->lock); + + VM_BUG_ON(mm_state->seq + 1 < max_seq); + + if (max_seq > mm_state->seq && !mm_state->nr_walkers) { + VM_BUG_ON(mm_state->head && mm_state->head != &mm_list->fifo); + + WRITE_ONCE(mm_state->seq, mm_state->seq + 1); + reset_mm_stats(lruvec, NULL, 
true); + success = true; + } + + spin_unlock(&mm_list->lock); + + return success; +} + +/****************************************************************************** + * refault feedback loop + ******************************************************************************/ + +/* + * A feedback loop based on Proportional-Integral-Derivative (PID) controller. + * + * The P term is refaulted/(evicted+protected) from a tier in the generation + * currently being evicted; the I term is the exponential moving average of the + * P term over the generations previously evicted, using the smoothing factor + * 1/2; the D term isn't supported. + * + * The setpoint (SP) is always the first tier of one type; the process variable + * (PV) is either any tier of the other type or any other tier of the same + * type. + * + * The error is the difference between the SP and the PV; the correction is to + * turn off protection when SP>PV or turn on protection when SP<PV. + */ +struct ctrl_pos { + unsigned long refaulted; + unsigned long total; + int gain; +}; + +static void read_ctrl_pos(struct lruvec *lruvec, int type, int tier, int gain, + struct ctrl_pos *pos) +{ + struct lru_gen_struct *lrugen = &lruvec->lrugen; + int hist = lru_hist_from_seq(lrugen->min_seq[type]); + + pos->refaulted = lrugen->avg_refaulted[type][tier] + + atomic_long_read(&lrugen->refaulted[hist][type][tier]); + pos->total = lrugen->avg_total[type][tier] + + atomic_long_read(&lrugen->evicted[hist][type][tier]); + if (tier) + pos->total += lrugen->protected[hist][type][tier - 1]; + pos->gain = gain; +} + +static void reset_ctrl_pos(struct lruvec *lruvec, int type, bool carryover) +{ + int hist, tier; + struct lru_gen_struct *lrugen = &lruvec->lrugen; + bool clear = carryover ? NR_HIST_GENS == 1 : NR_HIST_GENS > 1; + unsigned long seq = carryover ?
lrugen->min_seq[type] : lrugen->max_seq + 1; + + lockdep_assert_held(&lruvec->lru_lock); + + if (!carryover && !clear) + return; + + hist = lru_hist_from_seq(seq); + + for (tier = 0; tier < MAX_NR_TIERS; tier++) { + if (carryover) { + unsigned long sum; + + sum = lrugen->avg_refaulted[type][tier] + + atomic_long_read(&lrugen->refaulted[hist][type][tier]); + WRITE_ONCE(lrugen->avg_refaulted[type][tier], sum / 2); + + sum = lrugen->avg_total[type][tier] + + atomic_long_read(&lrugen->evicted[hist][type][tier]); + if (tier) + sum += lrugen->protected[hist][type][tier - 1]; + WRITE_ONCE(lrugen->avg_total[type][tier], sum / 2); + } + + if (clear) { + atomic_long_set(&lrugen->refaulted[hist][type][tier], 0); + atomic_long_set(&lrugen->evicted[hist][type][tier], 0); + if (tier) + WRITE_ONCE(lrugen->protected[hist][type][tier - 1], 0); + } + } +} + +static bool positive_ctrl_err(struct ctrl_pos *sp, struct ctrl_pos *pv) +{ + /* + * Return true if the PV has a limited number of refaults or a lower + * refaulted/total than the SP. 
+ */ + return pv->refaulted < MIN_LRU_BATCH || + pv->refaulted * (sp->total + MIN_LRU_BATCH) * sp->gain <= + (sp->refaulted + 1) * pv->total * pv->gain; +} + +/****************************************************************************** + * the aging + ******************************************************************************/ + +static int folio_update_gen(struct folio *folio, int gen) +{ + unsigned long old_flags, new_flags; + + VM_BUG_ON(gen >= MAX_NR_GENS); + VM_BUG_ON(!rcu_read_lock_held()); + + do { + new_flags = old_flags = READ_ONCE(folio->flags); + + /* for shrink_page_list() */ + if (!(new_flags & LRU_GEN_MASK)) { + new_flags |= BIT(PG_referenced); + continue; + } + + new_flags &= ~LRU_GEN_MASK; + new_flags |= (gen + 1UL) << LRU_GEN_PGOFF; + new_flags &= ~(LRU_REFS_MASK | LRU_REFS_FLAGS); + } while (new_flags != old_flags && + cmpxchg(&folio->flags, old_flags, new_flags) != old_flags); + + return ((old_flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1; +} + +static int folio_inc_gen(struct lruvec *lruvec, struct folio *folio, bool reclaiming) +{ + unsigned long old_flags, new_flags; + int type = folio_is_file_lru(folio); + struct lru_gen_struct *lrugen = &lruvec->lrugen; + int new_gen, old_gen = lru_gen_from_seq(lrugen->min_seq[type]); + + do { + new_flags = old_flags = READ_ONCE(folio->flags); + VM_BUG_ON_FOLIO(!(new_flags & LRU_GEN_MASK), folio); + + new_gen = ((new_flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1; + /* folio_update_gen() has promoted this page? 
*/ + if (new_gen >= 0 && new_gen != old_gen) + return new_gen; + + new_gen = (old_gen + 1) % MAX_NR_GENS; + + new_flags &= ~LRU_GEN_MASK; + new_flags |= (new_gen + 1UL) << LRU_GEN_PGOFF; + new_flags &= ~(LRU_REFS_MASK | LRU_REFS_FLAGS); + /* for folio_end_writeback() */ + if (reclaiming) + new_flags |= BIT(PG_reclaim); + } while (cmpxchg(&folio->flags, old_flags, new_flags) != old_flags); + + lru_gen_update_size(lruvec, folio, old_gen, new_gen); + + return new_gen; +} + +static void update_batch_size(struct lru_gen_mm_walk *walk, struct folio *folio, + int old_gen, int new_gen) +{ + int type = folio_is_file_lru(folio); + int zone = folio_zonenum(folio); + int delta = folio_nr_pages(folio); + + VM_BUG_ON(old_gen >= MAX_NR_GENS); + VM_BUG_ON(new_gen >= MAX_NR_GENS); + + walk->batched++; + + walk->nr_pages[old_gen][type][zone] -= delta; + walk->nr_pages[new_gen][type][zone] += delta; +} + +static void reset_batch_size(struct lruvec *lruvec, struct lru_gen_mm_walk *walk) +{ + int gen, type, zone; + struct lru_gen_struct *lrugen = &lruvec->lrugen; + + walk->batched = 0; + + for_each_gen_type_zone(gen, type, zone) { + enum lru_list lru = type * LRU_INACTIVE_FILE; + int delta = walk->nr_pages[gen][type][zone]; + + if (!delta) + continue; + + walk->nr_pages[gen][type][zone] = 0; + WRITE_ONCE(lrugen->nr_pages[gen][type][zone], + lrugen->nr_pages[gen][type][zone] + delta); + + if (lru_gen_is_active(lruvec, gen)) + lru += LRU_ACTIVE; + __update_lru_size(lruvec, lru, zone, delta); + } +} + +static int should_skip_vma(unsigned long start, unsigned long end, struct mm_walk *walk) +{ + struct address_space *mapping; + struct vm_area_struct *vma = walk->vma; + struct lru_gen_mm_walk *priv = walk->private; + + if (!vma_is_accessible(vma) || is_vm_hugetlb_page(vma) || + (vma->vm_flags & (VM_LOCKED | VM_SPECIAL | VM_SEQ_READ | VM_RAND_READ)) || + vma == get_gate_vma(vma->vm_mm)) + return true; + + if (vma_is_anonymous(vma)) + return !priv->can_swap; + + if (WARN_ON_ONCE(!vma->vm_file 
|| !vma->vm_file->f_mapping)) + return true; + + mapping = vma->vm_file->f_mapping; + if (mapping_unevictable(mapping)) + return true; + + /* check readpage to exclude special mappings like dax, etc. */ + return shmem_mapping(mapping) ? !priv->can_swap : !mapping->a_ops->readpage; +} + +/* + * Some userspace memory allocators map many single-page VMAs. Instead of + * returning back to the PGD table for each of such VMAs, finish an entire PMD + * table to reduce zigzags and improve cache performance. + */ +static bool get_next_vma(struct mm_walk *walk, unsigned long mask, unsigned long size, + unsigned long *start, unsigned long *end) +{ + unsigned long next = round_up(*end, size); + + VM_BUG_ON(mask & size); + VM_BUG_ON(*start >= *end); + VM_BUG_ON((next & mask) != (*start & mask)); + + while (walk->vma) { + if (next >= walk->vma->vm_end) { + walk->vma = walk->vma->vm_next; + continue; + } + + if ((next & mask) != (walk->vma->vm_start & mask)) + return false; + + if (should_skip_vma(walk->vma->vm_start, walk->vma->vm_end, walk)) { + walk->vma = walk->vma->vm_next; + continue; + } + + *start = max(next, walk->vma->vm_start); + next = (next | ~mask) + 1; + /* rounded-up boundaries can wrap to 0 */ + *end = next && next < walk->vma->vm_end ? 
next : walk->vma->vm_end; + + return true; + } + + return false; +} + +static bool suitable_to_scan(int total, int young) +{ + int n = clamp_t(int, cache_line_size() / sizeof(pte_t), 2, 8); + + /* suitable if the average number of young PTEs per cacheline is >=1 */ + return young * n >= total; +} + +static bool walk_pte_range(pmd_t *pmd, unsigned long start, unsigned long end, + struct mm_walk *walk) +{ + int i; + pte_t *pte; + spinlock_t *ptl; + unsigned long addr; + int total = 0; + int young = 0; + struct lru_gen_mm_walk *priv = walk->private; + struct mem_cgroup *memcg = lruvec_memcg(priv->lruvec); + struct pglist_data *pgdat = lruvec_pgdat(priv->lruvec); + int old_gen, new_gen = lru_gen_from_seq(priv->max_seq); + + VM_BUG_ON(pmd_leaf(*pmd)); + + pte = pte_offset_map_lock(walk->mm, pmd, start & PMD_MASK, &ptl); + arch_enter_lazy_mmu_mode(); +restart: + for (i = pte_index(start), addr = start; addr != end; i++, addr += PAGE_SIZE) { + struct folio *folio; + unsigned long pfn = pte_pfn(pte[i]); + + VM_BUG_ON(addr < walk->vma->vm_start || addr >= walk->vma->vm_end); + + total++; + priv->mm_stats[MM_PTE_TOTAL]++; + + if (!pte_present(pte[i]) || is_zero_pfn(pfn)) + continue; + + if (WARN_ON_ONCE(pte_devmap(pte[i]) || pte_special(pte[i]))) + continue; + + if (!pte_young(pte[i])) { + priv->mm_stats[MM_PTE_OLD]++; + continue; + } + + VM_BUG_ON(!pfn_valid(pfn)); + if (pfn < pgdat->node_start_pfn || pfn >= pgdat_end_pfn(pgdat)) + continue; + + folio = pfn_folio(pfn); + if (folio_nid(folio) != pgdat->node_id) + continue; + + if (folio_memcg_rcu(folio) != memcg) + continue; + + if (!ptep_test_and_clear_young(walk->vma, addr, pte + i)) + continue; + + young++; + priv->mm_stats[MM_PTE_YOUNG]++; + + if (pte_dirty(pte[i]) && !folio_test_dirty(folio) && + !(folio_test_anon(folio) && folio_test_swapbacked(folio) && + !folio_test_swapcache(folio))) + folio_mark_dirty(folio); + + old_gen = folio_update_gen(folio, new_gen); + if (old_gen >= 0 && old_gen != new_gen) + 
update_batch_size(priv, folio, old_gen, new_gen); + } + + if (i < PTRS_PER_PTE && get_next_vma(walk, PMD_MASK, PAGE_SIZE, &start, &end)) + goto restart; + + arch_leave_lazy_mmu_mode(); + pte_unmap_unlock(pte, ptl); + + return suitable_to_scan(total, young); +} + +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) +static void walk_pmd_range_locked(pud_t *pud, unsigned long next, struct vm_area_struct *vma, + struct mm_walk *walk, unsigned long *start) +{ + int i; + pmd_t *pmd; + spinlock_t *ptl; + struct lru_gen_mm_walk *priv = walk->private; + struct mem_cgroup *memcg = lruvec_memcg(priv->lruvec); + struct pglist_data *pgdat = lruvec_pgdat(priv->lruvec); + int old_gen, new_gen = lru_gen_from_seq(priv->max_seq); + + VM_BUG_ON(pud_leaf(*pud)); + + /* try to batch at most 1+MIN_LRU_BATCH+1 entries */ + if (*start == -1) { + *start = next; + return; + } + + i = next == -1 ? 0 : pmd_index(next) - pmd_index(*start); + if (i && i <= MIN_LRU_BATCH) { + __set_bit(i - 1, priv->bitmap); + return; + } + + pmd = pmd_offset(pud, *start); + ptl = pmd_lock(walk->mm, pmd); + arch_enter_lazy_mmu_mode(); + + do { + struct folio *folio; + unsigned long pfn = pmd_pfn(pmd[i]); + unsigned long addr = i ? 
(*start & PMD_MASK) + i * PMD_SIZE : *start; + + VM_BUG_ON(addr < vma->vm_start || addr >= vma->vm_end); + + if (!pmd_present(pmd[i]) || is_huge_zero_pmd(pmd[i])) + goto next; + + if (WARN_ON_ONCE(pmd_devmap(pmd[i]))) + goto next; + + if (!pmd_trans_huge(pmd[i])) { + if (IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) && + get_cap(LRU_GEN_NONLEAF_YOUNG)) + pmdp_test_and_clear_young(vma, addr, pmd + i); + goto next; + } + + VM_BUG_ON(!pfn_valid(pfn)); + if (pfn < pgdat->node_start_pfn || pfn >= pgdat_end_pfn(pgdat)) + goto next; + + folio = pfn_folio(pfn); + if (folio_nid(folio) != pgdat->node_id) + goto next; + + if (folio_memcg_rcu(folio) != memcg) + goto next; + + if (!pmdp_test_and_clear_young(vma, addr, pmd + i)) + goto next; + + priv->mm_stats[MM_PTE_YOUNG]++; + + if (pmd_dirty(pmd[i]) && !folio_test_dirty(folio) && + !(folio_test_anon(folio) && folio_test_swapbacked(folio) && + !folio_test_swapcache(folio))) + folio_mark_dirty(folio); + + old_gen = folio_update_gen(folio, new_gen); + if (old_gen >= 0 && old_gen != new_gen) + update_batch_size(priv, folio, old_gen, new_gen); +next: + i = i > MIN_LRU_BATCH ? 
0 : + find_next_bit(priv->bitmap, MIN_LRU_BATCH, i) + 1; + } while (i <= MIN_LRU_BATCH); + + arch_leave_lazy_mmu_mode(); + spin_unlock(ptl); + + *start = -1; + bitmap_zero(priv->bitmap, MIN_LRU_BATCH); +} +#else +static void walk_pmd_range_locked(pud_t *pud, unsigned long next, struct vm_area_struct *vma, + struct mm_walk *walk, unsigned long *start) +{ +} +#endif + +static void walk_pmd_range(pud_t *pud, unsigned long start, unsigned long end, + struct mm_walk *walk) +{ + int i; + pmd_t *pmd; + unsigned long next; + unsigned long addr; + struct vm_area_struct *vma; + unsigned long pos = -1; + struct lru_gen_mm_walk *priv = walk->private; + + VM_BUG_ON(pud_leaf(*pud)); + + /* + * Finish an entire PMD in two passes: the first only reaches to PTE + * tables to avoid taking the PMD lock; the second, if necessary, takes + * the PMD lock to clear the accessed bit in PMD entries. + */ + pmd = pmd_offset(pud, start & PUD_MASK); +restart: + /* walk_pte_range() may call get_next_vma() */ + vma = walk->vma; + for (i = pmd_index(start), addr = start; addr != end; i++, addr = next) { + pmd_t val = pmd_read_atomic(pmd + i); + + /* for pmd_read_atomic() */ + barrier(); + + next = pmd_addr_end(addr, end); + + if (!pmd_present(val)) { + priv->mm_stats[MM_PTE_TOTAL]++; + continue; + } + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + if (pmd_trans_huge(val)) { + unsigned long pfn = pmd_pfn(val); + struct pglist_data *pgdat = lruvec_pgdat(priv->lruvec); + + priv->mm_stats[MM_PTE_TOTAL]++; + + if (is_huge_zero_pmd(val)) + continue; + + if (!pmd_young(val)) { + priv->mm_stats[MM_PTE_OLD]++; + continue; + } + + if (pfn < pgdat->node_start_pfn || pfn >= pgdat_end_pfn(pgdat)) + continue; + + walk_pmd_range_locked(pud, addr, vma, walk, &pos); + continue; + } +#endif + priv->mm_stats[MM_PMD_TOTAL]++; + +#ifdef CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG + if (get_cap(LRU_GEN_NONLEAF_YOUNG)) { + if (!pmd_young(val)) + continue; + + walk_pmd_range_locked(pud, addr, vma, walk, &pos); + } +#endif + if 
(!priv->full_scan && !test_bloom_filter(priv->lruvec, priv->max_seq, pmd + i)) + continue; + + priv->mm_stats[MM_PMD_FOUND]++; + + if (!walk_pte_range(&val, addr, next, walk)) + continue; + + priv->mm_stats[MM_PMD_ADDED]++; + + /* carry over to the next generation */ + update_bloom_filter(priv->lruvec, priv->max_seq + 1, pmd + i); + } + + walk_pmd_range_locked(pud, -1, vma, walk, &pos); + + if (i < PTRS_PER_PMD && get_next_vma(walk, PUD_MASK, PMD_SIZE, &start, &end)) + goto restart; +} + +static int walk_pud_range(p4d_t *p4d, unsigned long start, unsigned long end, + struct mm_walk *walk) +{ + int i; + pud_t *pud; + unsigned long addr; + unsigned long next; + struct lru_gen_mm_walk *priv = walk->private; + + VM_BUG_ON(p4d_leaf(*p4d)); + + pud = pud_offset(p4d, start & P4D_MASK); +restart: + for (i = pud_index(start), addr = start; addr != end; i++, addr = next) { + pud_t val = READ_ONCE(pud[i]); + + next = pud_addr_end(addr, end); + + if (!pud_present(val) || WARN_ON_ONCE(pud_leaf(val))) + continue; + + walk_pmd_range(&val, addr, next, walk); + + if (priv->batched >= MAX_LRU_BATCH) { + end = (addr | ~PUD_MASK) + 1; + goto done; + } + } + + if (i < PTRS_PER_PUD && get_next_vma(walk, P4D_MASK, PUD_SIZE, &start, &end)) + goto restart; + + end = round_up(end, P4D_SIZE); +done: + /* rounded-up boundaries can wrap to 0 */ + priv->next_addr = end && walk->vma ? 
max(end, walk->vma->vm_start) : 0; + + return -EAGAIN; +} + +static void walk_mm(struct lruvec *lruvec, struct mm_struct *mm, struct lru_gen_mm_walk *walk) +{ + static const struct mm_walk_ops mm_walk_ops = { + .test_walk = should_skip_vma, + .p4d_entry = walk_pud_range, + }; + + int err; + struct mem_cgroup *memcg = lruvec_memcg(lruvec); + + walk->next_addr = FIRST_USER_ADDRESS; + + do { + err = -EBUSY; + + /* folio_update_gen() requires stable folio_memcg() */ + if (!mem_cgroup_trylock_pages(memcg)) + break; + + /* the caller might be holding the lock for write */ + if (mmap_read_trylock(mm)) { + unsigned long start = walk->next_addr; + unsigned long end = mm->highest_vm_end; + + err = walk_page_range(mm, start, end, &mm_walk_ops, walk); + + mmap_read_unlock(mm); + + if (walk->batched) { + spin_lock_irq(&lruvec->lru_lock); + reset_batch_size(lruvec, walk); + spin_unlock_irq(&lruvec->lru_lock); + } + } + + mem_cgroup_unlock_pages(); + + cond_resched(); + } while (err == -EAGAIN && walk->next_addr && !mm_is_oom_victim(mm)); +} + +static struct lru_gen_mm_walk *alloc_mm_walk(void) +{ + if (current->reclaim_state && current->reclaim_state->mm_walk) + return current->reclaim_state->mm_walk; + + return kzalloc(sizeof(struct lru_gen_mm_walk), + __GFP_HIGH | __GFP_NOMEMALLOC | __GFP_NOWARN); +} + +static void free_mm_walk(struct lru_gen_mm_walk *walk) +{ + if (!current->reclaim_state || !current->reclaim_state->mm_walk) + kfree(walk); +} + +static void inc_min_seq(struct lruvec *lruvec) +{ + int type; + struct lru_gen_struct *lrugen = &lruvec->lrugen; + + VM_BUG_ON(!seq_is_valid(lruvec)); + + for (type = 0; type < ANON_AND_FILE; type++) { + if (get_nr_gens(lruvec, type) != MAX_NR_GENS) + continue; + + reset_ctrl_pos(lruvec, type, true); + WRITE_ONCE(lrugen->min_seq[type], lrugen->min_seq[type] + 1); + } +} + +static bool try_to_inc_min_seq(struct lruvec *lruvec, bool can_swap) +{ + int gen, type, zone; + bool success = false; + struct lru_gen_struct *lrugen = 
&lruvec->lrugen; + DEFINE_MIN_SEQ(lruvec); + + VM_BUG_ON(!seq_is_valid(lruvec)); + + for (type = !can_swap; type < ANON_AND_FILE; type++) { + while (min_seq[type] + MIN_NR_GENS <= lrugen->max_seq) { + gen = lru_gen_from_seq(min_seq[type]); + + for (zone = 0; zone < MAX_NR_ZONES; zone++) { + if (!list_empty(&lrugen->lists[gen][type][zone])) + goto next; + } + + min_seq[type]++; + } +next: + ; + } + + /* see the comment on lru_gen_struct */ + if (can_swap) { + min_seq[LRU_GEN_ANON] = min(min_seq[LRU_GEN_ANON], min_seq[LRU_GEN_FILE]); + min_seq[LRU_GEN_FILE] = max(min_seq[LRU_GEN_ANON], lrugen->min_seq[LRU_GEN_FILE]); + } + + for (type = !can_swap; type < ANON_AND_FILE; type++) { + if (min_seq[type] == lrugen->min_seq[type]) + continue; + + reset_ctrl_pos(lruvec, type, true); + WRITE_ONCE(lrugen->min_seq[type], min_seq[type]); + success = true; + } + + return success; +} + +static void inc_max_seq(struct lruvec *lruvec) +{ + int prev, next; + int type, zone; + struct lru_gen_struct *lrugen = &lruvec->lrugen; + + spin_lock_irq(&lruvec->lru_lock); + + VM_BUG_ON(!seq_is_valid(lruvec)); + + inc_min_seq(lruvec); + + /* update the active/inactive LRU sizes for compatibility */ + prev = lru_gen_from_seq(lrugen->max_seq - 1); + next = lru_gen_from_seq(lrugen->max_seq + 1); + + for (type = 0; type < ANON_AND_FILE; type++) { + for (zone = 0; zone < MAX_NR_ZONES; zone++) { + enum lru_list lru = type * LRU_INACTIVE_FILE; + long delta = lrugen->nr_pages[prev][type][zone] - + lrugen->nr_pages[next][type][zone]; + + if (!delta) + continue; + + __update_lru_size(lruvec, lru, zone, delta); + __update_lru_size(lruvec, lru + LRU_ACTIVE, zone, -delta); + } + } + + for (type = 0; type < ANON_AND_FILE; type++) + reset_ctrl_pos(lruvec, type, false); + + WRITE_ONCE(lrugen->timestamps[next], jiffies); + /* make sure preceding modifications appear */ + smp_store_release(&lrugen->max_seq, lrugen->max_seq + 1); + + spin_unlock_irq(&lruvec->lru_lock); +} + +static bool try_to_inc_max_seq(struct 
lruvec *lruvec, unsigned long max_seq, + struct scan_control *sc, bool can_swap, bool full_scan) +{ + bool success; + struct lru_gen_mm_walk *walk; + struct mm_struct *mm = NULL; + struct lru_gen_struct *lrugen = &lruvec->lrugen; + + VM_BUG_ON(max_seq > READ_ONCE(lrugen->max_seq)); + + /* + * If the hardware doesn't automatically set the accessed bit, fallback + * to lru_gen_look_around(), which only clears the accessed bit in a + * handful of PTEs. Spreading the work out over a period of time usually + * is less efficient, but it avoids bursty page faults. + */ + if (!full_scan && (!arch_has_hw_pte_young() || !get_cap(LRU_GEN_MM_WALK))) { + success = iterate_mm_list_nowalk(lruvec, max_seq); + goto done; + } + + walk = alloc_mm_walk(); + if (!walk) { + success = iterate_mm_list_nowalk(lruvec, max_seq); + goto done; + } + + walk->lruvec = lruvec; + walk->max_seq = max_seq; + walk->can_swap = can_swap; + walk->full_scan = full_scan; + + do { + success = iterate_mm_list(lruvec, walk, &mm); + if (mm) + walk_mm(lruvec, mm, walk); + + cond_resched(); + } while (mm); + + free_mm_walk(walk); +done: + if (!success) { + if (!current_is_kswapd() && !sc->priority) + wait_event_killable(lruvec->mm_state.wait, + max_seq < READ_ONCE(lrugen->max_seq)); + + return max_seq < READ_ONCE(lrugen->max_seq); + } + + VM_BUG_ON(max_seq != READ_ONCE(lrugen->max_seq)); + + inc_max_seq(lruvec); + /* either this sees any waiters or they will see updated max_seq */ + if (wq_has_sleeper(&lruvec->mm_state.wait)) + wake_up_all(&lruvec->mm_state.wait); + + wakeup_flusher_threads(WB_REASON_VMSCAN); + + return true; +} + +static long get_nr_evictable(struct lruvec *lruvec, unsigned long max_seq, + unsigned long *min_seq, bool can_swap, bool *need_aging) +{ + int gen, type, zone; + long old = 0; + long young = 0; + long total = 0; + struct lru_gen_struct *lrugen = &lruvec->lrugen; + + for (type = !can_swap; type < ANON_AND_FILE; type++) { + unsigned long seq; + + for (seq = min_seq[type]; seq <= 
max_seq; seq++) { + long size = 0; + + gen = lru_gen_from_seq(seq); + + for (zone = 0; zone < MAX_NR_ZONES; zone++) + size += READ_ONCE(lrugen->nr_pages[gen][type][zone]); + + total += size; + if (seq == max_seq) + young += size; + if (seq + MIN_NR_GENS == max_seq) + old += size; + } + } + + /* try to spread pages out across MIN_NR_GENS+1 generations */ + if (min_seq[LRU_GEN_FILE] + MIN_NR_GENS > max_seq) + *need_aging = true; + else if (min_seq[LRU_GEN_FILE] + MIN_NR_GENS < max_seq) + *need_aging = false; + else if (young * MIN_NR_GENS > total) + *need_aging = true; + else if (old * (MIN_NR_GENS + 2) < total) + *need_aging = true; + else + *need_aging = false; + + return total > 0 ? total : 0; +} + +static bool age_lruvec(struct lruvec *lruvec, struct scan_control *sc, + unsigned long min_ttl) +{ + bool need_aging; + long nr_to_scan; + int swappiness = get_swappiness(lruvec, sc); + struct mem_cgroup *memcg = lruvec_memcg(lruvec); + DEFINE_MAX_SEQ(lruvec); + DEFINE_MIN_SEQ(lruvec); + + if (min_ttl) { + int gen = lru_gen_from_seq(min_seq[LRU_GEN_FILE]); + unsigned long birth = READ_ONCE(lruvec->lrugen.timestamps[gen]); + + if (time_is_after_jiffies(birth + min_ttl)) + return false; + } + + mem_cgroup_calculate_protection(NULL, memcg); + + if (mem_cgroup_below_min(memcg)) + return false; + + nr_to_scan = get_nr_evictable(lruvec, max_seq, min_seq, swappiness, &need_aging); + if (!nr_to_scan) + return false; + + nr_to_scan >>= sc->priority; + + if (!mem_cgroup_online(memcg)) + nr_to_scan++; + + if (nr_to_scan && need_aging && (!mem_cgroup_below_low(memcg) || sc->memcg_low_reclaim)) + try_to_inc_max_seq(lruvec, max_seq, sc, swappiness, false); + + return true; +} + +/* to protect the working set of the last N jiffies */ +static unsigned long lru_gen_min_ttl __read_mostly; + +static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc) +{ + struct mem_cgroup *memcg; + bool success = false; + unsigned long min_ttl = READ_ONCE(lru_gen_min_ttl); + + 
VM_BUG_ON(!current_is_kswapd()); + + /* + * To reduce the chance of going into the aging path or swapping, which + * can be costly, optimistically skip them unless their corresponding + * flags were cleared in the eviction path. This improves the overall + * performance when multiple memcgs are available. + */ + if (!sc->memcgs_need_aging) { + sc->memcgs_need_aging = true; + sc->memcgs_avoid_swapping = !sc->memcgs_need_swapping; + sc->memcgs_need_swapping = true; + return; + } + + sc->memcgs_need_swapping = true; + sc->memcgs_avoid_swapping = true; + + current->reclaim_state->mm_walk = &pgdat->mm_walk; + + memcg = mem_cgroup_iter(NULL, NULL, NULL); + do { + struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat); + + if (age_lruvec(lruvec, sc, min_ttl)) + success = true; + + cond_resched(); + } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL))); + + current->reclaim_state->mm_walk = NULL; + + /* + * The main goal is to OOM kill if every generation from all memcgs is + * younger than min_ttl. However, another theoretical possibility is all + * memcgs are either below min or empty. + */ + if (!success && mutex_trylock(&oom_lock)) { + struct oom_control oc = { + .gfp_mask = sc->gfp_mask, + .order = sc->order, + }; + + out_of_memory(&oc); + + mutex_unlock(&oom_lock); + } +} + +/* + * This function exploits spatial locality when shrink_page_list() walks the + * rmap. It scans the adjacent PTEs of a young PTE and promotes hot pages. + * If the scan was done cacheline efficiently, it adds the PMD entry pointing + * to the PTE table to the Bloom filter. This process is a feedback loop from + * the eviction to the aging. 
+ */ +void lru_gen_look_around(struct page_vma_mapped_walk *pvmw) +{ + int i; + pte_t *pte; + unsigned long start; + unsigned long end; + unsigned long addr; + struct folio *folio; + struct lru_gen_mm_walk *walk; + int young = 0; + unsigned long bitmap[BITS_TO_LONGS(MIN_LRU_BATCH)] = {}; + struct mem_cgroup *memcg = page_memcg(pvmw->page); + struct pglist_data *pgdat = page_pgdat(pvmw->page); + struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat); + DEFINE_MAX_SEQ(lruvec); + int old_gen, new_gen = lru_gen_from_seq(max_seq); + + lockdep_assert_held(pvmw->ptl); + VM_BUG_ON_PAGE(PageLRU(pvmw->page), pvmw->page); + + start = max(pvmw->address & PMD_MASK, pvmw->vma->vm_start); + end = pmd_addr_end(pvmw->address, pvmw->vma->vm_end); + + if (end - start > MIN_LRU_BATCH * PAGE_SIZE) { + if (pvmw->address - start < MIN_LRU_BATCH * PAGE_SIZE / 2) + end = start + MIN_LRU_BATCH * PAGE_SIZE; + else if (end - pvmw->address < MIN_LRU_BATCH * PAGE_SIZE / 2) + start = end - MIN_LRU_BATCH * PAGE_SIZE; + else { + start = pvmw->address - MIN_LRU_BATCH * PAGE_SIZE / 2; + end = pvmw->address + MIN_LRU_BATCH * PAGE_SIZE / 2; + } + } + + pte = pvmw->pte - (pvmw->address - start) / PAGE_SIZE; + + rcu_read_lock(); + arch_enter_lazy_mmu_mode(); + + for (i = 0, addr = start; addr != end; i++, addr += PAGE_SIZE) { + unsigned long pfn = pte_pfn(pte[i]); + + VM_BUG_ON(addr < pvmw->vma->vm_start || addr >= pvmw->vma->vm_end); + + if (!pte_present(pte[i]) || is_zero_pfn(pfn)) + continue; + + if (WARN_ON_ONCE(pte_devmap(pte[i]) || pte_special(pte[i]))) + continue; + + if (!pte_young(pte[i])) + continue; + + VM_BUG_ON(!pfn_valid(pfn)); + if (pfn < pgdat->node_start_pfn || pfn >= pgdat_end_pfn(pgdat)) + continue; + + folio = pfn_folio(pfn); + if (folio_nid(folio) != pgdat->node_id) + continue; + + if (folio_memcg_rcu(folio) != memcg) + continue; + + if (!ptep_test_and_clear_young(pvmw->vma, addr, pte + i)) + continue; + + young++; + + if (pte_dirty(pte[i]) && !folio_test_dirty(folio) && + 
!(folio_test_anon(folio) && folio_test_swapbacked(folio) && + !folio_test_swapcache(folio))) + folio_mark_dirty(folio); + + old_gen = folio_lru_gen(folio); + if (old_gen < 0) + folio_set_referenced(folio); + else if (old_gen != new_gen) + __set_bit(i, bitmap); + } + + arch_leave_lazy_mmu_mode(); + rcu_read_unlock(); + + /* feedback from rmap walkers to page table walkers */ + if (suitable_to_scan(i, young)) + update_bloom_filter(lruvec, max_seq, pvmw->pmd); + + walk = current->reclaim_state ? current->reclaim_state->mm_walk : NULL; + + if (!walk && bitmap_weight(bitmap, MIN_LRU_BATCH) < PAGEVEC_SIZE) { + for_each_set_bit(i, bitmap, MIN_LRU_BATCH) { + folio = page_folio(pte_page(pte[i])); + folio_activate(folio); + } + return; + } + + /* folio_update_gen() requires stable folio_memcg() */ + if (!mem_cgroup_trylock_pages(memcg)) + return; + + if (!walk) { + spin_lock_irq(&lruvec->lru_lock); + new_gen = lru_gen_from_seq(lruvec->lrugen.max_seq); + } + + for_each_set_bit(i, bitmap, MIN_LRU_BATCH) { + folio = page_folio(pte_page(pte[i])); + if (folio_memcg_rcu(folio) != memcg) + continue; + + old_gen = folio_update_gen(folio, new_gen); + if (old_gen < 0 || old_gen == new_gen) + continue; + + if (walk) + update_batch_size(walk, folio, old_gen, new_gen); + else + lru_gen_update_size(lruvec, folio, old_gen, new_gen); + } + + if (!walk) + spin_unlock_irq(&lruvec->lru_lock); + + mem_cgroup_unlock_pages(); +} + +/****************************************************************************** + * the eviction + ******************************************************************************/ + +static bool sort_folio(struct lruvec *lruvec, struct folio *folio, int tier_idx) +{ + bool success; + int gen = folio_lru_gen(folio); + int type = folio_is_file_lru(folio); + int zone = folio_zonenum(folio); + int tier = folio_lru_tier(folio); + int delta = folio_nr_pages(folio); + struct lru_gen_struct *lrugen = &lruvec->lrugen; + + VM_BUG_ON_FOLIO(gen >= MAX_NR_GENS, folio); + + if 
(!folio_evictable(folio)) { + success = lru_gen_del_folio(lruvec, folio, true); + VM_BUG_ON_FOLIO(!success, folio); + folio_set_unevictable(folio); + lruvec_add_folio(lruvec, folio); + __count_vm_events(UNEVICTABLE_PGCULLED, delta); + return true; + } + + if (type == LRU_GEN_FILE && folio_test_anon(folio) && folio_test_dirty(folio)) { + success = lru_gen_del_folio(lruvec, folio, true); + VM_BUG_ON_FOLIO(!success, folio); + folio_set_swapbacked(folio); + lruvec_add_folio_tail(lruvec, folio); + return true; + } + + if (gen != lru_gen_from_seq(lrugen->min_seq[type])) { + list_move(&folio->lru, &lrugen->lists[gen][type][zone]); + return true; + } + + if (tier > tier_idx) { + int hist = lru_hist_from_seq(lrugen->min_seq[type]); + + gen = folio_inc_gen(lruvec, folio, false); + list_move_tail(&folio->lru, &lrugen->lists[gen][type][zone]); + + WRITE_ONCE(lrugen->protected[hist][type][tier - 1], + lrugen->protected[hist][type][tier - 1] + delta); + __mod_lruvec_state(lruvec, WORKINGSET_ACTIVATE_BASE + type, delta); + return true; + } + + if (folio_test_locked(folio) || folio_test_writeback(folio) || + (type == LRU_GEN_FILE && folio_test_dirty(folio))) { + gen = folio_inc_gen(lruvec, folio, true); + list_move(&folio->lru, &lrugen->lists[gen][type][zone]); + return true; + } + + return false; +} + +static bool isolate_folio(struct lruvec *lruvec, struct folio *folio, struct scan_control *sc) +{ + bool success; + + if (!sc->may_unmap && folio_mapped(folio)) + return false; + + if (!(sc->may_writepage && (sc->gfp_mask & __GFP_IO)) && + (folio_test_dirty(folio) || + (folio_test_anon(folio) && !folio_test_swapcache(folio)))) + return false; + + if (!folio_try_get(folio)) + return false; + + if (!folio_test_clear_lru(folio)) { + folio_put(folio); + return false; + } + + success = lru_gen_del_folio(lruvec, folio, true); + VM_BUG_ON_FOLIO(!success, folio); + + return true; +} + +static int scan_folios(struct lruvec *lruvec, struct scan_control *sc, + int type, int tier, struct 
list_head *list) +{ + int gen, zone; + enum vm_event_item item; + int sorted = 0; + int scanned = 0; + int isolated = 0; + int remaining = MAX_LRU_BATCH; + struct lru_gen_struct *lrugen = &lruvec->lrugen; + struct mem_cgroup *memcg = lruvec_memcg(lruvec); + + VM_BUG_ON(!list_empty(list)); + + if (get_nr_gens(lruvec, type) == MIN_NR_GENS) + return 0; + + gen = lru_gen_from_seq(lrugen->min_seq[type]); + + for (zone = sc->reclaim_idx; zone >= 0; zone--) { + LIST_HEAD(moved); + int skipped = 0; + struct list_head *head = &lrugen->lists[gen][type][zone]; + + while (!list_empty(head)) { + struct folio *folio = lru_to_folio(head); + int delta = folio_nr_pages(folio); + + VM_BUG_ON_FOLIO(folio_test_unevictable(folio), folio); + VM_BUG_ON_FOLIO(folio_test_active(folio), folio); + VM_BUG_ON_FOLIO(folio_is_file_lru(folio) != type, folio); + VM_BUG_ON_FOLIO(folio_zonenum(folio) != zone, folio); + + scanned += delta; + + if (sort_folio(lruvec, folio, tier)) + sorted += delta; + else if (isolate_folio(lruvec, folio, sc)) { + list_add(&folio->lru, list); + isolated += delta; + } else { + list_move(&folio->lru, &moved); + skipped += delta; + } + + if (!--remaining || max(isolated, skipped) >= MIN_LRU_BATCH) + break; + } + + if (skipped) { + list_splice(&moved, head); + __count_zid_vm_events(PGSCAN_SKIP, zone, skipped); + } + + if (!remaining || isolated >= MIN_LRU_BATCH) + break; + } + + item = current_is_kswapd() ? PGSCAN_KSWAPD : PGSCAN_DIRECT; + if (!cgroup_reclaim(sc)) { + __count_vm_events(item, isolated); + __count_vm_events(PGREFILL, sorted); + } + __count_memcg_events(memcg, item, isolated); + __count_memcg_events(memcg, PGREFILL, sorted); + __count_vm_events(PGSCAN_ANON + type, isolated); + + /* + * There might not be eligible pages due to reclaim_idx, may_unmap and + * may_writepage. Check the remaining to prevent livelock if there is no + * progress. + */ + return isolated || !remaining ? 
scanned : 0; +} + +static int get_tier_idx(struct lruvec *lruvec, int type) +{ + int tier; + struct ctrl_pos sp, pv; + + /* + * To leave a margin for fluctuations, use a larger gain factor (1:2). + * This value is chosen because any other tier would have at least twice + * as many refaults as the first tier. + */ + read_ctrl_pos(lruvec, type, 0, 1, &sp); + for (tier = 1; tier < MAX_NR_TIERS; tier++) { + read_ctrl_pos(lruvec, type, tier, 2, &pv); + if (!positive_ctrl_err(&sp, &pv)) + break; + } + + return tier - 1; +} + +static int get_type_to_scan(struct lruvec *lruvec, int swappiness, int *tier_idx) +{ + int type, tier; + struct ctrl_pos sp, pv; + int gain[ANON_AND_FILE] = { swappiness, 200 - swappiness }; + + /* + * Compare the first tier of anon with that of file to determine which + * type to scan. Also need to compare other tiers of the selected type + * with the first tier of the other type to determine the last tier (of + * the selected type) to evict. + */ + read_ctrl_pos(lruvec, LRU_GEN_ANON, 0, gain[LRU_GEN_ANON], &sp); + read_ctrl_pos(lruvec, LRU_GEN_FILE, 0, gain[LRU_GEN_FILE], &pv); + type = positive_ctrl_err(&sp, &pv); + + read_ctrl_pos(lruvec, !type, 0, gain[!type], &sp); + for (tier = 1; tier < MAX_NR_TIERS; tier++) { + read_ctrl_pos(lruvec, type, tier, gain[type], &pv); + if (!positive_ctrl_err(&sp, &pv)) + break; + } + + *tier_idx = tier - 1; + + return type; +} + +static int isolate_folios(struct lruvec *lruvec, struct scan_control *sc, int swappiness, + int *type_scanned, struct list_head *list) +{ + int i; + int type; + int scanned; + int tier = -1; + DEFINE_MIN_SEQ(lruvec); + + VM_BUG_ON(!seq_is_valid(lruvec)); + + /* + * Try to make the obvious choice first. When anon and file are both + * available from the same generation, interpret swappiness 1 as file + * first and 200 as anon first. 
+ */ + if (!swappiness) + type = LRU_GEN_FILE; + else if (min_seq[LRU_GEN_ANON] < min_seq[LRU_GEN_FILE]) + type = LRU_GEN_ANON; + else if (swappiness == 1) + type = LRU_GEN_FILE; + else if (swappiness == 200) + type = LRU_GEN_ANON; + else + type = get_type_to_scan(lruvec, swappiness, &tier); + + for (i = !swappiness; i < ANON_AND_FILE; i++) { + if (tier < 0) + tier = get_tier_idx(lruvec, type); + + scanned = scan_folios(lruvec, sc, type, tier, list); + if (scanned) + break; + + type = !type; + tier = -1; + } + + *type_scanned = type; + + return scanned; +} + +static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swappiness, + bool *swapped) +{ + int type; + int scanned; + int reclaimed; + LIST_HEAD(list); + struct folio *folio; + enum vm_event_item item; + struct reclaim_stat stat; + struct lru_gen_mm_walk *walk; + struct mem_cgroup *memcg = lruvec_memcg(lruvec); + struct pglist_data *pgdat = lruvec_pgdat(lruvec); + + spin_lock_irq(&lruvec->lru_lock); + + scanned = isolate_folios(lruvec, sc, swappiness, &type, &list); + + if (try_to_inc_min_seq(lruvec, swappiness)) + scanned++; + + if (get_nr_gens(lruvec, LRU_GEN_FILE) == MIN_NR_GENS) + scanned = 0; + + spin_unlock_irq(&lruvec->lru_lock); + + if (list_empty(&list)) + return scanned; + + reclaimed = shrink_page_list(&list, pgdat, sc, &stat, false); + + /* + * To avoid livelock, don't add rejected pages back to the same lists + * they were isolated from. See lru_gen_add_folio(). + */ + list_for_each_entry(folio, &list, lru) { + if (folio_test_reclaim(folio) && + (folio_test_dirty(folio) || folio_test_writeback(folio))) + folio_clear_active(folio); + else if (folio_is_file_lru(folio) || folio_test_swapcache(folio)) + folio_set_active(folio); + + folio_clear_referenced(folio); + folio_clear_workingset(folio); + } + + spin_lock_irq(&lruvec->lru_lock); + + move_pages_to_lru(lruvec, &list); + + walk = current->reclaim_state ? 
current->reclaim_state->mm_walk : NULL; + if (walk && walk->batched) + reset_batch_size(lruvec, walk); + + item = current_is_kswapd() ? PGSTEAL_KSWAPD : PGSTEAL_DIRECT; + if (!cgroup_reclaim(sc)) + __count_vm_events(item, reclaimed); + __count_memcg_events(memcg, item, reclaimed); + __count_vm_events(PGSTEAL_ANON + type, reclaimed); + + spin_unlock_irq(&lruvec->lru_lock); + + mem_cgroup_uncharge_list(&list); + free_unref_page_list(&list); + + sc->nr_reclaimed += reclaimed; + + if (type == LRU_GEN_ANON && swapped) + *swapped = true; + + return scanned; +} + +static long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *sc, bool can_swap) +{ + bool need_aging; + long nr_to_scan; + struct mem_cgroup *memcg = lruvec_memcg(lruvec); + DEFINE_MAX_SEQ(lruvec); + DEFINE_MIN_SEQ(lruvec); + + if (mem_cgroup_below_min(memcg) || + (mem_cgroup_below_low(memcg) && !sc->memcg_low_reclaim)) + return 0; + + nr_to_scan = get_nr_evictable(lruvec, max_seq, min_seq, can_swap, &need_aging); + if (!nr_to_scan) + return 0; + + /* reset the priority if the target has been met */ + nr_to_scan >>= sc->nr_reclaimed < sc->nr_to_reclaim ? sc->priority : DEF_PRIORITY; + + if (!mem_cgroup_online(memcg)) + nr_to_scan++; + + if (!nr_to_scan) + return 0; + + if (!need_aging) { + sc->memcgs_need_aging = false; + return nr_to_scan; + } + + /* leave the work to lru_gen_age_node() */ + if (current_is_kswapd()) + return 0; + + /* try other memcgs before going to the aging path */ + if (!cgroup_reclaim(sc) && !sc->force_deactivate) { + sc->skipped_deactivate = true; + return 0; + } + + if (try_to_inc_max_seq(lruvec, max_seq, sc, can_swap, false)) + return nr_to_scan; + + return min_seq[LRU_GEN_FILE] + MIN_NR_GENS <= max_seq ? 
nr_to_scan : 0; +} + +static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) +{ + struct blk_plug plug; + long scanned = 0; + bool swapped = false; + unsigned long reclaimed = sc->nr_reclaimed; + struct pglist_data *pgdat = lruvec_pgdat(lruvec); + + lru_add_drain(); + + blk_start_plug(&plug); + + if (current_is_kswapd()) + current->reclaim_state->mm_walk = &pgdat->mm_walk; + + while (true) { + int delta; + int swappiness; + long nr_to_scan; + + if (sc->may_swap) + swappiness = get_swappiness(lruvec, sc); + else if (!cgroup_reclaim(sc) && get_swappiness(lruvec, sc)) + swappiness = 1; + else + swappiness = 0; + + nr_to_scan = get_nr_to_scan(lruvec, sc, swappiness); + if (!nr_to_scan) + break; + + delta = evict_folios(lruvec, sc, swappiness, &swapped); + if (!delta) + break; + + if (sc->memcgs_avoid_swapping && swappiness < 200 && swapped) + break; + + scanned += delta; + if (scanned >= nr_to_scan) { + if (!swapped && sc->nr_reclaimed - reclaimed >= MIN_LRU_BATCH) + sc->memcgs_need_swapping = false; + break; + } + + cond_resched(); + } + + if (current_is_kswapd()) + current->reclaim_state->mm_walk = NULL; + + blk_finish_plug(&plug); +} + +/****************************************************************************** + * state change + ******************************************************************************/ + +static bool __maybe_unused state_is_valid(struct lruvec *lruvec) +{ + struct lru_gen_struct *lrugen = &lruvec->lrugen; + + if (lrugen->enabled) { + enum lru_list lru; + + for_each_evictable_lru(lru) { + if (!list_empty(&lruvec->lists[lru])) + return false; + } + } else { + int gen, type, zone; + + for_each_gen_type_zone(gen, type, zone) { + if (!list_empty(&lrugen->lists[gen][type][zone])) + return false; + + /* unlikely but not a bug when reset_batch_size() is pending */ + VM_WARN_ON(lrugen->nr_pages[gen][type][zone]); + } + } + + return true; +} + +static bool fill_evictable(struct lruvec *lruvec) +{ + enum lru_list lru; + int 
remaining = MAX_LRU_BATCH; + + for_each_evictable_lru(lru) { + int type = is_file_lru(lru); + bool active = is_active_lru(lru); + struct list_head *head = &lruvec->lists[lru]; + + while (!list_empty(head)) { + bool success; + struct folio *folio = lru_to_folio(head); + + VM_BUG_ON_FOLIO(folio_test_unevictable(folio), folio); + VM_BUG_ON_FOLIO(folio_test_active(folio) != active, folio); + VM_BUG_ON_FOLIO(folio_is_file_lru(folio) != type, folio); + VM_BUG_ON_FOLIO(folio_lru_gen(folio) < MAX_NR_GENS, folio); + + lruvec_del_folio(lruvec, folio); + success = lru_gen_add_folio(lruvec, folio, false); + VM_BUG_ON(!success); + + if (!--remaining) + return false; + } + } + + return true; +} + +static bool drain_evictable(struct lruvec *lruvec) +{ + int gen, type, zone; + int remaining = MAX_LRU_BATCH; + + for_each_gen_type_zone(gen, type, zone) { + struct list_head *head = &lruvec->lrugen.lists[gen][type][zone]; + + while (!list_empty(head)) { + bool success; + struct folio *folio = lru_to_folio(head); + + VM_BUG_ON_FOLIO(folio_test_unevictable(folio), folio); + VM_BUG_ON_FOLIO(folio_test_active(folio), folio); + VM_BUG_ON_FOLIO(folio_is_file_lru(folio) != type, folio); + VM_BUG_ON_FOLIO(folio_zonenum(folio) != zone, folio); + + success = lru_gen_del_folio(lruvec, folio, false); + VM_BUG_ON(!success); + lruvec_add_folio(lruvec, folio); + + if (!--remaining) + return false; + } + } + + return true; +} + +static void lru_gen_change_state(bool enable) +{ + static DEFINE_MUTEX(state_mutex); + + struct mem_cgroup *memcg; + + cgroup_lock(); + cpus_read_lock(); + get_online_mems(); + mutex_lock(&state_mutex); + + if (enable == lru_gen_enabled()) + goto unlock; + + if (enable) + static_branch_enable_cpuslocked(&lru_gen_caps[LRU_GEN_CORE]); + else + static_branch_disable_cpuslocked(&lru_gen_caps[LRU_GEN_CORE]); + + memcg = mem_cgroup_iter(NULL, NULL, NULL); + do { + int nid; + + for_each_node(nid) { + struct lruvec *lruvec = get_lruvec(memcg, nid); + + if (!lruvec) + continue; + + 
spin_lock_irq(&lruvec->lru_lock); + + VM_BUG_ON(!seq_is_valid(lruvec)); + VM_BUG_ON(!state_is_valid(lruvec)); + + lruvec->lrugen.enabled = enable; + + while (!(enable ? fill_evictable(lruvec) : drain_evictable(lruvec))) { + spin_unlock_irq(&lruvec->lru_lock); + cond_resched(); + spin_lock_irq(&lruvec->lru_lock); + } + + spin_unlock_irq(&lruvec->lru_lock); + } + + cond_resched(); + } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL))); +unlock: + mutex_unlock(&state_mutex); + put_online_mems(); + cpus_read_unlock(); + cgroup_unlock(); +} + +/****************************************************************************** + * sysfs interface + ******************************************************************************/ + +static ssize_t show_min_ttl(struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + return sprintf(buf, "%u\n", jiffies_to_msecs(READ_ONCE(lru_gen_min_ttl))); +} + +static ssize_t store_min_ttl(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t len) +{ + unsigned int msecs; + + if (kstrtouint(buf, 0, &msecs)) + return -EINVAL; + + WRITE_ONCE(lru_gen_min_ttl, msecs_to_jiffies(msecs)); + + return len; +} + +static struct kobj_attribute lru_gen_min_ttl_attr = __ATTR( + min_ttl_ms, 0644, show_min_ttl, store_min_ttl +); + +static ssize_t show_enable(struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + unsigned int caps = 0; + + if (get_cap(LRU_GEN_CORE)) + caps |= BIT(LRU_GEN_CORE); + + if (arch_has_hw_pte_young() && get_cap(LRU_GEN_MM_WALK)) + caps |= BIT(LRU_GEN_MM_WALK); + + if (IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) && get_cap(LRU_GEN_NONLEAF_YOUNG)) + caps |= BIT(LRU_GEN_NONLEAF_YOUNG); + + return snprintf(buf, PAGE_SIZE, "0x%04x\n", caps); +} + +static ssize_t store_enable(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t len) +{ + int i; + unsigned int caps; + + if (tolower(*buf) == 'n') + caps = 0; + else if (tolower(*buf) == 'y') + caps = -1; + else if 
(kstrtouint(buf, 0, &caps)) + return -EINVAL; + + for (i = 0; i < NR_LRU_GEN_CAPS; i++) { + bool enable = caps & BIT(i); + + if (i == LRU_GEN_CORE) + lru_gen_change_state(enable); + else if (enable) + static_branch_enable(&lru_gen_caps[i]); + else + static_branch_disable(&lru_gen_caps[i]); + } + + return len; +} + +static struct kobj_attribute lru_gen_enabled_attr = __ATTR( + enabled, 0644, show_enable, store_enable +); + +static struct attribute *lru_gen_attrs[] = { + &lru_gen_min_ttl_attr.attr, + &lru_gen_enabled_attr.attr, + NULL +}; + +static struct attribute_group lru_gen_attr_group = { + .name = "lru_gen", + .attrs = lru_gen_attrs, +}; + +/****************************************************************************** + * debugfs interface + ******************************************************************************/ + +static void *lru_gen_seq_start(struct seq_file *m, loff_t *pos) +{ + struct mem_cgroup *memcg; + loff_t nr_to_skip = *pos; + + m->private = kvmalloc(PATH_MAX, GFP_KERNEL); + if (!m->private) + return ERR_PTR(-ENOMEM); + + memcg = mem_cgroup_iter(NULL, NULL, NULL); + do { + int nid; + + for_each_node_state(nid, N_MEMORY) { + if (!nr_to_skip--) + return get_lruvec(memcg, nid); + } + } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL))); + + return NULL; +} + +static void lru_gen_seq_stop(struct seq_file *m, void *v) +{ + if (!IS_ERR_OR_NULL(v)) + mem_cgroup_iter_break(NULL, lruvec_memcg(v)); + + kvfree(m->private); + m->private = NULL; +} + +static void *lru_gen_seq_next(struct seq_file *m, void *v, loff_t *pos) +{ + int nid = lruvec_pgdat(v)->node_id; + struct mem_cgroup *memcg = lruvec_memcg(v); + + ++*pos; + + nid = next_memory_node(nid); + if (nid == MAX_NUMNODES) { + memcg = mem_cgroup_iter(NULL, memcg, NULL); + if (!memcg) + return NULL; + + nid = first_memory_node; + } + + return get_lruvec(memcg, nid); +} + +static void lru_gen_seq_show_full(struct seq_file *m, struct lruvec *lruvec, + unsigned long max_seq, unsigned long *min_seq, + 
unsigned long seq) +{ + int i; + int type, tier; + int hist = lru_hist_from_seq(seq); + struct lru_gen_struct *lrugen = &lruvec->lrugen; + + for (tier = 0; tier < MAX_NR_TIERS; tier++) { + seq_printf(m, " %10d", tier); + for (type = 0; type < ANON_AND_FILE; type++) { + unsigned long n[3] = {}; + + if (seq == max_seq) { + n[0] = READ_ONCE(lrugen->avg_refaulted[type][tier]); + n[1] = READ_ONCE(lrugen->avg_total[type][tier]); + + seq_printf(m, " %10luR %10luT %10lu ", n[0], n[1], n[2]); + } else if (seq == min_seq[type] || NR_HIST_GENS > 1) { + n[0] = atomic_long_read(&lrugen->refaulted[hist][type][tier]); + n[1] = atomic_long_read(&lrugen->evicted[hist][type][tier]); + if (tier) + n[2] = READ_ONCE(lrugen->protected[hist][type][tier - 1]); + + seq_printf(m, " %10lur %10lue %10lup", n[0], n[1], n[2]); + } else + seq_puts(m, " 0 0 0 "); + } + seq_putc(m, '\n'); + } + + seq_puts(m, " "); + for (i = 0; i < NR_MM_STATS; i++) { + if (seq == max_seq && NR_HIST_GENS == 1) + seq_printf(m, " %10lu%c", READ_ONCE(lruvec->mm_state.stats[hist][i]), + toupper(MM_STAT_CODES[i])); + else if (seq != max_seq && NR_HIST_GENS > 1) + seq_printf(m, " %10lu%c", READ_ONCE(lruvec->mm_state.stats[hist][i]), + MM_STAT_CODES[i]); + else + seq_puts(m, " 0 "); + } + seq_putc(m, '\n'); +} + +static int lru_gen_seq_show(struct seq_file *m, void *v) +{ + unsigned long seq; + bool full = !debugfs_real_fops(m->file)->write; + struct lruvec *lruvec = v; + struct lru_gen_struct *lrugen = &lruvec->lrugen; + int nid = lruvec_pgdat(lruvec)->node_id; + struct mem_cgroup *memcg = lruvec_memcg(lruvec); + DEFINE_MAX_SEQ(lruvec); + DEFINE_MIN_SEQ(lruvec); + + if (nid == first_memory_node) { + const char *path = memcg ? 
m->private : ""; + +#ifdef CONFIG_MEMCG + if (memcg) + cgroup_path(memcg->css.cgroup, m->private, PATH_MAX); +#endif + seq_printf(m, "memcg %5hu %s\n", mem_cgroup_id(memcg), path); + } + + seq_printf(m, " node %5d\n", nid); + + if (!full) + seq = min_seq[LRU_GEN_ANON]; + else if (max_seq >= MAX_NR_GENS) + seq = max_seq - MAX_NR_GENS + 1; + else + seq = 0; + + for (; seq <= max_seq; seq++) { + int gen, type, zone; + unsigned int msecs; + + gen = lru_gen_from_seq(seq); + msecs = jiffies_to_msecs(jiffies - READ_ONCE(lrugen->timestamps[gen])); + + seq_printf(m, " %10lu %10u", seq, msecs); + + for (type = 0; type < ANON_AND_FILE; type++) { + long size = 0; + + if (seq < min_seq[type]) { + seq_puts(m, " -0 "); + continue; + } + + for (zone = 0; zone < MAX_NR_ZONES; zone++) + size += READ_ONCE(lrugen->nr_pages[gen][type][zone]); + + seq_printf(m, " %10lu ", max(size, 0L)); + } + + seq_putc(m, '\n'); + + if (full) + lru_gen_seq_show_full(m, lruvec, max_seq, min_seq, seq); + } + + return 0; +} + +static const struct seq_operations lru_gen_seq_ops = { + .start = lru_gen_seq_start, + .stop = lru_gen_seq_stop, + .next = lru_gen_seq_next, + .show = lru_gen_seq_show, +}; + +static int run_aging(struct lruvec *lruvec, unsigned long seq, struct scan_control *sc, + bool can_swap, bool full_scan) +{ + DEFINE_MAX_SEQ(lruvec); + + if (seq == max_seq) + try_to_inc_max_seq(lruvec, max_seq, sc, can_swap, full_scan); + + return seq > max_seq ? 
-EINVAL : 0; +} + +static int run_eviction(struct lruvec *lruvec, unsigned long seq, struct scan_control *sc, + int swappiness, unsigned long nr_to_reclaim) +{ + struct blk_plug plug; + int err = -EINTR; + DEFINE_MAX_SEQ(lruvec); + + if (seq + MIN_NR_GENS > max_seq) + return -EINVAL; + + sc->nr_reclaimed = 0; + + blk_start_plug(&plug); + + while (!signal_pending(current)) { + DEFINE_MIN_SEQ(lruvec); + + if (seq < min_seq[!swappiness] || sc->nr_reclaimed >= nr_to_reclaim || + !evict_folios(lruvec, sc, swappiness, NULL)) { + err = 0; + break; + } + + cond_resched(); + } + + blk_finish_plug(&plug); + + return err; +} + +static int run_cmd(char cmd, int memcg_id, int nid, unsigned long seq, + struct scan_control *sc, int swappiness, unsigned long opt) +{ + struct lruvec *lruvec; + int err = -EINVAL; + struct mem_cgroup *memcg = NULL; + + if (!mem_cgroup_disabled()) { + rcu_read_lock(); + memcg = mem_cgroup_from_id(memcg_id); +#ifdef CONFIG_MEMCG + if (memcg && !css_tryget(&memcg->css)) + memcg = NULL; +#endif + rcu_read_unlock(); + + if (!memcg) + goto done; + } + if (memcg_id != mem_cgroup_id(memcg)) + goto done; + + if (nid < 0 || nid >= MAX_NUMNODES || !node_state(nid, N_MEMORY)) + goto done; + + lruvec = get_lruvec(memcg, nid); + + if (swappiness < 0) + swappiness = get_swappiness(lruvec, sc); + else if (swappiness > 200) + goto done; + + switch (cmd) { + case '+': + err = run_aging(lruvec, seq, sc, swappiness, opt); + break; + case '-': + err = run_eviction(lruvec, seq, sc, swappiness, opt); + break; + } +done: + mem_cgroup_put(memcg); + + return err; +} + +static ssize_t lru_gen_seq_write(struct file *file, const char __user *src, + size_t len, loff_t *pos) +{ + void *buf; + char *cur, *next; + unsigned int flags; + int err = 0; + struct scan_control sc = { + .may_writepage = true, + .may_unmap = true, + .may_swap = true, + .reclaim_idx = MAX_NR_ZONES - 1, + .gfp_mask = GFP_KERNEL, + }; + + buf = kvmalloc(len + 1, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + if 
(copy_from_user(buf, src, len)) { + kvfree(buf); + return -EFAULT; + } + + next = buf; + next[len] = '\0'; + + sc.reclaim_state.mm_walk = alloc_mm_walk(); + if (!sc.reclaim_state.mm_walk) { + kvfree(buf); + return -ENOMEM; + } + + set_task_reclaim_state(current, &sc.reclaim_state); + flags = memalloc_noreclaim_save(); + + while ((cur = strsep(&next, ",;\n"))) { + int n; + int end; + char cmd; + unsigned int memcg_id; + unsigned int nid; + unsigned long seq; + unsigned int swappiness = -1; + unsigned long opt = -1; + + cur = skip_spaces(cur); + if (!*cur) + continue; + + n = sscanf(cur, "%c %u %u %lu %n %u %n %lu %n", &cmd, &memcg_id, &nid, + &seq, &end, &swappiness, &end, &opt, &end); + if (n < 4 || cur[end]) { + err = -EINVAL; + break; + } + + err = run_cmd(cmd, memcg_id, nid, seq, &sc, swappiness, opt); + if (err) + break; + } + + memalloc_noreclaim_restore(flags); + set_task_reclaim_state(current, NULL); + + free_mm_walk(sc.reclaim_state.mm_walk); + kvfree(buf); + + return err ? : len; +} + +static int lru_gen_seq_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &lru_gen_seq_ops); +} + +static const struct file_operations lru_gen_rw_fops = { + .open = lru_gen_seq_open, + .read = seq_read, + .write = lru_gen_seq_write, + .llseek = seq_lseek, + .release = seq_release, +}; + +static const struct file_operations lru_gen_ro_fops = { + .open = lru_gen_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +/****************************************************************************** + * initialization + ******************************************************************************/ + +void lru_gen_init_lruvec(struct lruvec *lruvec) +{ + int i; + int gen, type, zone; + struct lru_gen_struct *lrugen = &lruvec->lrugen; + + lrugen->max_seq = MIN_NR_GENS + 1; + lrugen->enabled = lru_gen_enabled(); + + for (i = 0; i <= MIN_NR_GENS + 1; i++) + lrugen->timestamps[i] = jiffies; + + for_each_gen_type_zone(gen, type, zone) + 
INIT_LIST_HEAD(&lrugen->lists[gen][type][zone]); + + lruvec->mm_state.seq = MIN_NR_GENS; + init_waitqueue_head(&lruvec->mm_state.wait); +} + +#ifdef CONFIG_MEMCG +void lru_gen_init_memcg(struct mem_cgroup *memcg) +{ + INIT_LIST_HEAD(&memcg->mm_list.fifo); + spin_lock_init(&memcg->mm_list.lock); +} + +void lru_gen_exit_memcg(struct mem_cgroup *memcg) +{ + int i; + int nid; + + for_each_node(nid) { + struct lruvec *lruvec = get_lruvec(memcg, nid); + + VM_BUG_ON(memchr_inv(lruvec->lrugen.nr_pages, 0, + sizeof(lruvec->lrugen.nr_pages))); + + for (i = 0; i < NR_BLOOM_FILTERS; i++) { + bitmap_free(lruvec->mm_state.filters[i]); + lruvec->mm_state.filters[i] = NULL; + } + } +} +#endif + +static int __init init_lru_gen(void) +{ + BUILD_BUG_ON(MIN_NR_GENS + 1 >= MAX_NR_GENS); + BUILD_BUG_ON(BIT(LRU_GEN_WIDTH) <= MAX_NR_GENS); + BUILD_BUG_ON(sizeof(MM_STAT_CODES) != NR_MM_STATS + 1); + + if (sysfs_create_group(mm_kobj, &lru_gen_attr_group)) + pr_err("lru_gen: failed to create sysfs group\n"); + + debugfs_create_file("lru_gen", 0644, NULL, NULL, &lru_gen_rw_fops); + debugfs_create_file("lru_gen_full", 0444, NULL, NULL, &lru_gen_ro_fops); + + return 0; +}; +late_initcall(init_lru_gen); + +#else + +static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc) +{ +} + +static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) +{ +} + +#endif /* CONFIG_LRU_GEN */ + +static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) +{ + unsigned long nr[NR_LRU_LISTS]; + unsigned long targets[NR_LRU_LISTS]; + unsigned long nr_to_scan; + enum lru_list lru; + unsigned long nr_reclaimed = 0; + unsigned long nr_to_reclaim = sc->nr_to_reclaim; + struct blk_plug plug; + bool scan_adjusted; + + if (lru_gen_enabled()) { + lru_gen_shrink_lruvec(lruvec, sc); + return; + } + + get_scan_count(lruvec, sc, nr); + + /* Record the original scan target for proportional adjustments later */ + memcpy(targets, nr, sizeof(nr)); + + /* + * Global 
reclaiming within direct reclaim at DEF_PRIORITY is a normal + * event that can occur when there is little memory pressure e.g. + * multiple streaming readers/writers. Hence, we do not abort scanning + * when the requested number of pages are reclaimed when scanning at + * DEF_PRIORITY on the assumption that the fact we are direct + * reclaiming implies that kswapd is not keeping up and it is best to + * do a batch of work at once. For memcg reclaim one check is made to + * abort proportional reclaim if either the file or anon lru has already + * dropped to zero at the first pass. + */ + scan_adjusted = (!cgroup_reclaim(sc) && !current_is_kswapd() && + sc->priority == DEF_PRIORITY); + + blk_start_plug(&plug); + while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] || + nr[LRU_INACTIVE_FILE]) { + unsigned long nr_anon, nr_file, percentage; + unsigned long nr_scanned; + + for_each_evictable_lru(lru) { + if (nr[lru]) { + nr_to_scan = min(nr[lru], SWAP_CLUSTER_MAX); + nr[lru] -= nr_to_scan; + + nr_reclaimed += shrink_list(lru, nr_to_scan, + lruvec, sc); + } + } + + cond_resched(); + + if (nr_reclaimed < nr_to_reclaim || scan_adjusted) + continue; + + /* + * For kswapd and memcg, reclaim at least the number of pages + * requested. Ensure that the anon and file LRUs are scanned + * proportionally what was requested by get_scan_count(). We + * stop reclaiming one LRU and reduce the amount scanning + * proportional to the original scan target. + */ + nr_file = nr[LRU_INACTIVE_FILE] + nr[LRU_ACTIVE_FILE]; + nr_anon = nr[LRU_INACTIVE_ANON] + nr[LRU_ACTIVE_ANON]; + + /* + * It's just vindictive to attack the larger once the smaller + * has gone to zero. And given the way we stop scanning the + * smaller below, this makes sure that we only make one nudge + * towards proportionality once we've got nr_to_reclaim. 
+ */ + if (!nr_file || !nr_anon) + break; + + if (nr_file > nr_anon) { + unsigned long scan_target = targets[LRU_INACTIVE_ANON] + + targets[LRU_ACTIVE_ANON] + 1; + lru = LRU_BASE; + percentage = nr_anon * 100 / scan_target; + } else { + unsigned long scan_target = targets[LRU_INACTIVE_FILE] + + targets[LRU_ACTIVE_FILE] + 1; + lru = LRU_FILE; + percentage = nr_file * 100 / scan_target; + } + + /* Stop scanning the smaller of the LRU */ + nr[lru] = 0; + nr[lru + LRU_ACTIVE] = 0; + + /* + * Recalculate the other LRU scan count based on its original + * scan target and the percentage scanning already complete + */ + lru = (lru == LRU_FILE) ? LRU_BASE : LRU_FILE; + nr_scanned = targets[lru] - nr[lru]; + nr[lru] = targets[lru] * (100 - percentage) / 100; + nr[lru] -= min(nr[lru], nr_scanned); + + lru += LRU_ACTIVE; + nr_scanned = targets[lru] - nr[lru]; + nr[lru] = targets[lru] * (100 - percentage) / 100; + nr[lru] -= min(nr[lru], nr_scanned); + + scan_adjusted = true; + } + blk_finish_plug(&plug); + sc->nr_reclaimed += nr_reclaimed; + + /* + * Even if we did not try to evict anon pages at all, we want to + * rebalance the anon lru active/inactive ratio. + */ + if (can_age_anon_pages(lruvec_pgdat(lruvec), sc) && + inactive_is_low(lruvec, LRU_INACTIVE_ANON)) + shrink_active_list(SWAP_CLUSTER_MAX, lruvec, + sc, LRU_ACTIVE_ANON); +} + +/* Use reclaim/compaction for costly allocs or under memory pressure */ +static bool in_reclaim_compaction(struct scan_control *sc) +{ + if (IS_ENABLED(CONFIG_COMPACTION) && sc->order && + (sc->order > PAGE_ALLOC_COSTLY_ORDER || + sc->priority < DEF_PRIORITY - 2)) + return true; + + return false; +} + +/* + * Reclaim/compaction is used for high-order allocation requests. It reclaims + * order-0 pages before compacting the zone. should_continue_reclaim() returns + * true if more pages should be reclaimed such that when the page allocator + * calls try_to_compact_pages() that it will have enough free pages to succeed. 
+ * It will give up earlier than that if there is difficulty reclaiming pages. + */ +static inline bool should_continue_reclaim(struct pglist_data *pgdat, + unsigned long nr_reclaimed, + struct scan_control *sc) +{ + unsigned long pages_for_compaction; + unsigned long inactive_lru_pages; + int z; + + /* If not in reclaim/compaction mode, stop */ + if (!in_reclaim_compaction(sc)) + return false; + + /* + * Stop if we failed to reclaim any pages from the last SWAP_CLUSTER_MAX + * number of pages that were scanned. This will return to the caller + * with the risk reclaim/compaction and the resulting allocation attempt * fails. In the past we have tried harder for __GFP_RETRY_MAYFAIL * allocations through requiring that the full LRU list has been scanned * first, by assuming that zero delta of sc->nr_scanned means full LRU @@ -3188,109 +5907,16 @@ static void shrink_node(pg_data_t *pgdat, struct scan_control *sc) unsigned long nr_reclaimed, nr_scanned; struct lruvec *target_lruvec; bool reclaimable = false; - unsigned long file; target_lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, pgdat); again: - /* - * Flush the memory cgroup stats, so that we read accurate per-memcg - * lruvec stats for heuristics. - */ - mem_cgroup_flush_stats(); - memset(&sc->nr, 0, sizeof(sc->nr)); nr_reclaimed = sc->nr_reclaimed; nr_scanned = sc->nr_scanned; - /* - * Determine the scan balance between anon and file LRUs. - */ - spin_lock_irq(&target_lruvec->lru_lock); - sc->anon_cost = target_lruvec->anon_cost; - sc->file_cost = target_lruvec->file_cost; - spin_unlock_irq(&target_lruvec->lru_lock); - - /* - * Target desirable inactive:active list ratios for the anon - * and file LRU lists. 
- */ - if (!sc->force_deactivate) { - unsigned long refaults; - - refaults = lruvec_page_state(target_lruvec, - WORKINGSET_ACTIVATE_ANON); - if (refaults != target_lruvec->refaults[0] || - inactive_is_low(target_lruvec, LRU_INACTIVE_ANON)) - sc->may_deactivate |= DEACTIVATE_ANON; - else - sc->may_deactivate &= ~DEACTIVATE_ANON; - - /* - * When refaults are being observed, it means a new - * workingset is being established. Deactivate to get - * rid of any stale active pages quickly. - */ - refaults = lruvec_page_state(target_lruvec, - WORKINGSET_ACTIVATE_FILE); - if (refaults != target_lruvec->refaults[1] || - inactive_is_low(target_lruvec, LRU_INACTIVE_FILE)) - sc->may_deactivate |= DEACTIVATE_FILE; - else - sc->may_deactivate &= ~DEACTIVATE_FILE; - } else - sc->may_deactivate = DEACTIVATE_ANON | DEACTIVATE_FILE; - - /* - * If we have plenty of inactive file pages that aren't - * thrashing, try to reclaim those first before touching - * anonymous pages. - */ - file = lruvec_page_state(target_lruvec, NR_INACTIVE_FILE); - if (file >> sc->priority && !(sc->may_deactivate & DEACTIVATE_FILE)) - sc->cache_trim_mode = 1; - else - sc->cache_trim_mode = 0; - - /* - * Prevent the reclaimer from falling into the cache trap: as - * cache pages start out inactive, every cache fault will tip - * the scan balance towards the file LRU. And as the file LRU - * shrinks, so does the window for rotation from references. - * This means we have a runaway feedback loop where a tiny - * thrashing file LRU becomes infinitely more attractive than - * anon pages. Try to detect this based on file LRU size. 
- */ - if (!cgroup_reclaim(sc)) { - unsigned long total_high_wmark = 0; - unsigned long free, anon; - int z; - - free = sum_zone_node_page_state(pgdat->node_id, NR_FREE_PAGES); - file = node_page_state(pgdat, NR_ACTIVE_FILE) + - node_page_state(pgdat, NR_INACTIVE_FILE); - - for (z = 0; z < MAX_NR_ZONES; z++) { - struct zone *zone = &pgdat->node_zones[z]; - if (!managed_zone(zone)) - continue; - - total_high_wmark += high_wmark_pages(zone); - } - - /* - * Consider anon: if that's low too, this isn't a - * runaway file reclaim problem, but rather just - * extreme pressure. Reclaim as per usual then. - */ - anon = node_page_state(pgdat, NR_INACTIVE_ANON); - - sc->file_is_tiny = - file + free <= total_high_wmark && - !(sc->may_deactivate & DEACTIVATE_ANON) && - anon >> sc->priority; - } + prepare_scan_count(pgdat, sc); shrink_node_memcgs(pgdat, sc); @@ -3547,6 +6173,9 @@ static void snapshot_refaults(struct mem_cgroup *target_memcg, pg_data_t *pgdat) struct lruvec *target_lruvec; unsigned long refaults; + if (lru_gen_enabled()) + return; + target_lruvec = mem_cgroup_lruvec(target_memcg, pgdat); refaults = lruvec_page_state(target_lruvec, WORKINGSET_ACTIVATE_ANON); target_lruvec->refaults[0] = refaults; @@ -3917,6 +6546,11 @@ static void age_active_anon(struct pglist_data *pgdat, struct mem_cgroup *memcg; struct lruvec *lruvec; + if (lru_gen_enabled()) { + lru_gen_age_node(pgdat, sc); + return; + } + if (!can_age_anon_pages(pgdat, sc)) return; diff --git a/mm/vmstat.c b/mm/vmstat.c index 4057372745d04..9e9536df51b5a 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -28,6 +28,7 @@ #include #include #include +#include #include "internal.h" @@ -2043,7 +2044,12 @@ static void __init init_cpu_node_state(void) static int vmstat_cpu_online(unsigned int cpu) { refresh_zone_stat_thresholds(); - node_set_state(cpu_to_node(cpu), N_CPU); + + if (!node_state(cpu_to_node(cpu), N_CPU)) { + node_set_state(cpu_to_node(cpu), N_CPU); + set_migration_target_nodes(); + } + return 0; } @@ 
-2066,6 +2072,8 @@ static int vmstat_cpu_dead(unsigned int cpu) return 0; node_clear_state(node, N_CPU); + set_migration_target_nodes(); + return 0; } @@ -2097,6 +2105,9 @@ void __init init_mm_internals(void) start_shepherd_timer(); #endif +#if defined(CONFIG_MIGRATION) && defined(CONFIG_HOTPLUG_CPU) + migrate_on_reclaim_init(); +#endif #ifdef CONFIG_PROC_FS proc_create_seq("buddyinfo", 0444, NULL, &fragmentation_op); proc_create_seq("pagetypeinfo", 0400, NULL, &pagetypeinfo_op); diff --git a/mm/workingset.c b/mm/workingset.c index 8c03afe1d67cb..93ee00c7e4d11 100644 --- a/mm/workingset.c +++ b/mm/workingset.c @@ -187,7 +187,6 @@ static unsigned int bucket_order __read_mostly; static void *pack_shadow(int memcgid, pg_data_t *pgdat, unsigned long eviction, bool workingset) { - eviction >>= bucket_order; eviction &= EVICTION_MASK; eviction = (eviction << MEM_CGROUP_ID_SHIFT) | memcgid; eviction = (eviction << NODES_SHIFT) | pgdat->node_id; @@ -212,10 +211,116 @@ static void unpack_shadow(void *shadow, int *memcgidp, pg_data_t **pgdat, *memcgidp = memcgid; *pgdat = NODE_DATA(nid); - *evictionp = entry << bucket_order; + *evictionp = entry; *workingsetp = workingset; } +#ifdef CONFIG_LRU_GEN + +static int folio_lru_refs(struct folio *folio) +{ + unsigned long flags = READ_ONCE(folio->flags); + + BUILD_BUG_ON(LRU_GEN_WIDTH + LRU_REFS_WIDTH > BITS_PER_LONG - EVICTION_SHIFT); + + /* see the comment on MAX_NR_TIERS */ + return flags & BIT(PG_workingset) ? 
(flags & LRU_REFS_MASK) >> LRU_REFS_PGOFF : 0; +} + +static void *lru_gen_eviction(struct folio *folio) +{ + int hist, tier; + unsigned long token; + unsigned long min_seq; + struct lruvec *lruvec; + struct lru_gen_struct *lrugen; + int type = folio_is_file_lru(folio); + int refs = folio_lru_refs(folio); + int delta = folio_nr_pages(folio); + bool workingset = folio_test_workingset(folio); + struct mem_cgroup *memcg = folio_memcg(folio); + struct pglist_data *pgdat = folio_pgdat(folio); + + lruvec = mem_cgroup_lruvec(memcg, pgdat); + lrugen = &lruvec->lrugen; + min_seq = READ_ONCE(lrugen->min_seq[type]); + token = (min_seq << LRU_REFS_WIDTH) | refs; + + hist = lru_hist_from_seq(min_seq); + tier = lru_tier_from_refs(refs + workingset); + atomic_long_add(delta, &lrugen->evicted[hist][type][tier]); + + return pack_shadow(mem_cgroup_id(memcg), pgdat, token, workingset); +} + +static void lru_gen_refault(struct folio *folio, void *shadow) +{ + int hist, tier, refs; + int memcg_id; + bool workingset; + unsigned long token; + unsigned long min_seq; + struct lruvec *lruvec; + struct lru_gen_struct *lrugen; + struct mem_cgroup *memcg; + struct pglist_data *pgdat; + int type = folio_is_file_lru(folio); + int delta = folio_nr_pages(folio); + + unpack_shadow(shadow, &memcg_id, &pgdat, &token, &workingset); + + refs = token & (BIT(LRU_REFS_WIDTH) - 1); + if (refs && !workingset) + return; + + if (folio_pgdat(folio) != pgdat) + return; + + rcu_read_lock(); + memcg = folio_memcg_rcu(folio); + if (mem_cgroup_id(memcg) != memcg_id) + goto unlock; + + token >>= LRU_REFS_WIDTH; + lruvec = mem_cgroup_lruvec(memcg, pgdat); + lrugen = &lruvec->lrugen; + min_seq = READ_ONCE(lrugen->min_seq[type]); + if (token != (min_seq & (EVICTION_MASK >> LRU_REFS_WIDTH))) + goto unlock; + + hist = lru_hist_from_seq(min_seq); + tier = lru_tier_from_refs(refs + workingset); + atomic_long_add(delta, &lrugen->refaulted[hist][type][tier]); + mod_lruvec_state(lruvec, WORKINGSET_REFAULT_BASE + type, delta); 
+ + /* + * Count the following two cases as stalls: + * 1. For pages accessed through page tables, hotter pages pushed out + * hot pages which refaulted immediately. + * 2. For pages accessed through file descriptors, numbers of accesses + * might have been beyond the limit. + */ + if (lru_gen_in_fault() || refs + workingset == BIT(LRU_REFS_WIDTH)) { + folio_set_workingset(folio); + mod_lruvec_state(lruvec, WORKINGSET_RESTORE_BASE + type, delta); + } +unlock: + rcu_read_unlock(); +} + +#else + +static void *lru_gen_eviction(struct folio *folio) +{ + return NULL; +} + +static void lru_gen_refault(struct folio *folio, void *shadow) +{ +} + +#endif /* CONFIG_LRU_GEN */ + /** * workingset_age_nonresident - age non-resident entries as LRU ages * @lruvec: the lruvec that was aged @@ -264,10 +369,14 @@ void *workingset_eviction(struct page *page, struct mem_cgroup *target_memcg) VM_BUG_ON_PAGE(page_count(page), page); VM_BUG_ON_PAGE(!PageLocked(page), page); + if (lru_gen_enabled()) + return lru_gen_eviction(page_folio(page)); + lruvec = mem_cgroup_lruvec(target_memcg, pgdat); /* XXX: target_memcg can be NULL, go through lruvec */ memcgid = mem_cgroup_id(lruvec_memcg(lruvec)); eviction = atomic_long_read(&lruvec->nonresident_age); + eviction >>= bucket_order; workingset_age_nonresident(lruvec, thp_nr_pages(page)); return pack_shadow(memcgid, pgdat, eviction, PageWorkingset(page)); } @@ -297,7 +406,13 @@ void workingset_refault(struct folio *folio, void *shadow) int memcgid; long nr; + if (lru_gen_enabled()) { + lru_gen_refault(folio, shadow); + return; + } + unpack_shadow(shadow, &memcgid, &pgdat, &eviction, &workingset); + eviction <<= bucket_order; rcu_read_lock(); /* diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 6bd0971807721..363d47f945324 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -89,18 +89,20 @@ static void ax25_kill_by_device(struct net_device *dev) sk = s->sk; if (!sk) { spin_unlock_bh(&ax25_list_lock); - s->ax25_dev = NULL; 
ax25_disconnect(s, ENETUNREACH); + s->ax25_dev = NULL; spin_lock_bh(&ax25_list_lock); goto again; } sock_hold(sk); spin_unlock_bh(&ax25_list_lock); lock_sock(sk); - s->ax25_dev = NULL; - dev_put_track(ax25_dev->dev, &ax25_dev->dev_tracker); - ax25_dev_put(ax25_dev); ax25_disconnect(s, ENETUNREACH); + s->ax25_dev = NULL; + if (sk->sk_socket) { + dev_put_track(ax25_dev->dev, &ax25_dev->dev_tracker); + ax25_dev_put(ax25_dev); + } release_sock(sk); spin_lock_bh(&ax25_list_lock); sock_put(sk); @@ -979,14 +981,16 @@ static int ax25_release(struct socket *sock) { struct sock *sk = sock->sk; ax25_cb *ax25; + ax25_dev *ax25_dev; if (sk == NULL) return 0; sock_hold(sk); - sock_orphan(sk); lock_sock(sk); + sock_orphan(sk); ax25 = sk_to_ax25(sk); + ax25_dev = ax25->ax25_dev; if (sk->sk_type == SOCK_SEQPACKET) { switch (ax25->state) { @@ -1048,6 +1052,15 @@ static int ax25_release(struct socket *sock) sk->sk_state_change(sk); ax25_destroy_socket(ax25); } + if (ax25_dev) { + del_timer_sync(&ax25->timer); + del_timer_sync(&ax25->t1timer); + del_timer_sync(&ax25->t2timer); + del_timer_sync(&ax25->t3timer); + del_timer_sync(&ax25->idletimer); + dev_put_track(ax25_dev->dev, &ax25_dev->dev_tracker); + ax25_dev_put(ax25_dev); + } sock->sk = NULL; release_sock(sk); diff --git a/net/ax25/ax25_subr.c b/net/ax25/ax25_subr.c index 15ab812c4fe4b..3a476e4f6cd0b 100644 --- a/net/ax25/ax25_subr.c +++ b/net/ax25/ax25_subr.c @@ -261,12 +261,20 @@ void ax25_disconnect(ax25_cb *ax25, int reason) { ax25_clear_queues(ax25); - if (!ax25->sk || !sock_flag(ax25->sk, SOCK_DESTROY)) - ax25_stop_heartbeat(ax25); - ax25_stop_t1timer(ax25); - ax25_stop_t2timer(ax25); - ax25_stop_t3timer(ax25); - ax25_stop_idletimer(ax25); + if (reason == ENETUNREACH) { + del_timer_sync(&ax25->timer); + del_timer_sync(&ax25->t1timer); + del_timer_sync(&ax25->t2timer); + del_timer_sync(&ax25->t3timer); + del_timer_sync(&ax25->idletimer); + } else { + if (!ax25->sk || !sock_flag(ax25->sk, SOCK_DESTROY)) + 
ax25_stop_heartbeat(ax25); + ax25_stop_t1timer(ax25); + ax25_stop_t2timer(ax25); + ax25_stop_t3timer(ax25); + ax25_stop_idletimer(ax25); + } ax25->state = AX25_STATE_0; diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c index f4004cf0ff6fb..9f311fddfaf9a 100644 --- a/net/batman-adv/multicast.c +++ b/net/batman-adv/multicast.c @@ -134,7 +134,7 @@ static u8 batadv_mcast_mla_rtr_flags_softif_get_ipv6(struct net_device *dev) { struct inet6_dev *in6_dev = __in6_dev_get(dev); - if (in6_dev && in6_dev->cnf.mc_forwarding) + if (in6_dev && atomic_read(&in6_dev->cnf.mc_forwarding)) return BATADV_NO_FLAGS; else return BATADV_MCAST_WANT_NO_RTR6; diff --git a/net/bluetooth/eir.h b/net/bluetooth/eir.h index 05e2e917fc254..e5876751f07ed 100644 --- a/net/bluetooth/eir.h +++ b/net/bluetooth/eir.h @@ -15,6 +15,11 @@ u8 eir_create_scan_rsp(struct hci_dev *hdev, u8 instance, u8 *ptr); u8 eir_append_local_name(struct hci_dev *hdev, u8 *eir, u8 ad_len); u8 eir_append_appearance(struct hci_dev *hdev, u8 *ptr, u8 ad_len); +static inline u16 eir_precalc_len(u8 data_len) +{ + return sizeof(u8) * 2 + data_len; +} + static inline u16 eir_append_data(u8 *eir, u16 eir_len, u8 type, u8 *data, u8 data_len) { diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 04ebe901e86f0..84312c8365493 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -669,7 +669,9 @@ static void le_conn_timeout(struct work_struct *work) if (conn->role == HCI_ROLE_SLAVE) { /* Disable LE Advertising */ le_disable_advertising(hdev); + hci_dev_lock(hdev); hci_le_conn_failed(conn, HCI_ERROR_ADVERTISING_TIMEOUT); + hci_dev_unlock(hdev); return; } @@ -689,6 +691,7 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst, bacpy(&conn->dst, dst); bacpy(&conn->src, &hdev->bdaddr); + conn->handle = HCI_CONN_HANDLE_UNSET; conn->hdev = hdev; conn->type = type; conn->role = role; diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 
fc30f4c03d292..d984777c9b58b 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -3068,6 +3068,11 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data, struct hci_ev_conn_complete *ev = data; struct hci_conn *conn; + if (__le16_to_cpu(ev->handle) > HCI_CONN_HANDLE_MAX) { + bt_dev_err(hdev, "Ignoring HCI_Connection_Complete for invalid handle"); + return; + } + bt_dev_dbg(hdev, "status 0x%2.2x", ev->status); hci_dev_lock(hdev); @@ -3106,6 +3111,17 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data, } } + /* The HCI_Connection_Complete event is only sent once per connection. + * Processing it more than once per connection can corrupt kernel memory. + * + * As the connection handle is set here for the first time, it indicates + * whether the connection is already set up. + */ + if (conn->handle != HCI_CONN_HANDLE_UNSET) { + bt_dev_err(hdev, "Ignoring HCI_Connection_Complete for existing connection"); + goto unlock; + } + if (!ev->status) { conn->handle = __le16_to_cpu(ev->handle); @@ -4534,7 +4550,7 @@ static void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev, void *edata, if (!info) { bt_dev_err(hdev, "Malformed HCI Event: 0x%2.2x", HCI_EV_INQUIRY_RESULT_WITH_RSSI); - return; + goto unlock; } bacpy(&data.bdaddr, &info->bdaddr); @@ -4565,7 +4581,7 @@ static void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev, void *edata, if (!info) { bt_dev_err(hdev, "Malformed HCI Event: 0x%2.2x", HCI_EV_INQUIRY_RESULT_WITH_RSSI); - return; + goto unlock; } bacpy(&data.bdaddr, &info->bdaddr); @@ -4587,7 +4603,7 @@ static void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev, void *edata, bt_dev_err(hdev, "Malformed HCI Event: 0x%2.2x", HCI_EV_INQUIRY_RESULT_WITH_RSSI); } - +unlock: hci_dev_unlock(hdev); } @@ -4661,6 +4677,24 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev, void *data, struct hci_ev_sync_conn_complete *ev = data; struct hci_conn *conn; + switch (ev->link_type) { + case 
SCO_LINK: + case ESCO_LINK: + break; + default: + /* As per Core 5.3 Vol 4 Part E 7.7.35 (p.2219), Link_Type + * for HCI_Synchronous_Connection_Complete is limited to + * either SCO or eSCO + */ + bt_dev_err(hdev, "Ignoring connect complete event for invalid link type"); + return; + } + + if (__le16_to_cpu(ev->handle) > HCI_CONN_HANDLE_MAX) { + bt_dev_err(hdev, "Ignoring HCI_Sync_Conn_Complete for invalid handle"); + return; + } + bt_dev_dbg(hdev, "status 0x%2.2x", ev->status); hci_dev_lock(hdev); @@ -4684,23 +4718,19 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev, void *data, goto unlock; } + /* The HCI_Synchronous_Connection_Complete event is only sent once per connection. + * Processing it more than once per connection can corrupt kernel memory. + * + * As the connection handle is set here for the first time, it indicates + * whether the connection is already set up. + */ + if (conn->handle != HCI_CONN_HANDLE_UNSET) { + bt_dev_err(hdev, "Ignoring HCI_Sync_Conn_Complete event for existing connection"); + goto unlock; + } + switch (ev->status) { case 0x00: - /* The synchronous connection complete event should only be - * sent once per new connection. Receiving a successful - * complete event when the connection status is already - * BT_CONNECTED means that the device is misbehaving and sent - * multiple complete event packets for the same new connection. - * - * Registering the device more than once can corrupt kernel - * memory, hence upon detecting this invalid event, we report - * an error and ignore the packet. 
- */ - if (conn->state == BT_CONNECTED) { - bt_dev_err(hdev, "Ignoring connect complete event for existing connection"); - goto unlock; - } - conn->handle = __le16_to_cpu(ev->handle); conn->state = BT_CONNECTED; conn->type = ev->link_type; @@ -5423,8 +5453,9 @@ static void hci_disconn_phylink_complete_evt(struct hci_dev *hdev, void *data, hci_dev_lock(hdev); hcon = hci_conn_hash_lookup_handle(hdev, ev->phy_handle); - if (hcon) { + if (hcon && hcon->type == AMP_LINK) { hcon->state = BT_CLOSED; + hci_disconn_cfm(hcon, ev->reason); hci_conn_del(hcon); } @@ -5496,6 +5527,11 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status, struct smp_irk *irk; u8 addr_type; + if (handle > HCI_CONN_HANDLE_MAX) { + bt_dev_err(hdev, "Ignoring HCI_LE_Connection_Complete for invalid handle"); + return; + } + hci_dev_lock(hdev); /* All controllers implicitly stop advertising in the event of a @@ -5537,6 +5573,17 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status, cancel_delayed_work(&conn->le_conn_timeout); } + /* The HCI_LE_Connection_Complete event is only sent once per connection. + * Processing it more than once per connection can corrupt kernel memory. + * + * As the connection handle is set here for the first time, it indicates + * whether the connection is already set up. 
+ */ + if (conn->handle != HCI_CONN_HANDLE_UNSET) { + bt_dev_err(hdev, "Ignoring HCI_Connection_Complete for existing connection"); + goto unlock; + } + le_conn_update_addr(conn, bdaddr, bdaddr_type, local_rpa); /* Lookup the identity address from the stored connection @@ -6798,7 +6845,7 @@ static const struct hci_ev { HCI_EV(HCI_EV_NUM_COMP_BLOCKS, hci_num_comp_blocks_evt, sizeof(struct hci_ev_num_comp_blocks)), /* [0xff = HCI_EV_VENDOR] */ - HCI_EV(HCI_EV_VENDOR, msft_vendor_evt, 0), + HCI_EV_VL(HCI_EV_VENDOR, msft_vendor_evt, 0, HCI_MAX_EVENT_SIZE), }; static void hci_event_func(struct hci_dev *hdev, u8 event, struct sk_buff *skb, @@ -6823,8 +6870,9 @@ static void hci_event_func(struct hci_dev *hdev, u8 event, struct sk_buff *skb, * decide if that is acceptable. */ if (skb->len > ev->max_len) - bt_dev_warn(hdev, "unexpected event 0x%2.2x length: %u > %u", - event, skb->len, ev->max_len); + bt_dev_warn_ratelimited(hdev, + "unexpected event 0x%2.2x length: %u > %u", + event, skb->len, ev->max_len); data = hci_ev_skb_pull(hdev, skb, event, ev->min_len); if (!data) diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index ab9aa700b6b33..8f4c5698913d7 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -379,6 +379,9 @@ int hci_cmd_sync_queue(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, { struct hci_cmd_sync_work_entry *entry; + if (hci_dev_test_flag(hdev, HCI_UNREGISTER)) + return -ENODEV; + entry = kmalloc(sizeof(*entry), GFP_KERNEL); if (!entry) return -ENOMEM; @@ -2806,6 +2809,9 @@ static int hci_set_event_filter_sync(struct hci_dev *hdev, u8 flt_type, if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) return 0; + if (test_bit(HCI_QUIRK_BROKEN_FILTER_CLEAR_ALL, &hdev->quirks)) + return 0; + memset(&cp, 0, sizeof(cp)); cp.flt_type = flt_type; @@ -2826,6 +2832,13 @@ static int hci_clear_event_filter_sync(struct hci_dev *hdev) if (!hci_dev_test_flag(hdev, HCI_EVENT_FILTER_CONFIGURED)) return 0; + /* In theory the state machine 
should not reach here unless + * a hci_set_event_filter_sync() call succeeds, but we do + * the check both for parity and as a future reminder. + */ + if (test_bit(HCI_QUIRK_BROKEN_FILTER_CLEAR_ALL, &hdev->quirks)) + return 0; + return hci_set_event_filter_sync(hdev, HCI_FLT_CLEAR_ALL, 0x00, BDADDR_ANY, 0x00); } @@ -4422,7 +4435,7 @@ static int hci_disconnect_all_sync(struct hci_dev *hdev, u8 reason) return err; } - return err; + return 0; } /* This function perform power off HCI command sequence as follows: @@ -4825,6 +4838,12 @@ static int hci_update_event_filter_sync(struct hci_dev *hdev) if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) return 0; + /* Some fake CSR controllers lock up after setting this type of + * filter, so avoid sending the request altogether. + */ + if (test_bit(HCI_QUIRK_BROKEN_FILTER_CLEAR_ALL, &hdev->quirks)) + return 0; + /* Always clear event filter when starting */ hci_clear_event_filter_sync(hdev); @@ -5140,8 +5159,8 @@ static void set_ext_conn_params(struct hci_conn *conn, p->max_ce_len = cpu_to_le16(0x0000); } -int hci_le_ext_create_conn_sync(struct hci_dev *hdev, struct hci_conn *conn, - u8 own_addr_type) +static int hci_le_ext_create_conn_sync(struct hci_dev *hdev, + struct hci_conn *conn, u8 own_addr_type) { struct hci_cp_le_ext_create_conn *cp; struct hci_cp_le_ext_conn_param *p; diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index e817ff0607a06..8df99c07f2724 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -1436,6 +1436,7 @@ static void l2cap_ecred_connect(struct l2cap_chan *chan) l2cap_ecred_init(chan, 0); + memset(&data, 0, sizeof(data)); data.pdu.req.psm = chan->psm; data.pdu.req.mtu = cpu_to_le16(chan->imtu); data.pdu.req.mps = cpu_to_le16(chan->mps); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 230a7a8196c07..15eab8b968ce8 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -9086,12 +9086,14 @@ void mgmt_device_connected(struct hci_dev *hdev, 
struct hci_conn *conn, u16 eir_len = 0; u32 flags = 0; + /* allocate buff for LE or BR/EDR adv */ if (conn->le_adv_data_len > 0) skb = mgmt_alloc_skb(hdev, MGMT_EV_DEVICE_CONNECTED, - conn->le_adv_data_len); + sizeof(*ev) + conn->le_adv_data_len); else skb = mgmt_alloc_skb(hdev, MGMT_EV_DEVICE_CONNECTED, - 2 + name_len + 5); + sizeof(*ev) + (name ? eir_precalc_len(name_len) : 0) + + eir_precalc_len(sizeof(conn->dev_class))); ev = skb_put(skb, sizeof(*ev)); bacpy(&ev->addr.bdaddr, &conn->dst); @@ -9707,13 +9709,11 @@ void mgmt_remote_name(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, { struct sk_buff *skb; struct mgmt_ev_device_found *ev; - u16 eir_len; - u32 flags; + u16 eir_len = 0; + u32 flags = 0; - if (name_len) - skb = mgmt_alloc_skb(hdev, MGMT_EV_DEVICE_FOUND, 2 + name_len); - else - skb = mgmt_alloc_skb(hdev, MGMT_EV_DEVICE_FOUND, 0); + skb = mgmt_alloc_skb(hdev, MGMT_EV_DEVICE_FOUND, + sizeof(*ev) + (name ? eir_precalc_len(name_len) : 0)); ev = skb_put(skb, sizeof(*ev)); bacpy(&ev->addr.bdaddr, bdaddr); @@ -9723,10 +9723,8 @@ void mgmt_remote_name(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, if (name) { eir_len = eir_append_data(ev->eir, 0, EIR_NAME_COMPLETE, name, name_len); - flags = 0; skb_put(skb, eir_len); } else { - eir_len = 0; flags = MGMT_DEV_FOUND_NAME_REQUEST_FAILED; } diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 46dd957559672..eb43aaac0392c 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -960,7 +960,7 @@ int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kat if (!range_is_zero(user_ctx, offsetofend(typeof(*user_ctx), local_port), sizeof(*user_ctx))) goto out; - if (user_ctx->local_port > U16_MAX || user_ctx->remote_port > U16_MAX) { + if (user_ctx->local_port > U16_MAX) { ret = -ERANGE; goto out; } @@ -968,7 +968,7 @@ int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kat ctx.family = (u16)user_ctx->family; ctx.protocol = (u16)user_ctx->protocol; 
ctx.dport = (u16)user_ctx->local_port; - ctx.sport = (__force __be16)user_ctx->remote_port; + ctx.sport = user_ctx->remote_port; switch (ctx.family) { case AF_INET: diff --git a/net/can/isotp.c b/net/can/isotp.c index d2a430b6a13bd..5bce7c66c1219 100644 --- a/net/can/isotp.c +++ b/net/can/isotp.c @@ -141,6 +141,7 @@ struct isotp_sock { struct can_isotp_options opt; struct can_isotp_fc_options rxfc, txfc; struct can_isotp_ll_options ll; + u32 frame_txtime; u32 force_tx_stmin; u32 force_rx_stmin; struct tpcon rx, tx; @@ -360,7 +361,7 @@ static int isotp_rcv_fc(struct isotp_sock *so, struct canfd_frame *cf, int ae) so->tx_gap = ktime_set(0, 0); /* add transmission time for CAN frame N_As */ - so->tx_gap = ktime_add_ns(so->tx_gap, so->opt.frame_txtime); + so->tx_gap = ktime_add_ns(so->tx_gap, so->frame_txtime); /* add waiting time for consecutive frames N_Cs */ if (so->opt.flags & CAN_ISOTP_FORCE_TXSTMIN) so->tx_gap = ktime_add_ns(so->tx_gap, @@ -1005,26 +1006,29 @@ static int isotp_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, { struct sock *sk = sock->sk; struct sk_buff *skb; - int err = 0; - int noblock; + struct isotp_sock *so = isotp_sk(sk); + int noblock = flags & MSG_DONTWAIT; + int ret = 0; - noblock = flags & MSG_DONTWAIT; - flags &= ~MSG_DONTWAIT; + if (flags & ~(MSG_DONTWAIT | MSG_TRUNC | MSG_PEEK)) + return -EINVAL; + + if (!so->bound) + return -EADDRNOTAVAIL; - skb = skb_recv_datagram(sk, flags, noblock, &err); + flags &= ~MSG_DONTWAIT; + skb = skb_recv_datagram(sk, flags, noblock, &ret); if (!skb) - return err; + return ret; if (size < skb->len) msg->msg_flags |= MSG_TRUNC; else size = skb->len; - err = memcpy_to_msg(msg, skb->data, size); - if (err < 0) { - skb_free_datagram(sk, skb); - return err; - } + ret = memcpy_to_msg(msg, skb->data, size); + if (ret < 0) + goto out_err; sock_recv_timestamp(msg, sk, skb); @@ -1034,9 +1038,13 @@ static int isotp_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, memcpy(msg->msg_name, 
skb->cb, msg->msg_namelen); } + /* set length of return value */ + ret = (flags & MSG_TRUNC) ? skb->len : size; + +out_err: skb_free_datagram(sk, skb); - return size; + return ret; } static int isotp_release(struct socket *sock) @@ -1104,6 +1112,7 @@ static int isotp_bind(struct socket *sock, struct sockaddr *uaddr, int len) struct net *net = sock_net(sk); int ifindex; struct net_device *dev; + canid_t tx_id, rx_id; int err = 0; int notify_enetdown = 0; int do_rx_reg = 1; @@ -1111,8 +1120,18 @@ static int isotp_bind(struct socket *sock, struct sockaddr *uaddr, int len) if (len < ISOTP_MIN_NAMELEN) return -EINVAL; - if (addr->can_addr.tp.tx_id & (CAN_ERR_FLAG | CAN_RTR_FLAG)) - return -EADDRNOTAVAIL; + /* sanitize tx/rx CAN identifiers */ + tx_id = addr->can_addr.tp.tx_id; + if (tx_id & CAN_EFF_FLAG) + tx_id &= (CAN_EFF_FLAG | CAN_EFF_MASK); + else + tx_id &= CAN_SFF_MASK; + + rx_id = addr->can_addr.tp.rx_id; + if (rx_id & CAN_EFF_FLAG) + rx_id &= (CAN_EFF_FLAG | CAN_EFF_MASK); + else + rx_id &= CAN_SFF_MASK; if (!addr->can_ifindex) return -ENODEV; @@ -1124,21 +1143,13 @@ static int isotp_bind(struct socket *sock, struct sockaddr *uaddr, int len) do_rx_reg = 0; /* do not validate rx address for functional addressing */ - if (do_rx_reg) { - if (addr->can_addr.tp.rx_id == addr->can_addr.tp.tx_id) { - err = -EADDRNOTAVAIL; - goto out; - } - - if (addr->can_addr.tp.rx_id & (CAN_ERR_FLAG | CAN_RTR_FLAG)) { - err = -EADDRNOTAVAIL; - goto out; - } + if (do_rx_reg && rx_id == tx_id) { + err = -EADDRNOTAVAIL; + goto out; } if (so->bound && addr->can_ifindex == so->ifindex && - addr->can_addr.tp.rx_id == so->rxid && - addr->can_addr.tp.tx_id == so->txid) + rx_id == so->rxid && tx_id == so->txid) goto out; dev = dev_get_by_index(net, addr->can_ifindex); @@ -1162,8 +1173,7 @@ static int isotp_bind(struct socket *sock, struct sockaddr *uaddr, int len) ifindex = dev->ifindex; if (do_rx_reg) - can_rx_register(net, dev, addr->can_addr.tp.rx_id, - 
SINGLE_MASK(addr->can_addr.tp.rx_id), + can_rx_register(net, dev, rx_id, SINGLE_MASK(rx_id), isotp_rcv, sk, "isotp", sk); dev_put(dev); @@ -1183,8 +1193,8 @@ static int isotp_bind(struct socket *sock, struct sockaddr *uaddr, int len) /* switch to new settings */ so->ifindex = ifindex; - so->rxid = addr->can_addr.tp.rx_id; - so->txid = addr->can_addr.tp.tx_id; + so->rxid = rx_id; + so->txid = tx_id; so->bound = 1; out: @@ -1238,6 +1248,14 @@ static int isotp_setsockopt_locked(struct socket *sock, int level, int optname, /* no separate rx_ext_address is given => use ext_address */ if (!(so->opt.flags & CAN_ISOTP_RX_EXT_ADDR)) so->opt.rx_ext_address = so->opt.ext_address; + + /* check for frame_txtime changes (0 => no changes) */ + if (so->opt.frame_txtime) { + if (so->opt.frame_txtime == CAN_ISOTP_FRAME_TXTIME_ZERO) + so->frame_txtime = 0; + else + so->frame_txtime = so->opt.frame_txtime; + } break; case CAN_ISOTP_RECV_FC: @@ -1439,6 +1457,7 @@ static int isotp_init(struct sock *sk) so->opt.rxpad_content = CAN_ISOTP_DEFAULT_PAD_CONTENT; so->opt.txpad_content = CAN_ISOTP_DEFAULT_PAD_CONTENT; so->opt.frame_txtime = CAN_ISOTP_DEFAULT_FRAME_TXTIME; + so->frame_txtime = CAN_ISOTP_DEFAULT_FRAME_TXTIME; so->rxfc.bs = CAN_ISOTP_DEFAULT_RECV_BS; so->rxfc.stmin = CAN_ISOTP_DEFAULT_RECV_STMIN; so->rxfc.wftmax = CAN_ISOTP_DEFAULT_RECV_WFTMAX; diff --git a/net/core/dev.c b/net/core/dev.c index 1baab07820f65..91cf709c98b37 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -10732,8 +10732,7 @@ static int __net_init netdev_init(struct net *net) BUILD_BUG_ON(GRO_HASH_BUCKETS > 8 * sizeof_field(struct napi_struct, gro_bitmask)); - if (net != &init_net) - INIT_LIST_HEAD(&net->dev_base_head); + INIT_LIST_HEAD(&net->dev_base_head); net->dev_name_head = netdev_create_hash(); if (net->dev_name_head == NULL) diff --git a/net/core/filter.c b/net/core/filter.c index 9eb785842258a..af0bafe9dcce2 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -6777,24 +6777,33 @@ 
BPF_CALL_5(bpf_tcp_check_syncookie, struct sock *, sk, void *, iph, u32, iph_len if (!th->ack || th->rst || th->syn) return -ENOENT; + if (unlikely(iph_len < sizeof(struct iphdr))) + return -EINVAL; + if (tcp_synq_no_recent_overflow(sk)) return -ENOENT; cookie = ntohl(th->ack_seq) - 1; - switch (sk->sk_family) { - case AF_INET: - if (unlikely(iph_len < sizeof(struct iphdr))) + /* Both struct iphdr and struct ipv6hdr have the version field at the + * same offset so we can cast to the shorter header (struct iphdr). + */ + switch (((struct iphdr *)iph)->version) { + case 4: + if (sk->sk_family == AF_INET6 && ipv6_only_sock(sk)) return -EINVAL; ret = __cookie_v4_check((struct iphdr *)iph, th, cookie); break; #if IS_BUILTIN(CONFIG_IPV6) - case AF_INET6: + case 6: if (unlikely(iph_len < sizeof(struct ipv6hdr))) return -EINVAL; + if (sk->sk_family != AF_INET6) + return -EINVAL; + ret = __cookie_v6_check((struct ipv6hdr *)iph, th, cookie); break; #endif /* CONFIG_IPV6 */ @@ -8033,6 +8042,7 @@ bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type, struct bpf_insn_access_aux *info) { const int size_default = sizeof(__u32); + int field_size; if (off < 0 || off >= sizeof(struct bpf_sock)) return false; @@ -8044,7 +8054,6 @@ bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type, case offsetof(struct bpf_sock, family): case offsetof(struct bpf_sock, type): case offsetof(struct bpf_sock, protocol): - case offsetof(struct bpf_sock, dst_port): case offsetof(struct bpf_sock, src_port): case offsetof(struct bpf_sock, rx_queue_mapping): case bpf_ctx_range(struct bpf_sock, src_ip4): @@ -8053,6 +8062,14 @@ bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type, case bpf_ctx_range_till(struct bpf_sock, dst_ip6[0], dst_ip6[3]): bpf_ctx_record_field_size(info, size_default); return bpf_ctx_narrow_access_ok(off, size, size_default); + case bpf_ctx_range(struct bpf_sock, dst_port): + field_size = size == size_default ? 
+ size_default : sizeof_field(struct bpf_sock, dst_port); + bpf_ctx_record_field_size(info, field_size); + return bpf_ctx_narrow_access_ok(off, size, field_size); + case offsetofend(struct bpf_sock, dst_port) ... + offsetof(struct bpf_sock, dst_ip4) - 1: + return false; } return size == size_default; @@ -10604,12 +10621,24 @@ static bool sk_lookup_is_valid_access(int off, int size, case bpf_ctx_range(struct bpf_sk_lookup, local_ip4): case bpf_ctx_range_till(struct bpf_sk_lookup, remote_ip6[0], remote_ip6[3]): case bpf_ctx_range_till(struct bpf_sk_lookup, local_ip6[0], local_ip6[3]): - case bpf_ctx_range(struct bpf_sk_lookup, remote_port): case bpf_ctx_range(struct bpf_sk_lookup, local_port): case bpf_ctx_range(struct bpf_sk_lookup, ingress_ifindex): bpf_ctx_record_field_size(info, sizeof(__u32)); return bpf_ctx_narrow_access_ok(off, size, sizeof(__u32)); + case bpf_ctx_range(struct bpf_sk_lookup, remote_port): + /* Allow 4-byte access to 2-byte field for backward compatibility */ + if (size == sizeof(__u32)) + return true; + bpf_ctx_record_field_size(info, sizeof(__be16)); + return bpf_ctx_narrow_access_ok(off, size, sizeof(__be16)); + + case offsetofend(struct bpf_sk_lookup, remote_port) ... 
+ offsetof(struct bpf_sk_lookup, local_ip4) - 1: + /* Allow access to zero padding for backward compatibility */ + bpf_ctx_record_field_size(info, sizeof(__u16)); + return bpf_ctx_narrow_access_ok(off, size, sizeof(__u16)); + default: return false; } @@ -10691,6 +10720,11 @@ static u32 sk_lookup_convert_ctx_access(enum bpf_access_type type, sport, 2, target_size)); break; + case offsetofend(struct bpf_sk_lookup, remote_port): + *target_size = 2; + *insn++ = BPF_MOV32_IMM(si->dst_reg, 0); + break; + case offsetof(struct bpf_sk_lookup, local_port): *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, bpf_target_off(struct bpf_sk_lookup_kern, diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 15833e1d6ea11..544d2028ccf51 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -1182,6 +1182,7 @@ bool __skb_flow_dissect(const struct net *net, VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; } key_vlan->vlan_tpid = saved_vlan_tpid; + key_vlan->vlan_eth_type = proto; } fdret = FLOW_DISSECT_RET_PROTO_AGAIN; diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index a5b5bb99c6446..212e65add9512 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -44,13 +44,7 @@ EXPORT_SYMBOL_GPL(net_rwsem); static struct key_tag init_net_key_domain = { .usage = REFCOUNT_INIT(1) }; #endif -struct net init_net = { - .ns.count = REFCOUNT_INIT(1), - .dev_base_head = LIST_HEAD_INIT(init_net.dev_base_head), -#ifdef CONFIG_KEYS - .key_domain = &init_net_key_domain, -#endif -}; +struct net init_net; EXPORT_SYMBOL(init_net); static bool init_net_initialized; @@ -1084,7 +1078,7 @@ static void rtnl_net_notifyid(struct net *net, int cmd, int id, u32 portid, rtnl_set_sk_err(net, RTNLGRP_NSID, err); } -static int __init net_ns_init(void) +void __init net_ns_init(void) { struct net_generic *ng; @@ -1105,6 +1099,9 @@ static int __init net_ns_init(void) rcu_assign_pointer(init_net.gen, ng); +#ifdef CONFIG_KEYS + init_net.key_domain = 
&init_net_key_domain; +#endif down_write(&pernet_ops_rwsem); if (setup_net(&init_net, &init_user_ns)) panic("Could not setup the initial network namespace"); @@ -1119,12 +1116,8 @@ static int __init net_ns_init(void) RTNL_FLAG_DOIT_UNLOCKED); rtnl_register(PF_UNSPEC, RTM_GETNSID, rtnl_net_getid, rtnl_net_dumpid, RTNL_FLAG_DOIT_UNLOCKED); - - return 0; } -pure_initcall(net_ns_init); - static void free_exit_list(struct pernet_operations *ops, struct list_head *net_exit_list) { ops_pre_exit_list(ops, net_exit_list); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 2fb8eb6791e8a..43b995e935cd6 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3652,13 +3652,24 @@ static int rtnl_alt_ifname(int cmd, struct net_device *dev, struct nlattr *attr, bool *changed, struct netlink_ext_ack *extack) { char *alt_ifname; + size_t size; int err; err = nla_validate(attr, attr->nla_len, IFLA_MAX, ifla_policy, extack); if (err) return err; - alt_ifname = nla_strdup(attr, GFP_KERNEL); + if (cmd == RTM_NEWLINKPROP) { + size = rtnl_prop_list_size(dev); + size += nla_total_size(ALTIFNAMSIZ); + if (size >= U16_MAX) { + NL_SET_ERR_MSG(extack, + "effective property list too long"); + return -EINVAL; + } + } + + alt_ifname = nla_strdup(attr, GFP_KERNEL_ACCOUNT); if (!alt_ifname) return -ENOMEM; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index ea51e23e9247e..180fa6a26ad45 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -201,7 +201,7 @@ static void __build_skb_around(struct sk_buff *skb, void *data, skb->head = data; skb->data = data; skb_reset_tail_pointer(skb); - skb->end = skb->tail + size; + skb_set_end_offset(skb, size); skb->mac_header = (typeof(skb->mac_header))~0U; skb->transport_header = (typeof(skb->transport_header))~0U; @@ -1736,11 +1736,10 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, skb->head = data; skb->head_frag = 0; skb->data += off; + + skb_set_end_offset(skb, size); #ifdef NET_SKBUFF_DATA_USES_OFFSET - 
skb->end = size; off = nhead; -#else - skb->end = skb->head + size; #endif skb->tail += off; skb_headers_offset_update(skb, nhead); @@ -1788,6 +1787,38 @@ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom) } EXPORT_SYMBOL(skb_realloc_headroom); +int __skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri) +{ + unsigned int saved_end_offset, saved_truesize; + struct skb_shared_info *shinfo; + int res; + + saved_end_offset = skb_end_offset(skb); + saved_truesize = skb->truesize; + + res = pskb_expand_head(skb, 0, 0, pri); + if (res) + return res; + + skb->truesize = saved_truesize; + + if (likely(skb_end_offset(skb) == saved_end_offset)) + return 0; + + shinfo = skb_shinfo(skb); + + /* We are about to change back skb->end, + * we need to move skb_shinfo() to its new location. + */ + memmove(skb->head + saved_end_offset, + shinfo, + offsetof(struct skb_shared_info, frags[shinfo->nr_frags])); + + skb_set_end_offset(skb, saved_end_offset); + + return 0; +} + /** * skb_expand_head - reallocate header of &sk_buff * @skb: buffer to reallocate @@ -5244,11 +5275,18 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, if (skb_cloned(to)) return false; - /* The page pool signature of struct page will eventually figure out - * which pages can be recycled or not but for now let's prohibit slab - * allocated and page_pool allocated SKBs from being coalesced. + /* In general, avoid mixing slab allocated and page_pool allocated + * pages within the same SKB. However when @to is not pp_recycle and + * @from is cloned, we can transition frag pages from page_pool to + * reference counted. + * + * On the other hand, don't allow coalescing two pp_recycle SKBs if + * @from is cloned, in case the SKB is using page_pool fragment + * references (PP_FLAG_PAGE_FRAG). Since we only take full page + * references for cloned SKBs at the moment that would result in + * inconsistent reference counts. 
*/ - if (to->pp_recycle != from->pp_recycle) + if (to->pp_recycle != (from->pp_recycle && !skb_cloned(from))) return false; if (len <= skb_tailroom(to)) { @@ -6044,11 +6082,7 @@ static int pskb_carve_inside_header(struct sk_buff *skb, const u32 off, skb->head = data; skb->data = data; skb->head_frag = 0; -#ifdef NET_SKBUFF_DATA_USES_OFFSET - skb->end = size; -#else - skb->end = skb->head + size; -#endif + skb_set_end_offset(skb, size); skb_set_tail_pointer(skb, skb_headlen(skb)); skb_headers_offset_update(skb, 0); skb->cloned = 0; @@ -6186,11 +6220,7 @@ static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off, skb->head = data; skb->head_frag = 0; skb->data = data; -#ifdef NET_SKBUFF_DATA_USES_OFFSET - skb->end = size; -#else - skb->end = skb->head + size; -#endif + skb_set_end_offset(skb, size); skb_reset_tail_pointer(skb); skb_headers_offset_update(skb, 0); skb->cloned = 0; diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 929a2b096b04e..cc381165ea080 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -27,6 +27,7 @@ int sk_msg_alloc(struct sock *sk, struct sk_msg *msg, int len, int elem_first_coalesce) { struct page_frag *pfrag = sk_page_frag(sk); + u32 osize = msg->sg.size; int ret = 0; len -= msg->sg.size; @@ -35,13 +36,17 @@ int sk_msg_alloc(struct sock *sk, struct sk_msg *msg, int len, u32 orig_offset; int use, i; - if (!sk_page_frag_refill(sk, pfrag)) - return -ENOMEM; + if (!sk_page_frag_refill(sk, pfrag)) { + ret = -ENOMEM; + goto msg_trim; + } orig_offset = pfrag->offset; use = min_t(int, len, pfrag->size - orig_offset); - if (!sk_wmem_schedule(sk, use)) - return -ENOMEM; + if (!sk_wmem_schedule(sk, use)) { + ret = -ENOMEM; + goto msg_trim; + } i = msg->sg.end; sk_msg_iter_var_prev(i); @@ -71,6 +76,10 @@ int sk_msg_alloc(struct sock *sk, struct sk_msg *msg, int len, } return ret; + +msg_trim: + sk_msg_trim(sk, msg, osize); + return ret; } EXPORT_SYMBOL_GPL(sk_msg_alloc); diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 
88e2808019b47..d3ce6113e6c36 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -561,7 +561,6 @@ static void dsa_port_teardown(struct dsa_port *dp) struct devlink_port *dlp = &dp->devlink_port; struct dsa_switch *ds = dp->ds; struct dsa_mac_addr *a, *tmp; - struct net_device *slave; if (!dp->setup) return; @@ -583,11 +582,9 @@ static void dsa_port_teardown(struct dsa_port *dp) dsa_port_link_unregister_of(dp); break; case DSA_PORT_TYPE_USER: - slave = dp->slave; - - if (slave) { + if (dp->slave) { + dsa_slave_destroy(dp->slave); dp->slave = NULL; - dsa_slave_destroy(slave); } break; } @@ -1137,17 +1134,17 @@ static int dsa_tree_setup(struct dsa_switch_tree *dst) if (err) goto teardown_cpu_ports; - err = dsa_tree_setup_master(dst); + err = dsa_tree_setup_ports(dst); if (err) goto teardown_switches; - err = dsa_tree_setup_ports(dst); + err = dsa_tree_setup_master(dst); if (err) - goto teardown_master; + goto teardown_ports; err = dsa_tree_setup_lags(dst); if (err) - goto teardown_ports; + goto teardown_master; dst->setup = true; @@ -1155,10 +1152,10 @@ static int dsa_tree_setup(struct dsa_switch_tree *dst) return 0; -teardown_ports: - dsa_tree_teardown_ports(dst); teardown_master: dsa_tree_teardown_master(dst); +teardown_ports: + dsa_tree_teardown_ports(dst); teardown_switches: dsa_tree_teardown_switches(dst); teardown_cpu_ports: @@ -1176,10 +1173,10 @@ static void dsa_tree_teardown(struct dsa_switch_tree *dst) dsa_tree_teardown_lags(dst); - dsa_tree_teardown_ports(dst); - dsa_tree_teardown_master(dst); + dsa_tree_teardown_ports(dst); + dsa_tree_teardown_switches(dst); dsa_tree_teardown_cpu_ports(dst); @@ -1722,6 +1719,10 @@ void dsa_switch_shutdown(struct dsa_switch *ds) struct dsa_port *dp; mutex_lock(&dsa2_mutex); + + if (!ds->setup) + goto out; + rtnl_lock(); dsa_switch_for_each_user_port(dp, ds) { @@ -1738,6 +1739,7 @@ void dsa_switch_shutdown(struct dsa_switch *ds) dp->master->dsa_ptr = NULL; rtnl_unlock(); +out: mutex_unlock(&dsa2_mutex); } 
EXPORT_SYMBOL_GPL(dsa_switch_shutdown); diff --git a/net/dsa/master.c b/net/dsa/master.c index 880f910b23a99..10b51ffbb6f48 100644 --- a/net/dsa/master.c +++ b/net/dsa/master.c @@ -337,11 +337,24 @@ static const struct attribute_group dsa_group = { static struct lock_class_key dsa_master_addr_list_lock_key; +static void dsa_master_reset_mtu(struct net_device *dev) +{ + int err; + + err = dev_set_mtu(dev, ETH_DATA_LEN); + if (err) + netdev_dbg(dev, + "Unable to reset MTU to exclude DSA overheads\n"); +} + int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp) { + const struct dsa_device_ops *tag_ops = cpu_dp->tag_ops; struct dsa_switch *ds = cpu_dp->ds; struct device_link *consumer_link; - int ret; + int mtu, ret; + + mtu = ETH_DATA_LEN + dsa_tag_protocol_overhead(tag_ops); /* The DSA master must use SET_NETDEV_DEV for this to work. */ consumer_link = device_link_add(ds->dev, dev->dev.parent, @@ -351,6 +364,15 @@ int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp) "Failed to create a device link to DSA switch %s\n", dev_name(ds->dev)); + /* The switch driver may not implement ->port_change_mtu(), case in + * which dsa_slave_change_mtu() will not update the master MTU either, + * so we need to do that here. + */ + ret = dev_set_mtu(dev, mtu); + if (ret) + netdev_warn(dev, "error %d setting MTU to %d to include DSA overhead\n", + ret, mtu); + /* If we use a tagging format that doesn't have an ethertype * field, make sure that all packets from this point on get * sent to the tag format's receive function. 
@@ -388,6 +410,7 @@ void dsa_master_teardown(struct net_device *dev) sysfs_remove_group(&dev->dev.kobj, &dsa_group); dsa_netdev_ops_set(dev, NULL); dsa_master_ethtool_teardown(dev); + dsa_master_reset_mtu(dev); dsa_master_set_promiscuity(dev, -1); dev->dsa_ptr = NULL; diff --git a/net/dsa/switch.c b/net/dsa/switch.c index e3c7d2627a619..517cc83d13cc8 100644 --- a/net/dsa/switch.c +++ b/net/dsa/switch.c @@ -113,26 +113,15 @@ static int dsa_switch_bridge_join(struct dsa_switch *ds, return dsa_tag_8021q_bridge_join(ds, info); } -static int dsa_switch_bridge_leave(struct dsa_switch *ds, - struct dsa_notifier_bridge_info *info) +static int dsa_switch_sync_vlan_filtering(struct dsa_switch *ds, + struct dsa_notifier_bridge_info *info) { - struct dsa_switch_tree *dst = ds->dst; struct netlink_ext_ack extack = {0}; bool change_vlan_filtering = false; bool vlan_filtering; struct dsa_port *dp; int err; - if (dst->index == info->tree_index && ds->index == info->sw_index && - ds->ops->port_bridge_leave) - ds->ops->port_bridge_leave(ds, info->port, info->bridge); - - if ((dst->index != info->tree_index || ds->index != info->sw_index) && - ds->ops->crosschip_bridge_leave) - ds->ops->crosschip_bridge_leave(ds, info->tree_index, - info->sw_index, info->port, - info->bridge); - if (ds->needs_standalone_vlan_filtering && !br_vlan_enabled(info->bridge.dev)) { change_vlan_filtering = true; @@ -172,6 +161,31 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds, return err; } + return 0; +} + +static int dsa_switch_bridge_leave(struct dsa_switch *ds, + struct dsa_notifier_bridge_info *info) +{ + struct dsa_switch_tree *dst = ds->dst; + int err; + + if (dst->index == info->tree_index && ds->index == info->sw_index && + ds->ops->port_bridge_leave) + ds->ops->port_bridge_leave(ds, info->port, info->bridge); + + if ((dst->index != info->tree_index || ds->index != info->sw_index) && + ds->ops->crosschip_bridge_leave) + ds->ops->crosschip_bridge_leave(ds, info->tree_index, + 
info->sw_index, info->port, + info->bridge); + + if (ds->dst->index == info->tree_index && ds->index == info->sw_index) { + err = dsa_switch_sync_vlan_filtering(ds, info); + if (err) + return err; + } + return dsa_tag_8021q_bridge_leave(ds, info); } diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 87983e70f03f3..a833a7a67ce79 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -669,6 +669,24 @@ config TCP_CONG_BBR AQM schemes that do not provide a delay signal. It requires the fq ("Fair Queue") pacing packet scheduler. +config TCP_CONG_BBR2 + tristate "BBR2 TCP" + default n + help + + BBR2 TCP congestion control is a model-based congestion control + algorithm that aims to maximize network utilization, keep queues and + retransmit rates low, and to be able to coexist with Reno/CUBIC in + common scenarios. It builds an explicit model of the network path. It + tolerates a targeted degree of random packet loss and delay that are + unrelated to congestion. It can operate over LAN, WAN, cellular, wifi, + or cable modem links, and can use DCTCP-L4S-style ECN signals. It can + coexist with flows that use loss-based congestion control, and can + operate with shallow buffers, deep buffers, bufferbloat, policers, or + AQM schemes that do not provide a delay signal. It requires pacing, + using either TCP internal pacing or the fq ("Fair Queue") pacing packet + scheduler. 
+ choice prompt "Default TCP congestion control" default DEFAULT_CUBIC @@ -706,6 +724,9 @@ choice config DEFAULT_BBR bool "BBR" if TCP_CONG_BBR=y + config DEFAULT_BBR2 + bool "BBR2" if TCP_CONG_BBR2=y + config DEFAULT_RENO bool "Reno" endchoice @@ -730,6 +751,7 @@ config DEFAULT_TCP_CONG default "dctcp" if DEFAULT_DCTCP default "cdg" if DEFAULT_CDG default "bbr" if DEFAULT_BBR + default "bbr2" if DEFAULT_BBR2 default "cubic" config TCP_MD5SIG diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index bbdd9c44f14e3..8dee1547d8202 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -46,6 +46,7 @@ obj-$(CONFIG_INET_TCP_DIAG) += tcp_diag.o obj-$(CONFIG_INET_UDP_DIAG) += udp_diag.o obj-$(CONFIG_INET_RAW_DIAG) += raw_diag.o obj-$(CONFIG_TCP_CONG_BBR) += tcp_bbr.o +obj-$(CONFIG_TCP_CONG_BBR2) += tcp_bbr2.o obj-$(CONFIG_TCP_CONG_BIC) += tcp_bic.o obj-$(CONFIG_TCP_CONG_CDG) += tcp_cdg.o obj-$(CONFIG_TCP_CONG_CUBIC) += tcp_cubic.o diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 4db0325f6e1af..dc28f0588e540 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1116,13 +1116,18 @@ static int arp_req_get(struct arpreq *r, struct net_device *dev) return err; } -static int arp_invalidate(struct net_device *dev, __be32 ip) +int arp_invalidate(struct net_device *dev, __be32 ip, bool force) { struct neighbour *neigh = neigh_lookup(&arp_tbl, &ip, dev); int err = -ENXIO; struct neigh_table *tbl = &arp_tbl; if (neigh) { + if ((neigh->nud_state & NUD_VALID) && !force) { + neigh_release(neigh); + return 0; + } + if (neigh->nud_state & ~NUD_NOARP) err = neigh_update(neigh, NULL, NUD_FAILED, NEIGH_UPDATE_F_OVERRIDE| @@ -1169,7 +1174,7 @@ static int arp_req_delete(struct net *net, struct arpreq *r, if (!dev) return -EINVAL; } - return arp_invalidate(dev, ip); + return arp_invalidate(dev, ip, true); } /* diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c index de610cb83694f..afda34c26c627 100644 --- a/net/ipv4/bpf_tcp_ca.c +++ b/net/ipv4/bpf_tcp_ca.c @@ -20,7 +20,7 @@ static 
u32 optional_ops[] = { offsetof(struct tcp_congestion_ops, cwnd_event), offsetof(struct tcp_congestion_ops, in_ack_event), offsetof(struct tcp_congestion_ops, pkts_acked), - offsetof(struct tcp_congestion_ops, min_tso_segs), + offsetof(struct tcp_congestion_ops, tso_segs), offsetof(struct tcp_congestion_ops, sndbuf_expand), offsetof(struct tcp_congestion_ops, cong_control), }; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 85117b45216d4..89a5a48755950 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1115,9 +1115,11 @@ void fib_add_ifaddr(struct in_ifaddr *ifa) return; /* Add broadcast address, if it is explicitly assigned. */ - if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF)) + if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF)) { fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim, 0); + arp_invalidate(dev, ifa->ifa_broadcast, false); + } if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags & IFA_F_SECONDARY) && (prefix != addr || ifa->ifa_prefixlen < 32)) { @@ -1131,6 +1133,7 @@ void fib_add_ifaddr(struct in_ifaddr *ifa) if (ifa->ifa_prefixlen < 31) { fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix | ~mask, 32, prim, 0); + arp_invalidate(dev, prefix | ~mask, false); } } } diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 2dd375f7407b6..0a0f497703450 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -888,8 +888,13 @@ int fib_nh_match(struct net *net, struct fib_config *cfg, struct fib_info *fi, } if (cfg->fc_oif || cfg->fc_gw_family) { - struct fib_nh *nh = fib_info_nh(fi, 0); + struct fib_nh *nh; + + /* cannot match on nexthop object attributes */ + if (fi->nh) + return 1; + nh = fib_info_nh(fi, 0); if (cfg->fc_encap) { if (fib_encap_match(net, cfg->fc_encap_type, cfg->fc_encap, nh, cfg, extack)) diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 30ab717ff1b81..17440840a7914 100644 --- 
a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -637,7 +637,9 @@ int __inet_hash(struct sock *sk, struct sock *osk) int err = 0; if (sk->sk_state != TCP_LISTEN) { + local_bh_disable(); inet_ehash_nolisten(sk, osk, NULL); + local_bh_enable(); return 0; } WARN_ON(!sk_unhashed(sk)); @@ -669,45 +671,54 @@ int inet_hash(struct sock *sk) { int err = 0; - if (sk->sk_state != TCP_CLOSE) { - local_bh_disable(); + if (sk->sk_state != TCP_CLOSE) err = __inet_hash(sk, NULL); - local_bh_enable(); - } return err; } EXPORT_SYMBOL_GPL(inet_hash); -void inet_unhash(struct sock *sk) +static void __inet_unhash(struct sock *sk, struct inet_listen_hashbucket *ilb) { - struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; - struct inet_listen_hashbucket *ilb = NULL; - spinlock_t *lock; - if (sk_unhashed(sk)) return; - if (sk->sk_state == TCP_LISTEN) { - ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; - lock = &ilb->lock; - } else { - lock = inet_ehash_lockp(hashinfo, sk->sk_hash); - } - spin_lock_bh(lock); - if (sk_unhashed(sk)) - goto unlock; - if (rcu_access_pointer(sk->sk_reuseport_cb)) reuseport_stop_listen_sock(sk); if (ilb) { + struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; + inet_unhash2(hashinfo, sk); ilb->count--; } __sk_nulls_del_node_init_rcu(sk); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); -unlock: - spin_unlock_bh(lock); +} + +void inet_unhash(struct sock *sk) +{ + struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; + + if (sk_unhashed(sk)) + return; + + if (sk->sk_state == TCP_LISTEN) { + struct inet_listen_hashbucket *ilb; + + ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; + /* Don't disable bottom halves while acquiring the lock to + * avoid circular locking dependency on PREEMPT_RT. 
+ */ + spin_lock(&ilb->lock); + __inet_unhash(sk, ilb); + spin_unlock(&ilb->lock); + } else { + spinlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash); + + spin_lock_bh(lock); + __inet_unhash(sk, NULL); + spin_unlock_bh(lock); + } } EXPORT_SYMBOL_GPL(inet_unhash); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index f33ad1f383b68..d5d058de36646 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -499,6 +499,15 @@ void __ip_select_ident(struct net *net, struct iphdr *iph, int segs) } EXPORT_SYMBOL(__ip_select_ident); +static void ip_rt_fix_tos(struct flowi4 *fl4) +{ + __u8 tos = RT_FL_TOS(fl4); + + fl4->flowi4_tos = tos & IPTOS_RT_MASK; + fl4->flowi4_scope = tos & RTO_ONLINK ? + RT_SCOPE_LINK : RT_SCOPE_UNIVERSE; +} + static void __build_flow_key(const struct net *net, struct flowi4 *fl4, const struct sock *sk, const struct iphdr *iph, @@ -824,6 +833,7 @@ static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buf rt = (struct rtable *) dst; __build_flow_key(net, &fl4, sk, iph, oif, tos, prot, mark, 0); + ip_rt_fix_tos(&fl4); __ip_do_redirect(rt, skb, &fl4, true); } @@ -1048,6 +1058,7 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, struct flowi4 fl4; ip_rt_build_flow_key(&fl4, sk, skb); + ip_rt_fix_tos(&fl4); /* Don't make lookup fail for bridged encapsulations */ if (skb && netif_is_any_bridge_port(skb->dev)) @@ -1122,6 +1133,8 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) goto out; new = true; + } else { + ip_rt_fix_tos(&fl4); } __ip_rt_update_pmtu((struct rtable *)xfrm_dst_path(&rt->dst), &fl4, mtu); @@ -2603,7 +2616,6 @@ static struct rtable *__mkroute_output(const struct fib_result *res, struct rtable *ip_route_output_key_hash(struct net *net, struct flowi4 *fl4, const struct sk_buff *skb) { - __u8 tos = RT_FL_TOS(fl4); struct fib_result res = { .type = RTN_UNSPEC, .fi = NULL, @@ -2613,9 +2625,7 @@ struct rtable *ip_route_output_key_hash(struct net *net, struct flowi4 *fl4, 
struct rtable *rth; fl4->flowi4_iif = LOOPBACK_IFINDEX; - fl4->flowi4_tos = tos & IPTOS_RT_MASK; - fl4->flowi4_scope = ((tos & RTO_ONLINK) ? - RT_SCOPE_LINK : RT_SCOPE_UNIVERSE); + ip_rt_fix_tos(fl4); rcu_read_lock(); rth = ip_route_output_key_hash_rcu(net, fl4, &res, skb); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 28ff2a820f7c9..857f7cef2bd49 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3085,6 +3085,7 @@ int tcp_disconnect(struct sock *sk, int flags) tp->rx_opt.dsack = 0; tp->rx_opt.num_sacks = 0; tp->rcv_ooopack = 0; + tp->fast_ack_mode = 0; /* Clean up fastopen related fields */ diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index ec5550089b4d2..56e719166d4c3 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -294,26 +294,40 @@ static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain) sk->sk_pacing_rate = rate; } -/* override sysctl_tcp_min_tso_segs */ static u32 bbr_min_tso_segs(struct sock *sk) { return sk->sk_pacing_rate < (bbr_min_tso_rate >> 3) ? 1 : 2; } +/* Return the number of segments BBR would like in a TSO/GSO skb, given + * a particular max gso size as a constraint. + */ +static u32 bbr_tso_segs_generic(struct sock *sk, unsigned int mss_now, + u32 gso_max_size) +{ + u32 segs; + u64 bytes; + + /* Budget a TSO/GSO burst size allowance based on bw (pacing_rate). */ + bytes = sk->sk_pacing_rate >> sk->sk_pacing_shift; + + bytes = min_t(u32, bytes, gso_max_size - 1 - MAX_TCP_HEADER); + segs = max_t(u32, bytes / mss_now, bbr_min_tso_segs(sk)); + return segs; +} + +/* Custom tcp_tso_autosize() for BBR, used at transmit time to cap skb size. */ +static u32 bbr_tso_segs(struct sock *sk, unsigned int mss_now) +{ + return bbr_tso_segs_generic(sk, mss_now, sk->sk_gso_max_size); +} + +/* Like bbr_tso_segs(), using mss_cache, ignoring driver's sk_gso_max_size. 
*/ static u32 bbr_tso_segs_goal(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); - u32 segs, bytes; - - /* Sort of tcp_tso_autosize() but ignoring - * driver provided sk_gso_max_size. - */ - bytes = min_t(unsigned long, - sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift), - GSO_MAX_SIZE - 1 - MAX_TCP_HEADER); - segs = max_t(u32, bytes / tp->mss_cache, bbr_min_tso_segs(sk)); - return min(segs, 0x7FU); + return bbr_tso_segs_generic(sk, tp->mss_cache, GSO_MAX_SIZE); } /* Save "last known good" cwnd so we can restore it after losses or PROBE_RTT */ @@ -1149,7 +1163,7 @@ static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = { .undo_cwnd = bbr_undo_cwnd, .cwnd_event = bbr_cwnd_event, .ssthresh = bbr_ssthresh, - .min_tso_segs = bbr_min_tso_segs, + .tso_segs = bbr_tso_segs, .get_info = bbr_get_info, .set_state = bbr_set_state, }; diff --git a/net/ipv4/tcp_bbr2.c b/net/ipv4/tcp_bbr2.c new file mode 100644 index 0000000000000..fa49e17c47ca9 --- /dev/null +++ b/net/ipv4/tcp_bbr2.c @@ -0,0 +1,2674 @@ +/* BBR (Bottleneck Bandwidth and RTT) congestion control, v2 + * + * BBRv2 is a model-based congestion control algorithm that aims for low + * queues, low loss, and (bounded) Reno/CUBIC coexistence. To maintain a model + * of the network path, it uses measurements of bandwidth and RTT, as well as + * (if they occur) packet loss and/or DCTCP/L4S-style ECN signals. Note that + * although it can use ECN or loss signals explicitly, it does not require + * either; it can bound its in-flight data based on its estimate of the BDP. + * + * The model has both higher and lower bounds for the operating range: + * lo: bw_lo, inflight_lo: conservative short-term lower bound + * hi: bw_hi, inflight_hi: robust long-term upper bound + * The bandwidth-probing time scale is (a) extended dynamically based on + * estimated BDP to improve coexistence with Reno/CUBIC; (b) bounded by + * an interactive wall-clock time-scale to be more scalable and responsive + * than Reno and CUBIC. 
+ * + * Here is a state transition diagram for BBR: + * + * | + * V + * +---> STARTUP ----+ + * | | | + * | V | + * | DRAIN ----+ + * | | | + * | V | + * +---> PROBE_BW ----+ + * | ^ | | + * | | | | + * | +----+ | + * | | + * +---- PROBE_RTT <--+ + * + * A BBR flow starts in STARTUP, and ramps up its sending rate quickly. + * When it estimates the pipe is full, it enters DRAIN to drain the queue. + * In steady state a BBR flow only uses PROBE_BW and PROBE_RTT. + * A long-lived BBR flow spends the vast majority of its time remaining + * (repeatedly) in PROBE_BW, fully probing and utilizing the pipe's bandwidth + * in a fair manner, with a small, bounded queue. *If* a flow has been + * continuously sending for the entire min_rtt window, and hasn't seen an RTT + * sample that matches or decreases its min_rtt estimate for 10 seconds, then + * it briefly enters PROBE_RTT to cut inflight to a minimum value to re-probe + * the path's two-way propagation delay (min_rtt). When exiting PROBE_RTT, if + * we estimated that we reached the full bw of the pipe then we enter PROBE_BW; + * otherwise we enter STARTUP to try to fill the pipe. + * + * BBR is described in detail in: + * "BBR: Congestion-Based Congestion Control", + * Neal Cardwell, Yuchung Cheng, C. Stephen Gunn, Soheil Hassas Yeganeh, + * Van Jacobson. ACM Queue, Vol. 14 No. 5, September-October 2016. + * + * There is a public e-mail list for discussing BBR development and testing: + * https://groups.google.com/forum/#!forum/bbr-dev + * + * NOTE: BBR might be used with the fq qdisc ("man tc-fq") with pacing enabled, + * otherwise TCP stack falls back to an internal pacing using one high + * resolution timer per TCP socket and may use more resources. + */ +#include +#include +#include +#include +#include + +#include "tcp_dctcp.h" + +/* Scale factor for rate in pkt/uSec unit to avoid truncation in bandwidth + * estimation. The rate unit ~= (1500 bytes / 1 usec / 2^24) ~= 715 bps. 
+ * This handles bandwidths from 0.06pps (715bps) to 256Mpps (3Tbps) in a u32. + * Since the minimum window is >=4 packets, the lower bound isn't + * an issue. The upper bound isn't an issue with existing technologies. + */ +#define BW_SCALE 24 +#define BW_UNIT (1 << BW_SCALE) + +#define BBR_SCALE 8 /* scaling factor for fractions in BBR (e.g. gains) */ +#define BBR_UNIT (1 << BBR_SCALE) + +#define FLAG_DEBUG_VERBOSE 0x1 /* Verbose debugging messages */ +#define FLAG_DEBUG_LOOPBACK 0x2 /* Do NOT skip loopback addr */ + +#define CYCLE_LEN 8 /* number of phases in a pacing gain cycle */ + +/* BBR has the following modes for deciding how fast to send: */ +enum bbr_mode { + BBR_STARTUP, /* ramp up sending rate rapidly to fill pipe */ + BBR_DRAIN, /* drain any queue created during startup */ + BBR_PROBE_BW, /* discover, share bw: pace around estimated bw */ + BBR_PROBE_RTT, /* cut inflight to min to probe min_rtt */ +}; + +/* How does the incoming ACK stream relate to our bandwidth probing? */ +enum bbr_ack_phase { + BBR_ACKS_INIT, /* not probing; not getting probe feedback */ + BBR_ACKS_REFILLING, /* sending at est. 
bw to fill pipe */ + BBR_ACKS_PROBE_STARTING, /* inflight rising to probe bw */ + BBR_ACKS_PROBE_FEEDBACK, /* getting feedback from bw probing */ + BBR_ACKS_PROBE_STOPPING, /* stopped probing; still getting feedback */ +}; + +/* BBR congestion control block */ +struct bbr { + u32 min_rtt_us; /* min RTT in min_rtt_win_sec window */ + u32 min_rtt_stamp; /* timestamp of min_rtt_us */ + u32 probe_rtt_done_stamp; /* end time for BBR_PROBE_RTT mode */ + u32 probe_rtt_min_us; /* min RTT in bbr_probe_rtt_win_ms window */ + u32 probe_rtt_min_stamp; /* timestamp of probe_rtt_min_us*/ + u32 next_rtt_delivered; /* scb->tx.delivered at end of round */ + u32 prior_rcv_nxt; /* tp->rcv_nxt when CE state last changed */ + u64 cycle_mstamp; /* time of this cycle phase start */ + u32 mode:3, /* current bbr_mode in state machine */ + prev_ca_state:3, /* CA state on previous ACK */ + packet_conservation:1, /* use packet conservation? */ + round_start:1, /* start of packet-timed tx->ack round? */ + ce_state:1, /* If most recent data has CE bit set */ + bw_probe_up_rounds:5, /* cwnd-limited rounds in PROBE_UP */ + try_fast_path:1, /* can we take fast path? */ + unused2:11, + idle_restart:1, /* restarting after idle? */ + probe_rtt_round_done:1, /* a BBR_PROBE_RTT round at 4 pkts? */ + cycle_idx:3, /* current index in pacing_gain cycle array */ + has_seen_rtt:1; /* have we seen an RTT sample yet? */ + u32 pacing_gain:11, /* current gain for setting pacing rate */ + cwnd_gain:11, /* current gain for setting cwnd */ + full_bw_reached:1, /* reached full bw in Startup? 
*/ + full_bw_cnt:2, /* number of rounds without large bw gains */ + init_cwnd:7; /* initial cwnd */ + u32 prior_cwnd; /* prior cwnd upon entering loss recovery */ + u32 full_bw; /* recent bw, to estimate if pipe is full */ + + /* For tracking ACK aggregation: */ + u64 ack_epoch_mstamp; /* start of ACK sampling epoch */ + u16 extra_acked[2]; /* max excess data ACKed in epoch */ + u32 ack_epoch_acked:20, /* packets (S)ACKed in sampling epoch */ + extra_acked_win_rtts:5, /* age of extra_acked, in round trips */ + extra_acked_win_idx:1, /* current index in extra_acked array */ + /* BBR v2 state: */ + unused1:2, + startup_ecn_rounds:2, /* consecutive hi ECN STARTUP rounds */ + loss_in_cycle:1, /* packet loss in this cycle? */ + ecn_in_cycle:1; /* ECN in this cycle? */ + u32 loss_round_delivered; /* scb->tx.delivered ending loss round */ + u32 undo_bw_lo; /* bw_lo before latest losses */ + u32 undo_inflight_lo; /* inflight_lo before latest losses */ + u32 undo_inflight_hi; /* inflight_hi before latest losses */ + u32 bw_latest; /* max delivered bw in last round trip */ + u32 bw_lo; /* lower bound on sending bandwidth */ + u32 bw_hi[2]; /* upper bound of sending bandwidth range*/ + u32 inflight_latest; /* max delivered data in last round trip */ + u32 inflight_lo; /* lower bound of inflight data range */ + u32 inflight_hi; /* upper bound of inflight data range */ + u32 bw_probe_up_cnt; /* packets delivered per inflight_hi incr */ + u32 bw_probe_up_acks; /* packets (S)ACKed since inflight_hi incr */ + u32 probe_wait_us; /* PROBE_DOWN until next clock-driven probe */ + u32 ecn_eligible:1, /* sender can use ECN (RTT, handshake)? */ + ecn_alpha:9, /* EWMA delivered_ce/delivered; 0..256 */ + bw_probe_samples:1, /* rate samples reflect bw probing? */ + prev_probe_too_high:1, /* did last PROBE_UP go too high? */ + stopped_risky_probe:1, /* last PROBE_UP stopped due to risk? 
*/ + rounds_since_probe:8, /* packet-timed rounds since probed bw */ + loss_round_start:1, /* loss_round_delivered round trip? */ + loss_in_round:1, /* loss marked in this round trip? */ + ecn_in_round:1, /* ECN marked in this round trip? */ + ack_phase:3, /* bbr_ack_phase: meaning of ACKs */ + loss_events_in_round:4,/* losses in STARTUP round */ + initialized:1; /* has bbr_init() been called? */ + u32 alpha_last_delivered; /* tp->delivered at alpha update */ + u32 alpha_last_delivered_ce; /* tp->delivered_ce at alpha update */ + + /* Params configurable using setsockopt. Refer to correspoding + * module param for detailed description of params. + */ + struct bbr_params { + u32 high_gain:11, /* max allowed value: 2047 */ + drain_gain:10, /* max allowed value: 1023 */ + cwnd_gain:11; /* max allowed value: 2047 */ + u32 cwnd_min_target:4, /* max allowed value: 15 */ + min_rtt_win_sec:5, /* max allowed value: 31 */ + probe_rtt_mode_ms:9, /* max allowed value: 511 */ + full_bw_cnt:3, /* max allowed value: 7 */ + cwnd_tso_budget:1, /* allowed values: {0, 1} */ + unused3:6, + drain_to_target:1, /* boolean */ + precise_ece_ack:1, /* boolean */ + extra_acked_in_startup:1, /* allowed values: {0, 1} */ + fast_path:1; /* boolean */ + u32 full_bw_thresh:10, /* max allowed value: 1023 */ + startup_cwnd_gain:11, /* max allowed value: 2047 */ + bw_probe_pif_gain:9, /* max allowed value: 511 */ + usage_based_cwnd:1, /* boolean */ + unused2:1; + u16 probe_rtt_win_ms:14, /* max allowed value: 16383 */ + refill_add_inc:2; /* max allowed value: 3 */ + u16 extra_acked_gain:11, /* max allowed value: 2047 */ + extra_acked_win_rtts:5; /* max allowed value: 31*/ + u16 pacing_gain[CYCLE_LEN]; /* max allowed value: 1023 */ + /* Mostly BBR v2 parameters below here: */ + u32 ecn_alpha_gain:8, /* max allowed value: 255 */ + ecn_factor:8, /* max allowed value: 255 */ + ecn_thresh:8, /* max allowed value: 255 */ + beta:8; /* max allowed value: 255 */ + u32 ecn_max_rtt_us:19, /* max allowed value: 
524287 */ + bw_probe_reno_gain:9, /* max allowed value: 511 */ + full_loss_cnt:4; /* max allowed value: 15 */ + u32 probe_rtt_cwnd_gain:8, /* max allowed value: 255 */ + inflight_headroom:8, /* max allowed value: 255 */ + loss_thresh:8, /* max allowed value: 255 */ + bw_probe_max_rounds:8; /* max allowed value: 255 */ + u32 bw_probe_rand_rounds:4, /* max allowed value: 15 */ + bw_probe_base_us:26, /* usecs: 0..2^26-1 (67 secs) */ + full_ecn_cnt:2; /* max allowed value: 3 */ + u32 bw_probe_rand_us:26, /* usecs: 0..2^26-1 (67 secs) */ + undo:1, /* boolean */ + tso_rtt_shift:4, /* max allowed value: 15 */ + unused5:1; + u32 ecn_reprobe_gain:9, /* max allowed value: 511 */ + unused1:14, + ecn_alpha_init:9; /* max allowed value: 256 */ + } params; + + struct { + u32 snd_isn; /* Initial sequence number */ + u32 rs_bw; /* last valid rate sample bw */ + u32 target_cwnd; /* target cwnd, based on BDP */ + u8 undo:1, /* Undo even happened but not yet logged */ + unused:7; + char event; /* single-letter event debug codes */ + u16 unused2; + } debug; +}; + +struct bbr_context { + u32 sample_bw; + u32 target_cwnd; + u32 log:1; +}; + +/* Window length of min_rtt filter (in sec). Max allowed value is 31 (0x1F) */ +static u32 bbr_min_rtt_win_sec = 10; +/* Minimum time (in ms) spent at bbr_cwnd_min_target in BBR_PROBE_RTT mode. + * Max allowed value is 511 (0x1FF). + */ +static u32 bbr_probe_rtt_mode_ms = 200; +/* Window length of probe_rtt_min_us filter (in ms), and consequently the + * typical interval between PROBE_RTT mode entries. + * Note that bbr_probe_rtt_win_ms must be <= bbr_min_rtt_win_sec * MSEC_PER_SEC + */ +static u32 bbr_probe_rtt_win_ms = 5000; +/* Skip TSO below the following bandwidth (bits/sec): */ +static int bbr_min_tso_rate = 1200000; + +/* Use min_rtt to help adapt TSO burst size, with smaller min_rtt resulting + * in bigger TSO bursts. 
By default we cut the RTT-based allowance in half + * for every 2^9 usec (aka 512 us) of RTT, so that the RTT-based allowance + * is below 1500 bytes after 6 * ~500 usec = 3ms. + */ +static u32 bbr_tso_rtt_shift = 9; /* halve allowance per 2^9 usecs, 512us */ + +/* Select cwnd TSO budget approach: + * 0: padding + * 1: flooring + */ +static uint bbr_cwnd_tso_budget = 1; + +/* Pace at ~1% below estimated bw, on average, to reduce queue at bottleneck. + * In order to help drive the network toward lower queues and low latency while + * maintaining high utilization, the average pacing rate aims to be slightly + * lower than the estimated bandwidth. This is an important aspect of the + * design. + */ +static const int bbr_pacing_margin_percent = 1; + +/* We use a high_gain value of 2/ln(2) because it's the smallest pacing gain + * that will allow a smoothly increasing pacing rate that will double each RTT + * and send the same number of packets per RTT that an un-paced, slow-starting + * Reno or CUBIC flow would. Max allowed value is 2047 (0x7FF). + */ +static int bbr_high_gain = BBR_UNIT * 2885 / 1000 + 1; +/* The gain for deriving startup cwnd. Max allowed value is 2047 (0x7FF). */ +static int bbr_startup_cwnd_gain = BBR_UNIT * 2885 / 1000 + 1; +/* The pacing gain of 1/high_gain in BBR_DRAIN is calculated to typically drain + * the queue created in BBR_STARTUP in a single round. Max allowed value + * is 1023 (0x3FF). + */ +static int bbr_drain_gain = BBR_UNIT * 1000 / 2885; +/* The gain for deriving steady-state cwnd tolerates delayed/stretched ACKs. + * Max allowed value is 2047 (0x7FF). + */ +static int bbr_cwnd_gain = BBR_UNIT * 2; +/* The pacing_gain values for the PROBE_BW gain cycle, to discover/share bw. + * Max allowed value for each element is 1023 (0x3FF). 
+ */ +enum bbr_pacing_gain_phase { + BBR_BW_PROBE_UP = 0, /* push up inflight to probe for bw/vol */ + BBR_BW_PROBE_DOWN = 1, /* drain excess inflight from the queue */ + BBR_BW_PROBE_CRUISE = 2, /* use pipe, w/ headroom in queue/pipe */ + BBR_BW_PROBE_REFILL = 3, /* v2: refill the pipe again to 100% */ +}; +static int bbr_pacing_gain[] = { + BBR_UNIT * 5 / 4, /* probe for more available bw */ + BBR_UNIT * 3 / 4, /* drain queue and/or yield bw to other flows */ + BBR_UNIT, BBR_UNIT, BBR_UNIT, /* cruise at 1.0*bw to utilize pipe, */ + BBR_UNIT, BBR_UNIT, BBR_UNIT /* without creating excess queue... */ +}; + +/* Try to keep at least this many packets in flight, if things go smoothly. For + * smooth functioning, a sliding window protocol ACKing every other packet + * needs at least 4 packets in flight. Max allowed value is 15 (0xF). + */ +static u32 bbr_cwnd_min_target = 4; + +/* Cwnd to BDP proportion in PROBE_RTT mode scaled by BBR_UNIT. Default: 50%. + * Use 0 to disable. Max allowed value is 255. + */ +static u32 bbr_probe_rtt_cwnd_gain = BBR_UNIT * 1 / 2; + +/* To estimate if BBR_STARTUP mode (i.e. high_gain) has filled pipe... */ +/* If bw has increased significantly (1.25x), there may be more bw available. + * Max allowed value is 1023 (0x3FF). + */ +static u32 bbr_full_bw_thresh = BBR_UNIT * 5 / 4; +/* But after 3 rounds w/o significant bw growth, estimate pipe is full. + * Max allowed value is 7 (0x7). + */ +static u32 bbr_full_bw_cnt = 3; + +static u32 bbr_flags; /* Debugging related stuff */ + +/* Whether to debug using printk. + */ +static bool bbr_debug_with_printk; + +/* Whether to debug using ftrace event tcp:tcp_bbr_event. + * Ignored when bbr_debug_with_printk is set. + */ +static bool bbr_debug_ftrace; + +/* Experiment: each cycle, try to hold sub-unity gain until inflight <= BDP. */ +static bool bbr_drain_to_target = true; /* default: enabled */ + +/* Experiment: Flags to control BBR with ECN behavior. 
+ */ +static bool bbr_precise_ece_ack = true; /* default: enabled */ + +/* The max rwin scaling shift factor is 14 (RFC 1323), so the max sane rwin is + * (2^(16+14) B)/(1024 B/packet) = 1M packets. + */ +static u32 bbr_cwnd_warn_val = 1U << 20; + +static u16 bbr_debug_port_mask; + +/* BBR module parameters. These are module parameters only in Google prod. + * Upstream these are intentionally not module parameters. + */ +static int bbr_pacing_gain_size = CYCLE_LEN; + +/* Gain factor for adding extra_acked to target cwnd: */ +static int bbr_extra_acked_gain = 256; + +/* Window length of extra_acked window. Max allowed val is 31. */ +static u32 bbr_extra_acked_win_rtts = 5; + +/* Max allowed val for ack_epoch_acked, after which sampling epoch is reset */ +static u32 bbr_ack_epoch_acked_reset_thresh = 1U << 20; + +/* Time period for clamping cwnd increment due to ack aggregation */ +static u32 bbr_extra_acked_max_us = 100 * 1000; + +/* Use extra acked in startup ? + * 0: disabled + * 1: use latest extra_acked value from 1-2 rtt in startup + */ +static int bbr_extra_acked_in_startup = 1; /* default: enabled */ + +/* Experiment: don't grow cwnd beyond twice of what we just probed. */ +static bool bbr_usage_based_cwnd; /* default: disabled */ + +/* For lab testing, researchers can enable BBRv2 ECN support with this flag, + * when they know that any ECN marks that the connections experience will be + * DCTCP/L4S-style ECN marks, rather than RFC3168 ECN marks. + * TODO(ncardwell): Production use of the BBRv2 ECN functionality depends on + * negotiation or configuration that is outside the scope of the BBRv2 + * alpha release. 
+ */ +static bool bbr_ecn_enable = false; + +module_param_named(min_tso_rate, bbr_min_tso_rate, int, 0644); +module_param_named(tso_rtt_shift, bbr_tso_rtt_shift, int, 0644); +module_param_named(high_gain, bbr_high_gain, int, 0644); +module_param_named(drain_gain, bbr_drain_gain, int, 0644); +module_param_named(startup_cwnd_gain, bbr_startup_cwnd_gain, int, 0644); +module_param_named(cwnd_gain, bbr_cwnd_gain, int, 0644); +module_param_array_named(pacing_gain, bbr_pacing_gain, int, + &bbr_pacing_gain_size, 0644); +module_param_named(cwnd_min_target, bbr_cwnd_min_target, uint, 0644); +module_param_named(probe_rtt_cwnd_gain, + bbr_probe_rtt_cwnd_gain, uint, 0664); +module_param_named(cwnd_warn_val, bbr_cwnd_warn_val, uint, 0664); +module_param_named(debug_port_mask, bbr_debug_port_mask, ushort, 0644); +module_param_named(flags, bbr_flags, uint, 0644); +module_param_named(debug_ftrace, bbr_debug_ftrace, bool, 0644); +module_param_named(debug_with_printk, bbr_debug_with_printk, bool, 0644); +module_param_named(min_rtt_win_sec, bbr_min_rtt_win_sec, uint, 0644); +module_param_named(probe_rtt_mode_ms, bbr_probe_rtt_mode_ms, uint, 0644); +module_param_named(probe_rtt_win_ms, bbr_probe_rtt_win_ms, uint, 0644); +module_param_named(full_bw_thresh, bbr_full_bw_thresh, uint, 0644); +module_param_named(full_bw_cnt, bbr_full_bw_cnt, uint, 0644); +module_param_named(cwnd_tso_bduget, bbr_cwnd_tso_budget, uint, 0664); +module_param_named(extra_acked_gain, bbr_extra_acked_gain, int, 0664); +module_param_named(extra_acked_win_rtts, + bbr_extra_acked_win_rtts, uint, 0664); +module_param_named(extra_acked_max_us, + bbr_extra_acked_max_us, uint, 0664); +module_param_named(ack_epoch_acked_reset_thresh, + bbr_ack_epoch_acked_reset_thresh, uint, 0664); +module_param_named(drain_to_target, bbr_drain_to_target, bool, 0664); +module_param_named(precise_ece_ack, bbr_precise_ece_ack, bool, 0664); +module_param_named(extra_acked_in_startup, + bbr_extra_acked_in_startup, int, 0664); 
+module_param_named(usage_based_cwnd, bbr_usage_based_cwnd, bool, 0664); +module_param_named(ecn_enable, bbr_ecn_enable, bool, 0664); + +static void bbr2_exit_probe_rtt(struct sock *sk); +static void bbr2_reset_congestion_signals(struct sock *sk); + +static void bbr_check_probe_rtt_done(struct sock *sk); + +/* Do we estimate that STARTUP filled the pipe? */ +static bool bbr_full_bw_reached(const struct sock *sk) +{ + const struct bbr *bbr = inet_csk_ca(sk); + + return bbr->full_bw_reached; +} + +/* Return the windowed max recent bandwidth sample, in pkts/uS << BW_SCALE. */ +static u32 bbr_max_bw(const struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + + return max(bbr->bw_hi[0], bbr->bw_hi[1]); +} + +/* Return the estimated bandwidth of the path, in pkts/uS << BW_SCALE. */ +static u32 bbr_bw(const struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + + return min(bbr_max_bw(sk), bbr->bw_lo); +} + +/* Return maximum extra acked in past k-2k round trips, + * where k = bbr_extra_acked_win_rtts. + */ +static u16 bbr_extra_acked(const struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + + return max(bbr->extra_acked[0], bbr->extra_acked[1]); +} + +/* Return rate in bytes per second, optionally with a gain. + * The order here is chosen carefully to avoid overflow of u64. This should + * work for input rates of up to 2.9Tbit/sec and gain of 2.89x. 
+ */ +static u64 bbr_rate_bytes_per_sec(struct sock *sk, u64 rate, int gain, + int margin) +{ + unsigned int mss = tcp_sk(sk)->mss_cache; + + rate *= mss; + rate *= gain; + rate >>= BBR_SCALE; + rate *= USEC_PER_SEC / 100 * (100 - margin); + rate >>= BW_SCALE; + rate = max(rate, 1ULL); + return rate; +} + +static u64 bbr_bw_bytes_per_sec(struct sock *sk, u64 rate) +{ + return bbr_rate_bytes_per_sec(sk, rate, BBR_UNIT, 0); +} + +static u64 bbr_rate_kbps(struct sock *sk, u64 rate) +{ + rate = bbr_bw_bytes_per_sec(sk, rate); + rate *= 8; + do_div(rate, 1000); + return rate; +} + +static u32 bbr_tso_segs_goal(struct sock *sk); +static void bbr_debug(struct sock *sk, u32 acked, + const struct rate_sample *rs, struct bbr_context *ctx) +{ + static const char ca_states[] = { + [TCP_CA_Open] = 'O', + [TCP_CA_Disorder] = 'D', + [TCP_CA_CWR] = 'C', + [TCP_CA_Recovery] = 'R', + [TCP_CA_Loss] = 'L', + }; + static const char mode[] = { + 'G', /* Growing - BBR_STARTUP */ + 'D', /* Drain - BBR_DRAIN */ + 'W', /* Window - BBR_PROBE_BW */ + 'M', /* Min RTT - BBR_PROBE_RTT */ + }; + static const char ack_phase[] = { /* bbr_ack_phase strings */ + 'I', /* BBR_ACKS_INIT - 'Init' */ + 'R', /* BBR_ACKS_REFILLING - 'Refilling' */ + 'B', /* BBR_ACKS_PROBE_STARTING - 'Before' */ + 'F', /* BBR_ACKS_PROBE_FEEDBACK - 'Feedback' */ + 'A', /* BBR_ACKS_PROBE_STOPPING - 'After' */ + }; + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + const u32 una = tp->snd_una - bbr->debug.snd_isn; + const u32 fack = tcp_highest_sack_seq(tp); + const u16 dport = ntohs(inet_sk(sk)->inet_dport); + bool is_port_match = (bbr_debug_port_mask && + ((dport & bbr_debug_port_mask) == 0)); + char debugmsg[320]; + + if (sk->sk_state == TCP_SYN_SENT) + return; /* no bbr_init() yet if SYN retransmit -> CA_Loss */ + + if (!tp->snd_cwnd || tp->snd_cwnd > bbr_cwnd_warn_val) { + char addr[INET6_ADDRSTRLEN + 10] = { 0 }; + + if (sk->sk_family == AF_INET) + snprintf(addr, sizeof(addr), "%pI4:%u", + 
&inet_sk(sk)->inet_daddr, dport); + else if (sk->sk_family == AF_INET6) + snprintf(addr, sizeof(addr), "%pI6:%u", + &sk->sk_v6_daddr, dport); + + WARN_ONCE(1, + "BBR %s cwnd alert: %u " + "snd_una: %u ca: %d pacing_gain: %u cwnd_gain: %u " + "bw: %u rtt: %u min_rtt: %u " + "acked: %u tso_segs: %u " + "bw: %d %ld %d pif: %u\n", + addr, tp->snd_cwnd, + una, inet_csk(sk)->icsk_ca_state, + bbr->pacing_gain, bbr->cwnd_gain, + bbr_max_bw(sk), (tp->srtt_us >> 3), bbr->min_rtt_us, + acked, bbr_tso_segs_goal(sk), + rs->delivered, rs->interval_us, rs->is_retrans, + tcp_packets_in_flight(tp)); + } + + if (likely(!bbr_debug_with_printk && !bbr_debug_ftrace)) + return; + + if (!sock_flag(sk, SOCK_DBG) && !is_port_match) + return; + + if (!ctx->log && !tp->app_limited && !(bbr_flags & FLAG_DEBUG_VERBOSE)) + return; + + if (ipv4_is_loopback(inet_sk(sk)->inet_daddr) && + !(bbr_flags & FLAG_DEBUG_LOOPBACK)) + return; + + snprintf(debugmsg, sizeof(debugmsg) - 1, + "BBR %pI4:%-5u %5u,%03u:%-7u %c " + "%c %2u br %2u cr %2d rtt %5ld d %2d i %5ld mrtt %d %cbw %llu " + "bw %llu lb %llu ib %llu qb %llu " + "a %u if %2u %c %c dl %u l %u al %u # %u t %u %c %c " + "lr %d er %d ea %d bwl %lld il %d ih %d c %d " + "v %d %c %u %c %s\n", + &inet_sk(sk)->inet_daddr, dport, + una / 1000, una % 1000, fack - tp->snd_una, + ca_states[inet_csk(sk)->icsk_ca_state], + bbr->debug.undo ? '@' : mode[bbr->mode], + tp->snd_cwnd, + bbr_extra_acked(sk), /* br (legacy): extra_acked */ + rs->tx_in_flight, /* cr (legacy): tx_inflight */ + rs->rtt_us, + rs->delivered, + rs->interval_us, + bbr->min_rtt_us, + rs->is_app_limited ? '_' : 'l', + bbr_rate_kbps(sk, ctx->sample_bw), /* lbw: latest sample bw */ + bbr_rate_kbps(sk, bbr_max_bw(sk)), /* bw: max bw */ + 0ULL, /* lb: [obsolete] */ + 0ULL, /* ib: [obsolete] */ + (u64)sk->sk_pacing_rate * 8 / 1000, + acked, + tcp_packets_in_flight(tp), + rs->is_ack_delayed ? 'd' : '.', + bbr->round_start ? 
'*' : '.', + tp->delivered, tp->lost, + tp->app_limited, + 0, /* #: [obsolete] */ + ctx->target_cwnd, + tp->reord_seen ? 'r' : '.', /* r: reordering seen? */ + ca_states[bbr->prev_ca_state], + (rs->lost + rs->delivered) > 0 ? + (1000 * rs->lost / + (rs->lost + rs->delivered)) : 0, /* lr: loss rate x1000 */ + (rs->delivered) > 0 ? + (1000 * rs->delivered_ce / + (rs->delivered)) : 0, /* er: ECN rate x1000 */ + 1000 * bbr->ecn_alpha >> BBR_SCALE, /* ea: ECN alpha x1000 */ + bbr->bw_lo == ~0U ? + -1 : (s64)bbr_rate_kbps(sk, bbr->bw_lo), /* bwl */ + bbr->inflight_lo, /* il */ + bbr->inflight_hi, /* ih */ + bbr->bw_probe_up_cnt, /* c */ + 2, /* v: version */ + bbr->debug.event, + bbr->cycle_idx, + ack_phase[bbr->ack_phase], + bbr->bw_probe_samples ? "Y" : "N"); + debugmsg[sizeof(debugmsg) - 1] = 0; + + /* printk takes a higher precedence. */ + if (bbr_debug_with_printk) + printk(KERN_DEBUG "%s", debugmsg); + + if (unlikely(bbr->debug.undo)) + bbr->debug.undo = 0; +} + +/* Convert a BBR bw and gain factor to a pacing rate in bytes per second. */ +static unsigned long bbr_bw_to_pacing_rate(struct sock *sk, u32 bw, int gain) +{ + u64 rate = bw; + + rate = bbr_rate_bytes_per_sec(sk, rate, gain, + bbr_pacing_margin_percent); + rate = min_t(u64, rate, sk->sk_max_pacing_rate); + return rate; +} + +/* Initialize pacing rate to: high_gain * init_cwnd / RTT. */ +static void bbr_init_pacing_rate_from_rtt(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + u64 bw; + u32 rtt_us; + + if (tp->srtt_us) { /* any RTT sample yet? */ + rtt_us = max(tp->srtt_us >> 3, 1U); + bbr->has_seen_rtt = 1; + } else { /* no RTT sample yet */ + rtt_us = USEC_PER_MSEC; /* use nominal default RTT */ + } + bw = (u64)tp->snd_cwnd * BW_UNIT; + do_div(bw, rtt_us); + sk->sk_pacing_rate = bbr_bw_to_pacing_rate(sk, bw, bbr->params.high_gain); +} + +/* Pace using current bw estimate and a gain factor. 
*/ +static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + unsigned long rate = bbr_bw_to_pacing_rate(sk, bw, gain); + + if (unlikely(!bbr->has_seen_rtt && tp->srtt_us)) + bbr_init_pacing_rate_from_rtt(sk); + if (bbr_full_bw_reached(sk) || rate > sk->sk_pacing_rate) + sk->sk_pacing_rate = rate; +} + +static u32 bbr_min_tso_segs(struct sock *sk) +{ + return sk->sk_pacing_rate < (bbr_min_tso_rate >> 3) ? 1 : 2; +} + +/* Return the number of segments BBR would like in a TSO/GSO skb, given + * a particular max gso size as a constraint. + */ +static u32 bbr_tso_segs_generic(struct sock *sk, unsigned int mss_now, + u32 gso_max_size) +{ + struct bbr *bbr = inet_csk_ca(sk); + u32 segs, r; + u64 bytes; + + /* Budget a TSO/GSO burst size allowance based on bw (pacing_rate). */ + bytes = sk->sk_pacing_rate >> sk->sk_pacing_shift; + + /* Budget a TSO/GSO burst size allowance based on min_rtt. For every + * K = 2^tso_rtt_shift microseconds of min_rtt, halve the burst. + * The min_rtt-based burst allowance is: 64 KBytes / 2^(min_rtt/K) + */ + if (bbr->params.tso_rtt_shift) { + r = bbr->min_rtt_us >> bbr->params.tso_rtt_shift; + if (r < BITS_PER_TYPE(u32)) /* prevent undefined behavior */ + bytes += GSO_MAX_SIZE >> r; + } + + bytes = min_t(u32, bytes, gso_max_size - 1 - MAX_TCP_HEADER); + segs = max_t(u32, bytes / mss_now, bbr_min_tso_segs(sk)); + return segs; +} + +/* Custom tcp_tso_autosize() for BBR, used at transmit time to cap skb size. */ +static u32 bbr_tso_segs(struct sock *sk, unsigned int mss_now) +{ + return bbr_tso_segs_generic(sk, mss_now, sk->sk_gso_max_size); +} + +/* Like bbr_tso_segs(), using mss_cache, ignoring driver's sk_gso_max_size. 
*/ +static u32 bbr_tso_segs_goal(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + + return bbr_tso_segs_generic(sk, tp->mss_cache, GSO_MAX_SIZE); +} + +/* Save "last known good" cwnd so we can restore it after losses or PROBE_RTT */ +static void bbr_save_cwnd(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + + if (bbr->prev_ca_state < TCP_CA_Recovery && bbr->mode != BBR_PROBE_RTT) + bbr->prior_cwnd = tp->snd_cwnd; /* this cwnd is good enough */ + else /* loss recovery or BBR_PROBE_RTT have temporarily cut cwnd */ + bbr->prior_cwnd = max(bbr->prior_cwnd, tp->snd_cwnd); +} + +static void bbr_cwnd_event(struct sock *sk, enum tcp_ca_event event) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + + if (event == CA_EVENT_TX_START && tp->app_limited) { + bbr->idle_restart = 1; + bbr->ack_epoch_mstamp = tp->tcp_mstamp; + bbr->ack_epoch_acked = 0; + /* Avoid pointless buffer overflows: pace at est. bw if we don't + * need more speed (we're restarting from idle and app-limited). + */ + if (bbr->mode == BBR_PROBE_BW) + bbr_set_pacing_rate(sk, bbr_bw(sk), BBR_UNIT); + else if (bbr->mode == BBR_PROBE_RTT) + bbr_check_probe_rtt_done(sk); + } else if ((event == CA_EVENT_ECN_IS_CE || + event == CA_EVENT_ECN_NO_CE) && + bbr_ecn_enable && + bbr->params.precise_ece_ack) { + u32 state = bbr->ce_state; + dctcp_ece_ack_update(sk, event, &bbr->prior_rcv_nxt, &state); + bbr->ce_state = state; + if (tp->fast_ack_mode == 2 && event == CA_EVENT_ECN_IS_CE) + tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS); + } +} + +/* Calculate bdp based on min RTT and the estimated bottleneck bandwidth: + * + * bdp = ceil(bw * min_rtt * gain) + * + * The key factor, gain, controls the amount of queue. While a small gain + * builds a smaller queue, it becomes more vulnerable to noise in RTT + * measurements (e.g., delayed ACKs or other ACK compression effects). This + * noise may cause BBR to under-estimate the rate. 
+ */ +static u32 bbr_bdp(struct sock *sk, u32 bw, int gain) +{ + struct bbr *bbr = inet_csk_ca(sk); + u32 bdp; + u64 w; + + /* If we've never had a valid RTT sample, cap cwnd at the initial + * default. This should only happen when the connection is not using TCP + * timestamps and has retransmitted all of the SYN/SYNACK/data packets + * ACKed so far. In this case, an RTO can cut cwnd to 1, in which + * case we need to slow-start up toward something safe: initial cwnd. + */ + if (unlikely(bbr->min_rtt_us == ~0U)) /* no valid RTT samples yet? */ + return bbr->init_cwnd; /* be safe: cap at initial cwnd */ + + w = (u64)bw * bbr->min_rtt_us; + + /* Apply a gain to the given value, remove the BW_SCALE shift, and + * round the value up to avoid a negative feedback loop. + */ + bdp = (((w * gain) >> BBR_SCALE) + BW_UNIT - 1) / BW_UNIT; + + return bdp; +} + +/* To achieve full performance in high-speed paths, we budget enough cwnd to + * fit full-sized skbs in-flight on both end hosts to fully utilize the path: + * - one skb in sending host Qdisc, + * - one skb in sending host TSO/GSO engine + * - one skb being received by receiver host LRO/GRO/delayed-ACK engine + * Don't worry, at low rates (bbr_min_tso_rate) this won't bloat cwnd because + * in such cases tso_segs_goal is 1. The minimum cwnd is 4 packets, + * which allows 2 outstanding 2-packet sequences, to try to keep pipe + * full even with ACK-every-other-packet delayed ACKs. + */ +static u32 bbr_quantization_budget(struct sock *sk, u32 cwnd) +{ + struct bbr *bbr = inet_csk_ca(sk); + u32 tso_segs_goal; + + tso_segs_goal = 3 * bbr_tso_segs_goal(sk); + + /* Allow enough full-sized skbs in flight to utilize end systems. */ + if (bbr->params.cwnd_tso_budget == 1) { + cwnd = max_t(u32, cwnd, tso_segs_goal); + cwnd = max_t(u32, cwnd, bbr->params.cwnd_min_target); + } else { + cwnd += tso_segs_goal; + cwnd = (cwnd + 1) & ~1U; + } + /* Ensure gain cycling gets inflight above BDP even for small BDPs. 
*/ + if (bbr->mode == BBR_PROBE_BW && bbr->cycle_idx == BBR_BW_PROBE_UP) + cwnd += 2; + + return cwnd; +} + +/* Find inflight based on min RTT and the estimated bottleneck bandwidth. */ +static u32 bbr_inflight(struct sock *sk, u32 bw, int gain) +{ + u32 inflight; + + inflight = bbr_bdp(sk, bw, gain); + inflight = bbr_quantization_budget(sk, inflight); + + return inflight; +} + +/* With pacing at lower layers, there's often less data "in the network" than + * "in flight". With TSQ and departure time pacing at lower layers (e.g. fq), + * we often have several skbs queued in the pacing layer with a pre-scheduled + * earliest departure time (EDT). BBR adapts its pacing rate based on the + * inflight level that it estimates has already been "baked in" by previous + * departure time decisions. We calculate a rough estimate of the number of our + * packets that might be in the network at the earliest departure time for the + * next skb scheduled: + * in_network_at_edt = inflight_at_edt - (EDT - now) * bw + * If we're increasing inflight, then we want to know if the transmit of the + * EDT skb will push inflight above the target, so inflight_at_edt includes + * bbr_tso_segs_goal() from the skb departing at EDT. If decreasing inflight, + * then estimate if inflight will sink too low just before the EDT transmit. 
+ */ +static u32 bbr_packets_in_net_at_edt(struct sock *sk, u32 inflight_now) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + u64 now_ns, edt_ns, interval_us; + u32 interval_delivered, inflight_at_edt; + + now_ns = tp->tcp_clock_cache; + edt_ns = max(tp->tcp_wstamp_ns, now_ns); + interval_us = div_u64(edt_ns - now_ns, NSEC_PER_USEC); + interval_delivered = (u64)bbr_bw(sk) * interval_us >> BW_SCALE; + inflight_at_edt = inflight_now; + if (bbr->pacing_gain > BBR_UNIT) /* increasing inflight */ + inflight_at_edt += bbr_tso_segs_goal(sk); /* include EDT skb */ + if (interval_delivered >= inflight_at_edt) + return 0; + return inflight_at_edt - interval_delivered; +} + +/* Find the cwnd increment based on estimate of ack aggregation */ +static u32 bbr_ack_aggregation_cwnd(struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + u32 max_aggr_cwnd, aggr_cwnd = 0; + + if (bbr->params.extra_acked_gain && + (bbr_full_bw_reached(sk) || bbr->params.extra_acked_in_startup)) { + max_aggr_cwnd = ((u64)bbr_bw(sk) * bbr_extra_acked_max_us) + / BW_UNIT; + aggr_cwnd = (bbr->params.extra_acked_gain * bbr_extra_acked(sk)) + >> BBR_SCALE; + aggr_cwnd = min(aggr_cwnd, max_aggr_cwnd); + } + + return aggr_cwnd; +} + +/* Returns the cwnd for PROBE_RTT mode. */ +static u32 bbr_probe_rtt_cwnd(struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + + if (bbr->params.probe_rtt_cwnd_gain == 0) + return bbr->params.cwnd_min_target; + return max_t(u32, bbr->params.cwnd_min_target, + bbr_bdp(sk, bbr_bw(sk), bbr->params.probe_rtt_cwnd_gain)); +} + +/* Slow-start up toward target cwnd (if bw estimate is growing, or packet loss + * has drawn us down below target), or snap down to target if we're above it. 
+ */ +static void bbr_set_cwnd(struct sock *sk, const struct rate_sample *rs, + u32 acked, u32 bw, int gain, u32 cwnd, + struct bbr_context *ctx) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + u32 target_cwnd = 0, prev_cwnd = tp->snd_cwnd, max_probe; + + if (!acked) + goto done; /* no packet fully ACKed; just apply caps */ + + target_cwnd = bbr_bdp(sk, bw, gain); + + /* Increment the cwnd to account for excess ACKed data that seems + * due to aggregation (of data and/or ACKs) visible in the ACK stream. + */ + target_cwnd += bbr_ack_aggregation_cwnd(sk); + target_cwnd = bbr_quantization_budget(sk, target_cwnd); + + /* If we're below target cwnd, slow start cwnd toward target cwnd. */ + bbr->debug.target_cwnd = target_cwnd; + + /* Update cwnd and enable fast path if cwnd reaches target_cwnd. */ + bbr->try_fast_path = 0; + if (bbr_full_bw_reached(sk)) { /* only cut cwnd if we filled the pipe */ + cwnd += acked; + if (cwnd >= target_cwnd) { + cwnd = target_cwnd; + bbr->try_fast_path = 1; + } + } else if (cwnd < target_cwnd || cwnd < 2 * bbr->init_cwnd) { + cwnd += acked; + } else { + bbr->try_fast_path = 1; + } + + /* When growing cwnd, don't grow beyond twice what we just probed. 
*/ + if (bbr->params.usage_based_cwnd) { + max_probe = max(2 * tp->max_packets_out, tp->snd_cwnd); + cwnd = min(cwnd, max_probe); + } + + cwnd = max_t(u32, cwnd, bbr->params.cwnd_min_target); +done: + tp->snd_cwnd = min(cwnd, tp->snd_cwnd_clamp); /* apply global cap */ + if (bbr->mode == BBR_PROBE_RTT) /* drain queue, refresh min_rtt */ + tp->snd_cwnd = min_t(u32, tp->snd_cwnd, bbr_probe_rtt_cwnd(sk)); + + ctx->target_cwnd = target_cwnd; + ctx->log = (tp->snd_cwnd != prev_cwnd); +} + +/* See if we have reached next round trip */ +static void bbr_update_round_start(struct sock *sk, + const struct rate_sample *rs, struct bbr_context *ctx) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + + bbr->round_start = 0; + + /* See if we've reached the next RTT */ + if (rs->interval_us > 0 && + !before(rs->prior_delivered, bbr->next_rtt_delivered)) { + bbr->next_rtt_delivered = tp->delivered; + bbr->round_start = 1; + } +} + +/* Calculate the bandwidth based on how fast packets are delivered */ +static void bbr_calculate_bw_sample(struct sock *sk, + const struct rate_sample *rs, struct bbr_context *ctx) +{ + struct bbr *bbr = inet_csk_ca(sk); + u64 bw = 0; + + /* Divide delivered by the interval to find a (lower bound) bottleneck + * bandwidth sample. Delivered is in packets and interval_us in uS and + * ratio will be <<1 for most connections. So delivered is first scaled. + * Round up to allow growth at low rates, even with integer division. + */ + if (rs->interval_us > 0) { + if (WARN_ONCE(rs->delivered < 0, + "negative delivered: %d interval_us: %ld\n", + rs->delivered, rs->interval_us)) + return; + + bw = DIV_ROUND_UP_ULL((u64)rs->delivered * BW_UNIT, rs->interval_us); + } + + ctx->sample_bw = bw; + bbr->debug.rs_bw = bw; +} + +/* Estimates the windowed max degree of ack aggregation. + * This is used to provision extra in-flight data to keep sending during + * inter-ACK silences. 
+ * + * Degree of ack aggregation is estimated as extra data acked beyond expected. + * + * max_extra_acked = "maximum recent excess data ACKed beyond max_bw * interval" + * cwnd += max_extra_acked + * + * Max extra_acked is clamped by cwnd and bw * bbr_extra_acked_max_us (100 ms). + * Max filter is an approximate sliding window of 5-10 (packet timed) round + * trips for non-startup phase, and 1-2 round trips for startup. + */ +static void bbr_update_ack_aggregation(struct sock *sk, + const struct rate_sample *rs) +{ + u32 epoch_us, expected_acked, extra_acked; + struct bbr *bbr = inet_csk_ca(sk); + struct tcp_sock *tp = tcp_sk(sk); + u32 extra_acked_win_rtts_thresh = bbr->params.extra_acked_win_rtts; + + if (!bbr->params.extra_acked_gain || rs->acked_sacked <= 0 || + rs->delivered < 0 || rs->interval_us <= 0) + return; + + if (bbr->round_start) { + bbr->extra_acked_win_rtts = min(0x1F, + bbr->extra_acked_win_rtts + 1); + if (bbr->params.extra_acked_in_startup && + !bbr_full_bw_reached(sk)) + extra_acked_win_rtts_thresh = 1; + if (bbr->extra_acked_win_rtts >= + extra_acked_win_rtts_thresh) { + bbr->extra_acked_win_rtts = 0; + bbr->extra_acked_win_idx = bbr->extra_acked_win_idx ? + 0 : 1; + bbr->extra_acked[bbr->extra_acked_win_idx] = 0; + } + } + + /* Compute how many packets we expected to be delivered over epoch. */ + epoch_us = tcp_stamp_us_delta(tp->delivered_mstamp, + bbr->ack_epoch_mstamp); + expected_acked = ((u64)bbr_bw(sk) * epoch_us) / BW_UNIT; + + /* Reset the aggregation epoch if ACK rate is below expected rate or + * significantly large no. of ack received since epoch (potentially + * quite old epoch). + */ + if (bbr->ack_epoch_acked <= expected_acked || + (bbr->ack_epoch_acked + rs->acked_sacked >= + bbr_ack_epoch_acked_reset_thresh)) { + bbr->ack_epoch_acked = 0; + bbr->ack_epoch_mstamp = tp->delivered_mstamp; + expected_acked = 0; + } + + /* Compute excess data delivered, beyond what was expected. 
*/ + bbr->ack_epoch_acked = min_t(u32, 0xFFFFF, + bbr->ack_epoch_acked + rs->acked_sacked); + extra_acked = bbr->ack_epoch_acked - expected_acked; + extra_acked = min(extra_acked, tp->snd_cwnd); + if (extra_acked > bbr->extra_acked[bbr->extra_acked_win_idx]) + bbr->extra_acked[bbr->extra_acked_win_idx] = extra_acked; +} + +/* Estimate when the pipe is full, using the change in delivery rate: BBR + * estimates that STARTUP filled the pipe if the estimated bw hasn't changed by + * at least bbr_full_bw_thresh (25%) after bbr_full_bw_cnt (3) non-app-limited + * rounds. Why 3 rounds: 1: rwin autotuning grows the rwin, 2: we fill the + * higher rwin, 3: we get higher delivery rate samples. Or transient + * cross-traffic or radio noise can go away. CUBIC Hystart shares a similar + * design goal, but uses delay and inter-ACK spacing instead of bandwidth. + */ +static void bbr_check_full_bw_reached(struct sock *sk, + const struct rate_sample *rs) +{ + struct bbr *bbr = inet_csk_ca(sk); + u32 bw_thresh; + + if (bbr_full_bw_reached(sk) || !bbr->round_start || rs->is_app_limited) + return; + + bw_thresh = (u64)bbr->full_bw * bbr->params.full_bw_thresh >> BBR_SCALE; + if (bbr_max_bw(sk) >= bw_thresh) { + bbr->full_bw = bbr_max_bw(sk); + bbr->full_bw_cnt = 0; + return; + } + ++bbr->full_bw_cnt; + bbr->full_bw_reached = bbr->full_bw_cnt >= bbr->params.full_bw_cnt; +} + +/* If pipe is probably full, drain the queue and then enter steady-state. 
*/ +static bool bbr_check_drain(struct sock *sk, const struct rate_sample *rs, + struct bbr_context *ctx) +{ + struct bbr *bbr = inet_csk_ca(sk); + + if (bbr->mode == BBR_STARTUP && bbr_full_bw_reached(sk)) { + bbr->mode = BBR_DRAIN; /* drain queue we created */ + tcp_sk(sk)->snd_ssthresh = + bbr_inflight(sk, bbr_max_bw(sk), BBR_UNIT); + bbr2_reset_congestion_signals(sk); + } /* fall through to check if in-flight is already small: */ + if (bbr->mode == BBR_DRAIN && + bbr_packets_in_net_at_edt(sk, tcp_packets_in_flight(tcp_sk(sk))) <= + bbr_inflight(sk, bbr_max_bw(sk), BBR_UNIT)) + return true; /* exiting DRAIN now */ + return false; +} + +static void bbr_check_probe_rtt_done(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + + if (!(bbr->probe_rtt_done_stamp && + after(tcp_jiffies32, bbr->probe_rtt_done_stamp))) + return; + + bbr->probe_rtt_min_stamp = tcp_jiffies32; /* schedule next PROBE_RTT */ + tp->snd_cwnd = max(tp->snd_cwnd, bbr->prior_cwnd); + bbr2_exit_probe_rtt(sk); +} + +/* The goal of PROBE_RTT mode is to have BBR flows cooperatively and + * periodically drain the bottleneck queue, to converge to measure the true + * min_rtt (unloaded propagation delay). This allows the flows to keep queues + * small (reducing queuing delay and packet loss) and achieve fairness among + * BBR flows. + * + * The min_rtt filter window is 10 seconds. When the min_rtt estimate expires, + * we enter PROBE_RTT mode and cap the cwnd at bbr_cwnd_min_target=4 packets. + * After at least bbr_probe_rtt_mode_ms=200ms and at least one packet-timed + * round trip elapsed with that flight size <= 4, we leave PROBE_RTT mode and + * re-enter the previous mode. BBR uses 200ms to approximately bound the + * performance penalty of PROBE_RTT's cwnd capping to roughly 2% (200ms/10s). + * + * Note that flows need only pay 2% if they are busy sending over the last 10 + * seconds. 
Interactive applications (e.g., Web, RPCs, video chunks) often have + * natural silences or low-rate periods within 10 seconds where the rate is low + * enough for long enough to drain its queue in the bottleneck. We pick up + * these min RTT measurements opportunistically with our min_rtt filter. :-) + */ +static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + bool probe_rtt_expired, min_rtt_expired; + u32 expire; + + /* Track min RTT in probe_rtt_win_ms to time next PROBE_RTT state. */ + expire = bbr->probe_rtt_min_stamp + + msecs_to_jiffies(bbr->params.probe_rtt_win_ms); + probe_rtt_expired = after(tcp_jiffies32, expire); + if (rs->rtt_us >= 0 && + (rs->rtt_us <= bbr->probe_rtt_min_us || + (probe_rtt_expired && !rs->is_ack_delayed))) { + bbr->probe_rtt_min_us = rs->rtt_us; + bbr->probe_rtt_min_stamp = tcp_jiffies32; + } + /* Track min RTT seen in the min_rtt_win_sec filter window: */ + expire = bbr->min_rtt_stamp + bbr->params.min_rtt_win_sec * HZ; + min_rtt_expired = after(tcp_jiffies32, expire); + if (bbr->probe_rtt_min_us <= bbr->min_rtt_us || + min_rtt_expired) { + bbr->min_rtt_us = bbr->probe_rtt_min_us; + bbr->min_rtt_stamp = bbr->probe_rtt_min_stamp; + } + + if (bbr->params.probe_rtt_mode_ms > 0 && probe_rtt_expired && + !bbr->idle_restart && bbr->mode != BBR_PROBE_RTT) { + bbr->mode = BBR_PROBE_RTT; /* dip, drain queue */ + bbr_save_cwnd(sk); /* note cwnd so we can restore it */ + bbr->probe_rtt_done_stamp = 0; + bbr->ack_phase = BBR_ACKS_PROBE_STOPPING; + bbr->next_rtt_delivered = tp->delivered; + } + + if (bbr->mode == BBR_PROBE_RTT) { + /* Ignore low rate samples during this mode. */ + tp->app_limited = + (tp->delivered + tcp_packets_in_flight(tp)) ? : 1; + /* Maintain min packets in flight for max(200 ms, 1 round). 
*/ + if (!bbr->probe_rtt_done_stamp && + tcp_packets_in_flight(tp) <= bbr_probe_rtt_cwnd(sk)) { + bbr->probe_rtt_done_stamp = tcp_jiffies32 + + msecs_to_jiffies(bbr->params.probe_rtt_mode_ms); + bbr->probe_rtt_round_done = 0; + bbr->next_rtt_delivered = tp->delivered; + } else if (bbr->probe_rtt_done_stamp) { + if (bbr->round_start) + bbr->probe_rtt_round_done = 1; + if (bbr->probe_rtt_round_done) + bbr_check_probe_rtt_done(sk); + } + } + /* Restart after idle ends only once we process a new S/ACK for data */ + if (rs->delivered > 0) + bbr->idle_restart = 0; +} + +static void bbr_update_gains(struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + + switch (bbr->mode) { + case BBR_STARTUP: + bbr->pacing_gain = bbr->params.high_gain; + bbr->cwnd_gain = bbr->params.startup_cwnd_gain; + break; + case BBR_DRAIN: + bbr->pacing_gain = bbr->params.drain_gain; /* slow, to drain */ + bbr->cwnd_gain = bbr->params.startup_cwnd_gain; /* keep cwnd */ + break; + case BBR_PROBE_BW: + bbr->pacing_gain = bbr->params.pacing_gain[bbr->cycle_idx]; + bbr->cwnd_gain = bbr->params.cwnd_gain; + break; + case BBR_PROBE_RTT: + bbr->pacing_gain = BBR_UNIT; + bbr->cwnd_gain = BBR_UNIT; + break; + default: + WARN_ONCE(1, "BBR bad mode: %u\n", bbr->mode); + break; + } +} + +static void bbr_init(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + int i; + + WARN_ON_ONCE(tp->snd_cwnd >= bbr_cwnd_warn_val); + + bbr->initialized = 1; + bbr->params.high_gain = min(0x7FF, bbr_high_gain); + bbr->params.drain_gain = min(0x3FF, bbr_drain_gain); + bbr->params.startup_cwnd_gain = min(0x7FF, bbr_startup_cwnd_gain); + bbr->params.cwnd_gain = min(0x7FF, bbr_cwnd_gain); + bbr->params.cwnd_tso_budget = min(0x1U, bbr_cwnd_tso_budget); + bbr->params.cwnd_min_target = min(0xFU, bbr_cwnd_min_target); + bbr->params.min_rtt_win_sec = min(0x1FU, bbr_min_rtt_win_sec); + bbr->params.probe_rtt_mode_ms = min(0x1FFU, bbr_probe_rtt_mode_ms); + bbr->params.full_bw_cnt = 
min(0x7U, bbr_full_bw_cnt); + bbr->params.full_bw_thresh = min(0x3FFU, bbr_full_bw_thresh); + bbr->params.extra_acked_gain = min(0x7FF, bbr_extra_acked_gain); + bbr->params.extra_acked_win_rtts = min(0x1FU, bbr_extra_acked_win_rtts); + bbr->params.drain_to_target = bbr_drain_to_target ? 1 : 0; + bbr->params.precise_ece_ack = bbr_precise_ece_ack ? 1 : 0; + bbr->params.extra_acked_in_startup = bbr_extra_acked_in_startup ? 1 : 0; + bbr->params.probe_rtt_cwnd_gain = min(0xFFU, bbr_probe_rtt_cwnd_gain); + bbr->params.probe_rtt_win_ms = + min(0x3FFFU, + min_t(u32, bbr_probe_rtt_win_ms, + bbr->params.min_rtt_win_sec * MSEC_PER_SEC)); + for (i = 0; i < CYCLE_LEN; i++) + bbr->params.pacing_gain[i] = min(0x3FF, bbr_pacing_gain[i]); + bbr->params.usage_based_cwnd = bbr_usage_based_cwnd ? 1 : 0; + bbr->params.tso_rtt_shift = min(0xFU, bbr_tso_rtt_shift); + + bbr->debug.snd_isn = tp->snd_una; + bbr->debug.target_cwnd = 0; + bbr->debug.undo = 0; + + bbr->init_cwnd = min(0x7FU, tp->snd_cwnd); + bbr->prior_cwnd = tp->prior_cwnd; + tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; + bbr->next_rtt_delivered = 0; + bbr->prev_ca_state = TCP_CA_Open; + bbr->packet_conservation = 0; + + bbr->probe_rtt_done_stamp = 0; + bbr->probe_rtt_round_done = 0; + bbr->probe_rtt_min_us = tcp_min_rtt(tp); + bbr->probe_rtt_min_stamp = tcp_jiffies32; + bbr->min_rtt_us = tcp_min_rtt(tp); + bbr->min_rtt_stamp = tcp_jiffies32; + + bbr->has_seen_rtt = 0; + bbr_init_pacing_rate_from_rtt(sk); + + bbr->round_start = 0; + bbr->idle_restart = 0; + bbr->full_bw_reached = 0; + bbr->full_bw = 0; + bbr->full_bw_cnt = 0; + bbr->cycle_mstamp = 0; + bbr->cycle_idx = 0; + bbr->mode = BBR_STARTUP; + bbr->debug.rs_bw = 0; + + bbr->ack_epoch_mstamp = tp->tcp_mstamp; + bbr->ack_epoch_acked = 0; + bbr->extra_acked_win_rtts = 0; + bbr->extra_acked_win_idx = 0; + bbr->extra_acked[0] = 0; + bbr->extra_acked[1] = 0; + + bbr->ce_state = 0; + bbr->prior_rcv_nxt = tp->rcv_nxt; + bbr->try_fast_path = 0; + + cmpxchg(&sk->sk_pacing_status, 
SK_PACING_NONE, SK_PACING_NEEDED); +} + +static u32 bbr_sndbuf_expand(struct sock *sk) +{ + /* Provision 3 * cwnd since BBR may slow-start even during recovery. */ + return 3; +} + +/* __________________________________________________________________________ + * + * Functions new to BBR v2 ("bbr") congestion control are below here. + * __________________________________________________________________________ + */ + +/* Incorporate a new bw sample into the current window of our max filter. */ +static void bbr2_take_bw_hi_sample(struct sock *sk, u32 bw) +{ + struct bbr *bbr = inet_csk_ca(sk); + + bbr->bw_hi[1] = max(bw, bbr->bw_hi[1]); +} + +/* Keep max of last 1-2 cycles. Each PROBE_BW cycle, flip filter window. */ +static void bbr2_advance_bw_hi_filter(struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + + if (!bbr->bw_hi[1]) + return; /* no samples in this window; remember old window */ + bbr->bw_hi[0] = bbr->bw_hi[1]; + bbr->bw_hi[1] = 0; +} + +/* How much do we want in flight? Our BDP, unless congestion cut cwnd. */ +static u32 bbr2_target_inflight(struct sock *sk) +{ + u32 bdp = bbr_inflight(sk, bbr_bw(sk), BBR_UNIT); + + return min(bdp, tcp_sk(sk)->snd_cwnd); +} + +static bool bbr2_is_probing_bandwidth(struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + + return (bbr->mode == BBR_STARTUP) || + (bbr->mode == BBR_PROBE_BW && + (bbr->cycle_idx == BBR_BW_PROBE_REFILL || + bbr->cycle_idx == BBR_BW_PROBE_UP)); +} + +/* Has the given amount of time elapsed since we marked the phase start? 
*/ +static bool bbr2_has_elapsed_in_phase(const struct sock *sk, u32 interval_us) +{ + const struct tcp_sock *tp = tcp_sk(sk); + const struct bbr *bbr = inet_csk_ca(sk); + + return tcp_stamp_us_delta(tp->tcp_mstamp, + bbr->cycle_mstamp + interval_us) > 0; +} + +static void bbr2_handle_queue_too_high_in_startup(struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + + bbr->full_bw_reached = 1; + bbr->inflight_hi = bbr_inflight(sk, bbr_max_bw(sk), BBR_UNIT); +} + +/* Exit STARTUP upon N consecutive rounds with ECN mark rate > ecn_thresh. */ +static void bbr2_check_ecn_too_high_in_startup(struct sock *sk, u32 ce_ratio) +{ + struct bbr *bbr = inet_csk_ca(sk); + + if (bbr_full_bw_reached(sk) || !bbr->ecn_eligible || + !bbr->params.full_ecn_cnt || !bbr->params.ecn_thresh) + return; + + if (ce_ratio >= bbr->params.ecn_thresh) + bbr->startup_ecn_rounds++; + else + bbr->startup_ecn_rounds = 0; + + if (bbr->startup_ecn_rounds >= bbr->params.full_ecn_cnt) { + bbr->debug.event = 'E'; /* ECN caused STARTUP exit */ + bbr2_handle_queue_too_high_in_startup(sk); + return; + } +} + +static void bbr2_update_ecn_alpha(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + s32 delivered, delivered_ce; + u64 alpha, ce_ratio; + u32 gain; + + if (bbr->params.ecn_factor == 0) + return; + + delivered = tp->delivered - bbr->alpha_last_delivered; + delivered_ce = tp->delivered_ce - bbr->alpha_last_delivered_ce; + + if (delivered == 0 || /* avoid divide by zero */ + WARN_ON_ONCE(delivered < 0 || delivered_ce < 0)) /* backwards? */ + return; + + /* See if we should use ECN sender logic for this connection. 
*/ + if (!bbr->ecn_eligible && bbr_ecn_enable && + (bbr->min_rtt_us <= bbr->params.ecn_max_rtt_us || + !bbr->params.ecn_max_rtt_us)) + bbr->ecn_eligible = 1; + + ce_ratio = (u64)delivered_ce << BBR_SCALE; + do_div(ce_ratio, delivered); + gain = bbr->params.ecn_alpha_gain; + alpha = ((BBR_UNIT - gain) * bbr->ecn_alpha) >> BBR_SCALE; + alpha += (gain * ce_ratio) >> BBR_SCALE; + bbr->ecn_alpha = min_t(u32, alpha, BBR_UNIT); + + bbr->alpha_last_delivered = tp->delivered; + bbr->alpha_last_delivered_ce = tp->delivered_ce; + + bbr2_check_ecn_too_high_in_startup(sk, ce_ratio); +} + +/* Each round trip of BBR_BW_PROBE_UP, double volume of probing data. */ +static void bbr2_raise_inflight_hi_slope(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + u32 growth_this_round, cnt; + + /* Calculate "slope": packets S/Acked per inflight_hi increment. */ + growth_this_round = 1 << bbr->bw_probe_up_rounds; + bbr->bw_probe_up_rounds = min(bbr->bw_probe_up_rounds + 1, 30); + cnt = tp->snd_cwnd / growth_this_round; + cnt = max(cnt, 1U); + bbr->bw_probe_up_cnt = cnt; + bbr->debug.event = 'G'; /* Grow inflight_hi slope */ +} + +/* In BBR_BW_PROBE_UP, not seeing high loss/ECN/queue, so raise inflight_hi. */ +static void bbr2_probe_inflight_hi_upward(struct sock *sk, + const struct rate_sample *rs) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + u32 delta; + + if (!tp->is_cwnd_limited || tp->snd_cwnd < bbr->inflight_hi) { + bbr->bw_probe_up_acks = 0; /* don't accumulate unused credits */ + return; /* not fully using inflight_hi, so don't grow it */ + } + + /* For each bw_probe_up_cnt packets ACKed, increase inflight_hi by 1. 
*/ + bbr->bw_probe_up_acks += rs->acked_sacked; + if (bbr->bw_probe_up_acks >= bbr->bw_probe_up_cnt) { + delta = bbr->bw_probe_up_acks / bbr->bw_probe_up_cnt; + bbr->bw_probe_up_acks -= delta * bbr->bw_probe_up_cnt; + bbr->inflight_hi += delta; + bbr->debug.event = 'I'; /* Increment inflight_hi */ + } + + if (bbr->round_start) + bbr2_raise_inflight_hi_slope(sk); +} + +/* Does loss/ECN rate for this sample say inflight is "too high"? + * This is used by both the bbr_check_loss_too_high_in_startup() function, + * which can be used in either v1 or v2, and the PROBE_UP phase of v2, which + * uses it to notice when loss/ECN rates suggest inflight is too high. + */ +static bool bbr2_is_inflight_too_high(const struct sock *sk, + const struct rate_sample *rs) +{ + const struct bbr *bbr = inet_csk_ca(sk); + u32 loss_thresh, ecn_thresh; + + if (rs->lost > 0 && rs->tx_in_flight) { + loss_thresh = (u64)rs->tx_in_flight * bbr->params.loss_thresh >> + BBR_SCALE; + if (rs->lost > loss_thresh) + return true; + } + + if (rs->delivered_ce > 0 && rs->delivered > 0 && + bbr->ecn_eligible && bbr->params.ecn_thresh) { + ecn_thresh = (u64)rs->delivered * bbr->params.ecn_thresh >> + BBR_SCALE; + if (rs->delivered_ce >= ecn_thresh) + return true; + } + + return false; +} + +/* Calculate the tx_in_flight level that corresponded to excessive loss. + * We find "lost_prefix" segs of the skb where loss rate went too high, + * by solving for "lost_prefix" in the following equation: + * lost / inflight >= loss_thresh + * (lost_prev + lost_prefix) / (inflight_prev + lost_prefix) >= loss_thresh + * Then we take that equation, convert it to fixed point, and + * round up to the nearest packet. 
+ */ +static u32 bbr2_inflight_hi_from_lost_skb(const struct sock *sk, + const struct rate_sample *rs, + const struct sk_buff *skb) +{ + const struct bbr *bbr = inet_csk_ca(sk); + u32 loss_thresh = bbr->params.loss_thresh; + u32 pcount, divisor, inflight_hi; + s32 inflight_prev, lost_prev; + u64 loss_budget, lost_prefix; + + pcount = tcp_skb_pcount(skb); + + /* How much data was in flight before this skb? */ + inflight_prev = rs->tx_in_flight - pcount; + if (WARN_ONCE(inflight_prev < 0, + "tx_in_flight: %u pcount: %u reneg: %u", + rs->tx_in_flight, pcount, tcp_sk(sk)->is_sack_reneg)) + return ~0U; + + /* How much inflight data was marked lost before this skb? */ + lost_prev = rs->lost - pcount; + if (WARN_ON_ONCE(lost_prev < 0)) + return ~0U; + + /* At what prefix of this lost skb did loss rate exceed loss_thresh? */ + loss_budget = (u64)inflight_prev * loss_thresh + BBR_UNIT - 1; + loss_budget >>= BBR_SCALE; + if (lost_prev >= loss_budget) { + lost_prefix = 0; /* previous losses crossed loss_thresh */ + } else { + lost_prefix = loss_budget - lost_prev; + lost_prefix <<= BBR_SCALE; + divisor = BBR_UNIT - loss_thresh; + if (WARN_ON_ONCE(!divisor)) /* loss_thresh is 8 bits */ + return ~0U; + do_div(lost_prefix, divisor); + } + + inflight_hi = inflight_prev + lost_prefix; + return inflight_hi; +} + +/* If loss/ECN rates during probing indicated we may have overfilled a + * buffer, return an operating point that tries to leave unutilized headroom in + * the path for other flows, for fairness convergence and lower RTTs and loss. 
+ */ +static u32 bbr2_inflight_with_headroom(const struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + u32 headroom, headroom_fraction; + + if (bbr->inflight_hi == ~0U) + return ~0U; + + headroom_fraction = bbr->params.inflight_headroom; + headroom = ((u64)bbr->inflight_hi * headroom_fraction) >> BBR_SCALE; + headroom = max(headroom, 1U); + return max_t(s32, bbr->inflight_hi - headroom, + bbr->params.cwnd_min_target); +} + +/* Bound cwnd to a sensible level, based on our current probing state + * machine phase and model of a good inflight level (inflight_lo, inflight_hi). + */ +static void bbr2_bound_cwnd_for_inflight_model(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + u32 cap; + + /* tcp_rcv_synsent_state_process() currently calls tcp_ack() + * and thus cong_control() without first initializing us(!). + */ + if (!bbr->initialized) + return; + + cap = ~0U; + if (bbr->mode == BBR_PROBE_BW && + bbr->cycle_idx != BBR_BW_PROBE_CRUISE) { + /* Probe to see if more packets fit in the path. */ + cap = bbr->inflight_hi; + } else { + if (bbr->mode == BBR_PROBE_RTT || + (bbr->mode == BBR_PROBE_BW && + bbr->cycle_idx == BBR_BW_PROBE_CRUISE)) + cap = bbr2_inflight_with_headroom(sk); + } + /* Adapt to any loss/ECN since our last bw probe. */ + cap = min(cap, bbr->inflight_lo); + + cap = max_t(u32, cap, bbr->params.cwnd_min_target); + tp->snd_cwnd = min(cap, tp->snd_cwnd); +} + +/* Estimate a short-term lower bound on the capacity available now, based + * on measurements of the current delivery process and recent history. When we + * are seeing loss/ECN at times when we are not probing bw, then conservatively + * move toward flow balance by multiplicatively cutting our short-term + * estimated safe rate and volume of data (bw_lo and inflight_lo). We use a + * multiplicative decrease in order to converge to a lower capacity in time + * logarithmic in the magnitude of the decrease. 
+ *
+ * However, we do not cut our short-term estimates lower than the current rate
+ * and volume of delivered data from this round trip, since from the current
+ * delivery process we can estimate the measured capacity available now.
+ *
+ * Anything faster than that approach would knowingly risk high loss, which can
+ * cause low bw for Reno/CUBIC and high loss recovery latency for
+ * request/response flows using any congestion control.
+ */
+static void bbr2_adapt_lower_bounds(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct bbr *bbr = inet_csk_ca(sk);
+	u32 ecn_cut, ecn_inflight_lo, beta;
+
+	/* We only use lower-bound estimates when not probing bw.
+	 * When probing we need to push inflight higher to probe bw.
+	 */
+	if (bbr2_is_probing_bandwidth(sk))
+		return;
+
+	/* ECN response. */
+	if (bbr->ecn_in_round && bbr->ecn_eligible && bbr->params.ecn_factor) {
+		/* Reduce inflight to (1 - alpha*ecn_factor). */
+		ecn_cut = (BBR_UNIT -
+			   ((bbr->ecn_alpha * bbr->params.ecn_factor) >>
+			    BBR_SCALE));
+		if (bbr->inflight_lo == ~0U)
+			bbr->inflight_lo = tp->snd_cwnd; /* unset: start at cwnd */
+		ecn_inflight_lo = (u64)bbr->inflight_lo * ecn_cut >> BBR_SCALE;
+	} else {
+		ecn_inflight_lo = ~0U;	/* no ECN-implied bound this round */
+	}
+
+	/* Loss response. */
+	if (bbr->loss_in_round) {
+		/* Reduce bw and inflight to (1 - beta), but never below the
+		 * latest delivered rate and volume (bw_latest and
+		 * inflight_latest). Unset (~0U) bounds are first seeded from
+		 * bbr_max_bw() and the current cwnd.
+		 */
+		if (bbr->bw_lo == ~0U)
+			bbr->bw_lo = bbr_max_bw(sk);
+		if (bbr->inflight_lo == ~0U)
+			bbr->inflight_lo = tp->snd_cwnd;
+		beta = bbr->params.beta;
+		bbr->bw_lo =
+			max_t(u32, bbr->bw_latest,
+			      (u64)bbr->bw_lo *
+			      (BBR_UNIT - beta) >> BBR_SCALE);
+		bbr->inflight_lo =
+			max_t(u32, bbr->inflight_latest,
+			      (u64)bbr->inflight_lo *
+			      (BBR_UNIT - beta) >> BBR_SCALE);
+	}
+
+	/* Adjust to the lower of the levels implied by loss or ECN. */
+	bbr->inflight_lo = min(bbr->inflight_lo, ecn_inflight_lo);
+}
+
+/* Reset any short-term lower-bound adaptation to congestion, so that we can
+ * push our inflight up.
+ */
+static void bbr2_reset_lower_bounds(struct sock *sk)
+{
+	struct bbr *bbr = inet_csk_ca(sk);
+
+	bbr->bw_lo = ~0U;		/* ~0U marks "no lower bound set" */
+	bbr->inflight_lo = ~0U;
+}
+
+/* After bw probing (STARTUP/PROBE_UP), reset signals before entering a state
+ * machine phase where we adapt our lower bound based on congestion signals.
+ * Clears the per-round and per-cycle loss/ECN flags and the "latest"
+ * bw/inflight measurements.
+ */
+static void bbr2_reset_congestion_signals(struct sock *sk)
+{
+	struct bbr *bbr = inet_csk_ca(sk);
+
+	bbr->loss_in_round = 0;
+	bbr->ecn_in_round = 0;
+	bbr->loss_in_cycle = 0;
+	bbr->ecn_in_cycle = 0;
+	bbr->bw_latest = 0;
+	bbr->inflight_latest = 0;
+}
+
+/* Update (most of) our congestion signals: track the recent rate and volume of
+ * delivered data, presence of loss, and EWMA degree of ECN marking.
+ * bbr->loss_round_start is cleared on entry and set to 1 only on the ACK for
+ * which rs->prior_delivered is no longer before bbr->loss_round_delivered;
+ * only then are the lower bounds adapted and the per-round filters restarted.
+ */
+static void bbr2_update_congestion_signals(
+	struct sock *sk, const struct rate_sample *rs, struct bbr_context *ctx)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct bbr *bbr = inet_csk_ca(sk);
+	u64 bw;
+
+	bbr->loss_round_start = 0;
+	if (rs->interval_us <= 0 || !rs->acked_sacked)
+		return; /* Not a valid observation */
+	bw = ctx->sample_bw;
+
+	if (!rs->is_app_limited || bw >= bbr_max_bw(sk))
+		bbr2_take_bw_hi_sample(sk, bw);	/* app-limited only if >= max */
+
+	bbr->loss_in_round |= (rs->losses > 0);
+
+	/* Update rate and volume of delivered data from latest round trip: */
+	bbr->bw_latest = max_t(u32, bbr->bw_latest, ctx->sample_bw);
+	bbr->inflight_latest = max_t(u32, bbr->inflight_latest, rs->delivered);
+
+	if (before(rs->prior_delivered, bbr->loss_round_delivered))
+		return;		/* skip the per-round-trip updates */
+	/* Now do per-round-trip updates. */
+	bbr->loss_round_delivered = tp->delivered;  /* mark round trip */
+	bbr->loss_round_start = 1;
+	bbr2_adapt_lower_bounds(sk);
+
+	/* Update windowed "latest" (single-round-trip) filters. */
+	bbr->loss_in_round = 0;
+	bbr->ecn_in_round = 0;
+	bbr->bw_latest = ctx->sample_bw;
+	bbr->inflight_latest = rs->delivered;
+}
+
+/* Bandwidth probing can cause loss.
To help coexistence with loss-based
+ * congestion control we spread out our probing in a Reno-conscious way. Due to
+ * the shape of the Reno sawtooth, the time required between loss epochs for an
+ * idealized Reno flow is a number of round trips that is the BDP of that
+ * flow. We count packet-timed round trips directly, since measured RTT can
+ * vary widely, and Reno is driven by packet-timed round trips.
+ */
+static bool bbr2_is_reno_coexistence_probe_time(struct sock *sk)
+{
+	struct bbr *bbr = inet_csk_ca(sk);
+	u32 inflight, rounds, reno_gain, reno_rounds;
+
+	/* Random loss can shave some small percentage off of our inflight
+	 * in each round. To survive this, flows need robust periodic probes.
+	 * So start from the configured maximum number of rounds.
+	 */
+	rounds = bbr->params.bw_probe_max_rounds;
+
+	reno_gain = bbr->params.bw_probe_reno_gain;
+	if (reno_gain) {	/* 0 disables the BDP-scaled Reno bound */
+		inflight = bbr2_target_inflight(sk);
+		reno_rounds = ((u64)inflight * reno_gain) >> BBR_SCALE;
+		rounds = min(rounds, reno_rounds);	/* sooner of the two */
+	}
+	return bbr->rounds_since_probe >= rounds;
+}
+
+/* How long do we want to wait before probing for bandwidth (and risking
+ * loss)? We randomize the wait, for better mixing and fairness convergence.
+ *
+ * We bound the Reno-coexistence inter-bw-probe time to be 62-63 round trips.
+ * This is calculated to allow fairness with a 25Mbps, 30ms Reno flow,
+ * (eg 4K video to a broadband user):
+ *   BDP = 25Mbps * .030sec /(1514bytes) = 61.9 packets
+ *
+ * We bound the BBR-native inter-bw-probe wall clock time to be:
+ *  (a) higher than 2 sec: to try to avoid causing loss for a long enough time
+ *      to allow Reno at 30ms to get 4K video bw, the inter-bw-probe time must
+ *      be at least: 25Mbps * .030sec / (1514bytes) * 0.030sec = 1.9secs
+ *  (b) lower than 3 sec: to ensure flows can start probing in a reasonable
+ *      amount of time to discover unutilized bw on human-scale interactive
+ *      time-scales (e.g. perhaps traffic from a web page download that we
+ *      were competing with is now complete).
+ */
+static void bbr2_pick_probe_wait(struct sock *sk)
+{
+	struct bbr *bbr = inet_csk_ca(sk);
+
+	/* Decide the random round-trip bound for wait until probe, by
+	 * giving the rounds_since_probe counter a random starting value:
+	 */
+	bbr->rounds_since_probe =
+		prandom_u32_max(bbr->params.bw_probe_rand_rounds);
+	/* Decide the random wall clock bound for wait until probe: */
+	bbr->probe_wait_us = bbr->params.bw_probe_base_us +
+			     prandom_u32_max(bbr->params.bw_probe_rand_us);
+}
+/* Record the new PROBE_BW cycle phase and switch the fast path off. */
+static void bbr2_set_cycle_idx(struct sock *sk, int cycle_idx)
+{
+	struct bbr *bbr = inet_csk_ca(sk);
+
+	bbr->cycle_idx = cycle_idx;
+	/* New phase, so need to update cwnd and pacing rate. */
+	bbr->try_fast_path = 0;
+}
+
+/* Send at estimated bw to fill the pipe, but not queue. We need this phase
+ * before PROBE_UP, because as soon as we send faster than the available bw
+ * we will start building a queue, and if the buffer is shallow we can cause
+ * loss. If we do not fill the pipe before we cause this loss, our bw_hi and
+ * inflight_hi estimates will underestimate.
+ *
+ * bw_probe_up_rounds is stored in bbr->bw_probe_up_rounds; the callers in
+ * this file pass either 0 or an ilog2()-based estimate.
+ */
+static void bbr2_start_bw_probe_refill(struct sock *sk, u32 bw_probe_up_rounds)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct bbr *bbr = inet_csk_ca(sk);
+
+	bbr2_reset_lower_bounds(sk);
+	if (bbr->inflight_hi != ~0U)	/* leave an unset bound unset */
+		bbr->inflight_hi += bbr->params.refill_add_inc;
+	bbr->bw_probe_up_rounds = bw_probe_up_rounds;
+	bbr->bw_probe_up_acks = 0;
+	bbr->stopped_risky_probe = 0;
+	bbr->ack_phase = BBR_ACKS_REFILLING;
+	bbr->next_rtt_delivered = tp->delivered;
+	bbr2_set_cycle_idx(sk, BBR_BW_PROBE_REFILL);
+}
+
+/* Now probe max deliverable data rate and volume. */
+static void bbr2_start_bw_probe_up(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct bbr *bbr = inet_csk_ca(sk);
+
+	bbr->ack_phase = BBR_ACKS_PROBE_STARTING;
+	bbr->next_rtt_delivered = tp->delivered;
+	bbr->cycle_mstamp = tp->tcp_mstamp;	/* phase timer starts now */
+	bbr2_set_cycle_idx(sk, BBR_BW_PROBE_UP);
+	bbr2_raise_inflight_hi_slope(sk);
+}
+
+/* Start a new PROBE_BW probing cycle of some wall clock length.
Pick a wall
+ * clock time at which to probe beyond an inflight that we think to be
+ * safe. This will knowingly risk packet loss, so we want to do this rarely, to
+ * keep packet loss rates low. Also start a round-trip counter, to probe faster
+ * if we estimate a Reno flow at our BDP would probe faster.
+ */
+static void bbr2_start_bw_probe_down(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct bbr *bbr = inet_csk_ca(sk);
+
+	bbr2_reset_congestion_signals(sk);
+	bbr->bw_probe_up_cnt = ~0U;     /* not growing inflight_hi any more */
+	bbr2_pick_probe_wait(sk);
+	bbr->cycle_mstamp = tp->tcp_mstamp;	/* start wall clock */
+	bbr->ack_phase = BBR_ACKS_PROBE_STOPPING;
+	bbr->next_rtt_delivered = tp->delivered;
+	bbr2_set_cycle_idx(sk, BBR_BW_PROBE_DOWN);
+}
+
+/* Cruise: maintain what we estimate to be a neutral, conservative
+ * operating point, without attempting to probe up for bandwidth or down for
+ * RTT, and only reducing inflight in response to loss/ECN signals.
+ */
+static void bbr2_start_bw_probe_cruise(struct sock *sk)
+{
+	struct bbr *bbr = inet_csk_ca(sk);
+
+	if (bbr->inflight_lo != ~0U)	/* if set, keep lo <= hi */
+		bbr->inflight_lo = min(bbr->inflight_lo, bbr->inflight_hi);
+
+	bbr2_set_cycle_idx(sk, BBR_BW_PROBE_CRUISE);
+}
+
+/* Loss and/or ECN rate is too high while probing.
+ * Adapt (once per bw probe) by cutting inflight_hi and then restarting cycle.
+ * The new inflight_hi is the larger of rs->tx_in_flight and (1 - beta) times
+ * the target inflight; it is left alone for app-limited samples.
+ */
+static void bbr2_handle_inflight_too_high(struct sock *sk,
+					  const struct rate_sample *rs)
+{
+	struct bbr *bbr = inet_csk_ca(sk);
+	const u32 beta = bbr->params.beta;
+
+	bbr->prev_probe_too_high = 1;
+	bbr->bw_probe_samples = 0;  /* only react once per probe */
+	bbr->debug.event = 'L';     /* Loss/ECN too high */
+	/* If we are app-limited then we are not robustly
+	 * probing the max volume of inflight data we think
+	 * might be safe (analogous to how app-limited bw
+	 * samples are not known to be robustly probing bw).
+	 */
+	if (!rs->is_app_limited)
+		bbr->inflight_hi = max_t(u32, rs->tx_in_flight,
+					 (u64)bbr2_target_inflight(sk) *
+					 (BBR_UNIT - beta) >> BBR_SCALE);
+	if (bbr->mode == BBR_PROBE_BW && bbr->cycle_idx == BBR_BW_PROBE_UP)
+		bbr2_start_bw_probe_down(sk);
+}
+
+/* If we're seeing bw and loss samples reflecting our bw probing, adapt
+ * using the signals we see. If loss or ECN mark rate gets too high, then adapt
+ * inflight_hi downward. If we're able to push inflight higher without such
+ * signals, push higher: adapt inflight_hi upward.
+ *
+ * Returns true only when this function already started a new REFILL phase,
+ * so the caller must not pick another state transition for this ACK.
+ */
+static bool bbr2_adapt_upper_bounds(struct sock *sk,
+				   const struct rate_sample *rs)
+{
+	struct bbr *bbr = inet_csk_ca(sk);
+
+	/* Track when we'll see bw/loss samples resulting from our bw probes. */
+	if (bbr->ack_phase == BBR_ACKS_PROBE_STARTING && bbr->round_start)
+		bbr->ack_phase = BBR_ACKS_PROBE_FEEDBACK;
+	if (bbr->ack_phase == BBR_ACKS_PROBE_STOPPING && bbr->round_start) {
+		/* End of samples from bw probing phase. */
+		bbr->bw_probe_samples = 0;
+		bbr->ack_phase = BBR_ACKS_INIT;
+		/* At this point in the cycle, our current bw sample is also
+		 * our best recent chance at finding the highest available bw
+		 * for this flow. So now is the best time to forget the bw
+		 * samples from the previous cycle, by advancing the window.
+		 */
+		if (bbr->mode == BBR_PROBE_BW && !rs->is_app_limited)
+			bbr2_advance_bw_hi_filter(sk);
+		/* If we had an inflight_hi, then probed and pushed inflight all
+		 * the way up to hit that inflight_hi without seeing any
+		 * high loss/ECN in all the resulting ACKs from that probing,
+		 * then probe up again, this time letting inflight persist at
+		 * inflight_hi for a round trip, then accelerating beyond.
+		 */
+		if (bbr->mode == BBR_PROBE_BW &&
+		    bbr->stopped_risky_probe && !bbr->prev_probe_too_high) {
+			bbr->debug.event = 'R';  /* reprobe */
+			bbr2_start_bw_probe_refill(sk, 0);
+			return true;  /* yes, decided state transition */
+		}
+	}
+
+	if (bbr2_is_inflight_too_high(sk, rs)) {
+		if (bbr->bw_probe_samples)  /* sample is from bw probing? */
+			bbr2_handle_inflight_too_high(sk, rs);
+	} else {
+		/* Loss/ECN rate is declared safe. Adjust upper bound upward. */
+		if (bbr->inflight_hi == ~0U)  /* no excess queue signals yet? */
+			return false;
+
+		/* To be resilient to random loss, we must raise inflight_hi
+		 * if we observe in any phase that a higher level is safe.
+		 */
+		if (rs->tx_in_flight > bbr->inflight_hi) {
+			bbr->inflight_hi = rs->tx_in_flight;
+			bbr->debug.event = 'U';  /* raise up inflight_hi */
+		}
+
+		if (bbr->mode == BBR_PROBE_BW &&
+		    bbr->cycle_idx == BBR_BW_PROBE_UP)
+			bbr2_probe_inflight_hi_upward(sk, rs);
+	}
+
+	return false;
+}
+
+/* Check if it's time to probe for bandwidth now, and if so, kick it off.
+ * Returns true if a REFILL phase was started.
+ */
+static bool bbr2_check_time_to_probe_bw(struct sock *sk)
+{
+	struct bbr *bbr = inet_csk_ca(sk);
+	u32 n;
+
+	/* If we seem to be at an operating point where we are not seeing loss
+	 * but we are seeing ECN marks, then when the ECN marks cease we reprobe
+	 * quickly (in case a burst of cross-traffic has ceased and freed up bw,
+	 * or in case we are sharing with multiplicatively probing traffic).
+	 */
+	if (bbr->params.ecn_reprobe_gain && bbr->ecn_eligible &&
+	    bbr->ecn_in_cycle && !bbr->loss_in_cycle &&
+	    inet_csk(sk)->icsk_ca_state == TCP_CA_Open) {
+		bbr->debug.event = 'A';  /* *A*ll clear to probe *A*gain */
+		/* Calculate n so that when bbr2_raise_inflight_hi_slope()
+		 * computes growth_this_round as 2^n it will be roughly the
+		 * desired volume of data (inflight_hi*ecn_reprobe_gain).
+		 * NOTE(review): if the scaled product is 0 (small inflight_hi
+		 * and gain), ilog2() is handed 0 here, and inflight_hi may
+		 * still be ~0U; confirm neither can happen, or clamp.
+		 */
+		n = ilog2((((u64)bbr->inflight_hi *
+			    bbr->params.ecn_reprobe_gain) >> BBR_SCALE));
+		bbr2_start_bw_probe_refill(sk, n);
+		return true;
+	}
+
+	if (bbr2_has_elapsed_in_phase(sk, bbr->probe_wait_us) ||
+	    bbr2_is_reno_coexistence_probe_time(sk)) {
+		bbr2_start_bw_probe_refill(sk, 0);
+		return true;
+	}
+	return false;
+}
+
+/* Is it time to transition from PROBE_DOWN to PROBE_CRUISE?
+ * Never while inflight is above the headroom level. Otherwise yes once
+ * inflight is at or below the BDP; unless params.drain_to_target is set,
+ * having spent min_rtt_us in the phase is also sufficient.
+ */
+static bool bbr2_check_time_to_cruise(struct sock *sk, u32 inflight, u32 bw)
+{
+	struct bbr *bbr = inet_csk_ca(sk);
+	bool is_under_bdp, is_long_enough;
+
+	/* Always need to pull inflight down to leave headroom in queue. */
+	if (inflight > bbr2_inflight_with_headroom(sk))
+		return false;
+
+	is_under_bdp = inflight <= bbr_inflight(sk, bw, BBR_UNIT);
+	if (bbr->params.drain_to_target)
+		return is_under_bdp;
+
+	is_long_enough = bbr2_has_elapsed_in_phase(sk, bbr->min_rtt_us);
+	return is_under_bdp || is_long_enough;
+}
+
+/* PROBE_BW state machine: cruise, refill, probe for bw, or drain?
+ * Does nothing until bbr_full_bw_reached(). The upper bounds are adapted in
+ * every mode, but the phase switch itself only runs in BBR_PROBE_BW.
+ */
+static void bbr2_update_cycle_phase(struct sock *sk,
+				    const struct rate_sample *rs)
+{
+	struct bbr *bbr = inet_csk_ca(sk);
+	bool is_risky = false, is_queuing = false;
+	u32 inflight, bw;
+
+	if (!bbr_full_bw_reached(sk))
+		return;
+
+	/* In DRAIN, PROBE_BW, or PROBE_RTT, adjust upper bounds. */
+	if (bbr2_adapt_upper_bounds(sk, rs))
+		return;		/* already decided state transition */
+
+	if (bbr->mode != BBR_PROBE_BW)
+		return;
+
+	inflight = bbr_packets_in_net_at_edt(sk, rs->prior_in_flight);
+	bw = bbr_max_bw(sk);
+
+	switch (bbr->cycle_idx) {
+	/* First we spend most of our time cruising with a pacing_gain of 1.0,
+	 * which paces at the estimated bw, to try to fully use the pipe
+	 * without building queue. If we encounter loss/ECN marks, we adapt
+	 * by slowing down.
+	 */
+	case BBR_BW_PROBE_CRUISE:
+		if (bbr2_check_time_to_probe_bw(sk))
+			return;		/* already decided state transition */
+		break;
+
+	/* After cruising, when it's time to probe, we first "refill": we send
+	 * at the estimated bw to fill the pipe, before probing higher and
+	 * knowingly risking overflowing the bottleneck buffer (causing loss).
+	 */
+	case BBR_BW_PROBE_REFILL:
+		if (bbr->round_start) {
+			/* After one full round trip of sending in REFILL, we
+			 * start to see bw samples reflecting our REFILL, which
+			 * may be putting too much data in flight.
+			 */
+			bbr->bw_probe_samples = 1;
+			bbr2_start_bw_probe_up(sk);
+		}
+		break;
+
+	/* After we refill the pipe, we probe by using a pacing_gain > 1.0, to
+	 * probe for bw. If we have not seen loss/ECN, we try to raise inflight
+	 * to at least pacing_gain*BDP; note that this may take more than
+	 * min_rtt if min_rtt is small (e.g. on a LAN).
+	 *
+	 * We terminate PROBE_UP bandwidth probing upon any of the following:
+	 *
+	 * (1) We've pushed inflight up to hit the inflight_hi target set in the
+	 *     most recent previous bw probe phase. Thus we want to start
+	 *     draining the queue immediately because it's very likely the most
+	 *     recently sent packets will fill the queue and cause drops.
+	 *     (checked here)
+	 * (2) We have probed for at least 1*min_rtt_us, and the
+	 *     estimated queue is high enough (inflight > 1.25 * estimated_bdp).
+	 *     (checked here)
+	 * (3) Loss filter says loss rate is "too high".
+	 *     (checked in bbr2_is_inflight_too_high())
+	 * (4) ECN filter says ECN mark rate is "too high".
+	 *     (checked in bbr2_is_inflight_too_high())
+	 */
+	case BBR_BW_PROBE_UP:
+		if (bbr->prev_probe_too_high &&
+		    inflight >= bbr->inflight_hi) {
+			bbr->stopped_risky_probe = 1;
+			is_risky = true;
+			bbr->debug.event = 'D';   /* D for danger */
+		} else if (bbr2_has_elapsed_in_phase(sk, bbr->min_rtt_us) &&
+			   inflight >=
+			   bbr_inflight(sk, bw,
+					bbr->params.bw_probe_pif_gain)) {
+			is_queuing = true;
+			bbr->debug.event = 'Q'; /* building Queue */
+		}
+		if (is_risky || is_queuing) {
+			bbr->prev_probe_too_high = 0;  /* no loss/ECN (yet) */
+			bbr2_start_bw_probe_down(sk);  /* restart w/ down */
+		}
+		break;
+
+	/* After probing in PROBE_UP, we have usually accumulated some data in
+	 * the bottleneck buffer (if bw probing didn't find more bw). We next
+	 * enter PROBE_DOWN to try to drain any excess data from the queue. To
+	 * do this, we use a pacing_gain < 1.0. We hold this pacing gain until
+	 * our inflight is less than that target cruising point, which is the
+	 * minimum of (a) the amount needed to leave headroom, and (b) the
+	 * estimated BDP. Once inflight falls to match the target, we estimate
+	 * the queue is drained; persisting would underutilize the pipe.
+	 */
+	case BBR_BW_PROBE_DOWN:
+		if (bbr2_check_time_to_probe_bw(sk))
+			return;		/* already decided state transition */
+		if (bbr2_check_time_to_cruise(sk, inflight, bw))
+			bbr2_start_bw_probe_cruise(sk);
+		break;
+
+	default:
+		WARN_ONCE(1, "BBR invalid cycle index %u\n", bbr->cycle_idx);
+	}
+}
+
+/* Exiting PROBE_RTT, so return to bandwidth probing in STARTUP or PROBE_BW. */
+static void bbr2_exit_probe_rtt(struct sock *sk)
+{
+	struct bbr *bbr = inet_csk_ca(sk);
+
+	bbr2_reset_lower_bounds(sk);
+	if (bbr_full_bw_reached(sk)) {
+		bbr->mode = BBR_PROBE_BW;
+		/* Raising inflight after PROBE_RTT may cause loss, so reset
+		 * the PROBE_BW clock and schedule the next bandwidth probe for
+		 * a friendly and randomized future point in time.
+		 */
+		bbr2_start_bw_probe_down(sk);
+		/* Since we are exiting PROBE_RTT, we know inflight is
+		 * below our estimated BDP, so it is reasonable to cruise.
+		 */
+		bbr2_start_bw_probe_cruise(sk);
+	} else {
+		bbr->mode = BBR_STARTUP;	/* pipe not full yet: keep starting up */
+	}
+}
+
+/* Exit STARTUP based on loss rate > 1% and loss gaps in round >= N. Wait until
+ * the end of the round in recovery to get a good estimate of how many packets
+ * have been lost, and how many we need to drain with a low pacing rate.
+ * NOTE(review): the loss-rate test is bbr2_is_inflight_too_high(), and the
+ * default bbr_loss_thresh in this file is annotated as 2%; confirm the "1%".
+ */
+static void bbr2_check_loss_too_high_in_startup(struct sock *sk,
+					       const struct rate_sample *rs)
+{
+	struct bbr *bbr = inet_csk_ca(sk);
+
+	if (bbr_full_bw_reached(sk))
+		return;
+
+	/* For STARTUP exit, check the loss rate at the end of each round trip
+	 * of Recovery episodes in STARTUP. We check the loss rate at the end
+	 * of the round trip to filter out noisy/low loss and have a better
+	 * sense of inflight (extent of loss), so we can drain more accurately.
+	 */
+	if (rs->losses && bbr->loss_events_in_round < 0xf)
+		bbr->loss_events_in_round++;  /* update saturating counter */
+	if (bbr->params.full_loss_cnt && bbr->loss_round_start &&
+	    inet_csk(sk)->icsk_ca_state == TCP_CA_Recovery &&
+	    bbr->loss_events_in_round >= bbr->params.full_loss_cnt &&
+	    bbr2_is_inflight_too_high(sk, rs)) {
+		bbr->debug.event = 'P';  /* Packet loss caused STARTUP exit */
+		bbr2_handle_queue_too_high_in_startup(sk);
+		return;
+	}
+	if (bbr->loss_round_start)
+		bbr->loss_events_in_round = 0;	/* new round: restart count */
+}
+
+/* If we are done draining, advance into steady state operation in PROBE_BW.
*/
+static void bbr2_check_drain(struct sock *sk, const struct rate_sample *rs,
+			     struct bbr_context *ctx)
+{
+	struct bbr *bbr = inet_csk_ca(sk);
+
+	if (bbr_check_drain(sk, rs, ctx)) {
+		bbr->mode = BBR_PROBE_BW;
+		bbr2_start_bw_probe_down(sk);	/* PROBE_BW begins in DOWN */
+	}
+}
+/* Run the full model update for one rate sample, in this fixed order. */
+static void bbr2_update_model(struct sock *sk, const struct rate_sample *rs,
+			      struct bbr_context *ctx)
+{
+	bbr2_update_congestion_signals(sk, rs, ctx);
+	bbr_update_ack_aggregation(sk, rs);
+	bbr2_check_loss_too_high_in_startup(sk, rs);
+	bbr_check_full_bw_reached(sk, rs);
+	bbr2_check_drain(sk, rs, ctx);
+	bbr2_update_cycle_phase(sk, rs);
+	bbr_update_min_rtt(sk, rs);
+}
+
+/* Fast path for app-limited case.
+ *
+ * On each ack, we execute bbr state machine, which primarily consists of:
+ * 1) update model based on new rate sample, and
+ * 2) update control based on updated model or state change.
+ *
+ * There are certain workload/scenarios, e.g. app-limited case, where
+ * either we can skip updating model or we can skip update of both model
+ * as well as control. This provides significant softirq cpu savings for
+ * processing incoming acks.
+ *
+ * In case of app-limited, if there is no congestion (loss/ecn) and
+ * if observed bw sample is less than current estimated bw, then we can
+ * skip some of the computation in bbr state processing:
+ *
+ * - if there is no rtt/mode/phase change: In this case, since all the
+ *   parameters of the network model are constant, we can skip model
+ *   as well control update.
+ *
+ * - else we can skip rest of the model update. But we still need to
+ *   update the control to account for the new rtt/mode/phase.
+ *
+ * Returns whether we can take fast path or not. When the fast path was
+ * attempted but rtt/mode/phase changed, *update_model is set to false and
+ * false is returned; otherwise *update_model is left untouched.
+ */
+static bool bbr2_fast_path(struct sock *sk, bool *update_model,
+		const struct rate_sample *rs, struct bbr_context *ctx)
+{
+	struct bbr *bbr = inet_csk_ca(sk);
+	u32 prev_min_rtt_us, prev_mode;
+
+	if (bbr->params.fast_path && bbr->try_fast_path &&
+	    rs->is_app_limited && ctx->sample_bw < bbr_max_bw(sk) &&
+	    !bbr->loss_in_round && !bbr->ecn_in_round) {
+		prev_mode = bbr->mode;
+		prev_min_rtt_us = bbr->min_rtt_us;
+		bbr2_check_drain(sk, rs, ctx);
+		bbr2_update_cycle_phase(sk, rs);
+		bbr_update_min_rtt(sk, rs);
+
+		if (bbr->mode == prev_mode &&
+		    bbr->min_rtt_us == prev_min_rtt_us &&
+		    bbr->try_fast_path)	/* cleared by any phase change */
+			return true;
+
+		/* Skip model update, but control still needs to be updated */
+		*update_model = false;
+	}
+	return false;
+}
+/* Process one rate sample: update the round/ECN state and the bw sample,
+ * then, unless the fast path applies, update the model (if still needed)
+ * and the controls (gains, pacing rate, cwnd and its inflight-model bound).
+ * The per-cycle loss/ECN flags and debug output are updated on every path.
+ */
+static void bbr2_main(struct sock *sk, const struct rate_sample *rs)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct bbr *bbr = inet_csk_ca(sk);
+	struct bbr_context ctx = { 0 };
+	bool update_model = true;
+	u32 bw;
+
+	bbr->debug.event = '.';  /* init to default NOP (no event yet) */
+
+	bbr_update_round_start(sk, rs, &ctx);
+	if (bbr->round_start) {
+		bbr->rounds_since_probe =
+			min_t(s32, bbr->rounds_since_probe + 1, 0xFF); /* saturate */
+		bbr2_update_ecn_alpha(sk);
+	}
+
+	bbr->ecn_in_round  |= rs->is_ece;
+	bbr_calculate_bw_sample(sk, rs, &ctx);
+
+	if (bbr2_fast_path(sk, &update_model, rs, &ctx))
+		goto out;	/* model and control are both unchanged */
+
+	if (update_model)
+		bbr2_update_model(sk, rs, &ctx);
+
+	bbr_update_gains(sk);
+	bw = bbr_bw(sk);
+	bbr_set_pacing_rate(sk, bw, bbr->pacing_gain);
+	bbr_set_cwnd(sk, rs, rs->acked_sacked, bw, bbr->cwnd_gain,
+		     tp->snd_cwnd, &ctx);
+	bbr2_bound_cwnd_for_inflight_model(sk);
+
+out:
+	bbr->prev_ca_state = inet_csk(sk)->icsk_ca_state;
+	bbr->loss_in_cycle |= rs->lost > 0;
+	bbr->ecn_in_cycle  |= rs->delivered_ce > 0;
+
+	bbr_debug(sk, rs->acked_sacked, rs, &ctx);
+}
+
+/* Module parameters that are settable by TCP_CONGESTION_PARAMS are declared
+ * down here, so that the algorithm functions that use the parameters must use
+ * the
per-socket parameters; if they accidentally use the global version
+ * then there will be a compile error.
+ * TODO(ncardwell): move all per-socket parameters down to this section.
+ */
+
+/* On losses, scale down inflight and pacing rate by beta scaled by BBR_SCALE.
+ * No loss response when 0. Max allowed value is 255.
+ */
+static u32 bbr_beta = BBR_UNIT * 30 / 100;	/* about 30% */
+
+/* Gain factor for ECN mark ratio samples, scaled by BBR_SCALE.
+ * Max allowed value is 255.
+ */
+static u32 bbr_ecn_alpha_gain = BBR_UNIT * 1 / 16;  /* 1/16 = 6.25% */
+
+/* The initial value for the ecn_alpha state variable. Default and max
+ * BBR_UNIT (256), representing 1.0. This allows a flow to respond quickly
+ * to congestion if the bottleneck is congested when the flow starts up.
+ */
+static u32 bbr_ecn_alpha_init = BBR_UNIT;	/* 1.0, to respond quickly */
+
+/* On ECN, cut inflight_lo to (1 - ecn_factor * ecn_alpha) scaled by BBR_SCALE.
+ * No ECN based bounding when 0. Max allowed value is 255.
+ */
+static u32 bbr_ecn_factor = BBR_UNIT * 1 / 3;	    /* 1/3 = 33% */
+
+/* Estimate bw probing has gone too far if CE ratio exceeds this threshold.
+ * Scaled by BBR_SCALE. Disabled when 0. Max allowed is 255.
+ */
+static u32 bbr_ecn_thresh = BBR_UNIT * 1 / 2;  /* 1/2 = 50% */
+
+/* Max RTT (in usec) at which to use sender-side ECN logic.
+ * Disabled when 0 (ECN allowed at any RTT).
+ * Max allowed for the parameter is 524287 (0x7ffff) us, ~524 ms.
+ */
+static u32 bbr_ecn_max_rtt_us = 5000;	/* 5 ms */
+
+/* If non-zero, if in a cycle with no losses but some ECN marks, after ECN
+ * clears then use a multiplicative increase to quickly reprobe bw by
+ * starting inflight probing at the given multiple of inflight_hi.
+ * Default for this experimental knob is 0 (disabled).
+ * Planned value for experiments: BBR_UNIT * 1 / 2 = 128, representing 0.5.
+ * bbr2_init() clamps the per-socket copy to at most 511 (0x1FF).
+ */
+static u32 bbr_ecn_reprobe_gain;
+
+/* Estimate bw probing has gone too far if loss rate exceeds this level.
*/
+static u32 bbr_loss_thresh = BBR_UNIT * 2 / 100;  /* 2% loss; max is 255 */
+
+/* Exit STARTUP if number of loss marking events in a Recovery round is >= N,
+ * and loss rate is higher than bbr_loss_thresh.
+ * Disabled if 0. Max allowed value is 15 (0xF).
+ */
+static u32 bbr_full_loss_cnt = 8;
+
+/* Exit STARTUP if number of round trips with ECN mark rate above ecn_thresh
+ * meets this count. Max allowed value is 3.
+ */
+static u32 bbr_full_ecn_cnt = 2;
+
+/* Fraction of unutilized headroom to try to leave in path upon high loss.
+ * Scaled by BBR_SCALE; bbr2_init() clamps the per-socket copy to 255.
+ */
+static u32 bbr_inflight_headroom = BBR_UNIT * 15 / 100;	/* about 15% */
+
+/* Multiplier to get target inflight (as multiple of BDP) for PROBE_UP phase.
+ * Default is 1.25x, as in BBR v1. Max allowed is 511.
+ */
+static u32 bbr_bw_probe_pif_gain = BBR_UNIT * 5 / 4;
+
+/* Multiplier to get Reno-style probe epoch duration as: k * BDP round trips.
+ * If zero, disables this BBR v2 Reno-style BDP-scaled coexistence mechanism.
+ * Max allowed is 511.
+ */
+static u32 bbr_bw_probe_reno_gain = BBR_UNIT;	/* k = 1.0 */
+
+/* Max number of packet-timed rounds to wait before probing for bandwidth.  If
+ * we want to tolerate 1% random loss per round, and not have this cut our
+ * inflight too much, we must probe for bw periodically on roughly this scale.
+ * If low, limits Reno/CUBIC coexistence; if high, limits loss tolerance.
+ * We aim to be fair with Reno/CUBIC up to a BDP of at least:
+ *  BDP = 25Mbps * .030sec /(1514bytes) = 61.9 packets
+ * bbr2_init() clamps the per-socket copy to at most 255 (0xFF).
+ */
+static u32 bbr_bw_probe_max_rounds = 63;
+
+/* Max amount of randomness to inject in round counting for Reno-coexistence.
+ * Max value is 15.
+ */
+static u32 bbr_bw_probe_rand_rounds = 2;
+
+/* Use BBR-native probe time scale starting at this many usec.
+ * We aim to be fair with Reno/CUBIC up to an inter-loss time epoch of at least:
+ *  BDP*RTT = 25Mbps * .030sec /(1514bytes) * 0.030sec = 1.9 secs
+ */
+static u32 bbr_bw_probe_base_us = 2 * USEC_PER_SEC;  /* 2 secs */
+
+/* Use BBR-native probes spread over this many usec: */
+static u32 bbr_bw_probe_rand_us = 1 * USEC_PER_SEC;  /* 1 sec */
+
+/* Undo the model changes made in loss recovery if recovery was spurious? */
+static bool bbr_undo = true;
+
+/* Use fast path if app-limited, no loss/ECN, and target cwnd was reached? */
+static bool bbr_fast_path = true; /* default: enabled */
+
+/* Use fast ack mode ? Unsigned, to match the "uint" module_param type and
+ * the u32 clamp (max 2) that bbr2_init() applies when copying it to the socket.
+ */
+static u32 bbr_fast_ack_mode = 1; /* default: rwnd check off */
+
+/* How much to additively increase inflight_hi when entering REFILL? */
+static u32 bbr_refill_add_inc;		/* default: disabled */
+
+module_param_named(beta,                 bbr_beta,                 uint, 0644);
+module_param_named(ecn_alpha_gain,       bbr_ecn_alpha_gain,       uint, 0644);
+module_param_named(ecn_alpha_init,       bbr_ecn_alpha_init,       uint, 0644);
+module_param_named(ecn_factor,           bbr_ecn_factor,           uint, 0644);
+module_param_named(ecn_thresh,           bbr_ecn_thresh,           uint, 0644);
+module_param_named(ecn_max_rtt_us,       bbr_ecn_max_rtt_us,       uint, 0644);
+module_param_named(ecn_reprobe_gain,     bbr_ecn_reprobe_gain,     uint, 0644);
+module_param_named(loss_thresh,          bbr_loss_thresh,          uint, 0664);
+module_param_named(full_loss_cnt,        bbr_full_loss_cnt,        uint, 0664);
+module_param_named(full_ecn_cnt,         bbr_full_ecn_cnt,         uint, 0664);
+module_param_named(inflight_headroom,    bbr_inflight_headroom,    uint, 0664);
+module_param_named(bw_probe_pif_gain,    bbr_bw_probe_pif_gain,    uint, 0664);
+module_param_named(bw_probe_reno_gain,   bbr_bw_probe_reno_gain,   uint, 0664);
+module_param_named(bw_probe_max_rounds,  bbr_bw_probe_max_rounds,  uint, 0664);
+module_param_named(bw_probe_rand_rounds, bbr_bw_probe_rand_rounds, uint, 0664);
+module_param_named(bw_probe_base_us,     bbr_bw_probe_base_us,     uint, 0664);
+module_param_named(bw_probe_rand_us,     bbr_bw_probe_rand_us,     uint, 0664);
+module_param_named(undo,                 bbr_undo,                 bool, 0664);
+module_param_named(fast_path,	         bbr_fast_path,            bool, 0664);
+module_param_named(fast_ack_mode,        bbr_fast_ack_mode,	   uint, 0664);
+module_param_named(refill_add_inc,       bbr_refill_add_inc,       uint, 0664);
+/* Set up per-socket BBR v2 state: run the shared bbr_init(), copy each module
+ * parameter into bbr->params clamped to its maximum, then reset the v2 model.
+ */
+static void bbr2_init(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct bbr *bbr = inet_csk_ca(sk);
+
+	bbr_init(sk);	/* run shared init code for v1 and v2 */
+
+	/* BBR v2 parameters: */
+	bbr->params.beta = min_t(u32, 0xFFU, bbr_beta);
+	bbr->params.ecn_alpha_gain = min_t(u32, 0xFFU, bbr_ecn_alpha_gain);
+	bbr->params.ecn_alpha_init = min_t(u32, BBR_UNIT, bbr_ecn_alpha_init);
+	bbr->params.ecn_factor = min_t(u32, 0xFFU, bbr_ecn_factor);
+	bbr->params.ecn_thresh = min_t(u32, 0xFFU, bbr_ecn_thresh);
+	bbr->params.ecn_max_rtt_us = min_t(u32, 0x7ffffU, bbr_ecn_max_rtt_us);
+	bbr->params.ecn_reprobe_gain = min_t(u32, 0x1FF, bbr_ecn_reprobe_gain);
+	bbr->params.loss_thresh = min_t(u32, 0xFFU, bbr_loss_thresh);
+	bbr->params.full_loss_cnt = min_t(u32, 0xFU, bbr_full_loss_cnt);
+	bbr->params.full_ecn_cnt = min_t(u32, 0x3U, bbr_full_ecn_cnt);
+	bbr->params.inflight_headroom =
+		min_t(u32, 0xFFU, bbr_inflight_headroom);
+	bbr->params.bw_probe_pif_gain =
+		min_t(u32, 0x1FFU, bbr_bw_probe_pif_gain);
+	bbr->params.bw_probe_reno_gain =
+		min_t(u32, 0x1FFU, bbr_bw_probe_reno_gain);
+	bbr->params.bw_probe_max_rounds =
+		min_t(u32, 0xFFU, bbr_bw_probe_max_rounds);
+	bbr->params.bw_probe_rand_rounds =
+		min_t(u32, 0xFU, bbr_bw_probe_rand_rounds);
+	bbr->params.bw_probe_base_us =
+		min_t(u32, (1 << 26) - 1, bbr_bw_probe_base_us);
+	bbr->params.bw_probe_rand_us =
+		min_t(u32, (1 << 26) - 1, bbr_bw_probe_rand_us);
+	bbr->params.undo = bbr_undo;
+	bbr->params.fast_path = bbr_fast_path ? 1 : 0;
+	bbr->params.refill_add_inc = min_t(u32, 0x3U, bbr_refill_add_inc);
+
+	/* BBR v2 state: */
+	bbr->initialized = 1;
+	/* Start sampling ECN mark rate after first full flight is ACKed: */
+	bbr->loss_round_delivered = tp->delivered + 1;
+	bbr->loss_round_start = 0;
+	bbr->undo_bw_lo = 0;
+	bbr->undo_inflight_lo = 0;
+	bbr->undo_inflight_hi = 0;
+	bbr->loss_events_in_round = 0;
+	bbr->startup_ecn_rounds = 0;
+	bbr2_reset_congestion_signals(sk);
+	bbr->bw_lo = ~0U;		/* ~0U: bound not set yet */
+	bbr->bw_hi[0] = 0;
+	bbr->bw_hi[1] = 0;
+	bbr->inflight_lo = ~0U;
+	bbr->inflight_hi = ~0U;
+	bbr->bw_probe_up_cnt = ~0U;
+	bbr->bw_probe_up_acks = 0;
+	bbr->bw_probe_up_rounds = 0;
+	bbr->probe_wait_us = 0;
+	bbr->stopped_risky_probe = 0;
+	bbr->ack_phase = BBR_ACKS_INIT;
+	bbr->rounds_since_probe = 0;
+	bbr->bw_probe_samples = 0;
+	bbr->prev_probe_too_high = 0;
+	bbr->ecn_eligible = 0;
+	bbr->ecn_alpha = bbr->params.ecn_alpha_init;
+	bbr->alpha_last_delivered = 0;
+	bbr->alpha_last_delivered_ce = 0;
+
+	tp->fast_ack_mode = min_t(u32, 0x2U, bbr_fast_ack_mode);
+
+	if ((tp->ecn_flags & TCP_ECN_OK) && bbr_ecn_enable)
+		tp->ecn_flags |= TCP_ECN_ECT_PERMANENT;
+}
+
+/* Core TCP stack informs us that the given skb was just marked lost.
+ * Always records the loss for the current round and cycle; only while bw
+ * probe samples are being collected does it also re-check the loss rate and,
+ * if too high, cut inflight_hi at the point within this skb's flight.
+ */
+static void bbr2_skb_marked_lost(struct sock *sk, const struct sk_buff *skb)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct bbr *bbr = inet_csk_ca(sk);
+	struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
+	struct rate_sample rs;
+
+	/* Capture "current" data over the full round trip of loss,
+	 * to have a better chance to see the full capacity of the path.
+	*/
+	if (!bbr->loss_in_round)  /* first loss in this round trip? */
+		bbr->loss_round_delivered = tp->delivered;  /* set round trip */
+	bbr->loss_in_round = 1;
+	bbr->loss_in_cycle = 1;
+
+	if (!bbr->bw_probe_samples)
+		return;  /* not an skb sent while probing for bandwidth */
+	if (unlikely(!scb->tx.delivered_mstamp))
+		return;  /* skb was SACKed, reneged, marked lost; ignore it */
+	/* We are probing for bandwidth. Construct a rate sample that
+	 * estimates what happened in the flight leading up to this lost skb,
+	 * then see if the loss rate went too high, and if so at which packet.
+	 * Only the three fields set below are filled in; the rest stay zero.
+	 */
+	memset(&rs, 0, sizeof(rs));
+	rs.tx_in_flight = scb->tx.in_flight;
+	rs.lost = tp->lost - scb->tx.lost;
+	rs.is_app_limited = scb->tx.is_app_limited;
+	if (bbr2_is_inflight_too_high(sk, &rs)) {
+		rs.tx_in_flight = bbr2_inflight_hi_from_lost_skb(sk, &rs, skb);
+		bbr2_handle_inflight_too_high(sk, &rs);
+	}
+}
+
+/* Revert short-term model if current loss recovery event was spurious.
+ * Returns the cwnd to use: bbr->prior_cwnd when params.undo is set,
+ * otherwise the current tp->snd_cwnd (and the bounds are left as they are).
+ */
+static u32 bbr2_undo_cwnd(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct bbr *bbr = inet_csk_ca(sk);
+
+	bbr->debug.undo = 1;
+	bbr->full_bw = 0;   /* spurious slow-down; reset full pipe detection */
+	bbr->full_bw_cnt = 0;
+	bbr->loss_in_round = 0;
+
+	if (!bbr->params.undo)
+		return tp->snd_cwnd;
+
+	/* Revert to cwnd and other state saved before loss episode.
+	 * max() means a bound is only ever raised back, never lowered.
+	 */
+	bbr->bw_lo = max(bbr->bw_lo, bbr->undo_bw_lo);
+	bbr->inflight_lo = max(bbr->inflight_lo, bbr->undo_inflight_lo);
+	bbr->inflight_hi = max(bbr->inflight_hi, bbr->undo_inflight_hi);
+	return bbr->prior_cwnd;
+}
+
+/* Entering loss recovery, so save state for when we undo recovery.
+ * The socket's snd_ssthresh is returned as it is.
+ */
+static u32 bbr2_ssthresh(struct sock *sk)
+{
+	struct bbr *bbr = inet_csk_ca(sk);
+
+	bbr_save_cwnd(sk);
+	/* For undo, save state that adapts based on loss signal. */
+	bbr->undo_bw_lo		= bbr->bw_lo;
+	bbr->undo_inflight_lo	= bbr->inflight_lo;
+	bbr->undo_inflight_hi	= bbr->inflight_hi;
+	return tcp_sk(sk)->snd_ssthresh;
+}
+/* Map bbr->mode, and within BBR_PROBE_BW also bbr->cycle_idx, to the
+ * tcp_bbr2_phase value; unknown values map to BBR2_PHASE_INVALID.
+ */
+static enum tcp_bbr2_phase bbr2_get_phase(struct bbr *bbr)
+{
+	switch (bbr->mode) {
+	case BBR_STARTUP:
+		return BBR2_PHASE_STARTUP;
+	case BBR_DRAIN:
+		return BBR2_PHASE_DRAIN;
+	case BBR_PROBE_BW:
+		break;	/* resolved by cycle_idx in the second switch */
+	case BBR_PROBE_RTT:
+		return BBR2_PHASE_PROBE_RTT;
+	default:
+		return BBR2_PHASE_INVALID;
+	}
+	switch (bbr->cycle_idx) {
+	case BBR_BW_PROBE_UP:
+		return BBR2_PHASE_PROBE_BW_UP;
+	case BBR_BW_PROBE_DOWN:
+		return BBR2_PHASE_PROBE_BW_DOWN;
+	case BBR_BW_PROBE_CRUISE:
+		return BBR2_PHASE_PROBE_BW_CRUISE;
+	case BBR_BW_PROBE_REFILL:
+		return BBR2_PHASE_PROBE_BW_REFILL;
+	default:
+		return BBR2_PHASE_INVALID;
+	}
+}
+
+static size_t bbr2_get_info(struct sock *sk, u32 ext, int *attr,
+			    union tcp_cc_info *info)
+{
+	if (ext & (1 << (INET_DIAG_BBRINFO - 1)) ||
+	    ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
+		struct bbr *bbr = inet_csk_ca(sk);
+		u64 bw = bbr_bw_bytes_per_sec(sk, bbr_bw(sk));
+		u64 bw_hi = bbr_bw_bytes_per_sec(sk, bbr_max_bw(sk));
+		u64 bw_lo = bbr->bw_lo == ~0U ?
+ ~0ULL : bbr_bw_bytes_per_sec(sk, bbr->bw_lo); + + memset(&info->bbr2, 0, sizeof(info->bbr2)); + info->bbr2.bbr_bw_lsb = (u32)bw; + info->bbr2.bbr_bw_msb = (u32)(bw >> 32); + info->bbr2.bbr_min_rtt = bbr->min_rtt_us; + info->bbr2.bbr_pacing_gain = bbr->pacing_gain; + info->bbr2.bbr_cwnd_gain = bbr->cwnd_gain; + info->bbr2.bbr_bw_hi_lsb = (u32)bw_hi; + info->bbr2.bbr_bw_hi_msb = (u32)(bw_hi >> 32); + info->bbr2.bbr_bw_lo_lsb = (u32)bw_lo; + info->bbr2.bbr_bw_lo_msb = (u32)(bw_lo >> 32); + info->bbr2.bbr_mode = bbr->mode; + info->bbr2.bbr_phase = (__u8)bbr2_get_phase(bbr); + info->bbr2.bbr_version = (__u8)2; + info->bbr2.bbr_inflight_lo = bbr->inflight_lo; + info->bbr2.bbr_inflight_hi = bbr->inflight_hi; + info->bbr2.bbr_extra_acked = bbr_extra_acked(sk); + *attr = INET_DIAG_BBRINFO; + return sizeof(info->bbr2); + } + return 0; +} + +static void bbr2_set_state(struct sock *sk, u8 new_state) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + + if (new_state == TCP_CA_Loss) { + struct rate_sample rs = { .losses = 1 }; + struct bbr_context ctx = { 0 }; + + bbr->prev_ca_state = TCP_CA_Loss; + bbr->full_bw = 0; + if (!bbr2_is_probing_bandwidth(sk) && bbr->inflight_lo == ~0U) { + /* bbr_adapt_lower_bounds() needs cwnd before + * we suffered an RTO, to update inflight_lo: + */ + bbr->inflight_lo = + max(tp->snd_cwnd, bbr->prior_cwnd); + } + bbr_debug(sk, 0, &rs, &ctx); + } else if (bbr->prev_ca_state == TCP_CA_Loss && + new_state != TCP_CA_Loss) { + tp->snd_cwnd = max(tp->snd_cwnd, bbr->prior_cwnd); + bbr->try_fast_path = 0; /* bound cwnd using latest model */ + } +} + +static struct tcp_congestion_ops tcp_bbr2_cong_ops __read_mostly = { + .flags = TCP_CONG_NON_RESTRICTED | TCP_CONG_WANTS_CE_EVENTS, + .name = "bbr2", + .owner = THIS_MODULE, + .init = bbr2_init, + .cong_control = bbr2_main, + .sndbuf_expand = bbr_sndbuf_expand, + .skb_marked_lost = bbr2_skb_marked_lost, + .undo_cwnd = bbr2_undo_cwnd, + .cwnd_event = bbr_cwnd_event, + .ssthresh = 
bbr2_ssthresh, + .tso_segs = bbr_tso_segs, + .get_info = bbr2_get_info, + .set_state = bbr2_set_state, +}; + +static int __init bbr_register(void) +{ + BUILD_BUG_ON(sizeof(struct bbr) > ICSK_CA_PRIV_SIZE); + return tcp_register_congestion_control(&tcp_bbr2_cong_ops); +} + +static void __exit bbr_unregister(void) +{ + tcp_unregister_congestion_control(&tcp_bbr2_cong_ops); +} + +module_init(bbr_register); +module_exit(bbr_unregister); + +MODULE_AUTHOR("Van Jacobson "); +MODULE_AUTHOR("Neal Cardwell "); +MODULE_AUTHOR("Yuchung Cheng "); +MODULE_AUTHOR("Soheil Hassas Yeganeh "); +MODULE_AUTHOR("Priyaranjan Jha "); +MODULE_AUTHOR("Yousuk Seung "); +MODULE_AUTHOR("Kevin Yang "); +MODULE_AUTHOR("Arjun Roy "); + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_DESCRIPTION("TCP BBR (Bottleneck Bandwidth and RTT)"); diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index 9b9b02052fd36..1cdcb4df0eb7e 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -138,10 +138,9 @@ int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg, struct sk_psock *psock = sk_psock_get(sk); int ret; - if (unlikely(!psock)) { - sk_msg_free(sk, msg); - return 0; - } + if (unlikely(!psock)) + return -EPIPE; + ret = ingress ? 
bpf_tcp_ingress(sk, psock, msg, bytes, flags) : tcp_bpf_push_locked(sk, msg, bytes, flags, false); sk_psock_put(sk, psock); @@ -335,7 +334,7 @@ static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock, cork = true; psock->cork = NULL; } - sk_msg_return(sk, msg, tosend); + sk_msg_return(sk, msg, msg->sg.size); release_sock(sk); ret = tcp_bpf_sendmsg_redir(sk_redir, msg, tosend, flags); @@ -375,8 +374,11 @@ static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock, } if (msg && msg->sg.data[msg->sg.start].page_link && - msg->sg.data[msg->sg.start].length) + msg->sg.data[msg->sg.start].length) { + if (eval == __SK_REDIRECT) + sk_mem_charge(sk, msg->sg.size); goto more_data; + } } return ret; } diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index db5831e6c136a..153ed9010c0c2 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -179,6 +179,7 @@ void tcp_init_congestion_control(struct sock *sk) struct inet_connection_sock *icsk = inet_csk(sk); tcp_sk(sk)->prior_ssthresh = 0; + tcp_sk(sk)->fast_ack_mode = 0; if (icsk->icsk_ca_ops->init) icsk->icsk_ca_ops->init(sk); if (tcp_ca_needs_ecn(sk)) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index bfe4112e000c0..4aaf5ca6ce9c8 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -349,7 +349,7 @@ static void __tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb) tcp_enter_quickack_mode(sk, 2); break; case INET_ECN_CE: - if (tcp_ca_needs_ecn(sk)) + if (tcp_ca_wants_ce_events(sk)) tcp_ca_event(sk, CA_EVENT_ECN_IS_CE); if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) { @@ -360,7 +360,7 @@ static void __tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb) tp->ecn_flags |= TCP_ECN_SEEN; break; default: - if (tcp_ca_needs_ecn(sk)) + if (tcp_ca_wants_ce_events(sk)) tcp_ca_event(sk, CA_EVENT_ECN_NO_CE); tp->ecn_flags |= TCP_ECN_SEEN; break; @@ -1079,7 +1079,12 @@ static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb) */ static void 
tcp_notify_skb_loss_event(struct tcp_sock *tp, const struct sk_buff *skb) { + struct sock *sk = (struct sock *)tp; + const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops; + tp->lost += tcp_skb_pcount(skb); + if (ca_ops->skb_marked_lost) + ca_ops->skb_marked_lost(sk, skb); } void tcp_mark_skb_lost(struct sock *sk, struct sk_buff *skb) @@ -1460,6 +1465,17 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *prev, WARN_ON_ONCE(tcp_skb_pcount(skb) < pcount); tcp_skb_pcount_add(skb, -pcount); + /* Adjust tx.in_flight as pcount is shifted from skb to prev. */ + if (WARN_ONCE(TCP_SKB_CB(skb)->tx.in_flight < pcount, + "prev in_flight: %u skb in_flight: %u pcount: %u", + TCP_SKB_CB(prev)->tx.in_flight, + TCP_SKB_CB(skb)->tx.in_flight, + pcount)) + TCP_SKB_CB(skb)->tx.in_flight = 0; + else + TCP_SKB_CB(skb)->tx.in_flight -= pcount; + TCP_SKB_CB(prev)->tx.in_flight += pcount; + /* When we're adding to gso_segs == 1, gso_size will be zero, * in theory this shouldn't be necessary but as long as DSACK * code can come after this skb later on it's better to keep @@ -3790,6 +3806,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) prior_fack = tcp_is_sack(tp) ? tcp_highest_sack_seq(tp) : tp->snd_una; rs.prior_in_flight = tcp_packets_in_flight(tp); + tcp_rate_check_app_limited(sk); /* ts_recent update must be made after we are sure that the packet * is in window. @@ -3887,6 +3904,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) delivered = tcp_newly_delivered(sk, delivered, flag); lost = tp->lost - lost; /* freshly marked lost */ rs.is_ack_delayed = !!(flag & FLAG_ACK_MAYBE_DELAYED); + rs.is_ece = !!(flag & FLAG_ECE); tcp_rate_gen(sk, delivered, lost, is_sack_reneg, sack_state.rate); tcp_cong_control(sk, ack, delivered, flag, sack_state.rate); tcp_xmit_recovery(sk, rexmit); @@ -5465,13 +5483,14 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible) /* More than one full frame received... 
*/ if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss && + (tp->fast_ack_mode == 1 || /* ... and right edge of window advances far enough. * (tcp_recvmsg() will send ACK otherwise). * If application uses SO_RCVLOWAT, we want send ack now if * we have not received enough bytes to satisfy the condition. */ - (tp->rcv_nxt - tp->copied_seq < sk->sk_rcvlowat || - __tcp_select_window(sk) >= tp->rcv_wnd)) || + (tp->rcv_nxt - tp->copied_seq < sk->sk_rcvlowat || + __tcp_select_window(sk) >= tp->rcv_wnd))) || /* We ACK each frame or... */ tcp_in_quickack_mode(sk) || /* Protocol state mandates a one-time immediate ACK */ diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 5079832af5c10..0bfecd72de05a 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -377,7 +377,8 @@ static void tcp_ecn_send(struct sock *sk, struct sk_buff *skb, th->cwr = 1; skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; } - } else if (!tcp_ca_needs_ecn(sk)) { + } else if (!(tp->ecn_flags & TCP_ECN_ECT_PERMANENT) && + !tcp_ca_needs_ecn(sk)) { /* ACK or retransmitted segment: clear ECT|CE */ INET_ECN_dontxmit(sk); } @@ -1533,7 +1534,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue, { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *buff; - int nsize, old_factor; + int nsize, old_factor, inflight_prev; long limit; int nlen; u8 flags; @@ -1610,6 +1611,15 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue, if (diff) tcp_adjust_pcount(sk, skb, diff); + + /* Set buff tx.in_flight as if buff were sent by itself. */ + inflight_prev = TCP_SKB_CB(skb)->tx.in_flight - old_factor; + if (WARN_ONCE(inflight_prev < 0, + "inconsistent: tx.in_flight: %u old_factor: %d", + TCP_SKB_CB(skb)->tx.in_flight, old_factor)) + inflight_prev = 0; + TCP_SKB_CB(buff)->tx.in_flight = inflight_prev + + tcp_skb_pcount(buff); } /* Link BUFF into the send queue. 
*/ @@ -1978,13 +1988,12 @@ static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now, static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now) { const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops; - u32 min_tso, tso_segs; - - min_tso = ca_ops->min_tso_segs ? - ca_ops->min_tso_segs(sk) : - sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs; + u32 tso_segs; - tso_segs = tcp_tso_autosize(sk, mss_now, min_tso); + tso_segs = ca_ops->tso_segs ? + ca_ops->tso_segs(sk, mss_now) : + tcp_tso_autosize(sk, mss_now, + sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs); return min_t(u32, tso_segs, sk->sk_gso_max_segs); } @@ -2619,6 +2628,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, skb->skb_mstamp_ns = tp->tcp_wstamp_ns = tp->tcp_clock_cache; list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue); tcp_init_tso_segs(skb, mss_now); + tcp_set_tx_in_flight(sk, skb); goto repair; /* Skip network transmission */ } @@ -3719,6 +3729,7 @@ static void tcp_connect_queue_skb(struct sock *sk, struct sk_buff *skb) */ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) { + struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); struct tcp_fastopen_request *fo = tp->fastopen_req; int space, err = 0; @@ -3733,8 +3744,10 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) * private TCP options. 
The cost is reduced data space in SYN :( */ tp->rx_opt.mss_clamp = tcp_mss_clamp(tp, tp->rx_opt.mss_clamp); + /* Sync mss_cache after updating the mss_clamp */ + tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); - space = __tcp_mtu_to_mss(sk, inet_csk(sk)->icsk_pmtu_cookie) - + space = __tcp_mtu_to_mss(sk, icsk->icsk_pmtu_cookie) - MAX_TCP_OPTION_SPACE; space = min_t(size_t, space, fo->size); diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c index fbab921670cc9..796fa6e5310ce 100644 --- a/net/ipv4/tcp_rate.c +++ b/net/ipv4/tcp_rate.c @@ -34,6 +34,24 @@ * ready to send in the write queue. */ +void tcp_set_tx_in_flight(struct sock *sk, struct sk_buff *skb) +{ + struct tcp_sock *tp = tcp_sk(sk); + u32 in_flight; + + /* Check, sanitize, and record packets in flight after skb was sent. */ + in_flight = tcp_packets_in_flight(tp) + tcp_skb_pcount(skb); + if (WARN_ONCE(in_flight > TCPCB_IN_FLIGHT_MAX, + "insane in_flight %u cc %s mss %u " + "cwnd %u pif %u %u %u %u\n", + in_flight, inet_csk(sk)->icsk_ca_ops->name, + tp->mss_cache, tp->snd_cwnd, + tp->packets_out, tp->retrans_out, + tp->sacked_out, tp->lost_out)) + in_flight = TCPCB_IN_FLIGHT_MAX; + TCP_SKB_CB(skb)->tx.in_flight = in_flight; +} + /* Snapshot the current delivery information in the skb, to generate * a rate sample later when the skb is (s)acked in tcp_rate_skb_delivered(). */ @@ -66,7 +84,9 @@ void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb) TCP_SKB_CB(skb)->tx.delivered_mstamp = tp->delivered_mstamp; TCP_SKB_CB(skb)->tx.delivered = tp->delivered; TCP_SKB_CB(skb)->tx.delivered_ce = tp->delivered_ce; + TCP_SKB_CB(skb)->tx.lost = tp->lost; TCP_SKB_CB(skb)->tx.is_app_limited = tp->app_limited ? 
1 : 0; + tcp_set_tx_in_flight(sk, skb); } /* When an skb is sacked or acked, we fill in the rate sample with the (prior) @@ -87,17 +107,20 @@ void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb, if (!rs->prior_delivered || after(scb->tx.delivered, rs->prior_delivered)) { + rs->prior_lost = scb->tx.lost; rs->prior_delivered_ce = scb->tx.delivered_ce; rs->prior_delivered = scb->tx.delivered; rs->prior_mstamp = scb->tx.delivered_mstamp; rs->is_app_limited = scb->tx.is_app_limited; rs->is_retrans = scb->sacked & TCPCB_RETRANS; + rs->tx_in_flight = scb->tx.in_flight; /* Record send time of most recently ACKed packet: */ tp->first_tx_mstamp = tcp_skb_timestamp_us(skb); /* Find the duration of the "send phase" of this window: */ - rs->interval_us = tcp_stamp_us_delta(tp->first_tx_mstamp, - scb->tx.first_tx_mstamp); + rs->interval_us = tcp_stamp32_us_delta( + tp->first_tx_mstamp, + scb->tx.first_tx_mstamp); } /* Mark off the skb delivered once it's sacked to avoid being @@ -139,6 +162,7 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, return; } rs->delivered = tp->delivered - rs->prior_delivered; + rs->lost = tp->lost - rs->prior_lost; rs->delivered_ce = tp->delivered_ce - rs->prior_delivered_ce; /* delivered_ce occupies less than 32 bits in the skb control block */ @@ -150,7 +174,7 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, * longer phase. 
*/ snd_us = rs->interval_us; /* send phase */ - ack_us = tcp_stamp_us_delta(tp->tcp_mstamp, + ack_us = tcp_stamp32_us_delta(tp->tcp_mstamp, rs->prior_mstamp); /* ack phase */ rs->interval_us = max(snd_us, ack_us); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 20cf4a98c69d8..b5f7e49a003a2 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -607,6 +607,7 @@ void tcp_write_timer_handler(struct sock *sk) goto out; } + tcp_rate_check_app_limited(sk); tcp_mstamp_refresh(tcp_sk(sk)); event = icsk->icsk_pending; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index f908e2fd30b24..4df84013c4e6b 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -554,7 +554,7 @@ static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex, #ifdef CONFIG_IPV6_MROUTE if ((all || type == NETCONFA_MC_FORWARDING) && nla_put_s32(skb, NETCONFA_MC_FORWARDING, - devconf->mc_forwarding) < 0) + atomic_read(&devconf->mc_forwarding)) < 0) goto nla_put_failure; #endif if ((all || type == NETCONFA_PROXY_NEIGH) && @@ -5539,7 +5539,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_USE_OPTIMISTIC] = cnf->use_optimistic; #endif #ifdef CONFIG_IPV6_MROUTE - array[DEVCONF_MC_FORWARDING] = cnf->mc_forwarding; + array[DEVCONF_MC_FORWARDING] = atomic_read(&cnf->mc_forwarding); #endif array[DEVCONF_DISABLE_IPV6] = cnf->disable_ipv6; array[DEVCONF_ACCEPT_DAD] = cnf->accept_dad; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 8fe7900f19499..7d7b7523d1265 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -441,11 +441,14 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) { struct sock *sk = sock->sk; u32 flags = BIND_WITH_LOCK; + const struct proto *prot; int err = 0; + /* IPV6_ADDRFORM can change sk->sk_prot under us. */ + prot = READ_ONCE(sk->sk_prot); /* If the socket has its own bind function then use it. 
*/ - if (sk->sk_prot->bind) - return sk->sk_prot->bind(sk, uaddr, addr_len); + if (prot->bind) + return prot->bind(sk, uaddr, addr_len); if (addr_len < SIN6_LEN_RFC2133) return -EINVAL; @@ -555,6 +558,7 @@ int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) void __user *argp = (void __user *)arg; struct sock *sk = sock->sk; struct net *net = sock_net(sk); + const struct proto *prot; switch (cmd) { case SIOCADDRT: @@ -572,9 +576,11 @@ int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) case SIOCSIFDSTADDR: return addrconf_set_dstaddr(net, argp); default: - if (!sk->sk_prot->ioctl) + /* IPV6_ADDRFORM can change sk->sk_prot under us. */ + prot = READ_ONCE(sk->sk_prot); + if (!prot->ioctl) return -ENOIOCTLCMD; - return sk->sk_prot->ioctl(sk, cmd, arg); + return prot->ioctl(sk, cmd, arg); } /*NOTREACHED*/ return 0; @@ -636,11 +642,14 @@ INDIRECT_CALLABLE_DECLARE(int udpv6_sendmsg(struct sock *, struct msghdr *, int inet6_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) { struct sock *sk = sock->sk; + const struct proto *prot; if (unlikely(inet_send_prepare(sk))) return -EAGAIN; - return INDIRECT_CALL_2(sk->sk_prot->sendmsg, tcp_sendmsg, udpv6_sendmsg, + /* IPV6_ADDRFORM can change sk->sk_prot under us. */ + prot = READ_ONCE(sk->sk_prot); + return INDIRECT_CALL_2(prot->sendmsg, tcp_sendmsg, udpv6_sendmsg, sk, msg, size); } @@ -650,13 +659,16 @@ int inet6_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags) { struct sock *sk = sock->sk; + const struct proto *prot; int addr_len = 0; int err; if (likely(!(flags & MSG_ERRQUEUE))) sock_rps_record_flow(sk); - err = INDIRECT_CALL_2(sk->sk_prot->recvmsg, tcp_recvmsg, udpv6_recvmsg, + /* IPV6_ADDRFORM can change sk->sk_prot under us. 
*/ + prot = READ_ONCE(sk->sk_prot); + err = INDIRECT_CALL_2(prot->recvmsg, tcp_recvmsg, udpv6_recvmsg, sk, msg, size, flags & MSG_DONTWAIT, flags & ~MSG_DONTWAIT, &addr_len); if (err >= 0) diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 4514444e96c8d..4740afecf7c62 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -333,11 +333,8 @@ int inet6_hash(struct sock *sk) { int err = 0; - if (sk->sk_state != TCP_CLOSE) { - local_bh_disable(); + if (sk->sk_state != TCP_CLOSE) err = __inet_hash(sk, NULL); - local_bh_enable(); - } return err; } diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 80256717868e6..d4b1e2c5aa76d 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -508,7 +508,7 @@ int ip6_mc_input(struct sk_buff *skb) /* * IPv6 multicast router mode is now supported ;) */ - if (dev_net(skb->dev)->ipv6.devconf_all->mc_forwarding && + if (atomic_read(&dev_net(skb->dev)->ipv6.devconf_all->mc_forwarding) && !(ipv6_addr_type(&hdr->daddr) & (IPV6_ADDR_LOOPBACK|IPV6_ADDR_LINKLOCAL)) && likely(!(IP6CB(skb)->flags & IP6SKB_FORWARDED))) { diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 194832663d856..9d83c11ba1e74 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -485,7 +485,7 @@ int ip6_forward(struct sk_buff *skb) goto drop; if (!net->ipv6.devconf_all->disable_policy && - !idev->cnf.disable_policy && + (!idev || !idev->cnf.disable_policy) && !xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) { __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS); goto drop; diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 8a2db926b5eb6..e3c884678dbe2 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -734,7 +734,7 @@ static int mif6_delete(struct mr_table *mrt, int vifi, int notify, in6_dev = __in6_dev_get(dev); if (in6_dev) { - in6_dev->cnf.mc_forwarding--; + atomic_dec(&in6_dev->cnf.mc_forwarding); inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF, 
NETCONFA_MC_FORWARDING, dev->ifindex, &in6_dev->cnf); @@ -902,7 +902,7 @@ static int mif6_add(struct net *net, struct mr_table *mrt, in6_dev = __in6_dev_get(dev); if (in6_dev) { - in6_dev->cnf.mc_forwarding++; + atomic_inc(&in6_dev->cnf.mc_forwarding); inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF, NETCONFA_MC_FORWARDING, dev->ifindex, &in6_dev->cnf); @@ -1553,7 +1553,7 @@ static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk) } else { rcu_assign_pointer(mrt->mroute_sk, sk); sock_set_flag(sk, SOCK_RCU_FREE); - net->ipv6.devconf_all->mc_forwarding++; + atomic_inc(&net->ipv6.devconf_all->mc_forwarding); } write_unlock_bh(&mrt_lock); @@ -1586,7 +1586,7 @@ int ip6mr_sk_done(struct sock *sk) * so the RCU grace period before sk freeing * is guaranteed by sk_destruct() */ - net->ipv6.devconf_all->mc_forwarding--; + atomic_dec(&net->ipv6.devconf_all->mc_forwarding); write_unlock_bh(&mrt_lock); inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_MC_FORWARDING, diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index a733803a710cf..222f6bf220ba0 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -475,7 +475,8 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, sock_prot_inuse_add(net, sk->sk_prot, -1); sock_prot_inuse_add(net, &tcp_prot, 1); - sk->sk_prot = &tcp_prot; + /* Paired with READ_ONCE(sk->sk_prot) in net/ipv6/af_inet6.c */ + WRITE_ONCE(sk->sk_prot, &tcp_prot); icsk->icsk_af_ops = &ipv4_specific; sk->sk_socket->ops = &inet_stream_ops; sk->sk_family = PF_INET; @@ -489,7 +490,8 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, sock_prot_inuse_add(net, sk->sk_prot, -1); sock_prot_inuse_add(net, prot, 1); - sk->sk_prot = prot; + /* Paired with READ_ONCE(sk->sk_prot) in net/ipv6/af_inet6.c */ + WRITE_ONCE(sk->sk_prot, prot); sk->sk_socket->ops = &inet_dgram_ops; sk->sk_family = PF_INET; } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 
ea1cf414a92e7..da1bf48e79370 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -4495,7 +4495,7 @@ static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes) struct inet6_dev *idev; int type; - if (netif_is_l3_master(skb->dev) && + if (netif_is_l3_master(skb->dev) || dst->dev == net->loopback_dev) idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif)); else diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 26c00ebf4fbae..7f555d2e5357f 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -275,6 +275,7 @@ static int llc_ui_autobind(struct socket *sock, struct sockaddr_llc *addr) { struct sock *sk = sock->sk; struct llc_sock *llc = llc_sk(sk); + struct net_device *dev = NULL; struct llc_sap *sap; int rc = -EINVAL; @@ -286,16 +287,15 @@ static int llc_ui_autobind(struct socket *sock, struct sockaddr_llc *addr) goto out; rc = -ENODEV; if (sk->sk_bound_dev_if) { - llc->dev = dev_get_by_index(&init_net, sk->sk_bound_dev_if); - if (llc->dev && addr->sllc_arphrd != llc->dev->type) { - dev_put(llc->dev); - llc->dev = NULL; + dev = dev_get_by_index(&init_net, sk->sk_bound_dev_if); + if (dev && addr->sllc_arphrd != dev->type) { + dev_put(dev); + dev = NULL; } } else - llc->dev = dev_getfirstbyhwtype(&init_net, addr->sllc_arphrd); - if (!llc->dev) + dev = dev_getfirstbyhwtype(&init_net, addr->sllc_arphrd); + if (!dev) goto out; - netdev_tracker_alloc(llc->dev, &llc->dev_tracker, GFP_KERNEL); rc = -EUSERS; llc->laddr.lsap = llc_ui_autoport(); if (!llc->laddr.lsap) @@ -304,6 +304,12 @@ static int llc_ui_autobind(struct socket *sock, struct sockaddr_llc *addr) sap = llc_sap_open(llc->laddr.lsap, NULL); if (!sap) goto out; + + /* Note: We do not expect errors from this point. 
*/ + llc->dev = dev; + netdev_tracker_alloc(llc->dev, &llc->dev_tracker, GFP_KERNEL); + dev = NULL; + memcpy(llc->laddr.mac, llc->dev->dev_addr, IFHWADDRLEN); memcpy(&llc->addr, addr, sizeof(llc->addr)); /* assign new connection to its SAP */ @@ -311,6 +317,7 @@ static int llc_ui_autobind(struct socket *sock, struct sockaddr_llc *addr) sock_reset_flag(sk, SOCK_ZAPPED); rc = 0; out: + dev_put(dev); return rc; } @@ -333,6 +340,7 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen) struct sockaddr_llc *addr = (struct sockaddr_llc *)uaddr; struct sock *sk = sock->sk; struct llc_sock *llc = llc_sk(sk); + struct net_device *dev = NULL; struct llc_sap *sap; int rc = -EINVAL; @@ -348,25 +356,27 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen) rc = -ENODEV; rcu_read_lock(); if (sk->sk_bound_dev_if) { - llc->dev = dev_get_by_index_rcu(&init_net, sk->sk_bound_dev_if); - if (llc->dev) { + dev = dev_get_by_index_rcu(&init_net, sk->sk_bound_dev_if); + if (dev) { if (is_zero_ether_addr(addr->sllc_mac)) - memcpy(addr->sllc_mac, llc->dev->dev_addr, + memcpy(addr->sllc_mac, dev->dev_addr, IFHWADDRLEN); - if (addr->sllc_arphrd != llc->dev->type || + if (addr->sllc_arphrd != dev->type || !ether_addr_equal(addr->sllc_mac, - llc->dev->dev_addr)) { + dev->dev_addr)) { rc = -EINVAL; - llc->dev = NULL; + dev = NULL; } } - } else - llc->dev = dev_getbyhwaddr_rcu(&init_net, addr->sllc_arphrd, + } else { + dev = dev_getbyhwaddr_rcu(&init_net, addr->sllc_arphrd, addr->sllc_mac); - dev_hold_track(llc->dev, &llc->dev_tracker, GFP_ATOMIC); + } + dev_hold(dev); rcu_read_unlock(); - if (!llc->dev) + if (!dev) goto out; + if (!addr->sllc_sap) { rc = -EUSERS; addr->sllc_sap = llc_ui_autoport(); @@ -398,6 +408,12 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen) goto out_put; } } + + /* Note: We do not expect errors from this point. 
*/ + llc->dev = dev; + netdev_tracker_alloc(llc->dev, &llc->dev_tracker, GFP_KERNEL); + dev = NULL; + llc->laddr.lsap = addr->sllc_sap; memcpy(llc->laddr.mac, addr->sllc_mac, IFHWADDRLEN); memcpy(&llc->addr, addr, sizeof(llc->addr)); @@ -408,6 +424,7 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen) out_put: llc_sap_put(sap); out: + dev_put(dev); release_sock(sk); return rc; } diff --git a/net/mac80211/airtime.c b/net/mac80211/airtime.c index 26d2f8ba70297..03fb8070bdae6 100644 --- a/net/mac80211/airtime.c +++ b/net/mac80211/airtime.c @@ -453,9 +453,17 @@ static u32 ieee80211_get_rate_duration(struct ieee80211_hw *hw, return 0; } - if (WARN_ON_ONCE((status->encoding != RX_ENC_HE && streams > 4) || - (status->encoding == RX_ENC_HE && streams > 8))) + if (unlikely(status->encoding != RX_ENC_HE && streams > 4)) { + pr_warn_once("%s: status->encoding != RX_ENC_HE (%u != %d) && streams > 4 (%d > 4)\n", + __func__, status->encoding, RX_ENC_HE, streams); return 0; + } + + if (unlikely(status->encoding == RX_ENC_HE && streams > 8)) { + pr_warn_once("%s: status->encoding == RX_ENC_HE (%u == %d) && streams > 8 (%d > 8)\n", + __func__, status->encoding, RX_ENC_HE, streams); + return 0; + } duration = airtime_mcs_groups[group].duration[idx]; duration <<= airtime_mcs_groups[group].shift; diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 87a208089caf7..58ff57dc669c4 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2148,14 +2148,12 @@ static int copy_mesh_setup(struct ieee80211_if_mesh *ifmsh, const struct mesh_setup *setup) { u8 *new_ie; - const u8 *old_ie; struct ieee80211_sub_if_data *sdata = container_of(ifmsh, struct ieee80211_sub_if_data, u.mesh); int i; /* allocate information elements */ new_ie = NULL; - old_ie = ifmsh->ie; if (setup->ie_len) { new_ie = kmemdup(setup->ie, setup->ie_len, @@ -2165,7 +2163,6 @@ static int copy_mesh_setup(struct ieee80211_if_mesh *ifmsh, } ifmsh->ie_len = setup->ie_len; ifmsh->ie = 
new_ie; - kfree(old_ie); /* now copy the rest of the setup parameters */ ifmsh->mesh_id_len = setup->mesh_id_len; diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index 9479f2787ea79..88d9cc945a216 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -441,7 +441,7 @@ static ssize_t sta_ht_capa_read(struct file *file, char __user *userbuf, #define PRINT_HT_CAP(_cond, _str) \ do { \ if (_cond) \ - p += scnprintf(p, sizeof(buf)+buf-p, "\t" _str "\n"); \ + p += scnprintf(p, bufsz + buf - p, "\t" _str "\n"); \ } while (0) char *buf, *p; int i; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index e87bccaab561f..95aaf00c876c3 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -2380,7 +2380,7 @@ u8 *ieee80211_ie_build_vht_cap(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap, u8 *ieee80211_ie_build_vht_oper(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap, const struct cfg80211_chan_def *chandef); u8 ieee80211_ie_len_he_cap(struct ieee80211_sub_if_data *sdata, u8 iftype); -u8 *ieee80211_ie_build_he_cap(u8 *pos, +u8 *ieee80211_ie_build_he_cap(u32 disable_flags, u8 *pos, const struct ieee80211_sta_he_cap *he_cap, u8 *end); void ieee80211_ie_build_he_6ghz_cap(struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 15ac08d111ea1..6847fdf934392 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -580,7 +580,7 @@ int mesh_add_he_cap_ie(struct ieee80211_sub_if_data *sdata, return -ENOMEM; pos = skb_put(skb, ie_len); - ieee80211_ie_build_he_cap(pos, he_cap, pos + ie_len); + ieee80211_ie_build_he_cap(0, pos, he_cap, pos + ie_len); return 0; } diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 744842c4513b1..c4d3e2da73f23 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -636,7 +636,7 @@ static void ieee80211_add_he_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, struct ieee80211_supported_band *sband) { - u8 
*pos; + u8 *pos, *pre_he_pos; const struct ieee80211_sta_he_cap *he_cap = NULL; struct ieee80211_chanctx_conf *chanctx_conf; u8 he_cap_size; @@ -653,16 +653,21 @@ static void ieee80211_add_he_ie(struct ieee80211_sub_if_data *sdata, he_cap = ieee80211_get_he_iftype_cap(sband, ieee80211_vif_type_p2p(&sdata->vif)); - if (!he_cap || !reg_cap) + if (!he_cap || !chanctx_conf || !reg_cap) return; + /* get a max size estimate */ he_cap_size = 2 + 1 + sizeof(he_cap->he_cap_elem) + ieee80211_he_mcs_nss_size(&he_cap->he_cap_elem) + ieee80211_he_ppe_size(he_cap->ppe_thres[0], he_cap->he_cap_elem.phy_cap_info); pos = skb_put(skb, he_cap_size); - ieee80211_ie_build_he_cap(pos, he_cap, pos + he_cap_size); + pre_he_pos = pos; + pos = ieee80211_ie_build_he_cap(sdata->u.mgd.flags, + pos, he_cap, pos + he_cap_size); + /* trim excess if any */ + skb_trim(skb, skb->len - (pre_he_pos + he_cap_size - pos)); ieee80211_ie_build_he_6ghz_cap(sdata, skb); } diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index 8c6416129d5be..810ea5cd66814 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -664,7 +664,9 @@ static void rate_fixup_ratelist(struct ieee80211_vif *vif, * needs to be fixed. 
*/ if (rates[i].flags & IEEE80211_TX_RC_MCS) { - WARN_ON(rates[i].idx > 76); + if (unlikely(rates[i].idx > 76)) + pr_warn("%s: rates[%d].idx == %d > 76\n", + __func__, i, rates[i].idx); if (!(rates[i].flags & IEEE80211_TX_RC_USE_RTS_CTS) && info->control.use_cts_prot) @@ -674,7 +676,9 @@ static void rate_fixup_ratelist(struct ieee80211_vif *vif, } if (rates[i].flags & IEEE80211_TX_RC_VHT_MCS) { - WARN_ON(ieee80211_rate_get_vht_mcs(&rates[i]) > 9); + if (unlikely(ieee80211_rate_get_vht_mcs(&rates[i]) > 9)) + pr_warn("%s: ieee80211_rate_get_vht_mcs(&rates[%d]) == %d > 9\n", + __func__, i, ieee80211_rate_get_vht_mcs(&rates[i])); continue; } @@ -685,7 +689,9 @@ static void rate_fixup_ratelist(struct ieee80211_vif *vif, } /* RC is busted */ - if (WARN_ON_ONCE(rates[i].idx >= sband->n_bitrates)) { + if (unlikely(rates[i].idx >= sband->n_bitrates)) { + pr_warn_once("%s: rates[%d].idx == %d >= sband->n_bitrates == %d\n", + __func__, i, rates[i].idx, sband->n_bitrates); rates[i].idx = -1; continue; } diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index 9c3b7fc377c17..a65d55bbb46b1 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -438,8 +438,13 @@ minstrel_ht_get_tp_avg(struct minstrel_ht_sta *mi, int group, int rate, if (minstrel_ht_is_legacy_group(group)) overhead = mi->overhead_legacy; - else + else { ampdu_len = minstrel_ht_avg_ampdu_len(mi); + if (unlikely(!ampdu_len)) { + pr_err_once("%s: ampdu_len == 0!\n", __func__); + ampdu_len = 1; + } + } nsecs = 1000 * overhead / ampdu_len; nsecs += minstrel_mcs_groups[group].duration[rate] << diff --git a/net/mac80211/util.c b/net/mac80211/util.c index f71b042a5c8bb..342c2bfe27091 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1974,7 +1974,7 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_sub_if_data *sdata, if (he_cap && cfg80211_any_usable_channels(local->hw.wiphy, BIT(sband->band), IEEE80211_CHAN_NO_HE)) { - pos = 
ieee80211_ie_build_he_cap(pos, he_cap, end); + pos = ieee80211_ie_build_he_cap(0, pos, he_cap, end); if (!pos) goto out_err; } @@ -2918,10 +2918,11 @@ u8 ieee80211_ie_len_he_cap(struct ieee80211_sub_if_data *sdata, u8 iftype) he_cap->he_cap_elem.phy_cap_info); } -u8 *ieee80211_ie_build_he_cap(u8 *pos, +u8 *ieee80211_ie_build_he_cap(u32 disable_flags, u8 *pos, const struct ieee80211_sta_he_cap *he_cap, u8 *end) { + struct ieee80211_he_cap_elem elem; u8 n; u8 ie_len; u8 *orig_pos = pos; @@ -2934,7 +2935,23 @@ u8 *ieee80211_ie_build_he_cap(u8 *pos, if (!he_cap) return orig_pos; - n = ieee80211_he_mcs_nss_size(&he_cap->he_cap_elem); + /* modify on stack first to calculate 'n' and 'ie_len' correctly */ + elem = he_cap->he_cap_elem; + + if (disable_flags & IEEE80211_STA_DISABLE_40MHZ) + elem.phy_cap_info[0] &= + ~(IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_80MHZ_IN_5G | + IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_IN_2G); + + if (disable_flags & IEEE80211_STA_DISABLE_160MHZ) + elem.phy_cap_info[0] &= + ~IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G; + + if (disable_flags & IEEE80211_STA_DISABLE_80P80MHZ) + elem.phy_cap_info[0] &= + ~IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_80PLUS80_MHZ_IN_5G; + + n = ieee80211_he_mcs_nss_size(&elem); ie_len = 2 + 1 + sizeof(he_cap->he_cap_elem) + n + ieee80211_he_ppe_size(he_cap->ppe_thres[0], @@ -2948,8 +2965,8 @@ u8 *ieee80211_ie_build_he_cap(u8 *pos, *pos++ = WLAN_EID_EXT_HE_CAPABILITY; /* Fixed data */ - memcpy(pos, &he_cap->he_cap_elem, sizeof(he_cap->he_cap_elem)); - pos += sizeof(he_cap->he_cap_elem); + memcpy(pos, &elem, sizeof(elem)); + pos += sizeof(elem); memcpy(pos, &he_cap->he_mcs_nss_supp, n); pos += n; diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c index c921de63b494b..fc05351d3a82e 100644 --- a/net/mctp/af_mctp.c +++ b/net/mctp/af_mctp.c @@ -90,13 +90,13 @@ static int mctp_bind(struct socket *sock, struct sockaddr *addr, int addrlen) static int mctp_sendmsg(struct socket *sock, struct msghdr *msg, size_t 
len) { DECLARE_SOCKADDR(struct sockaddr_mctp *, addr, msg->msg_name); - const int hlen = MCTP_HEADER_MAXLEN + sizeof(struct mctp_hdr); int rc, addrlen = msg->msg_namelen; struct sock *sk = sock->sk; struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk); struct mctp_skb_cb *cb; struct mctp_route *rt; - struct sk_buff *skb; + struct sk_buff *skb = NULL; + int hlen; if (addr) { if (addrlen < sizeof(struct sockaddr_mctp)) @@ -119,6 +119,34 @@ static int mctp_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) if (addr->smctp_network == MCTP_NET_ANY) addr->smctp_network = mctp_default_net(sock_net(sk)); + /* direct addressing */ + if (msk->addr_ext && addrlen >= sizeof(struct sockaddr_mctp_ext)) { + DECLARE_SOCKADDR(struct sockaddr_mctp_ext *, + extaddr, msg->msg_name); + struct net_device *dev; + + rc = -EINVAL; + rcu_read_lock(); + dev = dev_get_by_index_rcu(sock_net(sk), extaddr->smctp_ifindex); + /* check for correct halen */ + if (dev && extaddr->smctp_halen == dev->addr_len) { + hlen = LL_RESERVED_SPACE(dev) + sizeof(struct mctp_hdr); + rc = 0; + } + rcu_read_unlock(); + if (rc) + goto err_free; + rt = NULL; + } else { + rt = mctp_route_lookup(sock_net(sk), addr->smctp_network, + addr->smctp_addr.s_addr); + if (!rt) { + rc = -EHOSTUNREACH; + goto err_free; + } + hlen = LL_RESERVED_SPACE(rt->dev->dev) + sizeof(struct mctp_hdr); + } + skb = sock_alloc_send_skb(sk, hlen + 1 + len, msg->msg_flags & MSG_DONTWAIT, &rc); if (!skb) @@ -137,8 +165,8 @@ static int mctp_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) cb = __mctp_cb(skb); cb->net = addr->smctp_network; - /* direct addressing */ - if (msk->addr_ext && addrlen >= sizeof(struct sockaddr_mctp_ext)) { + if (!rt) { + /* fill extended address in cb */ DECLARE_SOCKADDR(struct sockaddr_mctp_ext *, extaddr, msg->msg_name); @@ -149,17 +177,9 @@ static int mctp_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) } cb->ifindex = extaddr->smctp_ifindex; + /* smctp_halen is checked 
above */ cb->halen = extaddr->smctp_halen; memcpy(cb->haddr, extaddr->smctp_haddr, cb->halen); - - rt = NULL; - } else { - rt = mctp_route_lookup(sock_net(sk), addr->smctp_network, - addr->smctp_addr.s_addr); - if (!rt) { - rc = -EHOSTUNREACH; - goto err_free; - } } rc = mctp_local_output(sk, rt, skb, addr->smctp_addr.s_addr, diff --git a/net/mctp/device.c b/net/mctp/device.c index ef2755f82f87b..f86ef6d751bdc 100644 --- a/net/mctp/device.c +++ b/net/mctp/device.c @@ -24,12 +24,25 @@ struct mctp_dump_cb { size_t a_idx; }; -/* unlocked: caller must hold rcu_read_lock */ +/* unlocked: caller must hold rcu_read_lock. + * Returned mctp_dev has its refcount incremented, or NULL if unset. + */ struct mctp_dev *__mctp_dev_get(const struct net_device *dev) { - return rcu_dereference(dev->mctp_ptr); + struct mctp_dev *mdev = rcu_dereference(dev->mctp_ptr); + + /* RCU guarantees that any mdev is still live. + * Zero refcount implies a pending free, return NULL. + */ + if (mdev) + if (!refcount_inc_not_zero(&mdev->refs)) + return NULL; + return mdev; } +/* Returned mctp_dev does not have refcount incremented. The returned pointer + * remains live while rtnl_lock is held, as that prevents mctp_unregister() + */ struct mctp_dev *mctp_dev_get_rtnl(const struct net_device *dev) { return rtnl_dereference(dev->mctp_ptr); @@ -123,6 +136,7 @@ static int mctp_dump_addrinfo(struct sk_buff *skb, struct netlink_callback *cb) if (mdev) { rc = mctp_dump_dev_addrinfo(mdev, skb, cb); + mctp_dev_put(mdev); // Error indicates full buffer, this // callback will get retried. 
if (rc < 0) @@ -297,7 +311,7 @@ void mctp_dev_hold(struct mctp_dev *mdev) void mctp_dev_put(struct mctp_dev *mdev) { - if (refcount_dec_and_test(&mdev->refs)) { + if (mdev && refcount_dec_and_test(&mdev->refs)) { dev_put(mdev->dev); kfree_rcu(mdev, rcu); } @@ -369,6 +383,7 @@ static size_t mctp_get_link_af_size(const struct net_device *dev, if (!mdev) return 0; ret = nla_total_size(4); /* IFLA_MCTP_NET */ + mctp_dev_put(mdev); return ret; } diff --git a/net/mctp/route.c b/net/mctp/route.c index e52cef7505002..1a296e211a507 100644 --- a/net/mctp/route.c +++ b/net/mctp/route.c @@ -498,6 +498,11 @@ static int mctp_route_output(struct mctp_route *route, struct sk_buff *skb) if (cb->ifindex) { /* direct route; use the hwaddr we stashed in sendmsg */ + if (cb->halen != skb->dev->addr_len) { + /* sanity check, sendmsg should have already caught this */ + kfree_skb(skb); + return -EMSGSIZE; + } daddr = cb->haddr; } else { /* If lookup fails let the device handle daddr==NULL */ @@ -507,7 +512,7 @@ static int mctp_route_output(struct mctp_route *route, struct sk_buff *skb) rc = dev_hard_header(skb, skb->dev, ntohs(skb->protocol), daddr, skb->dev->dev_addr, skb->len); - if (rc) { + if (rc < 0) { kfree_skb(skb); return -EHOSTUNREACH; } @@ -707,7 +712,7 @@ static int mctp_do_fragment_route(struct mctp_route *rt, struct sk_buff *skb, { const unsigned int hlen = sizeof(struct mctp_hdr); struct mctp_hdr *hdr, *hdr2; - unsigned int pos, size; + unsigned int pos, size, headroom; struct sk_buff *skb2; int rc; u8 seq; @@ -721,6 +726,9 @@ static int mctp_do_fragment_route(struct mctp_route *rt, struct sk_buff *skb, return -EMSGSIZE; } + /* keep same headroom as the original skb */ + headroom = skb_headroom(skb); + /* we've got the header */ skb_pull(skb, hlen); @@ -728,7 +736,7 @@ static int mctp_do_fragment_route(struct mctp_route *rt, struct sk_buff *skb, /* size of message payload */ size = min(mtu - hlen, skb->len - pos); - skb2 = alloc_skb(MCTP_HEADER_MAXLEN + hlen + size, 
GFP_KERNEL); + skb2 = alloc_skb(headroom + hlen + size, GFP_KERNEL); if (!skb2) { rc = -ENOMEM; break; @@ -744,7 +752,7 @@ static int mctp_do_fragment_route(struct mctp_route *rt, struct sk_buff *skb, skb_set_owner_w(skb2, skb->sk); /* establish packet */ - skb_reserve(skb2, MCTP_HEADER_MAXLEN); + skb_reserve(skb2, headroom); skb_reset_network_header(skb2); skb_put(skb2, hlen + size); skb2->transport_header = skb2->network_header + hlen; @@ -786,7 +794,7 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt, { struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk); struct mctp_skb_cb *cb = mctp_cb(skb); - struct mctp_route tmp_rt; + struct mctp_route tmp_rt = {0}; struct mctp_sk_key *key; struct net_device *dev; struct mctp_hdr *hdr; @@ -892,6 +900,7 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt, mctp_route_release(rt); dev_put(dev); + mctp_dev_put(tmp_rt.dev); return rc; @@ -1057,11 +1066,13 @@ static int mctp_pkttype_receive(struct sk_buff *skb, struct net_device *dev, rt->output(rt, skb); mctp_route_release(rt); + mctp_dev_put(mdev); return NET_RX_SUCCESS; err_drop: kfree_skb(skb); + mctp_dev_put(mdev); return NET_RX_DROP; } diff --git a/net/mctp/test/utils.c b/net/mctp/test/utils.c index 7b7918702592a..e03ba66bbe181 100644 --- a/net/mctp/test/utils.c +++ b/net/mctp/test/utils.c @@ -54,7 +54,6 @@ struct mctp_test_dev *mctp_test_create_dev(void) rcu_read_lock(); dev->mdev = __mctp_dev_get(ndev); - mctp_dev_hold(dev->mdev); rcu_read_unlock(); return dev; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 1c72f25f083ea..014c9d88f9479 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1196,6 +1196,7 @@ static struct sk_buff *__mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk, g tcp_skb_entail(ssk, skb); return skb; } + tcp_skb_tsorted_anchor_cleanup(skb); kfree_skb(skb); return NULL; } diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index bf1e17c678f13..7552e1e9fd629 
100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -67,6 +67,8 @@ EXPORT_SYMBOL_GPL(nf_conntrack_hash); struct conntrack_gc_work { struct delayed_work dwork; u32 next_bucket; + u32 avg_timeout; + u32 start_time; bool exiting; bool early_drop; }; @@ -78,8 +80,19 @@ static __read_mostly bool nf_conntrack_locks_all; /* serialize hash resizes and nf_ct_iterate_cleanup */ static DEFINE_MUTEX(nf_conntrack_mutex); -#define GC_SCAN_INTERVAL (120u * HZ) +#define GC_SCAN_INTERVAL_MAX (60ul * HZ) +#define GC_SCAN_INTERVAL_MIN (1ul * HZ) + +/* clamp timeouts to this value (TCP unacked) */ +#define GC_SCAN_INTERVAL_CLAMP (300ul * HZ) + +/* large initial bias so that we don't scan often just because we have + * three entries with a 1s timeout. + */ +#define GC_SCAN_INTERVAL_INIT INT_MAX + #define GC_SCAN_MAX_DURATION msecs_to_jiffies(10) +#define GC_SCAN_EXPIRED_MAX (64000u / HZ) #define MIN_CHAINLEN 8u #define MAX_CHAINLEN (32u - MIN_CHAINLEN) @@ -1421,16 +1434,28 @@ static bool gc_worker_can_early_drop(const struct nf_conn *ct) static void gc_worker(struct work_struct *work) { - unsigned long end_time = jiffies + GC_SCAN_MAX_DURATION; unsigned int i, hashsz, nf_conntrack_max95 = 0; - unsigned long next_run = GC_SCAN_INTERVAL; + u32 end_time, start_time = nfct_time_stamp; struct conntrack_gc_work *gc_work; + unsigned int expired_count = 0; + unsigned long next_run; + s32 delta_time; + gc_work = container_of(work, struct conntrack_gc_work, dwork.work); i = gc_work->next_bucket; if (gc_work->early_drop) nf_conntrack_max95 = nf_conntrack_max / 100u * 95u; + if (i == 0) { + gc_work->avg_timeout = GC_SCAN_INTERVAL_INIT; + gc_work->start_time = start_time; + } + + next_run = gc_work->avg_timeout; + + end_time = start_time + GC_SCAN_MAX_DURATION; + do { struct nf_conntrack_tuple_hash *h; struct hlist_nulls_head *ct_hash; @@ -1447,6 +1472,7 @@ static void gc_worker(struct work_struct *work) hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[i], hnnode) 
{ struct nf_conntrack_net *cnet; + unsigned long expires; struct net *net; tmp = nf_ct_tuplehash_to_ctrack(h); @@ -1456,11 +1482,29 @@ static void gc_worker(struct work_struct *work) continue; } + if (expired_count > GC_SCAN_EXPIRED_MAX) { + rcu_read_unlock(); + + gc_work->next_bucket = i; + gc_work->avg_timeout = next_run; + + delta_time = nfct_time_stamp - gc_work->start_time; + + /* re-sched immediately if total cycle time is exceeded */ + next_run = delta_time < (s32)GC_SCAN_INTERVAL_MAX; + goto early_exit; + } + if (nf_ct_is_expired(tmp)) { nf_ct_gc_expired(tmp); + expired_count++; continue; } + expires = clamp(nf_ct_expires(tmp), GC_SCAN_INTERVAL_MIN, GC_SCAN_INTERVAL_CLAMP); + next_run += expires; + next_run /= 2u; + if (nf_conntrack_max95 == 0 || gc_worker_skip_ct(tmp)) continue; @@ -1478,8 +1522,10 @@ static void gc_worker(struct work_struct *work) continue; } - if (gc_worker_can_early_drop(tmp)) + if (gc_worker_can_early_drop(tmp)) { nf_ct_kill(tmp); + expired_count++; + } nf_ct_put(tmp); } @@ -1492,33 +1538,38 @@ static void gc_worker(struct work_struct *work) cond_resched(); i++; - if (time_after(jiffies, end_time) && i < hashsz) { + delta_time = nfct_time_stamp - end_time; + if (delta_time > 0 && i < hashsz) { + gc_work->avg_timeout = next_run; gc_work->next_bucket = i; next_run = 0; - break; + goto early_exit; } } while (i < hashsz); + gc_work->next_bucket = 0; + + next_run = clamp(next_run, GC_SCAN_INTERVAL_MIN, GC_SCAN_INTERVAL_MAX); + + delta_time = max_t(s32, nfct_time_stamp - gc_work->start_time, 1); + if (next_run > (unsigned long)delta_time) + next_run -= delta_time; + else + next_run = 1; + +early_exit: if (gc_work->exiting) return; - /* - * Eviction will normally happen from the packet path, and not - * from this gc worker. - * - * This worker is only here to reap expired entries when system went - * idle after a busy period. 
- */ - if (next_run) { + if (next_run) gc_work->early_drop = false; - gc_work->next_bucket = 0; - } + queue_delayed_work(system_power_efficient_wq, &gc_work->dwork, next_run); } static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work) { - INIT_DEFERRABLE_WORK(&gc_work->dwork, gc_worker); + INIT_DELAYED_WORK(&gc_work->dwork, gc_worker); gc_work->exiting = false; } diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index ae4488a13c70c..ceb38a7b37cb7 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -556,6 +556,12 @@ static const struct nf_ct_ext_type helper_extend = { .id = NF_CT_EXT_HELPER, }; +void nf_ct_set_auto_assign_helper_warned(struct net *net) +{ + nf_ct_pernet(net)->auto_assign_helper_warned = true; +} +EXPORT_SYMBOL_GPL(nf_ct_set_auto_assign_helper_warned); + void nf_conntrack_helper_pernet_init(struct net *net) { struct nf_conntrack_net *cnet = nf_ct_pernet(net); diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index d1582b888c0d8..8ec55cd72572e 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -341,8 +341,8 @@ static void tcp_options(const struct sk_buff *skb, if (!ptr) return; - state->td_scale = - state->flags = 0; + state->td_scale = 0; + state->flags &= IP_CT_TCP_FLAG_BE_LIBERAL; while (length > 0) { int opcode=*ptr++; @@ -862,6 +862,16 @@ static bool tcp_can_early_drop(const struct nf_conn *ct) return false; } +static void nf_ct_tcp_state_reset(struct ip_ct_tcp_state *state) +{ + state->td_end = 0; + state->td_maxend = 0; + state->td_maxwin = 0; + state->td_maxack = 0; + state->td_scale = 0; + state->flags &= IP_CT_TCP_FLAG_BE_LIBERAL; +} + /* Returns verdict for packet, or -1 for invalid. 
*/ int nf_conntrack_tcp_packet(struct nf_conn *ct, struct sk_buff *skb, @@ -968,8 +978,7 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct, ct->proto.tcp.last_flags &= ~IP_CT_EXP_CHALLENGE_ACK; ct->proto.tcp.seen[ct->proto.tcp.last_dir].flags = ct->proto.tcp.last_flags; - memset(&ct->proto.tcp.seen[dir], 0, - sizeof(struct ip_ct_tcp_state)); + nf_ct_tcp_state_reset(&ct->proto.tcp.seen[dir]); break; } ct->proto.tcp.last_index = index; diff --git a/net/netfilter/nf_flow_table_inet.c b/net/netfilter/nf_flow_table_inet.c index 5c57ade6bd05a..0ccabf3fa6aa3 100644 --- a/net/netfilter/nf_flow_table_inet.c +++ b/net/netfilter/nf_flow_table_inet.c @@ -6,12 +6,29 @@ #include #include #include +#include static unsigned int nf_flow_offload_inet_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { + struct vlan_ethhdr *veth; + __be16 proto; + switch (skb->protocol) { + case htons(ETH_P_8021Q): + veth = (struct vlan_ethhdr *)skb_mac_header(skb); + proto = veth->h_vlan_encapsulated_proto; + break; + case htons(ETH_P_PPP_SES): + proto = nf_flow_pppoe_proto(skb); + break; + default: + proto = skb->protocol; + break; + } + + switch (proto) { case htons(ETH_P_IP): return nf_flow_offload_ip_hook(priv, skb, state); case htons(ETH_P_IPV6): diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c index 889cf88d3dba6..6257d87c3a56d 100644 --- a/net/netfilter/nf_flow_table_ip.c +++ b/net/netfilter/nf_flow_table_ip.c @@ -8,8 +8,6 @@ #include #include #include -#include -#include #include #include #include @@ -239,22 +237,6 @@ static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb, return NF_STOLEN; } -static inline __be16 nf_flow_pppoe_proto(const struct sk_buff *skb) -{ - __be16 proto; - - proto = *((__be16 *)(skb_mac_header(skb) + ETH_HLEN + - sizeof(struct pppoe_hdr))); - switch (proto) { - case htons(PPP_IP): - return htons(ETH_P_IP); - case htons(PPP_IPV6): - return htons(ETH_P_IPV6); - } - - return 0; -} - static bool 
nf_flow_skb_encap_protocol(const struct sk_buff *skb, __be16 proto, u32 *offset) { diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index d71a33ae39b35..30d29d038d095 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -9275,17 +9275,23 @@ int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest) } EXPORT_SYMBOL_GPL(nft_parse_u32_check); -static unsigned int nft_parse_register(const struct nlattr *attr) +static int nft_parse_register(const struct nlattr *attr, u32 *preg) { unsigned int reg; reg = ntohl(nla_get_be32(attr)); switch (reg) { case NFT_REG_VERDICT...NFT_REG_4: - return reg * NFT_REG_SIZE / NFT_REG32_SIZE; + *preg = reg * NFT_REG_SIZE / NFT_REG32_SIZE; + break; + case NFT_REG32_00...NFT_REG32_15: + *preg = reg + NFT_REG_SIZE / NFT_REG32_SIZE - NFT_REG32_00; + break; default: - return reg + NFT_REG_SIZE / NFT_REG32_SIZE - NFT_REG32_00; + return -ERANGE; } + + return 0; } /** @@ -9327,7 +9333,10 @@ int nft_parse_register_load(const struct nlattr *attr, u8 *sreg, u32 len) u32 reg; int err; - reg = nft_parse_register(attr); + err = nft_parse_register(attr, ®); + if (err < 0) + return err; + err = nft_validate_register_load(reg, len); if (err < 0) return err; @@ -9382,7 +9391,10 @@ int nft_parse_register_store(const struct nft_ctx *ctx, int err; u32 reg; - reg = nft_parse_register(attr); + err = nft_parse_register(attr, ®); + if (err < 0) + return err; + err = nft_validate_register_store(ctx, reg, data, type, len); if (err < 0) return err; diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 36e73f9828c50..8af98239655db 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -201,7 +201,7 @@ nft_do_chain(struct nft_pktinfo *pkt, void *priv) const struct nft_rule_dp *rule, *last_rule; const struct net *net = nft_net(pkt); const struct nft_expr *expr, *last; - struct nft_regs regs; + struct nft_regs regs = {}; unsigned int stackptr = 0; 
struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE]; bool genbit = READ_ONCE(net->nft.gencursor); diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c index 7b727d3ebf9df..04bd2f89afe88 100644 --- a/net/netfilter/nft_bitwise.c +++ b/net/netfilter/nft_bitwise.c @@ -287,7 +287,7 @@ static bool nft_bitwise_reduce(struct nft_regs_track *track, if (!track->regs[priv->sreg].selector) return false; - bitwise = nft_expr_priv(expr); + bitwise = nft_expr_priv(track->regs[priv->dreg].selector); if (track->regs[priv->sreg].selector == track->regs[priv->dreg].selector && track->regs[priv->dreg].bitwise && track->regs[priv->dreg].bitwise->ops == expr->ops && @@ -434,7 +434,7 @@ static bool nft_bitwise_fast_reduce(struct nft_regs_track *track, if (!track->regs[priv->sreg].selector) return false; - bitwise = nft_expr_priv(expr); + bitwise = nft_expr_priv(track->regs[priv->dreg].selector); if (track->regs[priv->sreg].selector == track->regs[priv->dreg].selector && track->regs[priv->dreg].bitwise && track->regs[priv->dreg].bitwise->ops == expr->ops && diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 5adf8bb628a80..9c7472af9e4a1 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -1041,6 +1041,9 @@ static int nft_ct_helper_obj_init(const struct nft_ctx *ctx, if (err < 0) goto err_put_helper; + /* Avoid the bogus warning, helper will be assigned after CT init */ + nf_ct_set_auto_assign_helper_warned(ctx->net); + return 0; err_put_helper: diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c index d601974c9d2e0..b8f0111457650 100644 --- a/net/netfilter/nft_socket.c +++ b/net/netfilter/nft_socket.c @@ -36,12 +36,11 @@ static void nft_socket_wildcard(const struct nft_pktinfo *pkt, #ifdef CONFIG_SOCK_CGROUP_DATA static noinline bool -nft_sock_get_eval_cgroupv2(u32 *dest, const struct nft_pktinfo *pkt, u32 level) +nft_sock_get_eval_cgroupv2(u32 *dest, struct sock *sk, const struct nft_pktinfo *pkt, u32 level) { - struct 
sock *sk = skb_to_full_sk(pkt->skb); struct cgroup *cgrp; - if (!sk || !sk_fullsock(sk) || !net_eq(nft_net(pkt), sock_net(sk))) + if (!sk_fullsock(sk)) return false; cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); @@ -108,7 +107,7 @@ static void nft_socket_eval(const struct nft_expr *expr, break; #ifdef CONFIG_SOCK_CGROUP_DATA case NFT_SOCKET_CGROUPV2: - if (!nft_sock_get_eval_cgroupv2(dest, pkt, priv->level)) { + if (!nft_sock_get_eval_cgroupv2(dest, sk, pkt, priv->level)) { regs->verdict.code = NFT_BREAK; return; } diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c index beb0e573266d0..54c0830039470 100644 --- a/net/netlabel/netlabel_kapi.c +++ b/net/netlabel/netlabel_kapi.c @@ -885,6 +885,8 @@ int netlbl_bitmap_walk(const unsigned char *bitmap, u32 bitmap_len, unsigned char bitmask; unsigned char byte; + if (offset >= bitmap_len) + return -1; byte_offset = offset / 8; byte = bitmap[byte_offset]; bit_spot = offset; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 7b344035bfe3f..47a876ccd2881 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -159,6 +159,8 @@ EXPORT_SYMBOL(do_trace_netlink_extack); static inline u32 netlink_group_mask(u32 group) { + if (group > 32) + return 0; return group ? 
1 << (group - 1) : 0; } diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index d2537383a3e89..6a193cce2a754 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -560,6 +560,10 @@ static int nci_close_device(struct nci_dev *ndev) mutex_lock(&ndev->req_lock); if (!test_and_clear_bit(NCI_UP, &ndev->flags)) { + /* Need to flush the cmd wq in case + * there is a queued/running cmd_work + */ + flush_workqueue(ndev->cmd_wq); del_timer_sync(&ndev->cmd_timer); del_timer_sync(&ndev->data_timer); mutex_unlock(&ndev->req_lock); diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 780d9e2246f39..8955f31fa47e9 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -1051,7 +1051,7 @@ static int clone(struct datapath *dp, struct sk_buff *skb, int rem = nla_len(attr); bool dont_clone_flow_key; - /* The first action is always 'OVS_CLONE_ATTR_ARG'. */ + /* The first action is always 'OVS_CLONE_ATTR_EXEC'. */ clone_arg = nla_data(attr); dont_clone_flow_key = nla_get_u32(clone_arg); actions = nla_next(clone_arg, &rem); diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index c07afff57dd32..4a947c13c813a 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -734,6 +734,57 @@ static bool skb_nfct_cached(struct net *net, } #if IS_ENABLED(CONFIG_NF_NAT) +static void ovs_nat_update_key(struct sw_flow_key *key, + const struct sk_buff *skb, + enum nf_nat_manip_type maniptype) +{ + if (maniptype == NF_NAT_MANIP_SRC) { + __be16 src; + + key->ct_state |= OVS_CS_F_SRC_NAT; + if (key->eth.type == htons(ETH_P_IP)) + key->ipv4.addr.src = ip_hdr(skb)->saddr; + else if (key->eth.type == htons(ETH_P_IPV6)) + memcpy(&key->ipv6.addr.src, &ipv6_hdr(skb)->saddr, + sizeof(key->ipv6.addr.src)); + else + return; + + if (key->ip.proto == IPPROTO_UDP) + src = udp_hdr(skb)->source; + else if (key->ip.proto == IPPROTO_TCP) + src = tcp_hdr(skb)->source; + else if (key->ip.proto == IPPROTO_SCTP) + src = 
sctp_hdr(skb)->source; + else + return; + + key->tp.src = src; + } else { + __be16 dst; + + key->ct_state |= OVS_CS_F_DST_NAT; + if (key->eth.type == htons(ETH_P_IP)) + key->ipv4.addr.dst = ip_hdr(skb)->daddr; + else if (key->eth.type == htons(ETH_P_IPV6)) + memcpy(&key->ipv6.addr.dst, &ipv6_hdr(skb)->daddr, + sizeof(key->ipv6.addr.dst)); + else + return; + + if (key->ip.proto == IPPROTO_UDP) + dst = udp_hdr(skb)->dest; + else if (key->ip.proto == IPPROTO_TCP) + dst = tcp_hdr(skb)->dest; + else if (key->ip.proto == IPPROTO_SCTP) + dst = sctp_hdr(skb)->dest; + else + return; + + key->tp.dst = dst; + } +} + /* Modelled after nf_nat_ipv[46]_fn(). * range is only used for new, uninitialized NAT state. * Returns either NF_ACCEPT or NF_DROP. @@ -741,7 +792,7 @@ static bool skb_nfct_cached(struct net *net, static int ovs_ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, const struct nf_nat_range2 *range, - enum nf_nat_manip_type maniptype) + enum nf_nat_manip_type maniptype, struct sw_flow_key *key) { int hooknum, nh_off, err = NF_ACCEPT; @@ -813,58 +864,11 @@ static int ovs_ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct, push: skb_push_rcsum(skb, nh_off); - return err; -} - -static void ovs_nat_update_key(struct sw_flow_key *key, - const struct sk_buff *skb, - enum nf_nat_manip_type maniptype) -{ - if (maniptype == NF_NAT_MANIP_SRC) { - __be16 src; - - key->ct_state |= OVS_CS_F_SRC_NAT; - if (key->eth.type == htons(ETH_P_IP)) - key->ipv4.addr.src = ip_hdr(skb)->saddr; - else if (key->eth.type == htons(ETH_P_IPV6)) - memcpy(&key->ipv6.addr.src, &ipv6_hdr(skb)->saddr, - sizeof(key->ipv6.addr.src)); - else - return; - - if (key->ip.proto == IPPROTO_UDP) - src = udp_hdr(skb)->source; - else if (key->ip.proto == IPPROTO_TCP) - src = tcp_hdr(skb)->source; - else if (key->ip.proto == IPPROTO_SCTP) - src = sctp_hdr(skb)->source; - else - return; - - key->tp.src = src; - } else { - __be16 dst; - - key->ct_state |= OVS_CS_F_DST_NAT; - 
if (key->eth.type == htons(ETH_P_IP)) - key->ipv4.addr.dst = ip_hdr(skb)->daddr; - else if (key->eth.type == htons(ETH_P_IPV6)) - memcpy(&key->ipv6.addr.dst, &ipv6_hdr(skb)->daddr, - sizeof(key->ipv6.addr.dst)); - else - return; - - if (key->ip.proto == IPPROTO_UDP) - dst = udp_hdr(skb)->dest; - else if (key->ip.proto == IPPROTO_TCP) - dst = tcp_hdr(skb)->dest; - else if (key->ip.proto == IPPROTO_SCTP) - dst = sctp_hdr(skb)->dest; - else - return; + /* Update the flow key if NAT successful. */ + if (err == NF_ACCEPT) + ovs_nat_update_key(key, skb, maniptype); - key->tp.dst = dst; - } + return err; } /* Returns NF_DROP if the packet should be dropped, NF_ACCEPT otherwise. */ @@ -906,7 +910,7 @@ static int ovs_ct_nat(struct net *net, struct sw_flow_key *key, } else { return NF_ACCEPT; /* Connection is not NATed. */ } - err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range, maniptype); + err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range, maniptype, key); if (err == NF_ACCEPT && ct->status & IPS_DST_NAT) { if (ct->status & IPS_SRC_NAT) { @@ -916,17 +920,13 @@ static int ovs_ct_nat(struct net *net, struct sw_flow_key *key, maniptype = NF_NAT_MANIP_SRC; err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range, - maniptype); + maniptype, key); } else if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) { err = ovs_ct_nat_execute(skb, ct, ctinfo, NULL, - NF_NAT_MANIP_SRC); + NF_NAT_MANIP_SRC, key); } } - /* Mark NAT done if successful and update the flow key. 
*/ - if (err == NF_ACCEPT) - ovs_nat_update_key(key, skb, maniptype); - return err; } #else /* !CONFIG_NF_NAT */ diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index fd1f809e9bc1b..c591b923016a6 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -2201,8 +2201,8 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey, icmpv6_key->icmpv6_type = ntohs(output->tp.src); icmpv6_key->icmpv6_code = ntohs(output->tp.dst); - if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION || - icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) { + if (swkey->tp.src == htons(NDISC_NEIGHBOUR_SOLICITATION) || + swkey->tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) { struct ovs_key_nd *nd_key; nla = nla_reserve(skb, OVS_KEY_ATTR_ND, sizeof(*nd_key)); @@ -2288,6 +2288,62 @@ static struct sw_flow_actions *nla_alloc_flow_actions(int size) return sfa; } +static void ovs_nla_free_nested_actions(const struct nlattr *actions, int len); + +static void ovs_nla_free_check_pkt_len_action(const struct nlattr *action) +{ + const struct nlattr *a; + int rem; + + nla_for_each_nested(a, action, rem) { + switch (nla_type(a)) { + case OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL: + case OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER: + ovs_nla_free_nested_actions(nla_data(a), nla_len(a)); + break; + } + } +} + +static void ovs_nla_free_clone_action(const struct nlattr *action) +{ + const struct nlattr *a = nla_data(action); + int rem = nla_len(action); + + switch (nla_type(a)) { + case OVS_CLONE_ATTR_EXEC: + /* The real list of actions follows this attribute. 
*/ + a = nla_next(a, &rem); + ovs_nla_free_nested_actions(a, rem); + break; + } +} + +static void ovs_nla_free_dec_ttl_action(const struct nlattr *action) +{ + const struct nlattr *a = nla_data(action); + + switch (nla_type(a)) { + case OVS_DEC_TTL_ATTR_ACTION: + ovs_nla_free_nested_actions(nla_data(a), nla_len(a)); + break; + } +} + +static void ovs_nla_free_sample_action(const struct nlattr *action) +{ + const struct nlattr *a = nla_data(action); + int rem = nla_len(action); + + switch (nla_type(a)) { + case OVS_SAMPLE_ATTR_ARG: + /* The real list of actions follows this attribute. */ + a = nla_next(a, &rem); + ovs_nla_free_nested_actions(a, rem); + break; + } +} + static void ovs_nla_free_set_action(const struct nlattr *a) { const struct nlattr *ovs_key = nla_data(a); @@ -2301,25 +2357,54 @@ static void ovs_nla_free_set_action(const struct nlattr *a) } } -void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts) +static void ovs_nla_free_nested_actions(const struct nlattr *actions, int len) { const struct nlattr *a; int rem; - if (!sf_acts) + /* Whenever new actions are added, the need to update this + * function should be considered. 
+ */ + BUILD_BUG_ON(OVS_ACTION_ATTR_MAX != 23); + + if (!actions) return; - nla_for_each_attr(a, sf_acts->actions, sf_acts->actions_len, rem) { + nla_for_each_attr(a, actions, len, rem) { switch (nla_type(a)) { - case OVS_ACTION_ATTR_SET: - ovs_nla_free_set_action(a); + case OVS_ACTION_ATTR_CHECK_PKT_LEN: + ovs_nla_free_check_pkt_len_action(a); + break; + + case OVS_ACTION_ATTR_CLONE: + ovs_nla_free_clone_action(a); break; + case OVS_ACTION_ATTR_CT: ovs_ct_free_action(a); break; + + case OVS_ACTION_ATTR_DEC_TTL: + ovs_nla_free_dec_ttl_action(a); + break; + + case OVS_ACTION_ATTR_SAMPLE: + ovs_nla_free_sample_action(a); + break; + + case OVS_ACTION_ATTR_SET: + ovs_nla_free_set_action(a); + break; } } +} + +void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts) +{ + if (!sf_acts) + return; + ovs_nla_free_nested_actions(sf_acts->actions, sf_acts->actions_len); kfree(sf_acts); } @@ -3429,7 +3514,9 @@ static int clone_action_to_attr(const struct nlattr *attr, if (!start) return -EMSGSIZE; - err = ovs_nla_put_actions(nla_data(attr), rem, skb); + /* Skipping the OVS_CLONE_ATTR_EXEC that is always the first attribute. 
*/ + attr = nla_next(nla_data(attr), &rem); + err = ovs_nla_put_actions(attr, rem, skb); if (err) nla_nest_cancel(skb, start); diff --git a/net/rfkill/core.c b/net/rfkill/core.c index 5b1927d66f0da..dac4fdc7488a3 100644 --- a/net/rfkill/core.c +++ b/net/rfkill/core.c @@ -78,6 +78,7 @@ struct rfkill_data { struct mutex mtx; wait_queue_head_t read_wait; bool input_handler; + u8 max_size; }; @@ -1153,6 +1154,8 @@ static int rfkill_fop_open(struct inode *inode, struct file *file) if (!data) return -ENOMEM; + data->max_size = RFKILL_EVENT_SIZE_V1; + INIT_LIST_HEAD(&data->events); mutex_init(&data->mtx); init_waitqueue_head(&data->read_wait); @@ -1235,6 +1238,7 @@ static ssize_t rfkill_fop_read(struct file *file, char __user *buf, list); sz = min_t(unsigned long, sizeof(ev->ev), count); + sz = min_t(unsigned long, sz, data->max_size); ret = sz; if (copy_to_user(buf, &ev->ev, sz)) ret = -EFAULT; @@ -1249,6 +1253,7 @@ static ssize_t rfkill_fop_read(struct file *file, char __user *buf, static ssize_t rfkill_fop_write(struct file *file, const char __user *buf, size_t count, loff_t *pos) { + struct rfkill_data *data = file->private_data; struct rfkill *rfkill; struct rfkill_event_ext ev; int ret; @@ -1263,6 +1268,7 @@ static ssize_t rfkill_fop_write(struct file *file, const char __user *buf, * our API version even in a write() call, if it cares. 
*/ count = min(count, sizeof(ev)); + count = min_t(size_t, count, data->max_size); if (copy_from_user(&ev, buf, count)) return -EFAULT; @@ -1322,31 +1328,47 @@ static int rfkill_fop_release(struct inode *inode, struct file *file) return 0; } -#ifdef CONFIG_RFKILL_INPUT static long rfkill_fop_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct rfkill_data *data = file->private_data; + int ret = -ENOSYS; + u32 size; if (_IOC_TYPE(cmd) != RFKILL_IOC_MAGIC) return -ENOSYS; - if (_IOC_NR(cmd) != RFKILL_IOC_NOINPUT) - return -ENOSYS; - mutex_lock(&data->mtx); - - if (!data->input_handler) { - if (atomic_inc_return(&rfkill_input_disabled) == 1) - printk(KERN_DEBUG "rfkill: input handler disabled\n"); - data->input_handler = true; + switch (_IOC_NR(cmd)) { +#ifdef CONFIG_RFKILL_INPUT + case RFKILL_IOC_NOINPUT: + if (!data->input_handler) { + if (atomic_inc_return(&rfkill_input_disabled) == 1) + printk(KERN_DEBUG "rfkill: input handler disabled\n"); + data->input_handler = true; + } + ret = 0; + break; +#endif + case RFKILL_IOC_MAX_SIZE: + if (get_user(size, (__u32 __user *)arg)) { + ret = -EFAULT; + break; + } + if (size < RFKILL_EVENT_SIZE_V1 || size > U8_MAX) { + ret = -EINVAL; + break; + } + data->max_size = size; + ret = 0; + break; + default: + break; } - mutex_unlock(&data->mtx); - return 0; + return ret; } -#endif static const struct file_operations rfkill_fops = { .owner = THIS_MODULE, @@ -1355,10 +1377,8 @@ static const struct file_operations rfkill_fops = { .write = rfkill_fop_write, .poll = rfkill_fop_poll, .release = rfkill_fop_release, -#ifdef CONFIG_RFKILL_INPUT .unlocked_ioctl = rfkill_fop_ioctl, .compat_ioctl = compat_ptr_ioctl, -#endif .llseek = no_llseek, }; diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 7bd6f8a66a3ef..969e532f77a90 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -777,14 +777,12 @@ void rxrpc_propose_ACK(struct rxrpc_call *, u8, u32, bool, bool, enum rxrpc_propose_ack_trace); 
void rxrpc_process_call(struct work_struct *); -static inline void rxrpc_reduce_call_timer(struct rxrpc_call *call, - unsigned long expire_at, - unsigned long now, - enum rxrpc_timer_trace why) -{ - trace_rxrpc_timer(call, why, now); - timer_reduce(&call->timer, expire_at); -} +void rxrpc_reduce_call_timer(struct rxrpc_call *call, + unsigned long expire_at, + unsigned long now, + enum rxrpc_timer_trace why); + +void rxrpc_delete_call_timer(struct rxrpc_call *call); /* * call_object.c @@ -808,6 +806,7 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *); bool __rxrpc_queue_call(struct rxrpc_call *); bool rxrpc_queue_call(struct rxrpc_call *); void rxrpc_see_call(struct rxrpc_call *); +bool rxrpc_try_get_call(struct rxrpc_call *call, enum rxrpc_call_trace op); void rxrpc_get_call(struct rxrpc_call *, enum rxrpc_call_trace); void rxrpc_put_call(struct rxrpc_call *, enum rxrpc_call_trace); void rxrpc_cleanup_call(struct rxrpc_call *); diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index df864e6922679..22e05de5d1ca9 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -310,7 +310,7 @@ void rxrpc_process_call(struct work_struct *work) } if (call->state == RXRPC_CALL_COMPLETE) { - del_timer_sync(&call->timer); + rxrpc_delete_call_timer(call); goto out_put; } diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 4eb91d958a48d..043508fd8d8a5 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -53,10 +53,30 @@ static void rxrpc_call_timer_expired(struct timer_list *t) if (call->state < RXRPC_CALL_COMPLETE) { trace_rxrpc_timer(call, rxrpc_timer_expired, jiffies); - rxrpc_queue_call(call); + __rxrpc_queue_call(call); + } else { + rxrpc_put_call(call, rxrpc_call_put); + } +} + +void rxrpc_reduce_call_timer(struct rxrpc_call *call, + unsigned long expire_at, + unsigned long now, + enum rxrpc_timer_trace why) +{ + if (rxrpc_try_get_call(call, rxrpc_call_got_timer)) { + trace_rxrpc_timer(call, why, now); + if 
(timer_reduce(&call->timer, expire_at)) + rxrpc_put_call(call, rxrpc_call_put_notimer); } } +void rxrpc_delete_call_timer(struct rxrpc_call *call) +{ + if (del_timer_sync(&call->timer)) + rxrpc_put_call(call, rxrpc_call_put_timer); +} + static struct lock_class_key rxrpc_call_user_mutex_lock_class_key; /* @@ -463,6 +483,17 @@ void rxrpc_see_call(struct rxrpc_call *call) } } +bool rxrpc_try_get_call(struct rxrpc_call *call, enum rxrpc_call_trace op) +{ + const void *here = __builtin_return_address(0); + int n = atomic_fetch_add_unless(&call->usage, 1, 0); + + if (n == 0) + return false; + trace_rxrpc_call(call->debug_id, op, n, here, NULL); + return true; +} + /* * Note the addition of a ref on a call. */ @@ -510,8 +541,7 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) spin_unlock_bh(&call->lock); rxrpc_put_call_slot(call); - - del_timer_sync(&call->timer); + rxrpc_delete_call_timer(call); /* Make sure we don't get any more notifications */ write_lock_bh(&rx->recvmsg_lock); @@ -618,6 +648,8 @@ static void rxrpc_destroy_call(struct work_struct *work) struct rxrpc_call *call = container_of(work, struct rxrpc_call, processor); struct rxrpc_net *rxnet = call->rxnet; + rxrpc_delete_call_timer(call); + rxrpc_put_connection(call->conn); rxrpc_put_peer(call->peer); kfree(call->rxtx_buffer); @@ -652,8 +684,6 @@ void rxrpc_cleanup_call(struct rxrpc_call *call) memset(&call->sock_node, 0xcd, sizeof(call->sock_node)); - del_timer_sync(&call->timer); - ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE); ASSERT(test_bit(RXRPC_CALL_RELEASED, &call->flags)); diff --git a/net/rxrpc/net_ns.c b/net/rxrpc/net_ns.c index 25bbc4cc8b135..f15d6942da453 100644 --- a/net/rxrpc/net_ns.c +++ b/net/rxrpc/net_ns.c @@ -113,8 +113,8 @@ static __net_exit void rxrpc_exit_net(struct net *net) struct rxrpc_net *rxnet = rxrpc_net(net); rxnet->live = false; - del_timer_sync(&rxnet->peer_keepalive_timer); cancel_work_sync(&rxnet->peer_keepalive_work); + 
del_timer_sync(&rxnet->peer_keepalive_timer); rxrpc_destroy_all_calls(rxnet); rxrpc_destroy_all_connections(rxnet); rxrpc_destroy_all_peers(rxnet); diff --git a/net/rxrpc/server_key.c b/net/rxrpc/server_key.c index ead3471307ee5..ee269e0e6ee87 100644 --- a/net/rxrpc/server_key.c +++ b/net/rxrpc/server_key.c @@ -84,6 +84,9 @@ static int rxrpc_preparse_s(struct key_preparsed_payload *prep) prep->payload.data[1] = (struct rxrpc_security *)sec; + if (!sec->preparse_server_key) + return -EINVAL; + return sec->preparse_server_key(prep); } @@ -91,7 +94,7 @@ static void rxrpc_free_preparse_s(struct key_preparsed_payload *prep) { const struct rxrpc_security *sec = prep->payload.data[1]; - if (sec) + if (sec && sec->free_preparse_server_key) sec->free_preparse_server_key(prep); } @@ -99,7 +102,7 @@ static void rxrpc_destroy_s(struct key *key) { const struct rxrpc_security *sec = key->payload.data[1]; - if (sec) + if (sec && sec->destroy_server_key) sec->destroy_server_key(key); } diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index ec19f625863a0..25718acc0ff00 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -605,22 +605,25 @@ static bool tcf_ct_skb_nfct_cached(struct net *net, struct sk_buff *skb, if (!ct) return false; if (!net_eq(net, read_pnet(&ct->ct_net))) - return false; + goto drop_ct; if (nf_ct_zone(ct)->id != zone_id) - return false; + goto drop_ct; /* Force conntrack entry direction. 
*/ if (force && CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) { if (nf_ct_is_confirmed(ct)) nf_ct_kill(ct); - nf_ct_put(ct); - nf_ct_set(skb, NULL, IP_CT_UNTRACKED); - - return false; + goto drop_ct; } return true; + +drop_ct: + nf_ct_put(ct); + nf_ct_set(skb, NULL, IP_CT_UNTRACKED); + + return false; } /* Trim the skb to the length specified by the IP/IPv6 header, diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 5ce1208a6ea36..130b5fda9c518 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1653,10 +1653,10 @@ static int tcf_chain_tp_insert(struct tcf_chain *chain, if (chain->flushing) return -EAGAIN; + RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain, chain_info)); if (*chain_info->pprev == chain->filter_chain) tcf_chain0_head_change(chain, tp); tcf_proto_get(tp); - RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain, chain_info)); rcu_assign_pointer(*chain_info->pprev, tp); return 0; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 1a9b1f140f9e9..ef5b3452254aa 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -1005,6 +1005,7 @@ static int fl_set_key_mpls(struct nlattr **tb, static void fl_set_key_vlan(struct nlattr **tb, __be16 ethertype, int vlan_id_key, int vlan_prio_key, + int vlan_next_eth_type_key, struct flow_dissector_key_vlan *key_val, struct flow_dissector_key_vlan *key_mask) { @@ -1023,6 +1024,11 @@ static void fl_set_key_vlan(struct nlattr **tb, } key_val->vlan_tpid = ethertype; key_mask->vlan_tpid = cpu_to_be16(~0); + if (tb[vlan_next_eth_type_key]) { + key_val->vlan_eth_type = + nla_get_be16(tb[vlan_next_eth_type_key]); + key_mask->vlan_eth_type = cpu_to_be16(~0); + } } static void fl_set_key_flag(u32 flower_key, u32 flower_mask, @@ -1519,8 +1525,9 @@ static int fl_set_key(struct net *net, struct nlattr **tb, if (eth_type_vlan(ethertype)) { fl_set_key_vlan(tb, ethertype, TCA_FLOWER_KEY_VLAN_ID, - TCA_FLOWER_KEY_VLAN_PRIO, &key->vlan, - &mask->vlan); + TCA_FLOWER_KEY_VLAN_PRIO, + 
TCA_FLOWER_KEY_VLAN_ETH_TYPE, + &key->vlan, &mask->vlan); if (tb[TCA_FLOWER_KEY_VLAN_ETH_TYPE]) { ethertype = nla_get_be16(tb[TCA_FLOWER_KEY_VLAN_ETH_TYPE]); @@ -1528,6 +1535,7 @@ static int fl_set_key(struct net *net, struct nlattr **tb, fl_set_key_vlan(tb, ethertype, TCA_FLOWER_KEY_CVLAN_ID, TCA_FLOWER_KEY_CVLAN_PRIO, + TCA_FLOWER_KEY_CVLAN_ETH_TYPE, &key->cvlan, &mask->cvlan); fl_set_key_val(tb, &key->basic.n_proto, TCA_FLOWER_KEY_CVLAN_ETH_TYPE, @@ -2886,13 +2894,13 @@ static int fl_dump_key(struct sk_buff *skb, struct net *net, goto nla_put_failure; if (mask->basic.n_proto) { - if (mask->cvlan.vlan_tpid) { + if (mask->cvlan.vlan_eth_type) { if (nla_put_be16(skb, TCA_FLOWER_KEY_CVLAN_ETH_TYPE, key->basic.n_proto)) goto nla_put_failure; - } else if (mask->vlan.vlan_tpid) { + } else if (mask->vlan.vlan_eth_type) { if (nla_put_be16(skb, TCA_FLOWER_KEY_VLAN_ETH_TYPE, - key->basic.n_proto)) + key->vlan.vlan_eth_type)) goto nla_put_failure; } } diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index 377f896bdedc4..b9c71a304d399 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -417,7 +417,8 @@ static int taprio_enqueue_one(struct sk_buff *skb, struct Qdisc *sch, { struct taprio_sched *q = qdisc_priv(sch); - if (skb->sk && sock_flag(skb->sk, SOCK_TXTIME)) { + /* sk_flags are only safe to use on full sockets. 
*/ + if (skb->sk && sk_fullsock(skb->sk) && sock_flag(skb->sk, SOCK_TXTIME)) { if (!is_valid_interval(skb, sch)) return qdisc_drop(skb, sch, to_free); } else if (TXTIME_ASSIST_IS_ENABLED(q->flags)) { diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index a18609f608fb7..e213aaf45d67c 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -914,6 +914,7 @@ static void sctp_outq_flush_ctrl(struct sctp_flush_ctx *ctx) ctx->asoc->base.sk->sk_err = -error; return; } + ctx->asoc->stats.octrlchunks++; break; case SCTP_CID_ABORT: @@ -938,7 +939,10 @@ static void sctp_outq_flush_ctrl(struct sctp_flush_ctx *ctx) case SCTP_CID_HEARTBEAT: if (chunk->pmtu_probe) { - sctp_packet_singleton(ctx->transport, chunk, ctx->gfp); + error = sctp_packet_singleton(ctx->transport, + chunk, ctx->gfp); + if (!error) + ctx->asoc->stats.octrlchunks++; break; } fallthrough; diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index cc544a97c4afd..52edee1322fc3 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -781,7 +781,7 @@ enum sctp_disposition sctp_sf_do_5_1D_ce(struct net *net, } } - if (security_sctp_assoc_request(new_asoc, chunk->skb)) { + if (security_sctp_assoc_request(new_asoc, chunk->head_skb ?: chunk->skb)) { sctp_association_free(new_asoc); return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); } @@ -930,6 +930,11 @@ enum sctp_disposition sctp_sf_do_5_1E_ca(struct net *net, if (!sctp_vtag_verify(chunk, asoc)) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + /* Set peer label for connection. */ + if (security_sctp_assoc_established((struct sctp_association *)asoc, + chunk->head_skb ?: chunk->skb)) + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + /* Verify that the chunk length for the COOKIE-ACK is OK. * If we don't do this, any bundled chunks may be junked. 
*/ @@ -945,9 +950,6 @@ enum sctp_disposition sctp_sf_do_5_1E_ca(struct net *net, */ sctp_add_cmd_sf(commands, SCTP_CMD_INIT_COUNTER_RESET, SCTP_NULL()); - /* Set peer label for connection. */ - security_inet_conn_established(ep->base.sk, chunk->skb); - /* RFC 2960 5.1 Normal Establishment of an Association * * E) Upon reception of the COOKIE ACK, endpoint "A" will move @@ -2260,7 +2262,7 @@ enum sctp_disposition sctp_sf_do_5_2_4_dupcook( } /* Update socket peer label if first association. */ - if (security_sctp_assoc_request(new_asoc, chunk->skb)) { + if (security_sctp_assoc_request(new_asoc, chunk->head_skb ?: chunk->skb)) { sctp_association_free(new_asoc); return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); } diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 3e1a9600be5e1..7b0427658056d 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -5636,7 +5636,7 @@ int sctp_do_peeloff(struct sock *sk, sctp_assoc_t id, struct socket **sockp) * Set the daddr and initialize id to something more random and also * copy over any ip options. 
*/ - sp->pf->to_sk_daddr(&asoc->peer.primary_addr, sk); + sp->pf->to_sk_daddr(&asoc->peer.primary_addr, sock->sk); sp->pf->copy_ip_options(sk, sock->sk); /* Populate the fields of the newsk from the oldsk and migrate the diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 284befa909676..303c5e56e4df4 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -2625,8 +2625,8 @@ static int smc_setsockopt(struct socket *sock, int level, int optname, sk->sk_state != SMC_CLOSED) { if (val) { SMC_STAT_INC(smc, ndly_cnt); - mod_delayed_work(smc->conn.lgr->tx_wq, - &smc->conn.tx_work, 0); + smc_tx_pending(&smc->conn); + cancel_delayed_work(&smc->conn.tx_work); } } break; @@ -2636,8 +2636,8 @@ static int smc_setsockopt(struct socket *sock, int level, int optname, sk->sk_state != SMC_CLOSED) { if (!val) { SMC_STAT_INC(smc, cork_cnt); - mod_delayed_work(smc->conn.lgr->tx_wq, - &smc->conn.tx_work, 0); + smc_tx_pending(&smc->conn); + cancel_delayed_work(&smc->conn.tx_work); } } break; diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index ce27399b38b1e..f9f3f59c79de2 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -191,7 +191,8 @@ static int smc_nl_ueid_dumpinfo(struct sk_buff *skb, u32 portid, u32 seq, flags, SMC_NETLINK_DUMP_UEID); if (!hdr) return -ENOMEM; - snprintf(ueid_str, sizeof(ueid_str), "%s", ueid); + memcpy(ueid_str, ueid, SMC_MAX_EID_LEN); + ueid_str[SMC_MAX_EID_LEN] = 0; if (nla_put_string(skb, SMC_NLA_EID_TABLE_ENTRY, ueid_str)) { genlmsg_cancel(skb, hdr); return -EMSGSIZE; @@ -252,7 +253,8 @@ int smc_nl_dump_seid(struct sk_buff *skb, struct netlink_callback *cb) goto end; smc_ism_get_system_eid(&seid); - snprintf(seid_str, sizeof(seid_str), "%s", seid); + memcpy(seid_str, seid, SMC_MAX_EID_LEN); + seid_str[SMC_MAX_EID_LEN] = 0; if (nla_put_string(skb, SMC_NLA_SEID_ENTRY, seid_str)) goto err; read_lock(&smc_clc_eid_table.lock); diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index be7d704976ffb..f40f6ed0fbdb4 100644 --- a/net/smc/smc_core.c +++ 
b/net/smc/smc_core.c @@ -1989,7 +1989,7 @@ static struct smc_buf_desc *smc_buf_get_slot(int compressed_bufsize, */ static inline int smc_rmb_wnd_update_limit(int rmbe_size) { - return min_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2); + return max_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2); } /* map an rmb buf to a link */ diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index 29f0a559d8847..4769f76505afc 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -311,8 +311,9 @@ static struct smc_ib_device *smc_pnet_find_ib(char *ib_name) list_for_each_entry(ibdev, &smc_ib_devices.list, list) { if (!strncmp(ibdev->ibdev->name, ib_name, sizeof(ibdev->ibdev->name)) || - !strncmp(dev_name(ibdev->ibdev->dev.parent), ib_name, - IB_DEVICE_NAME_MAX - 1)) { + (ibdev->ibdev->dev.parent && + !strncmp(dev_name(ibdev->ibdev->dev.parent), ib_name, + IB_DEVICE_NAME_MAX - 1))) { goto out; } } diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c index be241d53020f1..7b0b6e24582f9 100644 --- a/net/smc/smc_tx.c +++ b/net/smc/smc_tx.c @@ -597,27 +597,32 @@ int smc_tx_sndbuf_nonempty(struct smc_connection *conn) return rc; } -/* Wakeup sndbuf consumers from process context - * since there is more data to transmit - */ -void smc_tx_work(struct work_struct *work) +void smc_tx_pending(struct smc_connection *conn) { - struct smc_connection *conn = container_of(to_delayed_work(work), - struct smc_connection, - tx_work); struct smc_sock *smc = container_of(conn, struct smc_sock, conn); int rc; - lock_sock(&smc->sk); if (smc->sk.sk_err) - goto out; + return; rc = smc_tx_sndbuf_nonempty(conn); if (!rc && conn->local_rx_ctrl.prod_flags.write_blocked && !atomic_read(&conn->bytes_to_rcv)) conn->local_rx_ctrl.prod_flags.write_blocked = 0; +} + +/* Wakeup sndbuf consumers from process context + * since there is more data to transmit + */ +void smc_tx_work(struct work_struct *work) +{ + struct smc_connection *conn = container_of(to_delayed_work(work), + struct smc_connection, + tx_work); + struct 
smc_sock *smc = container_of(conn, struct smc_sock, conn); -out: + lock_sock(&smc->sk); + smc_tx_pending(conn); release_sock(&smc->sk); } diff --git a/net/smc/smc_tx.h b/net/smc/smc_tx.h index 07e6ad76224a0..a59f370b8b432 100644 --- a/net/smc/smc_tx.h +++ b/net/smc/smc_tx.h @@ -27,6 +27,7 @@ static inline int smc_tx_prepared_sends(struct smc_connection *conn) return smc_curs_diff(conn->sndbuf_desc->len, &sent, &prep); } +void smc_tx_pending(struct smc_connection *conn); void smc_tx_work(struct work_struct *work); void smc_tx_init(struct smc_sock *smc); int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index c83fe618767c4..0222ad4523a9d 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1065,7 +1065,9 @@ rpc_task_get_next_xprt(struct rpc_clnt *clnt) static void rpc_task_set_transport(struct rpc_task *task, struct rpc_clnt *clnt) { - if (task->tk_xprt) + if (task->tk_xprt && + !(test_bit(XPRT_OFFLINE, &task->tk_xprt->state) && + (task->tk_flags & RPC_TASK_MOVEABLE))) return; if (task->tk_flags & RPC_TASK_NO_ROUND_ROBIN) task->tk_xprt = rpc_task_get_first_xprt(clnt); @@ -1085,8 +1087,6 @@ void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt) task->tk_flags |= RPC_TASK_TIMEOUT; if (clnt->cl_noretranstimeo) task->tk_flags |= RPC_TASK_NO_RETRANS_TIMEOUT; - if (atomic_read(&clnt->cl_swapper)) - task->tk_flags |= RPC_TASK_SWAPPER; /* Add to the client's list of all tasks */ spin_lock(&clnt->cl_lock); list_add_tail(&task->tk_task, &clnt->cl_tasks); @@ -2197,6 +2197,7 @@ call_transmit_status(struct rpc_task *task) * socket just returned a connection error, * then hold onto the transport lock. 
*/ + case -ENOMEM: case -ENOBUFS: rpc_delay(task, HZ>>2); fallthrough; @@ -2280,6 +2281,7 @@ call_bc_transmit_status(struct rpc_task *task) case -ENOTCONN: case -EPIPE: break; + case -ENOMEM: case -ENOBUFS: rpc_delay(task, HZ>>2); fallthrough; @@ -2362,6 +2364,11 @@ call_status(struct rpc_task *task) case -EPIPE: case -EAGAIN: break; + case -ENFILE: + case -ENOBUFS: + case -ENOMEM: + rpc_delay(task, HZ>>2); + break; case -EIO: /* shutdown or soft timeout */ goto out_exit; diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index e2c835482791e..9020cedb7c95a 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -186,11 +186,6 @@ static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue, /* * Add new request to wait queue. - * - * Swapper tasks always get inserted at the head of the queue. - * This should avoid many nasty memory deadlocks and hopefully - * improve overall performance. - * Everyone else gets appended to the queue to ensure proper FIFO behavior. */ static void __rpc_add_wait_queue(struct rpc_wait_queue *queue, struct rpc_task *task, @@ -199,8 +194,6 @@ static void __rpc_add_wait_queue(struct rpc_wait_queue *queue, INIT_LIST_HEAD(&task->u.tk_wait.timer_list); if (RPC_IS_PRIORITY(queue)) __rpc_add_wait_queue_priority(queue, task, queue_priority); - else if (RPC_IS_SWAPPER(task)) - list_add(&task->u.tk_wait.list, &queue->tasks[0]); else list_add_tail(&task->u.tk_wait.list, &queue->tasks[0]); task->tk_waitqueue = queue; @@ -876,6 +869,15 @@ void rpc_release_calldata(const struct rpc_call_ops *ops, void *calldata) ops->rpc_release(calldata); } +static bool xprt_needs_memalloc(struct rpc_xprt *xprt, struct rpc_task *tk) +{ + if (!xprt) + return false; + if (!atomic_read(&xprt->swapper)) + return false; + return test_bit(XPRT_LOCKED, &xprt->state) && xprt->snd_task == tk; +} + /* * This is the RPC `scheduler' (or rather, the finite state machine). 
*/ @@ -884,6 +886,7 @@ static void __rpc_execute(struct rpc_task *task) struct rpc_wait_queue *queue; int task_is_async = RPC_IS_ASYNC(task); int status = 0; + unsigned long pflags = current->flags; WARN_ON_ONCE(RPC_IS_QUEUED(task)); if (RPC_IS_QUEUED(task)) @@ -906,6 +909,10 @@ static void __rpc_execute(struct rpc_task *task) } if (!do_action) break; + if (RPC_IS_SWAPPER(task) || + xprt_needs_memalloc(task->tk_xprt, task)) + current->flags |= PF_MEMALLOC; + trace_rpc_task_run_action(task, do_action); do_action(task); @@ -943,7 +950,7 @@ static void __rpc_execute(struct rpc_task *task) rpc_clear_running(task); spin_unlock(&queue->lock); if (task_is_async) - return; + goto out; /* sync task: sleep here */ trace_rpc_task_sync_sleep(task, task->tk_action); @@ -967,6 +974,8 @@ static void __rpc_execute(struct rpc_task *task) /* Release all resources associated with the task */ rpc_release_task(task); +out: + current_restore_flags(pflags, PF_MEMALLOC); } /* @@ -1023,8 +1032,8 @@ int rpc_malloc(struct rpc_task *task) struct rpc_buffer *buf; gfp_t gfp = GFP_NOFS; - if (RPC_IS_SWAPPER(task)) - gfp = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN; + if (RPC_IS_ASYNC(task)) + gfp = GFP_NOWAIT | __GFP_NOWARN; size += sizeof(struct rpc_buffer); if (size <= RPC_BUFFER_MAXSIZE) diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index b21ad79941474..4a423e481a281 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -1213,6 +1213,8 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req) dr->daddr = rqstp->rq_daddr; dr->argslen = rqstp->rq_arg.len >> 2; dr->xprt_hlen = rqstp->rq_xprt_hlen; + dr->xprt_ctxt = rqstp->rq_xprt_ctxt; + rqstp->rq_xprt_ctxt = NULL; /* back up head to the start of the buffer and copy */ skip = rqstp->rq_arg.len - rqstp->rq_arg.head[0].iov_len; @@ -1251,6 +1253,7 @@ static noinline int svc_deferred_recv(struct svc_rqst *rqstp) rqstp->rq_xprt_hlen = dr->xprt_hlen; rqstp->rq_daddr = dr->daddr; rqstp->rq_respages = 
rqstp->rq_pages; + rqstp->rq_xprt_ctxt = dr->xprt_ctxt; svc_xprt_received(rqstp->rq_xprt); return (dr->argslen<<2) - dr->xprt_hlen; } diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 478f857cdaed4..6ea3d87e11475 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1096,7 +1096,9 @@ static int svc_tcp_sendmsg(struct socket *sock, struct xdr_buf *xdr, int ret; *sentp = 0; - xdr_alloc_bvec(xdr, GFP_KERNEL); + ret = xdr_alloc_bvec(xdr, GFP_KERNEL); + if (ret < 0) + return ret; ret = kernel_sendmsg(sock, &msg, &rm, 1, rm.iov_len); if (ret < 0) diff --git a/net/sunrpc/sysfs.c b/net/sunrpc/sysfs.c index 05c758da6a92a..9d8a7d9f3e412 100644 --- a/net/sunrpc/sysfs.c +++ b/net/sunrpc/sysfs.c @@ -97,7 +97,7 @@ static ssize_t rpc_sysfs_xprt_dstaddr_show(struct kobject *kobj, return 0; ret = sprintf(buf, "%s\n", xprt->address_strings[RPC_DISPLAY_ADDR]); xprt_put(xprt); - return ret + 1; + return ret; } static ssize_t rpc_sysfs_xprt_srcaddr_show(struct kobject *kobj, @@ -105,33 +105,31 @@ static ssize_t rpc_sysfs_xprt_srcaddr_show(struct kobject *kobj, char *buf) { struct rpc_xprt *xprt = rpc_sysfs_xprt_kobj_get_xprt(kobj); - struct sockaddr_storage saddr; - struct sock_xprt *sock; - ssize_t ret = -1; + size_t buflen = PAGE_SIZE; + ssize_t ret = -ENOTSOCK; if (!xprt || !xprt_connected(xprt)) { - xprt_put(xprt); - return -ENOTCONN; + ret = -ENOTCONN; + } else if (xprt->ops->get_srcaddr) { + ret = xprt->ops->get_srcaddr(xprt, buf, buflen); + if (ret > 0) { + if (ret < buflen - 1) { + buf[ret] = '\n'; + ret++; + buf[ret] = '\0'; + } + } } - - sock = container_of(xprt, struct sock_xprt, xprt); - mutex_lock(&sock->recv_mutex); - if (sock->sock == NULL || - kernel_getsockname(sock->sock, (struct sockaddr *)&saddr) < 0) - goto out; - - ret = sprintf(buf, "%pISc\n", &saddr); -out: - mutex_unlock(&sock->recv_mutex); xprt_put(xprt); - return ret + 1; + return ret; } static ssize_t rpc_sysfs_xprt_info_show(struct kobject *kobj, - struct kobj_attribute *attr, - char 
*buf) + struct kobj_attribute *attr, char *buf) { struct rpc_xprt *xprt = rpc_sysfs_xprt_kobj_get_xprt(kobj); + unsigned short srcport = 0; + size_t buflen = PAGE_SIZE; ssize_t ret; if (!xprt || !xprt_connected(xprt)) { @@ -139,7 +137,11 @@ static ssize_t rpc_sysfs_xprt_info_show(struct kobject *kobj, return -ENOTCONN; } - ret = sprintf(buf, "last_used=%lu\ncur_cong=%lu\ncong_win=%lu\n" + if (xprt->ops->get_srcport) + srcport = xprt->ops->get_srcport(xprt); + + ret = snprintf(buf, buflen, + "last_used=%lu\ncur_cong=%lu\ncong_win=%lu\n" "max_num_slots=%u\nmin_num_slots=%u\nnum_reqs=%u\n" "binding_q_len=%u\nsending_q_len=%u\npending_q_len=%u\n" "backlog_q_len=%u\nmain_xprt=%d\nsrc_port=%u\n" @@ -147,14 +149,11 @@ static ssize_t rpc_sysfs_xprt_info_show(struct kobject *kobj, xprt->last_used, xprt->cong, xprt->cwnd, xprt->max_reqs, xprt->min_reqs, xprt->num_reqs, xprt->binding.qlen, xprt->sending.qlen, xprt->pending.qlen, - xprt->backlog.qlen, xprt->main, - (xprt->xprt_class->ident == XPRT_TRANSPORT_TCP) ? - get_srcport(xprt) : 0, + xprt->backlog.qlen, xprt->main, srcport, atomic_long_read(&xprt->queuelen), - (xprt->xprt_class->ident == XPRT_TRANSPORT_TCP) ? 
- xprt->address_strings[RPC_DISPLAY_PORT] : "0"); + xprt->address_strings[RPC_DISPLAY_PORT]); xprt_put(xprt); - return ret + 1; + return ret; } static ssize_t rpc_sysfs_xprt_state_show(struct kobject *kobj, @@ -201,7 +200,7 @@ static ssize_t rpc_sysfs_xprt_state_show(struct kobject *kobj, } xprt_put(xprt); - return ret + 1; + return ret; } static ssize_t rpc_sysfs_xprt_switch_info_show(struct kobject *kobj, @@ -220,7 +219,7 @@ static ssize_t rpc_sysfs_xprt_switch_info_show(struct kobject *kobj, xprt_switch->xps_nunique_destaddr_xprts, atomic_long_read(&xprt_switch->xps_queuelen)); xprt_switch_put(xprt_switch); - return ret + 1; + return ret; } static ssize_t rpc_sysfs_xprt_dstaddr_store(struct kobject *kobj, diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index a02de2bddb28b..d557a3cb2ad4a 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -929,12 +929,7 @@ void xprt_connect(struct rpc_task *task) if (!xprt_lock_write(xprt, task)) return; - if (test_and_clear_bit(XPRT_CLOSE_WAIT, &xprt->state)) { - trace_xprt_disconnect_cleanup(xprt); - xprt->ops->close(xprt); - } - - if (!xprt_connected(xprt)) { + if (!xprt_connected(xprt) && !test_bit(XPRT_CLOSE_WAIT, &xprt->state)) { task->tk_rqstp->rq_connect_cookie = xprt->connect_cookie; rpc_sleep_on_timeout(&xprt->pending, task, NULL, xprt_request_timeout(task->tk_rqstp)); @@ -1354,17 +1349,6 @@ xprt_request_enqueue_transmit(struct rpc_task *task) INIT_LIST_HEAD(&req->rq_xmit2); goto out; } - } else if (RPC_IS_SWAPPER(task)) { - list_for_each_entry(pos, &xprt->xmit_queue, rq_xmit) { - if (pos->rq_cong || pos->rq_bytes_sent) - continue; - if (RPC_IS_SWAPPER(pos->rq_task)) - continue; - /* Note: req is added _before_ pos */ - list_add_tail(&req->rq_xmit, &pos->rq_xmit); - INIT_LIST_HEAD(&req->rq_xmit2); - goto out; - } } else if (!req->rq_seqno) { list_for_each_entry(pos, &xprt->xmit_queue, rq_xmit) { if (pos->rq_task->tk_owner != task->tk_owner) @@ -1503,6 +1487,9 @@ bool xprt_prepare_transmit(struct rpc_task *task) 
return false; } + if (atomic_read(&xprt->swapper)) + /* This will be cleared in __rpc_execute */ + current->flags |= PF_MEMALLOC; return true; } @@ -1687,12 +1674,15 @@ static bool xprt_throttle_congested(struct rpc_xprt *xprt, struct rpc_task *task static struct rpc_rqst *xprt_dynamic_alloc_slot(struct rpc_xprt *xprt) { struct rpc_rqst *req = ERR_PTR(-EAGAIN); + gfp_t gfp_mask = GFP_KERNEL; if (xprt->num_reqs >= xprt->max_reqs) goto out; ++xprt->num_reqs; spin_unlock(&xprt->reserve_lock); - req = kzalloc(sizeof(struct rpc_rqst), GFP_NOFS); + if (current->flags & PF_WQ_WORKER) + gfp_mask |= __GFP_NORETRY | __GFP_NOWARN; + req = kzalloc(sizeof(*req), gfp_mask); spin_lock(&xprt->reserve_lock); if (req != NULL) goto out; @@ -2112,7 +2102,14 @@ static void xprt_destroy(struct rpc_xprt *xprt) */ wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_UNINTERRUPTIBLE); + /* + * xprt_schedule_autodisconnect() can run after XPRT_LOCKED + * is cleared. We use ->transport_lock to ensure the mod_timer() + * can only run *before* del_timer_sync(), never after.
+ */ + spin_lock(&xprt->transport_lock); del_timer_sync(&xprt->timer); + spin_unlock(&xprt->transport_lock); /* * Destroy sockets etc from the system workqueue so they can diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index cf76a6ad127b2..864131a9fc6e3 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -831,7 +831,7 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) goto out_err; if (ret == 0) goto out_drop; - rqstp->rq_xprt_hlen = ret; + rqstp->rq_xprt_hlen = 0; if (svc_rdma_is_reverse_direction_reply(xprt, ctxt)) goto out_backchannel; diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 42e375dbdadb4..6b7e10e5a141d 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -235,8 +235,11 @@ xprt_rdma_connect_worker(struct work_struct *work) struct rpcrdma_xprt *r_xprt = container_of(work, struct rpcrdma_xprt, rx_connect_worker.work); struct rpc_xprt *xprt = &r_xprt->rx_xprt; + unsigned int pflags = current->flags; int rc; + if (atomic_read(&xprt->swapper)) + current->flags |= PF_MEMALLOC; rc = rpcrdma_xprt_connect(r_xprt); xprt_clear_connecting(xprt); if (!rc) { @@ -250,6 +253,7 @@ xprt_rdma_connect_worker(struct work_struct *work) rpcrdma_xprt_disconnect(r_xprt); xprt_unlock_connect(xprt, r_xprt); xprt_wake_pending_tasks(xprt, rc); + current_restore_flags(pflags, PF_MEMALLOC); } /** @@ -517,7 +521,7 @@ xprt_rdma_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task) return; out_sleep: - task->tk_status = -EAGAIN; + task->tk_status = -ENOMEM; xprt_add_backlog(xprt, task); } @@ -570,8 +574,8 @@ xprt_rdma_allocate(struct rpc_task *task) gfp_t flags; flags = RPCRDMA_DEF_GFP; - if (RPC_IS_SWAPPER(task)) - flags = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN; + if (RPC_IS_ASYNC(task)) + flags = GFP_NOWAIT | __GFP_NOWARN; if (!rpcrdma_check_regbuf(r_xprt, req->rl_sendbuf, rqst->rq_callsize, flags)) diff --git 
a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 0f39e08ee580e..7aef2876beb38 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -763,12 +763,12 @@ xs_stream_start_connect(struct sock_xprt *transport) /** * xs_nospace - handle transmit was incomplete * @req: pointer to RPC request + * @transport: pointer to struct sock_xprt * */ -static int xs_nospace(struct rpc_rqst *req) +static int xs_nospace(struct rpc_rqst *req, struct sock_xprt *transport) { - struct rpc_xprt *xprt = req->rq_xprt; - struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); + struct rpc_xprt *xprt = &transport->xprt; struct sock *sk = transport->inet; int ret = -EAGAIN; @@ -779,25 +779,49 @@ static int xs_nospace(struct rpc_rqst *req) /* Don't race with disconnect */ if (xprt_connected(xprt)) { + struct socket_wq *wq; + + rcu_read_lock(); + wq = rcu_dereference(sk->sk_wq); + set_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags); + rcu_read_unlock(); + /* wait for more buffer space */ + set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); sk->sk_write_pending++; xprt_wait_for_buffer_space(xprt); } else ret = -ENOTCONN; spin_unlock(&xprt->transport_lock); + return ret; +} - /* Race breaker in case memory is freed before above code is called */ - if (ret == -EAGAIN) { - struct socket_wq *wq; +static int xs_sock_nospace(struct rpc_rqst *req) +{ + struct sock_xprt *transport = + container_of(req->rq_xprt, struct sock_xprt, xprt); + struct sock *sk = transport->inet; + int ret = -EAGAIN; - rcu_read_lock(); - wq = rcu_dereference(sk->sk_wq); - set_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags); - rcu_read_unlock(); + lock_sock(sk); + if (!sock_writeable(sk)) + ret = xs_nospace(req, transport); + release_sock(sk); + return ret; +} - sk->sk_write_space(sk); - } +static int xs_stream_nospace(struct rpc_rqst *req) +{ + struct sock_xprt *transport = + container_of(req->rq_xprt, struct sock_xprt, xprt); + struct sock *sk = transport->inet; + int ret = -EAGAIN; + + lock_sock(sk); + if 
(!sk_stream_memory_free(sk)) + ret = xs_nospace(req, transport); + release_sock(sk); return ret; } @@ -856,7 +880,7 @@ static int xs_local_send_request(struct rpc_rqst *req) /* Close the stream if the previous transmission was incomplete */ if (xs_send_request_was_aborted(transport, req)) { - xs_close(xprt); + xprt_force_disconnect(xprt); return -ENOTCONN; } @@ -887,14 +911,14 @@ static int xs_local_send_request(struct rpc_rqst *req) case -ENOBUFS: break; case -EAGAIN: - status = xs_nospace(req); + status = xs_stream_nospace(req); break; default: dprintk("RPC: sendmsg returned unrecognized error %d\n", -status); fallthrough; case -EPIPE: - xs_close(xprt); + xprt_force_disconnect(xprt); status = -ENOTCONN; } @@ -963,7 +987,7 @@ static int xs_udp_send_request(struct rpc_rqst *req) /* Should we call xs_close() here? */ break; case -EAGAIN: - status = xs_nospace(req); + status = xs_sock_nospace(req); break; case -ENETUNREACH: case -ENOBUFS: @@ -1083,7 +1107,7 @@ static int xs_tcp_send_request(struct rpc_rqst *req) /* Should we call xs_close() here? */ break; case -EAGAIN: - status = xs_nospace(req); + status = xs_stream_nospace(req); break; case -ECONNRESET: case -ECONNREFUSED: @@ -1179,6 +1203,16 @@ static void xs_reset_transport(struct sock_xprt *transport) if (sk == NULL) return; + /* + * Make sure we're calling this in a context from which it is safe + * to call __fput_sync(). In practice that means rpciod and the + * system workqueue. 
+ */ + if (!(current->flags & PF_WQ_WORKER)) { + WARN_ON_ONCE(1); + set_bit(XPRT_CLOSE_WAIT, &xprt->state); + return; + } if (atomic_read(&transport->xprt.swapper)) sk_clear_memalloc(sk); @@ -1202,7 +1236,7 @@ static void xs_reset_transport(struct sock_xprt *transport) mutex_unlock(&transport->recv_mutex); trace_rpc_socket_close(xprt, sock); - fput(filp); + __fput_sync(filp); xprt_disconnect_done(xprt); } @@ -1638,7 +1672,7 @@ static int xs_get_srcport(struct sock_xprt *transport) return port; } -unsigned short get_srcport(struct rpc_xprt *xprt) +static unsigned short xs_sock_srcport(struct rpc_xprt *xprt) { struct sock_xprt *sock = container_of(xprt, struct sock_xprt, xprt); unsigned short ret = 0; @@ -1648,7 +1682,25 @@ unsigned short get_srcport(struct rpc_xprt *xprt) mutex_unlock(&sock->recv_mutex); return ret; } -EXPORT_SYMBOL(get_srcport); + +static int xs_sock_srcaddr(struct rpc_xprt *xprt, char *buf, size_t buflen) +{ + struct sock_xprt *sock = container_of(xprt, struct sock_xprt, xprt); + union { + struct sockaddr sa; + struct sockaddr_storage st; + } saddr; + int ret = -ENOTCONN; + + mutex_lock(&sock->recv_mutex); + if (sock->sock) { + ret = kernel_getsockname(sock->sock, &saddr.sa); + if (ret >= 0) + ret = snprintf(buf, buflen, "%pISc", &saddr.sa); + } + mutex_unlock(&sock->recv_mutex); + return ret; +} static unsigned short xs_next_srcport(struct sock_xprt *transport, unsigned short port) { @@ -2052,7 +2104,10 @@ static void xs_udp_setup_socket(struct work_struct *work) struct rpc_xprt *xprt = &transport->xprt; struct socket *sock; int status = -EIO; + unsigned int pflags = current->flags; + if (atomic_read(&xprt->swapper)) + current->flags |= PF_MEMALLOC; sock = xs_create_sock(xprt, transport, xs_addr(xprt)->sa_family, SOCK_DGRAM, IPPROTO_UDP, false); @@ -2072,6 +2127,7 @@ static void xs_udp_setup_socket(struct work_struct *work) xprt_clear_connecting(xprt); xprt_unlock_connect(xprt, transport); xprt_wake_pending_tasks(xprt, status); + 
current_restore_flags(pflags, PF_MEMALLOC); } /** @@ -2231,11 +2287,19 @@ static void xs_tcp_setup_socket(struct work_struct *work) struct socket *sock = transport->sock; struct rpc_xprt *xprt = &transport->xprt; int status; + unsigned int pflags = current->flags; - if (!sock) { - sock = xs_create_sock(xprt, transport, - xs_addr(xprt)->sa_family, SOCK_STREAM, - IPPROTO_TCP, true); + if (atomic_read(&xprt->swapper)) + current->flags |= PF_MEMALLOC; + + if (xprt_connected(xprt)) + goto out; + if (test_and_clear_bit(XPRT_SOCK_CONNECT_SENT, + &transport->sock_state) || + !sock) { + xs_reset_transport(transport); + sock = xs_create_sock(xprt, transport, xs_addr(xprt)->sa_family, + SOCK_STREAM, IPPROTO_TCP, true); if (IS_ERR(sock)) { xprt_wake_pending_tasks(xprt, PTR_ERR(sock)); goto out; @@ -2259,6 +2323,7 @@ static void xs_tcp_setup_socket(struct work_struct *work) fallthrough; case -EINPROGRESS: /* SYN_SENT! */ + set_bit(XPRT_SOCK_CONNECT_SENT, &transport->sock_state); if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO) xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; fallthrough; @@ -2296,6 +2361,7 @@ static void xs_tcp_setup_socket(struct work_struct *work) xprt_clear_connecting(xprt); out_unlock: xprt_unlock_connect(xprt, transport); + current_restore_flags(pflags, PF_MEMALLOC); } /** @@ -2319,13 +2385,9 @@ static void xs_connect(struct rpc_xprt *xprt, struct rpc_task *task) WARN_ON_ONCE(!xprt_lock_connect(xprt, task, transport)); - if (transport->sock != NULL && !xprt_connecting(xprt)) { + if (transport->sock != NULL) { dprintk("RPC: xs_connect delayed xprt %p for %lu " - "seconds\n", - xprt, xprt->reestablish_timeout / HZ); - - /* Start by resetting any existing state */ - xs_reset_transport(transport); + "seconds\n", xprt, xprt->reestablish_timeout / HZ); delay = xprt_reconnect_delay(xprt); xprt_reconnect_backoff(xprt, XS_TCP_INIT_REEST_TO); @@ -2621,6 +2683,8 @@ static const struct rpc_xprt_ops xs_udp_ops = { .rpcbind = rpcb_getport_async, .set_port = 
xs_set_port, .connect = xs_connect, + .get_srcaddr = xs_sock_srcaddr, + .get_srcport = xs_sock_srcport, .buf_alloc = rpc_malloc, .buf_free = rpc_free, .send_request = xs_udp_send_request, @@ -2643,6 +2707,8 @@ static const struct rpc_xprt_ops xs_tcp_ops = { .rpcbind = rpcb_getport_async, .set_port = xs_set_port, .connect = xs_connect, + .get_srcaddr = xs_sock_srcaddr, + .get_srcport = xs_sock_srcport, .buf_alloc = rpc_malloc, .buf_free = rpc_free, .prepare_request = xs_stream_prepare_request, diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 7545321c3440b..17f8c523e33b0 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2852,7 +2852,8 @@ static void tipc_sk_retry_connect(struct sock *sk, struct sk_buff_head *list) /* Try again later if dest link is congested */ if (tsk->cong_link_cnt) { - sk_reset_timer(sk, &sk->sk_timer, msecs_to_jiffies(100)); + sk_reset_timer(sk, &sk->sk_timer, + jiffies + msecs_to_jiffies(100)); return; } /* Prepare SYN for retransmit */ diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index efc84845bb6b0..75a6995913839 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1495,7 +1495,7 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb, if (prot->version == TLS_1_3_VERSION || prot->cipher_type == TLS_CIPHER_CHACHA20_POLY1305) memcpy(iv + iv_offset, tls_ctx->rx.iv, - crypto_aead_ivsize(ctx->aead_recv)); + prot->iv_size + prot->salt_size); else memcpy(iv + iv_offset, tls_ctx->rx.iv, prot->salt_size); diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index c19569819866e..1e7ed5829ed51 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2084,7 +2084,7 @@ static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other if (ousk->oob_skb) consume_skb(ousk->oob_skb); - ousk->oob_skb = skb; + WRITE_ONCE(ousk->oob_skb, skb); scm_stat_add(other, skb); skb_queue_tail(&other->sk_receive_queue, skb); @@ -2602,9 +2602,8 @@ static int unix_stream_recv_urg(struct unix_stream_read_state *state) 
oob_skb = u->oob_skb; - if (!(state->flags & MSG_PEEK)) { - u->oob_skb = NULL; - } + if (!(state->flags & MSG_PEEK)) + WRITE_ONCE(u->oob_skb, NULL); unix_state_unlock(sk); @@ -2639,7 +2638,7 @@ static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk, skb = NULL; } else if (sock_flag(sk, SOCK_URGINLINE)) { if (!(flags & MSG_PEEK)) { - u->oob_skb = NULL; + WRITE_ONCE(u->oob_skb, NULL); consume_skb(skb); } } else if (!(flags & MSG_PEEK)) { @@ -3094,11 +3093,10 @@ static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) case SIOCATMARK: { struct sk_buff *skb; - struct unix_sock *u = unix_sk(sk); int answ = 0; skb = skb_peek(&sk->sk_receive_queue); - if (skb && skb == u->oob_skb) + if (skb && skb == READ_ONCE(unix_sk(sk)->oob_skb)) answ = 1; err = put_user(answ, (int __user *)arg); } @@ -3139,6 +3137,10 @@ static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wa mask |= EPOLLIN | EPOLLRDNORM; if (sk_is_readable(sk)) mask |= EPOLLIN | EPOLLRDNORM; +#if IS_ENABLED(CONFIG_AF_UNIX_OOB) + if (READ_ONCE(unix_sk(sk)->oob_skb)) + mask |= EPOLLPRI; +#endif /* Connection-based need to check for termination and startup */ if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) && diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index 5afc194a58bbd..ba1c8cc0c4671 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -622,6 +622,13 @@ static int virtio_vsock_probe(struct virtio_device *vdev) INIT_WORK(&vsock->event_work, virtio_transport_event_work); INIT_WORK(&vsock->send_pkt_work, virtio_transport_send_pkt_work); + if (virtio_has_feature(vdev, VIRTIO_VSOCK_F_SEQPACKET)) + vsock->seqpacket_allow = true; + + vdev->priv = vsock; + + virtio_device_ready(vdev); + mutex_lock(&vsock->tx_lock); vsock->tx_run = true; mutex_unlock(&vsock->tx_lock); @@ -636,10 +643,6 @@ static int virtio_vsock_probe(struct virtio_device *vdev) vsock->event_run = true; 
mutex_unlock(&vsock->event_lock); - if (virtio_has_feature(vdev, VIRTIO_VSOCK_F_SEQPACKET)) - vsock->seqpacket_allow = true; - - vdev->priv = vsock; rcu_assign_pointer(the_virtio_vsock, vsock); mutex_unlock(&the_virtio_vsock_mutex); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index c01fbcc848e86..dc171ca0d1b12 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -519,7 +519,8 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { .len = IEEE80211_MAX_MESH_ID_LEN }, [NL80211_ATTR_MPATH_NEXT_HOP] = NLA_POLICY_ETH_ADDR_COMPAT, - [NL80211_ATTR_REG_ALPHA2] = { .type = NLA_STRING, .len = 2 }, + /* allow 3 for NUL-termination, we used to declare this NLA_STRING */ + [NL80211_ATTR_REG_ALPHA2] = NLA_POLICY_RANGE(NLA_BINARY, 2, 3), [NL80211_ATTR_REG_RULES] = { .type = NLA_NESTED }, [NL80211_ATTR_BSS_CTS_PROT] = { .type = NLA_U8 }, diff --git a/net/wireless/scan.c b/net/wireless/scan.c index b888522f133b3..4a6d864329106 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -700,8 +700,12 @@ static bool cfg80211_find_ssid_match(struct cfg80211_colocated_ap *ap, for (i = 0; i < request->n_ssids; i++) { /* wildcard ssid in the scan request */ - if (!request->ssids[i].ssid_len) + if (!request->ssids[i].ssid_len) { + if (ap->multi_bss && !ap->transmitted_bssid) + continue; + return true; + } if (ap->ssid_len && ap->ssid_len == request->ssids[i].ssid_len) { @@ -827,6 +831,9 @@ static int cfg80211_scan_6ghz(struct cfg80211_registered_device *rdev) !cfg80211_find_ssid_match(ap, request)) continue; + if (!request->n_ssids && ap->multi_bss && !ap->transmitted_bssid) + continue; + cfg80211_scan_req_add_chan(request, chan, true); memcpy(scan_6ghz_params->bssid, ap->bssid, ETH_ALEN); scan_6ghz_params->short_ssid = ap->short_ssid; @@ -2011,11 +2018,13 @@ cfg80211_inform_single_bss_data(struct wiphy *wiphy, /* this is a nontransmitting bss, we need to add it to * transmitting bss' list if it is not there */ + 
spin_lock_bh(&rdev->bss_lock); if (cfg80211_add_nontrans_list(non_tx_data->tx_bss, &res->pub)) { if (__cfg80211_unlink_bss(rdev, res)) rdev->bss_generation++; } + spin_unlock_bh(&rdev->bss_lock); } trace_cfg80211_return_bss(&res->pub); diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 3583354a7d7fe..3a171828638b1 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -1765,10 +1765,15 @@ void x25_kill_by_neigh(struct x25_neigh *nb) write_lock_bh(&x25_list_lock); - sk_for_each(s, &x25_list) - if (x25_sk(s)->neighbour == nb) + sk_for_each(s, &x25_list) { + if (x25_sk(s)->neighbour == nb) { + write_unlock_bh(&x25_list_lock); + lock_sock(s); x25_disconnect(s, ENETUNREACH, 0, 0); - + release_sock(s); + write_lock_bh(&x25_list_lock); + } + } write_unlock_bh(&x25_list_lock); /* Remove any related forwards */ diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 28ef3f4465ae9..ac343cd8ff3f6 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -403,18 +403,8 @@ EXPORT_SYMBOL(xsk_tx_peek_release_desc_batch); static int xsk_wakeup(struct xdp_sock *xs, u8 flags) { struct net_device *dev = xs->dev; - int err; - - rcu_read_lock(); - err = dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id, flags); - rcu_read_unlock(); - - return err; -} -static int xsk_zc_xmit(struct xdp_sock *xs) -{ - return xsk_wakeup(xs, XDP_WAKEUP_TX); + return dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id, flags); } static void xsk_destruct_skb(struct sk_buff *skb) @@ -533,6 +523,12 @@ static int xsk_generic_xmit(struct sock *sk) mutex_lock(&xs->mutex); + /* Since we dropped the RCU read lock, the socket state might have changed. 
*/ + if (unlikely(!xsk_is_bound(xs))) { + err = -ENXIO; + goto out; + } + if (xs->queue_id >= xs->dev->real_num_tx_queues) goto out; @@ -596,16 +592,26 @@ static int xsk_generic_xmit(struct sock *sk) return err; } -static int __xsk_sendmsg(struct sock *sk) +static int xsk_xmit(struct sock *sk) { struct xdp_sock *xs = xdp_sk(sk); + int ret; if (unlikely(!(xs->dev->flags & IFF_UP))) return -ENETDOWN; if (unlikely(!xs->tx)) return -ENOBUFS; - return xs->zc ? xsk_zc_xmit(xs) : xsk_generic_xmit(sk); + if (xs->zc) + return xsk_wakeup(xs, XDP_WAKEUP_TX); + + /* Drop the RCU lock since the SKB path might sleep. */ + rcu_read_unlock(); + ret = xsk_generic_xmit(sk); + /* Reaquire RCU lock before going into common code. */ + rcu_read_lock(); + + return ret; } static bool xsk_no_wakeup(struct sock *sk) @@ -619,7 +625,7 @@ static bool xsk_no_wakeup(struct sock *sk) #endif } -static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len) +static int __xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len) { bool need_wait = !(m->msg_flags & MSG_DONTWAIT); struct sock *sk = sock->sk; @@ -639,11 +645,22 @@ static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len) pool = xs->pool; if (pool->cached_need_wakeup & XDP_WAKEUP_TX) - return __xsk_sendmsg(sk); + return xsk_xmit(sk); return 0; } -static int xsk_recvmsg(struct socket *sock, struct msghdr *m, size_t len, int flags) +static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len) +{ + int ret; + + rcu_read_lock(); + ret = __xsk_sendmsg(sock, m, total_len); + rcu_read_unlock(); + + return ret; +} + +static int __xsk_recvmsg(struct socket *sock, struct msghdr *m, size_t len, int flags) { bool need_wait = !(flags & MSG_DONTWAIT); struct sock *sk = sock->sk; @@ -669,6 +686,17 @@ static int xsk_recvmsg(struct socket *sock, struct msghdr *m, size_t len, int fl return 0; } +static int xsk_recvmsg(struct socket *sock, struct msghdr *m, size_t len, int flags) +{ + 
int ret; + + rcu_read_lock(); + ret = __xsk_recvmsg(sock, m, len, flags); + rcu_read_unlock(); + + return ret; +} + static __poll_t xsk_poll(struct file *file, struct socket *sock, struct poll_table_struct *wait) { @@ -679,8 +707,11 @@ static __poll_t xsk_poll(struct file *file, struct socket *sock, sock_poll_wait(file, sock, wait); - if (unlikely(!xsk_is_bound(xs))) + rcu_read_lock(); + if (unlikely(!xsk_is_bound(xs))) { + rcu_read_unlock(); return mask; + } pool = xs->pool; @@ -689,7 +720,7 @@ static __poll_t xsk_poll(struct file *file, struct socket *sock, xsk_wakeup(xs, pool->cached_need_wakeup); else /* Poll needs to drive Tx also in copy mode */ - __xsk_sendmsg(sk); + xsk_xmit(sk); } if (xs->rx && !xskq_prod_is_empty(xs->rx)) @@ -697,6 +728,7 @@ static __poll_t xsk_poll(struct file *file, struct socket *sock, if (xs->tx && xsk_tx_writeable(xs)) mask |= EPOLLOUT | EPOLLWRNORM; + rcu_read_unlock(); return mask; } @@ -728,7 +760,6 @@ static void xsk_unbind_dev(struct xdp_sock *xs) /* Wait for driver to stop using the xdp socket. 
*/ xp_del_xsk(xs->pool, xs); - xs->dev = NULL; synchronize_net(); dev_put(dev); } diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index fd39bb660ebcd..0202a90b65e3a 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -584,9 +584,13 @@ u32 xp_alloc_batch(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 max) u32 nb_entries1 = 0, nb_entries2; if (unlikely(pool->dma_need_sync)) { + struct xdp_buff *buff; + /* Slow path */ - *xdp = xp_alloc(pool); - return !!*xdp; + buff = xp_alloc(pool); + if (buff) + *xdp = buff; + return !!buff; } if (unlikely(pool->free_list_cnt)) { diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c index aa50864e4415a..9f3446af50ce2 100644 --- a/samples/bpf/xdpsock_user.c +++ b/samples/bpf/xdpsock_user.c @@ -1984,15 +1984,15 @@ int main(int argc, char **argv) setlocale(LC_ALL, ""); + prev_time = get_nsecs(); + start_time = prev_time; + if (!opt_quiet) { ret = pthread_create(&pt, NULL, poller, NULL); if (ret) exit_with_error(ret); } - prev_time = get_nsecs(); - start_time = prev_time; - /* Configure sched priority for better wake-up accuracy */ memset(&schparam, 0, sizeof(schparam)); schparam.sched_priority = opt_schprio; diff --git a/samples/landlock/sandboxer.c b/samples/landlock/sandboxer.c index 7a15910d21718..8859fc1935428 100644 --- a/samples/landlock/sandboxer.c +++ b/samples/landlock/sandboxer.c @@ -134,6 +134,7 @@ static int populate_ruleset( ret = 0; out_free_name: + free(path_list); free(env_path_name); return ret; } diff --git a/scripts/atomic/fallbacks/read_acquire b/scripts/atomic/fallbacks/read_acquire index 803ba75610766..a0ea1d26e6b2e 100755 --- a/scripts/atomic/fallbacks/read_acquire +++ b/scripts/atomic/fallbacks/read_acquire @@ -2,6 +2,15 @@ cat <counter); + ${int} ret; + + if (__native_word(${atomic}_t)) { + ret = smp_load_acquire(&(v)->counter); + } else { + ret = arch_${atomic}_read(v); + __atomic_acquire_fence(); + } + + return ret; } EOF diff --git 
a/scripts/atomic/fallbacks/set_release b/scripts/atomic/fallbacks/set_release index 86ede759f24ea..05cdb7f42477a 100755 --- a/scripts/atomic/fallbacks/set_release +++ b/scripts/atomic/fallbacks/set_release @@ -2,6 +2,11 @@ cat <counter, i); + if (__native_word(${atomic}_t)) { + smp_store_release(&(v)->counter, i); + } else { + __atomic_release_fence(); + arch_${atomic}_set(v, i); + } } EOF diff --git a/scripts/dtc/Makefile b/scripts/dtc/Makefile index 95aaf7431bffa..1cba78e1dce68 100644 --- a/scripts/dtc/Makefile +++ b/scripts/dtc/Makefile @@ -29,7 +29,7 @@ dtc-objs += yamltree.o # To include installed in a non-default path HOSTCFLAGS_yamltree.o := $(shell pkg-config --cflags yaml-0.1) # To link libyaml installed in a non-default path -HOSTLDLIBS_dtc := $(shell pkg-config yaml-0.1 --libs) +HOSTLDLIBS_dtc := $(shell pkg-config --libs yaml-0.1) endif # Generated files need one more search path to include headers in source tree diff --git a/scripts/gcc-plugins/latent_entropy_plugin.c b/scripts/gcc-plugins/latent_entropy_plugin.c index 589454bce9301..8425da41de0da 100644 --- a/scripts/gcc-plugins/latent_entropy_plugin.c +++ b/scripts/gcc-plugins/latent_entropy_plugin.c @@ -86,25 +86,31 @@ static struct plugin_info latent_entropy_plugin_info = { .help = "disable\tturn off latent entropy instrumentation\n", }; -static unsigned HOST_WIDE_INT seed; -/* - * get_random_seed() (this is a GCC function) generates the seed. - * This is a simple random generator without any cryptographic security because - * the entropy doesn't come from here. 
- */ +static unsigned HOST_WIDE_INT deterministic_seed; +static unsigned HOST_WIDE_INT rnd_buf[32]; +static size_t rnd_idx = ARRAY_SIZE(rnd_buf); +static int urandom_fd = -1; + static unsigned HOST_WIDE_INT get_random_const(void) { - unsigned int i; - unsigned HOST_WIDE_INT ret = 0; - - for (i = 0; i < 8 * sizeof(ret); i++) { - ret = (ret << 1) | (seed & 1); - seed >>= 1; - if (ret & 1) - seed ^= 0xD800000000000000ULL; + if (deterministic_seed) { + unsigned HOST_WIDE_INT w = deterministic_seed; + w ^= w << 13; + w ^= w >> 7; + w ^= w << 17; + deterministic_seed = w; + return deterministic_seed; } - return ret; + if (urandom_fd < 0) { + urandom_fd = open("/dev/urandom", O_RDONLY); + gcc_assert(urandom_fd >= 0); + } + if (rnd_idx >= ARRAY_SIZE(rnd_buf)) { + gcc_assert(read(urandom_fd, rnd_buf, sizeof(rnd_buf)) == sizeof(rnd_buf)); + rnd_idx = 0; + } + return rnd_buf[rnd_idx++]; } static tree tree_get_random_const(tree type) @@ -537,8 +543,6 @@ static void latent_entropy_start_unit(void *gcc_data __unused, tree type, id; int quals; - seed = get_random_seed(false); - if (in_lto_p) return; @@ -573,6 +577,12 @@ __visible int plugin_init(struct plugin_name_args *plugin_info, const struct plugin_argument * const argv = plugin_info->argv; int i; + /* + * Call get_random_seed() with noinit=true, so that this returns + * 0 in the case where no seed has been passed via -frandom-seed. 
+ */ + deterministic_seed = get_random_seed(true); + static const struct ggc_root_tab gt_ggc_r_gt_latent_entropy[] = { { .base = &latent_entropy_decl, diff --git a/scripts/gcc-plugins/stackleak_plugin.c b/scripts/gcc-plugins/stackleak_plugin.c index e9db7dcb3e5f4..b04aa8e91a41f 100644 --- a/scripts/gcc-plugins/stackleak_plugin.c +++ b/scripts/gcc-plugins/stackleak_plugin.c @@ -429,6 +429,23 @@ static unsigned int stackleak_cleanup_execute(void) return 0; } +/* + * STRING_CST may or may not be NUL terminated: + * https://gcc.gnu.org/onlinedocs/gccint/Constant-expressions.html + */ +static inline bool string_equal(tree node, const char *string, int length) +{ + if (TREE_STRING_LENGTH(node) < length) + return false; + if (TREE_STRING_LENGTH(node) > length + 1) + return false; + if (TREE_STRING_LENGTH(node) == length + 1 && + TREE_STRING_POINTER(node)[length] != '\0') + return false; + return !memcmp(TREE_STRING_POINTER(node), string, length); +} +#define STRING_EQUAL(node, str) string_equal(node, str, strlen(str)) + static bool stackleak_gate(void) { tree section; @@ -438,13 +455,13 @@ static bool stackleak_gate(void) if (section && TREE_VALUE(section)) { section = TREE_VALUE(TREE_VALUE(section)); - if (!strncmp(TREE_STRING_POINTER(section), ".init.text", 10)) + if (STRING_EQUAL(section, ".init.text")) return false; - if (!strncmp(TREE_STRING_POINTER(section), ".devinit.text", 13)) + if (STRING_EQUAL(section, ".devinit.text")) return false; - if (!strncmp(TREE_STRING_POINTER(section), ".cpuinit.text", 13)) + if (STRING_EQUAL(section, ".cpuinit.text")) return false; - if (!strncmp(TREE_STRING_POINTER(section), ".meminit.text", 13)) + if (STRING_EQUAL(section, ".meminit.text")) return false; } diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 6bfa332179140..e04ae56931e2e 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -669,7 +669,7 @@ static void handle_modversion(const struct module *mod, unsigned int crc; if (sym->st_shndx == 
SHN_UNDEF) { - warn("EXPORT symbol \"%s\" [%s%s] version ...\n" + warn("EXPORT symbol \"%s\" [%s%s] version generation failed, symbol will not be versioned.\n" "Is \"%s\" prototyped in ?\n", symname, mod->name, mod->is_vmlinux ? "" : ".ko", symname); diff --git a/security/integrity/evm/evm_main.c b/security/integrity/evm/evm_main.c index 08f907382c618..7d87772f0ce68 100644 --- a/security/integrity/evm/evm_main.c +++ b/security/integrity/evm/evm_main.c @@ -86,7 +86,7 @@ static int __init evm_set_fixmode(char *str) else pr_err("invalid \"%s\" mode", str); - return 0; + return 1; } __setup("evm=", evm_set_fixmode); diff --git a/security/keys/keyctl_pkey.c b/security/keys/keyctl_pkey.c index 5de0d599a2748..97bc27bbf0797 100644 --- a/security/keys/keyctl_pkey.c +++ b/security/keys/keyctl_pkey.c @@ -135,15 +135,23 @@ static int keyctl_pkey_params_get_2(const struct keyctl_pkey_params __user *_par switch (op) { case KEYCTL_PKEY_ENCRYPT: + if (uparams.in_len > info.max_dec_size || + uparams.out_len > info.max_enc_size) + return -EINVAL; + break; case KEYCTL_PKEY_DECRYPT: if (uparams.in_len > info.max_enc_size || uparams.out_len > info.max_dec_size) return -EINVAL; break; case KEYCTL_PKEY_SIGN: + if (uparams.in_len > info.max_data_size || + uparams.out_len > info.max_sig_size) + return -EINVAL; + break; case KEYCTL_PKEY_VERIFY: - if (uparams.in_len > info.max_sig_size || - uparams.out_len > info.max_data_size) + if (uparams.in_len > info.max_data_size || + uparams.in2_len > info.max_sig_size) return -EINVAL; break; default: @@ -151,7 +159,7 @@ static int keyctl_pkey_params_get_2(const struct keyctl_pkey_params __user *_par } params->in_len = uparams.in_len; - params->out_len = uparams.out_len; + params->out_len = uparams.out_len; /* Note: same as in2_len */ return 0; } diff --git a/security/keys/trusted-keys/trusted_core.c b/security/keys/trusted-keys/trusted_core.c index d5c891d8d3534..9b9d3ef79cbe3 100644 --- a/security/keys/trusted-keys/trusted_core.c +++ 
b/security/keys/trusted-keys/trusted_core.c @@ -27,10 +27,10 @@ module_param_named(source, trusted_key_source, charp, 0); MODULE_PARM_DESC(source, "Select trusted keys source (tpm or tee)"); static const struct trusted_key_source trusted_key_sources[] = { -#if defined(CONFIG_TCG_TPM) +#if IS_REACHABLE(CONFIG_TCG_TPM) { "tpm", &trusted_key_tpm_ops }, #endif -#if defined(CONFIG_TEE) +#if IS_REACHABLE(CONFIG_TEE) { "tee", &trusted_key_tee_ops }, #endif }; @@ -351,7 +351,7 @@ static int __init init_trusted(void) static void __exit cleanup_trusted(void) { - static_call(trusted_key_exit)(); + static_call_cond(trusted_key_exit)(); } late_initcall(init_trusted); diff --git a/security/landlock/syscalls.c b/security/landlock/syscalls.c index 32396962f04d6..7e27ce394020d 100644 --- a/security/landlock/syscalls.c +++ b/security/landlock/syscalls.c @@ -192,7 +192,7 @@ SYSCALL_DEFINE3(landlock_create_ruleset, return PTR_ERR(ruleset); /* Creates anonymous FD referring to the ruleset. */ - ruleset_fd = anon_inode_getfd("landlock-ruleset", &ruleset_fops, + ruleset_fd = anon_inode_getfd("[landlock-ruleset]", &ruleset_fops, ruleset, O_RDWR | O_CLOEXEC); if (ruleset_fd < 0) landlock_put_ruleset(ruleset); diff --git a/security/security.c b/security/security.c index 22261d79f3333..b7cf5cbfdc677 100644 --- a/security/security.c +++ b/security/security.c @@ -884,9 +884,22 @@ int security_fs_context_dup(struct fs_context *fc, struct fs_context *src_fc) return call_int_hook(fs_context_dup, 0, fc, src_fc); } -int security_fs_context_parse_param(struct fs_context *fc, struct fs_parameter *param) +int security_fs_context_parse_param(struct fs_context *fc, + struct fs_parameter *param) { - return call_int_hook(fs_context_parse_param, -ENOPARAM, fc, param); + struct security_hook_list *hp; + int trc; + int rc = -ENOPARAM; + + hlist_for_each_entry(hp, &security_hook_heads.fs_context_parse_param, + list) { + trc = hp->hook.fs_context_parse_param(fc, param); + if (trc == 0) + rc = 0; + else if (trc 
!= -ENOPARAM) + return trc; + } + return rc; } int security_sb_alloc(struct super_block *sb) @@ -2391,6 +2404,13 @@ void security_sctp_sk_clone(struct sctp_association *asoc, struct sock *sk, } EXPORT_SYMBOL(security_sctp_sk_clone); +int security_sctp_assoc_established(struct sctp_association *asoc, + struct sk_buff *skb) +{ + return call_int_hook(sctp_assoc_established, 0, asoc, skb); +} +EXPORT_SYMBOL(security_sctp_assoc_established); + #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_INFINIBAND diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 5b6895e4fc29e..ea725891e566e 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -342,6 +342,10 @@ static void inode_free_security(struct inode *inode) struct selinux_mnt_opts { const char *fscontext, *context, *rootcontext, *defcontext; + u32 fscontext_sid; + u32 context_sid; + u32 rootcontext_sid; + u32 defcontext_sid; }; static void selinux_free_mnt_opts(void *mnt_opts) @@ -479,7 +483,7 @@ static int selinux_is_sblabel_mnt(struct super_block *sb) static int sb_check_xattr_support(struct super_block *sb) { - struct superblock_security_struct *sbsec = sb->s_security; + struct superblock_security_struct *sbsec = selinux_superblock(sb); struct dentry *root = sb->s_root; struct inode *root_inode = d_backing_inode(root); u32 sid; @@ -598,15 +602,14 @@ static int bad_option(struct superblock_security_struct *sbsec, char flag, return 0; } -static int parse_sid(struct super_block *sb, const char *s, u32 *sid, - gfp_t gfp) +static int parse_sid(struct super_block *sb, const char *s, u32 *sid) { int rc = security_context_str_to_sid(&selinux_state, s, - sid, gfp); + sid, GFP_KERNEL); if (rc) pr_warn("SELinux: security_context_str_to_sid" "(%s) failed for (dev %s, type %s) errno=%d\n", - s, sb->s_id, sb->s_type->name, rc); + s, sb ? sb->s_id : "?", sb ? 
sb->s_type->name : "?", rc); return rc; } @@ -673,8 +676,7 @@ static int selinux_set_mnt_opts(struct super_block *sb, */ if (opts) { if (opts->fscontext) { - rc = parse_sid(sb, opts->fscontext, &fscontext_sid, - GFP_KERNEL); + rc = parse_sid(sb, opts->fscontext, &fscontext_sid); if (rc) goto out; if (bad_option(sbsec, FSCONTEXT_MNT, sbsec->sid, @@ -683,8 +685,7 @@ static int selinux_set_mnt_opts(struct super_block *sb, sbsec->flags |= FSCONTEXT_MNT; } if (opts->context) { - rc = parse_sid(sb, opts->context, &context_sid, - GFP_KERNEL); + rc = parse_sid(sb, opts->context, &context_sid); if (rc) goto out; if (bad_option(sbsec, CONTEXT_MNT, sbsec->mntpoint_sid, @@ -693,8 +694,7 @@ static int selinux_set_mnt_opts(struct super_block *sb, sbsec->flags |= CONTEXT_MNT; } if (opts->rootcontext) { - rc = parse_sid(sb, opts->rootcontext, &rootcontext_sid, - GFP_KERNEL); + rc = parse_sid(sb, opts->rootcontext, &rootcontext_sid); if (rc) goto out; if (bad_option(sbsec, ROOTCONTEXT_MNT, root_isec->sid, @@ -703,8 +703,7 @@ static int selinux_set_mnt_opts(struct super_block *sb, sbsec->flags |= ROOTCONTEXT_MNT; } if (opts->defcontext) { - rc = parse_sid(sb, opts->defcontext, &defcontext_sid, - GFP_KERNEL); + rc = parse_sid(sb, opts->defcontext, &defcontext_sid); if (rc) goto out; if (bad_option(sbsec, DEFCONTEXT_MNT, sbsec->def_sid, @@ -996,21 +995,29 @@ static int selinux_add_opt(int token, const char *s, void **mnt_opts) if (opts->context || opts->defcontext) goto err; opts->context = s; + if (selinux_initialized(&selinux_state)) + parse_sid(NULL, s, &opts->context_sid); break; case Opt_fscontext: if (opts->fscontext) goto err; opts->fscontext = s; + if (selinux_initialized(&selinux_state)) + parse_sid(NULL, s, &opts->fscontext_sid); break; case Opt_rootcontext: if (opts->rootcontext) goto err; opts->rootcontext = s; + if (selinux_initialized(&selinux_state)) + parse_sid(NULL, s, &opts->rootcontext_sid); break; case Opt_defcontext: if (opts->context || opts->defcontext) goto 
err; opts->defcontext = s; + if (selinux_initialized(&selinux_state)) + parse_sid(NULL, s, &opts->defcontext_sid); break; } @@ -2647,9 +2654,7 @@ static int selinux_sb_eat_lsm_opts(char *options, void **mnt_opts) static int selinux_sb_mnt_opts_compat(struct super_block *sb, void *mnt_opts) { struct selinux_mnt_opts *opts = mnt_opts; - struct superblock_security_struct *sbsec = sb->s_security; - u32 sid; - int rc; + struct superblock_security_struct *sbsec = selinux_superblock(sb); /* * Superblock not initialized (i.e. no options) - reject if any @@ -2666,34 +2671,36 @@ static int selinux_sb_mnt_opts_compat(struct super_block *sb, void *mnt_opts) return (sbsec->flags & SE_MNTMASK) ? 1 : 0; if (opts->fscontext) { - rc = parse_sid(sb, opts->fscontext, &sid, GFP_NOWAIT); - if (rc) + if (opts->fscontext_sid == SECSID_NULL) return 1; - if (bad_option(sbsec, FSCONTEXT_MNT, sbsec->sid, sid)) + else if (bad_option(sbsec, FSCONTEXT_MNT, sbsec->sid, + opts->fscontext_sid)) return 1; } if (opts->context) { - rc = parse_sid(sb, opts->context, &sid, GFP_NOWAIT); - if (rc) + if (opts->context_sid == SECSID_NULL) return 1; - if (bad_option(sbsec, CONTEXT_MNT, sbsec->mntpoint_sid, sid)) + else if (bad_option(sbsec, CONTEXT_MNT, sbsec->mntpoint_sid, + opts->context_sid)) return 1; } if (opts->rootcontext) { - struct inode_security_struct *root_isec; - - root_isec = backing_inode_security(sb->s_root); - rc = parse_sid(sb, opts->rootcontext, &sid, GFP_NOWAIT); - if (rc) - return 1; - if (bad_option(sbsec, ROOTCONTEXT_MNT, root_isec->sid, sid)) + if (opts->rootcontext_sid == SECSID_NULL) return 1; + else { + struct inode_security_struct *root_isec; + + root_isec = backing_inode_security(sb->s_root); + if (bad_option(sbsec, ROOTCONTEXT_MNT, root_isec->sid, + opts->rootcontext_sid)) + return 1; + } } if (opts->defcontext) { - rc = parse_sid(sb, opts->defcontext, &sid, GFP_NOWAIT); - if (rc) + if (opts->defcontext_sid == SECSID_NULL) return 1; - if (bad_option(sbsec, DEFCONTEXT_MNT, 
sbsec->def_sid, sid)) + else if (bad_option(sbsec, DEFCONTEXT_MNT, sbsec->def_sid, + opts->defcontext_sid)) return 1; } return 0; @@ -2713,14 +2720,14 @@ static int selinux_sb_remount(struct super_block *sb, void *mnt_opts) return 0; if (opts->fscontext) { - rc = parse_sid(sb, opts->fscontext, &sid, GFP_KERNEL); + rc = parse_sid(sb, opts->fscontext, &sid); if (rc) return rc; if (bad_option(sbsec, FSCONTEXT_MNT, sbsec->sid, sid)) goto out_bad_option; } if (opts->context) { - rc = parse_sid(sb, opts->context, &sid, GFP_KERNEL); + rc = parse_sid(sb, opts->context, &sid); if (rc) return rc; if (bad_option(sbsec, CONTEXT_MNT, sbsec->mntpoint_sid, sid)) @@ -2729,14 +2736,14 @@ static int selinux_sb_remount(struct super_block *sb, void *mnt_opts) if (opts->rootcontext) { struct inode_security_struct *root_isec; root_isec = backing_inode_security(sb->s_root); - rc = parse_sid(sb, opts->rootcontext, &sid, GFP_KERNEL); + rc = parse_sid(sb, opts->rootcontext, &sid); if (rc) return rc; if (bad_option(sbsec, ROOTCONTEXT_MNT, root_isec->sid, sid)) goto out_bad_option; } if (opts->defcontext) { - rc = parse_sid(sb, opts->defcontext, &sid, GFP_KERNEL); + rc = parse_sid(sb, opts->defcontext, &sid); if (rc) return rc; if (bad_option(sbsec, DEFCONTEXT_MNT, sbsec->def_sid, sid)) @@ -2860,10 +2867,9 @@ static int selinux_fs_context_parse_param(struct fs_context *fc, return opt; rc = selinux_add_opt(opt, param->string, &fc->security); - if (!rc) { + if (!rc) param->string = NULL; - rc = 1; - } + return rc; } @@ -3745,6 +3751,12 @@ static int selinux_file_ioctl(struct file *file, unsigned int cmd, CAP_OPT_NONE, true); break; + case FIOCLEX: + case FIONCLEX: + if (!selinux_policycap_ioctl_skip_cloexec()) + error = ioctl_has_perm(cred, file, FILE__IOCTL, (u16) cmd); + break; + /* default case assumes that the command will go * to the file's ioctl() function. 
*/ @@ -5299,37 +5311,38 @@ static void selinux_sock_graft(struct sock *sk, struct socket *parent) sksec->sclass = isec->sclass; } -/* Called whenever SCTP receives an INIT chunk. This happens when an incoming - * connect(2), sctp_connectx(3) or sctp_sendmsg(3) (with no association - * already present). +/* + * Determines peer_secid for the asoc and updates socket's peer label + * if it's the first association on the socket. */ -static int selinux_sctp_assoc_request(struct sctp_association *asoc, - struct sk_buff *skb) +static int selinux_sctp_process_new_assoc(struct sctp_association *asoc, + struct sk_buff *skb) { - struct sk_security_struct *sksec = asoc->base.sk->sk_security; + struct sock *sk = asoc->base.sk; + u16 family = sk->sk_family; + struct sk_security_struct *sksec = sk->sk_security; struct common_audit_data ad; struct lsm_network_audit net = {0,}; - u8 peerlbl_active; - u32 peer_sid = SECINITSID_UNLABELED; - u32 conn_sid; - int err = 0; + int err; - if (!selinux_policycap_extsockclass()) - return 0; + /* handle mapped IPv4 packets arriving via IPv6 sockets */ + if (family == PF_INET6 && skb->protocol == htons(ETH_P_IP)) + family = PF_INET; - peerlbl_active = selinux_peerlbl_enabled(); + if (selinux_peerlbl_enabled()) { + asoc->peer_secid = SECSID_NULL; - if (peerlbl_active) { /* This will return peer_sid = SECSID_NULL if there are * no peer labels, see security_net_peersid_resolve(). 
*/ - err = selinux_skb_peerlbl_sid(skb, asoc->base.sk->sk_family, - &peer_sid); + err = selinux_skb_peerlbl_sid(skb, family, &asoc->peer_secid); if (err) return err; - if (peer_sid == SECSID_NULL) - peer_sid = SECINITSID_UNLABELED; + if (asoc->peer_secid == SECSID_NULL) + asoc->peer_secid = SECINITSID_UNLABELED; + } else { + asoc->peer_secid = SECINITSID_UNLABELED; } if (sksec->sctp_assoc_state == SCTP_ASSOC_UNSET) { @@ -5340,8 +5353,8 @@ static int selinux_sctp_assoc_request(struct sctp_association *asoc, * then it is approved by policy and used as the primary * peer SID for getpeercon(3). */ - sksec->peer_sid = peer_sid; - } else if (sksec->peer_sid != peer_sid) { + sksec->peer_sid = asoc->peer_secid; + } else if (sksec->peer_sid != asoc->peer_secid) { /* Other association peer SIDs are checked to enforce * consistency among the peer SIDs. */ @@ -5349,11 +5362,32 @@ static int selinux_sctp_assoc_request(struct sctp_association *asoc, ad.u.net = &net; ad.u.net->sk = asoc->base.sk; err = avc_has_perm(&selinux_state, - sksec->peer_sid, peer_sid, sksec->sclass, - SCTP_SOCKET__ASSOCIATION, &ad); + sksec->peer_sid, asoc->peer_secid, + sksec->sclass, SCTP_SOCKET__ASSOCIATION, + &ad); if (err) return err; } + return 0; +} + +/* Called whenever SCTP receives an INIT or COOKIE ECHO chunk. This + * happens on an incoming connect(2), sctp_connectx(3) or + * sctp_sendmsg(3) (with no association already present). + */ +static int selinux_sctp_assoc_request(struct sctp_association *asoc, + struct sk_buff *skb) +{ + struct sk_security_struct *sksec = asoc->base.sk->sk_security; + u32 conn_sid; + int err; + + if (!selinux_policycap_extsockclass()) + return 0; + + err = selinux_sctp_process_new_assoc(asoc, skb); + if (err) + return err; /* Compute the MLS component for the connection and store * the information in asoc. This will be used by SCTP TCP type @@ -5361,17 +5395,36 @@ static int selinux_sctp_assoc_request(struct sctp_association *asoc, * socket to be generated. 
selinux_sctp_sk_clone() will then * plug this into the new socket. */ - err = selinux_conn_sid(sksec->sid, peer_sid, &conn_sid); + err = selinux_conn_sid(sksec->sid, asoc->peer_secid, &conn_sid); if (err) return err; asoc->secid = conn_sid; - asoc->peer_secid = peer_sid; /* Set any NetLabel labels including CIPSO/CALIPSO options. */ return selinux_netlbl_sctp_assoc_request(asoc, skb); } +/* Called when SCTP receives a COOKIE ACK chunk as the final + * response to an association request (initiated by us). + */ +static int selinux_sctp_assoc_established(struct sctp_association *asoc, + struct sk_buff *skb) +{ + struct sk_security_struct *sksec = asoc->base.sk->sk_security; + + if (!selinux_policycap_extsockclass()) + return 0; + + /* Inherit secid from the parent socket - this will be picked up + * by selinux_sctp_sk_clone() if the association gets peeled off + * into a new socket. + */ + asoc->secid = sksec->sid; + + return selinux_sctp_process_new_assoc(asoc, skb); +} + /* Check if sctp IPv4/IPv6 addresses are valid for binding or connecting * based on their @optname.
*/ @@ -7192,6 +7245,7 @@ static struct security_hook_list selinux_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(sctp_assoc_request, selinux_sctp_assoc_request), LSM_HOOK_INIT(sctp_sk_clone, selinux_sctp_sk_clone), LSM_HOOK_INIT(sctp_bind_connect, selinux_sctp_bind_connect), + LSM_HOOK_INIT(sctp_assoc_established, selinux_sctp_assoc_established), LSM_HOOK_INIT(inet_conn_request, selinux_inet_conn_request), LSM_HOOK_INIT(inet_csk_clone, selinux_inet_csk_clone), LSM_HOOK_INIT(inet_conn_established, selinux_inet_conn_established), diff --git a/security/selinux/include/policycap.h b/security/selinux/include/policycap.h index 2ec038efbb03c..a9e572ca4fd96 100644 --- a/security/selinux/include/policycap.h +++ b/security/selinux/include/policycap.h @@ -11,6 +11,7 @@ enum { POLICYDB_CAPABILITY_CGROUPSECLABEL, POLICYDB_CAPABILITY_NNP_NOSUID_TRANSITION, POLICYDB_CAPABILITY_GENFS_SECLABEL_SYMLINKS, + POLICYDB_CAPABILITY_IOCTL_SKIP_CLOEXEC, __POLICYDB_CAPABILITY_MAX }; #define POLICYDB_CAPABILITY_MAX (__POLICYDB_CAPABILITY_MAX - 1) diff --git a/security/selinux/include/policycap_names.h b/security/selinux/include/policycap_names.h index b89289f092c93..ebd64afe1defd 100644 --- a/security/selinux/include/policycap_names.h +++ b/security/selinux/include/policycap_names.h @@ -12,7 +12,8 @@ const char *selinux_policycap_names[__POLICYDB_CAPABILITY_MAX] = { "always_check_network", "cgroup_seclabel", "nnp_nosuid_transition", - "genfs_seclabel_symlinks" + "genfs_seclabel_symlinks", + "ioctl_skip_cloexec" }; #endif /* _SELINUX_POLICYCAP_NAMES_H_ */ diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h index ac0ece01305a6..c0d966020ebdd 100644 --- a/security/selinux/include/security.h +++ b/security/selinux/include/security.h @@ -219,6 +219,13 @@ static inline bool selinux_policycap_genfs_seclabel_symlinks(void) return READ_ONCE(state->policycap[POLICYDB_CAPABILITY_GENFS_SECLABEL_SYMLINKS]); } +static inline bool selinux_policycap_ioctl_skip_cloexec(void) 
+{ + struct selinux_state *state = &selinux_state; + + return READ_ONCE(state->policycap[POLICYDB_CAPABILITY_IOCTL_SKIP_CLOEXEC]); +} + struct selinux_policy_convert_data; struct selinux_load_state { diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index e4cd7cb856f37..f2f6203e0fff5 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -2127,6 +2127,8 @@ static int sel_fill_super(struct super_block *sb, struct fs_context *fc) } ret = sel_make_avc_files(dentry); + if (ret) + goto err; dentry = sel_make_dir(sb->s_root, "ss", &fsi->last_ino); if (IS_ERR(dentry)) { diff --git a/security/selinux/xfrm.c b/security/selinux/xfrm.c index 90697317895fb..c576832febc67 100644 --- a/security/selinux/xfrm.c +++ b/security/selinux/xfrm.c @@ -347,7 +347,7 @@ int selinux_xfrm_state_alloc_acquire(struct xfrm_state *x, int rc; struct xfrm_sec_ctx *ctx; char *ctx_str = NULL; - int str_len; + u32 str_len; if (!polsec) return 0; diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 14b279cc75c96..6207762dbdb13 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -2510,7 +2510,7 @@ static int smk_ipv6_check(struct smack_known *subject, #ifdef CONFIG_AUDIT smk_ad_init_net(&ad, __func__, LSM_AUDIT_DATA_NET, &net); ad.a.u.net->family = PF_INET6; - ad.a.u.net->dport = ntohs(address->sin6_port); + ad.a.u.net->dport = address->sin6_port; if (act == SMK_RECEIVING) ad.a.u.net->v6info.saddr = address->sin6_addr; else diff --git a/security/tomoyo/load_policy.c b/security/tomoyo/load_policy.c index 3445ae6fd4794..363b65be87ab7 100644 --- a/security/tomoyo/load_policy.c +++ b/security/tomoyo/load_policy.c @@ -24,7 +24,7 @@ static const char *tomoyo_loader; static int __init tomoyo_loader_setup(char *str) { tomoyo_loader = str; - return 0; + return 1; } __setup("TOMOYO_loader=", tomoyo_loader_setup); @@ -64,7 +64,7 @@ static const char *tomoyo_trigger; static int __init tomoyo_trigger_setup(char *str) { 
tomoyo_trigger = str; - return 0; + return 1; } __setup("TOMOYO_trigger=", tomoyo_trigger_setup); diff --git a/sound/core/init.c b/sound/core/init.c index 31ba7024e3add..726a8353201f8 100644 --- a/sound/core/init.c +++ b/sound/core/init.c @@ -209,6 +209,12 @@ static void __snd_card_release(struct device *dev, void *data) * snd_card_register(), the very first devres action to call snd_card_free() * is added automatically. In that way, the resource disconnection is assured * at first, then released in the expected order. + * + * If an error happens at the probe before snd_card_register() is called and + * there have been other devres resources, you'd need to free the card manually + * via snd_card_free() call in the error; otherwise it may lead to UAF due to + * devres call orders. You can use snd_card_free_on_error() helper for + * handling it more easily. */ int snd_devm_card_new(struct device *parent, int idx, const char *xid, struct module *module, size_t extra_size, @@ -235,6 +241,28 @@ int snd_devm_card_new(struct device *parent, int idx, const char *xid, } EXPORT_SYMBOL_GPL(snd_devm_card_new); +/** + * snd_card_free_on_error - a small helper for handling devm probe errors + * @dev: the managed device object + * @ret: the return code from the probe callback + * + * This function handles the explicit snd_card_free() call at the error from + * the probe callback. It's just a small helper for simplifying the error + * handling with the managed devices. 
+ */ +int snd_card_free_on_error(struct device *dev, int ret) +{ + struct snd_card *card; + + if (!ret) + return 0; + card = devres_find(dev, __snd_card_release, NULL, NULL); + if (card) + snd_card_free(card); + return ret; +} +EXPORT_SYMBOL_GPL(snd_card_free_on_error); + static int snd_card_init(struct snd_card *card, struct device *parent, int idx, const char *xid, struct module *module, size_t extra_size) diff --git a/sound/core/memalloc.c b/sound/core/memalloc.c index 6fd763d4d15b1..15dc7160ba34e 100644 --- a/sound/core/memalloc.c +++ b/sound/core/memalloc.c @@ -499,6 +499,10 @@ static const struct snd_malloc_ops snd_dma_wc_ops = { }; #endif /* CONFIG_X86 */ +#ifdef CONFIG_SND_DMA_SGBUF +static void *snd_dma_sg_fallback_alloc(struct snd_dma_buffer *dmab, size_t size); +#endif + /* * Non-contiguous pages allocator */ @@ -509,8 +513,18 @@ static void *snd_dma_noncontig_alloc(struct snd_dma_buffer *dmab, size_t size) sgt = dma_alloc_noncontiguous(dmab->dev.dev, size, dmab->dev.dir, DEFAULT_GFP, 0); - if (!sgt) + if (!sgt) { +#ifdef CONFIG_SND_DMA_SGBUF + if (dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC_SG) + dmab->dev.type = SNDRV_DMA_TYPE_DEV_WC_SG_FALLBACK; + else + dmab->dev.type = SNDRV_DMA_TYPE_DEV_SG_FALLBACK; + return snd_dma_sg_fallback_alloc(dmab, size); +#else return NULL; +#endif + } + dmab->dev.need_sync = dma_need_sync(dmab->dev.dev, sg_dma_address(sgt->sgl)); p = dma_vmap_noncontiguous(dmab->dev.dev, size, sgt); @@ -633,6 +647,8 @@ static void *snd_dma_sg_wc_alloc(struct snd_dma_buffer *dmab, size_t size) if (!p) return NULL; + if (dmab->dev.type != SNDRV_DMA_TYPE_DEV_WC_SG) + return p; for_each_sgtable_page(sgt, &iter, 0) set_memory_wc(sg_wc_address(&iter), 1); return p; @@ -665,6 +681,95 @@ static const struct snd_malloc_ops snd_dma_sg_wc_ops = { .get_page = snd_dma_noncontig_get_page, .get_chunk_size = snd_dma_noncontig_get_chunk_size, }; + +/* Fallback SG-buffer allocations for x86 */ +struct snd_dma_sg_fallback { + size_t count; + struct page 
**pages; + dma_addr_t *addrs; +}; + +static void __snd_dma_sg_fallback_free(struct snd_dma_buffer *dmab, + struct snd_dma_sg_fallback *sgbuf) +{ + size_t i; + + if (sgbuf->count && dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC_SG_FALLBACK) + set_pages_array_wb(sgbuf->pages, sgbuf->count); + for (i = 0; i < sgbuf->count && sgbuf->pages[i]; i++) + dma_free_coherent(dmab->dev.dev, PAGE_SIZE, + page_address(sgbuf->pages[i]), + sgbuf->addrs[i]); + kvfree(sgbuf->pages); + kvfree(sgbuf->addrs); + kfree(sgbuf); +} + +static void *snd_dma_sg_fallback_alloc(struct snd_dma_buffer *dmab, size_t size) +{ + struct snd_dma_sg_fallback *sgbuf; + struct page **pages; + size_t i, count; + void *p; + + sgbuf = kzalloc(sizeof(*sgbuf), GFP_KERNEL); + if (!sgbuf) + return NULL; + count = PAGE_ALIGN(size) >> PAGE_SHIFT; + pages = kvcalloc(count, sizeof(*pages), GFP_KERNEL); + if (!pages) + goto error; + sgbuf->pages = pages; + sgbuf->addrs = kvcalloc(count, sizeof(*sgbuf->addrs), GFP_KERNEL); + if (!sgbuf->addrs) + goto error; + + for (i = 0; i < count; sgbuf->count++, i++) { + p = dma_alloc_coherent(dmab->dev.dev, PAGE_SIZE, + &sgbuf->addrs[i], DEFAULT_GFP); + if (!p) + goto error; + sgbuf->pages[i] = virt_to_page(p); + } + + if (dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC_SG_FALLBACK) + set_pages_array_wc(pages, count); + p = vmap(pages, count, VM_MAP, PAGE_KERNEL); + if (!p) + goto error; + dmab->private_data = sgbuf; + return p; + + error: + __snd_dma_sg_fallback_free(dmab, sgbuf); + return NULL; +} + +static void snd_dma_sg_fallback_free(struct snd_dma_buffer *dmab) +{ + vunmap(dmab->area); + __snd_dma_sg_fallback_free(dmab, dmab->private_data); +} + +static int snd_dma_sg_fallback_mmap(struct snd_dma_buffer *dmab, + struct vm_area_struct *area) +{ + struct snd_dma_sg_fallback *sgbuf = dmab->private_data; + + if (dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC_SG_FALLBACK) + area->vm_page_prot = pgprot_writecombine(area->vm_page_prot); + return vm_map_pages(area, sgbuf->pages, sgbuf->count); +} + 
+static const struct snd_malloc_ops snd_dma_sg_fallback_ops = { + .alloc = snd_dma_sg_fallback_alloc, + .free = snd_dma_sg_fallback_free, + .mmap = snd_dma_sg_fallback_mmap, + /* reuse vmalloc helpers */ + .get_addr = snd_dma_vmalloc_get_addr, + .get_page = snd_dma_vmalloc_get_page, + .get_chunk_size = snd_dma_vmalloc_get_chunk_size, +}; #endif /* CONFIG_SND_DMA_SGBUF */ /* @@ -736,6 +841,10 @@ static const struct snd_malloc_ops *dma_ops[] = { #ifdef CONFIG_GENERIC_ALLOCATOR [SNDRV_DMA_TYPE_DEV_IRAM] = &snd_dma_iram_ops, #endif /* CONFIG_GENERIC_ALLOCATOR */ +#ifdef CONFIG_SND_DMA_SGBUF + [SNDRV_DMA_TYPE_DEV_SG_FALLBACK] = &snd_dma_sg_fallback_ops, + [SNDRV_DMA_TYPE_DEV_WC_SG_FALLBACK] = &snd_dma_sg_fallback_ops, +#endif #endif /* CONFIG_HAS_DMA */ }; diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c index 3ee9edf858156..f158f0abd25d8 100644 --- a/sound/core/oss/pcm_oss.c +++ b/sound/core/oss/pcm_oss.c @@ -774,6 +774,11 @@ static int snd_pcm_oss_period_size(struct snd_pcm_substream *substream, if (oss_period_size < 16) return -EINVAL; + + /* don't allocate too large period; 1MB period must be enough */ + if (oss_period_size > 1024 * 1024) + return -ENOMEM; + runtime->oss.period_bytes = oss_period_size; runtime->oss.period_frames = 1; runtime->oss.periods = oss_periods; @@ -1043,10 +1048,9 @@ static int snd_pcm_oss_change_params_locked(struct snd_pcm_substream *substream) goto failure; } #endif - oss_period_size *= oss_frame_size; - - oss_buffer_size = oss_period_size * runtime->oss.periods; - if (oss_buffer_size < 0) { + oss_period_size = array_size(oss_period_size, oss_frame_size); + oss_buffer_size = array_size(oss_period_size, runtime->oss.periods); + if (oss_buffer_size <= 0) { err = -EINVAL; goto failure; } diff --git a/sound/core/oss/pcm_plugin.c b/sound/core/oss/pcm_plugin.c index 061ba06bc9262..82e180c776ae1 100644 --- a/sound/core/oss/pcm_plugin.c +++ b/sound/core/oss/pcm_plugin.c @@ -62,7 +62,10 @@ static int snd_pcm_plugin_alloc(struct 
snd_pcm_plugin *plugin, snd_pcm_uframes_t width = snd_pcm_format_physical_width(format->format); if (width < 0) return width; - size = frames * format->channels * width; + size = array3_size(frames, format->channels, width); + /* check for too large period size once again */ + if (size > 1024 * 1024) + return -ENOMEM; if (snd_BUG_ON(size % 8)) return -ENXIO; size /= 8; diff --git a/sound/core/pcm.c b/sound/core/pcm.c index ba4a987ed1c62..977d54320a5ca 100644 --- a/sound/core/pcm.c +++ b/sound/core/pcm.c @@ -969,6 +969,8 @@ int snd_pcm_attach_substream(struct snd_pcm *pcm, int stream, init_waitqueue_head(&runtime->tsleep); runtime->status->state = SNDRV_PCM_STATE_OPEN; + mutex_init(&runtime->buffer_mutex); + atomic_set(&runtime->buffer_accessing, 0); substream->runtime = runtime; substream->private_data = pcm->private_data; @@ -1002,6 +1004,7 @@ void snd_pcm_detach_substream(struct snd_pcm_substream *substream) } else { substream->runtime = NULL; } + mutex_destroy(&runtime->buffer_mutex); kfree(runtime); put_pid(substream->pid); substream->pid = NULL; diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c index f2090025236b9..1fc7c50ffa625 100644 --- a/sound/core/pcm_lib.c +++ b/sound/core/pcm_lib.c @@ -2273,6 +2273,10 @@ snd_pcm_sframes_t __snd_pcm_lib_xfer(struct snd_pcm_substream *substream, err = -EINVAL; goto _end_unlock; } + if (!atomic_inc_unless_negative(&runtime->buffer_accessing)) { + err = -EBUSY; + goto _end_unlock; + } snd_pcm_stream_unlock_irq(substream); if (!is_playback) snd_pcm_dma_buffer_sync(substream, SNDRV_DMA_SYNC_CPU); @@ -2281,6 +2285,7 @@ snd_pcm_sframes_t __snd_pcm_lib_xfer(struct snd_pcm_substream *substream, if (is_playback) snd_pcm_dma_buffer_sync(substream, SNDRV_DMA_SYNC_DEVICE); snd_pcm_stream_lock_irq(substream); + atomic_dec(&runtime->buffer_accessing); if (err < 0) goto _end_unlock; err = pcm_accessible_state(runtime); diff --git a/sound/core/pcm_memory.c b/sound/core/pcm_memory.c index b70ce3b69ab4d..8848d2f3160d8 100644 --- 
a/sound/core/pcm_memory.c +++ b/sound/core/pcm_memory.c @@ -163,19 +163,20 @@ static void snd_pcm_lib_preallocate_proc_write(struct snd_info_entry *entry, size_t size; struct snd_dma_buffer new_dmab; + mutex_lock(&substream->pcm->open_mutex); if (substream->runtime) { buffer->error = -EBUSY; - return; + goto unlock; } if (!snd_info_get_line(buffer, line, sizeof(line))) { snd_info_get_str(str, line, sizeof(str)); size = simple_strtoul(str, NULL, 10) * 1024; if ((size != 0 && size < 8192) || size > substream->dma_max) { buffer->error = -EINVAL; - return; + goto unlock; } if (substream->dma_buffer.bytes == size) - return; + goto unlock; memset(&new_dmab, 0, sizeof(new_dmab)); new_dmab.dev = substream->dma_buffer.dev; if (size > 0) { @@ -189,7 +190,7 @@ static void snd_pcm_lib_preallocate_proc_write(struct snd_info_entry *entry, substream->pcm->card->number, substream->pcm->device, substream->stream ? 'c' : 'p', substream->number, substream->pcm->name, size); - return; + goto unlock; } substream->buffer_bytes_max = size; } else { @@ -201,6 +202,8 @@ static void snd_pcm_lib_preallocate_proc_write(struct snd_info_entry *entry, } else { buffer->error = -EINVAL; } + unlock: + mutex_unlock(&substream->pcm->open_mutex); } static inline void preallocate_info_init(struct snd_pcm_substream *substream) diff --git a/sound/core/pcm_misc.c b/sound/core/pcm_misc.c index 4866aed97aacc..5588b6a1ee8bd 100644 --- a/sound/core/pcm_misc.c +++ b/sound/core/pcm_misc.c @@ -433,7 +433,7 @@ int snd_pcm_format_set_silence(snd_pcm_format_t format, void *data, unsigned int return 0; width = pcm_formats[(INT)format].phys; /* physical width */ pat = pcm_formats[(INT)format].silence; - if (! 
width) + if (!width || !pat) return -EINVAL; /* signed or 1 byte data */ if (pcm_formats[(INT)format].signd == 1 || width <= 8) { diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index a056b3ef3c843..4adaee62ef333 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -685,6 +685,30 @@ static int snd_pcm_hw_params_choose(struct snd_pcm_substream *pcm, return 0; } +/* acquire buffer_mutex; if it's in r/w operation, return -EBUSY, otherwise + * block the further r/w operations + */ +static int snd_pcm_buffer_access_lock(struct snd_pcm_runtime *runtime) +{ + if (!atomic_dec_unless_positive(&runtime->buffer_accessing)) + return -EBUSY; + mutex_lock(&runtime->buffer_mutex); + return 0; /* keep buffer_mutex, unlocked by below */ +} + +/* release buffer_mutex and clear r/w access flag */ +static void snd_pcm_buffer_access_unlock(struct snd_pcm_runtime *runtime) +{ + mutex_unlock(&runtime->buffer_mutex); + atomic_inc(&runtime->buffer_accessing); +} + +#if IS_ENABLED(CONFIG_SND_PCM_OSS) +#define is_oss_stream(substream) ((substream)->oss.oss) +#else +#define is_oss_stream(substream) false +#endif + static int snd_pcm_hw_params(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *params) { @@ -696,22 +720,25 @@ static int snd_pcm_hw_params(struct snd_pcm_substream *substream, if (PCM_RUNTIME_CHECK(substream)) return -ENXIO; runtime = substream->runtime; + err = snd_pcm_buffer_access_lock(runtime); + if (err < 0) + return err; snd_pcm_stream_lock_irq(substream); switch (runtime->status->state) { case SNDRV_PCM_STATE_OPEN: case SNDRV_PCM_STATE_SETUP: case SNDRV_PCM_STATE_PREPARED: + if (!is_oss_stream(substream) && + atomic_read(&substream->mmap_count)) + err = -EBADFD; break; default: - snd_pcm_stream_unlock_irq(substream); - return -EBADFD; + err = -EBADFD; + break; } snd_pcm_stream_unlock_irq(substream); -#if IS_ENABLED(CONFIG_SND_PCM_OSS) - if (!substream->oss.oss) -#endif - if (atomic_read(&substream->mmap_count)) - return -EBADFD; + 
if (err) + goto unlock; snd_pcm_sync_stop(substream, true); @@ -799,16 +826,21 @@ static int snd_pcm_hw_params(struct snd_pcm_substream *substream, if (usecs >= 0) cpu_latency_qos_add_request(&substream->latency_pm_qos_req, usecs); - return 0; + err = 0; _error: - /* hardware might be unusable from this time, - so we force application to retry to set - the correct hardware parameter settings */ - snd_pcm_set_state(substream, SNDRV_PCM_STATE_OPEN); - if (substream->ops->hw_free != NULL) - substream->ops->hw_free(substream); - if (substream->managed_buffer_alloc) - snd_pcm_lib_free_pages(substream); + if (err) { + /* hardware might be unusable from this time, + * so we force application to retry to set + * the correct hardware parameter settings + */ + snd_pcm_set_state(substream, SNDRV_PCM_STATE_OPEN); + if (substream->ops->hw_free != NULL) + substream->ops->hw_free(substream); + if (substream->managed_buffer_alloc) + snd_pcm_lib_free_pages(substream); + } + unlock: + snd_pcm_buffer_access_unlock(runtime); return err; } @@ -848,26 +880,33 @@ static int do_hw_free(struct snd_pcm_substream *substream) static int snd_pcm_hw_free(struct snd_pcm_substream *substream) { struct snd_pcm_runtime *runtime; - int result; + int result = 0; if (PCM_RUNTIME_CHECK(substream)) return -ENXIO; runtime = substream->runtime; + result = snd_pcm_buffer_access_lock(runtime); + if (result < 0) + return result; snd_pcm_stream_lock_irq(substream); switch (runtime->status->state) { case SNDRV_PCM_STATE_SETUP: case SNDRV_PCM_STATE_PREPARED: + if (atomic_read(&substream->mmap_count)) + result = -EBADFD; break; default: - snd_pcm_stream_unlock_irq(substream); - return -EBADFD; + result = -EBADFD; + break; } snd_pcm_stream_unlock_irq(substream); - if (atomic_read(&substream->mmap_count)) - return -EBADFD; + if (result) + goto unlock; result = do_hw_free(substream); snd_pcm_set_state(substream, SNDRV_PCM_STATE_OPEN); cpu_latency_qos_remove_request(&substream->latency_pm_qos_req); + unlock: + 
snd_pcm_buffer_access_unlock(runtime); return result; } @@ -1173,15 +1212,17 @@ struct action_ops { static int snd_pcm_action_group(const struct action_ops *ops, struct snd_pcm_substream *substream, snd_pcm_state_t state, - bool do_lock) + bool stream_lock) { struct snd_pcm_substream *s = NULL; struct snd_pcm_substream *s1; int res = 0, depth = 1; snd_pcm_group_for_each_entry(s, substream) { - if (do_lock && s != substream) { - if (s->pcm->nonatomic) + if (s != substream) { + if (!stream_lock) + mutex_lock_nested(&s->runtime->buffer_mutex, depth); + else if (s->pcm->nonatomic) mutex_lock_nested(&s->self_group.mutex, depth); else spin_lock_nested(&s->self_group.lock, depth); @@ -1209,18 +1250,18 @@ static int snd_pcm_action_group(const struct action_ops *ops, ops->post_action(s, state); } _unlock: - if (do_lock) { - /* unlock streams */ - snd_pcm_group_for_each_entry(s1, substream) { - if (s1 != substream) { - if (s1->pcm->nonatomic) - mutex_unlock(&s1->self_group.mutex); - else - spin_unlock(&s1->self_group.lock); - } - if (s1 == s) /* end */ - break; + /* unlock streams */ + snd_pcm_group_for_each_entry(s1, substream) { + if (s1 != substream) { + if (!stream_lock) + mutex_unlock(&s1->runtime->buffer_mutex); + else if (s1->pcm->nonatomic) + mutex_unlock(&s1->self_group.mutex); + else + spin_unlock(&s1->self_group.lock); } + if (s1 == s) /* end */ + break; } return res; } @@ -1350,10 +1391,15 @@ static int snd_pcm_action_nonatomic(const struct action_ops *ops, /* Guarantee the group members won't change during non-atomic action */ down_read(&snd_pcm_link_rwsem); + res = snd_pcm_buffer_access_lock(substream->runtime); + if (res < 0) + goto unlock; if (snd_pcm_stream_linked(substream)) res = snd_pcm_action_group(ops, substream, state, false); else res = snd_pcm_action_single(ops, substream, state); + snd_pcm_buffer_access_unlock(substream->runtime); + unlock: up_read(&snd_pcm_link_rwsem); return res; } @@ -1843,11 +1889,13 @@ static int snd_pcm_do_reset(struct 
snd_pcm_substream *substream, int err = snd_pcm_ops_ioctl(substream, SNDRV_PCM_IOCTL1_RESET, NULL); if (err < 0) return err; + snd_pcm_stream_lock_irq(substream); runtime->hw_ptr_base = 0; runtime->hw_ptr_interrupt = runtime->status->hw_ptr - runtime->status->hw_ptr % runtime->period_size; runtime->silence_start = runtime->status->hw_ptr; runtime->silence_filled = 0; + snd_pcm_stream_unlock_irq(substream); return 0; } @@ -1855,10 +1903,12 @@ static void snd_pcm_post_reset(struct snd_pcm_substream *substream, snd_pcm_state_t state) { struct snd_pcm_runtime *runtime = substream->runtime; + snd_pcm_stream_lock_irq(substream); runtime->control->appl_ptr = runtime->status->hw_ptr; if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK && runtime->silence_size > 0) snd_pcm_playback_silence(substream, ULONG_MAX); + snd_pcm_stream_unlock_irq(substream); } static const struct action_ops snd_pcm_action_reset = { diff --git a/sound/drivers/mtpav.c b/sound/drivers/mtpav.c index 11235baaf6fa5..f212f233ea618 100644 --- a/sound/drivers/mtpav.c +++ b/sound/drivers/mtpav.c @@ -693,8 +693,6 @@ static int snd_mtpav_probe(struct platform_device *dev) mtp_card->outmidihwport = 0xffffffff; timer_setup(&mtp_card->timer, snd_mtpav_output_timer, 0); - card->private_free = snd_mtpav_free; - err = snd_mtpav_get_RAWMIDI(mtp_card); if (err < 0) return err; @@ -716,6 +714,8 @@ static int snd_mtpav_probe(struct platform_device *dev) if (err < 0) return err; + card->private_free = snd_mtpav_free; + platform_set_drvdata(dev, card); printk(KERN_INFO "Motu MidiTimePiece on parallel port irq: %d ioport: 0x%lx\n", irq, port); return 0; diff --git a/sound/firewire/fcp.c b/sound/firewire/fcp.c index bbfbebf4affbc..df44dd5dc4b22 100644 --- a/sound/firewire/fcp.c +++ b/sound/firewire/fcp.c @@ -240,9 +240,7 @@ int fcp_avc_transaction(struct fw_unit *unit, t.response_match_bytes = response_match_bytes; t.state = STATE_PENDING; init_waitqueue_head(&t.wait); - - if (*(const u8 *)command == 0x00 || *(const u8 
*)command == 0x03) - t.deferrable = true; + t.deferrable = (*(const u8 *)command == 0x00 || *(const u8 *)command == 0x03); spin_lock_irq(&transactions_lock); list_add_tail(&t.list, &transactions); diff --git a/sound/hda/intel-dsp-config.c b/sound/hda/intel-dsp-config.c index 4fb90ceb4053b..70fd8b13938ed 100644 --- a/sound/hda/intel-dsp-config.c +++ b/sound/hda/intel-dsp-config.c @@ -11,6 +11,7 @@ #include #include #include +#include static int dsp_driver; @@ -31,7 +32,12 @@ struct config_entry { u16 device; u8 acpi_hid[ACPI_ID_LEN]; const struct dmi_system_id *dmi_table; - u8 codec_hid[ACPI_ID_LEN]; + const struct snd_soc_acpi_codecs *codec_hid; +}; + +static const struct snd_soc_acpi_codecs __maybe_unused essx_83x6 = { + .num_codecs = 3, + .codecs = { "ESSX8316", "ESSX8326", "ESSX8336"}, }; /* @@ -77,7 +83,7 @@ static const struct config_entry config_table[] = { { .flags = FLAG_SOF, .device = 0x5a98, - .codec_hid = "ESSX8336", + .codec_hid = &essx_83x6, }, #endif #if IS_ENABLED(CONFIG_SND_SOC_INTEL_APL) @@ -163,7 +169,7 @@ static const struct config_entry config_table[] = { { .flags = FLAG_SOF, .device = 0x3198, - .codec_hid = "ESSX8336", + .codec_hid = &essx_83x6, }, #endif @@ -193,6 +199,11 @@ static const struct config_entry config_table[] = { {} } }, + { + .flags = FLAG_SOF, + .device = 0x09dc8, + .codec_hid = &essx_83x6, + }, { .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC_OR_SOUNDWIRE, .device = 0x9dc8, @@ -251,7 +262,7 @@ static const struct config_entry config_table[] = { { .flags = FLAG_SOF, .device = 0x02c8, - .codec_hid = "ESSX8336", + .codec_hid = &essx_83x6, }, { .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC_OR_SOUNDWIRE, @@ -280,7 +291,7 @@ static const struct config_entry config_table[] = { { .flags = FLAG_SOF, .device = 0x06c8, - .codec_hid = "ESSX8336", + .codec_hid = &essx_83x6, }, { .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC_OR_SOUNDWIRE, @@ -327,7 +338,7 @@ static const struct config_entry config_table[] = { { .flags = FLAG_SOF, .device = 0x4dc8, - 
.codec_hid = "ESSX8336", + .codec_hid = &essx_83x6, }, { .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC, @@ -353,7 +364,7 @@ static const struct config_entry config_table[] = { { .flags = FLAG_SOF, .device = 0xa0c8, - .codec_hid = "ESSX8336", + .codec_hid = &essx_83x6, }, { .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC_OR_SOUNDWIRE, @@ -414,8 +425,15 @@ static const struct config_entry *snd_intel_dsp_find_config continue; if (table->dmi_table && !dmi_check_system(table->dmi_table)) continue; - if (table->codec_hid[0] && !acpi_dev_present(table->codec_hid, NULL, -1)) - continue; + if (table->codec_hid) { + int i; + + for (i = 0; i < table->codec_hid->num_codecs; i++) + if (acpi_dev_present(table->codec_hid->codecs[i], NULL, -1)) + break; + if (i == table->codec_hid->num_codecs) + continue; + } return table; } return NULL; diff --git a/sound/hda/intel-nhlt.c b/sound/hda/intel-nhlt.c index 128476aa7c61d..4063da3782833 100644 --- a/sound/hda/intel-nhlt.c +++ b/sound/hda/intel-nhlt.c @@ -130,6 +130,28 @@ bool intel_nhlt_has_endpoint_type(struct nhlt_acpi_table *nhlt, u8 link_type) } EXPORT_SYMBOL(intel_nhlt_has_endpoint_type); +int intel_nhlt_ssp_endpoint_mask(struct nhlt_acpi_table *nhlt, u8 device_type) +{ + struct nhlt_endpoint *epnt; + int ssp_mask = 0; + int i; + + if (!nhlt || (device_type != NHLT_DEVICE_BT && device_type != NHLT_DEVICE_I2S)) + return 0; + + epnt = (struct nhlt_endpoint *)nhlt->desc; + for (i = 0; i < nhlt->endpoint_count; i++) { + if (epnt->linktype == NHLT_LINK_SSP && epnt->device_type == device_type) { + /* for SSP the virtual bus id is the SSP port */ + ssp_mask |= BIT(epnt->virtual_bus_id); + } + epnt = (struct nhlt_endpoint *)((u8 *)epnt + epnt->length); + } + + return ssp_mask; +} +EXPORT_SYMBOL(intel_nhlt_ssp_endpoint_mask); + static struct nhlt_specific_cfg * nhlt_get_specific_cfg(struct device *dev, struct nhlt_fmt *fmt, u8 num_ch, u32 rate, u8 vbps, u8 bps) diff --git a/sound/isa/cs423x/cs4236.c b/sound/isa/cs423x/cs4236.c index 
b6bdebd9ef275..10112e1bb25dc 100644 --- a/sound/isa/cs423x/cs4236.c +++ b/sound/isa/cs423x/cs4236.c @@ -494,7 +494,7 @@ static int snd_cs423x_pnpbios_detect(struct pnp_dev *pdev, static int dev; int err; struct snd_card *card; - struct pnp_dev *cdev; + struct pnp_dev *cdev, *iter; char cid[PNP_ID_LEN]; if (pnp_device_is_isapnp(pdev)) @@ -510,9 +510,11 @@ static int snd_cs423x_pnpbios_detect(struct pnp_dev *pdev, strcpy(cid, pdev->id[0].id); cid[5] = '1'; cdev = NULL; - list_for_each_entry(cdev, &(pdev->protocol->devices), protocol_list) { - if (!strcmp(cdev->id[0].id, cid)) + list_for_each_entry(iter, &(pdev->protocol->devices), protocol_list) { + if (!strcmp(iter->id[0].id, cid)) { + cdev = iter; break; + } } err = snd_cs423x_card_new(&pdev->dev, dev, &card); if (err < 0) diff --git a/sound/isa/galaxy/galaxy.c b/sound/isa/galaxy/galaxy.c index ea001c80149dd..3164eb8510fa4 100644 --- a/sound/isa/galaxy/galaxy.c +++ b/sound/isa/galaxy/galaxy.c @@ -478,7 +478,7 @@ static void snd_galaxy_free(struct snd_card *card) galaxy_set_config(galaxy, galaxy->config); } -static int snd_galaxy_probe(struct device *dev, unsigned int n) +static int __snd_galaxy_probe(struct device *dev, unsigned int n) { struct snd_galaxy *galaxy; struct snd_wss *chip; @@ -598,6 +598,11 @@ static int snd_galaxy_probe(struct device *dev, unsigned int n) return 0; } +static int snd_galaxy_probe(struct device *dev, unsigned int n) +{ + return snd_card_free_on_error(dev, __snd_galaxy_probe(dev, n)); +} + static struct isa_driver snd_galaxy_driver = { .match = snd_galaxy_match, .probe = snd_galaxy_probe, diff --git a/sound/isa/sc6000.c b/sound/isa/sc6000.c index 26ab7ff807684..60398fced046b 100644 --- a/sound/isa/sc6000.c +++ b/sound/isa/sc6000.c @@ -537,7 +537,7 @@ static void snd_sc6000_free(struct snd_card *card) sc6000_setup_board(vport, 0); } -static int snd_sc6000_probe(struct device *devptr, unsigned int dev) +static int __snd_sc6000_probe(struct device *devptr, unsigned int dev) { static const 
int possible_irqs[] = { 5, 7, 9, 10, 11, -1 }; static const int possible_dmas[] = { 1, 3, 0, -1 }; @@ -662,6 +662,11 @@ static int snd_sc6000_probe(struct device *devptr, unsigned int dev) return 0; } +static int snd_sc6000_probe(struct device *devptr, unsigned int dev) +{ + return snd_card_free_on_error(devptr, __snd_sc6000_probe(devptr, dev)); +} + static struct isa_driver snd_sc6000_driver = { .match = snd_sc6000_match, .probe = snd_sc6000_probe, diff --git a/sound/pci/ac97/ac97_codec.c b/sound/pci/ac97/ac97_codec.c index 01f296d524ce6..cb60a07d39a8e 100644 --- a/sound/pci/ac97/ac97_codec.c +++ b/sound/pci/ac97/ac97_codec.c @@ -938,8 +938,8 @@ static int snd_ac97_ad18xx_pcm_get_volume(struct snd_kcontrol *kcontrol, struct int codec = kcontrol->private_value & 3; mutex_lock(&ac97->page_mutex); - ucontrol->value.integer.value[0] = 31 - ((ac97->spec.ad18xx.pcmreg[codec] >> 0) & 31); - ucontrol->value.integer.value[1] = 31 - ((ac97->spec.ad18xx.pcmreg[codec] >> 8) & 31); + ucontrol->value.integer.value[0] = 31 - ((ac97->spec.ad18xx.pcmreg[codec] >> 8) & 31); + ucontrol->value.integer.value[1] = 31 - ((ac97->spec.ad18xx.pcmreg[codec] >> 0) & 31); mutex_unlock(&ac97->page_mutex); return 0; } diff --git a/sound/pci/ad1889.c b/sound/pci/ad1889.c index bba4dae8dcc70..50e30704bf6f9 100644 --- a/sound/pci/ad1889.c +++ b/sound/pci/ad1889.c @@ -844,8 +844,8 @@ snd_ad1889_create(struct snd_card *card, struct pci_dev *pci) } static int -snd_ad1889_probe(struct pci_dev *pci, - const struct pci_device_id *pci_id) +__snd_ad1889_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) { int err; static int devno; @@ -904,6 +904,12 @@ snd_ad1889_probe(struct pci_dev *pci, return 0; } +static int snd_ad1889_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_ad1889_probe(pci, pci_id)); +} + static const struct pci_device_id snd_ad1889_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_ANALOG_DEVICES, PCI_DEVICE_ID_AD1889JS) 
}, { 0, }, diff --git a/sound/pci/ali5451/ali5451.c b/sound/pci/ali5451/ali5451.c index 92eb59db106de..2378a39abaebe 100644 --- a/sound/pci/ali5451/ali5451.c +++ b/sound/pci/ali5451/ali5451.c @@ -2124,8 +2124,8 @@ static int snd_ali_create(struct snd_card *card, return 0; } -static int snd_ali_probe(struct pci_dev *pci, - const struct pci_device_id *pci_id) +static int __snd_ali_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) { struct snd_card *card; struct snd_ali *codec; @@ -2170,6 +2170,12 @@ static int snd_ali_probe(struct pci_dev *pci, return 0; } +static int snd_ali_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_ali_probe(pci, pci_id)); +} + static struct pci_driver ali5451_driver = { .name = KBUILD_MODNAME, .id_table = snd_ali_ids, diff --git a/sound/pci/als300.c b/sound/pci/als300.c index b86565dcdbe41..c70aff0601205 100644 --- a/sound/pci/als300.c +++ b/sound/pci/als300.c @@ -708,7 +708,7 @@ static int snd_als300_probe(struct pci_dev *pci, err = snd_als300_create(card, pci, chip_type); if (err < 0) - return err; + goto error; strcpy(card->driver, "ALS300"); if (chip->chip_type == DEVICE_ALS300_PLUS) @@ -723,11 +723,15 @@ static int snd_als300_probe(struct pci_dev *pci, err = snd_card_register(card); if (err < 0) - return err; + goto error; pci_set_drvdata(pci, card); dev++; return 0; + + error: + snd_card_free(card); + return err; } static struct pci_driver als300_driver = { diff --git a/sound/pci/als4000.c b/sound/pci/als4000.c index 535eccd124bee..f33aeb692a112 100644 --- a/sound/pci/als4000.c +++ b/sound/pci/als4000.c @@ -806,8 +806,8 @@ static void snd_card_als4000_free( struct snd_card *card ) snd_als4000_free_gameport(acard); } -static int snd_card_als4000_probe(struct pci_dev *pci, - const struct pci_device_id *pci_id) +static int __snd_card_als4000_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) { static int dev; struct snd_card *card; @@ -930,6 
+930,12 @@ static int snd_card_als4000_probe(struct pci_dev *pci, return 0; } +static int snd_card_als4000_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_card_als4000_probe(pci, pci_id)); +} + #ifdef CONFIG_PM_SLEEP static int snd_als4000_suspend(struct device *dev) { diff --git a/sound/pci/atiixp.c b/sound/pci/atiixp.c index b8e035d5930d2..43d01f1847ed7 100644 --- a/sound/pci/atiixp.c +++ b/sound/pci/atiixp.c @@ -1572,8 +1572,8 @@ static int snd_atiixp_init(struct snd_card *card, struct pci_dev *pci) } -static int snd_atiixp_probe(struct pci_dev *pci, - const struct pci_device_id *pci_id) +static int __snd_atiixp_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) { struct snd_card *card; struct atiixp *chip; @@ -1623,6 +1623,12 @@ static int snd_atiixp_probe(struct pci_dev *pci, return 0; } +static int snd_atiixp_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_atiixp_probe(pci, pci_id)); +} + static struct pci_driver atiixp_driver = { .name = KBUILD_MODNAME, .id_table = snd_atiixp_ids, diff --git a/sound/pci/atiixp_modem.c b/sound/pci/atiixp_modem.c index 178dce8ef1e99..8864c4c3c7e13 100644 --- a/sound/pci/atiixp_modem.c +++ b/sound/pci/atiixp_modem.c @@ -1201,8 +1201,8 @@ static int snd_atiixp_init(struct snd_card *card, struct pci_dev *pci) } -static int snd_atiixp_probe(struct pci_dev *pci, - const struct pci_device_id *pci_id) +static int __snd_atiixp_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) { struct snd_card *card; struct atiixp_modem *chip; @@ -1247,6 +1247,12 @@ static int snd_atiixp_probe(struct pci_dev *pci, return 0; } +static int snd_atiixp_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_atiixp_probe(pci, pci_id)); +} + static struct pci_driver atiixp_modem_driver = { .name = KBUILD_MODNAME, .id_table = 
snd_atiixp_ids, diff --git a/sound/pci/au88x0/au88x0.c b/sound/pci/au88x0/au88x0.c index 342ef2a6655e3..eb234153691bc 100644 --- a/sound/pci/au88x0/au88x0.c +++ b/sound/pci/au88x0/au88x0.c @@ -193,7 +193,7 @@ snd_vortex_create(struct snd_card *card, struct pci_dev *pci) // constructor -- see "Constructor" sub-section static int -snd_vortex_probe(struct pci_dev *pci, const struct pci_device_id *pci_id) +__snd_vortex_probe(struct pci_dev *pci, const struct pci_device_id *pci_id) { static int dev; struct snd_card *card; @@ -310,6 +310,12 @@ snd_vortex_probe(struct pci_dev *pci, const struct pci_device_id *pci_id) return 0; } +static int +snd_vortex_probe(struct pci_dev *pci, const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_vortex_probe(pci, pci_id)); +} + // pci_driver definition static struct pci_driver vortex_driver = { .name = KBUILD_MODNAME, diff --git a/sound/pci/aw2/aw2-alsa.c b/sound/pci/aw2/aw2-alsa.c index d56f126d6fdd9..29a4bcdec237a 100644 --- a/sound/pci/aw2/aw2-alsa.c +++ b/sound/pci/aw2/aw2-alsa.c @@ -275,7 +275,7 @@ static int snd_aw2_probe(struct pci_dev *pci, /* (3) Create main component */ err = snd_aw2_create(card, pci); if (err < 0) - return err; + goto error; /* initialize mutex */ mutex_init(&chip->mtx); @@ -294,13 +294,17 @@ static int snd_aw2_probe(struct pci_dev *pci, /* (6) Register card instance */ err = snd_card_register(card); if (err < 0) - return err; + goto error; /* (7) Set PCI driver data */ pci_set_drvdata(pci, card); dev++; return 0; + + error: + snd_card_free(card); + return err; } /* open callback */ diff --git a/sound/pci/azt3328.c b/sound/pci/azt3328.c index 089050470ff27..7f329dfc5404a 100644 --- a/sound/pci/azt3328.c +++ b/sound/pci/azt3328.c @@ -2427,7 +2427,7 @@ snd_azf3328_create(struct snd_card *card, } static int -snd_azf3328_probe(struct pci_dev *pci, const struct pci_device_id *pci_id) +__snd_azf3328_probe(struct pci_dev *pci, const struct pci_device_id *pci_id) { static int dev; 
struct snd_card *card; @@ -2520,6 +2520,12 @@ snd_azf3328_probe(struct pci_dev *pci, const struct pci_device_id *pci_id) return 0; } +static int +snd_azf3328_probe(struct pci_dev *pci, const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_azf3328_probe(pci, pci_id)); +} + #ifdef CONFIG_PM_SLEEP static inline void snd_azf3328_suspend_regs(const struct snd_azf3328 *chip, diff --git a/sound/pci/bt87x.c b/sound/pci/bt87x.c index d23f931638410..621985bfee5d7 100644 --- a/sound/pci/bt87x.c +++ b/sound/pci/bt87x.c @@ -805,8 +805,8 @@ static int snd_bt87x_detect_card(struct pci_dev *pci) return SND_BT87X_BOARD_UNKNOWN; } -static int snd_bt87x_probe(struct pci_dev *pci, - const struct pci_device_id *pci_id) +static int __snd_bt87x_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) { static int dev; struct snd_card *card; @@ -889,6 +889,12 @@ static int snd_bt87x_probe(struct pci_dev *pci, return 0; } +static int snd_bt87x_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_bt87x_probe(pci, pci_id)); +} + /* default entries for all Bt87x cards - it's not exported */ /* driver_data is set to 0 to call detection */ static const struct pci_device_id snd_bt87x_default_ids[] = { diff --git a/sound/pci/ca0106/ca0106_main.c b/sound/pci/ca0106/ca0106_main.c index 36fb150b72fb5..f4cc112bddf3e 100644 --- a/sound/pci/ca0106/ca0106_main.c +++ b/sound/pci/ca0106/ca0106_main.c @@ -1725,8 +1725,8 @@ static int snd_ca0106_midi(struct snd_ca0106 *chip, unsigned int channel) } -static int snd_ca0106_probe(struct pci_dev *pci, - const struct pci_device_id *pci_id) +static int __snd_ca0106_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) { static int dev; struct snd_card *card; @@ -1786,6 +1786,12 @@ static int snd_ca0106_probe(struct pci_dev *pci, return 0; } +static int snd_ca0106_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) +{ + return 
snd_card_free_on_error(&pci->dev, __snd_ca0106_probe(pci, pci_id)); +} + #ifdef CONFIG_PM_SLEEP static int snd_ca0106_suspend(struct device *dev) { diff --git a/sound/pci/cmipci.c b/sound/pci/cmipci.c index 9a678b5cf2855..727db6d433916 100644 --- a/sound/pci/cmipci.c +++ b/sound/pci/cmipci.c @@ -298,7 +298,6 @@ MODULE_PARM_DESC(joystick_port, "Joystick port address."); #define CM_MICGAINZ 0x01 /* mic boost */ #define CM_MICGAINZ_SHIFT 0 -#define CM_REG_MIXER3 0x24 #define CM_REG_AUX_VOL 0x26 #define CM_VAUXL_MASK 0xf0 #define CM_VAUXR_MASK 0x0f @@ -3248,15 +3247,19 @@ static int snd_cmipci_probe(struct pci_dev *pci, err = snd_cmipci_create(card, pci, dev); if (err < 0) - return err; + goto error; err = snd_card_register(card); if (err < 0) - return err; + goto error; pci_set_drvdata(pci, card); dev++; return 0; + + error: + snd_card_free(card); + return err; } #ifdef CONFIG_PM_SLEEP @@ -3265,7 +3268,7 @@ static int snd_cmipci_probe(struct pci_dev *pci, */ static const unsigned char saved_regs[] = { CM_REG_FUNCTRL1, CM_REG_CHFORMAT, CM_REG_LEGACY_CTRL, CM_REG_MISC_CTRL, - CM_REG_MIXER0, CM_REG_MIXER1, CM_REG_MIXER2, CM_REG_MIXER3, CM_REG_PLL, + CM_REG_MIXER0, CM_REG_MIXER1, CM_REG_MIXER2, CM_REG_AUX_VOL, CM_REG_PLL, CM_REG_CH0_FRAME1, CM_REG_CH0_FRAME2, CM_REG_CH1_FRAME1, CM_REG_CH1_FRAME2, CM_REG_EXT_MISC, CM_REG_INT_STATUS, CM_REG_INT_HLDCLR, CM_REG_FUNCTRL0, diff --git a/sound/pci/cs4281.c b/sound/pci/cs4281.c index e7367402b84a3..0c9cadf7b3b80 100644 --- a/sound/pci/cs4281.c +++ b/sound/pci/cs4281.c @@ -1827,8 +1827,8 @@ static void snd_cs4281_opl3_command(struct snd_opl3 *opl3, unsigned short cmd, spin_unlock_irqrestore(&opl3->reg_lock, flags); } -static int snd_cs4281_probe(struct pci_dev *pci, - const struct pci_device_id *pci_id) +static int __snd_cs4281_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) { static int dev; struct snd_card *card; @@ -1888,6 +1888,12 @@ static int snd_cs4281_probe(struct pci_dev *pci, return 0; } +static int 
snd_cs4281_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_cs4281_probe(pci, pci_id)); +} + /* * Power Management */ diff --git a/sound/pci/cs5535audio/cs5535audio.c b/sound/pci/cs5535audio/cs5535audio.c index 499fa0148f9a4..440b8f9b40c96 100644 --- a/sound/pci/cs5535audio/cs5535audio.c +++ b/sound/pci/cs5535audio/cs5535audio.c @@ -281,8 +281,8 @@ static int snd_cs5535audio_create(struct snd_card *card, return 0; } -static int snd_cs5535audio_probe(struct pci_dev *pci, - const struct pci_device_id *pci_id) +static int __snd_cs5535audio_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) { static int dev; struct snd_card *card; @@ -331,6 +331,12 @@ static int snd_cs5535audio_probe(struct pci_dev *pci, return 0; } +static int snd_cs5535audio_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_cs5535audio_probe(pci, pci_id)); +} + static struct pci_driver cs5535audio_driver = { .name = KBUILD_MODNAME, .id_table = snd_cs5535audio_ids, diff --git a/sound/pci/echoaudio/echoaudio.c b/sound/pci/echoaudio/echoaudio.c index 25b012ef5c3e6..c70c3ac4e99a5 100644 --- a/sound/pci/echoaudio/echoaudio.c +++ b/sound/pci/echoaudio/echoaudio.c @@ -1970,8 +1970,8 @@ static int snd_echo_create(struct snd_card *card, } /* constructor */ -static int snd_echo_probe(struct pci_dev *pci, - const struct pci_device_id *pci_id) +static int __snd_echo_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) { static int dev; struct snd_card *card; @@ -2139,6 +2139,11 @@ static int snd_echo_probe(struct pci_dev *pci, return 0; } +static int snd_echo_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_echo_probe(pci, pci_id)); +} #if defined(CONFIG_PM_SLEEP) diff --git a/sound/pci/emu10k1/emu10k1x.c b/sound/pci/emu10k1/emu10k1x.c index c49c44dc10820..89043392f3ec7 100644 --- 
a/sound/pci/emu10k1/emu10k1x.c +++ b/sound/pci/emu10k1/emu10k1x.c @@ -1491,8 +1491,8 @@ static int snd_emu10k1x_midi(struct emu10k1x *emu) return 0; } -static int snd_emu10k1x_probe(struct pci_dev *pci, - const struct pci_device_id *pci_id) +static int __snd_emu10k1x_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) { static int dev; struct snd_card *card; @@ -1554,6 +1554,12 @@ static int snd_emu10k1x_probe(struct pci_dev *pci, return 0; } +static int snd_emu10k1x_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_emu10k1x_probe(pci, pci_id)); +} + // PCI IDs static const struct pci_device_id snd_emu10k1x_ids[] = { { PCI_VDEVICE(CREATIVE, 0x0006), 0 }, /* Dell OEM version (EMU10K1) */ diff --git a/sound/pci/ens1370.c b/sound/pci/ens1370.c index 2651f0c64c062..94efe347a97a9 100644 --- a/sound/pci/ens1370.c +++ b/sound/pci/ens1370.c @@ -2304,8 +2304,8 @@ static irqreturn_t snd_audiopci_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } -static int snd_audiopci_probe(struct pci_dev *pci, - const struct pci_device_id *pci_id) +static int __snd_audiopci_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) { static int dev; struct snd_card *card; @@ -2369,6 +2369,12 @@ static int snd_audiopci_probe(struct pci_dev *pci, return 0; } +static int snd_audiopci_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_audiopci_probe(pci, pci_id)); +} + static struct pci_driver ens137x_driver = { .name = KBUILD_MODNAME, .id_table = snd_audiopci_ids, diff --git a/sound/pci/es1938.c b/sound/pci/es1938.c index 00b976f42a3db..e34ec6f89e7e0 100644 --- a/sound/pci/es1938.c +++ b/sound/pci/es1938.c @@ -1716,8 +1716,8 @@ static int snd_es1938_mixer(struct es1938 *chip) } -static int snd_es1938_probe(struct pci_dev *pci, - const struct pci_device_id *pci_id) +static int __snd_es1938_probe(struct pci_dev *pci, + const struct 
pci_device_id *pci_id) { static int dev; struct snd_card *card; @@ -1796,6 +1796,12 @@ static int snd_es1938_probe(struct pci_dev *pci, return 0; } +static int snd_es1938_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_es1938_probe(pci, pci_id)); +} + static struct pci_driver es1938_driver = { .name = KBUILD_MODNAME, .id_table = snd_es1938_ids, diff --git a/sound/pci/es1968.c b/sound/pci/es1968.c index 6a8a02a9ecf41..4a7e20bb11bca 100644 --- a/sound/pci/es1968.c +++ b/sound/pci/es1968.c @@ -2741,8 +2741,8 @@ static int snd_es1968_create(struct snd_card *card, /* */ -static int snd_es1968_probe(struct pci_dev *pci, - const struct pci_device_id *pci_id) +static int __snd_es1968_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) { static int dev; struct snd_card *card; @@ -2848,6 +2848,12 @@ static int snd_es1968_probe(struct pci_dev *pci, return 0; } +static int snd_es1968_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_es1968_probe(pci, pci_id)); +} + static struct pci_driver es1968_driver = { .name = KBUILD_MODNAME, .id_table = snd_es1968_ids, diff --git a/sound/pci/fm801.c b/sound/pci/fm801.c index 9c22ff19e56d2..62b3cb126c6d0 100644 --- a/sound/pci/fm801.c +++ b/sound/pci/fm801.c @@ -1268,8 +1268,8 @@ static int snd_fm801_create(struct snd_card *card, return 0; } -static int snd_card_fm801_probe(struct pci_dev *pci, - const struct pci_device_id *pci_id) +static int __snd_card_fm801_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) { static int dev; struct snd_card *card; @@ -1333,6 +1333,12 @@ static int snd_card_fm801_probe(struct pci_dev *pci, return 0; } +static int snd_card_fm801_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_card_fm801_probe(pci, pci_id)); +} + #ifdef CONFIG_PM_SLEEP static const unsigned char saved_regs[] = 
{ FM801_PCM_VOL, FM801_I2S_VOL, FM801_FM_VOL, FM801_REC_SRC, diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 572ff0d1fafee..8eff25d2d9e67 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2066,14 +2066,16 @@ static const struct hda_controller_ops pci_hda_ops = { .position_check = azx_position_check, }; +static DECLARE_BITMAP(probed_devs, SNDRV_CARDS); + static int azx_probe(struct pci_dev *pci, const struct pci_device_id *pci_id) { - static int dev; struct snd_card *card; struct hda_intel *hda; struct azx *chip; bool schedule_probe; + int dev; int err; if (pci_match_id(driver_denylist, pci)) { @@ -2081,10 +2083,11 @@ static int azx_probe(struct pci_dev *pci, return -ENODEV; } + dev = find_first_zero_bit(probed_devs, SNDRV_CARDS); if (dev >= SNDRV_CARDS) return -ENODEV; if (!enable[dev]) { - dev++; + set_bit(dev, probed_devs); return -ENOENT; } @@ -2151,7 +2154,7 @@ static int azx_probe(struct pci_dev *pci, if (schedule_probe) schedule_delayed_work(&hda->probe_work, 0); - dev++; + set_bit(dev, probed_devs); if (chip->disabled) complete_all(&hda->probe_wait); return 0; @@ -2374,6 +2377,7 @@ static void azx_remove(struct pci_dev *pci) cancel_delayed_work_sync(&hda->probe_work); device_lock(&pci->dev); + clear_bit(chip->dev_index, probed_devs); pci_set_drvdata(pci, NULL); snd_card_free(card); } diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index 92df4f243ec65..cf4f277dccdda 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -1617,6 +1617,7 @@ static void hdmi_present_sense_via_verbs(struct hdmi_spec_per_pin *per_pin, struct hda_codec *codec = per_pin->codec; struct hdmi_spec *spec = codec->spec; struct hdmi_eld *eld = &spec->temp_eld; + struct device *dev = hda_codec_dev(codec); hda_nid_t pin_nid = per_pin->pin_nid; int dev_id = per_pin->dev_id; /* @@ -1630,8 +1631,13 @@ static void hdmi_present_sense_via_verbs(struct hdmi_spec_per_pin *per_pin, int present; int ret; 
+#ifdef CONFIG_PM + if (dev->power.runtime_status == RPM_SUSPENDING) + return; +#endif + ret = snd_hda_power_up_pm(codec); - if (ret < 0 && pm_runtime_suspended(hda_codec_dev(codec))) + if (ret < 0 && pm_runtime_suspended(dev)) goto out; present = snd_hda_jack_pin_sense(codec, pin_nid, dev_id); diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 3a42457984e98..ca40c2bd8ba62 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -2619,6 +2619,7 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = { SND_PCI_QUIRK(0x1558, 0x65e1, "Clevo PB51[ED][DF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x65e5, "Clevo PC50D[PRS](?:-D|-G)?", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x65f1, "Clevo PC50HS", ALC1220_FIXUP_CLEVO_PB51ED_PINS), + SND_PCI_QUIRK(0x1558, 0x65f5, "Clevo PD50PN[NRT]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x67d1, "Clevo PB71[ER][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x67e1, "Clevo PB71[DE][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x67e5, "Clevo PC70D[PRS](?:-D|-G)?", ALC1220_FIXUP_CLEVO_PB51ED_PINS), @@ -3617,8 +3618,8 @@ static void alc256_shutup(struct hda_codec *codec) /* If disable 3k pulldown control for alc257, the Mic detection will not work correctly * when booting with headset plugged. 
So skip setting it for the codec alc257 */ - if (spec->codec_variant != ALC269_TYPE_ALC257 && - spec->codec_variant != ALC269_TYPE_ALC256) + if (codec->core.vendor_id != 0x10ec0236 && + codec->core.vendor_id != 0x10ec0257) alc_update_coef_idx(codec, 0x46, 0, 3 << 12); if (!spec->no_shutup_pins) @@ -6948,6 +6949,7 @@ enum { ALC236_FIXUP_HP_MUTE_LED, ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF, ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET, + ALC256_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET, ALC295_FIXUP_ASUS_MIC_NO_PRESENCE, ALC269VC_FIXUP_ACER_VCOPPERBOX_PINS, ALC269VC_FIXUP_ACER_HEADSET_MIC, @@ -8273,6 +8275,14 @@ static const struct hda_fixup alc269_fixups[] = { { } }, }, + [ALC256_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET] = { + .type = HDA_FIXUP_VERBS, + .v.verbs = (const struct hda_verb[]) { + { 0x20, AC_VERB_SET_COEF_INDEX, 0x08}, + { 0x20, AC_VERB_SET_PROC_COEF, 0x2fcf}, + { } + }, + }, [ALC295_FIXUP_ASUS_MIC_NO_PRESENCE] = { .type = HDA_FIXUP_PINS, .v.pins = (const struct hda_pintbl[]) { @@ -9020,6 +9030,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1e51, "ASUS Zephyrus M15", ALC294_FIXUP_ASUS_GU502_PINS), SND_PCI_QUIRK(0x1043, 0x1e8e, "ASUS Zephyrus G15", ALC289_FIXUP_ASUS_GA401), SND_PCI_QUIRK(0x1043, 0x1f11, "ASUS Zephyrus G14", ALC289_FIXUP_ASUS_GA401), + SND_PCI_QUIRK(0x1043, 0x1d42, "ASUS Zephyrus G14 2022", ALC289_FIXUP_ASUS_GA401), SND_PCI_QUIRK(0x1043, 0x16b2, "ASUS GU603", ALC289_FIXUP_ASUS_GA401), SND_PCI_QUIRK(0x1043, 0x3030, "ASUS ZN270IE", ALC256_FIXUP_ASUS_AIO_GPIO2), SND_PCI_QUIRK(0x1043, 0x831a, "ASUS P901", ALC269_FIXUP_STEREO_DMIC), @@ -9053,6 +9064,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x144d, 0xc740, "Samsung Ativ book 8 (NP870Z5G)", ALC269_FIXUP_ATIV_BOOK_8), SND_PCI_QUIRK(0x144d, 0xc812, "Samsung Notebook Pen S (NT950SBE-X58)", ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET), SND_PCI_QUIRK(0x144d, 0xc830, "Samsung Galaxy Book Ion (NT950XCJ-X716A)", 
ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET), + SND_PCI_QUIRK(0x144d, 0xc832, "Samsung Galaxy Book Flex Alpha (NP730QCJ)", ALC256_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET), SND_PCI_QUIRK(0x1458, 0xfa53, "Gigabyte BXBT-2807", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1462, 0xb120, "MSI Cubi MS-B120", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1462, 0xb171, "Cubi N 8GL (MS-B171)", ALC283_FIXUP_HEADSET_MIC), @@ -9103,6 +9115,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1558, 0x8561, "Clevo NH[57][0-9][ER][ACDH]Q", ALC269_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1558, 0x8562, "Clevo NH[57][0-9]RZ[Q]", ALC269_FIXUP_DMIC), SND_PCI_QUIRK(0x1558, 0x8668, "Clevo NP50B[BE]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x866d, "Clevo NP5[05]PN[HJK]", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x867d, "Clevo NP7[01]PN[HJK]", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x8680, "Clevo NJ50LU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x8686, "Clevo NH50[CZ]U", ALC256_FIXUP_MIC_NO_PRESENCE_AND_RESUME), SND_PCI_QUIRK(0x1558, 0x8a20, "Clevo NH55DCQ-Y", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), @@ -9204,6 +9218,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x505d, "Thinkpad", ALC298_FIXUP_TPT470_DOCK), SND_PCI_QUIRK(0x17aa, 0x505f, "Thinkpad", ALC298_FIXUP_TPT470_DOCK), SND_PCI_QUIRK(0x17aa, 0x5062, "Thinkpad", ALC298_FIXUP_TPT470_DOCK), + SND_PCI_QUIRK(0x17aa, 0x508b, "Thinkpad X12 Gen 1", ALC287_FIXUP_LEGION_15IMHG05_SPEAKERS), SND_PCI_QUIRK(0x17aa, 0x5109, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x17aa, 0x511e, "Thinkpad", ALC298_FIXUP_TPT470_DOCK), SND_PCI_QUIRK(0x17aa, 0x511f, "Thinkpad", ALC298_FIXUP_TPT470_DOCK), @@ -9397,6 +9412,7 @@ static const struct hda_model_fixup alc269_fixup_models[] = { {.id = ALC298_FIXUP_HUAWEI_MBX_STEREO, .name = "huawei-mbx-stereo"}, {.id = ALC256_FIXUP_MEDION_HEADSET_NO_PRESENCE, .name = 
"alc256-medion-headset"}, {.id = ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET, .name = "alc298-samsung-headphone"}, + {.id = ALC256_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET, .name = "alc256-samsung-headphone"}, {.id = ALC255_FIXUP_XIAOMI_HEADSET_MIC, .name = "alc255-xiaomi-headset"}, {.id = ALC274_FIXUP_HP_MIC, .name = "alc274-hp-mic-detect"}, {.id = ALC245_FIXUP_HP_X360_AMP, .name = "alc245-hp-x360-amp"}, @@ -11067,6 +11083,7 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x069f, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x1632, "HP RP5800", ALC662_FIXUP_HP_RP5800), SND_PCI_QUIRK(0x103c, 0x873e, "HP", ALC671_FIXUP_HP_HEADSET_MIC2), + SND_PCI_QUIRK(0x103c, 0x885f, "HP 288 Pro G8", ALC671_FIXUP_HP_HEADSET_MIC2), SND_PCI_QUIRK(0x1043, 0x1080, "Asus UX501VW", ALC668_FIXUP_HEADSET_MODE), SND_PCI_QUIRK(0x1043, 0x11cd, "Asus N550", ALC662_FIXUP_ASUS_Nx50), SND_PCI_QUIRK(0x1043, 0x129d, "Asus N750", ALC662_FIXUP_ASUS_Nx50), diff --git a/sound/pci/ice1712/ice1724.c b/sound/pci/ice1712/ice1724.c index f6275868877a7..6fab2ad85bbec 100644 --- a/sound/pci/ice1712/ice1724.c +++ b/sound/pci/ice1712/ice1724.c @@ -2519,8 +2519,8 @@ static int snd_vt1724_create(struct snd_card *card, * */ -static int snd_vt1724_probe(struct pci_dev *pci, - const struct pci_device_id *pci_id) +static int __snd_vt1724_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) { static int dev; struct snd_card *card; @@ -2662,6 +2662,12 @@ static int snd_vt1724_probe(struct pci_dev *pci, return 0; } +static int snd_vt1724_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_vt1724_probe(pci, pci_id)); +} + #ifdef CONFIG_PM_SLEEP static int snd_vt1724_suspend(struct device *dev) { diff --git a/sound/pci/intel8x0.c b/sound/pci/intel8x0.c index a51032b3ac4d8..ae285c0a629c8 100644 --- a/sound/pci/intel8x0.c +++ b/sound/pci/intel8x0.c @@ -3109,8 +3109,8 @@ static int 
check_default_spdif_aclink(struct pci_dev *pci) return 0; } -static int snd_intel8x0_probe(struct pci_dev *pci, - const struct pci_device_id *pci_id) +static int __snd_intel8x0_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) { struct snd_card *card; struct intel8x0 *chip; @@ -3189,6 +3189,12 @@ static int snd_intel8x0_probe(struct pci_dev *pci, return 0; } +static int snd_intel8x0_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_intel8x0_probe(pci, pci_id)); +} + static struct pci_driver intel8x0_driver = { .name = KBUILD_MODNAME, .id_table = snd_intel8x0_ids, diff --git a/sound/pci/intel8x0m.c b/sound/pci/intel8x0m.c index 7de3cb2f17b52..2845cc006d0cf 100644 --- a/sound/pci/intel8x0m.c +++ b/sound/pci/intel8x0m.c @@ -1178,8 +1178,8 @@ static struct shortname_table { { 0 }, }; -static int snd_intel8x0m_probe(struct pci_dev *pci, - const struct pci_device_id *pci_id) +static int __snd_intel8x0m_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) { struct snd_card *card; struct intel8x0m *chip; @@ -1225,6 +1225,12 @@ static int snd_intel8x0m_probe(struct pci_dev *pci, return 0; } +static int snd_intel8x0m_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_intel8x0m_probe(pci, pci_id)); +} + static struct pci_driver intel8x0m_driver = { .name = KBUILD_MODNAME, .id_table = snd_intel8x0m_ids, diff --git a/sound/pci/korg1212/korg1212.c b/sound/pci/korg1212/korg1212.c index 5c9e240ff6a9c..33b4f95d65b3f 100644 --- a/sound/pci/korg1212/korg1212.c +++ b/sound/pci/korg1212/korg1212.c @@ -2355,7 +2355,7 @@ snd_korg1212_probe(struct pci_dev *pci, err = snd_korg1212_create(card, pci); if (err < 0) - return err; + goto error; strcpy(card->driver, "korg1212"); strcpy(card->shortname, "korg1212"); @@ -2366,10 +2366,14 @@ snd_korg1212_probe(struct pci_dev *pci, err = snd_card_register(card); if (err < 0) - return err; + 
goto error; pci_set_drvdata(pci, card); dev++; return 0; + + error: + snd_card_free(card); + return err; } static struct pci_driver korg1212_driver = { diff --git a/sound/pci/lola/lola.c b/sound/pci/lola/lola.c index 5269a1d396a5b..1aa30e90b86a7 100644 --- a/sound/pci/lola/lola.c +++ b/sound/pci/lola/lola.c @@ -637,8 +637,8 @@ static int lola_create(struct snd_card *card, struct pci_dev *pci, int dev) return 0; } -static int lola_probe(struct pci_dev *pci, - const struct pci_device_id *pci_id) +static int __lola_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) { static int dev; struct snd_card *card; @@ -687,6 +687,12 @@ static int lola_probe(struct pci_dev *pci, return 0; } +static int lola_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __lola_probe(pci, pci_id)); +} + /* PCI IDs */ static const struct pci_device_id lola_ids[] = { { PCI_VDEVICE(DIGIGRAM, 0x0001) }, diff --git a/sound/pci/lx6464es/lx6464es.c b/sound/pci/lx6464es/lx6464es.c index 168a1084f7303..bd9b6148dd6fb 100644 --- a/sound/pci/lx6464es/lx6464es.c +++ b/sound/pci/lx6464es/lx6464es.c @@ -1019,7 +1019,7 @@ static int snd_lx6464es_probe(struct pci_dev *pci, err = snd_lx6464es_create(card, pci); if (err < 0) { dev_err(card->dev, "error during snd_lx6464es_create\n"); - return err; + goto error; } strcpy(card->driver, "LX6464ES"); @@ -1036,12 +1036,16 @@ static int snd_lx6464es_probe(struct pci_dev *pci, err = snd_card_register(card); if (err < 0) - return err; + goto error; dev_dbg(chip->card->dev, "initialization successful\n"); pci_set_drvdata(pci, card); dev++; return 0; + + error: + snd_card_free(card); + return err; } static struct pci_driver lx6464es_driver = { diff --git a/sound/pci/maestro3.c b/sound/pci/maestro3.c index 056838ead21d6..261850775c807 100644 --- a/sound/pci/maestro3.c +++ b/sound/pci/maestro3.c @@ -2637,7 +2637,7 @@ snd_m3_create(struct snd_card *card, struct pci_dev *pci, /* */ static int 
-snd_m3_probe(struct pci_dev *pci, const struct pci_device_id *pci_id) +__snd_m3_probe(struct pci_dev *pci, const struct pci_device_id *pci_id) { static int dev; struct snd_card *card; @@ -2702,6 +2702,12 @@ snd_m3_probe(struct pci_dev *pci, const struct pci_device_id *pci_id) return 0; } +static int +snd_m3_probe(struct pci_dev *pci, const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_m3_probe(pci, pci_id)); +} + static struct pci_driver m3_driver = { .name = KBUILD_MODNAME, .id_table = snd_m3_ids, diff --git a/sound/pci/nm256/nm256.c b/sound/pci/nm256/nm256.c index c9c178504959e..f99a1e96e9231 100644 --- a/sound/pci/nm256/nm256.c +++ b/sound/pci/nm256/nm256.c @@ -1573,7 +1573,6 @@ snd_nm256_create(struct snd_card *card, struct pci_dev *pci) chip->coeffs_current = 0; snd_nm256_init_chip(chip); - card->private_free = snd_nm256_free; // pci_set_master(pci); /* needed? */ return 0; @@ -1680,6 +1679,7 @@ static int snd_nm256_probe(struct pci_dev *pci, err = snd_card_register(card); if (err < 0) return err; + card->private_free = snd_nm256_free; pci_set_drvdata(pci, card); return 0; diff --git a/sound/pci/oxygen/oxygen_lib.c b/sound/pci/oxygen/oxygen_lib.c index 4fb3f2484fdba..92ffe9dc20c55 100644 --- a/sound/pci/oxygen/oxygen_lib.c +++ b/sound/pci/oxygen/oxygen_lib.c @@ -576,7 +576,7 @@ static void oxygen_card_free(struct snd_card *card) mutex_destroy(&chip->mutex); } -int oxygen_pci_probe(struct pci_dev *pci, int index, char *id, +static int __oxygen_pci_probe(struct pci_dev *pci, int index, char *id, struct module *owner, const struct pci_device_id *ids, int (*get_model)(struct oxygen *chip, @@ -701,6 +701,16 @@ int oxygen_pci_probe(struct pci_dev *pci, int index, char *id, pci_set_drvdata(pci, card); return 0; } + +int oxygen_pci_probe(struct pci_dev *pci, int index, char *id, + struct module *owner, + const struct pci_device_id *ids, + int (*get_model)(struct oxygen *chip, + const struct pci_device_id *id)) +{ + return 
snd_card_free_on_error(&pci->dev, + __oxygen_pci_probe(pci, index, id, owner, ids, get_model)); +} EXPORT_SYMBOL(oxygen_pci_probe); #ifdef CONFIG_PM_SLEEP diff --git a/sound/pci/riptide/riptide.c b/sound/pci/riptide/riptide.c index 5a987c683c41c..b37c877c2c160 100644 --- a/sound/pci/riptide/riptide.c +++ b/sound/pci/riptide/riptide.c @@ -2023,7 +2023,7 @@ static void snd_riptide_joystick_remove(struct pci_dev *pci) #endif static int -snd_card_riptide_probe(struct pci_dev *pci, const struct pci_device_id *pci_id) +__snd_card_riptide_probe(struct pci_dev *pci, const struct pci_device_id *pci_id) { static int dev; struct snd_card *card; @@ -2124,6 +2124,12 @@ snd_card_riptide_probe(struct pci_dev *pci, const struct pci_device_id *pci_id) return 0; } +static int +snd_card_riptide_probe(struct pci_dev *pci, const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_card_riptide_probe(pci, pci_id)); +} + static struct pci_driver driver = { .name = KBUILD_MODNAME, .id_table = snd_riptide_ids, diff --git a/sound/pci/rme32.c b/sound/pci/rme32.c index 5b6bd9f0b2f77..9c0ac025e1432 100644 --- a/sound/pci/rme32.c +++ b/sound/pci/rme32.c @@ -1875,7 +1875,7 @@ static void snd_rme32_card_free(struct snd_card *card) } static int -snd_rme32_probe(struct pci_dev *pci, const struct pci_device_id *pci_id) +__snd_rme32_probe(struct pci_dev *pci, const struct pci_device_id *pci_id) { static int dev; struct rme32 *rme32; @@ -1927,6 +1927,12 @@ snd_rme32_probe(struct pci_dev *pci, const struct pci_device_id *pci_id) return 0; } +static int +snd_rme32_probe(struct pci_dev *pci, const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_rme32_probe(pci, pci_id)); +} + static struct pci_driver rme32_driver = { .name = KBUILD_MODNAME, .id_table = snd_rme32_ids, diff --git a/sound/pci/rme96.c b/sound/pci/rme96.c index 8fc8115049203..bccb7e0d3d116 100644 --- a/sound/pci/rme96.c +++ b/sound/pci/rme96.c @@ -2430,8 +2430,8 @@ static void 
snd_rme96_card_free(struct snd_card *card) } static int -snd_rme96_probe(struct pci_dev *pci, - const struct pci_device_id *pci_id) +__snd_rme96_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) { static int dev; struct rme96 *rme96; @@ -2498,6 +2498,12 @@ snd_rme96_probe(struct pci_dev *pci, return 0; } +static int snd_rme96_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_rme96_probe(pci, pci_id)); +} + static struct pci_driver rme96_driver = { .name = KBUILD_MODNAME, .id_table = snd_rme96_ids, diff --git a/sound/pci/rme9652/hdsp.c b/sound/pci/rme9652/hdsp.c index 96c12dfb24cf9..3db641318d3ae 100644 --- a/sound/pci/rme9652/hdsp.c +++ b/sound/pci/rme9652/hdsp.c @@ -5444,17 +5444,21 @@ static int snd_hdsp_probe(struct pci_dev *pci, hdsp->pci = pci; err = snd_hdsp_create(card, hdsp); if (err) - return err; + goto error; strcpy(card->shortname, "Hammerfall DSP"); sprintf(card->longname, "%s at 0x%lx, irq %d", hdsp->card_name, hdsp->port, hdsp->irq); err = snd_card_register(card); if (err) - return err; + goto error; pci_set_drvdata(pci, card); dev++; return 0; + + error: + snd_card_free(card); + return err; } static struct pci_driver hdsp_driver = { diff --git a/sound/pci/rme9652/hdspm.c b/sound/pci/rme9652/hdspm.c index ff06ee82607cf..fa1812e7a49dc 100644 --- a/sound/pci/rme9652/hdspm.c +++ b/sound/pci/rme9652/hdspm.c @@ -6895,7 +6895,7 @@ static int snd_hdspm_probe(struct pci_dev *pci, err = snd_hdspm_create(card, hdspm); if (err < 0) - return err; + goto error; if (hdspm->io_type != MADIface) { snprintf(card->shortname, sizeof(card->shortname), "%s_%x", @@ -6914,12 +6914,16 @@ static int snd_hdspm_probe(struct pci_dev *pci, err = snd_card_register(card); if (err < 0) - return err; + goto error; pci_set_drvdata(pci, card); dev++; return 0; + + error: + snd_card_free(card); + return err; } static struct pci_driver hdspm_driver = { diff --git a/sound/pci/rme9652/rme9652.c 
b/sound/pci/rme9652/rme9652.c index 7755e19aa7761..1d614fe89a6ae 100644 --- a/sound/pci/rme9652/rme9652.c +++ b/sound/pci/rme9652/rme9652.c @@ -2572,7 +2572,7 @@ static int snd_rme9652_probe(struct pci_dev *pci, rme9652->pci = pci; err = snd_rme9652_create(card, rme9652, precise_ptr[dev]); if (err) - return err; + goto error; strcpy(card->shortname, rme9652->card_name); @@ -2580,10 +2580,14 @@ static int snd_rme9652_probe(struct pci_dev *pci, card->shortname, rme9652->port, rme9652->irq); err = snd_card_register(card); if (err) - return err; + goto error; pci_set_drvdata(pci, card); dev++; return 0; + + error: + snd_card_free(card); + return err; } static struct pci_driver rme9652_driver = { diff --git a/sound/pci/sis7019.c b/sound/pci/sis7019.c index 0b722b0e0604b..fabe393607f8f 100644 --- a/sound/pci/sis7019.c +++ b/sound/pci/sis7019.c @@ -1331,8 +1331,8 @@ static int sis_chip_create(struct snd_card *card, return 0; } -static int snd_sis7019_probe(struct pci_dev *pci, - const struct pci_device_id *pci_id) +static int __snd_sis7019_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) { struct snd_card *card; struct sis7019 *sis; @@ -1352,8 +1352,8 @@ static int snd_sis7019_probe(struct pci_dev *pci, if (!codecs) codecs = SIS_PRIMARY_CODEC_PRESENT; - rc = snd_card_new(&pci->dev, index, id, THIS_MODULE, - sizeof(*sis), &card); + rc = snd_devm_card_new(&pci->dev, index, id, THIS_MODULE, + sizeof(*sis), &card); if (rc < 0) return rc; @@ -1386,6 +1386,12 @@ static int snd_sis7019_probe(struct pci_dev *pci, return 0; } +static int snd_sis7019_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_sis7019_probe(pci, pci_id)); +} + static struct pci_driver sis7019_driver = { .name = KBUILD_MODNAME, .id_table = snd_sis7019_ids, diff --git a/sound/pci/sonicvibes.c b/sound/pci/sonicvibes.c index c8c49881008fd..f91cbf6eeca0f 100644 --- a/sound/pci/sonicvibes.c +++ b/sound/pci/sonicvibes.c @@ -1387,8 
+1387,8 @@ static int snd_sonicvibes_midi(struct sonicvibes *sonic, return 0; } -static int snd_sonic_probe(struct pci_dev *pci, - const struct pci_device_id *pci_id) +static int __snd_sonic_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) { static int dev; struct snd_card *card; @@ -1459,6 +1459,12 @@ static int snd_sonic_probe(struct pci_dev *pci, return 0; } +static int snd_sonic_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_sonic_probe(pci, pci_id)); +} + static struct pci_driver sonicvibes_driver = { .name = KBUILD_MODNAME, .id_table = snd_sonic_ids, diff --git a/sound/pci/via82xx.c b/sound/pci/via82xx.c index 65514f7e42d7d..361b83fd721e6 100644 --- a/sound/pci/via82xx.c +++ b/sound/pci/via82xx.c @@ -2458,8 +2458,8 @@ static int check_dxs_list(struct pci_dev *pci, int revision) return VIA_DXS_48K; }; -static int snd_via82xx_probe(struct pci_dev *pci, - const struct pci_device_id *pci_id) +static int __snd_via82xx_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) { struct snd_card *card; struct via82xx *chip; @@ -2569,6 +2569,12 @@ static int snd_via82xx_probe(struct pci_dev *pci, return 0; } +static int snd_via82xx_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_via82xx_probe(pci, pci_id)); +} + static struct pci_driver via82xx_driver = { .name = KBUILD_MODNAME, .id_table = snd_via82xx_ids, diff --git a/sound/pci/via82xx_modem.c b/sound/pci/via82xx_modem.c index 234f7fbed2364..ca7f024bf8ec6 100644 --- a/sound/pci/via82xx_modem.c +++ b/sound/pci/via82xx_modem.c @@ -1103,8 +1103,8 @@ static int snd_via82xx_create(struct snd_card *card, } -static int snd_via82xx_probe(struct pci_dev *pci, - const struct pci_device_id *pci_id) +static int __snd_via82xx_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) { struct snd_card *card; struct via82xx_modem *chip; @@ -1157,6 +1157,12 @@ 
static int snd_via82xx_probe(struct pci_dev *pci, return 0; } +static int snd_via82xx_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_via82xx_probe(pci, pci_id)); +} + static struct pci_driver via82xx_modem_driver = { .name = KBUILD_MODNAME, .id_table = snd_via82xx_modem_ids, diff --git a/sound/soc/amd/acp/acp-mach-common.c b/sound/soc/amd/acp/acp-mach-common.c index cd05ee2802c9e..5247015e8b316 100644 --- a/sound/soc/amd/acp/acp-mach-common.c +++ b/sound/soc/amd/acp/acp-mach-common.c @@ -556,6 +556,8 @@ int acp_legacy_dai_links_create(struct snd_soc_card *card) num_links++; links = devm_kzalloc(dev, sizeof(struct snd_soc_dai_link) * num_links, GFP_KERNEL); + if (!links) + return -ENOMEM; if (drv_data->hs_cpu_id == I2S_SP) { links[i].name = "acp-headset-codec"; diff --git a/sound/soc/amd/vangogh/acp5x-mach.c b/sound/soc/amd/vangogh/acp5x-mach.c index 14cf325e4b237..5d7a17755fa7f 100644 --- a/sound/soc/amd/vangogh/acp5x-mach.c +++ b/sound/soc/amd/vangogh/acp5x-mach.c @@ -165,6 +165,7 @@ static int acp5x_cs35l41_hw_params(struct snd_pcm_substream *substream, unsigned int num_codecs = rtd->num_codecs; unsigned int bclk_val; + ret = 0; for (i = 0; i < num_codecs; i++) { codec_dai = asoc_rtd_to_codec(rtd, i); if ((strcmp(codec_dai->name, "spi-VLV1776:00") == 0) || diff --git a/sound/soc/amd/vangogh/acp5x-pcm-dma.c b/sound/soc/amd/vangogh/acp5x-pcm-dma.c index f10de38976cb5..bfca4cf423cf1 100644 --- a/sound/soc/amd/vangogh/acp5x-pcm-dma.c +++ b/sound/soc/amd/vangogh/acp5x-pcm-dma.c @@ -281,7 +281,7 @@ static int acp5x_dma_hw_params(struct snd_soc_component *component, return -EINVAL; } size = params_buffer_bytes(params); - rtd->dma_addr = substream->dma_buffer.addr; + rtd->dma_addr = substream->runtime->dma_addr; rtd->num_pages = (PAGE_ALIGN(size) >> PAGE_SHIFT); config_acp5x_dma(rtd, substream->stream); return 0; @@ -426,51 +426,51 @@ static int acp5x_audio_remove(struct platform_device *pdev) static 
int __maybe_unused acp5x_pcm_resume(struct device *dev) { struct i2s_dev_data *adata; - u32 val, reg_val, frmt_val; + struct i2s_stream_instance *rtd; + u32 val; - reg_val = 0; - frmt_val = 0; adata = dev_get_drvdata(dev); if (adata->play_stream && adata->play_stream->runtime) { - struct i2s_stream_instance *rtd = - adata->play_stream->runtime->private_data; + rtd = adata->play_stream->runtime->private_data; config_acp5x_dma(rtd, SNDRV_PCM_STREAM_PLAYBACK); - switch (rtd->i2s_instance) { - case I2S_HS_INSTANCE: - reg_val = ACP_HSTDM_ITER; - frmt_val = ACP_HSTDM_TXFRMT; - break; - case I2S_SP_INSTANCE: - default: - reg_val = ACP_I2STDM_ITER; - frmt_val = ACP_I2STDM_TXFRMT; + acp_writel((rtd->xfer_resolution << 3), rtd->acp5x_base + ACP_HSTDM_ITER); + if (adata->tdm_mode == TDM_ENABLE) { + acp_writel(adata->tdm_fmt, adata->acp5x_base + ACP_HSTDM_TXFRMT); + val = acp_readl(adata->acp5x_base + ACP_HSTDM_ITER); + acp_writel(val | 0x2, adata->acp5x_base + ACP_HSTDM_ITER); + } + } + if (adata->i2ssp_play_stream && adata->i2ssp_play_stream->runtime) { + rtd = adata->i2ssp_play_stream->runtime->private_data; + config_acp5x_dma(rtd, SNDRV_PCM_STREAM_PLAYBACK); + acp_writel((rtd->xfer_resolution << 3), rtd->acp5x_base + ACP_I2STDM_ITER); + if (adata->tdm_mode == TDM_ENABLE) { + acp_writel(adata->tdm_fmt, adata->acp5x_base + ACP_I2STDM_TXFRMT); + val = acp_readl(adata->acp5x_base + ACP_I2STDM_ITER); + acp_writel(val | 0x2, adata->acp5x_base + ACP_I2STDM_ITER); } - acp_writel((rtd->xfer_resolution << 3), - rtd->acp5x_base + reg_val); } if (adata->capture_stream && adata->capture_stream->runtime) { - struct i2s_stream_instance *rtd = - adata->capture_stream->runtime->private_data; + rtd = adata->capture_stream->runtime->private_data; config_acp5x_dma(rtd, SNDRV_PCM_STREAM_CAPTURE); - switch (rtd->i2s_instance) { - case I2S_HS_INSTANCE: - reg_val = ACP_HSTDM_IRER; - frmt_val = ACP_HSTDM_RXFRMT; - break; - case I2S_SP_INSTANCE: - default: - reg_val = ACP_I2STDM_IRER; - frmt_val = 
ACP_I2STDM_RXFRMT; + acp_writel((rtd->xfer_resolution << 3), rtd->acp5x_base + ACP_HSTDM_IRER); + if (adata->tdm_mode == TDM_ENABLE) { + acp_writel(adata->tdm_fmt, adata->acp5x_base + ACP_HSTDM_RXFRMT); + val = acp_readl(adata->acp5x_base + ACP_HSTDM_IRER); + acp_writel(val | 0x2, adata->acp5x_base + ACP_HSTDM_IRER); } - acp_writel((rtd->xfer_resolution << 3), - rtd->acp5x_base + reg_val); } - if (adata->tdm_mode == TDM_ENABLE) { - acp_writel(adata->tdm_fmt, adata->acp5x_base + frmt_val); - val = acp_readl(adata->acp5x_base + reg_val); - acp_writel(val | 0x2, adata->acp5x_base + reg_val); + if (adata->i2ssp_capture_stream && adata->i2ssp_capture_stream->runtime) { + rtd = adata->i2ssp_capture_stream->runtime->private_data; + config_acp5x_dma(rtd, SNDRV_PCM_STREAM_CAPTURE); + acp_writel((rtd->xfer_resolution << 3), rtd->acp5x_base + ACP_I2STDM_IRER); + if (adata->tdm_mode == TDM_ENABLE) { + acp_writel(adata->tdm_fmt, adata->acp5x_base + ACP_I2STDM_RXFRMT); + val = acp_readl(adata->acp5x_base + ACP_I2STDM_IRER); + acp_writel(val | 0x2, adata->acp5x_base + ACP_I2STDM_IRER); + } } acp_writel(1, adata->acp5x_base + ACP_EXTERNAL_INTR_ENB); return 0; diff --git a/sound/soc/atmel/atmel_ssc_dai.c b/sound/soc/atmel/atmel_ssc_dai.c index 26e2bc690d86e..c1dea8d624164 100644 --- a/sound/soc/atmel/atmel_ssc_dai.c +++ b/sound/soc/atmel/atmel_ssc_dai.c @@ -280,7 +280,10 @@ static int atmel_ssc_startup(struct snd_pcm_substream *substream, /* Enable PMC peripheral clock for this SSC */ pr_debug("atmel_ssc_dai: Starting clock\n"); - clk_enable(ssc_p->ssc->clk); + ret = clk_enable(ssc_p->ssc->clk); + if (ret) + return ret; + ssc_p->mck_rate = clk_get_rate(ssc_p->ssc->clk); /* Reset the SSC unless initialized to keep it in a clean state */ diff --git a/sound/soc/atmel/mikroe-proto.c b/sound/soc/atmel/mikroe-proto.c index 627564c18c270..ce46d8a0b7e43 100644 --- a/sound/soc/atmel/mikroe-proto.c +++ b/sound/soc/atmel/mikroe-proto.c @@ -115,7 +115,8 @@ static int snd_proto_probe(struct 
platform_device *pdev) cpu_np = of_parse_phandle(np, "i2s-controller", 0); if (!cpu_np) { dev_err(&pdev->dev, "i2s-controller missing\n"); - return -EINVAL; + ret = -EINVAL; + goto put_codec_node; } dai->cpus->of_node = cpu_np; dai->platforms->of_node = cpu_np; @@ -125,7 +126,8 @@ static int snd_proto_probe(struct platform_device *pdev) &bitclkmaster, &framemaster); if (bitclkmaster != framemaster) { dev_err(&pdev->dev, "Must be the same bitclock and frame master\n"); - return -EINVAL; + ret = -EINVAL; + goto put_cpu_node; } if (bitclkmaster) { if (codec_np == bitclkmaster) @@ -136,18 +138,20 @@ static int snd_proto_probe(struct platform_device *pdev) dai_fmt |= snd_soc_daifmt_parse_clock_provider_as_flag(np, NULL); } - of_node_put(bitclkmaster); - of_node_put(framemaster); - dai->dai_fmt = dai_fmt; - - of_node_put(codec_np); - of_node_put(cpu_np); + dai->dai_fmt = dai_fmt; ret = snd_soc_register_card(&snd_proto); if (ret) dev_err_probe(&pdev->dev, ret, "snd_soc_register_card() failed\n"); + +put_cpu_node: + of_node_put(bitclkmaster); + of_node_put(framemaster); + of_node_put(cpu_np); +put_codec_node: + of_node_put(codec_np); return ret; } diff --git a/sound/soc/atmel/sam9g20_wm8731.c b/sound/soc/atmel/sam9g20_wm8731.c index 915da92e1ec82..33e43013ff770 100644 --- a/sound/soc/atmel/sam9g20_wm8731.c +++ b/sound/soc/atmel/sam9g20_wm8731.c @@ -214,6 +214,7 @@ static int at91sam9g20ek_audio_probe(struct platform_device *pdev) cpu_np = of_parse_phandle(np, "atmel,ssc-controller", 0); if (!cpu_np) { dev_err(&pdev->dev, "dai and pcm info missing\n"); + of_node_put(codec_np); return -EINVAL; } at91sam9g20ek_dai.cpus->of_node = cpu_np; diff --git a/sound/soc/atmel/sam9x5_wm8731.c b/sound/soc/atmel/sam9x5_wm8731.c index 7c45dc4f8c1bb..99310e40e7a62 100644 --- a/sound/soc/atmel/sam9x5_wm8731.c +++ b/sound/soc/atmel/sam9x5_wm8731.c @@ -142,7 +142,7 @@ static int sam9x5_wm8731_driver_probe(struct platform_device *pdev) if (!cpu_np) { dev_err(&pdev->dev, "atmel,ssc-controller 
node missing\n"); ret = -EINVAL; - goto out; + goto out_put_codec_np; } dai->cpus->of_node = cpu_np; dai->platforms->of_node = cpu_np; @@ -153,12 +153,9 @@ static int sam9x5_wm8731_driver_probe(struct platform_device *pdev) if (ret != 0) { dev_err(&pdev->dev, "Failed to set SSC %d for audio: %d\n", ret, priv->ssc_id); - goto out; + goto out_put_cpu_np; } - of_node_put(codec_np); - of_node_put(cpu_np); - ret = devm_snd_soc_register_card(&pdev->dev, card); if (ret) { dev_err(&pdev->dev, "Platform device allocation failed\n"); @@ -167,10 +164,14 @@ static int sam9x5_wm8731_driver_probe(struct platform_device *pdev) dev_dbg(&pdev->dev, "%s ok\n", __func__); - return ret; + goto out_put_cpu_np; out_put_audio: atmel_ssc_put_audio(priv->ssc_id); +out_put_cpu_np: + of_node_put(cpu_np); +out_put_codec_np: + of_node_put(codec_np); out: return ret; } diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig index d3e5ae8310ef2..30c00380499cd 100644 --- a/sound/soc/codecs/Kconfig +++ b/sound/soc/codecs/Kconfig @@ -733,6 +733,7 @@ config SND_SOC_CS4349 config SND_SOC_CS47L15 tristate + depends on MFD_CS47L15 config SND_SOC_CS47L24 tristate @@ -740,15 +741,19 @@ config SND_SOC_CS47L24 config SND_SOC_CS47L35 tristate + depends on MFD_CS47L35 config SND_SOC_CS47L85 tristate + depends on MFD_CS47L85 config SND_SOC_CS47L90 tristate + depends on MFD_CS47L90 config SND_SOC_CS47L92 tristate + depends on MFD_CS47L92 # Cirrus Logic Quad-Channel ADC config SND_SOC_CS53L30 diff --git a/sound/soc/codecs/cs35l41.c b/sound/soc/codecs/cs35l41.c index 77a0176946459..f3787d77f892b 100644 --- a/sound/soc/codecs/cs35l41.c +++ b/sound/soc/codecs/cs35l41.c @@ -1035,8 +1035,8 @@ static int cs35l41_irq_gpio_config(struct cs35l41_private *cs35l41) regmap_update_bits(cs35l41->regmap, CS35L41_GPIO2_CTRL1, CS35L41_GPIO_POL_MASK | CS35L41_GPIO_DIR_MASK, - irq_gpio_cfg1->irq_pol_inv << CS35L41_GPIO_POL_SHIFT | - !irq_gpio_cfg1->irq_out_en << CS35L41_GPIO_DIR_SHIFT); + irq_gpio_cfg2->irq_pol_inv << 
CS35L41_GPIO_POL_SHIFT | + !irq_gpio_cfg2->irq_out_en << CS35L41_GPIO_DIR_SHIFT); regmap_update_bits(cs35l41->regmap, CS35L41_GPIO_PAD_CONTROL, CS35L41_GPIO1_CTRL_MASK | CS35L41_GPIO2_CTRL_MASK, @@ -1091,7 +1091,7 @@ static struct snd_soc_dai_driver cs35l41_dai[] = { .capture = { .stream_name = "AMP Capture", .channels_min = 1, - .channels_max = 8, + .channels_max = 4, .rates = SNDRV_PCM_RATE_KNOT, .formats = CS35L41_TX_FORMATS, }, diff --git a/sound/soc/codecs/cs42l42.c b/sound/soc/codecs/cs42l42.c index 43d98bdb5b5b0..2c294868008ed 100644 --- a/sound/soc/codecs/cs42l42.c +++ b/sound/soc/codecs/cs42l42.c @@ -1637,7 +1637,11 @@ static irqreturn_t cs42l42_irq_thread(int irq, void *data) mutex_lock(&cs42l42->jack_detect_mutex); - /* Check auto-detect status */ + /* + * Check auto-detect status. Don't assume a previous unplug event has + * cleared the flags. If the jack is unplugged and plugged during + * system suspend there won't have been an unplug event. + */ if ((~masks[5]) & irq_params_table[5].mask) { if (stickies[5] & CS42L42_HSDET_AUTO_DONE_MASK) { cs42l42_process_hs_type_detect(cs42l42); @@ -1645,11 +1649,15 @@ static irqreturn_t cs42l42_irq_thread(int irq, void *data) case CS42L42_PLUG_CTIA: case CS42L42_PLUG_OMTP: snd_soc_jack_report(cs42l42->jack, SND_JACK_HEADSET, - SND_JACK_HEADSET); + SND_JACK_HEADSET | + SND_JACK_BTN_0 | SND_JACK_BTN_1 | + SND_JACK_BTN_2 | SND_JACK_BTN_3); break; case CS42L42_PLUG_HEADPHONE: snd_soc_jack_report(cs42l42->jack, SND_JACK_HEADPHONE, - SND_JACK_HEADPHONE); + SND_JACK_HEADSET | + SND_JACK_BTN_0 | SND_JACK_BTN_1 | + SND_JACK_BTN_2 | SND_JACK_BTN_3); break; default: break; diff --git a/sound/soc/codecs/lpass-rx-macro.c b/sound/soc/codecs/lpass-rx-macro.c index 6ffe88345de5f..3a3dc0539d921 100644 --- a/sound/soc/codecs/lpass-rx-macro.c +++ b/sound/soc/codecs/lpass-rx-macro.c @@ -2039,6 +2039,10 @@ static int rx_macro_load_compander_coeff(struct snd_soc_component *component, int i; int hph_pwr_mode; + /* AUX does not have 
compander */ + if (comp == INTERP_AUX) + return 0; + if (!rx->comp_enabled[comp]) return 0; @@ -2268,7 +2272,7 @@ static int rx_macro_mux_get(struct snd_kcontrol *kcontrol, struct snd_soc_component *component = snd_soc_dapm_to_component(widget->dapm); struct rx_macro *rx = snd_soc_component_get_drvdata(component); - ucontrol->value.integer.value[0] = + ucontrol->value.enumerated.item[0] = rx->rx_port_value[widget->shift]; return 0; } @@ -2280,7 +2284,7 @@ static int rx_macro_mux_put(struct snd_kcontrol *kcontrol, struct snd_soc_component *component = snd_soc_dapm_to_component(widget->dapm); struct soc_enum *e = (struct soc_enum *)kcontrol->private_value; struct snd_soc_dapm_update *update = NULL; - u32 rx_port_value = ucontrol->value.integer.value[0]; + u32 rx_port_value = ucontrol->value.enumerated.item[0]; u32 aif_rst; struct rx_macro *rx = snd_soc_component_get_drvdata(component); @@ -2392,7 +2396,7 @@ static int rx_macro_get_hph_pwr_mode(struct snd_kcontrol *kcontrol, struct snd_soc_component *component = snd_soc_kcontrol_component(kcontrol); struct rx_macro *rx = snd_soc_component_get_drvdata(component); - ucontrol->value.integer.value[0] = rx->hph_pwr_mode; + ucontrol->value.enumerated.item[0] = rx->hph_pwr_mode; return 0; } @@ -2402,7 +2406,7 @@ static int rx_macro_put_hph_pwr_mode(struct snd_kcontrol *kcontrol, struct snd_soc_component *component = snd_soc_kcontrol_component(kcontrol); struct rx_macro *rx = snd_soc_component_get_drvdata(component); - rx->hph_pwr_mode = ucontrol->value.integer.value[0]; + rx->hph_pwr_mode = ucontrol->value.enumerated.item[0]; return 0; } @@ -3542,6 +3546,8 @@ static int rx_macro_probe(struct platform_device *pdev) return PTR_ERR(base); rx->regmap = devm_regmap_init_mmio(dev, base, &rx_regmap_config); + if (IS_ERR(rx->regmap)) + return PTR_ERR(rx->regmap); dev_set_drvdata(dev, rx); diff --git a/sound/soc/codecs/lpass-tx-macro.c b/sound/soc/codecs/lpass-tx-macro.c index a4c0a155af565..9c96ab1bf84f9 100644 --- 
a/sound/soc/codecs/lpass-tx-macro.c +++ b/sound/soc/codecs/lpass-tx-macro.c @@ -1821,6 +1821,8 @@ static int tx_macro_probe(struct platform_device *pdev) } tx->regmap = devm_regmap_init_mmio(dev, base, &tx_regmap_config); + if (IS_ERR(tx->regmap)) + return PTR_ERR(tx->regmap); dev_set_drvdata(dev, tx); diff --git a/sound/soc/codecs/lpass-va-macro.c b/sound/soc/codecs/lpass-va-macro.c index 11147e35689b2..e14c277e6a8b6 100644 --- a/sound/soc/codecs/lpass-va-macro.c +++ b/sound/soc/codecs/lpass-va-macro.c @@ -780,7 +780,7 @@ static int va_macro_dec_mode_get(struct snd_kcontrol *kcontrol, struct soc_enum *e = (struct soc_enum *)kcontrol->private_value; int path = e->shift_l; - ucontrol->value.integer.value[0] = va->dec_mode[path]; + ucontrol->value.enumerated.item[0] = va->dec_mode[path]; return 0; } @@ -789,7 +789,7 @@ static int va_macro_dec_mode_put(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) { struct snd_soc_component *comp = snd_soc_kcontrol_component(kcontrol); - int value = ucontrol->value.integer.value[0]; + int value = ucontrol->value.enumerated.item[0]; struct soc_enum *e = (struct soc_enum *)kcontrol->private_value; int path = e->shift_l; struct va_macro *va = snd_soc_component_get_drvdata(comp); diff --git a/sound/soc/codecs/lpass-wsa-macro.c b/sound/soc/codecs/lpass-wsa-macro.c index 75baf8eb70299..69d2915f40d88 100644 --- a/sound/soc/codecs/lpass-wsa-macro.c +++ b/sound/soc/codecs/lpass-wsa-macro.c @@ -2405,6 +2405,8 @@ static int wsa_macro_probe(struct platform_device *pdev) return PTR_ERR(base); wsa->regmap = devm_regmap_init_mmio(dev, base, &wsa_regmap_config); + if (IS_ERR(wsa->regmap)) + return PTR_ERR(wsa->regmap); dev_set_drvdata(dev, wsa); diff --git a/sound/soc/codecs/max98927.c b/sound/soc/codecs/max98927.c index 5ba5f876eab87..fd84780bf689f 100644 --- a/sound/soc/codecs/max98927.c +++ b/sound/soc/codecs/max98927.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include "max98927.h" diff --git 
a/sound/soc/codecs/msm8916-wcd-analog.c b/sound/soc/codecs/msm8916-wcd-analog.c index 485cda46dbb9b..e52a559c52d68 100644 --- a/sound/soc/codecs/msm8916-wcd-analog.c +++ b/sound/soc/codecs/msm8916-wcd-analog.c @@ -1222,8 +1222,10 @@ static int pm8916_wcd_analog_spmi_probe(struct platform_device *pdev) } irq = platform_get_irq_byname(pdev, "mbhc_switch_int"); - if (irq < 0) - return irq; + if (irq < 0) { + ret = irq; + goto err_disable_clk; + } ret = devm_request_threaded_irq(dev, irq, NULL, pm8916_mbhc_switch_irq_handler, @@ -1235,8 +1237,10 @@ static int pm8916_wcd_analog_spmi_probe(struct platform_device *pdev) if (priv->mbhc_btn_enabled) { irq = platform_get_irq_byname(pdev, "mbhc_but_press_det"); - if (irq < 0) - return irq; + if (irq < 0) { + ret = irq; + goto err_disable_clk; + } ret = devm_request_threaded_irq(dev, irq, NULL, mbhc_btn_press_irq_handler, @@ -1247,8 +1251,10 @@ static int pm8916_wcd_analog_spmi_probe(struct platform_device *pdev) dev_err(dev, "cannot request mbhc button press irq\n"); irq = platform_get_irq_byname(pdev, "mbhc_but_rel_det"); - if (irq < 0) - return irq; + if (irq < 0) { + ret = irq; + goto err_disable_clk; + } ret = devm_request_threaded_irq(dev, irq, NULL, mbhc_btn_release_irq_handler, @@ -1265,6 +1271,10 @@ static int pm8916_wcd_analog_spmi_probe(struct platform_device *pdev) return devm_snd_soc_register_component(dev, &pm8916_wcd_analog, pm8916_wcd_analog_dai, ARRAY_SIZE(pm8916_wcd_analog_dai)); + +err_disable_clk: + clk_disable_unprepare(priv->mclk); + return ret; } static int pm8916_wcd_analog_spmi_remove(struct platform_device *pdev) diff --git a/sound/soc/codecs/msm8916-wcd-digital.c b/sound/soc/codecs/msm8916-wcd-digital.c index fcc10c8bc6259..9ad7fc0baf072 100644 --- a/sound/soc/codecs/msm8916-wcd-digital.c +++ b/sound/soc/codecs/msm8916-wcd-digital.c @@ -1201,7 +1201,7 @@ static int msm8916_wcd_digital_probe(struct platform_device *pdev) ret = clk_prepare_enable(priv->mclk); if (ret < 0) { dev_err(dev, "failed to 
enable mclk %d\n", ret); - return ret; + goto err_clk; } dev_set_drvdata(dev, priv); @@ -1209,6 +1209,9 @@ static int msm8916_wcd_digital_probe(struct platform_device *pdev) return devm_snd_soc_register_component(dev, &msm8916_wcd_digital, msm8916_wcd_digital_dai, ARRAY_SIZE(msm8916_wcd_digital_dai)); +err_clk: + clk_disable_unprepare(priv->ahbclk); + return ret; } static int msm8916_wcd_digital_remove(struct platform_device *pdev) diff --git a/sound/soc/codecs/mt6358.c b/sound/soc/codecs/mt6358.c index 9b263a9a669dc..4c7b5d940799b 100644 --- a/sound/soc/codecs/mt6358.c +++ b/sound/soc/codecs/mt6358.c @@ -107,6 +107,7 @@ int mt6358_set_mtkaif_protocol(struct snd_soc_component *cmpnt, priv->mtkaif_protocol = mtkaif_protocol; return 0; } +EXPORT_SYMBOL_GPL(mt6358_set_mtkaif_protocol); static void playback_gpio_set(struct mt6358_priv *priv) { @@ -273,6 +274,7 @@ int mt6358_mtkaif_calibration_enable(struct snd_soc_component *cmpnt) 1 << RG_AUD_PAD_TOP_DAT_MISO_LOOPBACK_SFT); return 0; } +EXPORT_SYMBOL_GPL(mt6358_mtkaif_calibration_enable); int mt6358_mtkaif_calibration_disable(struct snd_soc_component *cmpnt) { @@ -296,6 +298,7 @@ int mt6358_mtkaif_calibration_disable(struct snd_soc_component *cmpnt) capture_gpio_reset(priv); return 0; } +EXPORT_SYMBOL_GPL(mt6358_mtkaif_calibration_disable); int mt6358_set_mtkaif_calibration_phase(struct snd_soc_component *cmpnt, int phase_1, int phase_2) @@ -310,6 +313,7 @@ int mt6358_set_mtkaif_calibration_phase(struct snd_soc_component *cmpnt, phase_2 << RG_AUD_PAD_TOP_PHASE_MODE2_SFT); return 0; } +EXPORT_SYMBOL_GPL(mt6358_set_mtkaif_calibration_phase); /* dl pga gain */ enum { diff --git a/sound/soc/codecs/rk817_codec.c b/sound/soc/codecs/rk817_codec.c index 03f24edfe4f64..8fffe378618d0 100644 --- a/sound/soc/codecs/rk817_codec.c +++ b/sound/soc/codecs/rk817_codec.c @@ -508,12 +508,14 @@ static int rk817_platform_probe(struct platform_device *pdev) if (ret < 0) { dev_err(&pdev->dev, "%s() register codec error %d\n", __func__, 
ret); - goto err_; + goto err_clk; } return 0; -err_: +err_clk: + clk_disable_unprepare(rk817_codec_data->mclk); +err_: return ret; } diff --git a/sound/soc/codecs/rt5663.c b/sound/soc/codecs/rt5663.c index 2138f62e6af5d..3a8fba101b20f 100644 --- a/sound/soc/codecs/rt5663.c +++ b/sound/soc/codecs/rt5663.c @@ -3478,6 +3478,8 @@ static int rt5663_parse_dp(struct rt5663_priv *rt5663, struct device *dev) table_size = sizeof(struct impedance_mapping_table) * rt5663->pdata.impedance_sensing_num; rt5663->imp_table = devm_kzalloc(dev, table_size, GFP_KERNEL); + if (!rt5663->imp_table) + return -ENOMEM; ret = device_property_read_u32_array(dev, "realtek,impedance_sensing_table", (u32 *)rt5663->imp_table, table_size); diff --git a/sound/soc/codecs/rt5682s.c b/sound/soc/codecs/rt5682s.c index 1e662d1be2b3e..92b8753f1267b 100644 --- a/sound/soc/codecs/rt5682s.c +++ b/sound/soc/codecs/rt5682s.c @@ -822,6 +822,7 @@ static void rt5682s_jack_detect_handler(struct work_struct *work) { struct rt5682s_priv *rt5682s = container_of(work, struct rt5682s_priv, jack_detect_work.work); + struct snd_soc_dapm_context *dapm; int val, btn_type; if (!rt5682s->component || !rt5682s->component->card || @@ -832,7 +833,9 @@ static void rt5682s_jack_detect_handler(struct work_struct *work) return; } - mutex_lock(&rt5682s->jdet_mutex); + dapm = snd_soc_component_get_dapm(rt5682s->component); + + snd_soc_dapm_mutex_lock(dapm); mutex_lock(&rt5682s->calibrate_mutex); val = snd_soc_component_read(rt5682s->component, RT5682S_AJD1_CTRL) @@ -889,6 +892,9 @@ static void rt5682s_jack_detect_handler(struct work_struct *work) rt5682s->irq_work_delay_time = 50; } + mutex_unlock(&rt5682s->calibrate_mutex); + snd_soc_dapm_mutex_unlock(dapm); + snd_soc_jack_report(rt5682s->hs_jack, rt5682s->jack_type, SND_JACK_HEADSET | SND_JACK_BTN_0 | SND_JACK_BTN_1 | SND_JACK_BTN_2 | SND_JACK_BTN_3); @@ -898,9 +904,6 @@ static void rt5682s_jack_detect_handler(struct work_struct *work) 
schedule_delayed_work(&rt5682s->jd_check_work, 0); else cancel_delayed_work_sync(&rt5682s->jd_check_work); - - mutex_unlock(&rt5682s->calibrate_mutex); - mutex_unlock(&rt5682s->jdet_mutex); } static void rt5682s_jd_check_handler(struct work_struct *work) @@ -908,14 +911,9 @@ static void rt5682s_jd_check_handler(struct work_struct *work) struct rt5682s_priv *rt5682s = container_of(work, struct rt5682s_priv, jd_check_work.work); - if (snd_soc_component_read(rt5682s->component, RT5682S_AJD1_CTRL) - & RT5682S_JDH_RS_MASK) { + if (snd_soc_component_read(rt5682s->component, RT5682S_AJD1_CTRL) & RT5682S_JDH_RS_MASK) { /* jack out */ - rt5682s->jack_type = rt5682s_headset_detect(rt5682s->component, 0); - - snd_soc_jack_report(rt5682s->hs_jack, rt5682s->jack_type, - SND_JACK_HEADSET | SND_JACK_BTN_0 | SND_JACK_BTN_1 | - SND_JACK_BTN_2 | SND_JACK_BTN_3); + schedule_delayed_work(&rt5682s->jack_detect_work, 0); } else { schedule_delayed_work(&rt5682s->jd_check_work, 500); } @@ -1323,7 +1321,6 @@ static int rt5682s_hp_amp_event(struct snd_soc_dapm_widget *w, struct snd_kcontrol *kcontrol, int event) { struct snd_soc_component *component = snd_soc_dapm_to_component(w->dapm); - struct rt5682s_priv *rt5682s = snd_soc_component_get_drvdata(component); switch (event) { case SND_SOC_DAPM_POST_PMU: @@ -1339,8 +1336,6 @@ static int rt5682s_hp_amp_event(struct snd_soc_dapm_widget *w, snd_soc_component_write(component, RT5682S_BIAS_CUR_CTRL_11, 0x6666); snd_soc_component_write(component, RT5682S_BIAS_CUR_CTRL_12, 0xa82a); - mutex_lock(&rt5682s->jdet_mutex); - snd_soc_component_update_bits(component, RT5682S_HP_CTRL_2, RT5682S_HPO_L_PATH_MASK | RT5682S_HPO_R_PATH_MASK | RT5682S_HPO_SEL_IP_EN_SW, RT5682S_HPO_L_PATH_EN | @@ -1348,8 +1343,6 @@ static int rt5682s_hp_amp_event(struct snd_soc_dapm_widget *w, usleep_range(5000, 10000); snd_soc_component_update_bits(component, RT5682S_HP_AMP_DET_CTL_1, RT5682S_CP_SW_SIZE_MASK, RT5682S_CP_SW_SIZE_L | RT5682S_CP_SW_SIZE_S); - - 
mutex_unlock(&rt5682s->jdet_mutex); break; case SND_SOC_DAPM_POST_PMD: @@ -3103,7 +3096,6 @@ static int rt5682s_i2c_probe(struct i2c_client *i2c, mutex_init(&rt5682s->calibrate_mutex); mutex_init(&rt5682s->sar_mutex); - mutex_init(&rt5682s->jdet_mutex); rt5682s_calibrate(rt5682s); regmap_update_bits(rt5682s->regmap, RT5682S_MICBIAS_2, diff --git a/sound/soc/codecs/rt5682s.h b/sound/soc/codecs/rt5682s.h index 1bf2ef7ce5784..397a2531b6f68 100644 --- a/sound/soc/codecs/rt5682s.h +++ b/sound/soc/codecs/rt5682s.h @@ -1446,7 +1446,6 @@ struct rt5682s_priv { struct delayed_work jd_check_work; struct mutex calibrate_mutex; struct mutex sar_mutex; - struct mutex jdet_mutex; #ifdef CONFIG_COMMON_CLK struct clk_hw dai_clks_hw[RT5682S_DAI_NUM_CLKS]; diff --git a/sound/soc/codecs/wcd934x.c b/sound/soc/codecs/wcd934x.c index 6c468527fec61..1e75e93cf28f2 100644 --- a/sound/soc/codecs/wcd934x.c +++ b/sound/soc/codecs/wcd934x.c @@ -3023,14 +3023,14 @@ static int wcd934x_hph_impedance_get(struct snd_kcontrol *kcontrol, return 0; } static const struct snd_kcontrol_new hph_type_detect_controls[] = { - SOC_SINGLE_EXT("HPH Type", 0, 0, UINT_MAX, 0, + SOC_SINGLE_EXT("HPH Type", 0, 0, WCD_MBHC_HPH_STEREO, 0, wcd934x_get_hph_type, NULL), }; static const struct snd_kcontrol_new impedance_detect_controls[] = { - SOC_SINGLE_EXT("HPHL Impedance", 0, 0, UINT_MAX, 0, + SOC_SINGLE_EXT("HPHL Impedance", 0, 0, INT_MAX, 0, wcd934x_hph_impedance_get, NULL), - SOC_SINGLE_EXT("HPHR Impedance", 0, 1, UINT_MAX, 0, + SOC_SINGLE_EXT("HPHR Impedance", 0, 1, INT_MAX, 0, wcd934x_hph_impedance_get, NULL), }; @@ -3308,13 +3308,16 @@ static int wcd934x_rx_hph_mode_put(struct snd_kcontrol *kc, mode_val = ucontrol->value.enumerated.item[0]; + if (mode_val == wcd->hph_mode) + return 0; + if (mode_val == 0) { dev_err(wcd->dev, "Invalid HPH Mode, default to ClSH HiFi\n"); mode_val = CLS_H_LOHIFI; } wcd->hph_mode = mode_val; - return 0; + return 1; } static int slim_rx_mux_get(struct snd_kcontrol *kc, @@ -5883,6 
+5886,7 @@ static int wcd934x_codec_parse_data(struct wcd934x_codec *wcd) } wcd->sidev = of_slim_get_device(wcd->sdev->ctrl, ifc_dev_np); + of_node_put(ifc_dev_np); if (!wcd->sidev) { dev_err(dev, "Unable to get SLIM Interface device\n"); return -EINVAL; diff --git a/sound/soc/codecs/wcd938x.c b/sound/soc/codecs/wcd938x.c index 36cbc66914f90..9ae65cbabb1aa 100644 --- a/sound/soc/codecs/wcd938x.c +++ b/sound/soc/codecs/wcd938x.c @@ -2504,7 +2504,7 @@ static int wcd938x_tx_mode_get(struct snd_kcontrol *kcontrol, struct soc_enum *e = (struct soc_enum *)kcontrol->private_value; int path = e->shift_l; - ucontrol->value.integer.value[0] = wcd938x->tx_mode[path]; + ucontrol->value.enumerated.item[0] = wcd938x->tx_mode[path]; return 0; } @@ -2528,7 +2528,7 @@ static int wcd938x_rx_hph_mode_get(struct snd_kcontrol *kcontrol, struct snd_soc_component *component = snd_soc_kcontrol_component(kcontrol); struct wcd938x_priv *wcd938x = snd_soc_component_get_drvdata(component); - ucontrol->value.integer.value[0] = wcd938x->hph_mode; + ucontrol->value.enumerated.item[0] = wcd938x->hph_mode; return 0; } @@ -3575,14 +3575,14 @@ static int wcd938x_hph_impedance_get(struct snd_kcontrol *kcontrol, } static const struct snd_kcontrol_new hph_type_detect_controls[] = { - SOC_SINGLE_EXT("HPH Type", 0, 0, UINT_MAX, 0, + SOC_SINGLE_EXT("HPH Type", 0, 0, WCD_MBHC_HPH_STEREO, 0, wcd938x_get_hph_type, NULL), }; static const struct snd_kcontrol_new impedance_detect_controls[] = { - SOC_SINGLE_EXT("HPHL Impedance", 0, 0, UINT_MAX, 0, + SOC_SINGLE_EXT("HPHL Impedance", 0, 0, INT_MAX, 0, wcd938x_hph_impedance_get, NULL), - SOC_SINGLE_EXT("HPHR Impedance", 0, 1, UINT_MAX, 0, + SOC_SINGLE_EXT("HPHR Impedance", 0, 1, INT_MAX, 0, wcd938x_hph_impedance_get, NULL), }; diff --git a/sound/soc/codecs/wm8350.c b/sound/soc/codecs/wm8350.c index 15d42ce3b21d6..41504ce2a682f 100644 --- a/sound/soc/codecs/wm8350.c +++ b/sound/soc/codecs/wm8350.c @@ -1537,18 +1537,38 @@ static int wm8350_component_probe(struct 
snd_soc_component *component) wm8350_clear_bits(wm8350, WM8350_JACK_DETECT, WM8350_JDL_ENA | WM8350_JDR_ENA); - wm8350_register_irq(wm8350, WM8350_IRQ_CODEC_JCK_DET_L, + ret = wm8350_register_irq(wm8350, WM8350_IRQ_CODEC_JCK_DET_L, wm8350_hpl_jack_handler, 0, "Left jack detect", priv); - wm8350_register_irq(wm8350, WM8350_IRQ_CODEC_JCK_DET_R, + if (ret != 0) + goto err; + + ret = wm8350_register_irq(wm8350, WM8350_IRQ_CODEC_JCK_DET_R, wm8350_hpr_jack_handler, 0, "Right jack detect", priv); - wm8350_register_irq(wm8350, WM8350_IRQ_CODEC_MICSCD, + if (ret != 0) + goto free_jck_det_l; + + ret = wm8350_register_irq(wm8350, WM8350_IRQ_CODEC_MICSCD, wm8350_mic_handler, 0, "Microphone short", priv); - wm8350_register_irq(wm8350, WM8350_IRQ_CODEC_MICD, + if (ret != 0) + goto free_jck_det_r; + + ret = wm8350_register_irq(wm8350, WM8350_IRQ_CODEC_MICD, wm8350_mic_handler, 0, "Microphone detect", priv); + if (ret != 0) + goto free_micscd; return 0; + +free_micscd: + wm8350_free_irq(wm8350, WM8350_IRQ_CODEC_MICSCD, priv); +free_jck_det_r: + wm8350_free_irq(wm8350, WM8350_IRQ_CODEC_JCK_DET_R, priv); +free_jck_det_l: + wm8350_free_irq(wm8350, WM8350_IRQ_CODEC_JCK_DET_L, priv); +err: + return ret; } static void wm8350_component_remove(struct snd_soc_component *component) diff --git a/sound/soc/dwc/dwc-i2s.c b/sound/soc/dwc/dwc-i2s.c index 5cb58929090d4..1edac3e10f345 100644 --- a/sound/soc/dwc/dwc-i2s.c +++ b/sound/soc/dwc/dwc-i2s.c @@ -403,9 +403,13 @@ static int dw_i2s_runtime_suspend(struct device *dev) static int dw_i2s_runtime_resume(struct device *dev) { struct dw_i2s_dev *dw_dev = dev_get_drvdata(dev); + int ret; - if (dw_dev->capability & DW_I2S_MASTER) - clk_enable(dw_dev->clk); + if (dw_dev->capability & DW_I2S_MASTER) { + ret = clk_enable(dw_dev->clk); + if (ret) + return ret; + } return 0; } @@ -422,10 +426,13 @@ static int dw_i2s_resume(struct snd_soc_component *component) { struct dw_i2s_dev *dev = snd_soc_component_get_drvdata(component); struct snd_soc_dai *dai; - 
int stream; + int stream, ret; - if (dev->capability & DW_I2S_MASTER) - clk_enable(dev->clk); + if (dev->capability & DW_I2S_MASTER) { + ret = clk_enable(dev->clk); + if (ret) + return ret; + } for_each_component_dais(component, dai) { for_each_pcm_streams(stream) diff --git a/sound/soc/fsl/fsl_spdif.c b/sound/soc/fsl/fsl_spdif.c index d178b479c8bd4..06d4a014f296d 100644 --- a/sound/soc/fsl/fsl_spdif.c +++ b/sound/soc/fsl/fsl_spdif.c @@ -610,6 +610,8 @@ static void fsl_spdif_shutdown(struct snd_pcm_substream *substream, mask = SCR_TXFIFO_AUTOSYNC_MASK | SCR_TXFIFO_CTRL_MASK | SCR_TXSEL_MASK | SCR_USRC_SEL_MASK | SCR_TXFIFO_FSEL_MASK; + /* Disable TX clock */ + regmap_update_bits(regmap, REG_SPDIF_STC, STC_TXCLK_ALL_EN_MASK, 0); } else { scr = SCR_RXFIFO_OFF | SCR_RXFIFO_CTL_ZERO; mask = SCR_RXFIFO_FSEL_MASK | SCR_RXFIFO_AUTOSYNC_MASK| diff --git a/sound/soc/fsl/imx-es8328.c b/sound/soc/fsl/imx-es8328.c index 09c674ee79f1a..168973035e35f 100644 --- a/sound/soc/fsl/imx-es8328.c +++ b/sound/soc/fsl/imx-es8328.c @@ -87,6 +87,7 @@ static int imx_es8328_probe(struct platform_device *pdev) if (int_port > MUX_PORT_MAX || int_port == 0) { dev_err(dev, "mux-int-port: hardware only has %d mux ports\n", MUX_PORT_MAX); + ret = -EINVAL; goto fail; } diff --git a/sound/soc/generic/simple-card-utils.c b/sound/soc/generic/simple-card-utils.c index a81323d1691d0..9736102e68088 100644 --- a/sound/soc/generic/simple-card-utils.c +++ b/sound/soc/generic/simple-card-utils.c @@ -275,6 +275,7 @@ int asoc_simple_hw_params(struct snd_pcm_substream *substream, mclk_fs = props->mclk_fs; if (mclk_fs) { + struct snd_soc_component *component; mclk = params_rate(params) * mclk_fs; for_each_prop_dai_codec(props, i, pdai) { @@ -282,16 +283,30 @@ int asoc_simple_hw_params(struct snd_pcm_substream *substream, if (ret < 0) return ret; } + for_each_prop_dai_cpu(props, i, pdai) { ret = asoc_simple_set_clk_rate(pdai, mclk); if (ret < 0) return ret; } + + /* Ensure sysclk is set on all components in case 
any + * (such as platform components) are missed by calls to + * snd_soc_dai_set_sysclk. + */ + for_each_rtd_components(rtd, i, component) { + ret = snd_soc_component_set_sysclk(component, 0, 0, + mclk, SND_SOC_CLOCK_IN); + if (ret && ret != -ENOTSUPP) + return ret; + } + for_each_rtd_codec_dais(rtd, i, sdai) { ret = snd_soc_dai_set_sysclk(sdai, 0, mclk, SND_SOC_CLOCK_IN); if (ret && ret != -ENOTSUPP) return ret; } + for_each_rtd_cpu_dais(rtd, i, sdai) { ret = snd_soc_dai_set_sysclk(sdai, 0, mclk, SND_SOC_CLOCK_OUT); if (ret && ret != -ENOTSUPP) diff --git a/sound/soc/intel/boards/sof_es8336.c b/sound/soc/intel/boards/sof_es8336.c index 20d577eaab6d7..28d7670b8f8f8 100644 --- a/sound/soc/intel/boards/sof_es8336.c +++ b/sound/soc/intel/boards/sof_es8336.c @@ -63,7 +63,12 @@ static const struct acpi_gpio_mapping *gpio_mapping = acpi_es8336_gpios; static void log_quirks(struct device *dev) { - dev_info(dev, "quirk SSP%ld", SOF_ES8336_SSP_CODEC(quirk)); + dev_info(dev, "quirk mask %#lx\n", quirk); + dev_info(dev, "quirk SSP%ld\n", SOF_ES8336_SSP_CODEC(quirk)); + if (quirk & SOF_ES8336_ENABLE_DMIC) + dev_info(dev, "quirk DMIC enabled\n"); + if (quirk & SOF_ES8336_TGL_GPIO_QUIRK) + dev_info(dev, "quirk TGL GPIO enabled\n"); } static int sof_es8316_speaker_power_event(struct snd_soc_dapm_widget *w, diff --git a/sound/soc/intel/boards/sof_sdw.c b/sound/soc/intel/boards/sof_sdw.c index da515eb1ddbe7..1f00679b42409 100644 --- a/sound/soc/intel/boards/sof_sdw.c +++ b/sound/soc/intel/boards/sof_sdw.c @@ -185,7 +185,7 @@ static const struct dmi_system_id sof_sdw_quirk_table[] = { .callback = sof_sdw_quirk_cb, .matches = { DMI_MATCH(DMI_SYS_VENDOR, "HP"), - DMI_MATCH(DMI_PRODUCT_NAME, "HP Spectre x360 Convertible"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP Spectre x360 Conv"), }, .driver_data = (void *)(SOF_SDW_TGL_HDMI | SOF_SDW_PCH_DMIC | diff --git a/sound/soc/intel/common/soc-acpi-intel-bxt-match.c b/sound/soc/intel/common/soc-acpi-intel-bxt-match.c index 
342d340522045..04a92e74d99bc 100644 --- a/sound/soc/intel/common/soc-acpi-intel-bxt-match.c +++ b/sound/soc/intel/common/soc-acpi-intel-bxt-match.c @@ -41,6 +41,11 @@ static struct snd_soc_acpi_mach *apl_quirk(void *arg) return mach; } +static const struct snd_soc_acpi_codecs essx_83x6 = { + .num_codecs = 3, + .codecs = { "ESSX8316", "ESSX8326", "ESSX8336"}, +}; + static const struct snd_soc_acpi_codecs bxt_codecs = { .num_codecs = 1, .codecs = {"MX98357A"} @@ -83,7 +88,7 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_bxt_machines[] = { .sof_tplg_filename = "sof-apl-tdf8532.tplg", }, { - .id = "ESSX8336", + .comp_ids = &essx_83x6, .drv_name = "sof-essx8336", .sof_fw_filename = "sof-apl.ri", .sof_tplg_filename = "sof-apl-es8336.tplg", diff --git a/sound/soc/intel/common/soc-acpi-intel-cml-match.c b/sound/soc/intel/common/soc-acpi-intel-cml-match.c index 4eebc79d4b486..14395833d89e8 100644 --- a/sound/soc/intel/common/soc-acpi-intel-cml-match.c +++ b/sound/soc/intel/common/soc-acpi-intel-cml-match.c @@ -9,6 +9,11 @@ #include #include +static const struct snd_soc_acpi_codecs essx_83x6 = { + .num_codecs = 3, + .codecs = { "ESSX8316", "ESSX8326", "ESSX8336"}, +}; + static const struct snd_soc_acpi_codecs rt1011_spk_codecs = { .num_codecs = 1, .codecs = {"10EC1011"} @@ -82,7 +87,7 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_cml_machines[] = { .sof_tplg_filename = "sof-cml-da7219-max98390.tplg", }, { - .id = "ESSX8336", + .comp_ids = &essx_83x6, .drv_name = "sof-essx8336", .sof_fw_filename = "sof-cml.ri", .sof_tplg_filename = "sof-cml-es8336.tplg", diff --git a/sound/soc/intel/common/soc-acpi-intel-glk-match.c b/sound/soc/intel/common/soc-acpi-intel-glk-match.c index 8492b7e2a9450..7aa6a870d5a5c 100644 --- a/sound/soc/intel/common/soc-acpi-intel-glk-match.c +++ b/sound/soc/intel/common/soc-acpi-intel-glk-match.c @@ -9,6 +9,11 @@ #include #include +static const struct snd_soc_acpi_codecs essx_83x6 = { + .num_codecs = 3, + .codecs = { "ESSX8316", "ESSX8326", "ESSX8336"}, 
+}; + static const struct snd_soc_acpi_codecs glk_codecs = { .num_codecs = 1, .codecs = {"MX98357A"} @@ -58,7 +63,7 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_glk_machines[] = { .sof_tplg_filename = "sof-glk-cs42l42.tplg", }, { - .id = "ESSX8336", + .comp_ids = &essx_83x6, .drv_name = "sof-essx8336", .sof_fw_filename = "sof-glk.ri", .sof_tplg_filename = "sof-glk-es8336.tplg", diff --git a/sound/soc/intel/common/soc-acpi-intel-jsl-match.c b/sound/soc/intel/common/soc-acpi-intel-jsl-match.c index 278ec196da7bf..9d0d0e1437a4b 100644 --- a/sound/soc/intel/common/soc-acpi-intel-jsl-match.c +++ b/sound/soc/intel/common/soc-acpi-intel-jsl-match.c @@ -9,6 +9,11 @@ #include #include +static const struct snd_soc_acpi_codecs essx_83x6 = { + .num_codecs = 3, + .codecs = { "ESSX8316", "ESSX8326", "ESSX8336"}, +}; + static const struct snd_soc_acpi_codecs jsl_7219_98373_codecs = { .num_codecs = 1, .codecs = {"MX98373"} @@ -87,7 +92,7 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_jsl_machines[] = { .sof_tplg_filename = "sof-jsl-cs42l42-mx98360a.tplg", }, { - .id = "ESSX8336", + .comp_ids = &essx_83x6, .drv_name = "sof-essx8336", .sof_fw_filename = "sof-jsl.ri", .sof_tplg_filename = "sof-jsl-es8336.tplg", diff --git a/sound/soc/intel/common/soc-acpi-intel-tgl-match.c b/sound/soc/intel/common/soc-acpi-intel-tgl-match.c index da31bb3cca17c..e2658bca69318 100644 --- a/sound/soc/intel/common/soc-acpi-intel-tgl-match.c +++ b/sound/soc/intel/common/soc-acpi-intel-tgl-match.c @@ -10,6 +10,11 @@ #include #include "soc-acpi-intel-sdw-mockup-match.h" +static const struct snd_soc_acpi_codecs essx_83x6 = { + .num_codecs = 3, + .codecs = { "ESSX8316", "ESSX8326", "ESSX8336"}, +}; + static const struct snd_soc_acpi_codecs tgl_codecs = { .num_codecs = 1, .codecs = {"MX98357A"} @@ -389,7 +394,7 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_tgl_machines[] = { .sof_tplg_filename = "sof-tgl-rt1011-rt5682.tplg", }, { - .id = "ESSX8336", + .comp_ids = &essx_83x6, .drv_name = "sof-essx8336", 
.sof_fw_filename = "sof-tgl.ri", .sof_tplg_filename = "sof-tgl-es8336.tplg", diff --git a/sound/soc/mediatek/mt8183/mt8183-da7219-max98357.c b/sound/soc/mediatek/mt8183/mt8183-da7219-max98357.c index 718505c754188..f090dee0c7a4f 100644 --- a/sound/soc/mediatek/mt8183/mt8183-da7219-max98357.c +++ b/sound/soc/mediatek/mt8183/mt8183-da7219-max98357.c @@ -695,8 +695,11 @@ static int mt8183_da7219_max98357_dev_probe(struct platform_device *pdev) } card = (struct snd_soc_card *)of_device_get_match_data(&pdev->dev); - if (!card) - return -EINVAL; + if (!card) { + ret = -EINVAL; + goto put_platform_node; + } + card->dev = &pdev->dev; hdmi_codec = of_parse_phandle(pdev->dev.of_node, @@ -761,12 +764,15 @@ static int mt8183_da7219_max98357_dev_probe(struct platform_device *pdev) if (!mt8183_da7219_max98357_headset_dev.dlc.of_node) { dev_err(&pdev->dev, "Property 'mediatek,headset-codec' missing/invalid\n"); - return -EINVAL; + ret = -EINVAL; + goto put_hdmi_codec; } priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); - if (!priv) - return -ENOMEM; + if (!priv) { + ret = -ENOMEM; + goto put_hdmi_codec; + } snd_soc_card_set_drvdata(card, priv); @@ -775,13 +781,16 @@ static int mt8183_da7219_max98357_dev_probe(struct platform_device *pdev) ret = PTR_ERR(pinctrl); dev_err(&pdev->dev, "%s failed to select default state %d\n", __func__, ret); - return ret; + goto put_hdmi_codec; } ret = devm_snd_soc_register_card(&pdev->dev, card); - of_node_put(platform_node); + +put_hdmi_codec: of_node_put(hdmi_codec); +put_platform_node: + of_node_put(platform_node); return ret; } diff --git a/sound/soc/mediatek/mt8192/mt8192-mt6359-rt1015-rt5682.c b/sound/soc/mediatek/mt8192/mt8192-mt6359-rt1015-rt5682.c index f7daad1bfe1ed..ee91569c09117 100644 --- a/sound/soc/mediatek/mt8192/mt8192-mt6359-rt1015-rt5682.c +++ b/sound/soc/mediatek/mt8192/mt8192-mt6359-rt1015-rt5682.c @@ -1116,8 +1116,10 @@ static int mt8192_mt6359_dev_probe(struct platform_device *pdev) } card = (struct snd_soc_card 
*)of_device_get_match_data(&pdev->dev); - if (!card) - return -EINVAL; + if (!card) { + ret = -EINVAL; + goto put_platform_node; + } card->dev = &pdev->dev; hdmi_codec = of_parse_phandle(pdev->dev.of_node, @@ -1159,20 +1161,24 @@ static int mt8192_mt6359_dev_probe(struct platform_device *pdev) } priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); - if (!priv) - return -ENOMEM; + if (!priv) { + ret = -ENOMEM; + goto put_hdmi_codec; + } snd_soc_card_set_drvdata(card, priv); ret = mt8192_afe_gpio_init(&pdev->dev); if (ret) { dev_err(&pdev->dev, "init gpio error %d\n", ret); - return ret; + goto put_hdmi_codec; } ret = devm_snd_soc_register_card(&pdev->dev, card); - of_node_put(platform_node); +put_hdmi_codec: of_node_put(hdmi_codec); +put_platform_node: + of_node_put(platform_node); return ret; } diff --git a/sound/soc/mediatek/mt8195/mt8195-mt6359-rt1019-rt5682.c b/sound/soc/mediatek/mt8195/mt8195-mt6359-rt1019-rt5682.c index 29c2d3407cc7c..e3146311722f8 100644 --- a/sound/soc/mediatek/mt8195/mt8195-mt6359-rt1019-rt5682.c +++ b/sound/soc/mediatek/mt8195/mt8195-mt6359-rt1019-rt5682.c @@ -1342,7 +1342,8 @@ static int mt8195_mt6359_rt1019_rt5682_dev_probe(struct platform_device *pdev) "mediatek,dai-link"); if (ret) { dev_dbg(&pdev->dev, "Parse dai-link fail\n"); - return -EINVAL; + ret = -EINVAL; + goto put_node; } } else { if (!sof_on) @@ -1398,6 +1399,7 @@ static int mt8195_mt6359_rt1019_rt5682_dev_probe(struct platform_device *pdev) ret = devm_snd_soc_register_card(&pdev->dev, card); +put_node: of_node_put(platform_node); of_node_put(adsp_node); of_node_put(dp_node); diff --git a/sound/soc/mxs/mxs-saif.c b/sound/soc/mxs/mxs-saif.c index 6a2d24d489647..879c1221a809b 100644 --- a/sound/soc/mxs/mxs-saif.c +++ b/sound/soc/mxs/mxs-saif.c @@ -455,7 +455,10 @@ static int mxs_saif_hw_params(struct snd_pcm_substream *substream, * basic clock which should be fast enough for the internal * logic. 
*/ - clk_enable(saif->clk); + ret = clk_enable(saif->clk); + if (ret) + return ret; + ret = clk_set_rate(saif->clk, 24000000); clk_disable(saif->clk); if (ret) diff --git a/sound/soc/mxs/mxs-sgtl5000.c b/sound/soc/mxs/mxs-sgtl5000.c index 2412dc7e65d44..746f409386751 100644 --- a/sound/soc/mxs/mxs-sgtl5000.c +++ b/sound/soc/mxs/mxs-sgtl5000.c @@ -118,6 +118,9 @@ static int mxs_sgtl5000_probe(struct platform_device *pdev) codec_np = of_parse_phandle(np, "audio-codec", 0); if (!saif_np[0] || !saif_np[1] || !codec_np) { dev_err(&pdev->dev, "phandle missing or invalid\n"); + of_node_put(codec_np); + of_node_put(saif_np[0]); + of_node_put(saif_np[1]); return -EINVAL; } diff --git a/sound/soc/rockchip/rockchip_i2s.c b/sound/soc/rockchip/rockchip_i2s.c index a6d7656c206e5..4ce5d25793875 100644 --- a/sound/soc/rockchip/rockchip_i2s.c +++ b/sound/soc/rockchip/rockchip_i2s.c @@ -716,19 +716,23 @@ static int rockchip_i2s_probe(struct platform_device *pdev) i2s->mclk = devm_clk_get(&pdev->dev, "i2s_clk"); if (IS_ERR(i2s->mclk)) { dev_err(&pdev->dev, "Can't retrieve i2s master clock\n"); - return PTR_ERR(i2s->mclk); + ret = PTR_ERR(i2s->mclk); + goto err_clk; } regs = devm_platform_get_and_ioremap_resource(pdev, 0, &res); - if (IS_ERR(regs)) - return PTR_ERR(regs); + if (IS_ERR(regs)) { + ret = PTR_ERR(regs); + goto err_clk; + } i2s->regmap = devm_regmap_init_mmio(&pdev->dev, regs, &rockchip_i2s_regmap_config); if (IS_ERR(i2s->regmap)) { dev_err(&pdev->dev, "Failed to initialise managed register map\n"); - return PTR_ERR(i2s->regmap); + ret = PTR_ERR(i2s->regmap); + goto err_clk; } i2s->bclk_ratio = 64; @@ -768,7 +772,8 @@ static int rockchip_i2s_probe(struct platform_device *pdev) i2s_runtime_suspend(&pdev->dev); err_pm_disable: pm_runtime_disable(&pdev->dev); - +err_clk: + clk_disable_unprepare(i2s->hclk); return ret; } diff --git a/sound/soc/rockchip/rockchip_i2s_tdm.c b/sound/soc/rockchip/rockchip_i2s_tdm.c index 5f9cb5c4c7f09..98700e75b82a1 100644 --- 
a/sound/soc/rockchip/rockchip_i2s_tdm.c +++ b/sound/soc/rockchip/rockchip_i2s_tdm.c @@ -469,14 +469,14 @@ static int rockchip_i2s_tdm_set_fmt(struct snd_soc_dai *cpu_dai, txcr_val = I2S_TXCR_IBM_NORMAL; rxcr_val = I2S_RXCR_IBM_NORMAL; break; - case SND_SOC_DAIFMT_DSP_A: /* PCM no delay mode */ - txcr_val = I2S_TXCR_TFS_PCM; - rxcr_val = I2S_RXCR_TFS_PCM; - break; - case SND_SOC_DAIFMT_DSP_B: /* PCM delay 1 mode */ + case SND_SOC_DAIFMT_DSP_A: /* PCM delay 1 mode */ txcr_val = I2S_TXCR_TFS_PCM | I2S_TXCR_PBM_MODE(1); rxcr_val = I2S_RXCR_TFS_PCM | I2S_RXCR_PBM_MODE(1); break; + case SND_SOC_DAIFMT_DSP_B: /* PCM no delay mode */ + txcr_val = I2S_TXCR_TFS_PCM; + rxcr_val = I2S_RXCR_TFS_PCM; + break; default: ret = -EINVAL; goto err_pm_put; @@ -1738,7 +1738,7 @@ static int __maybe_unused rockchip_i2s_tdm_resume(struct device *dev) struct rk_i2s_tdm_dev *i2s_tdm = dev_get_drvdata(dev); int ret; - ret = pm_runtime_get_sync(dev); + ret = pm_runtime_resume_and_get(dev); if (ret < 0) return ret; ret = regcache_sync(i2s_tdm->regmap); diff --git a/sound/soc/sh/fsi.c b/sound/soc/sh/fsi.c index cdf3b7f69ba70..e9a1eb6bdf66a 100644 --- a/sound/soc/sh/fsi.c +++ b/sound/soc/sh/fsi.c @@ -816,14 +816,27 @@ static int fsi_clk_enable(struct device *dev, return ret; } - clk_enable(clock->xck); - clk_enable(clock->ick); - clk_enable(clock->div); + ret = clk_enable(clock->xck); + if (ret) + goto err; + ret = clk_enable(clock->ick); + if (ret) + goto disable_xck; + ret = clk_enable(clock->div); + if (ret) + goto disable_ick; clock->count++; } return ret; + +disable_ick: + clk_disable(clock->ick); +disable_xck: + clk_disable(clock->xck); +err: + return ret; } static int fsi_clk_disable(struct device *dev, diff --git a/sound/soc/sh/rz-ssi.c b/sound/soc/sh/rz-ssi.c index e8d98b362f9db..7379b1489e358 100644 --- a/sound/soc/sh/rz-ssi.c +++ b/sound/soc/sh/rz-ssi.c @@ -411,54 +411,56 @@ static int rz_ssi_pio_recv(struct rz_ssi_priv *ssi, struct rz_ssi_stream *strm) { struct snd_pcm_substream 
*substream = strm->substream; struct snd_pcm_runtime *runtime; + bool done = false; u16 *buf; int fifo_samples; int frames_left; - int samples = 0; + int samples; int i; if (!rz_ssi_stream_is_valid(ssi, strm)) return -EINVAL; runtime = substream->runtime; - /* frames left in this period */ - frames_left = runtime->period_size - (strm->buffer_pos % - runtime->period_size); - if (frames_left == 0) - frames_left = runtime->period_size; - /* Samples in RX FIFO */ - fifo_samples = (rz_ssi_reg_readl(ssi, SSIFSR) >> - SSIFSR_RDC_SHIFT) & SSIFSR_RDC_MASK; + while (!done) { + /* frames left in this period */ + frames_left = runtime->period_size - + (strm->buffer_pos % runtime->period_size); + if (!frames_left) + frames_left = runtime->period_size; + + /* Samples in RX FIFO */ + fifo_samples = (rz_ssi_reg_readl(ssi, SSIFSR) >> + SSIFSR_RDC_SHIFT) & SSIFSR_RDC_MASK; + + /* Only read full frames at a time */ + samples = 0; + while (frames_left && (fifo_samples >= runtime->channels)) { + samples += runtime->channels; + fifo_samples -= runtime->channels; + frames_left--; + } - /* Only read full frames at a time */ - while (frames_left && (fifo_samples >= runtime->channels)) { - samples += runtime->channels; - fifo_samples -= runtime->channels; - frames_left--; - } + /* not enough samples yet */ + if (!samples) + break; - /* not enough samples yet */ - if (samples == 0) - return 0; + /* calculate new buffer index */ + buf = (u16 *)(runtime->dma_area); + buf += strm->buffer_pos * runtime->channels; - /* calculate new buffer index */ - buf = (u16 *)(runtime->dma_area); - buf += strm->buffer_pos * runtime->channels; - - /* Note, only supports 16-bit samples */ - for (i = 0; i < samples; i++) - *buf++ = (u16)(rz_ssi_reg_readl(ssi, SSIFRDR) >> 16); + /* Note, only supports 16-bit samples */ + for (i = 0; i < samples; i++) + *buf++ = (u16)(rz_ssi_reg_readl(ssi, SSIFRDR) >> 16); - rz_ssi_reg_mask_setl(ssi, SSIFSR, SSIFSR_RDF, 0); - rz_ssi_pointer_update(strm, samples / 
runtime->channels); + rz_ssi_reg_mask_setl(ssi, SSIFSR, SSIFSR_RDF, 0); + rz_ssi_pointer_update(strm, samples / runtime->channels); - /* - * If we finished this period, but there are more samples in - * the RX FIFO, call this function again - */ - if (frames_left == 0 && fifo_samples >= runtime->channels) - rz_ssi_pio_recv(ssi, strm); + /* check if there are no more samples in the RX FIFO */ + if (!(!frames_left && fifo_samples >= runtime->channels)) + done = true; + } return 0; } @@ -975,6 +977,9 @@ static int rz_ssi_probe(struct platform_device *pdev) ssi->playback.priv = ssi; ssi->capture.priv = ssi; + spin_lock_init(&ssi->lock); + dev_set_drvdata(&pdev->dev, ssi); + /* Error Interrupt */ ssi->irq_int = platform_get_irq_byname(pdev, "int_req"); if (ssi->irq_int < 0) @@ -1027,8 +1032,6 @@ static int rz_ssi_probe(struct platform_device *pdev) return dev_err_probe(ssi->dev, ret, "pm_runtime_resume_and_get failed\n"); } - spin_lock_init(&ssi->lock); - dev_set_drvdata(&pdev->dev, ssi); ret = devm_snd_soc_register_component(&pdev->dev, &rz_ssi_soc_component, rz_ssi_soc_dai, ARRAY_SIZE(rz_ssi_soc_dai)); diff --git a/sound/soc/soc-compress.c b/sound/soc/soc-compress.c index 8e2494a9f3a7f..e9dd25894dc0f 100644 --- a/sound/soc/soc-compress.c +++ b/sound/soc/soc-compress.c @@ -567,6 +567,11 @@ int snd_soc_new_compress(struct snd_soc_pcm_runtime *rtd, int num) return -EINVAL; } + if (!codec_dai) { + dev_err(rtd->card->dev, "Missing codec\n"); + return -EINVAL; + } + /* check client and interface hw capabilities */ if (snd_soc_dai_stream_valid(codec_dai, SNDRV_PCM_STREAM_PLAYBACK) && snd_soc_dai_stream_valid(cpu_dai, SNDRV_PCM_STREAM_PLAYBACK)) diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index 434e61b46983c..a088bc9f7dd7c 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -3233,7 +3233,7 @@ int snd_soc_get_dai_name(const struct of_phandle_args *args, for_each_component(pos) { struct device_node *component_of_node = soc_component_to_node(pos); - if 
(component_of_node != args->np) + if (component_of_node != args->np || !pos->num_dai) continue; ret = snd_soc_component_of_xlate_dai_name(pos, args, dai_name); diff --git a/sound/soc/soc-generic-dmaengine-pcm.c b/sound/soc/soc-generic-dmaengine-pcm.c index c54c8ca8d7156..359987bf76d1b 100644 --- a/sound/soc/soc-generic-dmaengine-pcm.c +++ b/sound/soc/soc-generic-dmaengine-pcm.c @@ -86,10 +86,10 @@ static int dmaengine_pcm_hw_params(struct snd_soc_component *component, memset(&slave_config, 0, sizeof(slave_config)); - if (!pcm->config) - prepare_slave_config = snd_dmaengine_pcm_prepare_slave_config; - else + if (pcm->config && pcm->config->prepare_slave_config) prepare_slave_config = pcm->config->prepare_slave_config; + else + prepare_slave_config = snd_dmaengine_pcm_prepare_slave_config; if (prepare_slave_config) { int ret = prepare_slave_config(substream, params, &slave_config); diff --git a/sound/soc/soc-topology.c b/sound/soc/soc-topology.c index 2630df024dff3..cb24805668bd8 100644 --- a/sound/soc/soc-topology.c +++ b/sound/soc/soc-topology.c @@ -512,7 +512,8 @@ static int soc_tplg_kcontrol_bind_io(struct snd_soc_tplg_ctl_hdr *hdr, if (le32_to_cpu(hdr->ops.info) == SND_SOC_TPLG_CTL_BYTES && k->iface & SNDRV_CTL_ELEM_IFACE_MIXER - && k->access & SNDRV_CTL_ELEM_ACCESS_TLV_READWRITE + && (k->access & SNDRV_CTL_ELEM_ACCESS_TLV_READ + || k->access & SNDRV_CTL_ELEM_ACCESS_TLV_WRITE) && k->access & SNDRV_CTL_ELEM_ACCESS_TLV_CALLBACK) { struct soc_bytes_ext *sbe; struct snd_soc_tplg_bytes_control *be; diff --git a/sound/soc/sof/debug.c b/sound/soc/sof/debug.c index 6d6757075f7c3..e755c0c5f86c0 100644 --- a/sound/soc/sof/debug.c +++ b/sound/soc/sof/debug.c @@ -960,7 +960,7 @@ static void snd_sof_dbg_print_fw_state(struct snd_sof_dev *sdev, const char *lev void snd_sof_dsp_dbg_dump(struct snd_sof_dev *sdev, const char *msg, u32 flags) { - char *level = flags & SOF_DBG_DUMP_OPTIONAL ? KERN_DEBUG : KERN_ERR; + char *level = (flags & SOF_DBG_DUMP_OPTIONAL) ? 
KERN_DEBUG : KERN_ERR; bool print_all = sof_debug_check_flag(SOF_DBG_PRINT_ALL_DUMPS); if (flags & SOF_DBG_DUMP_OPTIONAL && !print_all) diff --git a/sound/soc/sof/imx/imx8m.c b/sound/soc/sof/imx/imx8m.c index 788e77bcb6038..60251486b24b2 100644 --- a/sound/soc/sof/imx/imx8m.c +++ b/sound/soc/sof/imx/imx8m.c @@ -224,6 +224,7 @@ static int imx8m_probe(struct snd_sof_dev *sdev) } ret = of_address_to_resource(res_node, 0, &res); + of_node_put(res_node); if (ret) { dev_err(&pdev->dev, "failed to get reserved region address\n"); goto exit_pdev_unregister; diff --git a/sound/soc/sof/intel/Kconfig b/sound/soc/sof/intel/Kconfig index 88b6176af021c..d83e1a36707af 100644 --- a/sound/soc/sof/intel/Kconfig +++ b/sound/soc/sof/intel/Kconfig @@ -84,6 +84,7 @@ if SND_SOC_SOF_PCI config SND_SOC_SOF_MERRIFIELD tristate "SOF support for Tangier/Merrifield" default SND_SOC_SOF_PCI + select SND_SOC_SOF_PCI_DEV select SND_SOC_SOF_INTEL_ATOM_HIFI_EP help This adds support for Sound Open Firmware for Intel(R) platforms diff --git a/sound/soc/sof/intel/hda-dai.c b/sound/soc/sof/intel/hda-dai.c index cd12589355eff..28a54145c1506 100644 --- a/sound/soc/sof/intel/hda-dai.c +++ b/sound/soc/sof/intel/hda-dai.c @@ -59,6 +59,8 @@ static struct hdac_ext_stream * { struct snd_soc_pcm_runtime *rtd = asoc_substream_to_rtd(substream); struct sof_intel_hda_stream *hda_stream; + const struct sof_intel_dsp_desc *chip; + struct snd_sof_dev *sdev; struct hdac_ext_stream *res = NULL; struct hdac_stream *stream = NULL; @@ -77,9 +79,20 @@ static struct hdac_ext_stream * continue; hda_stream = hstream_to_sof_hda_stream(hstream); + sdev = hda_stream->sdev; + chip = get_chip_info(sdev->pdata); /* check if link is available */ if (!hstream->link_locked) { + /* + * choose the first available link for platforms that do not have the + * PROCEN_FMT_QUIRK set. 
+ */ + if (!(chip->quirks & SOF_INTEL_PROCEN_FMT_QUIRK)) { + res = hstream; + break; + } + if (stream->opened) { /* * check if the stream tag matches the stream diff --git a/sound/soc/sof/intel/hda-loader.c b/sound/soc/sof/intel/hda-loader.c index 33306d2023a78..9bbfdab8009de 100644 --- a/sound/soc/sof/intel/hda-loader.c +++ b/sound/soc/sof/intel/hda-loader.c @@ -47,7 +47,7 @@ static struct hdac_ext_stream *cl_stream_prepare(struct snd_sof_dev *sdev, unsig ret = snd_dma_alloc_pages(SNDRV_DMA_TYPE_DEV_SG, &pci->dev, size, dmab); if (ret < 0) { dev_err(sdev->dev, "error: memory alloc failed: %d\n", ret); - goto error; + goto out_put; } hstream->period_bytes = 0;/* initialize period_bytes */ @@ -58,22 +58,23 @@ static struct hdac_ext_stream *cl_stream_prepare(struct snd_sof_dev *sdev, unsig ret = hda_dsp_iccmax_stream_hw_params(sdev, dsp_stream, dmab, NULL); if (ret < 0) { dev_err(sdev->dev, "error: iccmax stream prepare failed: %d\n", ret); - goto error; + goto out_free; } } else { ret = hda_dsp_stream_hw_params(sdev, dsp_stream, dmab, NULL); if (ret < 0) { dev_err(sdev->dev, "error: hdac prepare failed: %d\n", ret); - goto error; + goto out_free; } hda_dsp_stream_spib_config(sdev, dsp_stream, HDA_DSP_SPIB_ENABLE, size); } return dsp_stream; -error: - hda_dsp_stream_put(sdev, direction, hstream->stream_tag); +out_free: snd_dma_free_pages(dmab); +out_put: + hda_dsp_stream_put(sdev, direction, hstream->stream_tag); return ERR_PTR(ret); } diff --git a/sound/soc/sof/intel/hda-pcm.c b/sound/soc/sof/intel/hda-pcm.c index d78aa5d8552d5..8aeb00eacd219 100644 --- a/sound/soc/sof/intel/hda-pcm.c +++ b/sound/soc/sof/intel/hda-pcm.c @@ -315,6 +315,7 @@ int hda_dsp_pcm_open(struct snd_sof_dev *sdev, runtime->hw.info &= ~SNDRV_PCM_INFO_PAUSE; if (hda_always_enable_dmi_l1 || + direction == SNDRV_PCM_STREAM_PLAYBACK || spcm->stream[substream->stream].d0i3_compatible) flags |= SOF_HDA_STREAM_DMI_L1_COMPATIBLE; diff --git a/sound/soc/sof/intel/hda.c b/sound/soc/sof/intel/hda.c index 
1385695d77458..028751549f6da 100644 --- a/sound/soc/sof/intel/hda.c +++ b/sound/soc/sof/intel/hda.c @@ -432,11 +432,9 @@ static char *hda_model; module_param(hda_model, charp, 0444); MODULE_PARM_DESC(hda_model, "Use the given HDA board model."); -#if IS_ENABLED(CONFIG_SND_SOC_SOF_HDA) || IS_ENABLED(CONFIG_SND_SOC_SOF_INTEL_SOUNDWIRE) -static int hda_dmic_num = -1; -module_param_named(dmic_num, hda_dmic_num, int, 0444); +static int dmic_num_override = -1; +module_param_named(dmic_num, dmic_num_override, int, 0444); MODULE_PARM_DESC(dmic_num, "SOF HDA DMIC number"); -#endif #if IS_ENABLED(CONFIG_SND_SOC_SOF_HDA) static bool hda_codec_use_common_hdmi = IS_ENABLED(CONFIG_SND_HDA_CODEC_HDMI); @@ -644,24 +642,35 @@ static int hda_init(struct snd_sof_dev *sdev) return ret; } -#if IS_ENABLED(CONFIG_SND_SOC_SOF_HDA) || IS_ENABLED(CONFIG_SND_SOC_SOF_INTEL_SOUNDWIRE) - -static int check_nhlt_dmic(struct snd_sof_dev *sdev) +static int check_dmic_num(struct snd_sof_dev *sdev) { struct nhlt_acpi_table *nhlt; - int dmic_num; + int dmic_num = 0; nhlt = intel_nhlt_init(sdev->dev); if (nhlt) { dmic_num = intel_nhlt_get_dmic_geo(sdev->dev, nhlt); intel_nhlt_free(nhlt); - if (dmic_num >= 1 && dmic_num <= 4) - return dmic_num; } - return 0; + /* allow for module parameter override */ + if (dmic_num_override != -1) { + dev_dbg(sdev->dev, + "overriding DMICs detected in NHLT tables %d by kernel param %d\n", + dmic_num, dmic_num_override); + dmic_num = dmic_num_override; + } + + if (dmic_num < 0 || dmic_num > 4) { + dev_dbg(sdev->dev, "invalid dmic_number %d\n", dmic_num); + dmic_num = 0; + } + + return dmic_num; } +#if IS_ENABLED(CONFIG_SND_SOC_SOF_HDA) || IS_ENABLED(CONFIG_SND_SOC_SOF_INTEL_SOUNDWIRE) + static const char *fixup_tplg_name(struct snd_sof_dev *sdev, const char *sof_tplg_filename, const char *idisp_str, @@ -697,16 +706,8 @@ static int dmic_topology_fixup(struct snd_sof_dev *sdev, const char *dmic_str; int dmic_num; - /* first check NHLT for DMICs */ - dmic_num = 
check_nhlt_dmic(sdev); - - /* allow for module parameter override */ - if (hda_dmic_num != -1) { - dev_dbg(sdev->dev, - "overriding DMICs detected in NHLT tables %d by kernel param %d\n", - dmic_num, hda_dmic_num); - dmic_num = hda_dmic_num; - } + /* first check for DMICs (using NHLT or module parameter) */ + dmic_num = check_dmic_num(sdev); switch (dmic_num) { case 1: @@ -1188,7 +1189,7 @@ static bool link_slaves_found(struct snd_sof_dev *sdev, struct hdac_bus *bus = sof_to_bus(sdev); struct sdw_intel_slave_id *ids = sdw->ids; int num_slaves = sdw->num_slaves; - unsigned int part_id, link_id, unique_id, mfg_id; + unsigned int part_id, link_id, unique_id, mfg_id, version; int i, j, k; for (i = 0; i < link->num_adr; i++) { @@ -1198,12 +1199,14 @@ static bool link_slaves_found(struct snd_sof_dev *sdev, mfg_id = SDW_MFG_ID(adr); part_id = SDW_PART_ID(adr); link_id = SDW_DISCO_LINK_ID(adr); + version = SDW_VERSION(adr); for (j = 0; j < num_slaves; j++) { /* find out how many identical parts were reported on that link */ if (ids[j].link_id == link_id && ids[j].id.part_id == part_id && - ids[j].id.mfg_id == mfg_id) + ids[j].id.mfg_id == mfg_id && + ids[j].id.sdw_version == version) reported_part_count++; } @@ -1212,21 +1215,24 @@ static bool link_slaves_found(struct snd_sof_dev *sdev, if (ids[j].link_id != link_id || ids[j].id.part_id != part_id || - ids[j].id.mfg_id != mfg_id) + ids[j].id.mfg_id != mfg_id || + ids[j].id.sdw_version != version) continue; /* find out how many identical parts are expected */ for (k = 0; k < link->num_adr; k++) { u64 adr2 = link->adr_d[k].adr; - unsigned int part_id2, link_id2, mfg_id2; + unsigned int part_id2, link_id2, mfg_id2, version2; mfg_id2 = SDW_MFG_ID(adr2); part_id2 = SDW_PART_ID(adr2); link_id2 = SDW_DISCO_LINK_ID(adr2); + version2 = SDW_VERSION(adr2); if (link_id2 == link_id && part_id2 == part_id && - mfg_id2 == mfg_id) + mfg_id2 == mfg_id && + version2 == version) expected_part_count++; } @@ -1387,6 +1393,9 @@ struct 
snd_soc_acpi_mach *hda_machine_select(struct snd_sof_dev *sdev) if (!sof_pdata->tplg_filename) sof_pdata->tplg_filename = mach->sof_tplg_filename; + /* report to machine driver if any DMICs are found */ + mach->mach_params.dmic_num = check_dmic_num(sdev); + if (mach->link_mask) { mach->mach_params.links = mach->links; mach->mach_params.link_mask = mach->link_mask; diff --git a/sound/soc/sti/uniperif_player.c b/sound/soc/sti/uniperif_player.c index 2ed92c990b97c..dd9013c476649 100644 --- a/sound/soc/sti/uniperif_player.c +++ b/sound/soc/sti/uniperif_player.c @@ -91,7 +91,7 @@ static irqreturn_t uni_player_irq_handler(int irq, void *dev_id) SET_UNIPERIF_ITM_BCLR_FIFO_ERROR(player); /* Stop the player */ - snd_pcm_stop_xrun(player->substream); + snd_pcm_stop(player->substream, SNDRV_PCM_STATE_XRUN); } ret = IRQ_HANDLED; @@ -105,7 +105,7 @@ static irqreturn_t uni_player_irq_handler(int irq, void *dev_id) SET_UNIPERIF_ITM_BCLR_DMA_ERROR(player); /* Stop the player */ - snd_pcm_stop_xrun(player->substream); + snd_pcm_stop(player->substream, SNDRV_PCM_STATE_XRUN); ret = IRQ_HANDLED; } @@ -138,7 +138,7 @@ static irqreturn_t uni_player_irq_handler(int irq, void *dev_id) dev_err(player->dev, "Underflow recovery failed\n"); /* Stop the player */ - snd_pcm_stop_xrun(player->substream); + snd_pcm_stop(player->substream, SNDRV_PCM_STATE_XRUN); ret = IRQ_HANDLED; } diff --git a/sound/soc/sti/uniperif_reader.c b/sound/soc/sti/uniperif_reader.c index 136059331211d..065c5f0d1f5f0 100644 --- a/sound/soc/sti/uniperif_reader.c +++ b/sound/soc/sti/uniperif_reader.c @@ -65,7 +65,7 @@ static irqreturn_t uni_reader_irq_handler(int irq, void *dev_id) if (unlikely(status & UNIPERIF_ITS_FIFO_ERROR_MASK(reader))) { dev_err(reader->dev, "FIFO error detected\n"); - snd_pcm_stop_xrun(reader->substream); + snd_pcm_stop(reader->substream, SNDRV_PCM_STATE_XRUN); ret = IRQ_HANDLED; } diff --git a/sound/soc/ti/davinci-i2s.c b/sound/soc/ti/davinci-i2s.c index 6dca51862dd76..0363a088d2e00 100644 --- 
a/sound/soc/ti/davinci-i2s.c +++ b/sound/soc/ti/davinci-i2s.c @@ -708,7 +708,9 @@ static int davinci_i2s_probe(struct platform_device *pdev) dev->clk = clk_get(&pdev->dev, NULL); if (IS_ERR(dev->clk)) return -ENODEV; - clk_enable(dev->clk); + ret = clk_enable(dev->clk); + if (ret) + goto err_put_clk; dev->dev = &pdev->dev; dev_set_drvdata(&pdev->dev, dev); @@ -730,6 +732,7 @@ static int davinci_i2s_probe(struct platform_device *pdev) snd_soc_unregister_component(&pdev->dev); err_release_clk: clk_disable(dev->clk); +err_put_clk: clk_put(dev->clk); return ret; } diff --git a/sound/soc/xilinx/xlnx_formatter_pcm.c b/sound/soc/xilinx/xlnx_formatter_pcm.c index ce19a6058b279..5c4158069a5a8 100644 --- a/sound/soc/xilinx/xlnx_formatter_pcm.c +++ b/sound/soc/xilinx/xlnx_formatter_pcm.c @@ -84,6 +84,7 @@ struct xlnx_pcm_drv_data { struct snd_pcm_substream *play_stream; struct snd_pcm_substream *capture_stream; struct clk *axi_clk; + unsigned int sysclk; }; /* @@ -314,6 +315,15 @@ static irqreturn_t xlnx_s2mm_irq_handler(int irq, void *arg) return IRQ_NONE; } +static int xlnx_formatter_set_sysclk(struct snd_soc_component *component, + int clk_id, int source, unsigned int freq, int dir) +{ + struct xlnx_pcm_drv_data *adata = dev_get_drvdata(component->dev); + + adata->sysclk = freq; + return 0; +} + static int xlnx_formatter_pcm_open(struct snd_soc_component *component, struct snd_pcm_substream *substream) { @@ -450,11 +460,25 @@ static int xlnx_formatter_pcm_hw_params(struct snd_soc_component *component, u64 size; struct snd_pcm_runtime *runtime = substream->runtime; struct xlnx_pcm_stream_param *stream_data = runtime->private_data; + struct xlnx_pcm_drv_data *adata = dev_get_drvdata(component->dev); active_ch = params_channels(params); if (active_ch > stream_data->ch_limit) return -EINVAL; + if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK && + adata->sysclk) { + unsigned int mclk_fs = adata->sysclk / params_rate(params); + + if (adata->sysclk % params_rate(params) != 0) { 
+ dev_warn(component->dev, "sysclk %u not divisible by rate %u\n", + adata->sysclk, params_rate(params)); + return -EINVAL; + } + + writel(mclk_fs, stream_data->mmio + XLNX_AUD_FS_MULTIPLIER); + } + if (substream->stream == SNDRV_PCM_STREAM_CAPTURE && stream_data->xfer_mode == AES_TO_PCM) { val = readl(stream_data->mmio + XLNX_AUD_STS); @@ -552,6 +576,7 @@ static int xlnx_formatter_pcm_new(struct snd_soc_component *component, static const struct snd_soc_component_driver xlnx_asoc_component = { .name = DRV_NAME, + .set_sysclk = xlnx_formatter_set_sysclk, .open = xlnx_formatter_pcm_open, .close = xlnx_formatter_pcm_close, .hw_params = xlnx_formatter_pcm_hw_params, diff --git a/sound/spi/at73c213.c b/sound/spi/at73c213.c index 76c0e37a838cf..8a2da6b1012eb 100644 --- a/sound/spi/at73c213.c +++ b/sound/spi/at73c213.c @@ -218,7 +218,9 @@ static int snd_at73c213_pcm_open(struct snd_pcm_substream *substream) runtime->hw = snd_at73c213_playback_hw; chip->substream = substream; - clk_enable(chip->ssc->clk); + err = clk_enable(chip->ssc->clk); + if (err) + return err; return 0; } @@ -776,7 +778,9 @@ static int snd_at73c213_chip_init(struct snd_at73c213 *chip) goto out; /* Enable DAC master clock. */ - clk_enable(chip->board->dac_clk); + retval = clk_enable(chip->board->dac_clk); + if (retval) + goto out; /* Initialize at73c213 on SPI bus. */ retval = snd_at73c213_write_reg(chip, DAC_RST, 0x04); @@ -889,7 +893,9 @@ static int snd_at73c213_dev_init(struct snd_card *card, chip->card = card; chip->irq = -1; - clk_enable(chip->ssc->clk); + retval = clk_enable(chip->ssc->clk); + if (retval) + return retval; retval = request_irq(irq, snd_at73c213_interrupt, 0, "at73c213", chip); if (retval) { @@ -1008,7 +1014,9 @@ static int snd_at73c213_remove(struct spi_device *spi) int retval; /* Stop playback. 
*/ - clk_enable(chip->ssc->clk); + retval = clk_enable(chip->ssc->clk); + if (retval) + goto out; ssc_writel(chip->ssc->regs, CR, SSC_BIT(CR_TXDIS)); clk_disable(chip->ssc->clk); @@ -1088,9 +1096,16 @@ static int snd_at73c213_resume(struct device *dev) { struct snd_card *card = dev_get_drvdata(dev); struct snd_at73c213 *chip = card->private_data; + int retval; - clk_enable(chip->board->dac_clk); - clk_enable(chip->ssc->clk); + retval = clk_enable(chip->board->dac_clk); + if (retval) + return retval; + retval = clk_enable(chip->ssc->clk); + if (retval) { + clk_disable(chip->board->dac_clk); + return retval; + } ssc_writel(chip->ssc->regs, CR, SSC_BIT(CR_TXEN)); return 0; diff --git a/sound/usb/mixer_maps.c b/sound/usb/mixer_maps.c index 96991ddf5055d..64f5544d0a0aa 100644 --- a/sound/usb/mixer_maps.c +++ b/sound/usb/mixer_maps.c @@ -542,6 +542,16 @@ static const struct usbmix_ctl_map usbmix_ctl_maps[] = { .id = USB_ID(0x05a7, 0x40fa), .map = bose_soundlink_map, }, + { + /* Corsair Virtuoso SE Latest (wired mode) */ + .id = USB_ID(0x1b1c, 0x0a3f), + .map = corsair_virtuoso_map, + }, + { + /* Corsair Virtuoso SE Latest (wireless mode) */ + .id = USB_ID(0x1b1c, 0x0a40), + .map = corsair_virtuoso_map, + }, { /* Corsair Virtuoso SE (wired mode) */ .id = USB_ID(0x1b1c, 0x0a3d), diff --git a/sound/usb/mixer_quirks.c b/sound/usb/mixer_quirks.c index e447ddd6854cd..d35cf54cab338 100644 --- a/sound/usb/mixer_quirks.c +++ b/sound/usb/mixer_quirks.c @@ -3360,9 +3360,10 @@ void snd_usb_mixer_fu_apply_quirk(struct usb_mixer_interface *mixer, if (unitid == 7 && cval->control == UAC_FU_VOLUME) snd_dragonfly_quirk_db_scale(mixer, cval, kctl); break; - /* lowest playback value is muted on C-Media devices */ - case USB_ID(0x0d8c, 0x000c): - case USB_ID(0x0d8c, 0x0014): + /* lowest playback value is muted on some devices */ + case USB_ID(0x0d8c, 0x000c): /* C-Media */ + case USB_ID(0x0d8c, 0x0014): /* C-Media */ + case USB_ID(0x19f7, 0x0003): /* RODE NT-USB */ if (strstr(kctl->id.name, 
"Playback")) cval->min_mute = 1; break; diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c index cec6e91afea24..6d699065e81a2 100644 --- a/sound/usb/pcm.c +++ b/sound/usb/pcm.c @@ -669,9 +669,9 @@ static const struct snd_pcm_hardware snd_usb_hardware = SNDRV_PCM_INFO_PAUSE, .channels_min = 1, .channels_max = 256, - .buffer_bytes_max = 1024 * 1024, + .buffer_bytes_max = INT_MAX, /* limited by BUFFER_TIME later */ .period_bytes_min = 64, - .period_bytes_max = 512 * 1024, + .period_bytes_max = INT_MAX, /* limited by PERIOD_TIME later */ .periods_min = 2, .periods_max = 1024, }; @@ -1064,6 +1064,18 @@ static int setup_hw_info(struct snd_pcm_runtime *runtime, struct snd_usb_substre return err; } + /* set max period and buffer sizes for 1 and 2 seconds, respectively */ + err = snd_pcm_hw_constraint_minmax(runtime, + SNDRV_PCM_HW_PARAM_PERIOD_TIME, + 0, 1000000); + if (err < 0) + return err; + err = snd_pcm_hw_constraint_minmax(runtime, + SNDRV_PCM_HW_PARAM_BUFFER_TIME, + 0, 2000000); + if (err < 0) + return err; + /* additional hw constraints for implicit fb */ err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_FORMAT, hw_rule_format_implicit_fb, subs, diff --git a/sound/x86/intel_hdmi_audio.c b/sound/x86/intel_hdmi_audio.c index 4a3ff6468aa75..fa664cf03c326 100644 --- a/sound/x86/intel_hdmi_audio.c +++ b/sound/x86/intel_hdmi_audio.c @@ -1665,7 +1665,7 @@ static void hdmi_lpe_audio_free(struct snd_card *card) * This function is called when the i915 driver creates the * hdmi-lpe-audio platform device. 
*/ -static int hdmi_lpe_audio_probe(struct platform_device *pdev) +static int __hdmi_lpe_audio_probe(struct platform_device *pdev) { struct snd_card *card; struct snd_intelhad_card *card_ctx; @@ -1828,6 +1828,11 @@ static int hdmi_lpe_audio_probe(struct platform_device *pdev) return 0; } +static int hdmi_lpe_audio_probe(struct platform_device *pdev) +{ + return snd_card_free_on_error(&pdev->dev, __hdmi_lpe_audio_probe(pdev)); +} + static const struct dev_pm_ops hdmi_lpe_audio_pm = { SET_SYSTEM_SLEEP_PM_OPS(hdmi_lpe_audio_suspend, hdmi_lpe_audio_resume) }; diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index a4a39c3e0f196..0c2610cde6ea2 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -128,9 +128,9 @@ #define TSX_CTRL_RTM_DISABLE BIT(0) /* Disable RTM feature */ #define TSX_CTRL_CPUID_CLEAR BIT(1) /* Disable TSX enumeration */ -/* SRBDS support */ #define MSR_IA32_MCU_OPT_CTRL 0x00000123 -#define RNGDS_MITG_DIS BIT(0) +#define RNGDS_MITG_DIS BIT(0) /* SRBDS support */ +#define RTM_ALLOW BIT(1) /* TSX development mode */ #define MSR_IA32_SYSENTER_CS 0x00000174 #define MSR_IA32_SYSENTER_ESP 0x00000175 diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c index 59833125ac0a1..a2c665beda87c 100644 --- a/tools/bpf/bpftool/btf.c +++ b/tools/bpf/bpftool/btf.c @@ -902,7 +902,7 @@ static int do_show(int argc, char **argv) equal_fn_for_key_as_id, NULL); btf_map_table = hashmap__new(hash_fn_for_key_as_id, equal_fn_for_key_as_id, NULL); - if (!btf_prog_table || !btf_map_table) { + if (IS_ERR(btf_prog_table) || IS_ERR(btf_map_table)) { hashmap__free(btf_prog_table); hashmap__free(btf_map_table); if (fd >= 0) diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c index b4695df2ea3d7..a7387c265e3cf 100644 --- a/tools/bpf/bpftool/gen.c +++ b/tools/bpf/bpftool/gen.c @@ -927,7 +927,6 @@ static int do_skeleton(int argc, char **argv) s = (struct bpf_object_skeleton 
*)calloc(1, sizeof(*s));\n\ if (!s) \n\ goto err; \n\ - obj->skeleton = s; \n\ \n\ s->sz = sizeof(*s); \n\ s->name = \"%1$s\"; \n\ @@ -1000,6 +999,7 @@ static int do_skeleton(int argc, char **argv) \n\ s->data = (void *)%2$s__elf_bytes(&s->data_sz); \n\ \n\ + obj->skeleton = s; \n\ return 0; \n\ err: \n\ bpf_object__destroy_skeleton(s); \n\ diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c index 2c258db0d3521..97dec81950e5d 100644 --- a/tools/bpf/bpftool/link.c +++ b/tools/bpf/bpftool/link.c @@ -2,6 +2,7 @@ /* Copyright (C) 2020 Facebook */ #include +#include #include #include #include @@ -306,7 +307,7 @@ static int do_show(int argc, char **argv) if (show_pinned) { link_table = hashmap__new(hash_fn_for_key_as_id, equal_fn_for_key_as_id, NULL); - if (!link_table) { + if (IS_ERR(link_table)) { p_err("failed to create hashmap for pinned paths"); return -1; } diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index cc530a2298124..0bba33729c7f0 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -620,17 +620,14 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info) u32_as_hash_field(info->id)) printf("\n\tpinned %s", (char *)entry->value); } - printf("\n"); if (frozen_str) { frozen = atoi(frozen_str); free(frozen_str); } - if (!info->btf_id && !frozen) - return 0; - - printf("\t"); + if (info->btf_id || frozen) + printf("\n\t"); if (info->btf_id) printf("btf_id %d", info->btf_id); @@ -699,7 +696,7 @@ static int do_show(int argc, char **argv) if (show_pinned) { map_table = hashmap__new(hash_fn_for_key_as_id, equal_fn_for_key_as_id, NULL); - if (!map_table) { + if (IS_ERR(map_table)) { p_err("failed to create hashmap for pinned paths"); return -1; } @@ -805,29 +802,30 @@ static int maps_have_btf(int *fds, int nb_fds) static struct btf *btf_vmlinux; -static struct btf *get_map_kv_btf(const struct bpf_map_info *info) +static int get_map_kv_btf(const struct bpf_map_info *info, struct btf **btf) { - struct btf *btf = 
NULL; + int err = 0; if (info->btf_vmlinux_value_type_id) { if (!btf_vmlinux) { btf_vmlinux = libbpf_find_kernel_btf(); - if (libbpf_get_error(btf_vmlinux)) + err = libbpf_get_error(btf_vmlinux); + if (err) { p_err("failed to get kernel btf"); + return err; + } } - return btf_vmlinux; + *btf = btf_vmlinux; } else if (info->btf_value_type_id) { - int err; - - btf = btf__load_from_kernel_by_id(info->btf_id); - err = libbpf_get_error(btf); - if (err) { + *btf = btf__load_from_kernel_by_id(info->btf_id); + err = libbpf_get_error(*btf); + if (err) p_err("failed to get btf"); - btf = ERR_PTR(err); - } + } else { + *btf = NULL; } - return btf; + return err; } static void free_map_kv_btf(struct btf *btf) @@ -862,8 +860,7 @@ map_dump(int fd, struct bpf_map_info *info, json_writer_t *wtr, prev_key = NULL; if (wtr) { - btf = get_map_kv_btf(info); - err = libbpf_get_error(btf); + err = get_map_kv_btf(info, &btf); if (err) { goto exit_free; } @@ -1054,11 +1051,8 @@ static void print_key_value(struct bpf_map_info *info, void *key, json_writer_t *btf_wtr; struct btf *btf; - btf = btf__load_from_kernel_by_id(info->btf_id); - if (libbpf_get_error(btf)) { - p_err("failed to get btf"); + if (get_map_kv_btf(info, &btf)) return; - } if (json_output) { print_entry_json(info, key, value, btf); diff --git a/tools/bpf/bpftool/pids.c b/tools/bpf/bpftool/pids.c index 56b598eee043a..7c384d10e95f8 100644 --- a/tools/bpf/bpftool/pids.c +++ b/tools/bpf/bpftool/pids.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) /* Copyright (C) 2020 Facebook */ #include +#include #include #include #include @@ -101,7 +102,7 @@ int build_obj_refs_table(struct hashmap **map, enum bpf_obj_type type) libbpf_print_fn_t default_print; *map = hashmap__new(hash_fn_for_key_as_id, equal_fn_for_key_as_id, NULL); - if (!*map) { + if (IS_ERR(*map)) { p_err("failed to create hashmap for PID references"); return -1; } diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 
2a21d50516bc4..33ca834d5f510 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -641,7 +641,7 @@ static int do_show(int argc, char **argv) if (show_pinned) { prog_table = hashmap__new(hash_fn_for_key_as_id, equal_fn_for_key_as_id, NULL); - if (!prog_table) { + if (IS_ERR(prog_table)) { p_err("failed to create hashmap for pinned paths"); return -1; } diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index 1480910c792e2..de66e1cc07348 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -217,9 +217,16 @@ strip-libs = $(filter-out -l%,$(1)) PERL_EMBED_LDOPTS = $(shell perl -MExtUtils::Embed -e ldopts 2>/dev/null) PERL_EMBED_LDFLAGS = $(call strip-libs,$(PERL_EMBED_LDOPTS)) PERL_EMBED_LIBADD = $(call grep-libs,$(PERL_EMBED_LDOPTS)) -PERL_EMBED_CCOPTS = `perl -MExtUtils::Embed -e ccopts 2>/dev/null` +PERL_EMBED_CCOPTS = $(shell perl -MExtUtils::Embed -e ccopts 2>/dev/null) FLAGS_PERL_EMBED=$(PERL_EMBED_CCOPTS) $(PERL_EMBED_LDOPTS) +ifeq ($(CC_NO_CLANG), 0) + PERL_EMBED_LDOPTS := $(filter-out -specs=%,$(PERL_EMBED_LDOPTS)) + PERL_EMBED_CCOPTS := $(filter-out -flto=auto -ffat-lto-objects, $(PERL_EMBED_CCOPTS)) + PERL_EMBED_CCOPTS := $(filter-out -specs=%,$(PERL_EMBED_CCOPTS)) + FLAGS_PERL_EMBED += -Wno-compound-token-split-by-macro +endif + $(OUTPUT)test-libperl.bin: $(BUILD) $(FLAGS_PERL_EMBED) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index b0383d371b9af..49340175feb94 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -2286,8 +2286,8 @@ union bpf_attr { * Return * The return value depends on the result of the test, and can be: * - * * 0, if current task belongs to the cgroup2. - * * 1, if current task does not belong to the cgroup2. + * * 1, if current task belongs to the cgroup2. + * * 0, if current task does not belong to the cgroup2. * * A negative error code, if an error occurred. 
* * long bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags) diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index f947b61b21071..b8b37fe760069 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -131,7 +131,7 @@ GLOBAL_SYM_COUNT = $(shell readelf -s --wide $(BPF_IN_SHARED) | \ sort -u | wc -l) VERSIONED_SYM_COUNT = $(shell readelf --dyn-syms --wide $(OUTPUT)libbpf.so | \ sed 's/\[.*\]//' | \ - awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}' | \ + awk '/GLOBAL/ && /DEFAULT/ && !/UND|ABS/ {print $$NF}' | \ grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | sort -u | wc -l) CMD_TARGETS = $(LIB_TARGET) $(PC_FILE) @@ -194,7 +194,7 @@ check_abi: $(OUTPUT)libbpf.so $(VERSION_SCRIPT) sort -u > $(OUTPUT)libbpf_global_syms.tmp; \ readelf --dyn-syms --wide $(OUTPUT)libbpf.so | \ sed 's/\[.*\]//' | \ - awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}'| \ + awk '/GLOBAL/ && /DEFAULT/ && !/UND|ABS/ {print $$NF}'| \ grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | \ sort -u > $(OUTPUT)libbpf_versioned_syms.tmp; \ diff -u $(OUTPUT)libbpf_global_syms.tmp \ diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h index 90f56b0f585f0..122d79c8f4b42 100644 --- a/tools/lib/bpf/bpf_tracing.h +++ b/tools/lib/bpf/bpf_tracing.h @@ -112,6 +112,10 @@ #elif defined(bpf_target_s390) +struct pt_regs___s390 { + unsigned long orig_gpr2; +}; + /* s390 provides user_pt_regs instead of struct pt_regs to userspace */ #define __PT_REGS_CAST(x) ((const user_pt_regs *)(x)) #define __PT_PARM1_REG gprs[2] @@ -124,6 +128,8 @@ #define __PT_RC_REG gprs[2] #define __PT_SP_REG gprs[15] #define __PT_IP_REG psw.addr +#define PT_REGS_PARM1_SYSCALL(x) ({ _Pragma("GCC error \"use PT_REGS_PARM1_CORE_SYSCALL() instead\""); 0l; }) +#define PT_REGS_PARM1_CORE_SYSCALL(x) BPF_CORE_READ((const struct pt_regs___s390 *)(x), orig_gpr2) #elif defined(bpf_target_arm) @@ -140,6 +146,10 @@ #elif defined(bpf_target_arm64) +struct pt_regs___arm64 { + unsigned long orig_x0; +}; + /* arm64 
provides struct user_pt_regs instead of struct pt_regs to userspace */ #define __PT_REGS_CAST(x) ((const struct user_pt_regs *)(x)) #define __PT_PARM1_REG regs[0] @@ -152,6 +162,8 @@ #define __PT_RC_REG regs[0] #define __PT_SP_REG sp #define __PT_IP_REG pc +#define PT_REGS_PARM1_SYSCALL(x) ({ _Pragma("GCC error \"use PT_REGS_PARM1_CORE_SYSCALL() instead\""); 0l; }) +#define PT_REGS_PARM1_CORE_SYSCALL(x) BPF_CORE_READ((const struct pt_regs___arm64 *)(x), orig_x0) #elif defined(bpf_target_mips) @@ -178,6 +190,8 @@ #define __PT_RC_REG gpr[3] #define __PT_SP_REG sp #define __PT_IP_REG nip +/* powerpc does not select ARCH_HAS_SYSCALL_WRAPPER. */ +#define PT_REGS_SYSCALL_REGS(ctx) ctx #elif defined(bpf_target_sparc) @@ -206,10 +220,10 @@ #define __PT_PARM4_REG a3 #define __PT_PARM5_REG a4 #define __PT_RET_REG ra -#define __PT_FP_REG fp +#define __PT_FP_REG s0 #define __PT_RC_REG a5 #define __PT_SP_REG sp -#define __PT_IP_REG epc +#define __PT_IP_REG pc #endif diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index 061839f045255..51862fdee850b 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -375,8 +375,28 @@ btf_dump__dump_type_data(struct btf_dump *d, __u32 id, const struct btf_dump_type_data_opts *opts); /* - * A set of helpers for easier BTF types handling + * A set of helpers for easier BTF types handling. + * + * The inline functions below rely on constants from the kernel headers which + * may not be available for applications including this header file. To avoid + * compilation errors, we define all the constants here that were added after + * the initial introduction of the BTF_KIND* constants. 
*/ +#ifndef BTF_KIND_FUNC +#define BTF_KIND_FUNC 12 /* Function */ +#define BTF_KIND_FUNC_PROTO 13 /* Function Proto */ +#endif +#ifndef BTF_KIND_VAR +#define BTF_KIND_VAR 14 /* Variable */ +#define BTF_KIND_DATASEC 15 /* Section */ +#endif +#ifndef BTF_KIND_FLOAT +#define BTF_KIND_FLOAT 16 /* Floating point */ +#endif +/* The kernel header switched to enums, so these two were never #defined */ +#define BTF_KIND_DECL_TAG 17 /* Decl Tag */ +#define BTF_KIND_TYPE_TAG 18 /* Type Tag */ + static inline __u16 btf_kind(const struct btf_type *t) { return BTF_INFO_KIND(t->info); diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index b9a3260c83cbd..6b1bc1f43728c 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -1505,6 +1505,11 @@ static const char *btf_dump_resolve_name(struct btf_dump *d, __u32 id, if (s->name_resolved) return *cached_name ? *cached_name : orig_name; + if (btf_is_fwd(t) || (btf_is_enum(t) && btf_vlen(t) == 0)) { + s->name_resolved = 1; + return orig_name; + } + dup_cnt = btf_dump_name_dups(d, name_map, orig_name); if (dup_cnt > 1) { const size_t max_len = 256; @@ -1861,14 +1866,16 @@ static int btf_dump_array_data(struct btf_dump *d, { const struct btf_array *array = btf_array(t); const struct btf_type *elem_type; - __u32 i, elem_size = 0, elem_type_id; + __u32 i, elem_type_id; + __s64 elem_size; bool is_array_member; elem_type_id = array->type; elem_type = skip_mods_and_typedefs(d->btf, elem_type_id, NULL); elem_size = btf__resolve_size(d->btf, elem_type_id); if (elem_size <= 0) { - pr_warn("unexpected elem size %d for array type [%u]\n", elem_size, id); + pr_warn("unexpected elem size %zd for array type [%u]\n", + (ssize_t)elem_size, id); return -EINVAL; } diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 7f10dd501a52b..94a6a8543cbc9 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -4854,7 +4854,6 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b 
LIBBPF_OPTS(bpf_map_create_opts, create_attr); struct bpf_map_def *def = &map->def; const char *map_name = NULL; - __u32 max_entries; int err = 0; if (kernel_supports(obj, FEAT_PROG_NAME)) @@ -4864,21 +4863,6 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b create_attr.numa_node = map->numa_node; create_attr.map_extra = map->map_extra; - if (def->type == BPF_MAP_TYPE_PERF_EVENT_ARRAY && !def->max_entries) { - int nr_cpus; - - nr_cpus = libbpf_num_possible_cpus(); - if (nr_cpus < 0) { - pr_warn("map '%s': failed to determine number of system CPUs: %d\n", - map->name, nr_cpus); - return nr_cpus; - } - pr_debug("map '%s': setting size to %d\n", map->name, nr_cpus); - max_entries = nr_cpus; - } else { - max_entries = def->max_entries; - } - if (bpf_map__is_struct_ops(map)) create_attr.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id; @@ -4928,7 +4912,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b if (obj->gen_loader) { bpf_gen__map_create(obj->gen_loader, def->type, map_name, - def->key_size, def->value_size, max_entries, + def->key_size, def->value_size, def->max_entries, &create_attr, is_inner ? -1 : map - obj->maps); /* Pretend to have valid FD to pass various fd >= 0 checks. * This fd == 0 will not be used with any syscall and will be reset to -1 eventually. 
@@ -4937,7 +4921,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b } else { map->fd = bpf_map_create(def->type, map_name, def->key_size, def->value_size, - max_entries, &create_attr); + def->max_entries, &create_attr); } if (map->fd < 0 && (create_attr.btf_key_type_id || create_attr.btf_value_type_id)) { @@ -4954,7 +4938,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b map->btf_value_type_id = 0; map->fd = bpf_map_create(def->type, map_name, def->key_size, def->value_size, - max_entries, &create_attr); + def->max_entries, &create_attr); } err = map->fd < 0 ? -errno : 0; @@ -5058,6 +5042,24 @@ static int bpf_object_init_prog_arrays(struct bpf_object *obj) return 0; } +static int map_set_def_max_entries(struct bpf_map *map) +{ + if (map->def.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY && !map->def.max_entries) { + int nr_cpus; + + nr_cpus = libbpf_num_possible_cpus(); + if (nr_cpus < 0) { + pr_warn("map '%s': failed to determine number of system CPUs: %d\n", + map->name, nr_cpus); + return nr_cpus; + } + pr_debug("map '%s': setting size to %d\n", map->name, nr_cpus); + map->def.max_entries = nr_cpus; + } + + return 0; +} + static int bpf_object__create_maps(struct bpf_object *obj) { @@ -5090,6 +5092,10 @@ bpf_object__create_maps(struct bpf_object *obj) continue; } + err = map_set_def_max_entries(map); + if (err) + goto err_out; + retried = false; retry: if (map->pin_path) { @@ -11795,6 +11801,9 @@ void bpf_object__detach_skeleton(struct bpf_object_skeleton *s) void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s) { + if (!s) + return; + if (s->progs) bpf_object__detach_skeleton(s); if (s->obj) diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 5297839677930..9a89fdfe4987e 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -431,4 +431,4 @@ LIBBPF_0.7.0 { libbpf_probe_bpf_map_type; libbpf_probe_bpf_prog_type; libbpf_set_memlock_rlim_max; -}; +} 
LIBBPF_0.6.0; diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c index 39f25e09b51e2..fadde7d80a51c 100644 --- a/tools/lib/bpf/netlink.c +++ b/tools/lib/bpf/netlink.c @@ -87,29 +87,75 @@ enum { NL_DONE, }; +static int netlink_recvmsg(int sock, struct msghdr *mhdr, int flags) +{ + int len; + + do { + len = recvmsg(sock, mhdr, flags); + } while (len < 0 && (errno == EINTR || errno == EAGAIN)); + + if (len < 0) + return -errno; + return len; +} + +static int alloc_iov(struct iovec *iov, int len) +{ + void *nbuf; + + nbuf = realloc(iov->iov_base, len); + if (!nbuf) + return -ENOMEM; + + iov->iov_base = nbuf; + iov->iov_len = len; + return 0; +} + static int libbpf_netlink_recv(int sock, __u32 nl_pid, int seq, __dump_nlmsg_t _fn, libbpf_dump_nlmsg_t fn, void *cookie) { + struct iovec iov = {}; + struct msghdr mhdr = { + .msg_iov = &iov, + .msg_iovlen = 1, + }; bool multipart = true; struct nlmsgerr *err; struct nlmsghdr *nh; - char buf[4096]; int len, ret; + ret = alloc_iov(&iov, 4096); + if (ret) + goto done; + while (multipart) { start: multipart = false; - len = recv(sock, buf, sizeof(buf), 0); + len = netlink_recvmsg(sock, &mhdr, MSG_PEEK | MSG_TRUNC); + if (len < 0) { + ret = len; + goto done; + } + + if (len > iov.iov_len) { + ret = alloc_iov(&iov, len); + if (ret) + goto done; + } + + len = netlink_recvmsg(sock, &mhdr, 0); if (len < 0) { - ret = -errno; + ret = len; goto done; } if (len == 0) break; - for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len); + for (nh = (struct nlmsghdr *)iov.iov_base; NLMSG_OK(nh, len); nh = NLMSG_NEXT(nh, len)) { if (nh->nlmsg_pid != nl_pid) { ret = -LIBBPF_ERRNO__WRNGPID; @@ -130,7 +176,8 @@ static int libbpf_netlink_recv(int sock, __u32 nl_pid, int seq, libbpf_nla_dump_errormsg(nh); goto done; case NLMSG_DONE: - return 0; + ret = 0; + goto done; default: break; } @@ -142,15 +189,17 @@ static int libbpf_netlink_recv(int sock, __u32 nl_pid, int seq, case NL_NEXT: goto start; case NL_DONE: - return 0; + ret = 0; + goto 
done; default: - return ret; + goto done; } } } } ret = 0; done: + free(iov.iov_base); return ret; } diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c index edafe56664f3a..32a2f5749c711 100644 --- a/tools/lib/bpf/xsk.c +++ b/tools/lib/bpf/xsk.c @@ -1193,12 +1193,23 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, int xsk_umem__delete(struct xsk_umem *umem) { + struct xdp_mmap_offsets off; + int err; + if (!umem) return 0; if (umem->refcount) return -EBUSY; + err = xsk_get_mmap_offsets(umem->fd, &off); + if (!err && umem->fill_save && umem->comp_save) { + munmap(umem->fill_save->ring - off.fr.desc, + off.fr.desc + umem->config.fill_size * sizeof(__u64)); + munmap(umem->comp_save->ring - off.cr.desc, + off.cr.desc + umem->config.comp_size * sizeof(__u64)); + } + close(umem->fd); free(umem); diff --git a/tools/lib/perf/tests/test-evlist.c b/tools/lib/perf/tests/test-evlist.c index fa854c83b7e7b..ed616fc19b4f2 100644 --- a/tools/lib/perf/tests/test-evlist.c +++ b/tools/lib/perf/tests/test-evlist.c @@ -69,7 +69,7 @@ static int test_stat_cpu(void) perf_evlist__set_maps(evlist, cpus, NULL); err = perf_evlist__open(evlist); - __T("failed to open evsel", err == 0); + __T("failed to open evlist", err == 0); perf_evlist__for_each_evsel(evlist, evsel) { cpus = perf_evsel__cpus(evsel); @@ -130,7 +130,7 @@ static int test_stat_thread(void) perf_evlist__set_maps(evlist, NULL, threads); err = perf_evlist__open(evlist); - __T("failed to open evsel", err == 0); + __T("failed to open evlist", err == 0); perf_evlist__for_each_evsel(evlist, evsel) { perf_evsel__read(evsel, 0, 0, &counts); @@ -187,7 +187,7 @@ static int test_stat_thread_enable(void) perf_evlist__set_maps(evlist, NULL, threads); err = perf_evlist__open(evlist); - __T("failed to open evsel", err == 0); + __T("failed to open evlist", err == 0); perf_evlist__for_each_evsel(evlist, evsel) { perf_evsel__read(evsel, 0, 0, &counts); @@ -507,7 +507,7 @@ static int test_stat_multiplexing(void) 
perf_evlist__set_maps(evlist, NULL, threads); err = perf_evlist__open(evlist); - __T("failed to open evsel", err == 0); + __T("failed to open evlist", err == 0); perf_evlist__enable(evlist); diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 7c33ec67c4a95..3470813daf4aa 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -1090,6 +1090,17 @@ static void annotate_call_site(struct objtool_file *file, : arch_nop_insn(insn->len)); insn->type = sibling ? INSN_RETURN : INSN_NOP; + + if (sibling) { + /* + * We've replaced the tail-call JMP insn by two new + * insn: RET; INT3, except we only have a single struct + * insn here. Mark it retpoline_safe to avoid the SLS + * warning, instead of adding another insn. + */ + insn->retpoline_safe = true; + } + return; } diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 96ad944ca6a88..f3bf9297bcc03 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -272,6 +272,9 @@ ifdef PYTHON_CONFIG PYTHON_EMBED_LIBADD := $(call grep-libs,$(PYTHON_EMBED_LDOPTS)) -lutil PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --includes 2>/dev/null) FLAGS_PYTHON_EMBED := $(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS) + ifeq ($(CC_NO_CLANG), 0) + PYTHON_EMBED_CCOPTS := $(filter-out -ffat-lto-objects, $(PYTHON_EMBED_CCOPTS)) + endif endif FEATURE_CHECK_CFLAGS-libpython := $(PYTHON_EMBED_CCOPTS) @@ -790,6 +793,9 @@ else LDFLAGS += $(PERL_EMBED_LDFLAGS) EXTLIBS += $(PERL_EMBED_LIBADD) CFLAGS += -DHAVE_LIBPERL_SUPPORT + ifeq ($(CC_NO_CLANG), 0) + CFLAGS += -Wno-compound-token-split-by-macro + endif $(call detected,CONFIG_LIBPERL) endif endif diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c index 2100d46ccf5e6..bb4ab99afa7f8 100644 --- a/tools/perf/arch/arm64/util/arm-spe.c +++ b/tools/perf/arch/arm64/util/arm-spe.c @@ -239,6 +239,12 @@ static int arm_spe_recording_options(struct auxtrace_record *itr, arm_spe_set_timestamp(itr, arm_spe_evsel); } 
+ /* + * Set this only so that perf report knows that SPE generates memory info. It has no effect + * on the opening of the event or the SPE data produced. + */ + evsel__set_sample_bit(arm_spe_evsel, DATA_SRC); + /* Add dummy event to keep tracking */ err = parse_events(evlist, "dummy:u", NULL); if (err) diff --git a/tools/perf/arch/x86/util/evlist.c b/tools/perf/arch/x86/util/evlist.c index 8d9b55959256a..cfc208d71f00a 100644 --- a/tools/perf/arch/x86/util/evlist.c +++ b/tools/perf/arch/x86/util/evlist.c @@ -20,17 +20,27 @@ int arch_evlist__add_default_attrs(struct evlist *evlist) struct evsel *arch_evlist__leader(struct list_head *list) { - struct evsel *evsel, *first; + struct evsel *evsel, *first, *slots = NULL; + bool has_topdown = false; first = list_first_entry(list, struct evsel, core.node); if (!pmu_have_event("cpu", "slots")) return first; + /* If there is a slots event and a topdown event then the slots event comes first. */ __evlist__for_each_entry(list, evsel) { - if (evsel->pmu_name && !strcmp(evsel->pmu_name, "cpu") && - evsel->name && strcasestr(evsel->name, "slots")) - return evsel; + if (evsel->pmu_name && !strcmp(evsel->pmu_name, "cpu") && evsel->name) { + if (strcasestr(evsel->name, "slots")) { + slots = evsel; + if (slots == first) + return first; + } + if (!strncasecmp(evsel->name, "topdown", 7)) + has_topdown = true; + if (slots && has_topdown) + return slots; + } } return first; } diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 3f98689dd6878..60baa3dadc4b6 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -955,10 +955,10 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx) * Enable counters and exec the command: */ if (forks) { - evlist__start_workload(evsel_list); err = enable_counters(); if (err) return -1; + evlist__start_workload(evsel_list); t0 = rdclock(); clock_gettime(CLOCK_MONOTONIC, &ref_time); diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 
2f6b67189b426..6aae7b6c376b4 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -434,7 +434,7 @@ void pthread__unblock_sigwinch(void) static int libperf_print(enum libperf_print_level level, const char *fmt, va_list ap) { - return eprintf(level, verbose, fmt, ap); + return veprintf(level, verbose, fmt, ap); } int main(int argc, const char **argv) diff --git a/tools/perf/pmu-events/arch/x86/skylakex/cache.json b/tools/perf/pmu-events/arch/x86/skylakex/cache.json index 9ff67206ade4e..821d2f2a8f251 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/cache.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/cache.json @@ -314,6 +314,19 @@ "SampleAfterValue": "2000003", "UMask": "0x82" }, + { + "BriefDescription": "All retired memory instructions.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD0", + "EventName": "MEM_INST_RETIRED.ANY", + "L1_Hit_Indication": "1", + "PEBS": "1", + "PublicDescription": "Counts all retired memory instructions - loads and stores.", + "SampleAfterValue": "2000003", + "UMask": "0x83" + }, { "BriefDescription": "Retired load instructions with locked access.", "Counter": "0,1,2,3", @@ -358,6 +371,7 @@ "EventCode": "0xD0", "EventName": "MEM_INST_RETIRED.STLB_MISS_LOADS", "PEBS": "1", + "PublicDescription": "Number of retired load instructions that (start a) miss in the 2nd-level TLB (STLB).", "SampleAfterValue": "100003", "UMask": "0x11" }, @@ -370,6 +384,7 @@ "EventName": "MEM_INST_RETIRED.STLB_MISS_STORES", "L1_Hit_Indication": "1", "PEBS": "1", + "PublicDescription": "Number of retired store instructions that (start a) miss in the 2nd-level TLB (STLB).", "SampleAfterValue": "100003", "UMask": "0x12" }, @@ -733,7 +748,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x0000010491", + "MSRValue": "0x10491", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event 
select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -772,7 +787,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x04003C0491", + "MSRValue": "0x4003C0491", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -785,7 +800,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x01003C0491", + "MSRValue": "0x1003C0491", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -798,7 +813,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x08003C0491", + "MSRValue": "0x8003C0491", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -811,7 +826,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x0000010490", + "MSRValue": "0x10490", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and 
with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -850,7 +865,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x04003C0490", + "MSRValue": "0x4003C0490", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -863,7 +878,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x01003C0490", + "MSRValue": "0x1003C0490", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -876,7 +891,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x08003C0490", + "MSRValue": "0x8003C0490", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -889,7 +904,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x0000010120", + "MSRValue": "0x10120", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event 
codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -928,7 +943,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x04003C0120", + "MSRValue": "0x4003C0120", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -941,7 +956,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x01003C0120", + "MSRValue": "0x1003C0120", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -954,7 +969,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x08003C0120", + "MSRValue": "0x8003C0120", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -967,7 +982,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.ALL_RFO.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x0000010122", + "MSRValue": "0x10122", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a 
dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1006,7 +1021,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x04003C0122", + "MSRValue": "0x4003C0122", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1019,7 +1034,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x01003C0122", + "MSRValue": "0x1003C0122", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1032,7 +1047,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x08003C0122", + "MSRValue": "0x8003C0122", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1045,7 +1060,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x0000010004", + "MSRValue": "0x10004", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify 
attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1084,7 +1099,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x04003C0004", + "MSRValue": "0x4003C0004", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1097,7 +1112,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x01003C0004", + "MSRValue": "0x1003C0004", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1110,7 +1125,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_HIT_WITH_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x08003C0004", + "MSRValue": "0x8003C0004", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1123,7 +1138,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x0000010001", + "MSRValue": "0x10001", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes 
of the offcore transaction.", "SampleAfterValue": "100003", @@ -1162,7 +1177,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x04003C0001", + "MSRValue": "0x4003C0001", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1175,7 +1190,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x01003C0001", + "MSRValue": "0x1003C0001", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1188,7 +1203,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x08003C0001", + "MSRValue": "0x8003C0001", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1201,7 +1216,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x0000010002", + "MSRValue": "0x10002", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore 
transaction.", "SampleAfterValue": "100003", @@ -1240,7 +1255,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x04003C0002", + "MSRValue": "0x4003C0002", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1253,7 +1268,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x01003C0002", + "MSRValue": "0x1003C0002", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1266,7 +1281,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x08003C0002", + "MSRValue": "0x8003C0002", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1279,7 +1294,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x0000010400", + "MSRValue": "0x10400", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", 
"SampleAfterValue": "100003", @@ -1318,7 +1333,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x04003C0400", + "MSRValue": "0x4003C0400", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1331,7 +1346,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x01003C0400", + "MSRValue": "0x1003C0400", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1344,7 +1359,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.SNOOP_HIT_WITH_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x08003C0400", + "MSRValue": "0x8003C0400", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1357,7 +1372,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x0000010010", + "MSRValue": "0x10010", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", 
"SampleAfterValue": "100003", @@ -1396,7 +1411,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x04003C0010", + "MSRValue": "0x4003C0010", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1409,7 +1424,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x01003C0010", + "MSRValue": "0x1003C0010", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1422,7 +1437,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x08003C0010", + "MSRValue": "0x8003C0010", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1435,7 +1450,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x0000010020", + "MSRValue": "0x10020", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": 
"100003", @@ -1474,7 +1489,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x04003C0020", + "MSRValue": "0x4003C0020", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1487,7 +1502,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x01003C0020", + "MSRValue": "0x1003C0020", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1500,7 +1515,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x08003C0020", + "MSRValue": "0x8003C0020", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1513,7 +1528,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x0000010080", + "MSRValue": "0x10080", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1552,7 +1567,7 
@@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x04003C0080", + "MSRValue": "0x4003C0080", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1565,7 +1580,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x01003C0080", + "MSRValue": "0x1003C0080", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1578,7 +1593,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x08003C0080", + "MSRValue": "0x8003C0080", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1591,7 +1606,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x0000010100", + "MSRValue": "0x10100", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1630,7 +1645,7 @@ "EventCode": 
"0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x04003C0100", + "MSRValue": "0x4003C0100", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1643,7 +1658,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x01003C0100", + "MSRValue": "0x1003C0100", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1656,7 +1671,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x08003C0100", + "MSRValue": "0x8003C0100", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", diff --git a/tools/perf/pmu-events/arch/x86/skylakex/floating-point.json b/tools/perf/pmu-events/arch/x86/skylakex/floating-point.json index 503737ed3a83c..9e873ab224502 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/floating-point.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/floating-point.json @@ -1,73 +1,81 @@ [ { - "BriefDescription": "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. 
Each count represents 2 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT14 RCP14 DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.", + "BriefDescription": "Counts once for most SIMD 128-bit packed computational double precision floating-point instructions retired. Counts twice for DPP and FM(N)ADD/SUB instructions retired.", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3,4,5,6,7", "EventCode": "0xC7", "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE", + "PublicDescription": "Counts once for most SIMD 128-bit packed computational double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 computation operations, one for each element. Applies to packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.", "SampleAfterValue": "2000003", "UMask": "0x4" }, { - "BriefDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.", + "BriefDescription": "Counts once for most SIMD 128-bit packed computational single precision floating-point instruction retired. 
Counts twice for DPP and FM(N)ADD/SUB instructions retired.", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3,4,5,6,7", "EventCode": "0xC7", "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE", + "PublicDescription": "Counts once for most SIMD 128-bit packed computational single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.", "SampleAfterValue": "2000003", "UMask": "0x8" }, { - "BriefDescription": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.", + "BriefDescription": "Counts once for most SIMD 256-bit packed double computational precision floating-point instructions retired. Counts twice for DPP and FM(N)ADD/SUB instructions retired.", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3,4,5,6,7", "EventCode": "0xC7", "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE", + "PublicDescription": "Counts once for most SIMD 256-bit packed double computational precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT FM(N)ADD/SUB. 
FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.", "SampleAfterValue": "2000003", "UMask": "0x10" }, { - "BriefDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.", + "BriefDescription": "Counts once for most SIMD 256-bit packed single computational precision floating-point instructions retired. Counts twice for DPP and FM(N)ADD/SUB instructions retired.", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3,4,5,6,7", "EventCode": "0xC7", "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE", + "PublicDescription": "Counts once for most SIMD 256-bit packed single computational precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.", "SampleAfterValue": "2000003", "UMask": "0x20" }, { - "BriefDescription": "Number of SSE/AVX computational 512-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. 
Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 8 calculations per element.", + "BriefDescription": "Counts number of SSE/AVX computational 512-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT14 RCP14 FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3,4,5,6,7", "EventCode": "0xC7", "EventName": "FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE", + "PublicDescription": "Number of SSE/AVX computational 512-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT14 RCP14 FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.", "SampleAfterValue": "2000003", "UMask": "0x40" }, { - "BriefDescription": "Number of SSE/AVX computational 512-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 16 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. 
DPP and FM(N)ADD/SUB instructions count twice as they perform 16 calculations per element.", + "BriefDescription": "Counts number of SSE/AVX computational 512-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 16 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT14 RCP14 FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3,4,5,6,7", "EventCode": "0xC7", "EventName": "FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE", + "PublicDescription": "Number of SSE/AVX computational 512-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 16 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT14 RCP14 FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.", "SampleAfterValue": "2000003", "UMask": "0x80" }, { - "BriefDescription": "Number of SSE/AVX computational scalar double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.", + "BriefDescription": "Counts once for most SIMD scalar computational double precision floating-point instructions retired. 
Counts twice for DPP and FM(N)ADD/SUB instructions retired.", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3,4,5,6,7", "EventCode": "0xC7", "EventName": "FP_ARITH_INST_RETIRED.SCALAR_DOUBLE", + "PublicDescription": "Counts once for most SIMD scalar computational double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SIMD scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.", "SampleAfterValue": "2000003", "UMask": "0x1" }, { - "BriefDescription": "Number of SSE/AVX computational scalar single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.", + "BriefDescription": "Counts once for most SIMD scalar computational single precision floating-point instructions retired. Counts twice for DPP and FM(N)ADD/SUB instructions retired.", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3,4,5,6,7", "EventCode": "0xC7", "EventName": "FP_ARITH_INST_RETIRED.SCALAR_SINGLE", + "PublicDescription": "Counts once for most SIMD scalar computational single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SIMD scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT RCP FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. 
The DAZ and FTZ flags in the MXCSR register need to be set when using these events.", "SampleAfterValue": "2000003", "UMask": "0x2" }, diff --git a/tools/perf/pmu-events/arch/x86/skylakex/frontend.json b/tools/perf/pmu-events/arch/x86/skylakex/frontend.json index 078706a500919..ecce4273ae52c 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/frontend.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/frontend.json @@ -30,7 +30,21 @@ "UMask": "0x2" }, { - "BriefDescription": "Retired Instructions who experienced decode stream buffer (DSB - the decoded instruction-cache) miss.", + "BriefDescription": "Retired Instructions who experienced DSB miss.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xC6", + "EventName": "FRONTEND_RETIRED.ANY_DSB_MISS", + "MSRIndex": "0x3F7", + "MSRValue": "0x1", + "PEBS": "1", + "PublicDescription": "Counts retired Instructions that experienced DSB (Decode stream buffer i.e. the decoded instruction-cache) miss.", + "SampleAfterValue": "100007", + "TakenAlone": "1", + "UMask": "0x1" + }, + { + "BriefDescription": "Retired Instructions who experienced a critical DSB miss.", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xC6", @@ -38,7 +52,7 @@ "MSRIndex": "0x3F7", "MSRValue": "0x11", "PEBS": "1", - "PublicDescription": "Counts retired Instructions that experienced DSB (Decode stream buffer i.e. the decoded instruction-cache) miss.", + "PublicDescription": "Number of retired Instructions that experienced a critical DSB (Decode stream buffer i.e. the decoded instruction-cache) miss. 
Critical means stalls were exposed to the back-end as a result of the DSB miss.", "SampleAfterValue": "100007", "TakenAlone": "1", "UMask": "0x1" diff --git a/tools/perf/pmu-events/arch/x86/skylakex/memory.json b/tools/perf/pmu-events/arch/x86/skylakex/memory.json index 6f29b02fa320c..60c286b4fe54c 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/memory.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/memory.json @@ -299,7 +299,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x083FC00491", + "MSRValue": "0x83FC00491", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -312,7 +312,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x063FC00491", + "MSRValue": "0x63FC00491", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -325,7 +325,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x0604000491", + "MSRValue": "0x604000491", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -338,7 +338,7 @@ "EventCode": "0xB7, 0xBB", "EventName": 
"OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x063B800491", + "MSRValue": "0x63B800491", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -377,7 +377,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x083FC00490", + "MSRValue": "0x83FC00490", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -390,7 +390,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x063FC00490", + "MSRValue": "0x63FC00490", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -403,7 +403,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x0604000490", + "MSRValue": "0x604000490", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -416,7 +416,7 @@ "EventCode": 
"0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x063B800490", + "MSRValue": "0x63B800490", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -455,7 +455,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.REMOTE_HIT_FORWARD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x083FC00120", + "MSRValue": "0x83FC00120", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -468,7 +468,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x063FC00120", + "MSRValue": "0x63FC00120", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -481,7 +481,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x0604000120", + "MSRValue": "0x604000120", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -494,7 
+494,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x063B800120", + "MSRValue": "0x63B800120", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -533,7 +533,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.REMOTE_HIT_FORWARD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x083FC00122", + "MSRValue": "0x83FC00122", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -546,7 +546,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x063FC00122", + "MSRValue": "0x63FC00122", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -559,7 +559,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x0604000122", + "MSRValue": "0x604000122", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ 
-572,7 +572,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x063B800122", + "MSRValue": "0x63B800122", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -611,7 +611,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.REMOTE_HIT_FORWARD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x083FC00004", + "MSRValue": "0x83FC00004", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -624,7 +624,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x063FC00004", + "MSRValue": "0x63FC00004", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -637,7 +637,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x0604000004", + "MSRValue": "0x604000004", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", 
"SampleAfterValue": "100003", @@ -650,7 +650,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x063B800004", + "MSRValue": "0x63B800004", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -689,7 +689,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x083FC00001", + "MSRValue": "0x83FC00001", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -702,7 +702,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x063FC00001", + "MSRValue": "0x63FC00001", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -715,7 +715,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x0604000001", + "MSRValue": "0x604000001", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes 
of the offcore transaction.", "SampleAfterValue": "100003", @@ -728,7 +728,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x063B800001", + "MSRValue": "0x63B800001", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -767,7 +767,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.REMOTE_HIT_FORWARD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x083FC00002", + "MSRValue": "0x83FC00002", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -780,7 +780,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x063FC00002", + "MSRValue": "0x63FC00002", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -793,7 +793,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x0604000002", + "MSRValue": "0x604000002", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to 
specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -806,7 +806,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x063B800002", + "MSRValue": "0x63B800002", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -845,7 +845,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.REMOTE_HIT_FORWARD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x083FC00400", + "MSRValue": "0x83FC00400", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -858,7 +858,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x063FC00400", + "MSRValue": "0x63FC00400", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -871,7 +871,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x0604000400", + "MSRValue": "0x604000400", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value 
in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -884,7 +884,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x063B800400", + "MSRValue": "0x63B800400", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -923,7 +923,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x083FC00010", + "MSRValue": "0x83FC00010", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -936,7 +936,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x063FC00010", + "MSRValue": "0x63FC00010", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -949,7 +949,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x0604000010", + "MSRValue": "0x604000010", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and 
predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -962,7 +962,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x063B800010", + "MSRValue": "0x63B800010", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1001,7 +1001,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.REMOTE_HIT_FORWARD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x083FC00020", + "MSRValue": "0x83FC00020", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1014,7 +1014,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x063FC00020", + "MSRValue": "0x63FC00020", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1027,7 +1027,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x0604000020", + "MSRValue": "0x604000020", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with 
specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1040,7 +1040,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x063B800020", + "MSRValue": "0x63B800020", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1079,7 +1079,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x083FC00080", + "MSRValue": "0x83FC00080", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1092,7 +1092,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x063FC00080", + "MSRValue": "0x63FC00080", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1105,7 +1105,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x0604000080", + "MSRValue": "0x604000080", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event 
select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1118,7 +1118,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x063B800080", + "MSRValue": "0x63B800080", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1157,7 +1157,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.REMOTE_HIT_FORWARD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x083FC00100", + "MSRValue": "0x83FC00100", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1170,7 +1170,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x063FC00100", + "MSRValue": "0x63FC00100", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1183,7 +1183,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x0604000100", + "MSRValue": "0x604000100", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a 
specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1196,7 +1196,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x063B800100", + "MSRValue": "0x63B800100", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", diff --git a/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json b/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json index ca57481206660..12eabae3e2242 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json @@ -435,6 +435,17 @@ "PublicDescription": "Counts the number of instructions (EOMs) retired. 
Counting covers macro-fused instructions individually (that is, increments by two).", "SampleAfterValue": "2000003" }, + { + "BriefDescription": "Number of all retired NOP instructions.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "Errata": "SKL091, SKL044", + "EventCode": "0xC0", + "EventName": "INST_RETIRED.NOP", + "PEBS": "1", + "SampleAfterValue": "2000003", + "UMask": "0x2" + }, { "BriefDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution", "Counter": "1", diff --git a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json index 863c9e103969e..b016f7d1ff3de 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json @@ -1,26 +1,167 @@ [ + { + "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend", + "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)", + "MetricGroup": "TopdownL1", + "MetricName": "Frontend_Bound", + "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Machine_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound." 
+ }, + { + "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Frontend_Bound_SMT", + "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Machine_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. SMT version; use when SMT is enabled and measuring per logical CPU." + }, + { + "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations", + "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD)", + "MetricGroup": "TopdownL1", + "MetricName": "Bad_Speculation", + "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. 
Incorrect data speculation followed by Memory Ordering Nukes is another example." + }, + { + "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations. SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Bad_Speculation_SMT", + "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example. SMT version; use when SMT is enabled and measuring per logical CPU." + }, + { + "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend", + "MetricConstraint": "NO_NMI_WATCHDOG", + "MetricExpr": "1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD)", + "MetricGroup": "TopdownL1", + "MetricName": "Backend_Bound", + "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. 
For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound." + }, + { + "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricExpr": "1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Backend_Bound_SMT", + "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound. SMT version; use when SMT is enabled and measuring per logical CPU." + }, + { + "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired", + "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)", + "MetricGroup": "TopdownL1", + "MetricName": "Retiring", + "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. 
issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum Pipeline_Width throughput was achieved. Maximizing Retiring typically increases the Instructions-per-cycle (see IPC metric). Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Heavy-operations or Microcode Assists are categorized under Retiring. They often indicate suboptimal performance and can often be optimized or avoided. " + }, + { + "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Retiring_SMT", + "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum Pipeline_Width throughput was achieved. Maximizing Retiring typically increases the Instructions-per-cycle (see IPC metric). Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Heavy-operations or Microcode Assists are categorized under Retiring. They often indicate suboptimal performance and can often be optimized or avoided. SMT version; use when SMT is enabled and measuring per logical CPU." 
+ }, + { + "BriefDescription": "Total pipeline cost of Branch Misprediction related bottlenecks", + "MetricExpr": "100 * ( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) * ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * INT_MISC.CLEAR_RESTEER_CYCLES / CPU_CLK_UNHALTED.THREAD) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) )", + "MetricGroup": "Bad;BadSpec;BrMispredicts", + "MetricName": "Mispredictions" + }, + { + "BriefDescription": "Total pipeline cost of Branch Misprediction related bottlenecks", + "MetricExpr": "100 * ( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * INT_MISC.CLEAR_RESTEER_CYCLES / CPU_CLK_UNHALTED.THREAD) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) )", + "MetricGroup": "Bad;BadSpec;BrMispredicts_SMT", + "MetricName": "Mispredictions_SMT" + }, + { + "BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks", + "MetricExpr": "100 * ((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + 
(EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) * ( ( (CYCLE_ACTIVITY.STALLS_L3_MISS / CPU_CLK_UNHALTED.THREAD + (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD) - (( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) / ( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ ) ) * (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD))) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) ) * ( (min( CPU_CLK_UNHALTED.THREAD , cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=4@ ) / CPU_CLK_UNHALTED.THREAD) / #(CYCLE_ACTIVITY.STALLS_L3_MISS / CPU_CLK_UNHALTED.THREAD + (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD) - (( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) / ( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ ) ) * (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD))) ) + ( (( CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS ) / CPU_CLK_UNHALTED.THREAD) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / 
(CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) ) * ( (OFFCORE_REQUESTS_BUFFER.SQ_FULL / CPU_CLK_UNHALTED.THREAD) / #(( CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS ) / CPU_CLK_UNHALTED.THREAD) ) ) + ( (max( ( CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS ) / CPU_CLK_UNHALTED.THREAD , 0 )) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) ) * ( ((L1D_PEND_MISS.PENDING / ( MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT )) * cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ / CPU_CLK_UNHALTED.THREAD) / #(max( ( CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS ) / CPU_CLK_UNHALTED.THREAD , 0 )) ) ", + "MetricGroup": "Mem;MemoryBW;Offcore", + "MetricName": "Memory_Bandwidth" + }, + { + "BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks", + "MetricExpr": "100 * ((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( 
UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) * ( ( (CYCLE_ACTIVITY.STALLS_L3_MISS / CPU_CLK_UNHALTED.THREAD + (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD) - (( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) / ( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ ) ) * (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD))) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) ) * ( (min( CPU_CLK_UNHALTED.THREAD , cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=4@ ) / CPU_CLK_UNHALTED.THREAD) / #(CYCLE_ACTIVITY.STALLS_L3_MISS / CPU_CLK_UNHALTED.THREAD + (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD) - (( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) / ( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ ) ) * (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD))) ) + ( (( CYCLE_ACTIVITY.STALLS_L2_MISS 
- CYCLE_ACTIVITY.STALLS_L3_MISS ) / CPU_CLK_UNHALTED.THREAD) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) ) * ( (( OFFCORE_REQUESTS_BUFFER.SQ_FULL / 2 ) / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )) / #(( CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS ) / CPU_CLK_UNHALTED.THREAD) ) ) + ( (max( ( CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS ) / CPU_CLK_UNHALTED.THREAD , 0 )) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) ) * ( ((L1D_PEND_MISS.PENDING / ( MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT )) * cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ / CPU_CLK_UNHALTED.THREAD) / #(max( ( CYCLE_ACTIVITY.STALLS_MEM_ANY - 
CYCLE_ACTIVITY.STALLS_L1D_MISS ) / CPU_CLK_UNHALTED.THREAD , 0 )) ) ", + "MetricGroup": "Mem;MemoryBW;Offcore_SMT", + "MetricName": "Memory_Bandwidth_SMT" + }, + { + "BriefDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches)", + "MetricExpr": "100 * ((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) * ( ( (CYCLE_ACTIVITY.STALLS_L3_MISS / CPU_CLK_UNHALTED.THREAD + (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD) - (( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) / ( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ ) ) * (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD))) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) ) * ( (min( CPU_CLK_UNHALTED.THREAD , OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD ) / CPU_CLK_UNHALTED.THREAD - (min( CPU_CLK_UNHALTED.THREAD , cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=4@ ) / CPU_CLK_UNHALTED.THREAD)) / #(CYCLE_ACTIVITY.STALLS_L3_MISS / CPU_CLK_UNHALTED.THREAD + (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / 
CPU_CLK_UNHALTED.THREAD) - (( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) / ( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ ) ) * (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD))) ) + ( (( CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS ) / CPU_CLK_UNHALTED.THREAD) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) ) * ( (( (20.5 * ((CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC) * msr@tsc@ / 1000000000 / duration_time)) - (3.5 * ((CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC) * msr@tsc@ / 1000000000 / duration_time)) ) * MEM_LOAD_RETIRED.L3_HIT * (1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) / 2) / CPU_CLK_UNHALTED.THREAD) / #(( CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS ) / CPU_CLK_UNHALTED.THREAD) ) + ( (( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) / ( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ ) ) * (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD)) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * 
INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) ) )", + "MetricGroup": "Mem;MemoryLat;Offcore", + "MetricName": "Memory_Latency" + }, + { + "BriefDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches)", + "MetricExpr": "100 * ((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) * ( ( (CYCLE_ACTIVITY.STALLS_L3_MISS / CPU_CLK_UNHALTED.THREAD + (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD) - (( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) / ( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ ) ) * (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD))) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( 
INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) ) * ( (min( CPU_CLK_UNHALTED.THREAD , OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD ) / CPU_CLK_UNHALTED.THREAD - (min( CPU_CLK_UNHALTED.THREAD , cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=4@ ) / CPU_CLK_UNHALTED.THREAD)) / #(CYCLE_ACTIVITY.STALLS_L3_MISS / CPU_CLK_UNHALTED.THREAD + (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD) - (( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) / ( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ ) ) * (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD))) ) + ( (( CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS ) / CPU_CLK_UNHALTED.THREAD) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) ) * ( (( (20.5 * ((CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC) * msr@tsc@ / 1000000000 / duration_time)) - (3.5 * ((CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC) * msr@tsc@ / 1000000000 / duration_time)) ) * MEM_LOAD_RETIRED.L3_HIT * (1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) / 2) / CPU_CLK_UNHALTED.THREAD) / 
#(( CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS ) / CPU_CLK_UNHALTED.THREAD) ) + ( (( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) / ( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ ) ) * (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD)) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) ) )", + "MetricGroup": "Mem;MemoryLat;Offcore_SMT", + "MetricName": "Memory_Latency_SMT" + }, + { + "BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)", + "MetricExpr": "100 * ((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) * ( ( (max( ( CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS ) / CPU_CLK_UNHALTED.THREAD , 0 )) / ((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + 
(UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) ) * ( (min( 9 * cpu@DTLB_LOAD_MISSES.STLB_HIT\\,cmask\\=1@ + DTLB_LOAD_MISSES.WALK_ACTIVE , max( CYCLE_ACTIVITY.CYCLES_MEM_ANY - CYCLE_ACTIVITY.CYCLES_L1D_MISS , 0 ) ) / CPU_CLK_UNHALTED.THREAD) / (max( ( CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS ) / CPU_CLK_UNHALTED.THREAD , 0 )) ) + ( (EXE_ACTIVITY.BOUND_ON_STORES / CPU_CLK_UNHALTED.THREAD) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) ) * ( (( 9 * cpu@DTLB_STORE_MISSES.STLB_HIT\\,cmask\\=1@ + DTLB_STORE_MISSES.WALK_ACTIVE ) / CPU_CLK_UNHALTED.THREAD) / #(EXE_ACTIVITY.BOUND_ON_STORES / CPU_CLK_UNHALTED.THREAD) ) ) ", + "MetricGroup": "Mem;MemoryTLB", + "MetricName": "Memory_Data_TLBs" + }, + { + "BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)", + "MetricExpr": "100 * ((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY 
/ 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) * ( ( (max( ( CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS ) / CPU_CLK_UNHALTED.THREAD , 0 )) / ((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) ) * ( (min( 9 * cpu@DTLB_LOAD_MISSES.STLB_HIT\\,cmask\\=1@ + DTLB_LOAD_MISSES.WALK_ACTIVE , max( CYCLE_ACTIVITY.CYCLES_MEM_ANY - CYCLE_ACTIVITY.CYCLES_L1D_MISS , 0 ) ) / CPU_CLK_UNHALTED.THREAD) / (max( ( CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS ) / CPU_CLK_UNHALTED.THREAD , 0 )) ) + ( (EXE_ACTIVITY.BOUND_ON_STORES / CPU_CLK_UNHALTED.THREAD) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) ) * ( (( 9 * 
cpu@DTLB_STORE_MISSES.STLB_HIT\\,cmask\\=1@ + DTLB_STORE_MISSES.WALK_ACTIVE ) / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )) / #(EXE_ACTIVITY.BOUND_ON_STORES / CPU_CLK_UNHALTED.THREAD) ) ) ", + "MetricGroup": "Mem;MemoryTLB;_SMT", + "MetricName": "Memory_Data_TLBs_SMT" + }, + { + "BriefDescription": "Total pipeline cost of branch related instructions (used for program control-flow including function calls)", + "MetricExpr": "100 * (( BR_INST_RETIRED.CONDITIONAL + 3 * BR_INST_RETIRED.NEAR_CALL + (BR_INST_RETIRED.NEAR_TAKEN - ( BR_INST_RETIRED.CONDITIONAL - BR_INST_RETIRED.NOT_TAKEN ) - 2 * BR_INST_RETIRED.NEAR_CALL) ) / (4 * CPU_CLK_UNHALTED.THREAD))", + "MetricGroup": "Ret", + "MetricName": "Branching_Overhead" + }, + { + "BriefDescription": "Total pipeline cost of branch related instructions (used for program control-flow including function calls)", + "MetricExpr": "100 * (( BR_INST_RETIRED.CONDITIONAL + 3 * BR_INST_RETIRED.NEAR_CALL + (BR_INST_RETIRED.NEAR_TAKEN - ( BR_INST_RETIRED.CONDITIONAL - BR_INST_RETIRED.NOT_TAKEN ) - 2 * BR_INST_RETIRED.NEAR_CALL) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", + "MetricGroup": "Ret_SMT", + "MetricName": "Branching_Overhead_SMT" + }, + { + "BriefDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses)", + "MetricExpr": "100 * (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) * ( (ICACHE_64B.IFTAG_STALL / CPU_CLK_UNHALTED.THREAD) + (( ICACHE_16B.IFDATA_STALL + 2 * cpu@ICACHE_16B.IFDATA_STALL\\,cmask\\=1\\,edge@ ) / CPU_CLK_UNHALTED.THREAD) + (9 * BACLEARS.ANY / CPU_CLK_UNHALTED.THREAD) ) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD))", + "MetricGroup": "BigFoot;Fed;Frontend;IcMiss;MemoryTLB", + "MetricName": "Big_Code" + }, 
+ { + "BriefDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses)", + "MetricExpr": "100 * (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * ( (ICACHE_64B.IFTAG_STALL / CPU_CLK_UNHALTED.THREAD) + (( ICACHE_16B.IFDATA_STALL + 2 * cpu@ICACHE_16B.IFDATA_STALL\\,cmask\\=1\\,edge@ ) / CPU_CLK_UNHALTED.THREAD) + (9 * BACLEARS.ANY / CPU_CLK_UNHALTED.THREAD) ) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", + "MetricGroup": "BigFoot;Fed;Frontend;IcMiss;MemoryTLB_SMT", + "MetricName": "Big_Code_SMT" + }, + { + "BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks", + "MetricExpr": "100 * ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) * ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * INT_MISC.CLEAR_RESTEER_CYCLES / CPU_CLK_UNHALTED.THREAD) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) ) - (100 * (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) * ( (ICACHE_64B.IFTAG_STALL / CPU_CLK_UNHALTED.THREAD) + (( ICACHE_16B.IFDATA_STALL + 2 * cpu@ICACHE_16B.IFDATA_STALL\\,cmask\\=1\\,edge@ ) / CPU_CLK_UNHALTED.THREAD) + (9 * BACLEARS.ANY / CPU_CLK_UNHALTED.THREAD) ) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)))", + "MetricGroup": "Fed;FetchBW;Frontend", + "MetricName": "Instruction_Fetch_BW" + }, + { + "BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks", + "MetricExpr": "100 * ( 
(IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * INT_MISC.CLEAR_RESTEER_CYCLES / CPU_CLK_UNHALTED.THREAD) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) ) - (100 * (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * ( (ICACHE_64B.IFTAG_STALL / CPU_CLK_UNHALTED.THREAD) + (( ICACHE_16B.IFDATA_STALL + 2 * cpu@ICACHE_16B.IFDATA_STALL\\,cmask\\=1\\,edge@ ) / CPU_CLK_UNHALTED.THREAD) + (9 * BACLEARS.ANY / CPU_CLK_UNHALTED.THREAD) ) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))))", + "MetricGroup": "Fed;FetchBW;Frontend_SMT", + "MetricName": "Instruction_Fetch_BW_SMT" + }, { "BriefDescription": "Instructions Per Cycle (per Logical Processor)", "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD", - "MetricGroup": "Summary", + "MetricGroup": "Ret;Summary", "MetricName": "IPC" }, { "BriefDescription": "Uops Per Instruction", "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY", - "MetricGroup": "Pipeline;Retire", + "MetricGroup": "Pipeline;Ret;Retire", "MetricName": "UPI" }, { "BriefDescription": "Instruction per taken branch", - "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_TAKEN", - "MetricGroup": "Branches;FetchBW;PGO", - "MetricName": "IpTB" + "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / BR_INST_RETIRED.NEAR_TAKEN", + 
"MetricGroup": "Branches;Fed;FetchBW", + "MetricName": "UpTB" }, { "BriefDescription": "Cycles Per Instruction (per Logical Processor)", "MetricExpr": "1 / (INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD)", - "MetricGroup": "Pipeline", + "MetricGroup": "Pipeline;Mem", "MetricName": "CPI" }, { @@ -30,39 +171,84 @@ "MetricName": "CLKS" }, { - "BriefDescription": "Instructions Per Cycle (per physical core)", + "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)", + "MetricExpr": "4 * CPU_CLK_UNHALTED.THREAD", + "MetricGroup": "TmaL1", + "MetricName": "SLOTS" + }, + { + "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)", + "MetricExpr": "4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )", + "MetricGroup": "TmaL1_SMT", + "MetricName": "SLOTS_SMT" + }, + { + "BriefDescription": "The ratio of Executed- by Issued-Uops", + "MetricExpr": "UOPS_EXECUTED.THREAD / UOPS_ISSUED.ANY", + "MetricGroup": "Cor;Pipeline", + "MetricName": "Execute_per_Issue", + "PublicDescription": "The ratio of Executed- by Issued-Uops. Ratio > 1 suggests high rate of uop micro-fusions. Ratio < 1 suggest high rate of \"execute\" at rename stage." 
+ }, + { + "BriefDescription": "Instructions Per Cycle across hyper-threads (per physical core)", "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD", - "MetricGroup": "SMT;TmaL1", + "MetricGroup": "Ret;SMT;TmaL1", "MetricName": "CoreIPC" }, { - "BriefDescription": "Instructions Per Cycle (per physical core)", + "BriefDescription": "Instructions Per Cycle across hyper-threads (per physical core)", "MetricExpr": "INST_RETIRED.ANY / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )", - "MetricGroup": "SMT;TmaL1", + "MetricGroup": "Ret;SMT;TmaL1_SMT", "MetricName": "CoreIPC_SMT" }, { "BriefDescription": "Floating Point Operations Per Cycle", "MetricExpr": "( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE ) / CPU_CLK_UNHALTED.THREAD", - "MetricGroup": "Flops", + "MetricGroup": "Ret;Flops", "MetricName": "FLOPc" }, { "BriefDescription": "Floating Point Operations Per Cycle", "MetricExpr": "( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE ) / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )", - "MetricGroup": "Flops_SMT", + "MetricGroup": "Ret;Flops_SMT", "MetricName": "FLOPc_SMT" }, + { + "BriefDescription": "Actual per-core usage of the Floating Point execution units (regardless of the vector width)", + "MetricExpr": "( 
(FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE) + (FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) ) / ( 2 * CPU_CLK_UNHALTED.THREAD )", + "MetricGroup": "Cor;Flops;HPC", + "MetricName": "FP_Arith_Utilization", + "PublicDescription": "Actual per-core usage of the Floating Point execution units (regardless of the vector width). Values > 1 are possible due to Fused-Multiply Add (FMA) counting." + }, + { + "BriefDescription": "Actual per-core usage of the Floating Point execution units (regardless of the vector width). SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricExpr": "( (FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE) + (FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) ) / ( 2 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ) )", + "MetricGroup": "Cor;Flops;HPC_SMT", + "MetricName": "FP_Arith_Utilization_SMT", + "PublicDescription": "Actual per-core usage of the Floating Point execution units (regardless of the vector width). Values > 1 are possible due to Fused-Multiply Add (FMA) counting. SMT version; use when SMT is enabled and measuring per logical CPU." 
+ }, { "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)", "MetricExpr": "UOPS_EXECUTED.THREAD / (( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 ) if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1)", - "MetricGroup": "Pipeline;PortsUtil", + "MetricGroup": "Backend;Cor;Pipeline;PortsUtil", "MetricName": "ILP" }, + { + "BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)", + "MetricExpr": " ( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) * ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * INT_MISC.CLEAR_RESTEER_CYCLES / CPU_CLK_UNHALTED.THREAD) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) ) * (4 * CPU_CLK_UNHALTED.THREAD) / BR_MISP_RETIRED.ALL_BRANCHES", + "MetricGroup": "Bad;BrMispredicts", + "MetricName": "Branch_Misprediction_Cost" + }, + { + "BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)", + "MetricExpr": " ( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * INT_MISC.CLEAR_RESTEER_CYCLES / 
CPU_CLK_UNHALTED.THREAD) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) ) * (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )) / BR_MISP_RETIRED.ALL_BRANCHES", + "MetricGroup": "Bad;BrMispredicts_SMT", + "MetricName": "Branch_Misprediction_Cost_SMT" + }, { "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear)", "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES", - "MetricGroup": "BrMispredicts", + "MetricGroup": "Bad;BadSpec;BrMispredicts", "MetricName": "IpMispredict" }, { @@ -86,122 +272,249 @@ { "BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)", "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES", - "MetricGroup": "Branches;InsType", + "MetricGroup": "Branches;Fed;InsType", "MetricName": "IpBranch" }, { "BriefDescription": "Instructions per (near) call (lower number means higher occurrence rate)", "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_CALL", - "MetricGroup": "Branches", + "MetricGroup": "Branches;Fed;PGO", "MetricName": "IpCall" }, + { + "BriefDescription": "Instruction per taken branch", + "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_TAKEN", + "MetricGroup": "Branches;Fed;FetchBW;Frontend;PGO", + "MetricName": "IpTB" + }, { "BriefDescription": "Branch instructions per taken branch. 
", "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN", - "MetricGroup": "Branches;PGO", + "MetricGroup": "Branches;Fed;PGO", "MetricName": "BpTkBranch" }, { "BriefDescription": "Instructions per Floating Point (FP) Operation (lower number means higher occurrence rate)", "MetricExpr": "INST_RETIRED.ANY / ( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE )", - "MetricGroup": "Flops;FpArith;InsType", + "MetricGroup": "Flops;InsType", "MetricName": "IpFLOP" }, + { + "BriefDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate)", + "MetricExpr": "INST_RETIRED.ANY / ( (FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE) + (FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) )", + "MetricGroup": "Flops;InsType", + "MetricName": "IpArith", + "PublicDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate). May undercount due to FMA double counting. Approximated prior to BDW." + }, + { + "BriefDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate)", + "MetricExpr": "INST_RETIRED.ANY / FP_ARITH_INST_RETIRED.SCALAR_SINGLE", + "MetricGroup": "Flops;FpScalar;InsType", + "MetricName": "IpArith_Scalar_SP", + "PublicDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate). 
May undercount due to FMA double counting." + }, + { + "BriefDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate)", + "MetricExpr": "INST_RETIRED.ANY / FP_ARITH_INST_RETIRED.SCALAR_DOUBLE", + "MetricGroup": "Flops;FpScalar;InsType", + "MetricName": "IpArith_Scalar_DP", + "PublicDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate). May undercount due to FMA double counting." + }, + { + "BriefDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate)", + "MetricExpr": "INST_RETIRED.ANY / ( FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE )", + "MetricGroup": "Flops;FpVector;InsType", + "MetricName": "IpArith_AVX128", + "PublicDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting." + }, + { + "BriefDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate)", + "MetricExpr": "INST_RETIRED.ANY / ( FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE )", + "MetricGroup": "Flops;FpVector;InsType", + "MetricName": "IpArith_AVX256", + "PublicDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting." 
+ }, + { + "BriefDescription": "Instructions per FP Arithmetic AVX 512-bit instruction (lower number means higher occurrence rate)", + "MetricExpr": "INST_RETIRED.ANY / ( FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE )", + "MetricGroup": "Flops;FpVector;InsType", + "MetricName": "IpArith_AVX512", + "PublicDescription": "Instructions per FP Arithmetic AVX 512-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting." + }, { "BriefDescription": "Total number of retired Instructions, Sample with: INST_RETIRED.PREC_DIST", "MetricExpr": "INST_RETIRED.ANY", "MetricGroup": "Summary;TmaL1", "MetricName": "Instructions" }, + { + "BriefDescription": "Average number of Uops issued by front-end when it issued something", + "MetricExpr": "UOPS_ISSUED.ANY / cpu@UOPS_ISSUED.ANY\\,cmask\\=1@", + "MetricGroup": "Fed;FetchBW", + "MetricName": "Fetch_UpC" + }, { "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)", "MetricExpr": "IDQ.DSB_UOPS / (IDQ.DSB_UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS)", - "MetricGroup": "DSB;FetchBW", + "MetricGroup": "DSB;Fed;FetchBW", "MetricName": "DSB_Coverage" }, { - "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads (in core cycles)", + "BriefDescription": "Total penalty related to DSB (uop cache) misses - subset/see of/the Instruction_Fetch_BW Bottleneck.", + "MetricExpr": "(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) * (DSB2MITE_SWITCHES.PENALTY_CYCLES / CPU_CLK_UNHALTED.THREAD) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) + ((IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD))) * (( IDQ.ALL_MITE_CYCLES_ANY_UOPS - IDQ.ALL_MITE_CYCLES_4_UOPS ) / CPU_CLK_UNHALTED.THREAD / 2) / #((IDQ_UOPS_NOT_DELIVERED.CORE / (4 * 
CPU_CLK_UNHALTED.THREAD)) - (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)))", + "MetricGroup": "DSBmiss;Fed", + "MetricName": "DSB_Misses_Cost" + }, + { + "BriefDescription": "Total penalty related to DSB (uop cache) misses - subset/see of/the Instruction_Fetch_BW Bottleneck.", + "MetricExpr": "(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * (DSB2MITE_SWITCHES.PENALTY_CYCLES / CPU_CLK_UNHALTED.THREAD) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) + ((IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) * (( IDQ.ALL_MITE_CYCLES_ANY_UOPS - IDQ.ALL_MITE_CYCLES_4_UOPS ) / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ) / 2) / #((IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))))", + "MetricGroup": "DSBmiss;Fed_SMT", + "MetricName": "DSB_Misses_Cost_SMT" + }, + { + "BriefDescription": "Number of Instructions per non-speculative DSB miss", + "MetricExpr": "INST_RETIRED.ANY / FRONTEND_RETIRED.ANY_DSB_MISS", + "MetricGroup": "DSBmiss;Fed", + "MetricName": "IpDSB_Miss_Ret" + }, + { + "BriefDescription": "Fraction of branches that are non-taken conditionals", + "MetricExpr": "BR_INST_RETIRED.NOT_TAKEN / 
BR_INST_RETIRED.ALL_BRANCHES", + "MetricGroup": "Bad;Branches;CodeGen;PGO", + "MetricName": "Cond_NT" + }, + { + "BriefDescription": "Fraction of branches that are taken conditionals", + "MetricExpr": "( BR_INST_RETIRED.CONDITIONAL - BR_INST_RETIRED.NOT_TAKEN ) / BR_INST_RETIRED.ALL_BRANCHES", + "MetricGroup": "Bad;Branches;CodeGen;PGO", + "MetricName": "Cond_TK" + }, + { + "BriefDescription": "Fraction of branches that are CALL or RET", + "MetricExpr": "( BR_INST_RETIRED.NEAR_CALL + BR_INST_RETIRED.NEAR_RETURN ) / BR_INST_RETIRED.ALL_BRANCHES", + "MetricGroup": "Bad;Branches", + "MetricName": "CallRet" + }, + { + "BriefDescription": "Fraction of branches that are unconditional (direct or indirect) jumps", + "MetricExpr": "(BR_INST_RETIRED.NEAR_TAKEN - ( BR_INST_RETIRED.CONDITIONAL - BR_INST_RETIRED.NOT_TAKEN ) - 2 * BR_INST_RETIRED.NEAR_CALL) / BR_INST_RETIRED.ALL_BRANCHES", + "MetricGroup": "Bad;Branches", + "MetricName": "Jump" + }, + { + "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load instructions (in core cycles)", "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT )", - "MetricGroup": "MemoryBound;MemoryLat", - "MetricName": "Load_Miss_Real_Latency" + "MetricGroup": "Mem;MemoryBound;MemoryLat", + "MetricName": "Load_Miss_Real_Latency", + "PublicDescription": "Actual Average Latency for L1 data-cache miss demand load instructions (in core cycles). Latency may be overestimated for multi-load instructions - e.g. repeat strings." }, { "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. 
Per-Logical Processor)", "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES", - "MetricGroup": "MemoryBound;MemoryBW", + "MetricGroup": "Mem;MemoryBound;MemoryBW", "MetricName": "MLP" }, - { - "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", - "MetricConstraint": "NO_NMI_WATCHDOG", - "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * CORE_CLKS )", - "MetricGroup": "MemoryTLB", - "MetricName": "Page_Walks_Utilization" - }, { "BriefDescription": "Average data fill bandwidth to the L1 data cache [GB / sec]", "MetricExpr": "64 * L1D.REPLACEMENT / 1000000000 / duration_time", - "MetricGroup": "MemoryBW", + "MetricGroup": "Mem;MemoryBW", "MetricName": "L1D_Cache_Fill_BW" }, { "BriefDescription": "Average data fill bandwidth to the L2 cache [GB / sec]", "MetricExpr": "64 * L2_LINES_IN.ALL / 1000000000 / duration_time", - "MetricGroup": "MemoryBW", + "MetricGroup": "Mem;MemoryBW", "MetricName": "L2_Cache_Fill_BW" }, { "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]", "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1000000000 / duration_time", - "MetricGroup": "MemoryBW", + "MetricGroup": "Mem;MemoryBW", "MetricName": "L3_Cache_Fill_BW" }, { "BriefDescription": "Average per-core data access bandwidth to the L3 cache [GB / sec]", "MetricExpr": "64 * OFFCORE_REQUESTS.ALL_REQUESTS / 1000000000 / duration_time", - "MetricGroup": "MemoryBW;Offcore", + "MetricGroup": "Mem;MemoryBW;Offcore", "MetricName": "L3_Cache_Access_BW" }, { "BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads", "MetricExpr": "1000 * MEM_LOAD_RETIRED.L1_MISS / INST_RETIRED.ANY", - "MetricGroup": "CacheMisses", + "MetricGroup": "Mem;CacheMisses", "MetricName": "L1MPKI" }, + { + "BriefDescription": "L1 cache true misses per kilo instruction for all demand loads 
(including speculative)", + "MetricExpr": "1000 * L2_RQSTS.ALL_DEMAND_DATA_RD / INST_RETIRED.ANY", + "MetricGroup": "Mem;CacheMisses", + "MetricName": "L1MPKI_Load" + }, { "BriefDescription": "L2 cache true misses per kilo instruction for retired demand loads", "MetricExpr": "1000 * MEM_LOAD_RETIRED.L2_MISS / INST_RETIRED.ANY", - "MetricGroup": "CacheMisses", + "MetricGroup": "Mem;Backend;CacheMisses", "MetricName": "L2MPKI" }, { "BriefDescription": "L2 cache misses per kilo instruction for all request types (including speculative)", "MetricExpr": "1000 * L2_RQSTS.MISS / INST_RETIRED.ANY", - "MetricGroup": "CacheMisses;Offcore", + "MetricGroup": "Mem;CacheMisses;Offcore", "MetricName": "L2MPKI_All" }, + { + "BriefDescription": "L2 cache misses per kilo instruction for all demand loads (including speculative)", + "MetricExpr": "1000 * L2_RQSTS.DEMAND_DATA_RD_MISS / INST_RETIRED.ANY", + "MetricGroup": "Mem;CacheMisses", + "MetricName": "L2MPKI_Load" + }, { "BriefDescription": "L2 cache hits per kilo instruction for all request types (including speculative)", "MetricExpr": "1000 * ( L2_RQSTS.REFERENCES - L2_RQSTS.MISS ) / INST_RETIRED.ANY", - "MetricGroup": "CacheMisses", + "MetricGroup": "Mem;CacheMisses", "MetricName": "L2HPKI_All" }, + { + "BriefDescription": "L2 cache hits per kilo instruction for all demand loads (including speculative)", + "MetricExpr": "1000 * L2_RQSTS.DEMAND_DATA_RD_HIT / INST_RETIRED.ANY", + "MetricGroup": "Mem;CacheMisses", + "MetricName": "L2HPKI_Load" + }, { "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads", "MetricExpr": "1000 * MEM_LOAD_RETIRED.L3_MISS / INST_RETIRED.ANY", - "MetricGroup": "CacheMisses", + "MetricGroup": "Mem;CacheMisses", "MetricName": "L3MPKI" }, + { + "BriefDescription": "Fill Buffer (FB) true hits per kilo instructions for retired demand loads", + "MetricExpr": "1000 * MEM_LOAD_RETIRED.FB_HIT / INST_RETIRED.ANY", + "MetricGroup": "Mem;CacheMisses", + "MetricName": "FB_HPKI" + }, 
+ { + "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", + "MetricConstraint": "NO_NMI_WATCHDOG", + "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * CPU_CLK_UNHALTED.THREAD )", + "MetricGroup": "Mem;MemoryTLB", + "MetricName": "Page_Walks_Utilization" + }, + { + "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", + "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ) )", + "MetricGroup": "Mem;MemoryTLB_SMT", + "MetricName": "Page_Walks_Utilization_SMT" + }, { "BriefDescription": "Rate of silent evictions from the L2 cache per Kilo instruction where the evicted lines are dropped (no writeback to L3 or memory)", "MetricExpr": "1000 * L2_LINES_OUT.SILENT / INST_RETIRED.ANY", - "MetricGroup": "L2Evicts;Server", + "MetricGroup": "L2Evicts;Mem;Server", "MetricName": "L2_Evictions_Silent_PKI" }, { "BriefDescription": "Rate of non silent evictions from the L2 cache per Kilo instruction", "MetricExpr": "1000 * L2_LINES_OUT.NON_SILENT / INST_RETIRED.ANY", - "MetricGroup": "L2Evicts;Server", + "MetricGroup": "L2Evicts;Mem;Server", "MetricName": "L2_Evictions_NonSilent_PKI" }, { @@ -219,7 +532,7 @@ { "BriefDescription": "Giga Floating Point Operations Per Second", "MetricExpr": "( ( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE ) / 
1000000000 ) / duration_time", - "MetricGroup": "Flops;HPC", + "MetricGroup": "Cor;Flops;HPC", "MetricName": "GFLOPs" }, { @@ -228,6 +541,48 @@ "MetricGroup": "Power", "MetricName": "Turbo_Utilization" }, + { + "BriefDescription": "Fraction of Core cycles where the core was running with power-delivery for baseline license level 0", + "MetricExpr": "CORE_POWER.LVL0_TURBO_LICENSE / CPU_CLK_UNHALTED.THREAD", + "MetricGroup": "Power", + "MetricName": "Power_License0_Utilization", + "PublicDescription": "Fraction of Core cycles where the core was running with power-delivery for baseline license level 0. This includes non-AVX codes, SSE, AVX 128-bit, and low-current AVX 256-bit codes." + }, + { + "BriefDescription": "Fraction of Core cycles where the core was running with power-delivery for baseline license level 0. SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricExpr": "CORE_POWER.LVL0_TURBO_LICENSE / 2 / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )", + "MetricGroup": "Power_SMT", + "MetricName": "Power_License0_Utilization_SMT", + "PublicDescription": "Fraction of Core cycles where the core was running with power-delivery for baseline license level 0. This includes non-AVX codes, SSE, AVX 128-bit, and low-current AVX 256-bit codes. SMT version; use when SMT is enabled and measuring per logical CPU." + }, + { + "BriefDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 1", + "MetricExpr": "CORE_POWER.LVL1_TURBO_LICENSE / CPU_CLK_UNHALTED.THREAD", + "MetricGroup": "Power", + "MetricName": "Power_License1_Utilization", + "PublicDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 1. This includes high current AVX 256-bit instructions as well as low current AVX 512-bit instructions." 
+ }, + { + "BriefDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 1. SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricExpr": "CORE_POWER.LVL1_TURBO_LICENSE / 2 / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )", + "MetricGroup": "Power_SMT", + "MetricName": "Power_License1_Utilization_SMT", + "PublicDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 1. This includes high current AVX 256-bit instructions as well as low current AVX 512-bit instructions. SMT version; use when SMT is enabled and measuring per logical CPU." + }, + { + "BriefDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 2 (introduced in SKX)", + "MetricExpr": "CORE_POWER.LVL2_TURBO_LICENSE / CPU_CLK_UNHALTED.THREAD", + "MetricGroup": "Power", + "MetricName": "Power_License2_Utilization", + "PublicDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 2 (introduced in SKX). This includes high current AVX 512-bit instructions." + }, + { + "BriefDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 2 (introduced in SKX). SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricExpr": "CORE_POWER.LVL2_TURBO_LICENSE / 2 / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )", + "MetricGroup": "Power_SMT", + "MetricName": "Power_License2_Utilization_SMT", + "PublicDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 2 (introduced in SKX). This includes high current AVX 512-bit instructions. SMT version; use when SMT is enabled and measuring per logical CPU." 
+ }, { "BriefDescription": "Fraction of cycles where both hardware Logical Processors were active", "MetricExpr": "1 - CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0", @@ -240,34 +595,46 @@ "MetricGroup": "OS", "MetricName": "Kernel_Utilization" }, + { + "BriefDescription": "Cycles Per Instruction for the Operating System (OS) Kernel mode", + "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / INST_RETIRED.ANY_P:k", + "MetricGroup": "OS", + "MetricName": "Kernel_CPI" + }, { "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]", "MetricExpr": "( 64 * ( uncore_imc@cas_count_read@ + uncore_imc@cas_count_write@ ) / 1000000000 ) / duration_time", - "MetricGroup": "HPC;MemoryBW;SoC", + "MetricGroup": "HPC;Mem;MemoryBW;SoC", "MetricName": "DRAM_BW_Use" }, { "BriefDescription": "Average latency of data read request to external memory (in nanoseconds). Accounts for demand loads and L1/L2 prefetches", "MetricExpr": "1000000000 * ( cha@event\\=0x36\\,umask\\=0x21\\,config\\=0x40433@ / cha@event\\=0x35\\,umask\\=0x21\\,config\\=0x40433@ ) / ( cha_0@event\\=0x0@ / duration_time )", - "MetricGroup": "MemoryLat;SoC", + "MetricGroup": "Mem;MemoryLat;SoC", "MetricName": "MEM_Read_Latency" }, { "BriefDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches", "MetricExpr": "cha@event\\=0x36\\,umask\\=0x21\\,config\\=0x40433@ / cha@event\\=0x36\\,umask\\=0x21\\,config\\=0x40433\\,thresh\\=1@", - "MetricGroup": "MemoryBW;SoC", + "MetricGroup": "Mem;MemoryBW;SoC", "MetricName": "MEM_Parallel_Reads" }, + { + "BriefDescription": "Average latency of data read request to external DRAM memory [in nanoseconds]. 
Accounts for demand loads and L1/L2 data-read prefetches", + "MetricExpr": "1000000000 * ( UNC_M_RPQ_OCCUPANCY / UNC_M_RPQ_INSERTS ) / imc_0@event\\=0x0@", + "MetricGroup": "Mem;MemoryLat;SoC;Server", + "MetricName": "MEM_DRAM_Read_Latency" + }, { "BriefDescription": "Average IO (network or disk) Bandwidth Use for Writes [GB / sec]", "MetricExpr": "( UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART0 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART1 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART2 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART3 ) * 4 / 1000000000 / duration_time", - "MetricGroup": "IoBW;SoC;Server", + "MetricGroup": "IoBW;Mem;SoC;Server", "MetricName": "IO_Write_BW" }, { "BriefDescription": "Average IO (network or disk) Bandwidth Use for Reads [GB / sec]", "MetricExpr": "( UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART0 + UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART1 + UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART2 + UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART3 ) * 4 / 1000000000 / duration_time", - "MetricGroup": "IoBW;SoC;Server", + "MetricGroup": "IoBW;Mem;SoC;Server", "MetricName": "IO_Read_BW" }, { diff --git a/tools/perf/pmu-events/arch/x86/skylakex/uncore-other.json b/tools/perf/pmu-events/arch/x86/skylakex/uncore-other.json index 6ed92bc5c129b..06c5ca26ca3f3 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/uncore-other.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/uncore-other.json @@ -537,6 +537,18 @@ "PublicDescription": "Counts clockticks of the 1GHz trafiic controller clock in the IIO unit.", "Unit": "IIO" }, + { + "BriefDescription": "PCIe Completion Buffer Inserts of completions with data: Part 0-3", + "Counter": "0,1,2,3", + "EventCode": "0xC2", + "EventName": "UNC_IIO_COMP_BUF_INSERTS.CMPD.ALL_PARTS", + "FCMask": "0x4", + "PerPkg": "1", + "PortMask": "0x0f", + "PublicDescription": "PCIe Completion Buffer Inserts of completions with data: Part 0-3", + "UMask": "0x03", + "Unit": "IIO" + }, { "BriefDescription": "PCIe Completion Buffer Inserts of completions with data: Part 0", 
"Counter": "0,1,2,3", @@ -585,6 +597,17 @@ "UMask": "0x03", "Unit": "IIO" }, + { + "BriefDescription": "PCIe Completion Buffer occupancy of completions with data: Part 0-3", + "Counter": "2,3", + "EventCode": "0xD5", + "EventName": "UNC_IIO_COMP_BUF_OCCUPANCY.CMPD.ALL_PARTS", + "FCMask": "0x04", + "PerPkg": "1", + "PublicDescription": "PCIe Completion Buffer occupancy of completions with data: Part 0-3", + "UMask": "0x0f", + "Unit": "IIO" + }, { "BriefDescription": "PCIe Completion Buffer occupancy of completions with data: Part 0", "Counter": "2,3", diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c index 2dab2d2620608..afdca7f2959f0 100644 --- a/tools/perf/tests/dwarf-unwind.c +++ b/tools/perf/tests/dwarf-unwind.c @@ -122,7 +122,7 @@ NO_TAIL_CALL_ATTRIBUTE noinline int test_dwarf_unwind__thread(struct thread *thr } err = unwind__get_entries(unwind_entry, &cnt, thread, - &sample, MAX_STACK); + &sample, MAX_STACK, false); if (err) pr_debug("unwind failed\n"); else if (cnt != MAX_STACK) { diff --git a/tools/perf/tests/shell/test_arm_callgraph_fp.sh b/tools/perf/tests/shell/test_arm_callgraph_fp.sh new file mode 100755 index 0000000000000..6ffbb27afabac --- /dev/null +++ b/tools/perf/tests/shell/test_arm_callgraph_fp.sh @@ -0,0 +1,68 @@ +#!/bin/sh +# Check Arm64 callgraphs are complete in fp mode +# SPDX-License-Identifier: GPL-2.0 + +lscpu | grep -q "aarch64" || exit 2 + +if ! 
[ -x "$(command -v cc)" ]; then + echo "failed: no compiler, install gcc" + exit 2 +fi + +PERF_DATA=$(mktemp /tmp/__perf_test.perf.data.XXXXX) +TEST_PROGRAM_SOURCE=$(mktemp /tmp/test_program.XXXXX.c) +TEST_PROGRAM=$(mktemp /tmp/test_program.XXXXX) + +cleanup_files() +{ + rm -f $PERF_DATA + rm -f $TEST_PROGRAM_SOURCE + rm -f $TEST_PROGRAM +} + +trap cleanup_files exit term int + +cat << EOF > $TEST_PROGRAM_SOURCE +int a = 0; +void leaf(void) { + for (;;) + a += a; +} +void parent(void) { + leaf(); +} +int main(void) { + parent(); + return 0; +} +EOF + +echo " + Compiling test program ($TEST_PROGRAM)..." + +CFLAGS="-g -O0 -fno-inline -fno-omit-frame-pointer" +cc $CFLAGS $TEST_PROGRAM_SOURCE -o $TEST_PROGRAM || exit 1 + +# Add a 1 second delay to skip samples that are not in the leaf() function +perf record -o $PERF_DATA --call-graph fp -e cycles//u -D 1000 -- $TEST_PROGRAM 2> /dev/null & +PID=$! + +echo " + Recording (PID=$PID)..." +sleep 2 +echo " + Stopping perf-record..." + +kill $PID +wait $PID + +# expected perf-script output: +# +# program +# 728 leaf +# 753 parent +# 76c main +# ... 
+ +perf script -i $PERF_DATA -F comm,ip,sym | head -n4 +perf script -i $PERF_DATA -F comm,ip,sym | head -n4 | \ + awk '{ if ($2 != "") sym[i++] = $2 } END { if (sym[0] != "leaf" || + sym[1] != "parent" || + sym[2] != "main") exit 1 }' diff --git a/tools/perf/util/arm64-frame-pointer-unwind-support.c b/tools/perf/util/arm64-frame-pointer-unwind-support.c index 2242a885fbd73..4940be4a0569c 100644 --- a/tools/perf/util/arm64-frame-pointer-unwind-support.c +++ b/tools/perf/util/arm64-frame-pointer-unwind-support.c @@ -53,7 +53,7 @@ u64 get_leaf_frame_caller_aarch64(struct perf_sample *sample, struct thread *thr sample->user_regs.cache_regs[PERF_REG_ARM64_SP] = 0; } - ret = unwind__get_entries(add_entry, &entries, thread, sample, 2); + ret = unwind__get_entries(add_entry, &entries, thread, sample, 2, true); sample->user_regs = old_regs; if (ret || entries.length != 2) diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 3945500036937..564abe17a0bd2 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -2983,7 +2983,7 @@ static int thread__resolve_callchain_unwind(struct thread *thread, return 0; return unwind__get_entries(unwind_entry, cursor, - thread, sample, max_stack); + thread, sample, max_stack, false); } int thread__resolve_callchain(struct thread *thread, diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 24997925ae00d..dd84fed698a3b 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1523,7 +1523,9 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, bool use_uncore_alias; LIST_HEAD(config_terms); - if (verbose > 1) { + pmu = parse_state->fake_pmu ?: perf_pmu__find(name); + + if (verbose > 1 && !(pmu && pmu->selectable)) { fprintf(stderr, "Attempting to add event pmu '%s' with '", name); if (head_config) { @@ -1536,7 +1538,6 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, fprintf(stderr, "' that may result in non-fatal 
errors\n"); } - pmu = parse_state->fake_pmu ?: perf_pmu__find(name); if (!pmu) { char *err_str; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 498b05708db57..245dc70d1882a 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -2084,6 +2084,7 @@ prefetch_event(char *buf, u64 head, size_t mmap_size, bool needs_swap, union perf_event *error) { union perf_event *event; + u16 event_size; /* * Ensure we have enough space remaining to read @@ -2096,15 +2097,23 @@ prefetch_event(char *buf, u64 head, size_t mmap_size, if (needs_swap) perf_event_header__bswap(&event->header); - if (head + event->header.size <= mmap_size) + event_size = event->header.size; + if (head + event_size <= mmap_size) return event; /* We're not fetching the event so swap back again */ if (needs_swap) perf_event_header__bswap(&event->header); - pr_debug("%s: head=%#" PRIx64 " event->header_size=%#x, mmap_size=%#zx:" - " fuzzed or compressed perf.data?\n",__func__, head, event->header.size, mmap_size); + /* Check if the event fits into the next mmapped buf. */ + if (event_size <= mmap_size - head % page_size) { + /* Remap buf and fetch again. */ + return NULL; + } + + /* Invalid input. Event size should never exceed mmap_size. 
*/ + pr_debug("%s: head=%#" PRIx64 " event->header.size=%#x, mmap_size=%#zx:" + " fuzzed or compressed perf.data?\n", __func__, head, event_size, mmap_size); return error; } diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py index 483f05004e682..c255a2c90cd67 100644 --- a/tools/perf/util/setup.py +++ b/tools/perf/util/setup.py @@ -1,12 +1,14 @@ -from os import getenv +from os import getenv, path from subprocess import Popen, PIPE from re import sub cc = getenv("CC") cc_is_clang = b"clang version" in Popen([cc.split()[0], "-v"], stderr=PIPE).stderr.readline() +src_feature_tests = getenv('srctree') + '/tools/build/feature' def clang_has_option(option): - return [o for o in Popen([cc, option], stderr=PIPE).stderr.readlines() if b"unknown argument" in o] == [ ] + cc_output = Popen([cc, option, path.join(src_feature_tests, "test-hello.c") ], stderr=PIPE).stderr.readlines() + return [o for o in cc_output if ((b"unknown argument" in o) or (b"is not supported" in o))] == [ ] if cc_is_clang: from distutils.sysconfig import get_config_vars @@ -23,6 +25,8 @@ def clang_has_option(option): vars[var] = sub("-fstack-protector-strong", "", vars[var]) if not clang_has_option("-fno-semantic-interposition"): vars[var] = sub("-fno-semantic-interposition", "", vars[var]) + if not clang_has_option("-ffat-lto-objects"): + vars[var] = sub("-ffat-lto-objects", "", vars[var]) from distutils.core import setup, Extension diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c index a74b517f74974..94aa40f6e3482 100644 --- a/tools/perf/util/unwind-libdw.c +++ b/tools/perf/util/unwind-libdw.c @@ -200,7 +200,8 @@ frame_callback(Dwfl_Frame *state, void *arg) bool isactivation; if (!dwfl_frame_pc(state, &pc, NULL)) { - pr_err("%s", dwfl_errmsg(-1)); + if (!ui->best_effort) + pr_err("%s", dwfl_errmsg(-1)); return DWARF_CB_ABORT; } @@ -208,7 +209,8 @@ frame_callback(Dwfl_Frame *state, void *arg) report_module(pc, ui); if (!dwfl_frame_pc(state, &pc, &isactivation)) { 
- pr_err("%s", dwfl_errmsg(-1)); + if (!ui->best_effort) + pr_err("%s", dwfl_errmsg(-1)); return DWARF_CB_ABORT; } @@ -222,7 +224,8 @@ frame_callback(Dwfl_Frame *state, void *arg) int unwind__get_entries(unwind_entry_cb_t cb, void *arg, struct thread *thread, struct perf_sample *data, - int max_stack) + int max_stack, + bool best_effort) { struct unwind_info *ui, ui_buf = { .sample = data, @@ -231,6 +234,7 @@ int unwind__get_entries(unwind_entry_cb_t cb, void *arg, .cb = cb, .arg = arg, .max_stack = max_stack, + .best_effort = best_effort }; Dwarf_Word ip; int err = -EINVAL, i; diff --git a/tools/perf/util/unwind-libdw.h b/tools/perf/util/unwind-libdw.h index 0cbd2650e280e..8c88bc4f2304b 100644 --- a/tools/perf/util/unwind-libdw.h +++ b/tools/perf/util/unwind-libdw.h @@ -20,6 +20,7 @@ struct unwind_info { void *arg; int max_stack; int idx; + bool best_effort; struct unwind_entry entries[]; }; diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c index 71a3533491815..41e29fc7648ae 100644 --- a/tools/perf/util/unwind-libunwind-local.c +++ b/tools/perf/util/unwind-libunwind-local.c @@ -96,6 +96,7 @@ struct unwind_info { struct perf_sample *sample; struct machine *machine; struct thread *thread; + bool best_effort; }; #define dw_read(ptr, type, end) ({ \ @@ -553,7 +554,8 @@ static int access_reg(unw_addr_space_t __maybe_unused as, ret = perf_reg_value(&val, &ui->sample->user_regs, id); if (ret) { - pr_err("unwind: can't read reg %d\n", regnum); + if (!ui->best_effort) + pr_err("unwind: can't read reg %d\n", regnum); return ret; } @@ -666,7 +668,7 @@ static int get_entries(struct unwind_info *ui, unwind_entry_cb_t cb, return -1; ret = unw_init_remote(&c, addr_space, ui); - if (ret) + if (ret && !ui->best_effort) display_error(ret); while (!ret && (unw_step(&c) > 0) && i < max_stack) { @@ -704,12 +706,14 @@ static int get_entries(struct unwind_info *ui, unwind_entry_cb_t cb, static int _unwind__get_entries(unwind_entry_cb_t cb, 
void *arg, struct thread *thread, - struct perf_sample *data, int max_stack) + struct perf_sample *data, int max_stack, + bool best_effort) { struct unwind_info ui = { .sample = data, .thread = thread, .machine = thread->maps->machine, + .best_effort = best_effort }; if (!data->user_regs.regs) diff --git a/tools/perf/util/unwind-libunwind.c b/tools/perf/util/unwind-libunwind.c index e89a5479b3613..509c287ee7628 100644 --- a/tools/perf/util/unwind-libunwind.c +++ b/tools/perf/util/unwind-libunwind.c @@ -80,9 +80,11 @@ void unwind__finish_access(struct maps *maps) int unwind__get_entries(unwind_entry_cb_t cb, void *arg, struct thread *thread, - struct perf_sample *data, int max_stack) + struct perf_sample *data, int max_stack, + bool best_effort) { if (thread->maps->unwind_libunwind_ops) - return thread->maps->unwind_libunwind_ops->get_entries(cb, arg, thread, data, max_stack); + return thread->maps->unwind_libunwind_ops->get_entries(cb, arg, thread, data, + max_stack, best_effort); return 0; } diff --git a/tools/perf/util/unwind.h b/tools/perf/util/unwind.h index ab8ad469c8de5..b2a03fa5289b3 100644 --- a/tools/perf/util/unwind.h +++ b/tools/perf/util/unwind.h @@ -23,13 +23,19 @@ struct unwind_libunwind_ops { void (*finish_access)(struct maps *maps); int (*get_entries)(unwind_entry_cb_t cb, void *arg, struct thread *thread, - struct perf_sample *data, int max_stack); + struct perf_sample *data, int max_stack, bool best_effort); }; #ifdef HAVE_DWARF_UNWIND_SUPPORT +/* + * When best_effort is set, don't report errors and fail silently. This could + * be expanded in the future to be more permissive about things other than + * error messages. 
+ */ int unwind__get_entries(unwind_entry_cb_t cb, void *arg, struct thread *thread, - struct perf_sample *data, int max_stack); + struct perf_sample *data, int max_stack, + bool best_effort); /* libunwind specific */ #ifdef HAVE_LIBUNWIND_SUPPORT #ifndef LIBUNWIND__ARCH_REG_ID @@ -65,7 +71,8 @@ unwind__get_entries(unwind_entry_cb_t cb __maybe_unused, void *arg __maybe_unused, struct thread *thread __maybe_unused, struct perf_sample *data __maybe_unused, - int max_stack __maybe_unused) + int max_stack __maybe_unused, + bool best_effort __maybe_unused) { return 0; } diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild index 1acdf2fc31c59..3299fb0977b27 100644 --- a/tools/testing/cxl/Kbuild +++ b/tools/testing/cxl/Kbuild @@ -25,7 +25,7 @@ cxl_pmem-y += config_check.o obj-m += cxl_core.o -cxl_core-y := $(CXL_CORE_SRC)/bus.o +cxl_core-y := $(CXL_CORE_SRC)/port.o cxl_core-y += $(CXL_CORE_SRC)/pmem.o cxl_core-y += $(CXL_CORE_SRC)/regs.o cxl_core-y += $(CXL_CORE_SRC)/memdev.o diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c index 736d99006fb7a..f0a410962af0d 100644 --- a/tools/testing/cxl/test/cxl.c +++ b/tools/testing/cxl/test/cxl.c @@ -511,7 +511,7 @@ static __init int cxl_test_init(void) for (i = 0; i < ARRAY_SIZE(cxl_root_port); i++) { struct platform_device *bridge = - cxl_host_bridge[i / NR_CXL_ROOT_PORTS]; + cxl_host_bridge[i % ARRAY_SIZE(cxl_host_bridge)]; struct platform_device *pdev; pdev = platform_device_alloc("cxl_root_port", i); diff --git a/tools/testing/selftests/bpf/prog_tests/bind_perm.c b/tools/testing/selftests/bpf/prog_tests/bind_perm.c index d0f06e40c16d0..eac71fbb24ce2 100644 --- a/tools/testing/selftests/bpf/prog_tests/bind_perm.c +++ b/tools/testing/selftests/bpf/prog_tests/bind_perm.c @@ -1,13 +1,24 @@ // SPDX-License-Identifier: GPL-2.0 -#include -#include "bind_perm.skel.h" - +#define _GNU_SOURCE +#include +#include #include #include #include +#include "test_progs.h" +#include "bind_perm.skel.h" + static int 
duration; +static int create_netns(void) +{ + if (!ASSERT_OK(unshare(CLONE_NEWNET), "create netns")) + return -1; + + return 0; +} + void try_bind(int family, int port, int expected_errno) { struct sockaddr_storage addr = {}; @@ -75,6 +86,9 @@ void test_bind_perm(void) struct bind_perm *skel; int cgroup_fd; + if (create_netns()) + return; + cgroup_fd = test__join_cgroup("/bind_perm"); if (CHECK(cgroup_fd < 0, "cg-join", "errno %d", errno)) return; diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h new file mode 100644 index 0000000000000..5bb11fe595a43 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_misc.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __BPF_MISC_H__ +#define __BPF_MISC_H__ + +#if defined(__TARGET_ARCH_x86) +#define SYSCALL_WRAPPER 1 +#define SYS_PREFIX "__x64_" +#elif defined(__TARGET_ARCH_s390) +#define SYSCALL_WRAPPER 1 +#define SYS_PREFIX "__s390x_" +#elif defined(__TARGET_ARCH_arm64) +#define SYSCALL_WRAPPER 1 +#define SYS_PREFIX "__arm64_" +#else +#define SYSCALL_WRAPPER 0 +#define SYS_PREFIX "__se_" +#endif + +#endif diff --git a/tools/testing/selftests/bpf/progs/test_probe_user.c b/tools/testing/selftests/bpf/progs/test_probe_user.c index 8812a90da4eb8..702578a5e496d 100644 --- a/tools/testing/selftests/bpf/progs/test_probe_user.c +++ b/tools/testing/selftests/bpf/progs/test_probe_user.c @@ -7,20 +7,7 @@ #include #include - -#if defined(__TARGET_ARCH_x86) -#define SYSCALL_WRAPPER 1 -#define SYS_PREFIX "__x64_" -#elif defined(__TARGET_ARCH_s390) -#define SYSCALL_WRAPPER 1 -#define SYS_PREFIX "__s390x_" -#elif defined(__TARGET_ARCH_arm64) -#define SYSCALL_WRAPPER 1 -#define SYS_PREFIX "__arm64_" -#else -#define SYSCALL_WRAPPER 0 -#define SYS_PREFIX "" -#endif +#include "bpf_misc.h" static struct sockaddr_in old; diff --git a/tools/testing/selftests/bpf/progs/test_sk_lookup.c b/tools/testing/selftests/bpf/progs/test_sk_lookup.c index 83b0aaa52ef77..a0e0d85e29da7 
100644 --- a/tools/testing/selftests/bpf/progs/test_sk_lookup.c +++ b/tools/testing/selftests/bpf/progs/test_sk_lookup.c @@ -412,8 +412,7 @@ int ctx_narrow_access(struct bpf_sk_lookup *ctx) /* Narrow loads from remote_port field. Expect SRC_PORT. */ if (LSB(ctx->remote_port, 0) != ((SRC_PORT >> 0) & 0xff) || - LSB(ctx->remote_port, 1) != ((SRC_PORT >> 8) & 0xff) || - LSB(ctx->remote_port, 2) != 0 || LSB(ctx->remote_port, 3) != 0) + LSB(ctx->remote_port, 1) != ((SRC_PORT >> 8) & 0xff)) return SK_DROP; if (LSW(ctx->remote_port, 0) != SRC_PORT) return SK_DROP; diff --git a/tools/testing/selftests/bpf/progs/test_sock_fields.c b/tools/testing/selftests/bpf/progs/test_sock_fields.c index 81b57b9aaaeae..7967348b11af6 100644 --- a/tools/testing/selftests/bpf/progs/test_sock_fields.c +++ b/tools/testing/selftests/bpf/progs/test_sock_fields.c @@ -113,7 +113,7 @@ static void tpcpy(struct bpf_tcp_sock *dst, #define RET_LOG() ({ \ linum = __LINE__; \ - bpf_map_update_elem(&linum_map, &linum_idx, &linum, BPF_NOEXIST); \ + bpf_map_update_elem(&linum_map, &linum_idx, &linum, BPF_ANY); \ return CG_OK; \ }) diff --git a/tools/testing/selftests/bpf/test_lirc_mode2.sh b/tools/testing/selftests/bpf/test_lirc_mode2.sh index ec4e15948e406..5252b91f48a18 100755 --- a/tools/testing/selftests/bpf/test_lirc_mode2.sh +++ b/tools/testing/selftests/bpf/test_lirc_mode2.sh @@ -3,6 +3,7 @@ # Kselftest framework requirement - SKIP code is 4. 
ksft_skip=4 +ret=$ksft_skip msg="skip all tests:" if [ $UID != 0 ]; then @@ -25,7 +26,7 @@ do fi done -if [ -n $LIRCDEV ]; +if [ -n "$LIRCDEV" ]; then TYPE=lirc_mode2 ./test_lirc_mode2_user $LIRCDEV $INPUTDEV @@ -36,3 +37,5 @@ then echo -e ${GREEN}"PASS: $TYPE"${NC} fi fi + +exit $ret diff --git a/tools/testing/selftests/bpf/test_lwt_ip_encap.sh b/tools/testing/selftests/bpf/test_lwt_ip_encap.sh index b497bb85b667f..6c69c42b1d607 100755 --- a/tools/testing/selftests/bpf/test_lwt_ip_encap.sh +++ b/tools/testing/selftests/bpf/test_lwt_ip_encap.sh @@ -120,6 +120,14 @@ setup() ip netns exec ${NS2} sysctl -wq net.ipv4.conf.default.rp_filter=0 ip netns exec ${NS3} sysctl -wq net.ipv4.conf.default.rp_filter=0 + # disable IPv6 DAD because it sometimes takes too long and fails tests + ip netns exec ${NS1} sysctl -wq net.ipv6.conf.all.accept_dad=0 + ip netns exec ${NS2} sysctl -wq net.ipv6.conf.all.accept_dad=0 + ip netns exec ${NS3} sysctl -wq net.ipv6.conf.all.accept_dad=0 + ip netns exec ${NS1} sysctl -wq net.ipv6.conf.default.accept_dad=0 + ip netns exec ${NS2} sysctl -wq net.ipv6.conf.default.accept_dad=0 + ip netns exec ${NS3} sysctl -wq net.ipv6.conf.default.accept_dad=0 + ip link add veth1 type veth peer name veth2 ip link add veth3 type veth peer name veth4 ip link add veth5 type veth peer name veth6 @@ -289,7 +297,7 @@ test_ping() ip netns exec ${NS1} ping -c 1 -W 1 -I veth1 ${IPv4_DST} 2>&1 > /dev/null RET=$? elif [ "${PROTO}" == "IPv6" ] ; then - ip netns exec ${NS1} ping6 -c 1 -W 6 -I veth1 ${IPv6_DST} 2>&1 > /dev/null + ip netns exec ${NS1} ping6 -c 1 -W 1 -I veth1 ${IPv6_DST} 2>&1 > /dev/null RET=$? 
else echo " test_ping: unknown PROTO: ${PROTO}" diff --git a/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh b/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh index 05f8727409997..cc57cb87e65f6 100755 --- a/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh +++ b/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh @@ -32,6 +32,11 @@ DRV_MODE="xdpgeneric xdpdrv xdpegress" PASS=0 FAIL=0 LOG_DIR=$(mktemp -d) +declare -a NS +NS[0]="ns0-$(mktemp -u XXXXXX)" +NS[1]="ns1-$(mktemp -u XXXXXX)" +NS[2]="ns2-$(mktemp -u XXXXXX)" +NS[3]="ns3-$(mktemp -u XXXXXX)" test_pass() { @@ -47,11 +52,9 @@ test_fail() clean_up() { - for i in $(seq $NUM); do - ip link del veth$i 2> /dev/null - ip netns del ns$i 2> /dev/null + for i in $(seq 0 $NUM); do + ip netns del ${NS[$i]} 2> /dev/null done - ip netns del ns0 2> /dev/null } # Kselftest framework requirement - SKIP code is 4. @@ -79,23 +82,22 @@ setup_ns() mode="xdpdrv" fi - ip netns add ns0 + ip netns add ${NS[0]} for i in $(seq $NUM); do - ip netns add ns$i - ip -n ns$i link add veth0 index 2 type veth \ - peer name veth$i netns ns0 index $((1 + $i)) - ip -n ns0 link set veth$i up - ip -n ns$i link set veth0 up - - ip -n ns$i addr add 192.0.2.$i/24 dev veth0 - ip -n ns$i addr add 2001:db8::$i/64 dev veth0 + ip netns add ${NS[$i]} + ip -n ${NS[$i]} link add veth0 type veth peer name veth$i netns ${NS[0]} + ip -n ${NS[$i]} link set veth0 up + ip -n ${NS[0]} link set veth$i up + + ip -n ${NS[$i]} addr add 192.0.2.$i/24 dev veth0 + ip -n ${NS[$i]} addr add 2001:db8::$i/64 dev veth0 # Add a neigh entry for IPv4 ping test - ip -n ns$i neigh add 192.0.2.253 lladdr 00:00:00:00:00:01 dev veth0 - ip -n ns$i link set veth0 $mode obj \ + ip -n ${NS[$i]} neigh add 192.0.2.253 lladdr 00:00:00:00:00:01 dev veth0 + ip -n ${NS[$i]} link set veth0 $mode obj \ xdp_dummy.o sec xdp &> /dev/null || \ { test_fail "Unable to load dummy xdp" && exit 1; } IFACES="$IFACES veth$i" - veth_mac[$i]=$(ip -n ns0 link show veth$i | awk 
'/link\/ether/ {print $2}') + veth_mac[$i]=$(ip -n ${NS[0]} link show veth$i | awk '/link\/ether/ {print $2}') done } @@ -104,10 +106,10 @@ do_egress_tests() local mode=$1 # mac test - ip netns exec ns2 tcpdump -e -i veth0 -nn -l -e &> ${LOG_DIR}/mac_ns1-2_${mode}.log & - ip netns exec ns3 tcpdump -e -i veth0 -nn -l -e &> ${LOG_DIR}/mac_ns1-3_${mode}.log & + ip netns exec ${NS[2]} tcpdump -e -i veth0 -nn -l -e &> ${LOG_DIR}/mac_ns1-2_${mode}.log & + ip netns exec ${NS[3]} tcpdump -e -i veth0 -nn -l -e &> ${LOG_DIR}/mac_ns1-3_${mode}.log & sleep 0.5 - ip netns exec ns1 ping 192.0.2.254 -i 0.1 -c 4 &> /dev/null + ip netns exec ${NS[1]} ping 192.0.2.254 -i 0.1 -c 4 &> /dev/null sleep 0.5 pkill tcpdump @@ -123,18 +125,18 @@ do_ping_tests() local mode=$1 # ping6 test: echo request should be redirect back to itself, not others - ip netns exec ns1 ip neigh add 2001:db8::2 dev veth0 lladdr 00:00:00:00:00:02 + ip netns exec ${NS[1]} ip neigh add 2001:db8::2 dev veth0 lladdr 00:00:00:00:00:02 - ip netns exec ns1 tcpdump -i veth0 -nn -l -e &> ${LOG_DIR}/ns1-1_${mode}.log & - ip netns exec ns2 tcpdump -i veth0 -nn -l -e &> ${LOG_DIR}/ns1-2_${mode}.log & - ip netns exec ns3 tcpdump -i veth0 -nn -l -e &> ${LOG_DIR}/ns1-3_${mode}.log & + ip netns exec ${NS[1]} tcpdump -i veth0 -nn -l -e &> ${LOG_DIR}/ns1-1_${mode}.log & + ip netns exec ${NS[2]} tcpdump -i veth0 -nn -l -e &> ${LOG_DIR}/ns1-2_${mode}.log & + ip netns exec ${NS[3]} tcpdump -i veth0 -nn -l -e &> ${LOG_DIR}/ns1-3_${mode}.log & sleep 0.5 # ARP test - ip netns exec ns1 arping -q -c 2 -I veth0 192.0.2.254 + ip netns exec ${NS[1]} arping -q -c 2 -I veth0 192.0.2.254 # IPv4 test - ip netns exec ns1 ping 192.0.2.253 -i 0.1 -c 4 &> /dev/null + ip netns exec ${NS[1]} ping 192.0.2.253 -i 0.1 -c 4 &> /dev/null # IPv6 test - ip netns exec ns1 ping6 2001:db8::2 -i 0.1 -c 2 &> /dev/null + ip netns exec ${NS[1]} ping6 2001:db8::2 -i 0.1 -c 2 &> /dev/null sleep 0.5 pkill tcpdump @@ -180,7 +182,7 @@ do_tests() xdpgeneric) 
drv_p="-S";; esac - ip netns exec ns0 ./xdp_redirect_multi $drv_p $IFACES &> ${LOG_DIR}/xdp_redirect_${mode}.log & + ip netns exec ${NS[0]} ./xdp_redirect_multi $drv_p $IFACES &> ${LOG_DIR}/xdp_redirect_${mode}.log & xdp_pid=$! sleep 1 if ! ps -p $xdp_pid > /dev/null; then @@ -197,10 +199,10 @@ do_tests() kill $xdp_pid } -trap clean_up EXIT - check_env +trap clean_up EXIT + for mode in ${DRV_MODE}; do setup_ns $mode do_tests $mode diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c index 0a5d23da486df..5f8296d29e778 100644 --- a/tools/testing/selftests/bpf/xdpxceiver.c +++ b/tools/testing/selftests/bpf/xdpxceiver.c @@ -266,22 +266,24 @@ static int xsk_configure_umem(struct xsk_umem_info *umem, void *buffer, u64 size } static int xsk_configure_socket(struct xsk_socket_info *xsk, struct xsk_umem_info *umem, - struct ifobject *ifobject, u32 qid) + struct ifobject *ifobject, bool shared) { - struct xsk_socket_config cfg; + struct xsk_socket_config cfg = {}; struct xsk_ring_cons *rxr; struct xsk_ring_prod *txr; xsk->umem = umem; cfg.rx_size = xsk->rxqsize; cfg.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS; - cfg.libbpf_flags = 0; + cfg.libbpf_flags = XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD; cfg.xdp_flags = ifobject->xdp_flags; cfg.bind_flags = ifobject->bind_flags; + if (shared) + cfg.bind_flags |= XDP_SHARED_UMEM; txr = ifobject->tx_on ? &xsk->tx : NULL; rxr = ifobject->rx_on ? &xsk->rx : NULL; - return xsk_socket__create(&xsk->xsk, ifobject->ifname, qid, umem->umem, rxr, txr, &cfg); + return xsk_socket__create(&xsk->xsk, ifobject->ifname, 0, umem->umem, rxr, txr, &cfg); } static struct option long_options[] = { @@ -387,7 +389,6 @@ static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx, for (i = 0; i < MAX_INTERFACES; i++) { struct ifobject *ifobj = i ? 
ifobj_rx : ifobj_tx; - ifobj->umem = &ifobj->umem_arr[0]; ifobj->xsk = &ifobj->xsk_arr[0]; ifobj->use_poll = false; ifobj->pacing_on = true; @@ -401,11 +402,12 @@ static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx, ifobj->tx_on = false; } + memset(ifobj->umem, 0, sizeof(*ifobj->umem)); + ifobj->umem->num_frames = DEFAULT_UMEM_BUFFERS; + ifobj->umem->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE; + for (j = 0; j < MAX_SOCKETS; j++) { - memset(&ifobj->umem_arr[j], 0, sizeof(ifobj->umem_arr[j])); memset(&ifobj->xsk_arr[j], 0, sizeof(ifobj->xsk_arr[j])); - ifobj->umem_arr[j].num_frames = DEFAULT_UMEM_BUFFERS; - ifobj->umem_arr[j].frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE; ifobj->xsk_arr[j].rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS; } } @@ -906,7 +908,10 @@ static bool rx_stats_are_valid(struct ifobject *ifobject) return true; case STAT_TEST_RX_FULL: xsk_stat = stats.rx_ring_full; - expected_stat -= RX_FULL_RXQSIZE; + if (ifobject->umem->num_frames < XSK_RING_PROD__DEFAULT_NUM_DESCS) + expected_stat = ifobject->umem->num_frames - RX_FULL_RXQSIZE; + else + expected_stat = XSK_RING_PROD__DEFAULT_NUM_DESCS - RX_FULL_RXQSIZE; break; case STAT_TEST_RX_FILL_EMPTY: xsk_stat = stats.rx_fill_ring_empty_descs; @@ -947,7 +952,10 @@ static void tx_stats_validate(struct ifobject *ifobject) static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject) { + u64 umem_sz = ifobject->umem->num_frames * ifobject->umem->frame_size; int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE; + int ret, ifindex; + void *bufs; u32 i; ifobject->ns_fd = switch_namespace(ifobject->nsname); @@ -955,23 +963,20 @@ static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject) if (ifobject->umem->unaligned_mode) mmap_flags |= MAP_HUGETLB; - for (i = 0; i < test->nb_sockets; i++) { - u64 umem_sz = ifobject->umem->num_frames * ifobject->umem->frame_size; - u32 ctr = 0; - void *bufs; - int ret; + bufs = mmap(NULL, umem_sz, PROT_READ | 
PROT_WRITE, mmap_flags, -1, 0); + if (bufs == MAP_FAILED) + exit_with_error(errno); - bufs = mmap(NULL, umem_sz, PROT_READ | PROT_WRITE, mmap_flags, -1, 0); - if (bufs == MAP_FAILED) - exit_with_error(errno); + ret = xsk_configure_umem(ifobject->umem, bufs, umem_sz); + if (ret) + exit_with_error(-ret); - ret = xsk_configure_umem(&ifobject->umem_arr[i], bufs, umem_sz); - if (ret) - exit_with_error(-ret); + for (i = 0; i < test->nb_sockets; i++) { + u32 ctr = 0; while (ctr++ < SOCK_RECONF_CTR) { - ret = xsk_configure_socket(&ifobject->xsk_arr[i], &ifobject->umem_arr[i], - ifobject, i); + ret = xsk_configure_socket(&ifobject->xsk_arr[i], ifobject->umem, + ifobject, !!i); if (!ret) break; @@ -982,8 +987,22 @@ static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject) } } - ifobject->umem = &ifobject->umem_arr[0]; ifobject->xsk = &ifobject->xsk_arr[0]; + + if (!ifobject->rx_on) + return; + + ifindex = if_nametoindex(ifobject->ifname); + if (!ifindex) + exit_with_error(errno); + + ret = xsk_setup_xdp_prog(ifindex, &ifobject->xsk_map_fd); + if (ret) + exit_with_error(-ret); + + ret = xsk_socket__update_xskmap(ifobject->xsk->xsk, ifobject->xsk_map_fd); + if (ret) + exit_with_error(-ret); } static void testapp_cleanup_xsk_res(struct ifobject *ifobj) @@ -1139,14 +1158,16 @@ static void testapp_bidi(struct test_spec *test) static void swap_xsk_resources(struct ifobject *ifobj_tx, struct ifobject *ifobj_rx) { + int ret; + xsk_socket__delete(ifobj_tx->xsk->xsk); - xsk_umem__delete(ifobj_tx->umem->umem); xsk_socket__delete(ifobj_rx->xsk->xsk); - xsk_umem__delete(ifobj_rx->umem->umem); - ifobj_tx->umem = &ifobj_tx->umem_arr[1]; ifobj_tx->xsk = &ifobj_tx->xsk_arr[1]; - ifobj_rx->umem = &ifobj_rx->umem_arr[1]; ifobj_rx->xsk = &ifobj_rx->xsk_arr[1]; + + ret = xsk_socket__update_xskmap(ifobj_rx->xsk->xsk, ifobj_rx->xsk_map_fd); + if (ret) + exit_with_error(-ret); } static void testapp_bpf_res(struct test_spec *test) @@ -1405,13 +1426,13 @@ static struct ifobject 
*ifobject_create(void) if (!ifobj->xsk_arr) goto out_xsk_arr; - ifobj->umem_arr = calloc(MAX_SOCKETS, sizeof(*ifobj->umem_arr)); - if (!ifobj->umem_arr) - goto out_umem_arr; + ifobj->umem = calloc(1, sizeof(*ifobj->umem)); + if (!ifobj->umem) + goto out_umem; return ifobj; -out_umem_arr: +out_umem: free(ifobj->xsk_arr); out_xsk_arr: free(ifobj); @@ -1420,7 +1441,7 @@ static struct ifobject *ifobject_create(void) static void ifobject_delete(struct ifobject *ifobj) { - free(ifobj->umem_arr); + free(ifobj->umem); free(ifobj->xsk_arr); free(ifobj); } diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h index 2f705f44b7483..62a3e63886325 100644 --- a/tools/testing/selftests/bpf/xdpxceiver.h +++ b/tools/testing/selftests/bpf/xdpxceiver.h @@ -125,10 +125,10 @@ struct ifobject { struct xsk_socket_info *xsk; struct xsk_socket_info *xsk_arr; struct xsk_umem_info *umem; - struct xsk_umem_info *umem_arr; thread_func_t func_ptr; struct pkt_stream *pkt_stream; int ns_fd; + int xsk_map_fd; u32 dst_ip; u32 src_ip; u32 xdp_flags; diff --git a/tools/testing/selftests/kvm/aarch64/vgic_irq.c b/tools/testing/selftests/kvm/aarch64/vgic_irq.c index 7eca977999170..554ca649d4701 100644 --- a/tools/testing/selftests/kvm/aarch64/vgic_irq.c +++ b/tools/testing/selftests/kvm/aarch64/vgic_irq.c @@ -306,7 +306,8 @@ static void guest_restore_active(struct test_args *args, uint32_t prio, intid, ap1r; int i; - /* Set the priorities of the first (KVM_NUM_PRIOS - 1) IRQs + /* + * Set the priorities of the first (KVM_NUM_PRIOS - 1) IRQs * in descending order, so intid+1 can preempt intid. */ for (i = 0, prio = (num - 1) * 8; i < num; i++, prio -= 8) { @@ -315,7 +316,8 @@ static void guest_restore_active(struct test_args *args, gic_set_priority(intid, prio); } - /* In a real migration, KVM would restore all GIC state before running + /* + * In a real migration, KVM would restore all GIC state before running * guest code. 
*/ for (i = 0; i < num; i++) { @@ -472,10 +474,10 @@ static void test_restore_active(struct test_args *args, struct kvm_inject_desc * guest_restore_active(args, MIN_SPI, 4, f->cmd); } -static void guest_code(struct test_args args) +static void guest_code(struct test_args *args) { - uint32_t i, nr_irqs = args.nr_irqs; - bool level_sensitive = args.level_sensitive; + uint32_t i, nr_irqs = args->nr_irqs; + bool level_sensitive = args->level_sensitive; struct kvm_inject_desc *f, *inject_fns; gic_init(GIC_V3, 1, dist, redist); @@ -484,11 +486,11 @@ static void guest_code(struct test_args args) gic_irq_enable(i); for (i = MIN_SPI; i < nr_irqs; i++) - gic_irq_set_config(i, !args.level_sensitive); + gic_irq_set_config(i, !level_sensitive); - gic_set_eoi_split(args.eoi_split); + gic_set_eoi_split(args->eoi_split); - reset_priorities(&args); + reset_priorities(args); gic_set_priority_mask(CPU_PRIO_MASK); inject_fns = level_sensitive ? inject_level_fns @@ -497,17 +499,18 @@ static void guest_code(struct test_args args) local_irq_enable(); /* Start the tests. */ - for_each_supported_inject_fn(&args, inject_fns, f) { - test_injection(&args, f); - test_preemption(&args, f); - test_injection_failure(&args, f); + for_each_supported_inject_fn(args, inject_fns, f) { + test_injection(args, f); + test_preemption(args, f); + test_injection_failure(args, f); } - /* Restore the active state of IRQs. This would happen when live + /* + * Restore the active state of IRQs. This would happen when live * migrating IRQs in the middle of being handled. */ - for_each_supported_activate_fn(&args, set_active_fns, f) - test_restore_active(&args, f); + for_each_supported_activate_fn(args, set_active_fns, f) + test_restore_active(args, f); GUEST_DONE(); } @@ -573,8 +576,8 @@ static void kvm_set_gsi_routing_irqchip_check(struct kvm_vm *vm, kvm_gsi_routing_write(vm, routing); } else { ret = _kvm_gsi_routing_write(vm, routing); - /* The kernel only checks for KVM_IRQCHIP_NUM_PINS. 
*/ - if (intid >= KVM_IRQCHIP_NUM_PINS) + /* The kernel only checks e->irqchip.pin >= KVM_IRQCHIP_NUM_PINS */ + if (((uint64_t)intid + num - 1 - MIN_SPI) >= KVM_IRQCHIP_NUM_PINS) TEST_ASSERT(ret != 0 && errno == EINVAL, "Bad intid %u did not cause KVM_SET_GSI_ROUTING " "error: rc: %i errno: %i", intid, ret, errno); @@ -739,6 +742,7 @@ static void test_vgic(uint32_t nr_irqs, bool level_sensitive, bool eoi_split) int gic_fd; struct kvm_vm *vm; struct kvm_inject_args inject_args; + vm_vaddr_t args_gva; struct test_args args = { .nr_irqs = nr_irqs, @@ -757,7 +761,9 @@ static void test_vgic(uint32_t nr_irqs, bool level_sensitive, bool eoi_split) vcpu_init_descriptor_tables(vm, VCPU_ID); /* Setup the guest args page (so it gets the args). */ - vcpu_args_set(vm, 0, 1, args); + args_gva = vm_vaddr_alloc_page(vm); + memcpy(addr_gva2hva(vm, args_gva), &args, sizeof(args)); + vcpu_args_set(vm, 0, 1, args_gva); gic_fd = vgic_v3_setup(vm, 1, nr_irqs, GICD_BASE_GPA, GICR_BASE_GPA); @@ -841,7 +847,8 @@ int main(int argc, char **argv) } } - /* If the user just specified nr_irqs and/or gic_version, then run all + /* + * If the user just specified nr_irqs and/or gic_version, then run all * combinations. 
*/ if (default_args) { diff --git a/tools/testing/selftests/kvm/include/riscv/processor.h b/tools/testing/selftests/kvm/include/riscv/processor.h index dc284c6bdbc37..eca5c622efd25 100644 --- a/tools/testing/selftests/kvm/include/riscv/processor.h +++ b/tools/testing/selftests/kvm/include/riscv/processor.h @@ -101,7 +101,9 @@ static inline void set_reg(struct kvm_vm *vm, uint32_t vcpuid, uint64_t id, #define PGTBL_PTE_WRITE_SHIFT 2 #define PGTBL_PTE_READ_MASK 0x0000000000000002ULL #define PGTBL_PTE_READ_SHIFT 1 -#define PGTBL_PTE_PERM_MASK (PGTBL_PTE_EXECUTE_MASK | \ +#define PGTBL_PTE_PERM_MASK (PGTBL_PTE_ACCESSED_MASK | \ + PGTBL_PTE_DIRTY_MASK | \ + PGTBL_PTE_EXECUTE_MASK | \ PGTBL_PTE_WRITE_MASK | \ PGTBL_PTE_READ_MASK) #define PGTBL_PTE_VALID_MASK 0x0000000000000001ULL diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c b/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c index 00f613c0583cd..263bf3ed8fd55 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c +++ b/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c @@ -19,7 +19,7 @@ struct gicv3_data { unsigned int nr_spis; }; -#define sgi_base_from_redist(redist_base) (redist_base + SZ_64K) +#define sgi_base_from_redist(redist_base) (redist_base + SZ_64K) #define DIST_BIT (1U << 31) enum gicv3_intid_range { @@ -105,7 +105,8 @@ static void gicv3_set_eoi_split(bool split) { uint32_t val; - /* All other fields are read-only, so no need to read CTLR first. In + /* + * All other fields are read-only, so no need to read CTLR first. In * fact, the kernel does the same. */ val = split ? (1U << 1) : 0; @@ -159,9 +160,10 @@ static void gicv3_access_reg(uint32_t intid, uint64_t offset, uint32_t cpu_or_dist; GUEST_ASSERT(bits_per_field <= reg_bits); - GUEST_ASSERT(*val < (1U << bits_per_field)); - /* Some registers like IROUTER are 64 bit long. Those are currently not - * supported by readl nor writel, so just asserting here until then. 
+ GUEST_ASSERT(!write || *val < (1U << bits_per_field)); + /* + * This function does not support 64 bit accesses. Just asserting here + * until we implement readq/writeq. */ GUEST_ASSERT(reg_bits == 32); diff --git a/tools/testing/selftests/kvm/lib/aarch64/vgic.c b/tools/testing/selftests/kvm/lib/aarch64/vgic.c index f5cd0c536d85c..5d45046c1b805 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/vgic.c +++ b/tools/testing/selftests/kvm/lib/aarch64/vgic.c @@ -140,9 +140,6 @@ static void vgic_poke_irq(int gic_fd, uint32_t intid, uint64_t val; bool intid_is_private = INTID_IS_SGI(intid) || INTID_IS_PPI(intid); - /* Check that the addr part of the attr is within 32 bits. */ - assert(attr <= KVM_DEV_ARM_VGIC_OFFSET_MASK); - uint32_t group = intid_is_private ? KVM_DEV_ARM_VGIC_GRP_REDIST_REGS : KVM_DEV_ARM_VGIC_GRP_DIST_REGS; @@ -152,7 +149,11 @@ static void vgic_poke_irq(int gic_fd, uint32_t intid, attr += SZ_64K; } - /* All calls will succeed, even with invalid intid's, as long as the + /* Check that the addr part of the attr is within 32 bits. */ + assert((attr & ~KVM_DEV_ARM_VGIC_OFFSET_MASK) == 0); + + /* + * All calls will succeed, even with invalid intid's, as long as the * addr part of the attr is within 32 bits (checked above). An invalid * intid will just make the read/writes point to above the intended * register space (i.e., ICPENDR after ISPENDR). 
diff --git a/tools/testing/selftests/kvm/lib/riscv/processor.c b/tools/testing/selftests/kvm/lib/riscv/processor.c index d377f2603d98a..3961487a4870d 100644 --- a/tools/testing/selftests/kvm/lib/riscv/processor.c +++ b/tools/testing/selftests/kvm/lib/riscv/processor.c @@ -268,7 +268,7 @@ void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent) core.regs.t3, core.regs.t4, core.regs.t5, core.regs.t6); } -static void guest_hang(void) +static void __aligned(16) guest_hang(void) { while (1) ; diff --git a/tools/testing/selftests/lkdtm/config b/tools/testing/selftests/lkdtm/config index a26a3fa9e9255..8bd847f0463cd 100644 --- a/tools/testing/selftests/lkdtm/config +++ b/tools/testing/selftests/lkdtm/config @@ -6,6 +6,7 @@ CONFIG_HARDENED_USERCOPY=y # CONFIG_HARDENED_USERCOPY_FALLBACK is not set CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT=y CONFIG_INIT_ON_ALLOC_DEFAULT_ON=y +CONFIG_UBSAN=y CONFIG_UBSAN_BOUNDS=y CONFIG_UBSAN_TRAP=y CONFIG_STACKPROTECTOR_STRONG=y diff --git a/tools/testing/selftests/mqueue/mq_perf_tests.c b/tools/testing/selftests/mqueue/mq_perf_tests.c index b019e0b8221c7..84fda3b490735 100644 --- a/tools/testing/selftests/mqueue/mq_perf_tests.c +++ b/tools/testing/selftests/mqueue/mq_perf_tests.c @@ -180,6 +180,9 @@ void shutdown(int exit_val, char *err_cause, int line_no) if (in_shutdown++) return; + /* Free the cpu_set allocated using CPU_ALLOC in main function */ + CPU_FREE(cpu_set); + for (i = 0; i < num_cpus_to_pin; i++) if (cpu_threads[i]) { pthread_kill(cpu_threads[i], SIGUSR1); @@ -551,6 +554,12 @@ int main(int argc, char *argv[]) perror("sysconf(_SC_NPROCESSORS_ONLN)"); exit(1); } + + if (getuid() != 0) + ksft_exit_skip("Not running as root, but almost all tests " + "require root in order to modify\nsystem settings. 
" + "Exiting.\n"); + cpus_online = min(MAX_CPUS, sysconf(_SC_NPROCESSORS_ONLN)); cpu_set = CPU_ALLOC(cpus_online); if (cpu_set == NULL) { @@ -589,7 +598,7 @@ int main(int argc, char *argv[]) cpu_set)) { fprintf(stderr, "Any given CPU may " "only be given once.\n"); - exit(1); + goto err_code; } else CPU_SET_S(cpus_to_pin[cpu], cpu_set_size, cpu_set); @@ -607,7 +616,7 @@ int main(int argc, char *argv[]) queue_path = malloc(strlen(option) + 2); if (!queue_path) { perror("malloc()"); - exit(1); + goto err_code; } queue_path[0] = '/'; queue_path[1] = 0; @@ -622,17 +631,12 @@ int main(int argc, char *argv[]) fprintf(stderr, "Must pass at least one CPU to continuous " "mode.\n"); poptPrintUsage(popt_context, stderr, 0); - exit(1); + goto err_code; } else if (!continuous_mode) { num_cpus_to_pin = 1; cpus_to_pin[0] = cpus_online - 1; } - if (getuid() != 0) - ksft_exit_skip("Not running as root, but almost all tests " - "require root in order to modify\nsystem settings. " - "Exiting.\n"); - max_msgs = fopen(MAX_MSGS, "r+"); max_msgsize = fopen(MAX_MSGSIZE, "r+"); if (!max_msgs) @@ -740,4 +744,9 @@ int main(int argc, char *argv[]) sleep(1); } shutdown(0, "", 0); + +err_code: + CPU_FREE(cpu_set); + exit(1); + } diff --git a/tools/testing/selftests/net/af_unix/test_unix_oob.c b/tools/testing/selftests/net/af_unix/test_unix_oob.c index 3dece8b292536..b57e91e1c3f28 100644 --- a/tools/testing/selftests/net/af_unix/test_unix_oob.c +++ b/tools/testing/selftests/net/af_unix/test_unix_oob.c @@ -218,10 +218,10 @@ main(int argc, char **argv) /* Test 1: * veriyf that SIGURG is - * delivered and 63 bytes are - * read and oob is '@' + * delivered, 63 bytes are + * read, oob is '@', and POLLPRI works. 
*/ - wait_for_data(pfd, POLLIN | POLLPRI); + wait_for_data(pfd, POLLPRI); read_oob(pfd, &oob); len = read_data(pfd, buf, 1024); if (!signal_recvd || len != 63 || oob != '@') { diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index f0f4ab96b8f3e..621af6895f4d5 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -432,6 +432,8 @@ do_transfer() local stat_ackrx_last_l=$(get_mib_counter "${listener_ns}" "MPTcpExtMPCapableACKRX") local stat_cookietx_last=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesSent") local stat_cookierx_last=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesRecv") + local stat_csum_err_s=$(get_mib_counter "${listener_ns}" "MPTcpExtDataCsumErr") + local stat_csum_err_c=$(get_mib_counter "${connector_ns}" "MPTcpExtDataCsumErr") timeout ${timeout_test} \ ip netns exec ${listener_ns} \ @@ -524,6 +526,23 @@ do_transfer() fi fi + if $checksum; then + local csum_err_s=$(get_mib_counter "${listener_ns}" "MPTcpExtDataCsumErr") + local csum_err_c=$(get_mib_counter "${connector_ns}" "MPTcpExtDataCsumErr") + + local csum_err_s_nr=$((csum_err_s - stat_csum_err_s)) + if [ $csum_err_s_nr -gt 0 ]; then + printf "[ FAIL ]\nserver got $csum_err_s_nr data checksum error[s]" + rets=1 + fi + + local csum_err_c_nr=$((csum_err_c - stat_csum_err_c)) + if [ $csum_err_c_nr -gt 0 ]; then + printf "[ FAIL ]\nclient got $csum_err_c_nr data checksum error[s]" + retc=1 + fi + fi + if [ $retc -eq 0 ] && [ $rets -eq 0 ]; then printf "[ OK ]" fi diff --git a/tools/testing/selftests/net/test_vxlan_under_vrf.sh b/tools/testing/selftests/net/test_vxlan_under_vrf.sh index ea5a7a808f120..1fd1250ebc667 100755 --- a/tools/testing/selftests/net/test_vxlan_under_vrf.sh +++ b/tools/testing/selftests/net/test_vxlan_under_vrf.sh @@ -120,11 +120,11 @@ echo "[ OK ]" # Move the underlay to a non-default VRF ip -netns hv-1 link set veth0 vrf 
vrf-underlay -ip -netns hv-1 link set veth0 down -ip -netns hv-1 link set veth0 up +ip -netns hv-1 link set vxlan0 down +ip -netns hv-1 link set vxlan0 up ip -netns hv-2 link set veth0 vrf vrf-underlay -ip -netns hv-2 link set veth0 down -ip -netns hv-2 link set veth0 up +ip -netns hv-2 link set vxlan0 down +ip -netns hv-2 link set vxlan0 up echo -n "Check VM connectivity through VXLAN (underlay in a VRF) " ip netns exec vm-1 ping -c 1 -W 1 10.0.0.2 &> /dev/null || (echo "[FAIL]"; false) diff --git a/tools/testing/selftests/net/timestamping.c b/tools/testing/selftests/net/timestamping.c index aee631c5284eb..044bc0e9ed81a 100644 --- a/tools/testing/selftests/net/timestamping.c +++ b/tools/testing/selftests/net/timestamping.c @@ -325,8 +325,8 @@ int main(int argc, char **argv) struct ifreq device; struct ifreq hwtstamp; struct hwtstamp_config hwconfig, hwconfig_requested; - struct so_timestamping so_timestamping_get = { 0, -1 }; - struct so_timestamping so_timestamping = { 0, -1 }; + struct so_timestamping so_timestamping_get = { 0, 0 }; + struct so_timestamping so_timestamping = { 0, 0 }; struct sockaddr_in addr; struct ip_mreq imr; struct in_addr iaddr; diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index 6e468e0f42f78..5d70b04c482c9 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -683,6 +683,9 @@ TEST_F(tls, splice_cmsg_to_pipe) char buf[10]; int p[2]; + if (self->notls) + SKIP(return, "no TLS support"); + ASSERT_GE(pipe(p), 0); EXPECT_EQ(tls_send_cmsg(self->fd, 100, test_str, send_len, 0), 10); EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, send_len, 0), -1); @@ -703,6 +706,9 @@ TEST_F(tls, splice_dec_cmsg_to_pipe) char buf[10]; int p[2]; + if (self->notls) + SKIP(return, "no TLS support"); + ASSERT_GE(pipe(p), 0); EXPECT_EQ(tls_send_cmsg(self->fd, 100, test_str, send_len, 0), 10); EXPECT_EQ(recv(self->cfd, buf, send_len, 0), -1); diff --git 
a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh index eae88aacca2aa..b2929fb15f7e6 100755 --- a/tools/testing/selftests/rcutorture/bin/torture.sh +++ b/tools/testing/selftests/rcutorture/bin/torture.sh @@ -71,8 +71,8 @@ usage () { echo " --configs-rcutorture \"config-file list w/ repeat factor (3*TINY01)\"" echo " --configs-locktorture \"config-file list w/ repeat factor (10*LOCK01)\"" echo " --configs-scftorture \"config-file list w/ repeat factor (2*CFLIST)\"" - echo " --doall" - echo " --doallmodconfig / --do-no-allmodconfig" + echo " --do-all" + echo " --do-allmodconfig / --do-no-allmodconfig" echo " --do-clocksourcewd / --do-no-clocksourcewd" echo " --do-kasan / --do-no-kasan" echo " --do-kcsan / --do-no-kcsan" diff --git a/tools/testing/selftests/sgx/Makefile b/tools/testing/selftests/sgx/Makefile index 2956584e1e37f..75af864e07b65 100644 --- a/tools/testing/selftests/sgx/Makefile +++ b/tools/testing/selftests/sgx/Makefile @@ -4,7 +4,7 @@ include ../lib.mk .PHONY: all clean -CAN_BUILD_X86_64 := $(shell ../x86/check_cc.sh $(CC) \ +CAN_BUILD_X86_64 := $(shell ../x86/check_cc.sh "$(CC)" \ ../x86/trivial_64bit_program.c) ifndef OBJCOPY diff --git a/tools/testing/selftests/sgx/load.c b/tools/testing/selftests/sgx/load.c index 9d4322c946e2b..006b464c8fc94 100644 --- a/tools/testing/selftests/sgx/load.c +++ b/tools/testing/selftests/sgx/load.c @@ -21,7 +21,7 @@ void encl_delete(struct encl *encl) { - struct encl_segment *heap_seg = &encl->segment_tbl[encl->nr_segments - 1]; + struct encl_segment *heap_seg; if (encl->encl_base) munmap((void *)encl->encl_base, encl->encl_size); @@ -32,10 +32,11 @@ void encl_delete(struct encl *encl) if (encl->fd) close(encl->fd); - munmap(heap_seg->src, heap_seg->size); - - if (encl->segment_tbl) + if (encl->segment_tbl) { + heap_seg = &encl->segment_tbl[encl->nr_segments - 1]; + munmap(heap_seg->src, heap_seg->size); free(encl->segment_tbl); + } memset(encl, 0, sizeof(*encl)); } 
diff --git a/tools/testing/selftests/sgx/main.c b/tools/testing/selftests/sgx/main.c index 370c4995f7c4a..b0bd95a4730d5 100644 --- a/tools/testing/selftests/sgx/main.c +++ b/tools/testing/selftests/sgx/main.c @@ -147,6 +147,7 @@ static bool setup_test_encl(unsigned long heap_size, struct encl *encl, if (!encl_load("test_encl.elf", encl, heap_size)) { encl_delete(encl); TH_LOG("Failed to load the test enclave.\n"); + return false; } if (!encl_measure(encl)) @@ -185,8 +186,6 @@ static bool setup_test_encl(unsigned long heap_size, struct encl *encl, return true; err: - encl_delete(encl); - for (i = 0; i < encl->nr_segments; i++) { seg = &encl->segment_tbl[i]; @@ -207,6 +206,8 @@ static bool setup_test_encl(unsigned long heap_size, struct encl *encl, TH_LOG("Failed to initialize the test enclave.\n"); + encl_delete(encl); + return false; } diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index a14b5b8008970..1530c3e0242ef 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -51,9 +51,9 @@ TEST_GEN_FILES += split_huge_page_test TEST_GEN_FILES += ksm_tests ifeq ($(MACHINE),x86_64) -CAN_BUILD_I386 := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_32bit_program.c -m32) -CAN_BUILD_X86_64 := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_64bit_program.c) -CAN_BUILD_WITH_NOPIE := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_program.c -no-pie) +CAN_BUILD_I386 := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_32bit_program.c -m32) +CAN_BUILD_X86_64 := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_64bit_program.c) +CAN_BUILD_WITH_NOPIE := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_program.c -no-pie) TARGETS := protection_keys BINARIES_32 := $(TARGETS:%=%_32) diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c index 3fc1d2ee29485..c964bfe9fbcda 100644 --- a/tools/testing/selftests/vm/userfaultfd.c +++ 
b/tools/testing/selftests/vm/userfaultfd.c @@ -120,6 +120,9 @@ struct uffd_stats { ~(unsigned long)(sizeof(unsigned long long) \ - 1))) +#define swap(a, b) \ + do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0) + const char *examples = "# Run anonymous memory test on 100MiB region with 99999 bounces:\n" "./userfaultfd anon 100 99999\n\n" diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index 8a1f62ab3c8e6..53df7d3893d31 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -6,9 +6,9 @@ include ../lib.mk .PHONY: all all_32 all_64 warn_32bit_failure clean UNAME_M := $(shell uname -m) -CAN_BUILD_I386 := $(shell ./check_cc.sh $(CC) trivial_32bit_program.c -m32) -CAN_BUILD_X86_64 := $(shell ./check_cc.sh $(CC) trivial_64bit_program.c) -CAN_BUILD_WITH_NOPIE := $(shell ./check_cc.sh $(CC) trivial_program.c -no-pie) +CAN_BUILD_I386 := $(shell ./check_cc.sh "$(CC)" trivial_32bit_program.c -m32) +CAN_BUILD_X86_64 := $(shell ./check_cc.sh "$(CC)" trivial_64bit_program.c) +CAN_BUILD_WITH_NOPIE := $(shell ./check_cc.sh "$(CC)" trivial_program.c -no-pie) TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \ check_initial_reg_state sigreturn iopl ioperm \ diff --git a/tools/testing/selftests/x86/check_cc.sh b/tools/testing/selftests/x86/check_cc.sh index 3e2089c8cf549..8c669c0d662ee 100755 --- a/tools/testing/selftests/x86/check_cc.sh +++ b/tools/testing/selftests/x86/check_cc.sh @@ -7,7 +7,7 @@ CC="$1" TESTPROG="$2" shift 2 -if "$CC" -o /dev/null "$TESTPROG" -O0 "$@" 2>/dev/null; then +if [ -n "$CC" ] && $CC -o /dev/null "$TESTPROG" -O0 "$@" 2>/dev/null; then echo 1 else echo 0 diff --git a/tools/tracing/rtla/src/osnoise_hist.c b/tools/tracing/rtla/src/osnoise_hist.c index 52c053cc1789d..e88f5c870141e 100644 --- a/tools/tracing/rtla/src/osnoise_hist.c +++ b/tools/tracing/rtla/src/osnoise_hist.c @@ -782,7 +782,7 @@ int osnoise_hist_main(int argc, char 
*argv[]) return_value = 0; if (!tracefs_trace_is_on(trace->inst)) { - printf("rtla timelat hit stop tracing\n"); + printf("rtla osnoise hit stop tracing\n"); if (params->trace_output) { printf(" Saving trace to %s\n", params->trace_output); save_trace_to_file(record->trace.inst, params->trace_output); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 0afc016cc54d4..610cc7920c8a2 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -117,6 +117,8 @@ EXPORT_SYMBOL_GPL(kvm_debugfs_dir); static const struct file_operations stat_fops_per_vm; +static struct file_operations kvm_chardev_ops; + static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl, unsigned long arg); #ifdef CONFIG_KVM_COMPAT @@ -437,8 +439,8 @@ static void kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) static void kvm_vcpu_destroy(struct kvm_vcpu *vcpu) { - kvm_dirty_ring_free(&vcpu->dirty_ring); kvm_arch_vcpu_destroy(vcpu); + kvm_dirty_ring_free(&vcpu->dirty_ring); /* * No need for rcu_read_lock as VCPU_RUN is the only place that changes @@ -1137,6 +1139,16 @@ static struct kvm *kvm_create_vm(unsigned long type) preempt_notifier_inc(); kvm_init_pm_notifier(kvm); + /* + * When the fd passed to this ioctl() is opened it pins the module, + * but try_module_get() also prevents getting a reference if the module + * is in MODULE_STATE_GOING (e.g. if someone ran "rmmod --wait"). 
+ */ + if (!try_module_get(kvm_chardev_ops.owner)) { + r = -ENODEV; + goto out_err; + } + return kvm; out_err: @@ -1226,6 +1238,7 @@ static void kvm_destroy_vm(struct kvm *kvm) preempt_notifier_dec(); hardware_disable_all(); mmdrop(mm); + module_put(kvm_chardev_ops.owner); } void kvm_get_kvm(struct kvm *kvm) diff --git a/virt/kvm/pfncache.c b/virt/kvm/pfncache.c index ce878f4be4daa..1621f8efd9616 100644 --- a/virt/kvm/pfncache.c +++ b/virt/kvm/pfncache.c @@ -191,6 +191,7 @@ int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, gpc->uhva = gfn_to_hva_memslot(gpc->memslot, gfn); if (kvm_is_error_hva(gpc->uhva)) { + gpc->pfn = KVM_PFN_ERR_FAULT; ret = -EFAULT; goto out; }